webui: auto-refresh /props on inference start to resync model metadata (#16784)

* webui: auto-refresh /props on inference start to resync model metadata

- Add no-cache headers to /props and /slots
- Throttle slot checks to 30s
- Prevent concurrent fetches with promise guard
- Trigger refresh from chat streaming for legacy and ModelSelector
- Show dynamic serverWarning when using cached data
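
A minimal sketch of the two request-level patterns named above (no-cache headers and the promise guard), in illustrative TypeScript; the baseUrl parameter and function name are assumptions, not the PR's code:

// Deduplicate concurrent /props fetches with a shared in-flight promise,
// and bypass HTTP caches so the response reflects the current model.
let propsFetch: Promise<unknown> | null = null;

async function fetchProps(baseUrl: string): Promise<unknown> {
	// Piggyback on an in-flight request instead of issuing a second one.
	if (propsFetch) return propsFetch;

	propsFetch = fetch(`${baseUrl}/props`, {
		headers: { 'Cache-Control': 'no-cache' }, // ask intermediary caches for fresh data
		cache: 'no-store' // skip the browser HTTP cache entirely
	})
		.then((res) => {
			if (!res.ok) throw new Error(`HTTP ${res.status}`);
			return res.json();
		})
		.finally(() => {
			// Clear the guard so later callers trigger a new fetch.
			propsFetch = null;
		});

	return propsFetch;
}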

* fix: restore proper legacy behavior in webui by using unified /props refresh

Updated assistant message bubbles to show each message's stored model when available,
falling back to the current server model only when the per-message value is missing.

When the model selector is disabled, the webui now fetches /props and prioritizes that
model name over chunk metadata, then persists it with the streamed message so legacy
mode properly reflects the backend configuration.
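
Condensed, that precedence reads as follows (a hypothetical standalone function; the real logic lives in the chat store's recordModel shown further down):

// Legacy mode (selector disabled) trusts /props first; selector mode
// trusts the per-chunk metadata first.
function pickModelName(
	modelSelectorEnabled: boolean,
	serverPropsModel: string | null, // name reported by /props
	chunkModel: string | null // name carried in the SSE chunk
): string | null {
	return modelSelectorEnabled
		? (chunkModel ?? serverPropsModel)
		: (serverPropsModel ?? chunkModel);
}

console.log(pickModelName(false, 'qwen2.5-coder', 'other-model')); // 'qwen2.5-coder'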

* fix: detect first valid SSE chunk and refresh server props once

* fix: remove the slots availability throttle constant and state

* webui: purge ai-generated cruft

* chore: update webui static build
Authored by Pascal on 2025-11-01 19:49:51 +01:00, committed by GitHub
parent e4a71599e5
commit 2f68ce7cfd
7 changed files with 180 additions and 70 deletions

Binary file not shown.


@@ -85,8 +85,8 @@
 	let displayedModel = $derived((): string | null => {
 		if (!currentConfig.showModelInfo) return null;
 
-		if (currentConfig.modelSelectorEnabled) {
-			return message.model ?? null;
+		if (message.model) {
+			return message.model;
 		}
 
 		return serverModel;


@@ -54,6 +54,7 @@ export class ChatService {
 			onError,
 			onReasoningChunk,
 			onModel,
+			onFirstValidChunk,
 			// Generation parameters
 			temperature,
 			max_tokens,
@@ -201,6 +202,7 @@ export class ChatService {
 				onError,
 				onReasoningChunk,
 				onModel,
+				onFirstValidChunk,
 				conversationId,
 				abortController.signal
 			);
@@ -267,6 +269,7 @@
 		onError?: (error: Error) => void,
 		onReasoningChunk?: (chunk: string) => void,
 		onModel?: (model: string) => void,
+		onFirstValidChunk?: () => void,
 		conversationId?: string,
 		abortSignal?: AbortSignal
 	): Promise<void> {
@@ -283,6 +286,7 @@
 		let lastTimings: ChatMessageTimings | undefined;
 		let streamFinished = false;
 		let modelEmitted = false;
+		let firstValidChunkEmitted = false;
 
 		try {
 			let chunk = '';
@@ -311,10 +315,12 @@
 					try {
 						const parsed: ApiChatCompletionStreamChunk = JSON.parse(data);
-						const chunkModel = this.extractModelName(parsed);
 
-						if (chunkModel && !modelEmitted) {
-							modelEmitted = true;
-							onModel?.(chunkModel);
+						if (!firstValidChunkEmitted && parsed.object === 'chat.completion.chunk') {
+							firstValidChunkEmitted = true;
+
+							if (!abortSignal?.aborted) {
+								onFirstValidChunk?.();
+							}
 						}
 
 						const content = parsed.choices[0]?.delta?.content;

@@ -322,6 +328,12 @@
 						const timings = parsed.timings;
 						const promptProgress = parsed.prompt_progress;
+						const chunkModel = this.extractModelName(parsed);
+
+						if (chunkModel && !modelEmitted) {
+							modelEmitted = true;
+							onModel?.(chunkModel);
+						}
 
 						if (timings || promptProgress) {
 							this.updateProcessingState(timings, promptProgress, conversationId);
 							if (timings) {
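
Reduced to a standalone consumer, the gate introduced above behaves like this (a sketch with simplified types; the service's real signature differs):

type StreamChunk = {
	object?: string;
	choices?: Array<{ delta?: { content?: string } }>;
};

async function consumeStream(
	chunks: AsyncIterable<StreamChunk>,
	onFirstValidChunk?: () => void,
	onChunk?: (text: string) => void,
	abortSignal?: AbortSignal
): Promise<void> {
	let firstValidChunkEmitted = false;

	for await (const parsed of chunks) {
		// Only a real completion chunk proves inference started; error
		// payloads or keep-alives must not trigger the props refresh.
		if (!firstValidChunkEmitted && parsed.object === 'chat.completion.chunk') {
			firstValidChunkEmitted = true;
			if (!abortSignal?.aborted) onFirstValidChunk?.();
		}

		const content = parsed.choices?.[0]?.delta?.content;
		if (content) onChunk?.(content);
	}
}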


@@ -1,6 +1,7 @@
 import { DatabaseStore } from '$lib/stores/database';
 import { chatService, slotsService } from '$lib/services';
 import { config } from '$lib/stores/settings.svelte';
+import { serverStore } from '$lib/stores/server.svelte';
 import { normalizeModelName } from '$lib/utils/model-names';
 import { filterByLeafNodeId, findLeafNode, findDescendantMessages } from '$lib/utils/branching';
 import { browser } from '$app/environment';
@@ -362,9 +363,41 @@ class ChatStore {
 		let resolvedModel: string | null = null;
 		let modelPersisted = false;
 		const currentConfig = config();
+		const preferServerPropsModel = !currentConfig.modelSelectorEnabled;
+		let serverPropsRefreshed = false;
+		let updateModelFromServerProps: ((persistImmediately?: boolean) => void) | null = null;
 
-		const recordModel = (modelName: string, persistImmediately = true): void => {
-			const normalizedModel = normalizeModelName(modelName);
+		const refreshServerPropsOnce = () => {
+			if (serverPropsRefreshed) {
+				return;
+			}
+
+			serverPropsRefreshed = true;
+
+			const hasExistingProps = serverStore.serverProps !== null;
+
+			serverStore
+				.fetchServerProps({ silent: hasExistingProps })
+				.then(() => {
+					updateModelFromServerProps?.(true);
+				})
+				.catch((error) => {
+					console.warn('Failed to refresh server props after streaming started:', error);
+				});
+		};
+
+		const recordModel = (modelName: string | null | undefined, persistImmediately = true): void => {
+			const serverModelName = serverStore.modelName;
+			const preferredModelSource = preferServerPropsModel
+				? (serverModelName ?? modelName ?? null)
+				: (modelName ?? serverModelName ?? null);
+
+			if (!preferredModelSource) {
+				return;
+			}
+
+			const normalizedModel = normalizeModelName(preferredModelSource);
 
 			if (!normalizedModel || normalizedModel === resolvedModel) {
 				return;

@@ -388,6 +421,20 @@ class ChatStore {
 			}
 		};
 
+		if (preferServerPropsModel) {
+			updateModelFromServerProps = (persistImmediately = true) => {
+				const currentServerModel = serverStore.modelName;
+
+				if (!currentServerModel) {
+					return;
+				}
+
+				recordModel(currentServerModel, persistImmediately);
+			};
+
+			updateModelFromServerProps(false);
+		}
+
 		slotsService.startStreaming();
 		slotsService.setActiveConversation(assistantMessage.convId);

@@ -396,6 +443,9 @@ class ChatStore {
 			{
 				...this.getApiOptions(),
 
+				onFirstValidChunk: () => {
+					refreshServerPropsOnce();
+				},
 				onChunk: (chunk: string) => {
 					streamedContent += chunk;
 					this.setConversationStreaming(
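
refreshServerPropsOnce above is a run-once latch around the store call; the generic shape of that pattern (illustrative, not from the PR) is:

// Wrap a function so repeat invocations become no-ops.
function once<T extends unknown[]>(fn: (...args: T) => void): (...args: T) => void {
	let called = false;
	return (...args: T) => {
		if (called) return;
		called = true;
		fn(...args);
	};
}

const refreshOnce = once(() => console.log('refreshing /props'));
refreshOnce(); // runs
refreshOnce(); // ignored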


@@ -52,6 +52,7 @@ class ServerStore {
 	private _error = $state<string | null>(null);
 	private _serverWarning = $state<string | null>(null);
 	private _slotsEndpointAvailable = $state<boolean | null>(null);
+	private fetchServerPropsPromise: Promise<void> | null = null;
 
 	private readCachedServerProps(): ApiLlamaCppServerProps | null {
 		if (!browser) return null;
@@ -171,73 +172,65 @@
 	/**
 	 * Fetches server properties from the server
 	 */
-	async fetchServerProps(): Promise<void> {
-		this._loading = true;
-		this._error = null;
-		this._serverWarning = null;
+	async fetchServerProps(options: { silent?: boolean } = {}): Promise<void> {
+		const { silent = false } = options;
+		const isSilent = silent && this._serverProps !== null;
 
-		try {
-			console.log('Fetching server properties...');
-			const props = await ChatService.getServerProps();
-			this._serverProps = props;
-			this.persistServerProps(props);
-			console.log('Server properties loaded:', props);
+		if (this.fetchServerPropsPromise) {
+			return this.fetchServerPropsPromise;
+		}
 
-			// Check slots endpoint availability after server props are loaded
-			await this.checkSlotsEndpointAvailability();
-		} catch (error) {
-			const hadCachedProps = this._serverProps !== null;
-			let errorMessage = 'Failed to connect to server';
-			let isOfflineLikeError = false;
-			let isServerSideError = false;
+		if (!isSilent) {
+			this._loading = true;
+			this._error = null;
+			this._serverWarning = null;
+		}
 
-			if (error instanceof Error) {
-				// Handle specific error types with user-friendly messages
-				if (error.name === 'TypeError' && error.message.includes('fetch')) {
-					errorMessage = 'Server is not running or unreachable';
-					isOfflineLikeError = true;
-				} else if (error.message.includes('ECONNREFUSED')) {
-					errorMessage = 'Connection refused - server may be offline';
-					isOfflineLikeError = true;
-				} else if (error.message.includes('ENOTFOUND')) {
-					errorMessage = 'Server not found - check server address';
-					isOfflineLikeError = true;
-				} else if (error.message.includes('ETIMEDOUT')) {
-					errorMessage = 'Request timed out - the server took too long to respond';
-					isOfflineLikeError = true;
-				} else if (error.message.includes('503')) {
-					errorMessage = 'Server temporarily unavailable - try again shortly';
-					isServerSideError = true;
-				} else if (error.message.includes('500')) {
-					errorMessage = 'Server error - check server logs';
-					isServerSideError = true;
-				} else if (error.message.includes('404')) {
-					errorMessage = 'Server endpoint not found';
-				} else if (error.message.includes('403') || error.message.includes('401')) {
-					errorMessage = 'Access denied';
-				}
-			}
+		const hadProps = this._serverProps !== null;
 
+		const fetchPromise = (async () => {
+			try {
+				const props = await ChatService.getServerProps();
+				this._serverProps = props;
+				this.persistServerProps(props);
+				this._error = null;
+				this._serverWarning = null;
 
+				await this.checkSlotsEndpointAvailability();
+			} catch (error) {
+				if (isSilent && hadProps) {
+					console.warn('Silent server props refresh failed, keeping cached data:', error);
+					return;
+				}
 
+				this.handleFetchServerPropsError(error, hadProps);
+			} finally {
+				if (!isSilent) {
+					this._loading = false;
+				}
+				this.fetchServerPropsPromise = null;
+			}
+		})();
 
-			let cachedProps: ApiLlamaCppServerProps | null = null;
+		this.fetchServerPropsPromise = fetchPromise;
 
-			if (!hadCachedProps) {
-				cachedProps = this.readCachedServerProps();
+		await fetchPromise;
+	}
 
-				if (cachedProps) {
-					this._serverProps = cachedProps;
-					this._error = null;
+	/**
+	 * Handles fetch failures by attempting to recover cached server props and
+	 * updating the user-facing error or warning state appropriately.
+	 */
+	private handleFetchServerPropsError(error: unknown, hadProps: boolean): void {
+		const { errorMessage, isOfflineLikeError, isServerSideError } = this.normalizeFetchError(error);
 
-					if (isOfflineLikeError || isServerSideError) {
-						this._serverWarning = errorMessage;
-					}
+		let cachedProps: ApiLlamaCppServerProps | null = null;
 
+		if (!hadProps) {
+			cachedProps = this.readCachedServerProps();
 
+			if (cachedProps) {
+				this._serverProps = cachedProps;
+				this._error = null;
 
+				if (isOfflineLikeError || isServerSideError) {
@@ -245,14 +238,66 @@
+					this._serverWarning = errorMessage;
+				}
 
 				console.warn(
-					'Failed to refresh server properties, continuing with cached values:',
+					'Failed to refresh server properties, using cached values from localStorage:',
 					errorMessage
 				);
 			} else {
 				this._error = errorMessage;
 			}
-			console.error('Error fetching server properties:', error);
-		} finally {
-			this._loading = false;
+		} else {
+			this._error = null;
 
+			if (isOfflineLikeError || isServerSideError) {
+				this._serverWarning = errorMessage;
+			}
 
+			console.warn(
+				'Failed to refresh server properties, continuing with cached values:',
+				errorMessage
+			);
 		}
 
+		console.error('Error fetching server properties:', error);
+	}
 
+	private normalizeFetchError(error: unknown): {
+		errorMessage: string;
+		isOfflineLikeError: boolean;
+		isServerSideError: boolean;
+	} {
+		let errorMessage = 'Failed to connect to server';
+		let isOfflineLikeError = false;
+		let isServerSideError = false;
 
+		if (error instanceof Error) {
+			const message = error.message || '';
 
+			if (error.name === 'TypeError' && message.includes('fetch')) {
+				errorMessage = 'Server is not running or unreachable';
+				isOfflineLikeError = true;
+			} else if (message.includes('ECONNREFUSED')) {
+				errorMessage = 'Connection refused - server may be offline';
+				isOfflineLikeError = true;
+			} else if (message.includes('ENOTFOUND')) {
+				errorMessage = 'Server not found - check server address';
+				isOfflineLikeError = true;
+			} else if (message.includes('ETIMEDOUT')) {
+				errorMessage = 'Request timed out - the server took too long to respond';
+				isOfflineLikeError = true;
+			} else if (message.includes('503')) {
+				errorMessage = 'Server temporarily unavailable - try again shortly';
+				isServerSideError = true;
+			} else if (message.includes('500')) {
+				errorMessage = 'Server error - check server logs';
+				isServerSideError = true;
+			} else if (message.includes('404')) {
+				errorMessage = 'Server endpoint not found';
+			} else if (message.includes('403') || message.includes('401')) {
+				errorMessage = 'Access denied';
+			}
+		}
 
+		return { errorMessage, isOfflineLikeError, isServerSideError };
+	}
@@ -264,6 +309,7 @@ class ServerStore {
 		this._serverWarning = null;
 		this._loading = false;
 		this._slotsEndpointAvailable = null;
+		this.fetchServerPropsPromise = null;
 		this.persistServerProps(null);
 	}
 }
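
With the stored promise acting as the guard, overlapping callers should coalesce into a single request; a usage sketch of the assumed behavior:

// Both calls share the same in-flight promise, so only one GET /props
// should reach the server.
const first = serverStore.fetchServerProps();
const second = serverStore.fetchServerProps();

await Promise.all([first, second]);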


@@ -186,6 +186,7 @@ export interface ApiChatCompletionRequest {
 }
 
 export interface ApiChatCompletionStreamChunk {
+	object?: string;
 	model?: string;
 	choices: Array<{
 		model?: string;


@@ -42,6 +42,7 @@ export interface SettingsChatServiceOptions {
 	onChunk?: (chunk: string) => void;
 	onReasoningChunk?: (chunk: string) => void;
 	onModel?: (model: string) => void;
+	onFirstValidChunk?: () => void;
 	onComplete?: (response: string, reasoningContent?: string, timings?: ChatMessageTimings) => void;
 	onError?: (error: Error) => void;
 }
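
A hypothetical caller wiring the new hook (handler bodies are placeholders):

const options: SettingsChatServiceOptions = {
	onFirstValidChunk: () => {
		// First confirmed chat.completion.chunk: safe point to resync /props.
		console.log('inference started');
	},
	onModel: (model) => console.log('stream reported model:', model),
	onError: (error) => console.error('stream failed:', error)
};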