webui: auto-refresh /props on inference start to resync model metadata (#16784)
* webui: auto-refresh /props on inference start to resync model metadata
  - Add no-cache headers to /props and /slots
  - Throttle slot checks to 30s
  - Prevent concurrent fetches with a promise guard
  - Trigger refresh from chat streaming for legacy and ModelSelector modes
  - Show a dynamic serverWarning when using cached data

* fix: restore proper legacy behavior in webui by using unified /props refresh

  Updated assistant message bubbles to show each message's stored model when available, falling back to the current server model only when the per-message value is missing.

  When the model selector is disabled, the webui now fetches /props and prioritizes that model name over chunk metadata, then persists it with the streamed message so legacy mode properly reflects the backend configuration.

* fix: detect first valid SSE chunk and refresh server props once

* fix: removed the slots availability throttle constant and state

* webui: purge ai-generated cruft

* chore: update webui static build
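A note on the "promise guard" bullet: it is the usual single-flight pattern, where concurrent callers share one in-flight request instead of issuing duplicate /props fetches. A minimal hedged sketch follows; getServerProps here is a hypothetical stand-in, not the actual webui helper:

    // Single-flight sketch: dedupe concurrent /props fetches behind one promise.
    // `getServerProps` is a hypothetical stand-in for the real fetch helper.
    async function getServerProps(): Promise<unknown> {
        const res = await fetch('/props');
        if (!res.ok) throw new Error(`HTTP ${res.status}`);
        return res.json();
    }

    let inflight: Promise<unknown> | null = null;

    function fetchServerPropsOnce(): Promise<unknown> {
        if (inflight) return inflight; // reuse the pending request
        inflight = getServerProps().finally(() => {
            inflight = null; // allow the next refresh afterwards
        });
        return inflight;
    }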
Binary file not shown (the rebuilt webui static bundle from the "chore: update webui static build" step).
@@ -85,8 +85,8 @@
 	let displayedModel = $derived((): string | null => {
 		if (!currentConfig.showModelInfo) return null;
 
-		if (currentConfig.modelSelectorEnabled) {
-			return message.model ?? null;
+		if (message.model) {
+			return message.model;
 		}
 
 		return serverModel;
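The behavioral change here: the per-message stored model now wins whenever it is present, regardless of the selector setting, and the live server model is only the fallback. As a one-line sketch with the names from the hunk above:

    // Prefer the model persisted with the message; fall back to the live server model.
    const displayed = message.model ?? serverModel;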
@@ -54,6 +54,7 @@ export class ChatService {
 			onError,
 			onReasoningChunk,
 			onModel,
+			onFirstValidChunk,
 			// Generation parameters
 			temperature,
 			max_tokens,
@@ -201,6 +202,7 @@ export class ChatService {
 			onError,
 			onReasoningChunk,
 			onModel,
+			onFirstValidChunk,
 			conversationId,
 			abortController.signal
 		);
@@ -267,6 +269,7 @@ export class ChatService {
 		onError?: (error: Error) => void,
 		onReasoningChunk?: (chunk: string) => void,
 		onModel?: (model: string) => void,
+		onFirstValidChunk?: () => void,
 		conversationId?: string,
 		abortSignal?: AbortSignal
 	): Promise<void> {
@@ -283,6 +286,7 @@ export class ChatService {
 		let lastTimings: ChatMessageTimings | undefined;
 		let streamFinished = false;
 		let modelEmitted = false;
+		let firstValidChunkEmitted = false;
 
 		try {
 			let chunk = '';
@@ -311,10 +315,12 @@ export class ChatService {
 				try {
 					const parsed: ApiChatCompletionStreamChunk = JSON.parse(data);
 
-					const chunkModel = this.extractModelName(parsed);
-					if (chunkModel && !modelEmitted) {
-						modelEmitted = true;
-						onModel?.(chunkModel);
+					if (!firstValidChunkEmitted && parsed.object === 'chat.completion.chunk') {
+						firstValidChunkEmitted = true;
+
+						if (!abortSignal?.aborted) {
+							onFirstValidChunk?.();
+						}
 					}
 
 					const content = parsed.choices[0]?.delta?.content;
@@ -322,6 +328,12 @@ export class ChatService {
 					const timings = parsed.timings;
 					const promptProgress = parsed.prompt_progress;
 
+					const chunkModel = this.extractModelName(parsed);
+					if (chunkModel && !modelEmitted) {
+						modelEmitted = true;
+						onModel?.(chunkModel);
+					}
+
 					if (timings || promptProgress) {
 						this.updateProcessingState(timings, promptProgress, conversationId);
 						if (timings) {
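Design note on the last two hunks: the first-chunk gate now keys on the OpenAI-compatible `object` discriminator rather than on whether a model name could be extracted, so the /props refresh fires even for chunks that carry no model field, while model emission moves to its own block. A reduced standalone sketch of the gate, with the callback threaded through the diff above:

    // Reduced sketch of the first-valid-chunk gate from the hunk above.
    let firstValidChunkEmitted = false;

    function handleChunk(
        parsed: { object?: string },
        aborted: boolean,
        onFirstValidChunk?: () => void
    ): void {
        if (!firstValidChunkEmitted && parsed.object === 'chat.completion.chunk') {
            firstValidChunkEmitted = true;
            if (!aborted) onFirstValidChunk?.(); // fire exactly once per stream
        }
    }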
@@ -1,6 +1,7 @@
 import { DatabaseStore } from '$lib/stores/database';
 import { chatService, slotsService } from '$lib/services';
 import { config } from '$lib/stores/settings.svelte';
+import { serverStore } from '$lib/stores/server.svelte';
 import { normalizeModelName } from '$lib/utils/model-names';
 import { filterByLeafNodeId, findLeafNode, findDescendantMessages } from '$lib/utils/branching';
 import { browser } from '$app/environment';
@@ -362,9 +363,41 @@ class ChatStore {
 
 		let resolvedModel: string | null = null;
 		let modelPersisted = false;
+		const currentConfig = config();
+		const preferServerPropsModel = !currentConfig.modelSelectorEnabled;
+		let serverPropsRefreshed = false;
+		let updateModelFromServerProps: ((persistImmediately?: boolean) => void) | null = null;
 
-		const recordModel = (modelName: string, persistImmediately = true): void => {
-			const normalizedModel = normalizeModelName(modelName);
+		const refreshServerPropsOnce = () => {
+			if (serverPropsRefreshed) {
+				return;
+			}
+
+			serverPropsRefreshed = true;
+
+			const hasExistingProps = serverStore.serverProps !== null;
+
+			serverStore
+				.fetchServerProps({ silent: hasExistingProps })
+				.then(() => {
+					updateModelFromServerProps?.(true);
+				})
+				.catch((error) => {
+					console.warn('Failed to refresh server props after streaming started:', error);
+				});
+		};
+
+		const recordModel = (modelName: string | null | undefined, persistImmediately = true): void => {
+			const serverModelName = serverStore.modelName;
+			const preferredModelSource = preferServerPropsModel
+				? (serverModelName ?? modelName ?? null)
+				: (modelName ?? serverModelName ?? null);
+
+			if (!preferredModelSource) {
+				return;
+			}
+
+			const normalizedModel = normalizeModelName(preferredModelSource);
+
 			if (!normalizedModel || normalizedModel === resolvedModel) {
 				return;
@@ -388,6 +421,20 @@ class ChatStore {
 			}
 		};
 
+		if (preferServerPropsModel) {
+			updateModelFromServerProps = (persistImmediately = true) => {
+				const currentServerModel = serverStore.modelName;
+
+				if (!currentServerModel) {
+					return;
+				}
+
+				recordModel(currentServerModel, persistImmediately);
+			};
+
+			updateModelFromServerProps(false);
+		}
+
 		slotsService.startStreaming();
 		slotsService.setActiveConversation(assistantMessage.convId);
 
@@ -396,6 +443,9 @@ class ChatStore {
 			{
 				...this.getApiOptions(),
+				onFirstValidChunk: () => {
+					refreshServerPropsOnce();
+				},
 				onChunk: (chunk: string) => {
 					streamedContent += chunk;
 					this.setConversationStreaming(
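The preference order inside recordModel reads as a two-branch coalesce: with the model selector disabled, the /props model wins over chunk metadata; with it enabled, the streamed chunk wins. A compact sketch, using plain strings in place of the store getters:

    // Selector disabled -> trust /props first; selector enabled -> trust the chunk first.
    function pickModel(
        preferServerProps: boolean,
        serverModel: string | null,
        chunkModel: string | null
    ): string | null {
        return preferServerProps
            ? (serverModel ?? chunkModel ?? null)
            : (chunkModel ?? serverModel ?? null);
    }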
@@ -52,6 +52,7 @@ class ServerStore {
 	private _error = $state<string | null>(null);
 	private _serverWarning = $state<string | null>(null);
 	private _slotsEndpointAvailable = $state<boolean | null>(null);
+	private fetchServerPropsPromise: Promise<void> | null = null;
 
 	private readCachedServerProps(): ApiLlamaCppServerProps | null {
 		if (!browser) return null;
@@ -171,57 +172,63 @@ class ServerStore {
 	/**
 	 * Fetches server properties from the server
 	 */
-	async fetchServerProps(): Promise<void> {
-		this._loading = true;
-		this._error = null;
-		this._serverWarning = null;
+	async fetchServerProps(options: { silent?: boolean } = {}): Promise<void> {
+		const { silent = false } = options;
+		const isSilent = silent && this._serverProps !== null;
+
+		if (this.fetchServerPropsPromise) {
+			return this.fetchServerPropsPromise;
+		}
+
+		if (!isSilent) {
+			this._loading = true;
+			this._error = null;
+			this._serverWarning = null;
+		}
+
+		const hadProps = this._serverProps !== null;
+
+		const fetchPromise = (async () => {
 			try {
-				console.log('Fetching server properties...');
 				const props = await ChatService.getServerProps();
 				this._serverProps = props;
 				this.persistServerProps(props);
-				console.log('Server properties loaded:', props);
-				// Check slots endpoint availability after server props are loaded
+				this._error = null;
+				this._serverWarning = null;
+
 				await this.checkSlotsEndpointAvailability();
 			} catch (error) {
-				const hadCachedProps = this._serverProps !== null;
-				let errorMessage = 'Failed to connect to server';
-				let isOfflineLikeError = false;
-				let isServerSideError = false;
-
-				if (error instanceof Error) {
-					// Handle specific error types with user-friendly messages
-					if (error.name === 'TypeError' && error.message.includes('fetch')) {
-						errorMessage = 'Server is not running or unreachable';
-						isOfflineLikeError = true;
-					} else if (error.message.includes('ECONNREFUSED')) {
-						errorMessage = 'Connection refused - server may be offline';
-						isOfflineLikeError = true;
-					} else if (error.message.includes('ENOTFOUND')) {
-						errorMessage = 'Server not found - check server address';
-						isOfflineLikeError = true;
-					} else if (error.message.includes('ETIMEDOUT')) {
-						errorMessage = 'Request timed out - the server took too long to respond';
-						isOfflineLikeError = true;
-					} else if (error.message.includes('503')) {
-						errorMessage = 'Server temporarily unavailable - try again shortly';
-						isServerSideError = true;
-					} else if (error.message.includes('500')) {
-						errorMessage = 'Server error - check server logs';
-						isServerSideError = true;
-					} else if (error.message.includes('404')) {
-						errorMessage = 'Server endpoint not found';
-					} else if (error.message.includes('403') || error.message.includes('401')) {
-						errorMessage = 'Access denied';
-					}
+				if (isSilent && hadProps) {
+					console.warn('Silent server props refresh failed, keeping cached data:', error);
+					return;
 				}
+
+				this.handleFetchServerPropsError(error, hadProps);
+			} finally {
+				if (!isSilent) {
+					this._loading = false;
+				}
+
+				this.fetchServerPropsPromise = null;
 			}
+		})();
+
+		this.fetchServerPropsPromise = fetchPromise;
+
+		await fetchPromise;
+	}
+
+	/**
+	 * Handles fetch failures by attempting to recover cached server props and
+	 * updating the user-facing error or warning state appropriately.
+	 */
+	private handleFetchServerPropsError(error: unknown, hadProps: boolean): void {
+		const { errorMessage, isOfflineLikeError, isServerSideError } = this.normalizeFetchError(error);
 
 		let cachedProps: ApiLlamaCppServerProps | null = null;
 
-		if (!hadCachedProps) {
+		if (!hadProps) {
 			cachedProps = this.readCachedServerProps();
 
 			if (cachedProps) {
 				this._serverProps = cachedProps;
 				this._error = null;
@@ -249,10 +256,48 @@ class ServerStore {
 					errorMessage
 				);
 			}
 
 			console.error('Error fetching server properties:', error);
-		} finally {
-			this._loading = false;
 		}
+
+	private normalizeFetchError(error: unknown): {
+		errorMessage: string;
+		isOfflineLikeError: boolean;
+		isServerSideError: boolean;
+	} {
+		let errorMessage = 'Failed to connect to server';
+		let isOfflineLikeError = false;
+		let isServerSideError = false;
+
+		if (error instanceof Error) {
+			const message = error.message || '';
+
+			if (error.name === 'TypeError' && message.includes('fetch')) {
+				errorMessage = 'Server is not running or unreachable';
+				isOfflineLikeError = true;
+			} else if (message.includes('ECONNREFUSED')) {
+				errorMessage = 'Connection refused - server may be offline';
+				isOfflineLikeError = true;
+			} else if (message.includes('ENOTFOUND')) {
+				errorMessage = 'Server not found - check server address';
+				isOfflineLikeError = true;
+			} else if (message.includes('ETIMEDOUT')) {
+				errorMessage = 'Request timed out - the server took too long to respond';
+				isOfflineLikeError = true;
+			} else if (message.includes('503')) {
+				errorMessage = 'Server temporarily unavailable - try again shortly';
+				isServerSideError = true;
+			} else if (message.includes('500')) {
+				errorMessage = 'Server error - check server logs';
+				isServerSideError = true;
+			} else if (message.includes('404')) {
+				errorMessage = 'Server endpoint not found';
+			} else if (message.includes('403') || message.includes('401')) {
+				errorMessage = 'Access denied';
+			}
+		}
+
+		return { errorMessage, isOfflineLikeError, isServerSideError };
 	}
 
 	/**
@@ -264,6 +309,7 @@ class ServerStore {
 		this._serverWarning = null;
 		this._loading = false;
 		this._slotsEndpointAvailable = null;
+		this.fetchServerPropsPromise = null;
 		this.persistServerProps(null);
 	}
 }
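Usage implication of the silent flag: a background refresh neither toggles the loading spinner nor clobbers cached props on failure, while a cold start (no cached props) still surfaces loading and error state. A call-site sketch mirroring the refreshServerPropsOnce wiring above:

    // Only go silent when cached props already exist, so first load still
    // shows loading/error state to the user.
    const hasExistingProps = serverStore.serverProps !== null;
    await serverStore.fetchServerProps({ silent: hasExistingProps });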
tools/server/webui/src/lib/types/api.d.ts (vendored):
@@ -186,6 +186,7 @@ export interface ApiChatCompletionRequest {
 }
 
 export interface ApiChatCompletionStreamChunk {
+	object?: string;
 	model?: string;
 	choices: Array<{
 		model?: string;
@@ -42,6 +42,7 @@ export interface SettingsChatServiceOptions {
 	onChunk?: (chunk: string) => void;
 	onReasoningChunk?: (chunk: string) => void;
 	onModel?: (model: string) => void;
+	onFirstValidChunk?: () => void;
 	onComplete?: (response: string, reasoningContent?: string, timings?: ChatMessageTimings) => void;
 	onError?: (error: Error) => void;
 }