diff --git a/tools/server/public/index.html.gz b/tools/server/public/index.html.gz index 501fa455a2..b71690cc81 100644 Binary files a/tools/server/public/index.html.gz and b/tools/server/public/index.html.gz differ diff --git a/tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessageAssistant.svelte b/tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessageAssistant.svelte index 41774c6f87..d8f5630fd1 100644 --- a/tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessageAssistant.svelte +++ b/tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessageAssistant.svelte @@ -85,8 +85,8 @@ let displayedModel = $derived((): string | null => { if (!currentConfig.showModelInfo) return null; - if (currentConfig.modelSelectorEnabled) { - return message.model ?? null; + if (message.model) { + return message.model; } return serverModel; diff --git a/tools/server/webui/src/lib/services/chat.ts b/tools/server/webui/src/lib/services/chat.ts index df03b10251..414e060764 100644 --- a/tools/server/webui/src/lib/services/chat.ts +++ b/tools/server/webui/src/lib/services/chat.ts @@ -54,6 +54,7 @@ export class ChatService { onError, onReasoningChunk, onModel, + onFirstValidChunk, // Generation parameters temperature, max_tokens, @@ -201,6 +202,7 @@ export class ChatService { onError, onReasoningChunk, onModel, + onFirstValidChunk, conversationId, abortController.signal ); @@ -267,6 +269,7 @@ export class ChatService { onError?: (error: Error) => void, onReasoningChunk?: (chunk: string) => void, onModel?: (model: string) => void, + onFirstValidChunk?: () => void, conversationId?: string, abortSignal?: AbortSignal ): Promise { @@ -283,6 +286,7 @@ export class ChatService { let lastTimings: ChatMessageTimings | undefined; let streamFinished = false; let modelEmitted = false; + let firstValidChunkEmitted = false; try { let chunk = ''; @@ -311,10 +315,12 @@ export class ChatService { try { const parsed: ApiChatCompletionStreamChunk = JSON.parse(data); - const chunkModel = this.extractModelName(parsed); - if (chunkModel && !modelEmitted) { - modelEmitted = true; - onModel?.(chunkModel); + if (!firstValidChunkEmitted && parsed.object === 'chat.completion.chunk') { + firstValidChunkEmitted = true; + + if (!abortSignal?.aborted) { + onFirstValidChunk?.(); + } } const content = parsed.choices[0]?.delta?.content; @@ -322,6 +328,12 @@ export class ChatService { const timings = parsed.timings; const promptProgress = parsed.prompt_progress; + const chunkModel = this.extractModelName(parsed); + if (chunkModel && !modelEmitted) { + modelEmitted = true; + onModel?.(chunkModel); + } + if (timings || promptProgress) { this.updateProcessingState(timings, promptProgress, conversationId); if (timings) { diff --git a/tools/server/webui/src/lib/stores/chat.svelte.ts b/tools/server/webui/src/lib/stores/chat.svelte.ts index a2e74a2e10..3f97a89183 100644 --- a/tools/server/webui/src/lib/stores/chat.svelte.ts +++ b/tools/server/webui/src/lib/stores/chat.svelte.ts @@ -1,6 +1,7 @@ import { DatabaseStore } from '$lib/stores/database'; import { chatService, slotsService } from '$lib/services'; import { config } from '$lib/stores/settings.svelte'; +import { serverStore } from '$lib/stores/server.svelte'; import { normalizeModelName } from '$lib/utils/model-names'; import { filterByLeafNodeId, findLeafNode, findDescendantMessages } from '$lib/utils/branching'; import { browser } from '$app/environment'; @@ -362,9 +363,41 @@ class ChatStore { let resolvedModel: string | null = null; let modelPersisted = false; + const currentConfig = config(); + const preferServerPropsModel = !currentConfig.modelSelectorEnabled; + let serverPropsRefreshed = false; + let updateModelFromServerProps: ((persistImmediately?: boolean) => void) | null = null; - const recordModel = (modelName: string, persistImmediately = true): void => { - const normalizedModel = normalizeModelName(modelName); + const refreshServerPropsOnce = () => { + if (serverPropsRefreshed) { + return; + } + + serverPropsRefreshed = true; + + const hasExistingProps = serverStore.serverProps !== null; + + serverStore + .fetchServerProps({ silent: hasExistingProps }) + .then(() => { + updateModelFromServerProps?.(true); + }) + .catch((error) => { + console.warn('Failed to refresh server props after streaming started:', error); + }); + }; + + const recordModel = (modelName: string | null | undefined, persistImmediately = true): void => { + const serverModelName = serverStore.modelName; + const preferredModelSource = preferServerPropsModel + ? (serverModelName ?? modelName ?? null) + : (modelName ?? serverModelName ?? null); + + if (!preferredModelSource) { + return; + } + + const normalizedModel = normalizeModelName(preferredModelSource); if (!normalizedModel || normalizedModel === resolvedModel) { return; @@ -388,6 +421,20 @@ class ChatStore { } }; + if (preferServerPropsModel) { + updateModelFromServerProps = (persistImmediately = true) => { + const currentServerModel = serverStore.modelName; + + if (!currentServerModel) { + return; + } + + recordModel(currentServerModel, persistImmediately); + }; + + updateModelFromServerProps(false); + } + slotsService.startStreaming(); slotsService.setActiveConversation(assistantMessage.convId); @@ -396,6 +443,9 @@ class ChatStore { { ...this.getApiOptions(), + onFirstValidChunk: () => { + refreshServerPropsOnce(); + }, onChunk: (chunk: string) => { streamedContent += chunk; this.setConversationStreaming( diff --git a/tools/server/webui/src/lib/stores/server.svelte.ts b/tools/server/webui/src/lib/stores/server.svelte.ts index 1fd4afb040..c7056cc120 100644 --- a/tools/server/webui/src/lib/stores/server.svelte.ts +++ b/tools/server/webui/src/lib/stores/server.svelte.ts @@ -52,6 +52,7 @@ class ServerStore { private _error = $state(null); private _serverWarning = $state(null); private _slotsEndpointAvailable = $state(null); + private fetchServerPropsPromise: Promise | null = null; private readCachedServerProps(): ApiLlamaCppServerProps | null { if (!browser) return null; @@ -171,73 +172,65 @@ class ServerStore { /** * Fetches server properties from the server */ - async fetchServerProps(): Promise { - this._loading = true; - this._error = null; - this._serverWarning = null; + async fetchServerProps(options: { silent?: boolean } = {}): Promise { + const { silent = false } = options; + const isSilent = silent && this._serverProps !== null; - try { - console.log('Fetching server properties...'); - const props = await ChatService.getServerProps(); - this._serverProps = props; - this.persistServerProps(props); - console.log('Server properties loaded:', props); + if (this.fetchServerPropsPromise) { + return this.fetchServerPropsPromise; + } - // Check slots endpoint availability after server props are loaded - await this.checkSlotsEndpointAvailability(); - } catch (error) { - const hadCachedProps = this._serverProps !== null; - let errorMessage = 'Failed to connect to server'; - let isOfflineLikeError = false; - let isServerSideError = false; + if (!isSilent) { + this._loading = true; + this._error = null; + this._serverWarning = null; + } - if (error instanceof Error) { - // Handle specific error types with user-friendly messages - if (error.name === 'TypeError' && error.message.includes('fetch')) { - errorMessage = 'Server is not running or unreachable'; - isOfflineLikeError = true; - } else if (error.message.includes('ECONNREFUSED')) { - errorMessage = 'Connection refused - server may be offline'; - isOfflineLikeError = true; - } else if (error.message.includes('ENOTFOUND')) { - errorMessage = 'Server not found - check server address'; - isOfflineLikeError = true; - } else if (error.message.includes('ETIMEDOUT')) { - errorMessage = 'Request timed out - the server took too long to respond'; - isOfflineLikeError = true; - } else if (error.message.includes('503')) { - errorMessage = 'Server temporarily unavailable - try again shortly'; - isServerSideError = true; - } else if (error.message.includes('500')) { - errorMessage = 'Server error - check server logs'; - isServerSideError = true; - } else if (error.message.includes('404')) { - errorMessage = 'Server endpoint not found'; - } else if (error.message.includes('403') || error.message.includes('401')) { - errorMessage = 'Access denied'; + const hadProps = this._serverProps !== null; + + const fetchPromise = (async () => { + try { + const props = await ChatService.getServerProps(); + this._serverProps = props; + this.persistServerProps(props); + this._error = null; + this._serverWarning = null; + await this.checkSlotsEndpointAvailability(); + } catch (error) { + if (isSilent && hadProps) { + console.warn('Silent server props refresh failed, keeping cached data:', error); + return; } + + this.handleFetchServerPropsError(error, hadProps); + } finally { + if (!isSilent) { + this._loading = false; + } + + this.fetchServerPropsPromise = null; } + })(); - let cachedProps: ApiLlamaCppServerProps | null = null; + this.fetchServerPropsPromise = fetchPromise; - if (!hadCachedProps) { - cachedProps = this.readCachedServerProps(); - if (cachedProps) { - this._serverProps = cachedProps; - this._error = null; + await fetchPromise; + } - if (isOfflineLikeError || isServerSideError) { - this._serverWarning = errorMessage; - } + /** + * Handles fetch failures by attempting to recover cached server props and + * updating the user-facing error or warning state appropriately. + */ + private handleFetchServerPropsError(error: unknown, hadProps: boolean): void { + const { errorMessage, isOfflineLikeError, isServerSideError } = this.normalizeFetchError(error); - console.warn( - 'Failed to refresh server properties, using cached values from localStorage:', - errorMessage - ); - } else { - this._error = errorMessage; - } - } else { + let cachedProps: ApiLlamaCppServerProps | null = null; + + if (!hadProps) { + cachedProps = this.readCachedServerProps(); + + if (cachedProps) { + this._serverProps = cachedProps; this._error = null; if (isOfflineLikeError || isServerSideError) { @@ -245,14 +238,66 @@ class ServerStore { } console.warn( - 'Failed to refresh server properties, continuing with cached values:', + 'Failed to refresh server properties, using cached values from localStorage:', errorMessage ); + } else { + this._error = errorMessage; } - console.error('Error fetching server properties:', error); - } finally { - this._loading = false; + } else { + this._error = null; + + if (isOfflineLikeError || isServerSideError) { + this._serverWarning = errorMessage; + } + + console.warn( + 'Failed to refresh server properties, continuing with cached values:', + errorMessage + ); } + + console.error('Error fetching server properties:', error); + } + + private normalizeFetchError(error: unknown): { + errorMessage: string; + isOfflineLikeError: boolean; + isServerSideError: boolean; + } { + let errorMessage = 'Failed to connect to server'; + let isOfflineLikeError = false; + let isServerSideError = false; + + if (error instanceof Error) { + const message = error.message || ''; + + if (error.name === 'TypeError' && message.includes('fetch')) { + errorMessage = 'Server is not running or unreachable'; + isOfflineLikeError = true; + } else if (message.includes('ECONNREFUSED')) { + errorMessage = 'Connection refused - server may be offline'; + isOfflineLikeError = true; + } else if (message.includes('ENOTFOUND')) { + errorMessage = 'Server not found - check server address'; + isOfflineLikeError = true; + } else if (message.includes('ETIMEDOUT')) { + errorMessage = 'Request timed out - the server took too long to respond'; + isOfflineLikeError = true; + } else if (message.includes('503')) { + errorMessage = 'Server temporarily unavailable - try again shortly'; + isServerSideError = true; + } else if (message.includes('500')) { + errorMessage = 'Server error - check server logs'; + isServerSideError = true; + } else if (message.includes('404')) { + errorMessage = 'Server endpoint not found'; + } else if (message.includes('403') || message.includes('401')) { + errorMessage = 'Access denied'; + } + } + + return { errorMessage, isOfflineLikeError, isServerSideError }; } /** @@ -264,6 +309,7 @@ class ServerStore { this._serverWarning = null; this._loading = false; this._slotsEndpointAvailable = null; + this.fetchServerPropsPromise = null; this.persistServerProps(null); } } diff --git a/tools/server/webui/src/lib/types/api.d.ts b/tools/server/webui/src/lib/types/api.d.ts index 6d76ab1f68..6ebc43db0e 100644 --- a/tools/server/webui/src/lib/types/api.d.ts +++ b/tools/server/webui/src/lib/types/api.d.ts @@ -186,6 +186,7 @@ export interface ApiChatCompletionRequest { } export interface ApiChatCompletionStreamChunk { + object?: string; model?: string; choices: Array<{ model?: string; diff --git a/tools/server/webui/src/lib/types/settings.d.ts b/tools/server/webui/src/lib/types/settings.d.ts index 659fb0c7d1..946ef015e9 100644 --- a/tools/server/webui/src/lib/types/settings.d.ts +++ b/tools/server/webui/src/lib/types/settings.d.ts @@ -42,6 +42,7 @@ export interface SettingsChatServiceOptions { onChunk?: (chunk: string) => void; onReasoningChunk?: (chunk: string) => void; onModel?: (model: string) => void; + onFirstValidChunk?: () => void; onComplete?: (response: string, reasoningContent?: string, timings?: ChatMessageTimings) => void; onError?: (error: Error) => void; }