diff --git a/tools/server/public/index.html.gz b/tools/server/public/index.html.gz
index 501fa455a2..b71690cc81 100644
Binary files a/tools/server/public/index.html.gz and b/tools/server/public/index.html.gz differ
diff --git a/tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessageAssistant.svelte b/tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessageAssistant.svelte
index 41774c6f87..d8f5630fd1 100644
--- a/tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessageAssistant.svelte
+++ b/tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessageAssistant.svelte
@@ -85,8 +85,8 @@
let displayedModel = $derived((): string | null => {
if (!currentConfig.showModelInfo) return null;
- if (currentConfig.modelSelectorEnabled) {
- return message.model ?? null;
+ if (message.model) {
+ return message.model;
}
return serverModel;
diff --git a/tools/server/webui/src/lib/services/chat.ts b/tools/server/webui/src/lib/services/chat.ts
index df03b10251..414e060764 100644
--- a/tools/server/webui/src/lib/services/chat.ts
+++ b/tools/server/webui/src/lib/services/chat.ts
@@ -54,6 +54,7 @@ export class ChatService {
onError,
onReasoningChunk,
onModel,
+ onFirstValidChunk,
// Generation parameters
temperature,
max_tokens,
@@ -201,6 +202,7 @@ export class ChatService {
onError,
onReasoningChunk,
onModel,
+ onFirstValidChunk,
conversationId,
abortController.signal
);
@@ -267,6 +269,7 @@ export class ChatService {
onError?: (error: Error) => void,
onReasoningChunk?: (chunk: string) => void,
onModel?: (model: string) => void,
+ onFirstValidChunk?: () => void,
conversationId?: string,
abortSignal?: AbortSignal
): Promise {
@@ -283,6 +286,7 @@ export class ChatService {
let lastTimings: ChatMessageTimings | undefined;
let streamFinished = false;
let modelEmitted = false;
+ let firstValidChunkEmitted = false;
try {
let chunk = '';
@@ -311,10 +315,12 @@ export class ChatService {
try {
const parsed: ApiChatCompletionStreamChunk = JSON.parse(data);
- const chunkModel = this.extractModelName(parsed);
- if (chunkModel && !modelEmitted) {
- modelEmitted = true;
- onModel?.(chunkModel);
+ if (!firstValidChunkEmitted && parsed.object === 'chat.completion.chunk') {
+ firstValidChunkEmitted = true;
+
+ if (!abortSignal?.aborted) {
+ onFirstValidChunk?.();
+ }
}
const content = parsed.choices[0]?.delta?.content;
@@ -322,6 +328,12 @@ export class ChatService {
const timings = parsed.timings;
const promptProgress = parsed.prompt_progress;
+ const chunkModel = this.extractModelName(parsed);
+ if (chunkModel && !modelEmitted) {
+ modelEmitted = true;
+ onModel?.(chunkModel);
+ }
+
if (timings || promptProgress) {
this.updateProcessingState(timings, promptProgress, conversationId);
if (timings) {
diff --git a/tools/server/webui/src/lib/stores/chat.svelte.ts b/tools/server/webui/src/lib/stores/chat.svelte.ts
index a2e74a2e10..3f97a89183 100644
--- a/tools/server/webui/src/lib/stores/chat.svelte.ts
+++ b/tools/server/webui/src/lib/stores/chat.svelte.ts
@@ -1,6 +1,7 @@
import { DatabaseStore } from '$lib/stores/database';
import { chatService, slotsService } from '$lib/services';
import { config } from '$lib/stores/settings.svelte';
+import { serverStore } from '$lib/stores/server.svelte';
import { normalizeModelName } from '$lib/utils/model-names';
import { filterByLeafNodeId, findLeafNode, findDescendantMessages } from '$lib/utils/branching';
import { browser } from '$app/environment';
@@ -362,9 +363,41 @@ class ChatStore {
let resolvedModel: string | null = null;
let modelPersisted = false;
+ const currentConfig = config();
+ const preferServerPropsModel = !currentConfig.modelSelectorEnabled;
+ let serverPropsRefreshed = false;
+ let updateModelFromServerProps: ((persistImmediately?: boolean) => void) | null = null;
- const recordModel = (modelName: string, persistImmediately = true): void => {
- const normalizedModel = normalizeModelName(modelName);
+ const refreshServerPropsOnce = () => {
+ if (serverPropsRefreshed) {
+ return;
+ }
+
+ serverPropsRefreshed = true;
+
+ const hasExistingProps = serverStore.serverProps !== null;
+
+ serverStore
+ .fetchServerProps({ silent: hasExistingProps })
+ .then(() => {
+ updateModelFromServerProps?.(true);
+ })
+ .catch((error) => {
+ console.warn('Failed to refresh server props after streaming started:', error);
+ });
+ };
+
+ const recordModel = (modelName: string | null | undefined, persistImmediately = true): void => {
+ const serverModelName = serverStore.modelName;
+ const preferredModelSource = preferServerPropsModel
+ ? (serverModelName ?? modelName ?? null)
+ : (modelName ?? serverModelName ?? null);
+
+ if (!preferredModelSource) {
+ return;
+ }
+
+ const normalizedModel = normalizeModelName(preferredModelSource);
if (!normalizedModel || normalizedModel === resolvedModel) {
return;
@@ -388,6 +421,20 @@ class ChatStore {
}
};
+ if (preferServerPropsModel) {
+ updateModelFromServerProps = (persistImmediately = true) => {
+ const currentServerModel = serverStore.modelName;
+
+ if (!currentServerModel) {
+ return;
+ }
+
+ recordModel(currentServerModel, persistImmediately);
+ };
+
+ updateModelFromServerProps(false);
+ }
+
slotsService.startStreaming();
slotsService.setActiveConversation(assistantMessage.convId);
@@ -396,6 +443,9 @@ class ChatStore {
{
...this.getApiOptions(),
+ onFirstValidChunk: () => {
+ refreshServerPropsOnce();
+ },
onChunk: (chunk: string) => {
streamedContent += chunk;
this.setConversationStreaming(
diff --git a/tools/server/webui/src/lib/stores/server.svelte.ts b/tools/server/webui/src/lib/stores/server.svelte.ts
index 1fd4afb040..c7056cc120 100644
--- a/tools/server/webui/src/lib/stores/server.svelte.ts
+++ b/tools/server/webui/src/lib/stores/server.svelte.ts
@@ -52,6 +52,7 @@ class ServerStore {
private _error = $state(null);
private _serverWarning = $state(null);
private _slotsEndpointAvailable = $state(null);
+ private fetchServerPropsPromise: Promise | null = null;
private readCachedServerProps(): ApiLlamaCppServerProps | null {
if (!browser) return null;
@@ -171,73 +172,65 @@ class ServerStore {
/**
* Fetches server properties from the server
*/
- async fetchServerProps(): Promise {
- this._loading = true;
- this._error = null;
- this._serverWarning = null;
+ async fetchServerProps(options: { silent?: boolean } = {}): Promise {
+ const { silent = false } = options;
+ const isSilent = silent && this._serverProps !== null;
- try {
- console.log('Fetching server properties...');
- const props = await ChatService.getServerProps();
- this._serverProps = props;
- this.persistServerProps(props);
- console.log('Server properties loaded:', props);
+ if (this.fetchServerPropsPromise) {
+ return this.fetchServerPropsPromise;
+ }
- // Check slots endpoint availability after server props are loaded
- await this.checkSlotsEndpointAvailability();
- } catch (error) {
- const hadCachedProps = this._serverProps !== null;
- let errorMessage = 'Failed to connect to server';
- let isOfflineLikeError = false;
- let isServerSideError = false;
+ if (!isSilent) {
+ this._loading = true;
+ this._error = null;
+ this._serverWarning = null;
+ }
- if (error instanceof Error) {
- // Handle specific error types with user-friendly messages
- if (error.name === 'TypeError' && error.message.includes('fetch')) {
- errorMessage = 'Server is not running or unreachable';
- isOfflineLikeError = true;
- } else if (error.message.includes('ECONNREFUSED')) {
- errorMessage = 'Connection refused - server may be offline';
- isOfflineLikeError = true;
- } else if (error.message.includes('ENOTFOUND')) {
- errorMessage = 'Server not found - check server address';
- isOfflineLikeError = true;
- } else if (error.message.includes('ETIMEDOUT')) {
- errorMessage = 'Request timed out - the server took too long to respond';
- isOfflineLikeError = true;
- } else if (error.message.includes('503')) {
- errorMessage = 'Server temporarily unavailable - try again shortly';
- isServerSideError = true;
- } else if (error.message.includes('500')) {
- errorMessage = 'Server error - check server logs';
- isServerSideError = true;
- } else if (error.message.includes('404')) {
- errorMessage = 'Server endpoint not found';
- } else if (error.message.includes('403') || error.message.includes('401')) {
- errorMessage = 'Access denied';
+ const hadProps = this._serverProps !== null;
+
+ const fetchPromise = (async () => {
+ try {
+ const props = await ChatService.getServerProps();
+ this._serverProps = props;
+ this.persistServerProps(props);
+ this._error = null;
+ this._serverWarning = null;
+ await this.checkSlotsEndpointAvailability();
+ } catch (error) {
+ if (isSilent && hadProps) {
+ console.warn('Silent server props refresh failed, keeping cached data:', error);
+ return;
}
+
+ this.handleFetchServerPropsError(error, hadProps);
+ } finally {
+ if (!isSilent) {
+ this._loading = false;
+ }
+
+ this.fetchServerPropsPromise = null;
}
+ })();
- let cachedProps: ApiLlamaCppServerProps | null = null;
+ this.fetchServerPropsPromise = fetchPromise;
- if (!hadCachedProps) {
- cachedProps = this.readCachedServerProps();
- if (cachedProps) {
- this._serverProps = cachedProps;
- this._error = null;
+ await fetchPromise;
+ }
- if (isOfflineLikeError || isServerSideError) {
- this._serverWarning = errorMessage;
- }
+ /**
+ * Handles fetch failures by attempting to recover cached server props and
+ * updating the user-facing error or warning state appropriately.
+ */
+ private handleFetchServerPropsError(error: unknown, hadProps: boolean): void {
+ const { errorMessage, isOfflineLikeError, isServerSideError } = this.normalizeFetchError(error);
- console.warn(
- 'Failed to refresh server properties, using cached values from localStorage:',
- errorMessage
- );
- } else {
- this._error = errorMessage;
- }
- } else {
+ let cachedProps: ApiLlamaCppServerProps | null = null;
+
+ if (!hadProps) {
+ cachedProps = this.readCachedServerProps();
+
+ if (cachedProps) {
+ this._serverProps = cachedProps;
this._error = null;
if (isOfflineLikeError || isServerSideError) {
@@ -245,14 +238,66 @@ class ServerStore {
}
console.warn(
- 'Failed to refresh server properties, continuing with cached values:',
+ 'Failed to refresh server properties, using cached values from localStorage:',
errorMessage
);
+ } else {
+ this._error = errorMessage;
}
- console.error('Error fetching server properties:', error);
- } finally {
- this._loading = false;
+ } else {
+ this._error = null;
+
+ if (isOfflineLikeError || isServerSideError) {
+ this._serverWarning = errorMessage;
+ }
+
+ console.warn(
+ 'Failed to refresh server properties, continuing with cached values:',
+ errorMessage
+ );
}
+
+ console.error('Error fetching server properties:', error);
+ }
+
+ private normalizeFetchError(error: unknown): {
+ errorMessage: string;
+ isOfflineLikeError: boolean;
+ isServerSideError: boolean;
+ } {
+ let errorMessage = 'Failed to connect to server';
+ let isOfflineLikeError = false;
+ let isServerSideError = false;
+
+ if (error instanceof Error) {
+ const message = error.message || '';
+
+ if (error.name === 'TypeError' && message.includes('fetch')) {
+ errorMessage = 'Server is not running or unreachable';
+ isOfflineLikeError = true;
+ } else if (message.includes('ECONNREFUSED')) {
+ errorMessage = 'Connection refused - server may be offline';
+ isOfflineLikeError = true;
+ } else if (message.includes('ENOTFOUND')) {
+ errorMessage = 'Server not found - check server address';
+ isOfflineLikeError = true;
+ } else if (message.includes('ETIMEDOUT')) {
+ errorMessage = 'Request timed out - the server took too long to respond';
+ isOfflineLikeError = true;
+ } else if (message.includes('503')) {
+ errorMessage = 'Server temporarily unavailable - try again shortly';
+ isServerSideError = true;
+ } else if (message.includes('500')) {
+ errorMessage = 'Server error - check server logs';
+ isServerSideError = true;
+ } else if (message.includes('404')) {
+ errorMessage = 'Server endpoint not found';
+ } else if (message.includes('403') || message.includes('401')) {
+ errorMessage = 'Access denied';
+ }
+ }
+
+ return { errorMessage, isOfflineLikeError, isServerSideError };
}
/**
@@ -264,6 +309,7 @@ class ServerStore {
this._serverWarning = null;
this._loading = false;
this._slotsEndpointAvailable = null;
+ this.fetchServerPropsPromise = null;
this.persistServerProps(null);
}
}
diff --git a/tools/server/webui/src/lib/types/api.d.ts b/tools/server/webui/src/lib/types/api.d.ts
index 6d76ab1f68..6ebc43db0e 100644
--- a/tools/server/webui/src/lib/types/api.d.ts
+++ b/tools/server/webui/src/lib/types/api.d.ts
@@ -186,6 +186,7 @@ export interface ApiChatCompletionRequest {
}
export interface ApiChatCompletionStreamChunk {
+ object?: string;
model?: string;
choices: Array<{
model?: string;
diff --git a/tools/server/webui/src/lib/types/settings.d.ts b/tools/server/webui/src/lib/types/settings.d.ts
index 659fb0c7d1..946ef015e9 100644
--- a/tools/server/webui/src/lib/types/settings.d.ts
+++ b/tools/server/webui/src/lib/types/settings.d.ts
@@ -42,6 +42,7 @@ export interface SettingsChatServiceOptions {
onChunk?: (chunk: string) => void;
onReasoningChunk?: (chunk: string) => void;
onModel?: (model: string) => void;
+ onFirstValidChunk?: () => void;
onComplete?: (response: string, reasoningContent?: string, timings?: ChatMessageTimings) => void;
onError?: (error: Error) => void;
}