webui: auto-refresh /props on inference start to resync model metadata (#16784)
* webui: auto-refresh /props on inference start to resync model metadata
  - Add no-cache headers to /props and /slots
  - Throttle slot checks to 30s
  - Prevent concurrent fetches with a promise guard
  - Trigger refresh from chat streaming for legacy and ModelSelector modes
  - Show a dynamic serverWarning when using cached data

* fix: restore proper legacy behavior in webui by using unified /props refresh

  Updated assistant message bubbles to show each message's stored model when available, falling back to the current server model only when the per-message value is missing.

  When the model selector is disabled, the webui now fetches /props and prioritizes that model name over chunk metadata, then persists it with the streamed message so legacy mode properly reflects the backend configuration.

* fix: detect first valid SSE chunk and refresh server props once

* fix: removed the slots availability throttle constant and state

* webui: purge ai-generated cruft

* chore: update webui static build
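A note on the "promise guard" bullet: it is the usual single-flight pattern, where concurrent callers share one in-flight request instead of issuing duplicate /props fetches. A minimal hedged sketch follows; getServerProps here is a hypothetical stand-in, not the actual webui helper:

    // Single-flight sketch: dedupe concurrent /props fetches behind one promise.
    // `getServerProps` is a hypothetical stand-in for the real fetch helper.
    async function getServerProps(): Promise<unknown> {
        const res = await fetch('/props');
        if (!res.ok) throw new Error(`HTTP ${res.status}`);
        return res.json();
    }

    let inflight: Promise<unknown> | null = null;

    function fetchServerPropsOnce(): Promise<unknown> {
        if (inflight) return inflight; // reuse the pending request
        inflight = getServerProps().finally(() => {
            inflight = null; // allow the next refresh afterwards
        });
        return inflight;
    }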
Binary file not shown (the rebuilt webui static bundle from the "chore: update webui static build" step).
@@ -85,8 +85,8 @@
 	let displayedModel = $derived((): string | null => {
 		if (!currentConfig.showModelInfo) return null;
 
-		if (currentConfig.modelSelectorEnabled) {
-			return message.model ?? null;
+		if (message.model) {
+			return message.model;
 		}
 
 		return serverModel;
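The behavioral change here: the per-message stored model now wins whenever it is present, regardless of the selector setting, and the live server model is only the fallback. As a one-line sketch with the names from the hunk above:

    // Prefer the model persisted with the message; fall back to the live server model.
    const displayed = message.model ?? serverModel;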
@@ -54,6 +54,7 @@ export class ChatService {
 			onError,
 			onReasoningChunk,
 			onModel,
+			onFirstValidChunk,
 			// Generation parameters
 			temperature,
 			max_tokens,
@@ -201,6 +202,7 @@ export class ChatService {
 			onError,
 			onReasoningChunk,
 			onModel,
+			onFirstValidChunk,
 			conversationId,
 			abortController.signal
 		);
@@ -267,6 +269,7 @@ export class ChatService {
 		onError?: (error: Error) => void,
 		onReasoningChunk?: (chunk: string) => void,
 		onModel?: (model: string) => void,
+		onFirstValidChunk?: () => void,
 		conversationId?: string,
 		abortSignal?: AbortSignal
 	): Promise<void> {
@@ -283,6 +286,7 @@ export class ChatService {
 		let lastTimings: ChatMessageTimings | undefined;
 		let streamFinished = false;
 		let modelEmitted = false;
+		let firstValidChunkEmitted = false;
 
 		try {
 			let chunk = '';
@@ -311,10 +315,12 @@ export class ChatService {
 				try {
 					const parsed: ApiChatCompletionStreamChunk = JSON.parse(data);
 
-					const chunkModel = this.extractModelName(parsed);
-					if (chunkModel && !modelEmitted) {
-						modelEmitted = true;
-						onModel?.(chunkModel);
+					if (!firstValidChunkEmitted && parsed.object === 'chat.completion.chunk') {
+						firstValidChunkEmitted = true;
+
+						if (!abortSignal?.aborted) {
+							onFirstValidChunk?.();
+						}
 					}
 
 					const content = parsed.choices[0]?.delta?.content;
@@ -322,6 +328,12 @@ export class ChatService {
 					const timings = parsed.timings;
 					const promptProgress = parsed.prompt_progress;
 
+					const chunkModel = this.extractModelName(parsed);
+					if (chunkModel && !modelEmitted) {
+						modelEmitted = true;
+						onModel?.(chunkModel);
+					}
+
 					if (timings || promptProgress) {
 						this.updateProcessingState(timings, promptProgress, conversationId);
 						if (timings) {
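Design note on the last two hunks: the first-chunk gate now keys on the OpenAI-compatible `object` discriminator rather than on whether a model name could be extracted, so the /props refresh fires even for chunks that carry no model field, while model emission moves to its own block. A reduced standalone sketch of the gate, with the callback threaded through the diff above:

    // Reduced sketch of the first-valid-chunk gate from the hunk above.
    let firstValidChunkEmitted = false;

    function handleChunk(
        parsed: { object?: string },
        aborted: boolean,
        onFirstValidChunk?: () => void
    ): void {
        if (!firstValidChunkEmitted && parsed.object === 'chat.completion.chunk') {
            firstValidChunkEmitted = true;
            if (!aborted) onFirstValidChunk?.(); // fire exactly once per stream
        }
    }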
@@ -1,6 +1,7 @@
 import { DatabaseStore } from '$lib/stores/database';
 import { chatService, slotsService } from '$lib/services';
 import { config } from '$lib/stores/settings.svelte';
+import { serverStore } from '$lib/stores/server.svelte';
 import { normalizeModelName } from '$lib/utils/model-names';
 import { filterByLeafNodeId, findLeafNode, findDescendantMessages } from '$lib/utils/branching';
 import { browser } from '$app/environment';
@@ -362,9 +363,41 @@ class ChatStore {
 
 		let resolvedModel: string | null = null;
 		let modelPersisted = false;
+		const currentConfig = config();
+		const preferServerPropsModel = !currentConfig.modelSelectorEnabled;
+		let serverPropsRefreshed = false;
+		let updateModelFromServerProps: ((persistImmediately?: boolean) => void) | null = null;
 
-		const recordModel = (modelName: string, persistImmediately = true): void => {
-			const normalizedModel = normalizeModelName(modelName);
+		const refreshServerPropsOnce = () => {
+			if (serverPropsRefreshed) {
+				return;
+			}
+
+			serverPropsRefreshed = true;
+
+			const hasExistingProps = serverStore.serverProps !== null;
+
+			serverStore
+				.fetchServerProps({ silent: hasExistingProps })
+				.then(() => {
+					updateModelFromServerProps?.(true);
+				})
+				.catch((error) => {
+					console.warn('Failed to refresh server props after streaming started:', error);
+				});
+		};
+
+		const recordModel = (modelName: string | null | undefined, persistImmediately = true): void => {
+			const serverModelName = serverStore.modelName;
+			const preferredModelSource = preferServerPropsModel
+				? (serverModelName ?? modelName ?? null)
+				: (modelName ?? serverModelName ?? null);
+
+			if (!preferredModelSource) {
+				return;
+			}
+
+			const normalizedModel = normalizeModelName(preferredModelSource);
+
 			if (!normalizedModel || normalizedModel === resolvedModel) {
 				return;
@@ -388,6 +421,20 @@ class ChatStore {
 			}
 		};
 
+		if (preferServerPropsModel) {
+			updateModelFromServerProps = (persistImmediately = true) => {
+				const currentServerModel = serverStore.modelName;
+
+				if (!currentServerModel) {
+					return;
+				}
+
+				recordModel(currentServerModel, persistImmediately);
+			};
+
+			updateModelFromServerProps(false);
+		}
+
 		slotsService.startStreaming();
 		slotsService.setActiveConversation(assistantMessage.convId);
 
@@ -396,6 +443,9 @@ class ChatStore {
 			{
 				...this.getApiOptions(),
+				onFirstValidChunk: () => {
+					refreshServerPropsOnce();
+				},
 				onChunk: (chunk: string) => {
 					streamedContent += chunk;
 					this.setConversationStreaming(
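The preference order inside recordModel reads as a two-branch coalesce: with the model selector disabled, the /props model wins over chunk metadata; with it enabled, the streamed chunk wins. A compact sketch, using plain strings in place of the store getters:

    // Selector disabled -> trust /props first; selector enabled -> trust the chunk first.
    function pickModel(
        preferServerProps: boolean,
        serverModel: string | null,
        chunkModel: string | null
    ): string | null {
        return preferServerProps
            ? (serverModel ?? chunkModel ?? null)
            : (chunkModel ?? serverModel ?? null);
    }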
@@ -52,6 +52,7 @@ class ServerStore {
 	private _error = $state<string | null>(null);
 	private _serverWarning = $state<string | null>(null);
 	private _slotsEndpointAvailable = $state<boolean | null>(null);
+	private fetchServerPropsPromise: Promise<void> | null = null;
 
 	private readCachedServerProps(): ApiLlamaCppServerProps | null {
 		if (!browser) return null;
@@ -171,57 +172,63 @@ class ServerStore {
 	/**
 	 * Fetches server properties from the server
 	 */
-	async fetchServerProps(): Promise<void> {
-		this._loading = true;
-		this._error = null;
-		this._serverWarning = null;
+	async fetchServerProps(options: { silent?: boolean } = {}): Promise<void> {
+		const { silent = false } = options;
+		const isSilent = silent && this._serverProps !== null;
+
+		if (this.fetchServerPropsPromise) {
+			return this.fetchServerPropsPromise;
+		}
+
+		if (!isSilent) {
+			this._loading = true;
+			this._error = null;
+			this._serverWarning = null;
+		}
+
+		const hadProps = this._serverProps !== null;
+
+		const fetchPromise = (async () => {
 			try {
-				console.log('Fetching server properties...');
 				const props = await ChatService.getServerProps();
 				this._serverProps = props;
 				this.persistServerProps(props);
-				console.log('Server properties loaded:', props);
-				// Check slots endpoint availability after server props are loaded
+				this._error = null;
+				this._serverWarning = null;
+
 				await this.checkSlotsEndpointAvailability();
 			} catch (error) {
-				const hadCachedProps = this._serverProps !== null;
-				let errorMessage = 'Failed to connect to server';
-				let isOfflineLikeError = false;
-				let isServerSideError = false;
-
-				if (error instanceof Error) {
-					// Handle specific error types with user-friendly messages
-					if (error.name === 'TypeError' && error.message.includes('fetch')) {
-						errorMessage = 'Server is not running or unreachable';
-						isOfflineLikeError = true;
-					} else if (error.message.includes('ECONNREFUSED')) {
-						errorMessage = 'Connection refused - server may be offline';
-						isOfflineLikeError = true;
-					} else if (error.message.includes('ENOTFOUND')) {
-						errorMessage = 'Server not found - check server address';
-						isOfflineLikeError = true;
-					} else if (error.message.includes('ETIMEDOUT')) {
-						errorMessage = 'Request timed out - the server took too long to respond';
-						isOfflineLikeError = true;
-					} else if (error.message.includes('503')) {
-						errorMessage = 'Server temporarily unavailable - try again shortly';
-						isServerSideError = true;
-					} else if (error.message.includes('500')) {
-						errorMessage = 'Server error - check server logs';
-						isServerSideError = true;
-					} else if (error.message.includes('404')) {
-						errorMessage = 'Server endpoint not found';
-					} else if (error.message.includes('403') || error.message.includes('401')) {
-						errorMessage = 'Access denied';
-					}
+				if (isSilent && hadProps) {
+					console.warn('Silent server props refresh failed, keeping cached data:', error);
+					return;
 				}
+
+				this.handleFetchServerPropsError(error, hadProps);
+			} finally {
+				if (!isSilent) {
+					this._loading = false;
+				}
+
+				this.fetchServerPropsPromise = null;
 			}
+		})();
+
+		this.fetchServerPropsPromise = fetchPromise;
+
+		await fetchPromise;
+	}
+
+	/**
+	 * Handles fetch failures by attempting to recover cached server props and
+	 * updating the user-facing error or warning state appropriately.
+	 */
+	private handleFetchServerPropsError(error: unknown, hadProps: boolean): void {
+		const { errorMessage, isOfflineLikeError, isServerSideError } = this.normalizeFetchError(error);
 
 		let cachedProps: ApiLlamaCppServerProps | null = null;
 
-		if (!hadCachedProps) {
+		if (!hadProps) {
 			cachedProps = this.readCachedServerProps();
 
 			if (cachedProps) {
 				this._serverProps = cachedProps;
 				this._error = null;
@@ -249,10 +256,48 @@ class ServerStore {
 					errorMessage
 				);
 			}
 
 			console.error('Error fetching server properties:', error);
-		} finally {
-			this._loading = false;
 		}
+
+	private normalizeFetchError(error: unknown): {
+		errorMessage: string;
+		isOfflineLikeError: boolean;
+		isServerSideError: boolean;
+	} {
+		let errorMessage = 'Failed to connect to server';
+		let isOfflineLikeError = false;
+		let isServerSideError = false;
+
+		if (error instanceof Error) {
+			const message = error.message || '';
+
+			if (error.name === 'TypeError' && message.includes('fetch')) {
+				errorMessage = 'Server is not running or unreachable';
+				isOfflineLikeError = true;
+			} else if (message.includes('ECONNREFUSED')) {
+				errorMessage = 'Connection refused - server may be offline';
+				isOfflineLikeError = true;
+			} else if (message.includes('ENOTFOUND')) {
+				errorMessage = 'Server not found - check server address';
+				isOfflineLikeError = true;
+			} else if (message.includes('ETIMEDOUT')) {
+				errorMessage = 'Request timed out - the server took too long to respond';
+				isOfflineLikeError = true;
+			} else if (message.includes('503')) {
+				errorMessage = 'Server temporarily unavailable - try again shortly';
+				isServerSideError = true;
+			} else if (message.includes('500')) {
+				errorMessage = 'Server error - check server logs';
+				isServerSideError = true;
+			} else if (message.includes('404')) {
+				errorMessage = 'Server endpoint not found';
+			} else if (message.includes('403') || message.includes('401')) {
+				errorMessage = 'Access denied';
+			}
+		}
+
+		return { errorMessage, isOfflineLikeError, isServerSideError };
 	}
 
 	/**
@@ -264,6 +309,7 @@ class ServerStore {
 		this._serverWarning = null;
 		this._loading = false;
 		this._slotsEndpointAvailable = null;
+		this.fetchServerPropsPromise = null;
 		this.persistServerProps(null);
 	}
 }
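Usage implication of the silent flag: a background refresh neither toggles the loading spinner nor clobbers cached props on failure, while a cold start (no cached props) still surfaces loading and error state. A call-site sketch mirroring the refreshServerPropsOnce wiring above:

    // Only go silent when cached props already exist, so first load still
    // shows loading/error state to the user.
    const hasExistingProps = serverStore.serverProps !== null;
    await serverStore.fetchServerProps({ silent: hasExistingProps });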
tools/server/webui/src/lib/types/api.d.ts (vendored):
@@ -186,6 +186,7 @@ export interface ApiChatCompletionRequest {
 }
 
 export interface ApiChatCompletionStreamChunk {
+	object?: string;
 	model?: string;
 	choices: Array<{
 		model?: string;
@@ -42,6 +42,7 @@ export interface SettingsChatServiceOptions {
 	onChunk?: (chunk: string) => void;
 	onReasoningChunk?: (chunk: string) => void;
 	onModel?: (model: string) => void;
+	onFirstValidChunk?: () => void;
 	onComplete?: (response: string, reasoningContent?: string, timings?: ChatMessageTimings) => void;
 	onError?: (error: Error) => void;
 }