webui: auto-refresh /props on inference start to resync model metadata (#16784)
* webui: auto-refresh /props on inference start to resync model metadata
  - Add no-cache headers to /props and /slots
  - Throttle slot checks to 30s
  - Prevent concurrent fetches with promise guard
  - Trigger refresh from chat streaming for legacy and ModelSelector
  - Show dynamic serverWarning when using cached data

* fix: restore proper legacy behavior in webui by using unified /props refresh

  Updated assistant message bubbles to show each message's stored model when available, falling back to the current server model only when the per-message value is missing.

  When the model selector is disabled, the webui now fetches /props and prioritizes that model name over chunk metadata, then persists it with the streamed message so legacy mode properly reflects the backend configuration.

* fix: detect first valid SSE chunk and refresh server props once

* fix: removed the slots availability throttle constant and state

* webui: purge ai-generated cruft

* chore: update webui static build
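Note: the diff below covers only the ServerStore side of this change. As a rough illustration of the streaming-side trigger described above (refresh /props once when the first valid SSE chunk arrives), a minimal TypeScript sketch could look like the following. The serverStore.fetchServerProps({ silent: true }) call is the API added in this commit; the import path, the onChunk callback, and the propsRefreshed flag are illustrative assumptions, not the actual webui code.

// Illustrative sketch only - not the actual webui streaming code.
// Assumed import path; the real store location in the webui may differ.
import { serverStore } from '$lib/stores/server.svelte';

let propsRefreshed = false;

// Called once per parsed SSE chunk by the (assumed) streaming handler.
export function onChunk(chunk: { model?: string; choices?: unknown[] }): void {
  // Treat the first chunk carrying any payload as the "first valid" chunk.
  if (propsRefreshed || !(chunk.model || chunk.choices?.length)) return;
  propsRefreshed = true;

  // Silent refresh: does not toggle the global loading flag and keeps
  // cached props if the request fails, so the UI never flickers mid-stream.
  void serverStore.fetchServerProps({ silent: true });
}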
@@ -52,6 +52,7 @@ class ServerStore {
   private _error = $state<string | null>(null);
   private _serverWarning = $state<string | null>(null);
   private _slotsEndpointAvailable = $state<boolean | null>(null);
+  private fetchServerPropsPromise: Promise<void> | null = null;

   private readCachedServerProps(): ApiLlamaCppServerProps | null {
     if (!browser) return null;
@@ -171,73 +172,65 @@ class ServerStore {
   /**
    * Fetches server properties from the server
    */
-  async fetchServerProps(): Promise<void> {
-    this._loading = true;
-    this._error = null;
-    this._serverWarning = null;
-
-    try {
-      console.log('Fetching server properties...');
-      const props = await ChatService.getServerProps();
-      this._serverProps = props;
-      this.persistServerProps(props);
-      console.log('Server properties loaded:', props);
-
-      // Check slots endpoint availability after server props are loaded
-      await this.checkSlotsEndpointAvailability();
-    } catch (error) {
-      const hadCachedProps = this._serverProps !== null;
-      let errorMessage = 'Failed to connect to server';
-      let isOfflineLikeError = false;
-      let isServerSideError = false;
-
-      if (error instanceof Error) {
-        // Handle specific error types with user-friendly messages
-        if (error.name === 'TypeError' && error.message.includes('fetch')) {
-          errorMessage = 'Server is not running or unreachable';
-          isOfflineLikeError = true;
-        } else if (error.message.includes('ECONNREFUSED')) {
-          errorMessage = 'Connection refused - server may be offline';
-          isOfflineLikeError = true;
-        } else if (error.message.includes('ENOTFOUND')) {
-          errorMessage = 'Server not found - check server address';
-          isOfflineLikeError = true;
-        } else if (error.message.includes('ETIMEDOUT')) {
-          errorMessage = 'Request timed out - the server took too long to respond';
-          isOfflineLikeError = true;
-        } else if (error.message.includes('503')) {
-          errorMessage = 'Server temporarily unavailable - try again shortly';
-          isServerSideError = true;
-        } else if (error.message.includes('500')) {
-          errorMessage = 'Server error - check server logs';
-          isServerSideError = true;
-        } else if (error.message.includes('404')) {
-          errorMessage = 'Server endpoint not found';
-        } else if (error.message.includes('403') || error.message.includes('401')) {
-          errorMessage = 'Access denied';
-        }
-      }
-
-      let cachedProps: ApiLlamaCppServerProps | null = null;
-
-      if (!hadCachedProps) {
-        cachedProps = this.readCachedServerProps();
-
-        if (cachedProps) {
-          this._serverProps = cachedProps;
-          this._error = null;
-
-          if (isOfflineLikeError || isServerSideError) {
+  async fetchServerProps(options: { silent?: boolean } = {}): Promise<void> {
+    const { silent = false } = options;
+    const isSilent = silent && this._serverProps !== null;
+
+    if (this.fetchServerPropsPromise) {
+      return this.fetchServerPropsPromise;
+    }
+
+    if (!isSilent) {
+      this._loading = true;
+      this._error = null;
+      this._serverWarning = null;
+    }
+
+    const hadProps = this._serverProps !== null;
+
+    const fetchPromise = (async () => {
+      try {
+        const props = await ChatService.getServerProps();
+        this._serverProps = props;
+        this.persistServerProps(props);
+        this._error = null;
+        this._serverWarning = null;
+        await this.checkSlotsEndpointAvailability();
+      } catch (error) {
+        if (isSilent && hadProps) {
+          console.warn('Silent server props refresh failed, keeping cached data:', error);
+          return;
+        }
+
+        this.handleFetchServerPropsError(error, hadProps);
+      } finally {
+        if (!isSilent) {
+          this._loading = false;
+        }
+
+        this.fetchServerPropsPromise = null;
+      }
+    })();
+
+    this.fetchServerPropsPromise = fetchPromise;
+
+    await fetchPromise;
+  }
@@ -245,14 +238,66 @@ class ServerStore {
-            this._serverWarning = errorMessage;
-          }
-
-          console.warn(
-            'Failed to refresh server properties, using cached values from localStorage:',
-            errorMessage
-          );
-        } else {
-          this._error = errorMessage;
-        }
-      } else {
-        this._error = null;
-
-        if (isOfflineLikeError || isServerSideError) {
-          this._serverWarning = errorMessage;
-        }
-
-        console.warn(
-          'Failed to refresh server properties, continuing with cached values:',
-          errorMessage
-        );
-      }
-
-      console.error('Error fetching server properties:', error);
-    } finally {
-      this._loading = false;
-    }
-  }
+
+  /**
+   * Handles fetch failures by attempting to recover cached server props and
+   * updating the user-facing error or warning state appropriately.
+   */
+  private handleFetchServerPropsError(error: unknown, hadProps: boolean): void {
+    const { errorMessage, isOfflineLikeError, isServerSideError } = this.normalizeFetchError(error);
+
+    let cachedProps: ApiLlamaCppServerProps | null = null;
+
+    if (!hadProps) {
+      cachedProps = this.readCachedServerProps();
+
+      if (cachedProps) {
+        this._serverProps = cachedProps;
+        this._error = null;
+
+        if (isOfflineLikeError || isServerSideError) {
+          this._serverWarning = errorMessage;
+        }
+
+        console.warn(
+          'Failed to refresh server properties, using cached values from localStorage:',
+          errorMessage
+        );
+      } else {
+        this._error = errorMessage;
+      }
+    } else {
+      this._error = null;
+
+      if (isOfflineLikeError || isServerSideError) {
+        this._serverWarning = errorMessage;
+      }
+
+      console.warn(
+        'Failed to refresh server properties, continuing with cached values:',
+        errorMessage
+      );
+    }
+
+    console.error('Error fetching server properties:', error);
+  }
+
+  private normalizeFetchError(error: unknown): {
+    errorMessage: string;
+    isOfflineLikeError: boolean;
+    isServerSideError: boolean;
+  } {
+    let errorMessage = 'Failed to connect to server';
+    let isOfflineLikeError = false;
+    let isServerSideError = false;
+
+    if (error instanceof Error) {
+      const message = error.message || '';
+
+      if (error.name === 'TypeError' && message.includes('fetch')) {
+        errorMessage = 'Server is not running or unreachable';
+        isOfflineLikeError = true;
+      } else if (message.includes('ECONNREFUSED')) {
+        errorMessage = 'Connection refused - server may be offline';
+        isOfflineLikeError = true;
+      } else if (message.includes('ENOTFOUND')) {
+        errorMessage = 'Server not found - check server address';
+        isOfflineLikeError = true;
+      } else if (message.includes('ETIMEDOUT')) {
+        errorMessage = 'Request timed out - the server took too long to respond';
+        isOfflineLikeError = true;
+      } else if (message.includes('503')) {
+        errorMessage = 'Server temporarily unavailable - try again shortly';
+        isServerSideError = true;
+      } else if (message.includes('500')) {
+        errorMessage = 'Server error - check server logs';
+        isServerSideError = true;
+      } else if (message.includes('404')) {
+        errorMessage = 'Server endpoint not found';
+      } else if (message.includes('403') || message.includes('401')) {
+        errorMessage = 'Access denied';
+      }
+    }
+
+    return { errorMessage, isOfflineLikeError, isServerSideError };
+  }
@@ -264,6 +309,7 @@ class ServerStore {
     this._serverWarning = null;
     this._loading = false;
     this._slotsEndpointAvailable = null;
+    this.fetchServerPropsPromise = null;
     this.persistServerProps(null);
   }
 }
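As a usage note, the reworked method can be driven in two modes. The snippet below is a hedged sketch assuming the store instance is exported as serverStore (an assumption); the options object, the silent behavior, and the promise-guard deduplication come from the diff above.

// Sketch of how callers might use the new API, within an async context.

// Initial page load: toggles _loading and surfaces errors via _error.
await serverStore.fetchServerProps();

// On inference start / first SSE chunk: refresh quietly. A failure here keeps
// the cached props and logs a warning instead of surfacing an error.
await serverStore.fetchServerProps({ silent: true });

// Concurrent calls are deduplicated: the second call returns the same
// in-flight promise guarded by fetchServerPropsPromise.
await Promise.all([serverStore.fetchServerProps(), serverStore.fetchServerProps()]);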