mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-10-27 08:21:30 +00:00
webui: remove client-side context pre-check and rely on backend for limits (#16506)
* fix: make SSE client robust to premature [DONE] in agentic proxy chains
* webui: remove client-side context pre-check and rely on backend for limits
  Removed the client-side context window pre-check and now simply sends messages while keeping the dialog imports limited to core components, eliminating the maximum context alert path.
  Simplified streaming and non-streaming chat error handling to surface a generic 'No response received from server' error whenever the backend returns no content.
  Removed the obsolete maxContextError plumbing from the chat store so state management now focuses on the core message flow without special context-limit cases.
* webui: cosmetic rename of error messages
* Update tools/server/webui/src/lib/stores/chat.svelte.ts
  Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
* Update tools/server/webui/src/lib/stores/chat.svelte.ts
  Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
* Update tools/server/webui/src/lib/components/app/chat/ChatScreen/ChatScreen.svelte
  Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
* Update tools/server/webui/src/lib/components/app/chat/ChatScreen/ChatScreen.svelte
  Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
* chore: update webui build output
---------
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
This commit is contained in:
Binary file not shown.
@@ -7,6 +7,7 @@
|
||||
ChatMessages,
|
||||
ChatProcessingInfo,
|
||||
EmptyFileAlertDialog,
|
||||
ChatErrorDialog,
|
||||
ServerErrorSplash,
|
||||
ServerInfo,
|
||||
ServerLoadingSplash,
|
||||
@@ -22,10 +23,11 @@
|
||||
activeMessages,
|
||||
activeConversation,
|
||||
deleteConversation,
|
||||
dismissErrorDialog,
|
||||
errorDialog,
|
||||
isLoading,
|
||||
sendMessage,
|
||||
stopGeneration,
|
||||
setMaxContextError
|
||||
stopGeneration
|
||||
} from '$lib/stores/chat.svelte';
|
||||
import {
|
||||
supportsVision,
|
||||
@@ -34,7 +36,6 @@
|
||||
serverWarning,
|
||||
serverStore
|
||||
} from '$lib/stores/server.svelte';
|
||||
import { contextService } from '$lib/services';
|
||||
import { parseFilesToMessageExtras } from '$lib/utils/convert-files-to-extra';
|
||||
import { isFileTypeSupported } from '$lib/utils/file-type';
|
||||
import { filterFilesByModalities } from '$lib/utils/modality-file-validation';
|
||||
@@ -79,6 +80,7 @@
|
||||
showCenteredEmpty && !activeConversation() && activeMessages().length === 0 && !isLoading()
|
||||
);
|
||||
|
||||
let activeErrorDialog = $derived(errorDialog());
|
||||
let isServerLoading = $derived(serverLoading());
|
||||
|
||||
async function handleDeleteConfirm() {
|
||||
@@ -105,6 +107,12 @@
|
||||
}
|
||||
}
|
||||
|
||||
function handleErrorDialogOpenChange(open: boolean) {
|
||||
if (!open) {
|
||||
dismissErrorDialog();
|
||||
}
|
||||
}
|
||||
|
||||
function handleDragOver(event: DragEvent) {
|
||||
event.preventDefault();
|
||||
}
|
||||
@@ -183,21 +191,6 @@
|
||||
|
||||
const extras = result?.extras;
|
||||
|
||||
// Check context limit using real-time slots data
|
||||
const contextCheck = await contextService.checkContextLimit();
|
||||
|
||||
if (contextCheck && contextCheck.wouldExceed) {
|
||||
const errorMessage = contextService.getContextErrorMessage(contextCheck);
|
||||
|
||||
setMaxContextError({
|
||||
message: errorMessage,
|
||||
estimatedTokens: contextCheck.currentUsage,
|
||||
maxContext: contextCheck.maxContext
|
||||
});
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
// Enable autoscroll for user-initiated message sending
|
||||
userScrolledUp = false;
|
||||
autoScrollEnabled = true;
|
||||
@@ -461,6 +454,13 @@
|
||||
}}
|
||||
/>
|
||||
|
||||
<ChatErrorDialog
|
||||
message={activeErrorDialog?.message ?? ''}
|
||||
onOpenChange={handleErrorDialogOpenChange}
|
||||
open={Boolean(activeErrorDialog)}
|
||||
type={activeErrorDialog?.type ?? 'server'}
|
||||
/>
|
||||
|
||||
<style>
|
||||
.conversation-chat-form {
|
||||
position: relative;
|
||||
|
||||
@@ -0,0 +1,60 @@
|
||||
<script lang="ts">
|
||||
import * as AlertDialog from '$lib/components/ui/alert-dialog';
|
||||
import { AlertTriangle, TimerOff } from '@lucide/svelte';
|
||||
|
||||
interface Props {
|
||||
open: boolean;
|
||||
type: 'timeout' | 'server';
|
||||
message: string;
|
||||
onOpenChange?: (open: boolean) => void;
|
||||
}
|
||||
|
||||
let { open = $bindable(), type, message, onOpenChange }: Props = $props();
|
||||
|
||||
const isTimeout = $derived(type === 'timeout');
|
||||
const title = $derived(isTimeout ? 'TCP Timeout' : 'Server Error');
|
||||
const description = $derived(
|
||||
isTimeout
|
||||
? 'The request did not receive a response from the server before timing out.'
|
||||
: 'The server responded with an error message. Review the details below.'
|
||||
);
|
||||
const iconClass = $derived(isTimeout ? 'text-destructive' : 'text-amber-500');
|
||||
const badgeClass = $derived(
|
||||
isTimeout
|
||||
? 'border-destructive/40 bg-destructive/10 text-destructive'
|
||||
: 'border-amber-500/40 bg-amber-500/10 text-amber-600 dark:text-amber-400'
|
||||
);
|
||||
|
||||
function handleOpenChange(newOpen: boolean) {
|
||||
open = newOpen;
|
||||
onOpenChange?.(newOpen);
|
||||
}
|
||||
</script>
|
||||
|
||||
<AlertDialog.Root {open} onOpenChange={handleOpenChange}>
|
||||
<AlertDialog.Content>
|
||||
<AlertDialog.Header>
|
||||
<AlertDialog.Title class="flex items-center gap-2">
|
||||
{#if isTimeout}
|
||||
<TimerOff class={`h-5 w-5 ${iconClass}`} />
|
||||
{:else}
|
||||
<AlertTriangle class={`h-5 w-5 ${iconClass}`} />
|
||||
{/if}
|
||||
|
||||
{title}
|
||||
</AlertDialog.Title>
|
||||
|
||||
<AlertDialog.Description>
|
||||
{description}
|
||||
</AlertDialog.Description>
|
||||
</AlertDialog.Header>
|
||||
|
||||
<div class={`rounded-lg border px-4 py-3 text-sm ${badgeClass}`}>
|
||||
<p class="font-medium">{message}</p>
|
||||
</div>
|
||||
|
||||
<AlertDialog.Footer>
|
||||
<AlertDialog.Action onclick={() => handleOpenChange(false)}>Close</AlertDialog.Action>
|
||||
</AlertDialog.Footer>
|
||||
</AlertDialog.Content>
|
||||
</AlertDialog.Root>
|
||||
@@ -1,66 +0,0 @@
|
||||
<script lang="ts">
|
||||
import { AlertTriangle } from '@lucide/svelte';
|
||||
import * as AlertDialog from '$lib/components/ui/alert-dialog';
|
||||
import { maxContextError, clearMaxContextError } from '$lib/stores/chat.svelte';
|
||||
</script>
|
||||
|
||||
<AlertDialog.Root
|
||||
open={maxContextError() !== null}
|
||||
onOpenChange={(open) => !open && clearMaxContextError()}
|
||||
>
|
||||
<AlertDialog.Content>
|
||||
<AlertDialog.Header>
|
||||
<AlertDialog.Title class="flex items-center gap-2">
|
||||
<AlertTriangle class="h-5 w-5 text-destructive" />
|
||||
|
||||
Message Too Long
|
||||
</AlertDialog.Title>
|
||||
|
||||
<AlertDialog.Description>
|
||||
Your message exceeds the model's context window and cannot be processed.
|
||||
</AlertDialog.Description>
|
||||
</AlertDialog.Header>
|
||||
|
||||
{#if maxContextError()}
|
||||
<div class="space-y-3 text-sm">
|
||||
<div class="rounded-lg bg-muted p-3">
|
||||
<div class="mb-2 font-medium">Token Usage:</div>
|
||||
|
||||
<div class="space-y-1 text-muted-foreground">
|
||||
<div>
|
||||
Estimated tokens:
|
||||
|
||||
<span class="font-mono">
|
||||
{maxContextError()?.estimatedTokens.toLocaleString()}
|
||||
</span>
|
||||
</div>
|
||||
|
||||
<div>
|
||||
Context window:
|
||||
|
||||
<span class="font-mono">
|
||||
{maxContextError()?.maxContext.toLocaleString()}
|
||||
</span>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div>
|
||||
<div class="mb-2 font-medium">Suggestions:</div>
|
||||
|
||||
<ul class="list-inside list-disc space-y-1 text-muted-foreground">
|
||||
<li>Shorten your message</li>
|
||||
|
||||
<li>Remove some file attachments</li>
|
||||
|
||||
<li>Start a new conversation</li>
|
||||
</ul>
|
||||
</div>
|
||||
</div>
|
||||
{/if}
|
||||
|
||||
<AlertDialog.Footer>
|
||||
<AlertDialog.Action onclick={() => clearMaxContextError()}>Got it</AlertDialog.Action>
|
||||
</AlertDialog.Footer>
|
||||
</AlertDialog.Content>
|
||||
</AlertDialog.Root>
|
||||
@@ -30,12 +30,11 @@ export { default as ChatSidebar } from './chat/ChatSidebar/ChatSidebar.svelte';
|
||||
export { default as ChatSidebarConversationItem } from './chat/ChatSidebar/ChatSidebarConversationItem.svelte';
|
||||
export { default as ChatSidebarSearch } from './chat/ChatSidebar/ChatSidebarSearch.svelte';
|
||||
|
||||
export { default as ChatErrorDialog } from './dialogs/ChatErrorDialog.svelte';
|
||||
export { default as EmptyFileAlertDialog } from './dialogs/EmptyFileAlertDialog.svelte';
|
||||
|
||||
export { default as ConversationTitleUpdateDialog } from './dialogs/ConversationTitleUpdateDialog.svelte';
|
||||
|
||||
export { default as MaximumContextAlertDialog } from './dialogs/MaximumContextAlertDialog.svelte';
|
||||
|
||||
export { default as KeyboardShortcutInfo } from './misc/KeyboardShortcutInfo.svelte';
|
||||
|
||||
export { default as MarkdownContent } from './misc/MarkdownContent.svelte';
|
||||
|
||||
@@ -13,7 +13,7 @@ import { slotsService } from './slots';
|
||||
* - Manages streaming and non-streaming response parsing
|
||||
* - Provides request abortion capabilities
|
||||
* - Converts database messages to API format
|
||||
* - Handles error translation and context detection
|
||||
* - Handles error translation for server responses
|
||||
*
|
||||
* - **ChatStore**: Stateful orchestration and UI state management
|
||||
* - Uses ChatService for all AI model communication
|
||||
@@ -26,7 +26,6 @@ import { slotsService } from './slots';
|
||||
* - Streaming response handling with real-time callbacks
|
||||
* - Reasoning content extraction and processing
|
||||
* - File attachment processing (images, PDFs, audio, text)
|
||||
* - Context error detection and reporting
|
||||
* - Request lifecycle management (abort, cleanup)
|
||||
*/
|
||||
export class ChatService {
|
||||
@@ -209,10 +208,13 @@ export class ChatService {
|
||||
userFriendlyError = new Error(
|
||||
'Unable to connect to server - please check if the server is running'
|
||||
);
|
||||
userFriendlyError.name = 'NetworkError';
|
||||
} else if (error.message.includes('ECONNREFUSED')) {
|
||||
userFriendlyError = new Error('Connection refused - server may be offline');
|
||||
userFriendlyError.name = 'NetworkError';
|
||||
} else if (error.message.includes('ETIMEDOUT')) {
|
||||
userFriendlyError = new Error('Request timeout - server may be overloaded');
|
||||
userFriendlyError = new Error('Request timed out - the server took too long to respond');
|
||||
userFriendlyError.name = 'TimeoutError';
|
||||
} else {
|
||||
userFriendlyError = error;
|
||||
}
|
||||
@@ -262,6 +264,7 @@ export class ChatService {
|
||||
let fullReasoningContent = '';
|
||||
let hasReceivedData = false;
|
||||
let lastTimings: ChatMessageTimings | undefined;
|
||||
let streamFinished = false;
|
||||
|
||||
try {
|
||||
let chunk = '';
|
||||
@@ -277,18 +280,8 @@ export class ChatService {
|
||||
if (line.startsWith('data: ')) {
|
||||
const data = line.slice(6);
|
||||
if (data === '[DONE]') {
|
||||
if (!hasReceivedData && aggregatedContent.length === 0) {
|
||||
const contextError = new Error(
|
||||
'The request exceeds the available context size. Try increasing the context size or enable context shift.'
|
||||
);
|
||||
contextError.name = 'ContextError';
|
||||
onError?.(contextError);
|
||||
return;
|
||||
}
|
||||
|
||||
onComplete?.(aggregatedContent, fullReasoningContent || undefined, lastTimings);
|
||||
|
||||
return;
|
||||
streamFinished = true;
|
||||
continue;
|
||||
}
|
||||
|
||||
try {
|
||||
@@ -326,13 +319,13 @@ export class ChatService {
|
||||
}
|
||||
}
|
||||
|
||||
if (!hasReceivedData && aggregatedContent.length === 0) {
|
||||
const contextError = new Error(
|
||||
'The request exceeds the available context size. Try increasing the context size or enable context shift.'
|
||||
);
|
||||
contextError.name = 'ContextError';
|
||||
onError?.(contextError);
|
||||
return;
|
||||
if (streamFinished) {
|
||||
if (!hasReceivedData && aggregatedContent.length === 0) {
|
||||
const noResponseError = new Error('No response received from server. Please try again.');
|
||||
throw noResponseError;
|
||||
}
|
||||
|
||||
onComplete?.(aggregatedContent, fullReasoningContent || undefined, lastTimings);
|
||||
}
|
||||
} catch (error) {
|
||||
const err = error instanceof Error ? error : new Error('Stream error');
|
||||
@@ -368,12 +361,8 @@ export class ChatService {
|
||||
const responseText = await response.text();
|
||||
|
||||
if (!responseText.trim()) {
|
||||
const contextError = new Error(
|
||||
'The request exceeds the available context size. Try increasing the context size or enable context shift.'
|
||||
);
|
||||
contextError.name = 'ContextError';
|
||||
onError?.(contextError);
|
||||
throw contextError;
|
||||
const noResponseError = new Error('No response received from server. Please try again.');
|
||||
throw noResponseError;
|
||||
}
|
||||
|
||||
const data: ApiChatCompletionResponse = JSON.parse(responseText);
|
||||
@@ -385,22 +374,14 @@ export class ChatService {
|
||||
}
|
||||
|
||||
if (!content.trim()) {
|
||||
const contextError = new Error(
|
||||
'The request exceeds the available context size. Try increasing the context size or enable context shift.'
|
||||
);
|
||||
contextError.name = 'ContextError';
|
||||
onError?.(contextError);
|
||||
throw contextError;
|
||||
const noResponseError = new Error('No response received from server. Please try again.');
|
||||
throw noResponseError;
|
||||
}
|
||||
|
||||
onComplete?.(content, reasoningContent);
|
||||
|
||||
return content;
|
||||
} catch (error) {
|
||||
if (error instanceof Error && error.name === 'ContextError') {
|
||||
throw error;
|
||||
}
|
||||
|
||||
const err = error instanceof Error ? error : new Error('Parse error');
|
||||
|
||||
onError?.(err);
|
||||
@@ -594,37 +575,19 @@ export class ChatService {
|
||||
const errorText = await response.text();
|
||||
const errorData: ApiErrorResponse = JSON.parse(errorText);
|
||||
|
||||
if (errorData.error?.type === 'exceed_context_size_error') {
|
||||
const contextError = errorData.error as ApiContextSizeError;
|
||||
const error = new Error(contextError.message);
|
||||
error.name = 'ContextError';
|
||||
// Attach structured context information
|
||||
(
|
||||
error as Error & {
|
||||
contextInfo?: { promptTokens: number; maxContext: number; estimatedTokens: number };
|
||||
}
|
||||
).contextInfo = {
|
||||
promptTokens: contextError.n_prompt_tokens,
|
||||
maxContext: contextError.n_ctx,
|
||||
estimatedTokens: contextError.n_prompt_tokens
|
||||
};
|
||||
return error;
|
||||
}
|
||||
|
||||
// Fallback for other error types
|
||||
const message = errorData.error?.message || 'Unknown server error';
|
||||
return new Error(message);
|
||||
const error = new Error(message);
|
||||
error.name = response.status === 400 ? 'ServerError' : 'HttpError';
|
||||
|
||||
return error;
|
||||
} catch {
|
||||
// If we can't parse the error response, return a generic error
|
||||
return new Error(`Server error (${response.status}): ${response.statusText}`);
|
||||
const fallback = new Error(`Server error (${response.status}): ${response.statusText}`);
|
||||
fallback.name = 'HttpError';
|
||||
return fallback;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Updates the processing state with timing information from the server response
|
||||
* @param timings - Timing data from the API response
|
||||
* @param promptProgress - Progress data from the API response
|
||||
*/
|
||||
private updateProcessingState(
|
||||
timings?: ChatMessageTimings,
|
||||
promptProgress?: ChatMessagePromptProgress
|
||||
|
||||
@@ -1,102 +0,0 @@
|
||||
import { slotsService } from './slots';
|
||||
|
||||
export interface ContextCheckResult {
|
||||
wouldExceed: boolean;
|
||||
currentUsage: number;
|
||||
maxContext: number;
|
||||
availableTokens: number;
|
||||
reservedTokens: number;
|
||||
}
|
||||
|
||||
/**
|
||||
* ContextService - Context window management and limit checking
|
||||
*
|
||||
* This service provides context window monitoring and limit checking using real-time
|
||||
* server data from the slots service. It helps prevent context overflow by tracking
|
||||
* current usage and calculating available space for new content.
|
||||
*
|
||||
* **Architecture & Relationships:**
|
||||
* - **ContextService** (this class): Context limit monitoring
|
||||
* - Uses SlotsService for real-time context usage data
|
||||
* - Calculates available tokens with configurable reserves
|
||||
* - Provides context limit checking and error messaging
|
||||
* - Helps prevent context window overflow
|
||||
*
|
||||
* - **SlotsService**: Provides current context usage from server slots
|
||||
* - **ChatStore**: Uses context checking before sending messages
|
||||
* - **UI Components**: Display context usage warnings and limits
|
||||
*
|
||||
* **Key Features:**
|
||||
* - **Real-time Context Checking**: Uses live server data for accuracy
|
||||
* - **Token Reservation**: Reserves tokens for response generation
|
||||
* - **Limit Detection**: Prevents context window overflow
|
||||
* - **Usage Reporting**: Detailed context usage statistics
|
||||
* - **Error Messaging**: User-friendly context limit messages
|
||||
* - **Configurable Reserves**: Adjustable token reservation for responses
|
||||
*
|
||||
* **Context Management:**
|
||||
* - Monitors current context usage from active slots
|
||||
* - Calculates available space considering reserved tokens
|
||||
* - Provides early warning before context limits are reached
|
||||
* - Helps optimize conversation length and content
|
||||
*/
|
||||
export class ContextService {
|
||||
private reserveTokens: number;
|
||||
|
||||
constructor(reserveTokens = 512) {
|
||||
this.reserveTokens = reserveTokens;
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks if the context limit would be exceeded
|
||||
*
|
||||
* @returns {Promise<ContextCheckResult | null>} Promise that resolves to the context check result or null if an error occurs
|
||||
*/
|
||||
async checkContextLimit(): Promise<ContextCheckResult | null> {
|
||||
try {
|
||||
const currentState = await slotsService.getCurrentState();
|
||||
|
||||
if (!currentState) {
|
||||
return null;
|
||||
}
|
||||
|
||||
const maxContext = currentState.contextTotal;
|
||||
const currentUsage = currentState.contextUsed;
|
||||
const availableTokens = maxContext - currentUsage - this.reserveTokens;
|
||||
const wouldExceed = availableTokens <= 0;
|
||||
|
||||
return {
|
||||
wouldExceed,
|
||||
currentUsage,
|
||||
maxContext,
|
||||
availableTokens: Math.max(0, availableTokens),
|
||||
reservedTokens: this.reserveTokens
|
||||
};
|
||||
} catch (error) {
|
||||
console.warn('Error checking context limit:', error);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a formatted error message for context limit exceeded
|
||||
*
|
||||
* @param {ContextCheckResult} result - Context check result
|
||||
* @returns {string} Formatted error message
|
||||
*/
|
||||
getContextErrorMessage(result: ContextCheckResult): string {
|
||||
const usagePercent = Math.round((result.currentUsage / result.maxContext) * 100);
|
||||
return `Context window is nearly full. Current usage: ${result.currentUsage.toLocaleString()}/${result.maxContext.toLocaleString()} tokens (${usagePercent}%). Available space: ${result.availableTokens.toLocaleString()} tokens (${result.reservedTokens} reserved for response).`;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the number of tokens to reserve for response generation
|
||||
*
|
||||
* @param {number} tokens - Number of tokens to reserve
|
||||
*/
|
||||
setReserveTokens(tokens: number): void {
|
||||
this.reserveTokens = tokens;
|
||||
}
|
||||
}
|
||||
|
||||
export const contextService = new ContextService();
|
||||
@@ -1,3 +1,2 @@
|
||||
export { chatService } from './chat';
|
||||
export { contextService } from './context';
|
||||
export { slotsService } from './slots';
|
||||
|
||||
@@ -39,7 +39,6 @@ import type { ExportedConversations } from '$lib/types/database';
|
||||
* - Conversation branching for exploring different response paths
|
||||
* - Streaming AI responses with real-time content updates
|
||||
* - File attachment support (images, PDFs, text files, audio)
|
||||
* - Context window management with error recovery
|
||||
* - Partial response saving when generation is interrupted
|
||||
* - Message editing with automatic response regeneration
|
||||
*/
|
||||
@@ -48,11 +47,9 @@ class ChatStore {
|
||||
activeMessages = $state<DatabaseMessage[]>([]);
|
||||
conversations = $state<DatabaseConversation[]>([]);
|
||||
currentResponse = $state('');
|
||||
errorDialogState = $state<{ type: 'timeout' | 'server'; message: string } | null>(null);
|
||||
isInitialized = $state(false);
|
||||
isLoading = $state(false);
|
||||
maxContextError = $state<{ message: string; estimatedTokens: number; maxContext: number } | null>(
|
||||
null
|
||||
);
|
||||
titleUpdateConfirmationCallback?: (currentTitle: string, newTitle: string) => Promise<boolean>;
|
||||
|
||||
constructor() {
|
||||
@@ -69,8 +66,6 @@ class ChatStore {
|
||||
try {
|
||||
await this.loadConversations();
|
||||
|
||||
this.maxContextError = null;
|
||||
|
||||
this.isInitialized = true;
|
||||
} catch (error) {
|
||||
console.error('Failed to initialize chat store:', error);
|
||||
@@ -99,8 +94,6 @@ class ChatStore {
|
||||
this.activeConversation = conversation;
|
||||
this.activeMessages = [];
|
||||
|
||||
this.maxContextError = null;
|
||||
|
||||
await goto(`#/chat/${conversation.id}`);
|
||||
|
||||
return conversation.id;
|
||||
@@ -133,8 +126,6 @@ class ChatStore {
|
||||
this.activeMessages = await DatabaseStore.getConversationMessages(convId);
|
||||
}
|
||||
|
||||
this.maxContextError = null;
|
||||
|
||||
return true;
|
||||
} catch (error) {
|
||||
console.error('Failed to load conversation:', error);
|
||||
@@ -418,56 +409,6 @@ class ChatStore {
|
||||
return;
|
||||
}
|
||||
|
||||
if (error.name === 'ContextError') {
|
||||
console.warn('Context error detected:', error.message);
|
||||
this.isLoading = false;
|
||||
this.currentResponse = '';
|
||||
|
||||
const messageIndex = this.activeMessages.findIndex(
|
||||
(m: DatabaseMessage) => m.id === assistantMessage.id
|
||||
);
|
||||
|
||||
if (messageIndex !== -1) {
|
||||
this.activeMessages.splice(messageIndex, 1);
|
||||
DatabaseStore.deleteMessage(assistantMessage.id).catch(console.error);
|
||||
}
|
||||
|
||||
// Use structured context info from new exceed_context_size_error format if available
|
||||
const contextInfo = (
|
||||
error as Error & {
|
||||
contextInfo?: { promptTokens: number; maxContext: number; estimatedTokens: number };
|
||||
}
|
||||
).contextInfo;
|
||||
let estimatedTokens = 0;
|
||||
let maxContext = serverStore.serverProps?.default_generation_settings.n_ctx || 8192;
|
||||
|
||||
if (contextInfo) {
|
||||
// Use precise token counts from server response
|
||||
estimatedTokens = contextInfo.promptTokens;
|
||||
maxContext = contextInfo.maxContext;
|
||||
} else {
|
||||
// Fallback to estimation for older error format
|
||||
try {
|
||||
// Rough estimation: ~4 characters per token
|
||||
const messageContent = JSON.stringify(messages);
|
||||
estimatedTokens = Math.ceil(messageContent.length / 4);
|
||||
} catch {
|
||||
estimatedTokens = 0;
|
||||
}
|
||||
}
|
||||
|
||||
this.maxContextError = {
|
||||
message: error.message,
|
||||
estimatedTokens,
|
||||
maxContext
|
||||
};
|
||||
|
||||
if (onError) {
|
||||
onError(error);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
console.error('Streaming error:', error);
|
||||
this.isLoading = false;
|
||||
this.currentResponse = '';
|
||||
@@ -477,9 +418,19 @@ class ChatStore {
|
||||
);
|
||||
|
||||
if (messageIndex !== -1) {
|
||||
this.activeMessages[messageIndex].content = `Error: ${error.message}`;
|
||||
const [failedMessage] = this.activeMessages.splice(messageIndex, 1);
|
||||
|
||||
if (failedMessage) {
|
||||
DatabaseStore.deleteMessage(failedMessage.id).catch((cleanupError) => {
|
||||
console.error('Failed to remove assistant message after error:', cleanupError);
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
const dialogType = error.name === 'TimeoutError' ? 'timeout' : 'server';
|
||||
|
||||
this.showErrorDialog(dialogType, error.message);
|
||||
|
||||
if (onError) {
|
||||
onError(error);
|
||||
}
|
||||
@@ -487,6 +438,14 @@ class ChatStore {
|
||||
});
|
||||
}
|
||||
|
||||
private showErrorDialog(type: 'timeout' | 'server', message: string): void {
|
||||
this.errorDialogState = { type, message };
|
||||
}
|
||||
|
||||
dismissErrorDialog(): void {
|
||||
this.errorDialogState = null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks if an error is an abort error (user cancelled operation)
|
||||
* @param error - The error to check
|
||||
@@ -574,6 +533,7 @@ class ChatStore {
|
||||
return;
|
||||
}
|
||||
|
||||
this.errorDialogState = null;
|
||||
this.isLoading = true;
|
||||
this.currentResponse = '';
|
||||
|
||||
@@ -603,37 +563,23 @@ class ChatStore {
|
||||
|
||||
const conversationContext = this.activeMessages.slice(0, -1);
|
||||
|
||||
await this.streamChatCompletion(
|
||||
conversationContext,
|
||||
assistantMessage,
|
||||
undefined,
|
||||
(error: Error) => {
|
||||
if (error.name === 'ContextError' && userMessage) {
|
||||
const userMessageIndex = this.findMessageIndex(userMessage.id);
|
||||
|
||||
if (userMessageIndex !== -1) {
|
||||
this.activeMessages.splice(userMessageIndex, 1);
|
||||
DatabaseStore.deleteMessage(userMessage.id).catch(console.error);
|
||||
}
|
||||
}
|
||||
}
|
||||
);
|
||||
await this.streamChatCompletion(conversationContext, assistantMessage);
|
||||
} catch (error) {
|
||||
if (this.isAbortError(error)) {
|
||||
this.isLoading = false;
|
||||
return;
|
||||
}
|
||||
|
||||
if (error instanceof Error && error.name === 'ContextError' && userMessage) {
|
||||
const userMessageIndex = this.findMessageIndex(userMessage.id);
|
||||
if (userMessageIndex !== -1) {
|
||||
this.activeMessages.splice(userMessageIndex, 1);
|
||||
DatabaseStore.deleteMessage(userMessage.id).catch(console.error);
|
||||
}
|
||||
}
|
||||
|
||||
console.error('Failed to send message:', error);
|
||||
this.isLoading = false;
|
||||
if (!this.errorDialogState) {
|
||||
if (error instanceof Error) {
|
||||
const dialogType = error.name === 'TimeoutError' ? 'timeout' : 'server';
|
||||
this.showErrorDialog(dialogType, error.message);
|
||||
} else {
|
||||
this.showErrorDialog('server', 'Unknown error occurred while sending message');
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -662,24 +608,6 @@ class ChatStore {
|
||||
this.currentResponse = '';
|
||||
}
|
||||
|
||||
/**
|
||||
* Clears the max context error state
|
||||
* Removes any displayed context limit warnings
|
||||
*/
|
||||
clearMaxContextError(): void {
|
||||
this.maxContextError = null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the max context error state
|
||||
* @param error - The context error details or null to clear
|
||||
*/
|
||||
setMaxContextError(
|
||||
error: { message: string; estimatedTokens: number; maxContext: number } | null
|
||||
): void {
|
||||
this.maxContextError = error;
|
||||
}
|
||||
|
||||
/**
|
||||
* Saves partial response if generation was interrupted
|
||||
* Preserves user's partial content and timing data when generation is stopped early
|
||||
@@ -1250,7 +1178,6 @@ class ChatStore {
|
||||
this.activeMessages = [];
|
||||
this.currentResponse = '';
|
||||
this.isLoading = false;
|
||||
this.maxContextError = null;
|
||||
}
|
||||
|
||||
/** Refreshes active messages based on currNode after branch navigation */
|
||||
@@ -1538,6 +1465,7 @@ class ChatStore {
|
||||
private async generateResponseForMessage(userMessageId: string): Promise<void> {
|
||||
if (!this.activeConversation) return;
|
||||
|
||||
this.errorDialogState = null;
|
||||
this.isLoading = true;
|
||||
this.currentResponse = '';
|
||||
|
||||
@@ -1584,7 +1512,7 @@ export const activeMessages = () => chatStore.activeMessages;
|
||||
export const isLoading = () => chatStore.isLoading;
|
||||
export const currentResponse = () => chatStore.currentResponse;
|
||||
export const isInitialized = () => chatStore.isInitialized;
|
||||
export const maxContextError = () => chatStore.maxContextError;
|
||||
export const errorDialog = () => chatStore.errorDialogState;
|
||||
|
||||
export const createConversation = chatStore.createConversation.bind(chatStore);
|
||||
export const downloadConversation = chatStore.downloadConversation.bind(chatStore);
|
||||
@@ -1592,9 +1520,9 @@ export const exportAllConversations = chatStore.exportAllConversations.bind(chat
|
||||
export const importConversations = chatStore.importConversations.bind(chatStore);
|
||||
export const deleteConversation = chatStore.deleteConversation.bind(chatStore);
|
||||
export const sendMessage = chatStore.sendMessage.bind(chatStore);
|
||||
export const dismissErrorDialog = chatStore.dismissErrorDialog.bind(chatStore);
|
||||
|
||||
export const gracefulStop = chatStore.gracefulStop.bind(chatStore);
|
||||
export const clearMaxContextError = chatStore.clearMaxContextError.bind(chatStore);
|
||||
export const setMaxContextError = chatStore.setMaxContextError.bind(chatStore);
|
||||
|
||||
// Branching operations
|
||||
export const refreshActiveMessages = chatStore.refreshActiveMessages.bind(chatStore);
|
||||
|
||||
@@ -197,7 +197,7 @@ class ServerStore {
|
||||
errorMessage = 'Server not found - check server address';
|
||||
isOfflineLikeError = true;
|
||||
} else if (error.message.includes('ETIMEDOUT')) {
|
||||
errorMessage = 'Connection timeout - server may be overloaded';
|
||||
errorMessage = 'Request timed out - the server took too long to respond';
|
||||
isOfflineLikeError = true;
|
||||
} else if (error.message.includes('503')) {
|
||||
errorMessage = 'Server temporarily unavailable - try again shortly';
|
||||
|
||||
@@ -1,11 +1,7 @@
|
||||
<script lang="ts">
|
||||
import '../app.css';
|
||||
import { page } from '$app/state';
|
||||
import {
|
||||
ChatSidebar,
|
||||
ConversationTitleUpdateDialog,
|
||||
MaximumContextAlertDialog
|
||||
} from '$lib/components/app';
|
||||
import { ChatSidebar, ConversationTitleUpdateDialog } from '$lib/components/app';
|
||||
import {
|
||||
activeMessages,
|
||||
isLoading,
|
||||
@@ -145,8 +141,6 @@
|
||||
|
||||
<Toaster richColors />
|
||||
|
||||
<MaximumContextAlertDialog />
|
||||
|
||||
<ConversationTitleUpdateDialog
|
||||
bind:open={titleUpdateDialogOpen}
|
||||
currentTitle={titleUpdateCurrentTitle}
|
||||
|
||||
Reference in New Issue
Block a user