webui: add OAI-Compat Harmony tool-call streaming visualization and persistence in chat UI (#16618)
* webui: add OAI-Compat Harmony tool-call live streaming visualization and persistence in chat UI

- Purely visual and diagnostic change, no effect on model context, prompt construction, or inference behavior
- Captured assistant tool call payloads during streaming and non-streaming completions, and persisted them in chat state and storage for downstream use
- Exposed parsed tool call labels beneath the assistant's model info line with graceful fallback when parsing fails
- Added tool call badges beneath assistant responses that expose JSON tooltips and copy their payloads when clicked, matching the existing model badge styling
- Added a user-facing setting to toggle tool call visibility to the Developer settings section directly under the model selector option

* webui: remove scroll listener causing unnecessary layout updates (model selector)

* Update tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessageAssistant.svelte

Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>

* Update tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessageAssistant.svelte

Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>

* chore: npm run format & update webui build output

* chore: update webui build output

---------

Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
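For context, a minimal sketch (not part of the commit) of how OAI-compatible streaming delta.tool_calls fragments accumulate into complete tool calls, in the spirit of the mergeToolCallDeltas() helper this change adds to the chat service. The type and function names below are simplified stand-ins, not the webui's actual identifiers:

interface ToolCallFunctionDelta {
	name?: string;
	arguments?: string;
}

interface ToolCallDelta {
	index?: number;
	id?: string;
	type?: string;
	function?: ToolCallFunctionDelta;
}

// Each streamed chunk carries partial tool calls; entries with the same index
// belong to one call, and function.arguments arrives as string fragments.
function mergeToolCallDeltaSketch(existing: ToolCallDelta[], deltas: ToolCallDelta[]): ToolCallDelta[] {
	const result = existing.map((call) => ({
		...call,
		function: call.function ? { ...call.function } : undefined
	}));

	for (const delta of deltas) {
		const index = typeof delta.index === 'number' && delta.index >= 0 ? delta.index : result.length;

		while (result.length <= index) {
			result.push({});
		}

		const target = result[index];
		if (delta.id) target.id = delta.id;
		if (delta.type) target.type = delta.type;

		if (delta.function) {
			const fn = target.function ?? {};
			if (delta.function.name) fn.name = delta.function.name;
			if (delta.function.arguments) fn.arguments = (fn.arguments ?? '') + delta.function.arguments;
			target.function = fn;
		}
	}

	return result;
}

// Example: two chunks that build one call to get_weather({"city":"Paris"}).
let calls: ToolCallDelta[] = [];
calls = mergeToolCallDeltaSketch(calls, [
	{ index: 0, id: 'call_1', type: 'function', function: { name: 'get_weather', arguments: '{"city":' } }
]);
calls = mergeToolCallDeltaSketch(calls, [{ index: 0, function: { arguments: '"Paris"}' } }]);
console.log(JSON.stringify(calls, null, 2));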
Binary file not shown.
@@ -72,12 +72,6 @@
 		}
 	}
 
-	function handleScroll() {
-		if (isOpen) {
-			updateMenuPosition();
-		}
-	}
-
 	async function handleSelect(value: string | undefined) {
 		if (!value) return;
 
@@ -259,7 +253,7 @@
 	}
 </script>
 
-<svelte:window onresize={handleResize} onscroll={handleScroll} />
+<svelte:window onresize={handleResize} />
 
 <svelte:document onpointerdown={handlePointerDown} onkeydown={handleKeydown} />
 
@@ -2,6 +2,7 @@
 	import { getDeletionInfo } from '$lib/stores/chat.svelte';
 	import { copyToClipboard } from '$lib/utils/copy';
 	import { isIMEComposing } from '$lib/utils/is-ime-composing';
+	import type { ApiChatCompletionToolCall } from '$lib/types/api';
 	import ChatMessageAssistant from './ChatMessageAssistant.svelte';
 	import ChatMessageUser from './ChatMessageUser.svelte';
 
@@ -54,6 +55,29 @@
 		return null;
 	});
 
+	let toolCallContent = $derived.by((): ApiChatCompletionToolCall[] | string | null => {
+		if (message.role === 'assistant') {
+			const trimmedToolCalls = message.toolCalls?.trim();
+
+			if (!trimmedToolCalls) {
+				return null;
+			}
+
+			try {
+				const parsed = JSON.parse(trimmedToolCalls);
+
+				if (Array.isArray(parsed)) {
+					return parsed as ApiChatCompletionToolCall[];
+				}
+			} catch {
+				// Harmony-only path: fall back to the raw string so issues surface visibly.
+			}
+
+			return trimmedToolCalls;
+		}
+		return null;
+	});
+
 	function handleCancelEdit() {
 		isEditing = false;
 		editedContent = message.content;
@@ -171,5 +195,6 @@
 		{showDeleteDialog}
 		{siblingInfo}
 		{thinkingContent}
+		{toolCallContent}
 	/>
 {/if}
@@ -11,7 +11,8 @@
 		Gauge,
 		Clock,
 		WholeWord,
-		ChartNoAxesColumn
+		ChartNoAxesColumn,
+		Wrench
 	} from '@lucide/svelte';
 	import { Button } from '$lib/components/ui/button';
 	import { Checkbox } from '$lib/components/ui/checkbox';
@@ -21,6 +22,7 @@
 	import { config } from '$lib/stores/settings.svelte';
 	import { modelName as serverModelName } from '$lib/stores/server.svelte';
 	import { copyToClipboard } from '$lib/utils/copy';
+	import type { ApiChatCompletionToolCall } from '$lib/types/api';
 
 	interface Props {
 		class?: string;
@@ -51,6 +53,7 @@
 		siblingInfo?: ChatMessageSiblingInfo | null;
 		textareaElement?: HTMLTextAreaElement;
 		thinkingContent: string | null;
+		toolCallContent: ApiChatCompletionToolCall[] | string | null;
 	}
 
 	let {
@@ -76,9 +79,15 @@
 		shouldBranchAfterEdit = false,
 		siblingInfo = null,
 		textareaElement = $bindable(),
-		thinkingContent
+		thinkingContent,
+		toolCallContent = null
 	}: Props = $props();
 
+	const toolCalls = $derived(
+		Array.isArray(toolCallContent) ? (toolCallContent as ApiChatCompletionToolCall[]) : null
+	);
+	const fallbackToolCalls = $derived(typeof toolCallContent === 'string' ? toolCallContent : null);
+
 	const processingState = useProcessingState();
 	let currentConfig = $derived(config());
 	let serverModel = $derived(serverModelName());
@@ -97,6 +106,58 @@
 
 		void copyToClipboard(model ?? '');
 	}
+
+	function formatToolCallBadge(toolCall: ApiChatCompletionToolCall, index: number) {
+		const callNumber = index + 1;
+		const functionName = toolCall.function?.name?.trim();
+		const label = functionName || `Call #${callNumber}`;
+
+		const payload: Record<string, unknown> = {};
+
+		const id = toolCall.id?.trim();
+		if (id) {
+			payload.id = id;
+		}
+
+		const type = toolCall.type?.trim();
+		if (type) {
+			payload.type = type;
+		}
+
+		if (toolCall.function) {
+			const fnPayload: Record<string, unknown> = {};
+
+			const name = toolCall.function.name?.trim();
+			if (name) {
+				fnPayload.name = name;
+			}
+
+			const rawArguments = toolCall.function.arguments?.trim();
+			if (rawArguments) {
+				try {
+					fnPayload.arguments = JSON.parse(rawArguments);
+				} catch {
+					fnPayload.arguments = rawArguments;
+				}
+			}
+
+			if (Object.keys(fnPayload).length > 0) {
+				payload.function = fnPayload;
+			}
+		}
+
+		const formattedPayload = JSON.stringify(payload, null, 2);
+
+		return {
+			label,
+			tooltip: formattedPayload,
+			copyValue: formattedPayload
+		};
+	}
+
+	function handleCopyToolCall(payload: string) {
+		void copyToClipboard(payload, 'Tool call copied to clipboard');
+	}
 </script>
 
 <div
@@ -189,6 +250,47 @@
 				</span>
 			{/if}
 
+			{#if config().showToolCalls}
+				{#if (toolCalls && toolCalls.length > 0) || fallbackToolCalls}
+					<span class="inline-flex flex-wrap items-center gap-2 text-xs text-muted-foreground">
+						<span class="inline-flex items-center gap-1">
+							<Wrench class="h-3.5 w-3.5" />
+
+							<span>Tool calls:</span>
+						</span>
+
+						{#if toolCalls && toolCalls.length > 0}
+							{#each toolCalls as toolCall, index (toolCall.id ?? `${index}`)}
+								{@const badge = formatToolCallBadge(toolCall, index)}
+								<button
+									type="button"
+									class="tool-call-badge inline-flex cursor-pointer items-center gap-1 rounded-sm bg-muted-foreground/15 px-1.5 py-0.75"
+									title={badge.tooltip}
+									aria-label={`Copy tool call ${badge.label}`}
+									onclick={() => handleCopyToolCall(badge.copyValue)}
+								>
+									{badge.label}
+
+									<Copy class="ml-1 h-3 w-3" />
+								</button>
+							{/each}
+						{:else if fallbackToolCalls}
+							<button
+								type="button"
+								class="tool-call-badge tool-call-badge--fallback inline-flex cursor-pointer items-center gap-1 rounded-sm bg-muted-foreground/15 px-1.5 py-0.75"
+								title={fallbackToolCalls}
+								aria-label="Copy tool call payload"
+								onclick={() => handleCopyToolCall(fallbackToolCalls)}
+							>
+								{fallbackToolCalls}
+
+								<Copy class="ml-1 h-3 w-3" />
+							</button>
+						{/if}
+					</span>
+				{/if}
+			{/if}
+
 			{#if currentConfig.showMessageStats && message.timings && message.timings.predicted_n && message.timings.predicted_ms}
 				{@const tokensPerSecond = (message.timings.predicted_n / message.timings.predicted_ms) * 1000}
 				<span class="inline-flex items-center gap-2 text-xs text-muted-foreground">
@@ -287,4 +389,17 @@
 		white-space: pre-wrap;
 		word-break: break-word;
 	}
+
+	.tool-call-badge {
+		max-width: 12rem;
+		white-space: nowrap;
+		overflow: hidden;
+		text-overflow: ellipsis;
+	}
+
+	.tool-call-badge--fallback {
+		max-width: 20rem;
+		white-space: normal;
+		word-break: break-word;
+	}
 </style>
@@ -226,6 +226,11 @@
 			label: 'Enable model selector',
 			type: 'checkbox'
 		},
+		{
+			key: 'showToolCalls',
+			label: 'Show tool call labels',
+			type: 'checkbox'
+		},
 		{
 			key: 'disableReasoningFormat',
 			label: 'Show raw LLM output',
@@ -6,6 +6,7 @@ export const SETTING_CONFIG_DEFAULT: Record<string, string | number | boolean> =
 	theme: 'system',
 	showTokensPerSecond: false,
 	showThoughtInProgress: false,
+	showToolCalls: false,
 	disableReasoningFormat: false,
 	keepStatsVisible: false,
 	showMessageStats: true,
@@ -80,6 +81,8 @@ export const SETTING_CONFIG_INFO: Record<string, string> = {
 	custom: 'Custom JSON parameters to send to the API. Must be valid JSON format.',
 	showTokensPerSecond: 'Display generation speed in tokens per second during streaming.',
 	showThoughtInProgress: 'Expand thought process by default when generating messages.',
+	showToolCalls:
+		'Display tool call labels and payloads from Harmony-compatible delta.tool_calls data below assistant messages.',
 	disableReasoningFormat:
 		'Show raw LLM output without backend parsing and frontend Markdown rendering to inspect streaming across different models.',
 	keepStatsVisible: 'Keep processing statistics visible after generation finishes.',
@@ -1,6 +1,25 @@
 import { config } from '$lib/stores/settings.svelte';
 import { selectedModelName } from '$lib/stores/models.svelte';
 import { slotsService } from './slots';
+import type {
+	ApiChatCompletionRequest,
+	ApiChatCompletionResponse,
+	ApiChatCompletionStreamChunk,
+	ApiChatCompletionToolCall,
+	ApiChatCompletionToolCallDelta,
+	ApiChatMessageData
+} from '$lib/types/api';
+import type {
+	DatabaseMessage,
+	DatabaseMessageExtra,
+	DatabaseMessageExtraAudioFile,
+	DatabaseMessageExtraImageFile,
+	DatabaseMessageExtraLegacyContext,
+	DatabaseMessageExtraPdfFile,
+	DatabaseMessageExtraTextFile
+} from '$lib/types/database';
+import type { ChatMessagePromptProgress, ChatMessageTimings } from '$lib/types/chat';
+import type { SettingsChatServiceOptions } from '$lib/types/settings';
 /**
  * ChatService - Low-level API communication layer for llama.cpp server interactions
  *
@@ -53,6 +72,7 @@ export class ChatService {
 			onComplete,
 			onError,
 			onReasoningChunk,
+			onToolCallChunk,
 			onModel,
 			onFirstValidChunk,
 			// Generation parameters
@@ -201,6 +221,7 @@ export class ChatService {
 					onComplete,
 					onError,
 					onReasoningChunk,
+					onToolCallChunk,
 					onModel,
 					onFirstValidChunk,
 					conversationId,
@@ -208,7 +229,13 @@ export class ChatService {
 				);
 				return;
 			} else {
-				return this.handleNonStreamResponse(response, onComplete, onError, onModel);
+				return this.handleNonStreamResponse(
+					response,
+					onComplete,
+					onError,
+					onToolCallChunk,
+					onModel
+				);
 			}
 		} catch (error) {
 			if (error instanceof Error && error.name === 'AbortError') {
@@ -264,10 +291,12 @@ export class ChatService {
 		onComplete?: (
 			response: string,
 			reasoningContent?: string,
-			timings?: ChatMessageTimings
+			timings?: ChatMessageTimings,
+			toolCalls?: string
 		) => void,
 		onError?: (error: Error) => void,
 		onReasoningChunk?: (chunk: string) => void,
+		onToolCallChunk?: (chunk: string) => void,
 		onModel?: (model: string) => void,
 		onFirstValidChunk?: () => void,
 		conversationId?: string,
@@ -282,11 +311,53 @@ export class ChatService {
 		const decoder = new TextDecoder();
 		let aggregatedContent = '';
 		let fullReasoningContent = '';
+		let aggregatedToolCalls: ApiChatCompletionToolCall[] = [];
 		let hasReceivedData = false;
 		let lastTimings: ChatMessageTimings | undefined;
 		let streamFinished = false;
 		let modelEmitted = false;
 		let firstValidChunkEmitted = false;
+		let toolCallIndexOffset = 0;
+		let hasOpenToolCallBatch = false;
+
+		const finalizeOpenToolCallBatch = () => {
+			if (!hasOpenToolCallBatch) {
+				return;
+			}
+
+			toolCallIndexOffset = aggregatedToolCalls.length;
+			hasOpenToolCallBatch = false;
+		};
+
+		const processToolCallDelta = (toolCalls?: ApiChatCompletionToolCallDelta[]) => {
+			if (!toolCalls || toolCalls.length === 0) {
+				return;
+			}
+
+			aggregatedToolCalls = this.mergeToolCallDeltas(
+				aggregatedToolCalls,
+				toolCalls,
+				toolCallIndexOffset
+			);
+
+			if (aggregatedToolCalls.length === 0) {
+				return;
+			}
+
+			hasOpenToolCallBatch = true;
+
+			const serializedToolCalls = JSON.stringify(aggregatedToolCalls);
+
+			if (!serializedToolCalls) {
+				return;
+			}
+
+			hasReceivedData = true;
+
+			if (!abortSignal?.aborted) {
+				onToolCallChunk?.(serializedToolCalls);
+			}
+		};
 
 		try {
 			let chunk = '';
@@ -325,6 +396,7 @@ export class ChatService {
 
 						const content = parsed.choices[0]?.delta?.content;
 						const reasoningContent = parsed.choices[0]?.delta?.reasoning_content;
+						const toolCalls = parsed.choices[0]?.delta?.tool_calls;
 						const timings = parsed.timings;
 						const promptProgress = parsed.prompt_progress;
 
@@ -342,6 +414,7 @@ export class ChatService {
 						}
 
 						if (content) {
+							finalizeOpenToolCallBatch();
 							hasReceivedData = true;
 							aggregatedContent += content;
 							if (!abortSignal?.aborted) {
@@ -350,12 +423,15 @@ export class ChatService {
 						}
 
 						if (reasoningContent) {
+							finalizeOpenToolCallBatch();
 							hasReceivedData = true;
 							fullReasoningContent += reasoningContent;
 							if (!abortSignal?.aborted) {
 								onReasoningChunk?.(reasoningContent);
 							}
 						}
+
+						processToolCallDelta(toolCalls);
 					} catch (e) {
 						console.error('Error parsing JSON chunk:', e);
 					}
@@ -368,12 +444,26 @@ export class ChatService {
 			if (abortSignal?.aborted) return;
 
 			if (streamFinished) {
-				if (!hasReceivedData && aggregatedContent.length === 0) {
+				finalizeOpenToolCallBatch();
+
+				if (
+					!hasReceivedData &&
+					aggregatedContent.length === 0 &&
+					aggregatedToolCalls.length === 0
+				) {
 					const noResponseError = new Error('No response received from server. Please try again.');
 					throw noResponseError;
 				}
 
-				onComplete?.(aggregatedContent, fullReasoningContent || undefined, lastTimings);
+				const finalToolCalls =
+					aggregatedToolCalls.length > 0 ? JSON.stringify(aggregatedToolCalls) : undefined;
+
+				onComplete?.(
+					aggregatedContent,
+					fullReasoningContent || undefined,
+					lastTimings,
+					finalToolCalls
+				);
 			}
 		} catch (error) {
 			const err = error instanceof Error ? error : new Error('Stream error');
@@ -386,6 +476,54 @@ export class ChatService {
 		}
 	}
 
+	private mergeToolCallDeltas(
+		existing: ApiChatCompletionToolCall[],
+		deltas: ApiChatCompletionToolCallDelta[],
+		indexOffset = 0
+	): ApiChatCompletionToolCall[] {
+		const result = existing.map((call) => ({
+			...call,
+			function: call.function ? { ...call.function } : undefined
+		}));
+
+		for (const delta of deltas) {
+			const index =
+				typeof delta.index === 'number' && delta.index >= 0
+					? delta.index + indexOffset
+					: result.length;
+
+			while (result.length <= index) {
+				result.push({ function: undefined });
+			}
+
+			const target = result[index]!;
+
+			if (delta.id) {
+				target.id = delta.id;
+			}
+
+			if (delta.type) {
+				target.type = delta.type;
+			}
+
+			if (delta.function) {
+				const fn = target.function ? { ...target.function } : {};
+
+				if (delta.function.name) {
+					fn.name = delta.function.name;
+				}
+
+				if (delta.function.arguments) {
+					fn.arguments = (fn.arguments ?? '') + delta.function.arguments;
+				}
+
+				target.function = fn;
+			}
+		}
+
+		return result;
+	}
+
 	/**
 	 * Handles non-streaming response from the chat completion API.
 	 * Parses the JSON response and extracts the generated content.
@@ -401,9 +539,11 @@ export class ChatService {
 		onComplete?: (
 			response: string,
 			reasoningContent?: string,
-			timings?: ChatMessageTimings
+			timings?: ChatMessageTimings,
+			toolCalls?: string
 		) => void,
 		onError?: (error: Error) => void,
+		onToolCallChunk?: (chunk: string) => void,
 		onModel?: (model: string) => void
 	): Promise<string> {
 		try {
@@ -423,17 +563,31 @@ export class ChatService {
 
 			const content = data.choices[0]?.message?.content || '';
 			const reasoningContent = data.choices[0]?.message?.reasoning_content;
+			const toolCalls = data.choices[0]?.message?.tool_calls;
 
 			if (reasoningContent) {
 				console.log('Full reasoning content:', reasoningContent);
 			}
 
-			if (!content.trim()) {
+			let serializedToolCalls: string | undefined;
+
+			if (toolCalls && toolCalls.length > 0) {
+				const mergedToolCalls = this.mergeToolCallDeltas([], toolCalls);
+
+				if (mergedToolCalls.length > 0) {
+					serializedToolCalls = JSON.stringify(mergedToolCalls);
+					if (serializedToolCalls) {
+						onToolCallChunk?.(serializedToolCalls);
+					}
+				}
+			}
+
+			if (!content.trim() && !serializedToolCalls) {
 				const noResponseError = new Error('No response received from server. Please try again.');
 				throw noResponseError;
 			}
 
-			onComplete?.(content, reasoningContent);
+			onComplete?.(content, reasoningContent, undefined, serializedToolCalls);
 
 			return content;
 		} catch (error) {
@@ -205,6 +205,7 @@ class ChatStore {
 				type,
 				timestamp: Date.now(),
 				thinking: '',
+				toolCalls: '',
 				children: [],
 				extra: extras
 			},
@@ -360,6 +361,7 @@ class ChatStore {
 	): Promise<void> {
 		let streamedContent = '';
 		let streamedReasoningContent = '';
+		let streamedToolCallContent = '';
 
 		let resolvedModel: string | null = null;
 		let modelPersisted = false;
@@ -468,6 +470,20 @@ class ChatStore {
 				this.updateMessageAtIndex(messageIndex, { thinking: streamedReasoningContent });
 			},
 
+			onToolCallChunk: (toolCallChunk: string) => {
+				const chunk = toolCallChunk.trim();
+
+				if (!chunk) {
+					return;
+				}
+
+				streamedToolCallContent = chunk;
+
+				const messageIndex = this.findMessageIndex(assistantMessage.id);
+
+				this.updateMessageAtIndex(messageIndex, { toolCalls: streamedToolCallContent });
+			},
+
 			onModel: (modelName: string) => {
 				recordModel(modelName);
 			},
@@ -475,18 +491,21 @@ class ChatStore {
 			onComplete: async (
 				finalContent?: string,
 				reasoningContent?: string,
-				timings?: ChatMessageTimings
+				timings?: ChatMessageTimings,
+				toolCallContent?: string
 			) => {
 				slotsService.stopStreaming();
 
 				const updateData: {
 					content: string;
 					thinking: string;
+					toolCalls: string;
 					timings?: ChatMessageTimings;
 					model?: string;
 				} = {
 					content: finalContent || streamedContent,
 					thinking: reasoningContent || streamedReasoningContent,
+					toolCalls: toolCallContent || streamedToolCallContent,
 					timings: timings
 				};
 
@@ -499,7 +518,11 @@ class ChatStore {
 
 				const messageIndex = this.findMessageIndex(assistantMessage.id);
 
-				const localUpdateData: { timings?: ChatMessageTimings; model?: string } = {
+				const localUpdateData: {
+					timings?: ChatMessageTimings;
+					model?: string;
+					toolCalls?: string;
+				} = {
 					timings: timings
 				};
 
@@ -507,6 +530,10 @@ class ChatStore {
 					localUpdateData.model = updateData.model;
 				}
 
+				if (updateData.toolCalls !== undefined) {
+					localUpdateData.toolCalls = updateData.toolCalls;
+				}
+
 				this.updateMessageAtIndex(messageIndex, localUpdateData);
 
 				await DatabaseStore.updateCurrentNode(assistantMessage.convId, assistantMessage.id);
@@ -620,6 +647,7 @@ class ChatStore {
 				content: '',
 				timestamp: Date.now(),
 				thinking: '',
+				toolCalls: '',
 				children: [],
 				model: null
 			},
@@ -1443,6 +1471,7 @@ class ChatStore {
 				role: messageToEdit.role,
 				content: newContent,
 				thinking: messageToEdit.thinking || '',
+				toolCalls: messageToEdit.toolCalls || '',
 				children: [],
 				model: messageToEdit.model // Preserve original model info when branching
 			},
@@ -1518,6 +1547,7 @@ class ChatStore {
 				role: messageToEdit.role,
 				content: newContent,
 				thinking: messageToEdit.thinking || '',
+				toolCalls: messageToEdit.toolCalls || '',
 				children: [],
 				extra: messageToEdit.extra ? JSON.parse(JSON.stringify(messageToEdit.extra)) : undefined,
 				model: messageToEdit.model // Preserve original model info when branching
@@ -1589,6 +1619,7 @@ class ChatStore {
 				role: 'assistant',
 				content: '',
 				thinking: '',
+				toolCalls: '',
 				children: [],
 				model: null
 			},
@@ -1647,6 +1678,7 @@ class ChatStore {
 				role: 'assistant',
 				content: '',
 				thinking: '',
+				toolCalls: '',
 				children: [],
 				model: null
 			},
@@ -114,6 +114,7 @@ export class DatabaseStore {
 			...message,
 			id: uuid(),
 			parent: parentId,
+			toolCalls: message.toolCalls ?? '',
 			children: []
 		};
 
@@ -154,6 +155,7 @@ export class DatabaseStore {
 			content: '',
 			parent: null,
 			thinking: '',
+			toolCalls: '',
 			children: []
 		};
 
tools/server/webui/src/lib/types/api.d.ts (vendored, 19 lines changed)
@@ -183,6 +183,23 @@ export interface ApiChatCompletionRequest {
 	samplers?: string[];
 	// Custom parameters (JSON string)
 	custom?: Record<string, unknown>;
+	timings_per_token?: boolean;
+}
+
+export interface ApiChatCompletionToolCallFunctionDelta {
+	name?: string;
+	arguments?: string;
+}
+
+export interface ApiChatCompletionToolCallDelta {
+	index?: number;
+	id?: string;
+	type?: string;
+	function?: ApiChatCompletionToolCallFunctionDelta;
+}
+
+export interface ApiChatCompletionToolCall extends ApiChatCompletionToolCallDelta {
+	function?: ApiChatCompletionToolCallFunctionDelta & { arguments?: string };
 }
 
 export interface ApiChatCompletionStreamChunk {
@@ -195,6 +212,7 @@ export interface ApiChatCompletionStreamChunk {
 			content?: string;
 			reasoning_content?: string;
 			model?: string;
+			tool_calls?: ApiChatCompletionToolCallDelta[];
 		};
 	}>;
 	timings?: {
@@ -216,6 +234,7 @@ export interface ApiChatCompletionResponse {
 			content: string;
 			reasoning_content?: string;
 			model?: string;
+			tool_calls?: ApiChatCompletionToolCallDelta[];
 		};
 	}>;
 }
@@ -60,6 +60,7 @@ export interface DatabaseMessage {
 	content: string;
 	parent: string;
 	thinking: string;
+	toolCalls?: string;
 	children: string[];
 	extra?: DatabaseMessageExtra[];
 	timings?: ChatMessageTimings;
@@ -38,12 +38,19 @@ export interface SettingsChatServiceOptions {
 	samplers?: string | string[];
 	// Custom parameters
 	custom?: string;
+	timings_per_token?: boolean;
 	// Callbacks
 	onChunk?: (chunk: string) => void;
 	onReasoningChunk?: (chunk: string) => void;
+	onToolCallChunk?: (chunk: string) => void;
 	onModel?: (model: string) => void;
 	onFirstValidChunk?: () => void;
-	onComplete?: (response: string, reasoningContent?: string, timings?: ChatMessageTimings) => void;
+	onComplete?: (
+		response: string,
+		reasoningContent?: string,
+		timings?: ChatMessageTimings,
+		toolCalls?: string
+	) => void;
 	onError?: (error: Error) => void;
 }
 
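Illustrative usage sketch (not from the commit): given the SettingsChatServiceOptions signature above, a caller could wire the new tool-call callbacks roughly as below; everything other than the option keys themselves is a hypothetical placeholder, not the webui's actual call site, and the timings type is left as unknown here.

const toolCallAwareOptions = {
	onToolCallChunk: (chunk: string) => {
		// chunk is the JSON-serialized array of tool calls accumulated so far
		console.log('tool calls so far:', JSON.parse(chunk));
	},
	onComplete: (response: string, reasoningContent?: string, timings?: unknown, toolCalls?: string) => {
		// when present, toolCalls is what gets persisted alongside the assistant message
		if (toolCalls) {
			console.log('final tool calls:', JSON.parse(toolCalls));
		}
	}
};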