diff --git a/tools/server/public/index.html.gz b/tools/server/public/index.html.gz
index 29579e08c2..a81bae04d1 100644
Binary files a/tools/server/public/index.html.gz and b/tools/server/public/index.html.gz differ
diff --git a/tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessageAssistant.svelte b/tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessageAssistant.svelte
index e878e7bf8a..41774c6f87 100644
--- a/tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessageAssistant.svelte
+++ b/tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessageAssistant.svelte
@@ -3,7 +3,16 @@
 	import { useProcessingState } from '$lib/hooks/use-processing-state.svelte';
 	import { isLoading } from '$lib/stores/chat.svelte';
 	import { fade } from 'svelte/transition';
-	import { Check, Copy, Package, X } from '@lucide/svelte';
+	import {
+		Check,
+		Copy,
+		Package,
+		X,
+		Gauge,
+		Clock,
+		WholeWord,
+		ChartNoAxesColumn
+	} from '@lucide/svelte';
 	import { Button } from '$lib/components/ui/button';
 	import { Checkbox } from '$lib/components/ui/checkbox';
 	import { INPUT_CLASSES } from '$lib/constants/input-classes';
@@ -160,22 +169,58 @@
 	{/if}
 
-	{#if displayedModel()}
-		...
-			Model used: ...
-		...
-	{/if}
+	...
+		{#if displayedModel()}
+			...
+				Model used: ...
+			...
+		{/if}
+
+		{#if currentConfig.showMessageStats && message.timings && message.timings.predicted_n && message.timings.predicted_ms}
+			{@const tokensPerSecond = (message.timings.predicted_n / message.timings.predicted_ms) * 1000}
+			...
+				Statistics:
+				...
+				{tokensPerSecond.toFixed(2)} tokens/s
+				...
+				{message.timings.predicted_n} tokens
+				...
+				{(message.timings.predicted_ms / 1000).toFixed(2)}s
+			...
+		{/if}
+	...
 	{#if message.timestamp && !isEditing}
diff --git a/tools/server/webui/src/lib/constants/settings-config.ts b/tools/server/webui/src/lib/constants/settings-config.ts
@@ ... @@ export const SETTING_CONFIG_DEFAULT = {
 	showThoughtInProgress: false,
 	disableReasoningFormat: false,
 	keepStatsVisible: false,
+	showMessageStats: true,
 	askForTitleConfirmation: false,
 	pasteLongTextToFileLen: 2500,
 	pdfAsImage: false,
@@ -82,6 +83,8 @@ export const SETTING_CONFIG_INFO: Record<string, string> = {
 	disableReasoningFormat:
 		'Show raw LLM output without backend parsing and frontend Markdown rendering to inspect streaming across different models.',
 	keepStatsVisible: 'Keep processing statistics visible after generation finishes.',
+	showMessageStats:
+		'Display generation statistics (tokens/second, token count, duration) below each assistant message.',
 	askForTitleConfirmation:
 		'Ask for confirmation before automatically changing conversation title when editing the first message.',
 	pdfAsImage: 'Parse PDF as image instead of text (requires vision-capable model).',
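Note on the computation (not part of the patch): the displayed values come from the `timings` object the llama.cpp server attaches to each completed response, where `predicted_n` is the number of generated tokens and `predicted_ms` is the generation time in milliseconds. Below is a minimal TypeScript sketch of the same arithmetic; the `Timings` interface and `formatMessageStats` helper are illustrative names, not part of the webui code.

```ts
// Field names match the `message.timings` payload used in the diff above.
interface Timings {
	predicted_n: number; // generated tokens
	predicted_ms: number; // generation wall time, in milliseconds
}

// Hypothetical helper mirroring the template logic: tokens per
// millisecond scaled by 1000 gives tokens per second.
function formatMessageStats(t: Timings): string[] {
	const tokensPerSecond = (t.predicted_n / t.predicted_ms) * 1000;
	return [
		`${tokensPerSecond.toFixed(2)} tokens/s`,
		`${t.predicted_n} tokens`,
		`${(t.predicted_ms / 1000).toFixed(2)}s`
	];
}

// Example: 512 tokens generated in 4000 ms
// -> ["128.00 tokens/s", "512 tokens", "4.00s"]
```

The template guards on both `predicted_n` and `predicted_ms` before rendering, so the division never runs against missing or zero timing data, and the whole block is gated behind the new `showMessageStats` setting (default `true`).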