Add a setting to display message generation statistics (#16901)

* feat: Add setting to display message generation statistics
* chore: build static webui output
2025-11-05 09:36:52 +00:00 · 2025-11-01 15:35:57 +01:00
parent 1ae74882f8
commit d8b860a219
4 changed files with 67 additions and 14 deletions
--- a/tools/server/public/index.html.gz
+++ b/tools/server/public/index.html.gz
--- a/tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessageAssistant.svelte
+++ b/tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessageAssistant.svelte
@@ -3,7 +3,16 @@
 	import { useProcessingState } from '$lib/hooks/use-processing-state.svelte';
 	import { isLoading } from '$lib/stores/chat.svelte';
 	import { fade } from 'svelte/transition';
-	import { Check, Copy, Package, X } from '@lucide/svelte';
+	import {
+		Check,
+		Copy,
+		Package,
+		X,
+		Gauge,
+		Clock,
+		WholeWord,
+		ChartNoAxesColumn
+	} from '@lucide/svelte';
 	import { Button } from '$lib/components/ui/button';
 	import { Checkbox } from '$lib/components/ui/checkbox';
 	import { INPUT_CLASSES } from '$lib/constants/input-classes';
@@ -160,11 +169,14 @@
 		</div>
 	{/if}

+	<div class="info my-6 grid gap-4">
 		{#if displayedModel()}
-		<span class="mt-6 mb-4 inline-flex items-center gap-1 text-xs text-muted-foreground">
+			<span class="inline-flex items-center gap-2 text-xs text-muted-foreground">
+				<span class="inline-flex items-center gap-1">
 					<Package class="h-3.5 w-3.5" />

 					<span>Model used:</span>
+				</span>

 				<button
 					class="inline-flex cursor-pointer items-center gap-1 rounded-sm bg-muted-foreground/15 px-1.5 py-0.75"
@@ -177,6 +189,39 @@
 			</span>
 		{/if}

+		{#if currentConfig.showMessageStats && message.timings && message.timings.predicted_n && message.timings.predicted_ms}<!-- truthy checks skip missing/zero timings, so the division below never divides by zero -->
+			{@const tokensPerSecond = (message.timings.predicted_n / message.timings.predicted_ms) * 1000}
+			<span class="inline-flex items-center gap-2 text-xs text-muted-foreground">
+				<span class="inline-flex items-center gap-1">
+					<ChartNoAxesColumn class="h-3.5 w-3.5" />
+
+					<span>Statistics:</span>
+				</span>
+
+				<span class="inline-flex flex-wrap items-center gap-2 text-xs text-muted-foreground">
+					<span
+						class="inline-flex items-center gap-1 rounded-sm bg-muted-foreground/15 px-1.5 py-0.75"
+					>
+						<Gauge class="h-3 w-3" />
+						{tokensPerSecond.toFixed(2)} tokens/s
+					</span>
+					<span
+						class="inline-flex items-center gap-1 rounded-sm bg-muted-foreground/15 px-1.5 py-0.75"
+					>
+						<WholeWord class="h-3 w-3" />
+						{message.timings.predicted_n} tokens
+					</span>
+					<span
+						class="inline-flex items-center gap-1 rounded-sm bg-muted-foreground/15 px-1.5 py-0.75"
+					>
+						<Clock class="h-3 w-3" />
+						{(message.timings.predicted_ms / 1000).toFixed(2)}s
+					</span>
+				</span>
+			</span>
+		{/if}
+	</div>
+
 	{#if message.timestamp && !isEditing}
 		<ChatMessageActions
 			role="assistant"
--- a/tools/server/webui/src/lib/components/app/chat/ChatSettings/ChatSettingsDialog.svelte
+++ b/tools/server/webui/src/lib/components/app/chat/ChatSettings/ChatSettingsDialog.svelte
@@ -52,6 +52,11 @@
 						{ value: 'dark', label: 'Dark', icon: Moon }
 					]
 				},
+				{
+					key: 'showMessageStats',
+					label: 'Show message generation statistics',
+					type: 'checkbox'
+				},
 				{
 					key: 'showTokensPerSecond',
 					label: 'Show tokens per second',
--- a/tools/server/webui/src/lib/constants/settings-config.ts
+++ b/tools/server/webui/src/lib/constants/settings-config.ts
@@ -8,6 +8,7 @@ export const SETTING_CONFIG_DEFAULT: Record<string, string | number | boolean> =
 	showThoughtInProgress: false,
 	disableReasoningFormat: false,
 	keepStatsVisible: false,
+	showMessageStats: true,
 	askForTitleConfirmation: false,
 	pasteLongTextToFileLen: 2500,
 	pdfAsImage: false,
@@ -82,6 +83,8 @@ export const SETTING_CONFIG_INFO: Record<string, string> = {
 	disableReasoningFormat:
 		'Show raw LLM output without backend parsing and frontend Markdown rendering to inspect streaming across different models.',
 	keepStatsVisible: 'Keep processing statistics visible after generation finishes.',
+	showMessageStats:
+		'Display generation statistics (tokens/second, token count, duration) below each assistant message.',
 	askForTitleConfirmation:
 		'Ask for confirmation before automatically changing conversation title when editing the first message.',
 	pdfAsImage: 'Parse PDF as image instead of text (requires vision-capable model).',