Mirror of https://github.com/ggml-org/llama.cpp.git (synced 2025-10-27 08:21:30 +00:00)
webui: updated the chat service to only include max_tokens in the req… (#16489)
* webui: updated the chat service to only include max_tokens in the request payload when the setting is explicitly provided, while still mapping explicit zero or null values to the infinite-token sentinel
* chore: update webui build output
Binary file not shown.
@@ -122,9 +122,10 @@ export class ChatService {
     requestBody.reasoning_format = currentConfig.disableReasoningFormat ? 'none' : 'auto';

     if (temperature !== undefined) requestBody.temperature = temperature;

-    // Set max_tokens to -1 (infinite) if not provided or empty
-    requestBody.max_tokens =
-      max_tokens !== undefined && max_tokens !== null && max_tokens !== 0 ? max_tokens : -1;
+    if (max_tokens !== undefined) {
+      // Set max_tokens to -1 (infinite) when explicitly configured as 0 or null
+      requestBody.max_tokens = max_tokens !== null && max_tokens !== 0 ? max_tokens : -1;
+    }

     if (dynatemp_range !== undefined) requestBody.dynatemp_range = dynatemp_range;
     if (dynatemp_exponent !== undefined) requestBody.dynatemp_exponent = dynatemp_exponent;
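For clarity, here is a minimal standalone sketch (not part of the commit) of how the new branch behaves. The buildBody helper and its single max_tokens parameter are hypothetical stand-ins for the actual ChatService request assembly, which sets many more fields:

// Hypothetical helper mirroring the max_tokens logic from the hunk above;
// the real code builds requestBody inside ChatService alongside other options.
function buildBody(max_tokens?: number | null): Record<string, unknown> {
  const requestBody: Record<string, unknown> = {};

  if (max_tokens !== undefined) {
    // Explicit 0 or null maps to the infinite-token sentinel (-1);
    // any other explicit value is forwarded unchanged.
    requestBody.max_tokens = max_tokens !== null && max_tokens !== 0 ? max_tokens : -1;
  }
  // When the setting is not provided, max_tokens is omitted from the payload entirely.

  return requestBody;
}

console.log(buildBody());      // {}                  -> field omitted
console.log(buildBody(0));     // { max_tokens: -1 }  -> infinite
console.log(buildBody(null));  // { max_tokens: -1 }  -> infinite
console.log(buildBody(512));   // { max_tokens: 512 }

Under the old code, a call with no value set would still have produced max_tokens: -1; the change keeps the field out of the payload unless the setting is explicitly provided.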