server : include usage statistics only when the user requests them (#16052)

* server : include usage statistics only when the user requests them

When serving the OpenAI-compatible API, we should check if
{"stream_options": {"include_usage": true}} is set in the request when
deciding whether we should send usage statistics.

closes: #16048

* add unit test
This commit is contained in:
Radoslav Gerganov
2025-09-18 13:36:57 +03:00
committed by GitHub
parent e58174cecb
commit 2b6b55a59f
2 changed files with 37 additions and 26 deletions

View File

@@ -271,8 +271,10 @@ def test_chat_completion_with_timings_per_token():
"max_tokens": 10,
"messages": [{"role": "user", "content": "test"}],
"stream": True,
"stream_options": {"include_usage": True},
"timings_per_token": True,
})
stats_received = False
for i, data in enumerate(res):
if i == 0:
# Check first role message for stream=True
@@ -288,6 +290,8 @@ def test_chat_completion_with_timings_per_token():
assert "predicted_per_second" in data["timings"]
assert "predicted_n" in data["timings"]
assert data["timings"]["predicted_n"] <= 10
stats_received = True
assert stats_received
def test_logprobs():