server : include usage statistics only when user requests them (#16052)

* server : include usage statistics only when user requests them. When serving the OpenAI-compatible API, we should check whether {"stream_options": {"include_usage": true}} is set in the request when deciding whether to send usage statistics. Closes: #16048 * add unit test
2025-10-27 08:21:30 +00:00 · 2025-09-18 13:36:57 +03:00
parent e58174cecb
commit 2b6b55a59f
2 changed files with 37 additions and 26 deletions
--- a/tools/server/tests/unit/test_chat_completion.py
+++ b/tools/server/tests/unit/test_chat_completion.py
@@ -271,8 +271,10 @@ def test_chat_completion_with_timings_per_token():
        "max_tokens": 10,
        "messages": [{"role": "user", "content": "test"}],
        "stream": True,
+        "stream_options": {"include_usage": True},
        "timings_per_token": True,
    })
+    stats_received = False
    for i, data in enumerate(res):
        if i == 0:
            # Check first role message for stream=True
@@ -288,6 +290,8 @@ def test_chat_completion_with_timings_per_token():
                assert "predicted_per_second" in data["timings"]
                assert "predicted_n" in data["timings"]
                assert data["timings"]["predicted_n"] <= 10
+                stats_received = True
+    assert stats_received


 def test_logprobs():