server : improve infill stop criteria

2025-10-28 08:31:25 +00:00 · 2025-03-11 15:43:37 +02:00
parent 6ab2e4765a
commit ed58975f51
1 changed files with 35 additions and 12 deletions
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -2162,35 +2162,58 @@ struct server_context {
        if (slot.has_new_line) {
            // require that each new line has a whitespace prefix (i.e. indentation) of at least slot.params.n_indent
-            if (slot.params.n_indent > 0) {
+            if (slot.params.n_indent >= 0) {
                // check the current indentation
-                // TODO: improve by not doing it more than once for each new line
+                int n_indent = 0;
                if (slot.last_nl_pos > 0) {
                    size_t pos = slot.last_nl_pos;
-                    int n_indent = 0;
+                size_t pos = slot.last_nl_pos;
-                    while (pos < slot.generated_text.size() && (slot.generated_text[pos] == ' ' || slot.generated_text[pos] == '\t')) {
+
-                        n_indent++;
+                while (pos < slot.generated_text.size() && (slot.generated_text[pos] == ' ' || slot.generated_text[pos] == '\t' || slot.generated_text[pos] == '\n')) {
-                        pos++;
+                    n_indent++;
                    if (slot.generated_text[pos] == '\n') {
                        n_indent = 0;
                        slot.last_nl_pos = pos + 1;
                    }
-                    if (pos < slot.generated_text.size() && n_indent < slot.params.n_indent) {
+                    pos++;
                }
                if (0 < pos && pos < slot.generated_text.size()) {
                    if (n_indent < slot.params.n_indent) {
                        slot.stop           = STOP_TYPE_LIMIT;
                        slot.has_next_token = false;
                        // cut the last line
-                        slot.generated_text.erase(pos, std::string::npos);
+                        //slot.generated_text.erase(pos, std::string::npos);
                        SLT_DBG(slot, "stopped by indentation limit, n_decoded = %d, n_indent = %d\n", slot.n_decoded, n_indent);
                    }
                }
                //SLT_ERR(slot, "n_indent = %d (%d), generated_text.size() = %d, n_decoded = %d, last_nl_pos = %d\n", n_indent, slot.params.n_indent, slot.generated_text.size(), slot.n_decoded, slot.last_nl_pos);
                // find the next new line
                {
-                    const size_t pos = slot.generated_text.find('\n', slot.last_nl_pos);
+                    size_t pos = slot.generated_text.find('\n', slot.last_nl_pos);
-                    if (pos != std::string::npos) {
+                    while (pos != std::string::npos) {
                        slot.last_nl_pos = pos + 1;
                        // detect end of paragraph at current indent level
                        if (slot.generated_text[slot.last_nl_pos - 2] == '\n' && n_indent <= slot.params.n_indent) {
                            slot.stop           = STOP_TYPE_LIMIT;
                            slot.has_next_token = false;
                            // cut the last line
                            slot.generated_text.erase(pos, std::string::npos);
                            SLT_DBG(slot, "stopped by reaching end of paragraph, n_decoded = %d, n_indent = %d\n", slot.n_decoded, n_indent);
                            break;
                        }
                        pos = slot.generated_text.find('\n', slot.last_nl_pos);
                    }
                }
            }