server : improve infill stop criteria

2025-10-28 08:31:25 +00:00 · 2025-03-11 15:43:37 +02:00
parent 6ab2e4765a
commit ed58975f51
1 changed files with 35 additions and 12 deletions
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -2162,35 +2162,58 @@ struct server_context {

        if (slot.has_new_line) {
            // require that each new line has a whitespace prefix (i.e. indentation) of at least slot.params.n_indent
-            if (slot.params.n_indent > 0) {
+            if (slot.params.n_indent >= 0) {
                // check the current indentation
-                // TODO: improve by not doing it more than once for each new line
-                if (slot.last_nl_pos > 0) {
+                int n_indent = 0;
+
                size_t pos = slot.last_nl_pos;

-                    int n_indent = 0;
-                    while (pos < slot.generated_text.size() && (slot.generated_text[pos] == ' ' || slot.generated_text[pos] == '\t')) {
+                while (pos < slot.generated_text.size() && (slot.generated_text[pos] == ' ' || slot.generated_text[pos] == '\t' || slot.generated_text[pos] == '\n')) {
                    n_indent++;
+
+                    if (slot.generated_text[pos] == '\n') {
+                        n_indent = 0;
+                        slot.last_nl_pos = pos + 1;
+                    }
+
                    pos++;
                }

-                    if (pos < slot.generated_text.size() && n_indent < slot.params.n_indent) {
+                if (0 < pos && pos < slot.generated_text.size()) {
+                    if (n_indent < slot.params.n_indent) {
+                        slot.stop           = STOP_TYPE_LIMIT;
+                        slot.has_next_token = false;
+
+                        // cut the last line
+                        //slot.generated_text.erase(pos, std::string::npos);
+
+                        SLT_DBG(slot, "stopped by indentation limit, n_decoded = %d, n_indent = %d\n", slot.n_decoded, n_indent);
+                    }
+                }
+
+                //SLT_ERR(slot, "n_indent = %d (%d), generated_text.size() = %d, n_decoded = %d, last_nl_pos = %d\n", n_indent, slot.params.n_indent, slot.generated_text.size(), slot.n_decoded, slot.last_nl_pos);
+
+                // find the next new line
+                {
+                    size_t pos = slot.generated_text.find('\n', slot.last_nl_pos);
+
+                    while (pos != std::string::npos) {
+                        slot.last_nl_pos = pos + 1;
+
+                        // detect end of paragraph at current indent level
+                        if (slot.generated_text[slot.last_nl_pos - 2] == '\n' && n_indent <= slot.params.n_indent) {
                            slot.stop           = STOP_TYPE_LIMIT;
                            slot.has_next_token = false;

                            // cut the last line
                            slot.generated_text.erase(pos, std::string::npos);

-                        SLT_DBG(slot, "stopped by indentation limit, n_decoded = %d, n_indent = %d\n", slot.n_decoded, n_indent);
-                    }
+                            SLT_DBG(slot, "stopped by reaching end of paragraph, n_decoded = %d, n_indent = %d\n", slot.n_decoded, n_indent);
+
+                            break;
                        }

-                // find the next new line
-                {
-                    const size_t pos = slot.generated_text.find('\n', slot.last_nl_pos);
-
-                    if (pos != std::string::npos) {
-                        slot.last_nl_pos = pos + 1;
+                        pos = slot.generated_text.find('\n', slot.last_nl_pos);
                    }
                }
            }