Fix optimistic json parser strict mode (#2506)

2025-06-03 04:30:22 +00:00 · 2025-03-21 01:22:21 +08:00 · 2025-03-21 01:22:21 +08:00 · 4e6807dee3
commit 4e6807dee3
parent dcacf724fa
2 changed files with 38 additions and 6 deletions
--- a/letta/server/rest_api/optimistic_json_parser.py
+++ b/letta/server/rest_api/optimistic_json_parser.py
@ -32,7 +32,7 @@ class OptimisticJSONParser:
        self.on_extra_token = self.default_on_extra_token

    def default_on_extra_token(self, text, data, reminding):
-        pass
+        print(f"Parsed JSON with extra tokens: {data}, remaining: {reminding}")

    def parse(self, input_str):
        """
@ -130,8 +130,8 @@ class OptimisticJSONParser:
        if end == -1:
            # Incomplete string
            if not self.strict:
-                return input_str[1:], ""
-            return json.loads(f'"{input_str[1:]}"'), ""
+                return input_str[1:], ""  # Lenient mode returns partial string
+            raise decode_error  # Raise error for incomplete string in strict mode

        str_val = input_str[: end + 1]
        input_str = input_str[end + 1 :]
@ -152,8 +152,8 @@ class OptimisticJSONParser:
        num_str = input_str[:idx]
        remainder = input_str[idx:]

-        # If it's only a sign or just '.', return as-is with empty remainder
-        if not num_str or num_str in {"-", "."}:
+        # If not strict, and it's only a sign or just '.', return as-is with empty remainder
+        if not self.strict and (not num_str or num_str in {"-", "."}):
            return num_str, ""

        try:
--- a/tests/test_optimistic_json_parser.py
+++ b/tests/test_optimistic_json_parser.py
@ -96,7 +96,7 @@ def test_parse_number_cases(strict_parser):
 def test_parse_boolean_true(strict_parser):
    assert strict_parser.parse("true") is True, "Should parse 'true'."
    # Check leftover
-    assert strict_parser.last_parse_reminding == "", "No extra tokens expected."
+    assert strict_parser.last_parse_reminding is None, "No extra tokens expected."


 def test_parse_boolean_false(strict_parser):
@ -246,3 +246,35 @@ def test_multiple_parse_calls(strict_parser):
    result_2 = strict_parser.parse(input_2)
    assert result_2 == [2, 3]
    assert strict_parser.last_parse_reminding.strip() == "trailing2"
+
+
+def test_parse_incomplete_string_streaming_strict(strict_parser):
+    """
+    Every prefix that stops inside the string value must fail in strict mode;
+    only the fully assembled document parses.
+    """
+    pieces = (
+        '{"message": "This is an incomplete',
+        " string with a newline\\n",
+        'and more text"}',
+    )
+
+    # The first piece alone, then the first two, are both still unterminated.
+    partial = ""
+    for piece in pieces[:-1]:
+        partial += piece
+        with pytest.raises(json.JSONDecodeError, match="Unterminated string"):
+            strict_parser.parse(partial)
+
+    expected = {"message": "This is an incomplete string with a newline\nand more text"}
+    assert strict_parser.parse(partial + pieces[-1]) == expected, "Should parse complete JSON correctly"
+
+
+def test_unescaped_control_characters_strict(strict_parser):
+    """
+    A raw newline and tab inside a JSON string must be rejected in strict mode.
+    """
+    # The "\n" and "\t" below are real control characters, not JSON escapes.
+    raw_payload = '{"message": "This has a newline\nand tab\t"}'
+    with pytest.raises(json.JSONDecodeError, match="Invalid control character"):
+        strict_parser.parse(raw_payload)