mirror of
https://github.com/cpacker/MemGPT.git
synced 2025-06-03 04:30:22 +00:00
add new manual json parser meant to catch send_message calls with trailing bad extra chars (#509)
* add new manual json parser meant to catch send_message calls with stray trailing chars, patch json error passing * typo
This commit is contained in:
parent
09dfc17061
commit
c20ad866ea
@ -1,8 +1,13 @@
|
|||||||
import json
|
import json
|
||||||
|
import re
|
||||||
|
|
||||||
|
from memgpt.errors import LLMJSONParsingError
|
||||||
|
|
||||||
|
|
||||||
def extract_first_json(string):
|
def extract_first_json(string):
|
||||||
"""Handles the case of two JSON objects back-to-back"""
|
"""Handles the case of two JSON objects back-to-back"""
|
||||||
|
from memgpt.utils import printd
|
||||||
|
|
||||||
depth = 0
|
depth = 0
|
||||||
start_index = None
|
start_index = None
|
||||||
|
|
||||||
@ -17,9 +22,9 @@ def extract_first_json(string):
|
|||||||
try:
|
try:
|
||||||
return json.loads(string[start_index : i + 1])
|
return json.loads(string[start_index : i + 1])
|
||||||
except json.JSONDecodeError as e:
|
except json.JSONDecodeError as e:
|
||||||
raise json.JSONDecodeError(f"Matched closing bracket, but decode failed with error: {str(e)}")
|
raise LLMJSONParsingError(f"Matched closing bracket, but decode failed with error: {str(e)}")
|
||||||
print("No valid JSON object found.")
|
printd("No valid JSON object found.")
|
||||||
raise json.JSONDecodeError("Couldn't find starting bracket")
|
raise LLMJSONParsingError("Couldn't find starting bracket")
|
||||||
|
|
||||||
|
|
||||||
def add_missing_heartbeat(llm_json):
|
def add_missing_heartbeat(llm_json):
|
||||||
@ -46,6 +51,25 @@ def add_missing_heartbeat(llm_json):
|
|||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
|
|
||||||
|
def clean_and_interpret_send_message_json(json_string):
    """Manually pull a ``send_message`` call out of a malformed JSON string.

    Intended as a last-resort fallback when regular JSON parsing fails, e.g.
    when the start of the call is intact but stray characters follow it.

    Args:
        json_string: Raw text expected to contain a ``"function": "send_message"``
            entry together with ``"inner_thoughts"`` and ``"message"`` string
            values.

    Returns:
        A dict of the form
        ``{"function": "send_message", "params": {"inner_thoughts": str, "message": str}}``
        holding the captured text of both fields.

    Raises:
        LLMJSONParsingError: If any of the three patterns cannot be found.
    """
    # If normal parsing fails, attempt to clean and extract manually
    # NOTE: this strips non-ASCII characters from the whole input, so they are
    # also dropped from the extracted field values.
    cleaned_json_string = re.sub(r"[^\x00-\x7F]+", "", json_string)  # Remove non-ASCII characters
    function_match = re.search(r'"function":\s*"send_message"', cleaned_json_string)
    # NOTE: the value patterns stop at the first double quote, so a value that
    # contains an escaped quote (\") is captured only up to that point.
    inner_thoughts_match = re.search(r'"inner_thoughts":\s*"([^"]+)"', cleaned_json_string)
    message_match = re.search(r'"message":\s*"([^"]+)"', cleaned_json_string)

    if function_match and inner_thoughts_match and message_match:
        return {
            "function": "send_message",
            "params": {
                # Return the captured text, not the re.Match objects themselves
                "inner_thoughts": inner_thoughts_match.group(1),
                "message": message_match.group(1),
            },
        }
    else:
        raise LLMJSONParsingError(f"Couldn't manually extract send_message pattern from:\n{json_string}")
|
||||||
|
|
||||||
|
|
||||||
def repair_json_string(json_string):
|
def repair_json_string(json_string):
|
||||||
"""
|
"""
|
||||||
This function repairs a JSON string where line feeds were accidentally added
|
This function repairs a JSON string where line feeds were accidentally added
|
||||||
@ -128,32 +152,38 @@ def clean_json(raw_llm_output, messages=None, functions=None):
|
|||||||
try:
|
try:
|
||||||
# printd("clean json runs:", raw_llm_output)
|
# printd("clean json runs:", raw_llm_output)
|
||||||
data = json.loads(raw_llm_output)
|
data = json.loads(raw_llm_output)
|
||||||
except json.JSONDecodeError:
|
except (json.JSONDecodeError, LLMJSONParsingError):
|
||||||
try:
|
try:
|
||||||
printd("trying adding }")
|
printd("trying adding }")
|
||||||
data = json.loads(raw_llm_output + "}")
|
data = json.loads(raw_llm_output + "}")
|
||||||
except json.JSONDecodeError:
|
except (json.JSONDecodeError, LLMJSONParsingError):
|
||||||
try:
|
try:
|
||||||
printd("trying adding }}")
|
printd("trying adding }}")
|
||||||
data = json.loads(raw_llm_output + "}}")
|
data = json.loads(raw_llm_output + "}}")
|
||||||
except json.JSONDecodeError:
|
except (json.JSONDecodeError, LLMJSONParsingError):
|
||||||
try:
|
try:
|
||||||
printd('trying adding "}}')
|
printd('trying adding "}}')
|
||||||
data = json.loads(raw_llm_output + '"}}')
|
data = json.loads(raw_llm_output + '"}}')
|
||||||
except json.JSONDecodeError:
|
except (json.JSONDecodeError, LLMJSONParsingError):
|
||||||
try:
|
try:
|
||||||
repaired = repair_json_string(raw_llm_output)
|
repaired = repair_json_string(raw_llm_output)
|
||||||
printd("trying repair_json_string:", repaired)
|
printd("trying repair_json_string:", repaired)
|
||||||
data = json.loads(repaired)
|
data = json.loads(repaired)
|
||||||
except json.JSONDecodeError:
|
except (json.JSONDecodeError, LLMJSONParsingError):
|
||||||
try:
|
try:
|
||||||
repaired = repair_even_worse_json(raw_llm_output)
|
repaired = repair_even_worse_json(raw_llm_output)
|
||||||
printd("trying repair_even_worse_json:", repaired)
|
printd("trying repair_even_worse_json:", repaired)
|
||||||
data = json.loads(repaired)
|
data = json.loads(repaired)
|
||||||
except json.JSONDecodeError:
|
except (json.JSONDecodeError, LLMJSONParsingError):
|
||||||
try:
|
try:
|
||||||
printd("trying first_json")
|
printd("trying first_json")
|
||||||
data = extract_first_json(raw_llm_output + "}}")
|
data = extract_first_json(raw_llm_output + "}}")
|
||||||
except:
|
except (json.JSONDecodeError, LLMJSONParsingError):
|
||||||
raise
|
try:
|
||||||
|
printd("trying to pull send_message manually")
|
||||||
|
data = clean_and_interpret_send_message_json(raw_llm_output)
|
||||||
|
except (json.JSONDecodeError, LLMJSONParsingError):
|
||||||
|
raise LLMJSONParsingError(
|
||||||
|
f"Failed to decode valid MemGPT JSON from LLM output:\n=====\n{raw_llm_output}\n====="
|
||||||
|
)
|
||||||
return data
|
return data
|
||||||
|
@ -45,11 +45,29 @@ EXAMPLE_HARD_LINE_FEEDS = """{
|
|||||||
}
|
}
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
# Situation where beginning of send_message call is fine (and thus can be extracted)
|
||||||
|
# but has a long trailing garbage string that comes after
|
||||||
|
EXAMPLE_SEND_MESSAGE_PREFIX_OK_REST_BAD = """{
|
||||||
|
"function": "send_message",
|
||||||
|
"params": {
|
||||||
|
"inner_thoughts": "User request for debug assistance",
|
||||||
|
"message": "Of course, Chad. Please check the system log file for 'assistant.json' and send me the JSON output you're getting. Armed with that data, I'll assist you in debugging the issue.",
|
||||||
|
GARBAGEGARBAGEGARBAGEGARBAGE
|
||||||
|
GARBAGEGARBAGEGARBAGEGARBAGE
|
||||||
|
GARBAGEGARBAGEGARBAGEGARBAGE
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
def test_json_parsers():
|
def test_json_parsers():
|
||||||
"""Try various broken JSON and check that the parsers can fix it"""
|
"""Try various broken JSON and check that the parsers can fix it"""
|
||||||
|
|
||||||
test_strings = [EXAMPLE_MISSING_CLOSING_BRACE, EXAMPLE_BAD_TOKEN_END, EXAMPLE_DOUBLE_JSON, EXAMPLE_HARD_LINE_FEEDS]
|
test_strings = [
|
||||||
|
EXAMPLE_MISSING_CLOSING_BRACE,
|
||||||
|
EXAMPLE_BAD_TOKEN_END,
|
||||||
|
EXAMPLE_DOUBLE_JSON,
|
||||||
|
EXAMPLE_HARD_LINE_FEEDS,
|
||||||
|
EXAMPLE_SEND_MESSAGE_PREFIX_OK_REST_BAD,
|
||||||
|
]
|
||||||
|
|
||||||
for string in test_strings:
|
for string in test_strings:
|
||||||
try:
|
try:
|
||||||
|
Loading…
Reference in New Issue
Block a user