import json
import os

import pytest

from letta.functions.functions import derive_openai_json_schema
from letta.llm_api.helpers import convert_to_structured_output, make_post_request
from letta.schemas.tool import ToolCreate


def _clean_diff(d1, d2):
    """Utility function to clean up the diff between two dictionaries."""
    # Keys in d1 but not in d2
    removed = {k: d1[k] for k in d1.keys() - d2.keys()}

    # Keys in d2 but not in d1
    added = {k: d2[k] for k in d2.keys() - d1.keys()}

    # Keys in both but values changed
    changed = {k: (d1[k], d2[k]) for k in d1.keys() & d2.keys() if d1[k] != d2[k]}

    # Only include non-empty differences
    return {k: v for k, v in {"removed": removed, "added": added, "changed": changed}.items() if v}


def _compare_schemas(generated_schema: dict, expected_schema: dict, strip_heartbeat: bool = True):
    """Compare an autogenerated schema to an expected schema."""

    if strip_heartbeat:
        # Pop out the heartbeat parameter
        del generated_schema["parameters"]["properties"]["request_heartbeat"]
        # Remove from the required list
        generated_schema["parameters"]["required"].remove("request_heartbeat")

    # Check that the two schemas are equal
    # If not, pretty print the difference by dumping with indent=4
    if generated_schema != expected_schema:
        print("==== GENERATED SCHEMA ====")
        print(json.dumps(generated_schema, indent=4))
        print("==== EXPECTED SCHEMA ====")
        print(json.dumps(expected_schema, indent=4))
        print("==== DIFF ====")
        print(json.dumps(_clean_diff(generated_schema, expected_schema), indent=4))
        raise AssertionError("Schemas are not equal")
    else:
        print("Schemas are equal")


def _run_schema_test(schema_name: str, desired_function_name: str, expect_structured_output_fail: bool = False):
    """Load a file and compare the autogenerated schema to the expected schema."""

    # Open the python file as a string
    # Use the absolute path to make it easier to run the test from the root directory
    with open(os.path.join(os.path.dirname(__file__), f"test_tool_schema_parsing_files/{schema_name}.py"), "r") as file:
        source_code = file.read()

    # Derive the schema
    schema = derive_openai_json_schema(source_code, name=desired_function_name)

    # Assert that the schema matches the expected schema
    with open(os.path.join(os.path.dirname(__file__), f"test_tool_schema_parsing_files/{schema_name}.json"), "r") as file:
        expected_schema = json.load(file)

    _compare_schemas(schema, expected_schema)

    # Convert to structured output and compare
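    # (Context, hedged rather than asserted by this test: OpenAI's "strict" structured-output mode
    # requires, among other things, "additionalProperties": false on object schemas and every
    # property listed under "required"; schemas that cannot be expressed under those constraints
    # are the ones expected to raise ValueError below.)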
    if expect_structured_output_fail:
        with pytest.raises(ValueError):
            structured_output = convert_to_structured_output(schema)
    else:
        structured_output = convert_to_structured_output(schema)

        with open(os.path.join(os.path.dirname(__file__), f"test_tool_schema_parsing_files/{schema_name}_so.json"), "r") as file:
            expected_structured_output = json.load(file)

        _compare_schemas(structured_output, expected_structured_output, strip_heartbeat=False)


def test_derive_openai_json_schema():
    """Test that the schema generator works across a variety of example source code inputs."""

    print("==== TESTING basic example where the arg is a pydantic model ====")
    _run_schema_test("pydantic_as_single_arg_example", "create_step")

    print("==== TESTING basic example where the arg is a list of pydantic models ====")
    _run_schema_test("list_of_pydantic_example", "create_task_plan")

    print("==== TESTING more complex example where the arg is a nested pydantic model ====")
    _run_schema_test("nested_pydantic_as_arg_example", "create_task_plan")

    print("==== TESTING simple function with no args ====")
    _run_schema_test("simple_d20", "roll_d20")

    print("==== TESTING complex function with many args ====")
    _run_schema_test("all_python_complex", "check_order_status", expect_structured_output_fail=True)

    print("==== TESTING complex function with many args and no dict ====")
    # TODO we should properly cast Optionals into union nulls
    # Currently, we just disregard all Optional types on the conversion path
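    # Illustrative sketch of the TODO above (not current behavior, and not checked by this test):
    # an `Optional[int]` argument would ideally become a union with null in the generated schema,
    # e.g. {"type": ["integer", "null"]} or {"anyOf": [{"type": "integer"}, {"type": "null"}]},
    # instead of the Optional wrapper being dropped on the conversion path.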
    _run_schema_test("all_python_complex_nodict", "check_order_status")


def _openai_payload(model: str, schema: dict, structured_output: bool):
    """Create an OpenAI payload with a tool call.

    Raw version of openai_chat_completions_request w/o pydantic models.
    """

    if structured_output:
        tool_schema = convert_to_structured_output(schema)
    else:
        tool_schema = schema

    api_key = os.getenv("OPENAI_API_KEY")
    assert api_key is not None, "OPENAI_API_KEY must be set"

    # Simple system prompt to encourage the LLM to jump directly to a tool call
    system_prompt = "Your job is to test the tool that you've been provided. Don't ask for any clarification on the args, just come up with some dummy data and try executing the tool."

    url = "https://api.openai.com/v1/chat/completions"
    headers = {"Content-Type": "application/json", "Authorization": f"Bearer {api_key}"}
    data = {
        "model": model,
        "messages": [
            {"role": "system", "content": system_prompt},
        ],
        "tools": [
            {
                "type": "function",
                "function": tool_schema,
            }
        ],
        "tool_choice": "auto",  # TODO force the tool call on the one we want
        # NOTE: disabled for simplicity
        "parallel_tool_calls": False,
    }
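    # Sketch for the TODO on "tool_choice" above (based on the public Chat Completions API; not
    # exercised here): forcing the call onto this specific tool would look roughly like
    #   "tool_choice": {"type": "function", "function": {"name": tool_schema["name"]}}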
    print("Request:\n", json.dumps(data, indent=2), "\n\n")

    try:
        make_post_request(url, headers, data)
    except Exception as e:
        print(f"Request failed, tool_schema=\n{json.dumps(tool_schema, indent=2)}")
        print(f"Error: {e}")
        raise e


def _load_schema_from_source_filename(filename: str) -> dict:
    """Load an example source file and derive its OpenAI JSON schema."""
    with open(os.path.join(os.path.dirname(__file__), f"test_tool_schema_parsing_files/{filename}.py"), "r") as file:
        source_code = file.read()

    return derive_openai_json_schema(source_code)


# @pytest.mark.parametrize("openai_model", ["gpt-4o-mini"])
# @pytest.mark.parametrize("structured_output", [True])
@pytest.mark.parametrize("openai_model", ["gpt-4", "gpt-4o"])
@pytest.mark.parametrize("structured_output", [True, False])
def test_valid_schemas_via_openai(openai_model: str, structured_output: bool):
    """Test that we can send the schemas to OpenAI and get a tool call back."""

    for filename in [
        "pydantic_as_single_arg_example",
        "list_of_pydantic_example",
        "nested_pydantic_as_arg_example",
        "simple_d20",
        "all_python_complex",
        "all_python_complex_nodict",
    ]:
        print(f"==== TESTING OPENAI PAYLOAD FOR {openai_model} + {filename} ====")
        schema = _load_schema_from_source_filename(filename)

        # We should expect the all_python_complex one to fail when structured_output=True
        if filename == "all_python_complex" and structured_output:
            with pytest.raises(ValueError):
                _openai_payload(openai_model, schema, structured_output)
        else:
            _openai_payload(openai_model, schema, structured_output)


@pytest.mark.parametrize("openai_model", ["gpt-4o-mini"])
@pytest.mark.parametrize("structured_output", [True])
def test_composio_tool_schema_generation(openai_model: str, structured_output: bool):
    """Test that we can generate the schemas for some Composio tools."""

    if not os.getenv("COMPOSIO_API_KEY"):
        pytest.skip("COMPOSIO_API_KEY not set")

    for action_name in [
        "GITHUB_STAR_A_REPOSITORY_FOR_THE_AUTHENTICATED_USER",  # Simple
        "CAL_GET_AVAILABLE_SLOTS_INFO",  # has an array arg, needs to be converted properly
        "SALESFORCE_RETRIEVE_LEAD_DETAILS_BY_ID_WITH_CONDITIONAL_SUPPORT",  # has an array arg, needs to be converted properly
    ]:
        tool_create = ToolCreate.from_composio(action_name=action_name)

        assert tool_create.json_schema
        schema = tool_create.json_schema
        print(f"The schema for {action_name}: {json.dumps(schema, indent=4)}\n\n")

        try:
            _openai_payload(openai_model, schema, structured_output)
            print(f"Successfully called OpenAI using schema {schema} generated from {action_name}\n\n")
        except Exception:
            print(f"Failed to call OpenAI using schema {schema} generated from {action_name}\n\n")
            raise


@pytest.mark.parametrize("openai_model", ["gpt-4o-mini"])
@pytest.mark.parametrize("structured_output", [True])
def test_langchain_tool_schema_generation(openai_model: str, structured_output: bool):
    """Test that we can generate the schemas for some Langchain tools."""
    from langchain_community.tools import WikipediaQueryRun
    from langchain_community.utilities import WikipediaAPIWrapper

    api_wrapper = WikipediaAPIWrapper(top_k_results=1, doc_content_chars_max=500)
    langchain_tool = WikipediaQueryRun(api_wrapper=api_wrapper)

    tool_create = ToolCreate.from_langchain(
        langchain_tool=langchain_tool,
        additional_imports_module_attr_map={"langchain_community.utilities": "WikipediaAPIWrapper"},
    )

    assert tool_create.json_schema
    schema = tool_create.json_schema
    print(f"The schema for {langchain_tool.name}: {json.dumps(schema, indent=4)}\n\n")

    try:
        _openai_payload(openai_model, schema, structured_output)
        print(f"Successfully called OpenAI using schema {schema} generated from {langchain_tool.name}\n\n")
    except Exception:
        print(f"Failed to call OpenAI using schema {schema} generated from {langchain_tool.name}\n\n")
        raise