import json
import os

import pytest

from letta.functions.functions import derive_openai_json_schema
from letta.llm_api.helpers import convert_to_structured_output, make_post_request
from letta.schemas.tool import ToolCreate

# Directory (next to this test module) holding the example sources and expected schemas.
_FIXTURE_DIR = os.path.join(os.path.dirname(__file__), "test_tool_schema_parsing_files")


def _read_fixture(filename: str) -> str:
    """Return the text of a fixture file from the schema-parsing fixture directory.

    The path is anchored on this module's location so the tests can be run from
    any working directory. The file is decoded as UTF-8 explicitly so the result
    does not depend on the platform's locale encoding.
    """
    with open(os.path.join(_FIXTURE_DIR, filename), encoding="utf-8") as file:
        return file.read()


def _clean_diff(d1: dict, d2: dict) -> dict:
    """Summarize the top-level differences between two dictionaries.

    Returns a dict with up to three keys, each present only when non-empty:
    ``"removed"`` (keys only in ``d1``), ``"added"`` (keys only in ``d2``) and
    ``"changed"`` (shared keys whose values differ, as ``(d1_value, d2_value)``).
    Nested dictionaries are compared by equality only, not recursively.
    """
    removed = {key: d1[key] for key in d1.keys() - d2.keys()}
    added = {key: d2[key] for key in d2.keys() - d1.keys()}
    changed = {key: (d1[key], d2[key]) for key in d1.keys() & d2.keys() if d1[key] != d2[key]}

    sections = {"removed": removed, "added": added, "changed": changed}
    # Only include non-empty differences
    return {label: diff for label, diff in sections.items() if diff}


def _compare_schemas(generated_schema: dict, expected_schema: dict, strip_heartbeat: bool = True):
    """Assert that an autogenerated schema equals an expected schema.

    Args:
        generated_schema: The schema under test. NOTE: when ``strip_heartbeat``
            is true this dict is modified IN PLACE (callers in this module rely
            on that, see ``_run_schema_test``).
        expected_schema: The reference schema.
        strip_heartbeat: If true, remove the ``request_heartbeat`` parameter from
            ``generated_schema`` (both its property entry and its ``required``
            entry) before comparing.

    Raises:
        AssertionError: If the schemas differ; both schemas and a top-level diff
            are printed first.
        KeyError / ValueError: If ``strip_heartbeat`` is true but the generated
            schema has no ``request_heartbeat`` property / required entry.
    """
    if strip_heartbeat:
        parameters = generated_schema["parameters"]
        # Pop out the heartbeat parameter
        del parameters["properties"]["request_heartbeat"]
        # Remove from the required list
        parameters["required"].remove("request_heartbeat")

    if generated_schema == expected_schema:
        print("Schemas are equal")
        return

    # Pretty print both schemas and their difference to make failures debuggable
    print("==== GENERATED SCHEMA ====")
    print(json.dumps(generated_schema, indent=4))
    print("==== EXPECTED SCHEMA ====")
    print(json.dumps(expected_schema, indent=4))
    print("==== DIFF ====")
    print(json.dumps(_clean_diff(generated_schema, expected_schema), indent=4))
    raise AssertionError("Schemas are not equal")


def _run_schema_test(schema_name: str, desired_function_name: str, expect_structured_output_fail: bool = False):
    """Derive a schema from an example source file and compare it to the expected JSON.

    Args:
        schema_name: Base name of the fixture; ``<schema_name>.py`` is the source,
            ``<schema_name>.json`` the expected schema and ``<schema_name>_so.json``
            the expected structured-output schema.
        desired_function_name: Name passed to ``derive_openai_json_schema``.
        expect_structured_output_fail: If true, ``convert_to_structured_output``
            must raise ``ValueError`` and the ``_so.json`` fixture is not read.
    """
    source_code = _read_fixture(f"{schema_name}.py")

    # Derive the schema
    schema = derive_openai_json_schema(source_code, name=desired_function_name)

    # Assert that the schema matches the expected schema
    expected_schema = json.loads(_read_fixture(f"{schema_name}.json"))
    _compare_schemas(schema, expected_schema)

    # NOTE: the call above stripped `request_heartbeat` from `schema` in place, so the
    # structured-output conversion below operates on the stripped schema.
    if expect_structured_output_fail:
        with pytest.raises(ValueError):
            convert_to_structured_output(schema)
        return

    # Convert to structured output and compare
    structured_output = convert_to_structured_output(schema)
    expected_structured_output = json.loads(_read_fixture(f"{schema_name}_so.json"))
    _compare_schemas(structured_output, expected_structured_output, strip_heartbeat=False)


def test_derive_openai_json_schema():
    """Test that the schema generator works across a variety of example source code inputs."""
    print("==== TESTING basic example where the arg is a pydantic model ====")
    _run_schema_test("pydantic_as_single_arg_example", "create_step")

    print("==== TESTING basic example where the arg is a list of pydantic models ====")
    _run_schema_test("list_of_pydantic_example", "create_task_plan")

    print("==== TESTING more complex example where the arg is a nested pydantic model ====")
    _run_schema_test("nested_pydantic_as_arg_example", "create_task_plan")

    print("==== TESTING simple function with no args ====")
    _run_schema_test("simple_d20", "roll_d20")

    print("==== TESTING complex function with many args ====")
    _run_schema_test("all_python_complex", "check_order_status", expect_structured_output_fail=True)

    print("==== TESTING complex function with many args and no dict ====")
    # TODO we should properly cast Optionals into union nulls
    # Currently, we just disregard all Optional types on the conversion path
    _run_schema_test("all_python_complex_nodict", "check_order_status")


def _openai_payload(model: str, schema: dict, structured_output: bool):
    """Build an OpenAI chat-completions payload exposing ``schema`` as a tool and POST it.

    Raw version of openai_chat_completions_request w/o pydantic models.

    Args:
        model: OpenAI model name placed in the payload.
        schema: Function schema to expose as the single tool.
        structured_output: If true, the schema is first passed through
            ``convert_to_structured_output``.

    Raises:
        AssertionError: If ``OPENAI_API_KEY`` is not set.
        Exception: Whatever the conversion or ``make_post_request`` raises; request
            failures are printed together with the tool schema and re-raised.

    The response of the request is not inspected or returned.
    """
    # Keep the conversion ahead of the API-key check: test_valid_schemas_via_openai
    # expects a ValueError for one fixture when structured_output is on, presumably
    # raised by this conversion (as _run_schema_test expects for the same fixture).
    tool_schema = convert_to_structured_output(schema) if structured_output else schema

    api_key = os.getenv("OPENAI_API_KEY")
    assert api_key is not None, "OPENAI_API_KEY must be set"

    # Simple system prompt to encourage the LLM to jump directly to a tool call
    system_prompt = (
        "You job is to test the tool that you've been provided. "
        "Don't ask for any clarification on the args, just come up with some dummy data and try executing the tool."
    )

    url = "https://api.openai.com/v1/chat/completions"
    headers = {"Content-Type": "application/json", "Authorization": f"Bearer {api_key}"}
    data = {
        "model": model,
        "messages": [
            {"role": "system", "content": system_prompt},
        ],
        "tools": [
            {
                "type": "function",
                "function": tool_schema,
            }
        ],
        "tool_choice": "auto",  # TODO force the tool call on the one we want
        # NOTE: disabled for simplicity
        "parallel_tool_calls": False,
    }

    print("Request:\n", json.dumps(data, indent=2))

    try:
        make_post_request(url, headers, data)
    except Exception as e:
        # Surface the offending schema before propagating the original error unchanged
        print(f"Request failed, tool_schema=\n{json.dumps(tool_schema, indent=2)}")
        print(f"Error: {e}")
        raise


def _load_schema_from_source_filename(filename: str) -> dict:
    """Derive a schema from the fixture source ``<filename>.py`` without overriding the function name."""
    return derive_openai_json_schema(_read_fixture(f"{filename}.py"))


# @pytest.mark.parametrize("openai_model", ["gpt-4o-mini"])
# @pytest.mark.parametrize("structured_output", [True])
@pytest.mark.parametrize("openai_model", ["gpt-4", "gpt-4o"])
@pytest.mark.parametrize("structured_output", [True, False])
def test_valid_schemas_via_openai(openai_model: str, structured_output: bool):
    """Send each example schema to OpenAI as a tool definition.

    The test fails if building or posting any payload raises, except for
    ``all_python_complex`` with structured output enabled, which must raise
    ``ValueError``.
    """
    filenames = [
        "pydantic_as_single_arg_example",
        "list_of_pydantic_example",
        "nested_pydantic_as_arg_example",
        "simple_d20",
        "all_python_complex",
        "all_python_complex_nodict",
    ]

    for filename in filenames:
        print(f"==== TESTING OPENAI PAYLOAD FOR {openai_model} + {filename} ====")
        schema = _load_schema_from_source_filename(filename)

        # We should expect the all_python_complex one to fail when structured_output=True
        if filename == "all_python_complex" and structured_output:
            with pytest.raises(ValueError):
                _openai_payload(openai_model, schema, structured_output)
        else:
            _openai_payload(openai_model, schema, structured_output)


@pytest.mark.parametrize("openai_model", ["gpt-4o-mini"])
@pytest.mark.parametrize("structured_output", [True])
def test_composio_tool_schema_generation(openai_model: str, structured_output: bool):
    """Test that we can generate the schemas for some Composio tools.

    Skipped when ``COMPOSIO_API_KEY`` is not set. Each generated schema is sent
    to OpenAI via ``_openai_payload``.
    """
    if not os.getenv("COMPOSIO_API_KEY"):
        pytest.skip("COMPOSIO_API_KEY not set")

    action_names = [
        "CAL_GET_AVAILABLE_SLOTS_INFO",  # has an array arg, needs to be converted properly
    ]

    for action_name in action_names:
        tool_create = ToolCreate.from_composio(action_name=action_name)
        print(tool_create)

        schema = tool_create.json_schema
        _openai_payload(openai_model, schema, structured_output)