import time
import uuid

import pytest

from letta import create_client
from letta.schemas.letta_message import ToolCallMessage
from letta.schemas.tool_rule import (
    ChildToolRule,
    ConditionalToolRule,
    InitToolRule,
    TerminalToolRule,
)
from tests.helpers.endpoints_helper import (
    assert_invoked_function_call,
    assert_invoked_send_message_with_keyword,
    assert_sanity_checks,
    setup_agent,
)
from tests.helpers.utils import cleanup

# Generate uuid for agent name for this example
namespace = uuid.NAMESPACE_DNS
agent_uuid = str(uuid.uuid5(namespace, "test_agent_tool_graph"))
config_file = "tests/configs/llm_model_configs/openai-gpt-4o.json"


# Contrived tools for this test case.
#
# NOTE(review): these functions are registered with `client.create_or_update_tool`,
# so their docstrings and bodies are presumably shipped to the agent as-is (the
# docstrings are written as instructions to the model). Keep them self-contained
# (function-local imports) and avoid syntax that needs a very recent interpreter.


def first_secret_word():
    """
    Call this to retrieve the first secret word, which you will need for the second_secret_word function.
    """
    return "v0iq020i0g"


def second_secret_word(prev_secret_word: str):
    """
    Call this to retrieve the second secret word, which you will need for the third_secret_word function. If you get the word wrong, this function will error.

    Args:
        prev_secret_word (str): The secret word retrieved from calling first_secret_word.
    """
    if prev_secret_word != "v0iq020i0g":
        raise RuntimeError(f"Expected secret v0iq020i0g, got {prev_secret_word}")

    return "4rwp2b4gxq"


def third_secret_word(prev_secret_word: str):
    """
    Call this to retrieve the third secret word, which you will need for the fourth_secret_word function. If you get the word wrong, this function will error.

    Args:
        prev_secret_word (str): The secret word retrieved from calling second_secret_word.
    """
    if prev_secret_word != "4rwp2b4gxq":
        raise RuntimeError(f"Expected secret 4rwp2b4gxq, got {prev_secret_word}")

    return "hj2hwibbqm"


def fourth_secret_word(prev_secret_word: str):
    """
    Call this to retrieve the last secret word, which you will need to output in a send_message later. If you get the word wrong, this function will error.

    Args:
        prev_secret_word (str): The secret word retrieved from calling third_secret_word.
    """
    if prev_secret_word != "hj2hwibbqm":
        raise RuntimeError(f"Expected secret hj2hwibbqm, got {prev_secret_word}")

    return "banana"


def flip_coin():
    """
    Call this to retrieve the password to the secret word, which you will need to output in a send_message later.
    If it returns an empty string, try flipping again!

    Returns:
        str: The password or an empty string
    """
    import random

    # Flip a coin with 50% chance
    if random.random() < 0.5:
        return ""
    return "hj2hwibbqm"


def flip_coin_hard():
    """
    Call this to retrieve the password to the secret word, which you will need to output in a send_message later.
    If it returns an empty string, try flipping again!

    Returns:
        str: The password or an empty string
    """
    import random

    # Flip a coin with 50% chance
    result = random.random()
    if result < 0.5:
        return ""
    if result < 0.75:
        return "START_OVER"
    return "hj2hwibbqm"


def can_play_game():
    """
    Call this to start the tool chain.
    """
    import random

    return random.random() < 0.5


def return_none():
    """
    Really simple function
    """
    return None


def auto_error():
    """
    If you call this function, it will throw an error automatically.
    """
    raise RuntimeError("This should never be called.")


def _assert_tool_call_order(messages, expected_names):
    """Assert that the tool calls in ``messages`` follow ``expected_names`` in order.

    Only ``ToolCallMessage`` entries are inspected. The i-th tool call must be
    named ``expected_names[i]``. The expected sequence does not have to be
    fully consumed (a run may legitimately stop early), but a tool call beyond
    the end of the expected sequence fails with an ``AssertionError`` rather
    than an ``IndexError``.
    """
    expected = iter(expected_names)
    for message in messages:
        if not isinstance(message, ToolCallMessage):
            continue
        actual = message.tool_call.name
        wanted = next(expected, None)
        assert wanted is not None, f"Unexpected extra tool call: {actual}"
        assert actual == wanted, f"Expected tool call {wanted}, got {actual}"


@pytest.mark.timeout(60)  # Sets a 60-second timeout for the test since this could loop infinitely
def test_single_path_agent_tool_call_graph(mock_e2b_api_key_none):
    """Chain the four secret-word tools with child rules and check they are called in order."""
    client = create_client()
    cleanup(client=client, agent_uuid=agent_uuid)

    # Add tools
    t1 = client.create_or_update_tool(first_secret_word)
    t2 = client.create_or_update_tool(second_secret_word)
    t3 = client.create_or_update_tool(third_secret_word)
    t4 = client.create_or_update_tool(fourth_secret_word)
    t_err = client.create_or_update_tool(auto_error)
    tools = [t1, t2, t3, t4, t_err]

    # Make tool rules
    tool_rules = [
        InitToolRule(tool_name="first_secret_word"),
        ChildToolRule(tool_name="first_secret_word", children=["second_secret_word"]),
        ChildToolRule(tool_name="second_secret_word", children=["third_secret_word"]),
        ChildToolRule(tool_name="third_secret_word", children=["fourth_secret_word"]),
        ChildToolRule(tool_name="fourth_secret_word", children=["send_message"]),
        TerminalToolRule(tool_name="send_message"),
    ]

    # Make agent state
    agent_state = setup_agent(client, config_file, agent_uuid=agent_uuid, tool_ids=[t.id for t in tools], tool_rules=tool_rules)
    response = client.user_message(agent_id=agent_state.id, message="What is the fourth secret word?")

    # Make checks
    assert_sanity_checks(response)

    # Assert the tools were called
    assert_invoked_function_call(response.messages, "first_secret_word")
    assert_invoked_function_call(response.messages, "second_secret_word")
    assert_invoked_function_call(response.messages, "third_secret_word")
    assert_invoked_function_call(response.messages, "fourth_secret_word")

    # Check ordering of tool calls
    expected_order = [t.name for t in (t1, t2, t3, t4)] + ["send_message"]
    _assert_tool_call_order(response.messages, expected_order)

    # Check final send message contains the last secret word ("banana")
    assert_invoked_send_message_with_keyword(response.messages, "banana")

    print(f"Got successful response from client: \n\n{response}")
    cleanup(client=client, agent_uuid=agent_uuid)


def test_check_tool_rules_with_different_models(mock_e2b_api_key_none):
    """Test that tool rules are properly checked for different model configurations."""
    client = create_client()

    config_files = [
        "tests/configs/llm_model_configs/claude-3-sonnet-20240229.json",
        "tests/configs/llm_model_configs/openai-gpt-3.5-turbo.json",
        "tests/configs/llm_model_configs/openai-gpt-4o.json",
    ]

    # Create two test tools
    t1_name = "first_secret_word"
    t2_name = "second_secret_word"
    t1 = client.create_or_update_tool(first_secret_word, name=t1_name)
    t2 = client.create_or_update_tool(second_secret_word, name=t2_name)
    tool_rules = [InitToolRule(tool_name=t1_name), InitToolRule(tool_name=t2_name)]
    tools = [t1, t2]

    for config_file in config_files:
        # Setup tools
        agent_uuid = str(uuid.uuid4())

        if "gpt-4o" in config_file:
            # Structured output model (should work with multiple init tools)
            agent_state = setup_agent(client, config_file, agent_uuid=agent_uuid, tool_ids=[t.id for t in tools], tool_rules=tool_rules)
            assert agent_state is not None
        else:
            # Non-structured output model (should raise error with multiple init tools)
            with pytest.raises(ValueError, match="Multiple initial tools are not supported for non-structured models"):
                setup_agent(client, config_file, agent_uuid=agent_uuid, tool_ids=[t.id for t in tools], tool_rules=tool_rules)

        # Cleanup
        cleanup(client=client, agent_uuid=agent_uuid)

    # Create tool rule with single initial tool
    t3_name = "third_secret_word"
    t3 = client.create_or_update_tool(third_secret_word, name=t3_name)
    tool_rules = [InitToolRule(tool_name=t3_name)]
    tools = [t3]
    for config_file in config_files:
        agent_uuid = str(uuid.uuid4())

        # Every listed model (structured output or not) should work with a single init tool
        agent_state = setup_agent(client, config_file, agent_uuid=agent_uuid, tool_ids=[t.id for t in tools], tool_rules=tool_rules)
        assert agent_state is not None

        cleanup(client=client, agent_uuid=agent_uuid)


def test_claude_initial_tool_rule_enforced(mock_e2b_api_key_none):
    """Test that the initial tool rule is enforced for the first message."""
    client = create_client()

    # Create tool rules that require first_secret_word to be called first
    t1_name = "first_secret_word"
    t2_name = "second_secret_word"
    t1 = client.create_or_update_tool(first_secret_word, name=t1_name)
    t2 = client.create_or_update_tool(second_secret_word, name=t2_name)
    tool_rules = [
        InitToolRule(tool_name=t1_name),
        ChildToolRule(tool_name=t1_name, children=[t2_name]),
        TerminalToolRule(tool_name=t2_name),
    ]
    tools = [t1, t2]

    # Make agent state
    anthropic_config_file = "tests/configs/llm_model_configs/claude-3-sonnet-20240229.json"
    num_iterations = 3
    for i in range(num_iterations):
        agent_uuid = str(uuid.uuid4())
        agent_state = setup_agent(
            client, anthropic_config_file, agent_uuid=agent_uuid, tool_ids=[t.id for t in tools], tool_rules=tool_rules
        )
        response = client.user_message(agent_id=agent_state.id, message="What is the second secret word?")

        assert_sanity_checks(response)
        messages = response.messages

        assert_invoked_function_call(messages, "first_secret_word")
        assert_invoked_function_call(messages, "second_secret_word")

        # Check ordering of tool calls; "send_message" is listed last but the
        # order check only constrains the tool calls that actually occurred.
        expected_order = [t.name for t in (t1, t2)] + ["send_message"]
        _assert_tool_call_order(messages, expected_order)

        print(f"Passed iteration {i}")
        cleanup(client=client, agent_uuid=agent_uuid)

        # Implement exponential backoff with initial time of 10 seconds
        # (skipped after the final iteration, where there is nothing left to wait for)
        if i < num_iterations - 1:
            backoff_time = 10 * (2**i)
            time.sleep(backoff_time)


@pytest.mark.timeout(60)  # Sets a 60-second timeout for the test since this could loop infinitely
def test_agent_no_structured_output_with_one_child_tool(mock_e2b_api_key_none):
    """Run an archival search -> insert -> send_message chain on each config, retrying failed assertions."""
    client = create_client()
    cleanup(client=client, agent_uuid=agent_uuid)

    send_message = client.server.tool_manager.get_tool_by_name(tool_name="send_message", actor=client.user)
    archival_memory_search = client.server.tool_manager.get_tool_by_name(tool_name="archival_memory_search", actor=client.user)
    archival_memory_insert = client.server.tool_manager.get_tool_by_name(tool_name="archival_memory_insert", actor=client.user)

    # Make tool rules
    tool_rules = [
        InitToolRule(tool_name="archival_memory_search"),
        ChildToolRule(tool_name="archival_memory_search", children=["archival_memory_insert"]),
        ChildToolRule(tool_name="archival_memory_insert", children=["send_message"]),
        TerminalToolRule(tool_name="send_message"),
    ]
    tools = [send_message, archival_memory_search, archival_memory_insert]

    config_files = [
        "tests/configs/llm_model_configs/claude-3-sonnet-20240229.json",
        "tests/configs/llm_model_configs/openai-gpt-4o.json",
    ]

    for config in config_files:
        max_retries = 3

        for attempt in range(max_retries):
            try:
                agent_state = setup_agent(client, config, agent_uuid=agent_uuid, tool_ids=[t.id for t in tools], tool_rules=tool_rules)
                response = client.user_message(agent_id=agent_state.id, message="hi. run archival memory search")

                # Make checks
                assert_sanity_checks(response)

                # Assert the tools were called
                assert_invoked_function_call(response.messages, "archival_memory_search")
                assert_invoked_function_call(response.messages, "archival_memory_insert")
                assert_invoked_function_call(response.messages, "send_message")

                # Check ordering of tool calls
                expected_order = [t.name for t in (archival_memory_search, archival_memory_insert, send_message)]
                _assert_tool_call_order(response.messages, expected_order)

                print(f"Got successful response from client: \n\n{response}")
                break  # Test passed, exit retry loop

            except AssertionError:
                is_last_attempt = attempt == max_retries - 1
                print(f"All {max_retries} attempts failed" if is_last_attempt else f"Attempt {attempt + 1} failed, retrying...")
                cleanup(client=client, agent_uuid=agent_uuid)
                # Re-raise only when the final attempt itself failed; a success on
                # the last attempt after earlier failures must not fail the test.
                if is_last_attempt:
                    raise

        # The same agent_uuid is reused for the next config, so clean up in between
        cleanup(client=client, agent_uuid=agent_uuid)


@pytest.mark.timeout(60)  # Sets a 60-second timeout for the test since this could loop infinitely
def test_agent_conditional_tool_easy(mock_e2b_api_key_none):
    """
    Test the agent with a conditional tool that has a child tool.

    Tool Flow:

         -------
        |       |
        |       v
         -- flip_coin
                |
                v
        reveal_secret_word
    """
    client = create_client()
    cleanup(client=client, agent_uuid=agent_uuid)

    coin_flip_name = "flip_coin"
    secret_word_tool = "fourth_secret_word"
    flip_coin_tool = client.create_or_update_tool(flip_coin, name=coin_flip_name)
    reveal_secret = client.create_or_update_tool(fourth_secret_word, name=secret_word_tool)

    # Make tool rules
    tool_rules = [
        InitToolRule(tool_name=coin_flip_name),
        ConditionalToolRule(
            tool_name=coin_flip_name,
            default_child=coin_flip_name,
            child_output_mapping={
                "hj2hwibbqm": secret_word_tool,
            },
        ),
        TerminalToolRule(tool_name=secret_word_tool),
    ]
    tools = [flip_coin_tool, reveal_secret]

    config_file = "tests/configs/llm_model_configs/claude-3-sonnet-20240229.json"
    agent_state = setup_agent(client, config_file, agent_uuid=agent_uuid, tool_ids=[t.id for t in tools], tool_rules=tool_rules)
    response = client.user_message(agent_id=agent_state.id, message="flip a coin until you get the secret word")

    # Make checks
    assert_sanity_checks(response)

    # Assert the tools were called
    assert_invoked_function_call(response.messages, "flip_coin")
    assert_invoked_function_call(response.messages, "fourth_secret_word")

    # Check ordering of tool calls
    found_secret_word = False
    for m in response.messages:
        if isinstance(m, ToolCallMessage):
            if m.tool_call.name == secret_word_tool:
                # Should be the last tool call
                found_secret_word = True
            else:
                # Before finding secret_word, only flip_coin should be called
                assert m.tool_call.name == coin_flip_name
                assert not found_secret_word

    # Ensure the secret word tool was called
    assert found_secret_word

    print(f"Got successful response from client: \n\n{response}")
    cleanup(client=client, agent_uuid=agent_uuid)


@pytest.mark.timeout(90)  # Longer timeout since this test has more steps
def test_agent_conditional_tool_hard(mock_e2b_api_key_none):
    """
    Test the agent with a complex conditional tool graph

    Tool Flow:

        can_play_game <---+
              |           |
              v           |
        flip_coin_hard ---+
              |
              v
      fourth_secret_word
    """
    client = create_client()
    cleanup(client=client, agent_uuid=agent_uuid)

    # Create tools
    play_game = "can_play_game"
    coin_flip_name = "flip_coin_hard"
    final_tool = "fourth_secret_word"
    play_game_tool = client.create_or_update_tool(can_play_game, name=play_game)
    flip_coin_tool = client.create_or_update_tool(flip_coin_hard, name=coin_flip_name)
    reveal_secret = client.create_or_update_tool(fourth_secret_word, name=final_tool)

    # Make tool rules - chain them together with conditional rules
    tool_rules = [
        InitToolRule(tool_name=play_game),
        ConditionalToolRule(
            tool_name=play_game,
            default_child=play_game,  # Keep trying if we can't play
            child_output_mapping={True: coin_flip_name},  # Only allow access when can_play_game returns True
        ),
        ConditionalToolRule(
            tool_name=coin_flip_name, default_child=coin_flip_name, child_output_mapping={"hj2hwibbqm": final_tool, "START_OVER": play_game}
        ),
        TerminalToolRule(tool_name=final_tool),
    ]

    # Setup agent with all tools
    tools = [play_game_tool, flip_coin_tool, reveal_secret]
    config_file = "tests/configs/llm_model_configs/claude-3-sonnet-20240229.json"
    agent_state = setup_agent(client, config_file, agent_uuid=agent_uuid, tool_ids=[t.id for t in tools], tool_rules=tool_rules)

    # Kick off the tool chain with a minimal prompt
    response = client.user_message(agent_id=agent_state.id, message="hi")

    # Make checks
    assert_sanity_checks(response)

    # Assert the entry tool and the final tool were called
    assert_invoked_function_call(response.messages, play_game)
    assert_invoked_function_call(response.messages, final_tool)

    # Check ordering of tool calls
    found_words = []
    for m in response.messages:
        if isinstance(m, ToolCallMessage):
            name = m.tool_call.name
            if name in [play_game, coin_flip_name]:
                # can_play_game and flip_coin_hard may only be called before the final tool
                assert not found_words, f"Found {name} after {final_tool} was already called"
            else:
                # Any other tool call must be the final tool
                expected_word = final_tool
                assert name == expected_word, f"Found {name} but expected {expected_word}"
                found_words.append(name)

    # Ensure the final tool was called exactly once
    assert found_words == [final_tool]

    print(f"Got successful response from client: \n\n{response}")
    cleanup(client=client, agent_uuid=agent_uuid)


@pytest.mark.timeout(60)
def test_agent_conditional_tool_without_default_child(mock_e2b_api_key_none):
    """
    Test the agent with a conditional tool that allows any child tool to be called if a function returns None.

    Tool Flow:

        return_none
            |
            v
        any tool...  <-- When output doesn't match mapping, agent can call any tool
    """
    client = create_client()
    cleanup(client=client, agent_uuid=agent_uuid)

    # Create tools - we'll make several available to the agent
    tool_name = "return_none"

    tool = client.create_or_update_tool(return_none, name=tool_name)
    secret_word = client.create_or_update_tool(first_secret_word, name="first_secret_word")

    # Make tool rules - only map one output, let others be free choice
    tool_rules = [
        InitToolRule(tool_name=tool_name),
        ConditionalToolRule(
            tool_name=tool_name,
            default_child=None,  # Allow any tool to be called if output doesn't match
            child_output_mapping={"anything but none": "first_secret_word"},
        ),
    ]
    tools = [tool, secret_word]

    # Setup agent with all tools
    agent_state = setup_agent(client, config_file, agent_uuid=agent_uuid, tool_ids=[t.id for t in tools], tool_rules=tool_rules)

    # Ask agent to try different tools based on the game output
    response = client.user_message(agent_id=agent_state.id, message="call a function, any function. then call send_message")

    # Make checks
    assert_sanity_checks(response)

    # Assert return_none was called
    assert_invoked_function_call(response.messages, tool_name)

    # Assert some other tool was called after return_none
    found_any_tool = False
    found_return_none = False
    for m in response.messages:
        if isinstance(m, ToolCallMessage):
            if m.tool_call.name == tool_name:
                found_return_none = True
            elif found_return_none and m.tool_call.name:
                found_any_tool = True
                break

    assert found_any_tool, "Should have called any tool after return_none"

    print(f"Got successful response from client: \n\n{response}")
    cleanup(client=client, agent_uuid=agent_uuid)


@pytest.mark.timeout(60)
def test_agent_reload_remembers_function_response(mock_e2b_api_key_none):
    """
    Test that when an agent is reloaded, it remembers the last function response for conditional tool chaining.

    Tool Flow:

        flip_coin
            |
            v
    fourth_secret_word  <-- Should remember coin flip result after reload
    """
    client = create_client()
    cleanup(client=client, agent_uuid=agent_uuid)

    # Create tools
    flip_coin_name = "flip_coin"
    secret_word = "fourth_secret_word"
    flip_coin_tool = client.create_or_update_tool(flip_coin, name=flip_coin_name)
    secret_word_tool = client.create_or_update_tool(fourth_secret_word, name=secret_word)

    # Make tool rules - map coin flip to fourth_secret_word
    tool_rules = [
        InitToolRule(tool_name=flip_coin_name),
        ConditionalToolRule(
            tool_name=flip_coin_name,
            default_child=flip_coin_name,  # Flip again whenever the output is not the password
            child_output_mapping={"hj2hwibbqm": secret_word},
        ),
        TerminalToolRule(tool_name=secret_word),
    ]
    tools = [flip_coin_tool, secret_word_tool]

    # Setup initial agent
    agent_state = setup_agent(client, config_file, agent_uuid=agent_uuid, tool_ids=[t.id for t in tools], tool_rules=tool_rules)

    # Call flip_coin first
    response = client.user_message(agent_id=agent_state.id, message="flip a coin")
    assert_invoked_function_call(response.messages, flip_coin_name)
    assert_invoked_function_call(response.messages, secret_word)

    found_fourth_secret = any(isinstance(m, ToolCallMessage) and m.tool_call.name == secret_word for m in response.messages)
    assert found_fourth_secret, "Reloaded agent should remember coin flip result and call fourth_secret_word if True"

    # Reload the agent and check the last function response is still available
    reloaded_agent = client.server.load_agent(agent_id=agent_state.id, actor=client.user)
    assert reloaded_agent.last_function_response is not None

    print(f"Got successful response from client: \n\n{response}")
    cleanup(client=client, agent_uuid=agent_uuid)