import time
import uuid

import pytest

from letta import create_client
from letta.schemas.letta_message import ToolCallMessage
from letta.schemas.tool_rule import ChildToolRule, ConditionalToolRule, ContinueToolRule, InitToolRule, TerminalToolRule
from tests.helpers.endpoints_helper import (
    assert_invoked_function_call,
    assert_invoked_send_message_with_keyword,
    assert_sanity_checks,
    setup_agent,
)
from tests.helpers.utils import cleanup

# Generate uuid for agent name for this example
namespace = uuid.NAMESPACE_DNS
agent_uuid = str(uuid.uuid5(namespace, "test_agent_tool_graph"))
config_file = "tests/configs/llm_model_configs/openai-gpt-4o.json"


"""Contrived tools for this test case"""

# NOTE(review): the functions below are registered as agent tools via `client.create_or_update_tool`. Their
# docstrings and bodies are presumably read from source when the tool is created, so they are intentionally
# left as-is (including the function-local `import random`) — confirm before editing them.


def first_secret_word():
    """
    Call this to retrieve the first secret word, which you will need for the second_secret_word function.
    """
    return "v0iq020i0g"


def second_secret_word(prev_secret_word: str):
    """
    Call this to retrieve the second secret word, which you will need for the third_secret_word function. If you get the word wrong, this function will error.

    Args:
        prev_secret_word (str): The secret word retrieved from calling first_secret_word.
    """
    if prev_secret_word != "v0iq020i0g":
        raise RuntimeError(f"Expected secret {'v0iq020i0g'}, got {prev_secret_word}")

    return "4rwp2b4gxq"


def third_secret_word(prev_secret_word: str):
    """
    Call this to retrieve the third secret word, which you will need for the fourth_secret_word function. If you get the word wrong, this function will error.

    Args:
        prev_secret_word (str): The secret word retrieved from calling second_secret_word.
    """
    if prev_secret_word != "4rwp2b4gxq":
        raise RuntimeError(f'Expected secret "4rwp2b4gxq", got {prev_secret_word}')

    return "hj2hwibbqm"


def fourth_secret_word(prev_secret_word: str):
    """
    Call this to retrieve the last secret word, which you will need to output in a send_message later. If you get the word wrong, this function will error.

    Args:
        prev_secret_word (str): The secret word retrieved from calling third_secret_word.
    """
    if prev_secret_word != "hj2hwibbqm":
        raise RuntimeError(f"Expected secret {'hj2hwibbqm'}, got {prev_secret_word}")

    return "banana"


def flip_coin():
    """
    Call this to retrieve the password to the secret word, which you will need to output in a send_message later.
    If it returns an empty string, try flipping again!

    Returns:
        str: The password or an empty string
    """
    import random

    # Flip a coin with 50% chance
    if random.random() < 0.5:
        return ""
    return "hj2hwibbqm"


def flip_coin_hard():
    """
    Call this to retrieve the password to the secret word, which you will need to output in a send_message later.
    If it returns an empty string, try flipping again!

    Returns:
        str: The password or an empty string
    """
    import random

    # Flip a coin with 50% chance
    result = random.random()
    if result < 0.5:
        return ""
    if result < 0.75:
        return "START_OVER"
    return "hj2hwibbqm"


def can_play_game():
    """
    Call this to start the tool chain.
    """
    import random

    return random.random() < 0.5


def return_none():
    """
    Really simple function
    """
    return None


def auto_error():
    """
    If you call this function, it will throw an error automatically.
    """
    raise RuntimeError("This should never be called.")


def _assert_tool_calls_follow_order(messages, expected_names):
    """
    Assert that the tool calls found in `messages` follow the order given by `expected_names`.

    Each ToolCallMessage must match the next not-yet-consumed expected name. The expected list does not have to
    be fully consumed (callers assert the required invocations separately), but a tool call past the end of the
    list fails with an AssertionError instead of an IndexError, so retry loops that catch AssertionError see it.

    Args:
        messages: The response messages to scan; entries that are not ToolCallMessage are ignored.
        expected_names: Tool names in the order they are expected to be called.
    """
    remaining = list(expected_names)
    for m in messages:
        if isinstance(m, ToolCallMessage):
            assert remaining, f"Unexpected extra tool call: {m.tool_call.name}"
            # Check that it's equal to the next expected one
            assert m.tool_call.name == remaining[0], f"Expected tool call {remaining[0]} but got {m.tool_call.name}"
            # Pop out the matched name
            remaining = remaining[1:]


@pytest.mark.timeout(60)  # Sets a 60-second timeout for the test since this could loop infinitely
def test_single_path_agent_tool_call_graph(mock_e2b_api_key_none):
    """Test a linear tool chain: the four secret-word tools must be called in order, followed by send_message."""
    client = create_client()
    cleanup(client=client, agent_uuid=agent_uuid)

    # Add tools
    t1 = client.create_or_update_tool(first_secret_word)
    t2 = client.create_or_update_tool(second_secret_word)
    t3 = client.create_or_update_tool(third_secret_word)
    t4 = client.create_or_update_tool(fourth_secret_word)
    t_err = client.create_or_update_tool(auto_error)
    tools = [t1, t2, t3, t4, t_err]

    # Make tool rules
    tool_rules = [
        InitToolRule(tool_name="first_secret_word"),
        ChildToolRule(tool_name="first_secret_word", children=["second_secret_word"]),
        ChildToolRule(tool_name="second_secret_word", children=["third_secret_word"]),
        ChildToolRule(tool_name="third_secret_word", children=["fourth_secret_word"]),
        ChildToolRule(tool_name="fourth_secret_word", children=["send_message"]),
        TerminalToolRule(tool_name="send_message"),
    ]

    # Make agent state
    agent_state = setup_agent(client, config_file, agent_uuid=agent_uuid, tool_ids=[t.id for t in tools], tool_rules=tool_rules)
    response = client.user_message(agent_id=agent_state.id, message="What is the fourth secret word?")

    # Make checks
    assert_sanity_checks(response)

    # Assert the tools were called
    assert_invoked_function_call(response.messages, "first_secret_word")
    assert_invoked_function_call(response.messages, "second_secret_word")
    assert_invoked_function_call(response.messages, "third_secret_word")
    assert_invoked_function_call(response.messages, "fourth_secret_word")

    # Check ordering of tool calls
    tool_names = [t.name for t in [t1, t2, t3, t4]]
    tool_names += ["send_message"]
    _assert_tool_calls_follow_order(response.messages, tool_names)

    # Check final send message contains the last secret word ("banana")
    assert_invoked_send_message_with_keyword(response.messages, "banana")

    print(f"Got successful response from client: \n\n{response}")
    cleanup(client=client, agent_uuid=agent_uuid)


def test_check_tool_rules_with_different_models(mock_e2b_api_key_none):
    """Test that tool rules are properly checked for different model configurations."""
    client = create_client()

    config_files = [
        "tests/configs/llm_model_configs/claude-3-5-sonnet.json",
        "tests/configs/llm_model_configs/openai-gpt-3.5-turbo.json",
        "tests/configs/llm_model_configs/openai-gpt-4o.json",
    ]

    # Create two test tools
    t1_name = "first_secret_word"
    t2_name = "second_secret_word"
    t1 = client.create_or_update_tool(first_secret_word)
    t2 = client.create_or_update_tool(second_secret_word)
    tool_rules = [InitToolRule(tool_name=t1_name), InitToolRule(tool_name=t2_name)]
    tools = [t1, t2]

    for config_file in config_files:
        # Setup tools
        agent_uuid = str(uuid.uuid4())

        if "gpt-4o" in config_file:
            # Structured output model (should work with multiple init tools)
            agent_state = setup_agent(client, config_file, agent_uuid=agent_uuid, tool_ids=[t.id for t in tools], tool_rules=tool_rules)
            assert agent_state is not None
        else:
            # Non-structured output model (should raise error with multiple init tools)
            with pytest.raises(ValueError, match="Multiple initial tools are not supported for non-structured models"):
                setup_agent(client, config_file, agent_uuid=agent_uuid, tool_ids=[t.id for t in tools], tool_rules=tool_rules)

        # Cleanup
        cleanup(client=client, agent_uuid=agent_uuid)

    # Create tool rule with single initial tool
    t3_name = "third_secret_word"
    t3 = client.create_or_update_tool(third_secret_word)
    tool_rules = [InitToolRule(tool_name=t3_name)]
    tools = [t3]
    for config_file in config_files:
        agent_uuid = str(uuid.uuid4())

        # A single init tool should be accepted for every model configuration
        agent_state = setup_agent(client, config_file, agent_uuid=agent_uuid, tool_ids=[t.id for t in tools], tool_rules=tool_rules)
        assert agent_state is not None

        cleanup(client=client, agent_uuid=agent_uuid)


def test_claude_initial_tool_rule_enforced(mock_e2b_api_key_none):
    """Test that the initial tool rule is enforced for the first message."""
    client = create_client()

    # Create tool rules that require first_secret_word to be called first
    t1_name = "first_secret_word"
    t2_name = "second_secret_word"
    t1 = client.create_or_update_tool(first_secret_word)
    t2 = client.create_or_update_tool(second_secret_word)
    tool_rules = [
        InitToolRule(tool_name=t1_name),
        ChildToolRule(tool_name=t1_name, children=[t2_name]),
        TerminalToolRule(tool_name=t2_name),
    ]
    tools = [t1, t2]

    # Make agent state
    anthropic_config_file = "tests/configs/llm_model_configs/claude-3-5-sonnet.json"
    for i in range(3):
        agent_uuid = str(uuid.uuid4())
        agent_state = setup_agent(
            client, anthropic_config_file, agent_uuid=agent_uuid, tool_ids=[t.id for t in tools], tool_rules=tool_rules
        )
        response = client.user_message(agent_id=agent_state.id, message="What is the second secret word?")

        assert_sanity_checks(response)
        messages = response.messages

        assert_invoked_function_call(messages, "first_secret_word")
        assert_invoked_function_call(messages, "second_secret_word")

        # Check ordering of tool calls
        tool_names = [t.name for t in [t1, t2]]
        tool_names += ["send_message"]
        _assert_tool_calls_follow_order(messages, tool_names)

        print(f"Passed iteration {i}")
        cleanup(client=client, agent_uuid=agent_uuid)

        # Implement exponential backoff with initial time of 10 seconds
        if i < 2:
            backoff_time = 10 * (2**i)
            time.sleep(backoff_time)


@pytest.mark.timeout(60)  # Sets a 60-second timeout for the test since this could loop infinitely
def test_agent_no_structured_output_with_one_child_tool(mock_e2b_api_key_none):
    """Test a search -> insert -> send_message chain of base tools, retrying each model config up to three times."""
    client = create_client()
    cleanup(client=client, agent_uuid=agent_uuid)

    send_message = client.server.tool_manager.get_tool_by_name(tool_name="send_message", actor=client.user)
    archival_memory_search = client.server.tool_manager.get_tool_by_name(tool_name="archival_memory_search", actor=client.user)
    archival_memory_insert = client.server.tool_manager.get_tool_by_name(tool_name="archival_memory_insert", actor=client.user)

    # Make tool rules
    tool_rules = [
        InitToolRule(tool_name="archival_memory_search"),
        ChildToolRule(tool_name="archival_memory_search", children=["archival_memory_insert"]),
        ChildToolRule(tool_name="archival_memory_insert", children=["send_message"]),
        TerminalToolRule(tool_name="send_message"),
    ]
    tools = [send_message, archival_memory_search, archival_memory_insert]

    config_files = [
        "tests/configs/llm_model_configs/claude-3-5-sonnet.json",
        "tests/configs/llm_model_configs/openai-gpt-4o.json",
    ]

    for config in config_files:
        max_retries = 3
        last_error = None

        for attempt in range(max_retries):
            try:
                agent_state = setup_agent(client, config, agent_uuid=agent_uuid, tool_ids=[t.id for t in tools], tool_rules=tool_rules)
                response = client.user_message(agent_id=agent_state.id, message="hi. run archival memory search")

                # Make checks
                assert_sanity_checks(response)

                # Assert the tools were called
                assert_invoked_function_call(response.messages, "archival_memory_search")
                assert_invoked_function_call(response.messages, "archival_memory_insert")
                assert_invoked_function_call(response.messages, "send_message")

                # Check ordering of tool calls
                tool_names = [t.name for t in [archival_memory_search, archival_memory_insert, send_message]]
                _assert_tool_calls_follow_order(response.messages, tool_names)

                print(f"Got successful response from client: \n\n{response}")
                break  # Test passed, exit retry loop

            except AssertionError as e:
                last_error = e
                print(f"Attempt {attempt + 1} failed, retrying..." if attempt < max_retries - 1 else f"All {max_retries} attempts failed")
                # Remove the agent so the next attempt can recreate it under the same name
                cleanup(client=client, agent_uuid=agent_uuid)
        else:
            # Only reached when no attempt hit `break`, i.e. every attempt failed. Checking `last_error` after
            # the loop instead would wrongly fail a run whose final attempt succeeded after earlier failures.
            raise last_error

        cleanup(client=client, agent_uuid=agent_uuid)


@pytest.mark.timeout(60)  # Sets a 60-second timeout for the test since this could loop infinitely
def test_agent_conditional_tool_easy(mock_e2b_api_key_none):
    """
    Test the agent with a conditional tool that has a child tool.

    Tool Flow:

         -------
        |       |
        |       v
         -- flip_coin
                |
                v
        reveal_secret_word
    """

    client = create_client()
    cleanup(client=client, agent_uuid=agent_uuid)

    coin_flip_name = "flip_coin"
    secret_word_tool = "fourth_secret_word"
    flip_coin_tool = client.create_or_update_tool(flip_coin)
    reveal_secret = client.create_or_update_tool(fourth_secret_word)

    # Make tool rules
    tool_rules = [
        InitToolRule(tool_name=coin_flip_name),
        ConditionalToolRule(
            tool_name=coin_flip_name,
            default_child=coin_flip_name,
            child_output_mapping={
                "hj2hwibbqm": secret_word_tool,
            },
        ),
        TerminalToolRule(tool_name=secret_word_tool),
    ]
    tools = [flip_coin_tool, reveal_secret]

    config_file = "tests/configs/llm_model_configs/claude-3-5-sonnet.json"
    agent_state = setup_agent(client, config_file, agent_uuid=agent_uuid, tool_ids=[t.id for t in tools], tool_rules=tool_rules)
    response = client.user_message(agent_id=agent_state.id, message="flip a coin until you get the secret word")

    # Make checks
    assert_sanity_checks(response)

    # Assert the tools were called
    assert_invoked_function_call(response.messages, "flip_coin")
    assert_invoked_function_call(response.messages, "fourth_secret_word")

    # Check ordering of tool calls
    found_secret_word = False
    for m in response.messages:
        if isinstance(m, ToolCallMessage):
            if m.tool_call.name == secret_word_tool:
                # Should be the last tool call
                found_secret_word = True
            else:
                # Before finding secret_word, only flip_coin should be called
                assert m.tool_call.name == coin_flip_name
                assert not found_secret_word

    # Ensure the secret word tool was called
    assert found_secret_word

    print(f"Got successful response from client: \n\n{response}")
    cleanup(client=client, agent_uuid=agent_uuid)


@pytest.mark.timeout(90)  # Longer timeout since this test has more steps
def test_agent_conditional_tool_hard(mock_e2b_api_key_none):
    """
    Test the agent with a complex conditional tool graph

    Tool Flow:

        can_play_game <---+
              |           |
              v           |
        flip_coin  -------+
              |
              v
        fourth_secret_word
    """
    client = create_client()
    cleanup(client=client, agent_uuid=agent_uuid)

    # Create tools
    play_game = "can_play_game"
    coin_flip_name = "flip_coin_hard"
    final_tool = "fourth_secret_word"
    play_game_tool = client.create_or_update_tool(can_play_game)
    flip_coin_tool = client.create_or_update_tool(flip_coin_hard)
    reveal_secret = client.create_or_update_tool(fourth_secret_word)

    # Make tool rules - chain them together with conditional rules
    tool_rules = [
        InitToolRule(tool_name=play_game),
        ConditionalToolRule(
            tool_name=play_game,
            default_child=play_game,  # Keep trying if we can't play
            child_output_mapping={True: coin_flip_name},  # Only allow access when can_play_game returns True
        ),
        ConditionalToolRule(
            tool_name=coin_flip_name, default_child=coin_flip_name, child_output_mapping={"hj2hwibbqm": final_tool, "START_OVER": play_game}
        ),
        TerminalToolRule(tool_name=final_tool),
    ]

    # Setup agent with all tools
    tools = [play_game_tool, flip_coin_tool, reveal_secret]
    config_file = "tests/configs/llm_model_configs/claude-3-5-sonnet.json"
    agent_state = setup_agent(client, config_file, agent_uuid=agent_uuid, tool_ids=[t.id for t in tools], tool_rules=tool_rules)

    # Ask agent to try to get all secret words
    response = client.user_message(agent_id=agent_state.id, message="hi")

    # Make checks
    assert_sanity_checks(response)

    # Assert all tools were called
    assert_invoked_function_call(response.messages, play_game)
    assert_invoked_function_call(response.messages, final_tool)

    # Check ordering of tool calls
    found_words = []
    for m in response.messages:
        if isinstance(m, ToolCallMessage):
            name = m.tool_call.name
            if name in [play_game, coin_flip_name]:
                # can_play_game and flip_coin_hard may only be called before the final tool
                assert not found_words, f"Found {name} after {final_tool} was already called"
            else:
                # Should find secret words in order
                expected_word = final_tool
                assert name == expected_word, f"Found {name} but expected {expected_word}"
                found_words.append(name)

    # Ensure we found all secret words in order
    assert found_words == [final_tool]

    print(f"Got successful response from client: \n\n{response}")
    cleanup(client=client, agent_uuid=agent_uuid)


@pytest.mark.timeout(60)
def test_agent_conditional_tool_without_default_child(mock_e2b_api_key_none):
    """
    Test the agent with a conditional tool that allows any child tool to be called if a function returns None.

    Tool Flow:

        return_none
            |
            v
        any tool...  <-- When output doesn't match mapping, agent can call any tool
    """
    client = create_client()
    cleanup(client=client, agent_uuid=agent_uuid)

    # Create tools - we'll make several available to the agent
    tool_name = "return_none"

    tool = client.create_or_update_tool(return_none)
    secret_word = client.create_or_update_tool(first_secret_word)

    # Make tool rules - only map one output, let others be free choice
    tool_rules = [
        InitToolRule(tool_name=tool_name),
        ConditionalToolRule(
            tool_name=tool_name,
            default_child=None,  # Allow any tool to be called if output doesn't match
            child_output_mapping={"anything but none": "first_secret_word"},
        ),
    ]
    tools = [tool, secret_word]

    # Setup agent with all tools
    agent_state = setup_agent(client, config_file, agent_uuid=agent_uuid, tool_ids=[t.id for t in tools], tool_rules=tool_rules)

    # Ask agent to try different tools based on the game output
    response = client.user_message(agent_id=agent_state.id, message="call a function, any function. then call send_message")

    # Make checks
    assert_sanity_checks(response)

    # Assert return_none was called
    assert_invoked_function_call(response.messages, tool_name)

    # Assert any base function called afterward
    found_any_tool = False
    found_return_none = False
    for m in response.messages:
        if isinstance(m, ToolCallMessage):
            if m.tool_call.name == tool_name:
                found_return_none = True
            elif found_return_none and m.tool_call.name:
                found_any_tool = True
                break

    assert found_any_tool, "Should have called any tool after return_none"

    print(f"Got successful response from client: \n\n{response}")
    cleanup(client=client, agent_uuid=agent_uuid)


@pytest.mark.timeout(60)
def test_agent_reload_remembers_function_response(mock_e2b_api_key_none):
    """
    Test that when an agent is reloaded, it remembers the last function response for conditional tool chaining.

    Tool Flow:

        flip_coin
            |
            v
        fourth_secret_word  <-- Should remember coin flip result after reload
    """
    client = create_client()
    cleanup(client=client, agent_uuid=agent_uuid)

    # Create tools
    flip_coin_name = "flip_coin"
    secret_word = "fourth_secret_word"
    flip_coin_tool = client.create_or_update_tool(flip_coin)
    secret_word_tool = client.create_or_update_tool(fourth_secret_word)

    # Make tool rules - map coin flip to fourth_secret_word
    tool_rules = [
        InitToolRule(tool_name=flip_coin_name),
        ConditionalToolRule(
            tool_name=flip_coin_name,
            default_child=flip_coin_name,  # Flip again when the output doesn't match the mapping
            child_output_mapping={"hj2hwibbqm": secret_word},
        ),
        TerminalToolRule(tool_name=secret_word),
    ]
    tools = [flip_coin_tool, secret_word_tool]

    # Setup initial agent
    agent_state = setup_agent(client, config_file, agent_uuid=agent_uuid, tool_ids=[t.id for t in tools], tool_rules=tool_rules)

    # Call flip_coin first
    response = client.user_message(agent_id=agent_state.id, message="flip a coin")
    assert_invoked_function_call(response.messages, flip_coin_name)
    assert_invoked_function_call(response.messages, secret_word)
    found_fourth_secret = False
    for m in response.messages:
        if isinstance(m, ToolCallMessage) and m.tool_call.name == secret_word:
            found_fourth_secret = True
            break

    assert found_fourth_secret, "Reloaded agent should remember coin flip result and call fourth_secret_word if True"

    # Reload the agent
    reloaded_agent = client.server.load_agent(agent_id=agent_state.id, actor=client.user)
    assert reloaded_agent.last_function_response is not None

    print(f"Got successful response from client: \n\n{response}")
    cleanup(client=client, agent_uuid=agent_uuid)


@pytest.mark.timeout(60)  # Sets a 60-second timeout for the test since this could loop infinitely
def test_simple_tool_rule(mock_e2b_api_key_none):
    """
    Test a simple tool rule where fourth_secret_word must be called after flip_coin.

    Tool Flow:

        flip_coin
           |
           v
        fourth_secret_word
    """
    client = create_client()
    cleanup(client=client, agent_uuid=agent_uuid)

    # Create tools
    flip_coin_name = "flip_coin"
    secret_word = "fourth_secret_word"
    flip_coin_tool = client.create_or_update_tool(flip_coin)
    secret_word_tool = client.create_or_update_tool(fourth_secret_word)
    another_secret_word_tool = client.create_or_update_tool(first_secret_word)
    random_tool = client.create_or_update_tool(can_play_game)
    tools = [flip_coin_tool, secret_word_tool, another_secret_word_tool, random_tool]

    # Create tool rule: after flip_coin, must call fourth_secret_word
    tool_rule = ConditionalToolRule(
        tool_name=flip_coin_name,
        default_child=secret_word,
        child_output_mapping={"*": secret_word},
    )

    # Set up agent with the tool rule
    agent_state = setup_agent(
        client, config_file, agent_uuid, tool_rules=[tool_rule], tool_ids=[t.id for t in tools], include_base_tools=False
    )

    # Start conversation
    response = client.user_message(agent_id=agent_state.id, message="Help me test the tools.")

    # Verify the tool calls
    tool_calls = [msg for msg in response.messages if isinstance(msg, ToolCallMessage)]
    assert len(tool_calls) >= 2  # Should have at least flip_coin and fourth_secret_word calls
    assert_invoked_function_call(response.messages, flip_coin_name)
    assert_invoked_function_call(response.messages, secret_word)

    # Find the flip_coin call
    flip_coin_call = next((call for call in tool_calls if call.tool_call.name == "flip_coin"), None)

    # Verify that fourth_secret_word was called after flip_coin
    flip_coin_call_index = tool_calls.index(flip_coin_call)
    assert tool_calls[flip_coin_call_index + 1].tool_call.name == secret_word, "Fourth secret word should be called after flip_coin"

    # Pass the agent_uuid like every other cleanup call in this module (this used to pass agent_state.id)
    cleanup(client=client, agent_uuid=agent_uuid)


def test_init_tool_rule_always_fails_one_tool():
    """
    Test an init tool rule that always fails when called. The agent has only one tool available.

    Once that tool fails and the agent removes that tool, the agent should have 0 tools available.

    This means that the agent should return from `step` early.
    """
    client = create_client()
    cleanup(client=client, agent_uuid=agent_uuid)

    # Create tools
    bad_tool = client.create_or_update_tool(auto_error)

    # Create tool rule: InitToolRule
    tool_rule = InitToolRule(
        tool_name=bad_tool.name,
    )

    # Set up agent with the tool rule
    claude_config = "tests/configs/llm_model_configs/claude-3-5-sonnet.json"
    agent_state = setup_agent(client, claude_config, agent_uuid, tool_rules=[tool_rule], tool_ids=[bad_tool.id], include_base_tools=False)

    # Start conversation
    response = client.user_message(agent_id=agent_state.id, message="blah blah blah")

    # Verify the tool calls
    tool_calls = [msg for msg in response.messages if isinstance(msg, ToolCallMessage)]
    assert len(tool_calls) >= 1  # Should have at least the failing init tool call
    assert_invoked_function_call(response.messages, bad_tool.name)

    cleanup(client=client, agent_uuid=agent_uuid)


def test_init_tool_rule_always_fails_multiple_tools():
    """
    Test an init tool rule that always fails when called. The agent has more than one tool available.

    Once that tool fails and the agent removes that tool, the agent should have other tools available.
    """
    client = create_client()
    cleanup(client=client, agent_uuid=agent_uuid)

    # Create tools
    bad_tool = client.create_or_update_tool(auto_error)

    # Create tool rule: InitToolRule
    tool_rule = InitToolRule(
        tool_name=bad_tool.name,
    )

    # Set up agent with the tool rule
    claude_config = "tests/configs/llm_model_configs/claude-3-5-sonnet.json"
    agent_state = setup_agent(client, claude_config, agent_uuid, tool_rules=[tool_rule], tool_ids=[bad_tool.id], include_base_tools=True)

    # Start conversation
    response = client.user_message(agent_id=agent_state.id, message="blah blah blah")

    # Verify the tool calls
    tool_calls = [msg for msg in response.messages if isinstance(msg, ToolCallMessage)]
    assert len(tool_calls) >= 1  # Should have at least the failing init tool call
    assert_invoked_function_call(response.messages, bad_tool.name)

    cleanup(client=client, agent_uuid=agent_uuid)


def test_continue_tool_rule():
    """Test the continue tool rule by forcing the send_message tool to continue"""
    client = create_client()
    cleanup(client=client, agent_uuid=agent_uuid)

    continue_tool_rule = ContinueToolRule(
        tool_name="send_message",
    )
    terminal_tool_rule = TerminalToolRule(
        tool_name="core_memory_append",
    )
    rules = [continue_tool_rule, terminal_tool_rule]

    core_memory_append_tool = client.get_tool_id("core_memory_append")
    send_message_tool = client.get_tool_id("send_message")

    # Set up agent with the tool rule
    claude_config = "tests/configs/llm_model_configs/claude-3-5-sonnet.json"
    agent_state = setup_agent(
        client,
        claude_config,
        agent_uuid,
        tool_rules=rules,
        tool_ids=[core_memory_append_tool, send_message_tool],
        include_base_tools=False,
        include_base_tool_rules=False,
    )

    # Start conversation
    response = client.user_message(agent_id=agent_state.id, message="blah blah blah")

    # Verify the tool calls
    tool_calls = [msg for msg in response.messages if isinstance(msg, ToolCallMessage)]
    assert len(tool_calls) >= 1
    assert_invoked_function_call(response.messages, "send_message")
    assert_invoked_function_call(response.messages, "core_memory_append")

    # ensure send_message called before core_memory_append
    send_message_call_index = None
    core_memory_append_call_index = None
    for i, call in enumerate(tool_calls):
        if call.tool_call.name == "send_message":
            send_message_call_index = i
        if call.tool_call.name == "core_memory_append":
            core_memory_append_call_index = i
    assert send_message_call_index < core_memory_append_call_index, "send_message should have been called before core_memory_append"

    cleanup(client=client, agent_uuid=agent_uuid)