MemGPT/tests/integration_test_agent_tool_graph.py
Sarah Wooders 3fd29d8cc5 feat: add "always continue" tool rule and configure default tool rules (#1033)
Co-authored-by: Shubham Naik <shub@letta.com>
2025-02-19 14:46:37 -08:00

771 lines
28 KiB
Python

import time
import uuid
import pytest
from letta import create_client
from letta.schemas.letta_message import ToolCallMessage
from letta.schemas.tool_rule import ChildToolRule, ConditionalToolRule, ContinueToolRule, InitToolRule, TerminalToolRule
from tests.helpers.endpoints_helper import (
assert_invoked_function_call,
assert_invoked_send_message_with_keyword,
assert_sanity_checks,
setup_agent,
)
from tests.helpers.utils import cleanup
# Deterministic agent name for this module: a UUIDv5 of a fixed label in the
# DNS namespace, so the value is identical on every run.
namespace = uuid.NAMESPACE_DNS
agent_uuid = str(uuid.uuid5(namespace, "test_agent_tool_graph"))

# Model configuration used by the tests that do not select their own.
config_file = "tests/configs/llm_model_configs/openai-gpt-4o.json"


# --- Contrived tools for this test case ---
# First link of the secret-word chain: takes no input and returns the fixed
# token that second_secret_word compares its argument against.
# NOTE(review): this function is passed to client.create_or_update_tool, so the
# docstring is presumably used as the tool description — confirm before rewording.
def first_secret_word() -> str:
    """
    Call this to retrieve the first secret word, which you will need for the second_secret_word function.
    """
    return "v0iq020i0g"
# Second link of the secret-word chain: validates the token produced by
# first_secret_word and returns the next one.
# NOTE(review): this function is passed to client.create_or_update_tool, so the
# docstring is presumably used as the tool description — confirm before rewording.
def second_secret_word(prev_secret_word: str) -> str:
    """
    Call this to retrieve the second secret word, which you will need for the third_secret_word function. If you get the word wrong, this function will error.
    Args:
        prev_secret_word (str): The secret word retrieved from calling first_secret_word.
    """
    # Named once (and kept local so the function stays self-contained) instead
    # of repeating the literal in both the check and the error message.
    expected = "v0iq020i0g"
    if prev_secret_word != expected:
        raise RuntimeError(f"Expected secret {expected}, got {prev_secret_word}")
    return "4rwp2b4gxq"
# Third link of the secret-word chain: validates the token produced by
# second_secret_word and returns the next one.
# NOTE(review): this function is passed to client.create_or_update_tool, so the
# docstring is presumably used as the tool description — confirm before rewording.
def third_secret_word(prev_secret_word: str) -> str:
    """
    Call this to retrieve the third secret word, which you will need for the fourth_secret_word function. If you get the word wrong, this function will error.
    Args:
        prev_secret_word (str): The secret word retrieved from calling second_secret_word.
    """
    # Named once (and kept local so the function stays self-contained) instead
    # of repeating the literal in both the check and the error message.
    expected = "4rwp2b4gxq"
    if prev_secret_word != expected:
        raise RuntimeError(f'Expected secret "{expected}", got {prev_secret_word}')
    return "hj2hwibbqm"
# Last link of the secret-word chain: validates the token produced by
# third_secret_word (flip_coin and flip_coin_hard return the same token) and
# returns the final word.
# NOTE(review): this function is passed to client.create_or_update_tool, so the
# docstring is presumably used as the tool description — confirm before rewording.
def fourth_secret_word(prev_secret_word: str) -> str:
    """
    Call this to retrieve the last secret word, which you will need to output in a send_message later. If you get the word wrong, this function will error.
    Args:
        prev_secret_word (str): The secret word retrieved from calling third_secret_word.
    """
    # Named once (and kept local so the function stays self-contained) instead
    # of repeating the literal in both the check and the error message.
    expected = "hj2hwibbqm"
    if prev_secret_word != expected:
        raise RuntimeError(f"Expected secret {expected}, got {prev_secret_word}")
    return "banana"
# Randomised tool: half of the calls return "", the other half return the token
# that fourth_secret_word accepts.
def flip_coin():
    """
    Call this to retrieve the password to the secret word, which you will need to output in a send_message later.
    If it returns an empty string, try flipping again!
    Returns:
        str: The password or an empty string
    """
    # Imported inside the body so the function carries its own dependency.
    import random

    # Single uniform draw in [0, 1): lower half -> nothing, upper half -> password.
    draw = random.random()
    return "" if draw < 0.5 else "hj2hwibbqm"
# Randomised tool with three outcomes from one draw: "" (50%), "START_OVER"
# (25%) or the token that fourth_secret_word accepts (25%).
def flip_coin_hard():
    """
    Call this to retrieve the password to the secret word, which you will need to output in a send_message later.
    If it returns an empty string, try flipping again!
    Returns:
        str: The password or an empty string
    """
    # Imported inside the body so the function carries its own dependency.
    import random

    roll = random.random()
    # [0.75, 1) -> password, [0.5, 0.75) -> "START_OVER", [0, 0.5) -> ""
    if roll >= 0.75:
        return "hj2hwibbqm"
    if roll >= 0.5:
        return "START_OVER"
    return ""
# Randomised gate tool: returns True on roughly half of the calls. The
# conditional-rule test maps a True result to the next tool.
def can_play_game() -> bool:
    """
    Call this to start the tool chain.
    """
    # Imported inside the body so the function carries its own dependency.
    import random
    return random.random() < 0.5
# Tool whose output is always None; used to exercise a ConditionalToolRule
# whose mapping cannot match and which has no default child.
def return_none() -> None:
    """
    Really simple function
    """
    return None
# Tool that unconditionally raises RuntimeError; the init-tool-rule tests use
# it as a tool that always fails.
def auto_error():
    """
    If you call this function, it will throw an error automatically.
    """
    raise RuntimeError("This should never be called.")
@pytest.mark.timeout(60)  # Sets a 60-second timeout for the test since this could loop infinitely
def test_single_path_agent_tool_call_graph(mock_e2b_api_key_none):
    """
    Drive an agent through a strictly linear tool chain and verify the call order.

    Tool Flow:
        first_secret_word -> second_secret_word -> third_secret_word -> fourth_secret_word -> send_message
    """
    client = create_client()
    cleanup(client=client, agent_uuid=agent_uuid)

    # Add tools (auto_error is attached to the agent but appears in no rule)
    t1 = client.create_or_update_tool(first_secret_word)
    t2 = client.create_or_update_tool(second_secret_word)
    t3 = client.create_or_update_tool(third_secret_word)
    t4 = client.create_or_update_tool(fourth_secret_word)
    t_err = client.create_or_update_tool(auto_error)
    tools = [t1, t2, t3, t4, t_err]

    # Make tool rules
    tool_rules = [
        InitToolRule(tool_name="first_secret_word"),
        ChildToolRule(tool_name="first_secret_word", children=["second_secret_word"]),
        ChildToolRule(tool_name="second_secret_word", children=["third_secret_word"]),
        ChildToolRule(tool_name="third_secret_word", children=["fourth_secret_word"]),
        ChildToolRule(tool_name="fourth_secret_word", children=["send_message"]),
        TerminalToolRule(tool_name="send_message"),
    ]

    # Make agent state
    agent_state = setup_agent(client, config_file, agent_uuid=agent_uuid, tool_ids=[t.id for t in tools], tool_rules=tool_rules)
    response = client.user_message(agent_id=agent_state.id, message="What is the fourth secret word?")

    # Make checks
    assert_sanity_checks(response)

    # Assert the tools were called
    assert_invoked_function_call(response.messages, "first_secret_word")
    assert_invoked_function_call(response.messages, "second_secret_word")
    assert_invoked_function_call(response.messages, "third_secret_word")
    assert_invoked_function_call(response.messages, "fourth_secret_word")

    # Check ordering of tool calls: the observed sequence must follow the
    # expected chain from its start. Comparing against a slice (rather than
    # popping from a list) makes surplus tool calls fail the assertion instead
    # of raising IndexError.
    expected_order = [t.name for t in (t1, t2, t3, t4)] + ["send_message"]
    actual_order = [m.tool_call.name for m in response.messages if isinstance(m, ToolCallMessage)]
    assert actual_order == expected_order[: len(actual_order)], f"Expected tool calls {expected_order}, got {actual_order}"

    # Check final send message contains "banana" (the value fourth_secret_word returns)
    assert_invoked_send_message_with_keyword(response.messages, "banana")

    print(f"Got successful response from client: \n\n{response}")
    cleanup(client=client, agent_uuid=agent_uuid)
def test_check_tool_rules_with_different_models(mock_e2b_api_key_none):
    """Test that tool rules are properly checked for different model configurations."""
    client = create_client()

    config_files = [
        "tests/configs/llm_model_configs/claude-3-5-sonnet.json",
        "tests/configs/llm_model_configs/openai-gpt-3.5-turbo.json",
        "tests/configs/llm_model_configs/openai-gpt-4o.json",
    ]

    # Create two test tools, both declared as initial tools
    t1_name = "first_secret_word"
    t2_name = "second_secret_word"
    t1 = client.create_or_update_tool(first_secret_word)
    t2 = client.create_or_update_tool(second_secret_word)
    tool_rules = [InitToolRule(tool_name=t1_name), InitToolRule(tool_name=t2_name)]
    tools = [t1, t2]

    for model_config in config_files:
        # Every iteration uses a fresh agent name, so nothing else would ever
        # clean it up: do it in `finally` so a failed assertion cannot leak it.
        run_uuid = str(uuid.uuid4())
        try:
            if "gpt-4o" in model_config:
                # Structured output model (should work with multiple init tools)
                agent_state = setup_agent(
                    client, model_config, agent_uuid=run_uuid, tool_ids=[t.id for t in tools], tool_rules=tool_rules
                )
                assert agent_state is not None
            else:
                # Non-structured output model (should raise error with multiple init tools)
                with pytest.raises(ValueError, match="Multiple initial tools are not supported for non-structured models"):
                    setup_agent(client, model_config, agent_uuid=run_uuid, tool_ids=[t.id for t in tools], tool_rules=tool_rules)
        finally:
            cleanup(client=client, agent_uuid=run_uuid)

    # Create tool rule with single initial tool
    t3_name = "third_secret_word"
    t3 = client.create_or_update_tool(third_secret_word)
    tool_rules = [InitToolRule(tool_name=t3_name)]
    tools = [t3]

    for model_config in config_files:
        run_uuid = str(uuid.uuid4())
        try:
            # A single init tool should be accepted for every model configuration
            agent_state = setup_agent(client, model_config, agent_uuid=run_uuid, tool_ids=[t.id for t in tools], tool_rules=tool_rules)
            assert agent_state is not None
        finally:
            cleanup(client=client, agent_uuid=run_uuid)
def test_claude_initial_tool_rule_enforced(mock_e2b_api_key_none):
    """Test that the initial tool rule is enforced for the first message."""
    client = create_client()

    # Create tool rules that require first_secret_word to be called first,
    # followed by second_secret_word, which ends the chain
    t1_name = "first_secret_word"
    t2_name = "second_secret_word"
    t1 = client.create_or_update_tool(first_secret_word)
    t2 = client.create_or_update_tool(second_secret_word)
    tool_rules = [
        InitToolRule(tool_name=t1_name),
        ChildToolRule(tool_name=t1_name, children=[t2_name]),
        TerminalToolRule(tool_name=t2_name),
    ]
    tools = [t1, t2]

    # Make agent state
    anthropic_config_file = "tests/configs/llm_model_configs/claude-3-5-sonnet.json"
    for i in range(3):
        # Fresh agent name per iteration; cleaned up in `finally` so a failed
        # assertion cannot leave the agent behind.
        run_uuid = str(uuid.uuid4())
        try:
            agent_state = setup_agent(
                client, anthropic_config_file, agent_uuid=run_uuid, tool_ids=[t.id for t in tools], tool_rules=tool_rules
            )
            response = client.user_message(agent_id=agent_state.id, message="What is the second secret word?")

            assert_sanity_checks(response)
            messages = response.messages

            assert_invoked_function_call(messages, "first_secret_word")
            assert_invoked_function_call(messages, "second_secret_word")

            # The observed calls must follow the expected chain from its start.
            # A slice comparison is used so that a run stopping before
            # send_message still passes, while surplus calls fail the assertion
            # instead of raising IndexError.
            expected_order = [t.name for t in (t1, t2)] + ["send_message"]
            actual_order = [m.tool_call.name for m in messages if isinstance(m, ToolCallMessage)]
            assert actual_order == expected_order[: len(actual_order)], f"Expected tool calls {expected_order}, got {actual_order}"

            print(f"Passed iteration {i}")
        finally:
            cleanup(client=client, agent_uuid=run_uuid)

        # Implement exponential backoff with initial time of 10 seconds
        if i < 2:
            backoff_time = 10 * (2**i)
            time.sleep(backoff_time)
@pytest.mark.timeout(60)  # Sets a 60-second timeout for the test since this could loop infinitely
def test_agent_no_structured_output_with_one_child_tool(mock_e2b_api_key_none):
    """
    Check a linear chain of base tools against two model configurations, retrying flaky runs.

    Tool Flow:
        archival_memory_search -> archival_memory_insert -> send_message

    Each configuration gets up to three attempts; the test fails only if every
    attempt for a configuration fails an assertion.
    """
    client = create_client()
    cleanup(client=client, agent_uuid=agent_uuid)

    send_message = client.server.tool_manager.get_tool_by_name(tool_name="send_message", actor=client.user)
    archival_memory_search = client.server.tool_manager.get_tool_by_name(tool_name="archival_memory_search", actor=client.user)
    archival_memory_insert = client.server.tool_manager.get_tool_by_name(tool_name="archival_memory_insert", actor=client.user)

    # Make tool rules
    tool_rules = [
        InitToolRule(tool_name="archival_memory_search"),
        ChildToolRule(tool_name="archival_memory_search", children=["archival_memory_insert"]),
        ChildToolRule(tool_name="archival_memory_insert", children=["send_message"]),
        TerminalToolRule(tool_name="send_message"),
    ]
    tools = [send_message, archival_memory_search, archival_memory_insert]

    config_files = [
        "tests/configs/llm_model_configs/claude-3-5-sonnet.json",
        "tests/configs/llm_model_configs/openai-gpt-4o.json",
    ]

    for config in config_files:
        max_retries = 3
        last_error = None

        for attempt in range(max_retries):
            try:
                agent_state = setup_agent(client, config, agent_uuid=agent_uuid, tool_ids=[t.id for t in tools], tool_rules=tool_rules)
                response = client.user_message(agent_id=agent_state.id, message="hi. run archival memory search")

                # Make checks
                assert_sanity_checks(response)

                # Assert the tools were called
                assert_invoked_function_call(response.messages, "archival_memory_search")
                assert_invoked_function_call(response.messages, "archival_memory_insert")
                assert_invoked_function_call(response.messages, "send_message")

                # Check ordering of tool calls. A slice comparison turns surplus
                # tool calls into an AssertionError (which is retried) instead
                # of an IndexError (which would abort the test immediately).
                expected_order = [t.name for t in (archival_memory_search, archival_memory_insert, send_message)]
                actual_order = [m.tool_call.name for m in response.messages if isinstance(m, ToolCallMessage)]
                assert actual_order == expected_order[: len(actual_order)], f"Expected tool calls {expected_order}, got {actual_order}"

                print(f"Got successful response from client: \n\n{response}")
                break  # Test passed, exit retry loop

            except AssertionError as e:
                last_error = e
                print(f"Attempt {attempt + 1} failed, retrying..." if attempt < max_retries - 1 else f"All {max_retries} attempts failed")
                cleanup(client=client, agent_uuid=agent_uuid)
        else:
            # Reached only when no attempt hit `break`, i.e. every attempt
            # failed. A success on the final attempt must not re-raise an
            # error recorded by an earlier attempt.
            raise last_error

        cleanup(client=client, agent_uuid=agent_uuid)
@pytest.mark.timeout(60)  # Sets a 60-second timeout for the test since this could loop infinitely
def test_agent_conditional_tool_easy(mock_e2b_api_key_none):
    """
    Test the agent with a conditional tool that has a child tool.

    Tool Flow:

         -------
        |       |
        |       v
         -- flip_coin
                |
                v
        fourth_secret_word
    """
    client = create_client()
    cleanup(client=client, agent_uuid=agent_uuid)

    coin_flip_name = "flip_coin"
    secret_word_tool = "fourth_secret_word"
    flip_coin_tool = client.create_or_update_tool(flip_coin)
    reveal_secret = client.create_or_update_tool(fourth_secret_word)

    # flip_coin loops back to itself by default; only the password output
    # routes to fourth_secret_word, which is terminal.
    flip_rule = ConditionalToolRule(
        tool_name=coin_flip_name,
        default_child=coin_flip_name,
        child_output_mapping={
            "hj2hwibbqm": secret_word_tool,
        },
    )
    tool_rules = [InitToolRule(tool_name=coin_flip_name), flip_rule, TerminalToolRule(tool_name=secret_word_tool)]
    tool_ids = [flip_coin_tool.id, reveal_secret.id]

    config_file = "tests/configs/llm_model_configs/claude-3-5-sonnet.json"
    agent_state = setup_agent(client, config_file, agent_uuid=agent_uuid, tool_ids=tool_ids, tool_rules=tool_rules)
    response = client.user_message(agent_id=agent_state.id, message="flip a coin until you get the secret word")

    # Make checks
    assert_sanity_checks(response)

    # Assert the tools were called
    assert_invoked_function_call(response.messages, "flip_coin")
    assert_invoked_function_call(response.messages, "fourth_secret_word")

    # Check ordering of tool calls: every call other than fourth_secret_word
    # must be flip_coin and must come before the first fourth_secret_word call.
    seen_secret_word = False
    for message in response.messages:
        if not isinstance(message, ToolCallMessage):
            continue
        if message.tool_call.name != secret_word_tool:
            assert message.tool_call.name == coin_flip_name
            assert not seen_secret_word
            continue
        seen_secret_word = True

    # Ensure fourth_secret_word was reached at least once
    assert seen_secret_word

    print(f"Got successful response from client: \n\n{response}")
    cleanup(client=client, agent_uuid=agent_uuid)
@pytest.mark.timeout(90)  # Longer timeout since this test has more steps
def test_agent_conditional_tool_hard(mock_e2b_api_key_none):
    """
    Test the agent with a complex conditional tool graph

    Tool Flow:

        can_play_game <---+
              |           |
              v           |
        flip_coin_hard ---+
              |
              v
        fourth_secret_word
    """
    client = create_client()
    cleanup(client=client, agent_uuid=agent_uuid)

    # Create tools
    play_game = "can_play_game"
    coin_flip_name = "flip_coin_hard"
    final_tool = "fourth_secret_word"
    play_game_tool = client.create_or_update_tool(can_play_game)
    flip_coin_tool = client.create_or_update_tool(flip_coin_hard)
    reveal_secret = client.create_or_update_tool(fourth_secret_word)

    # Make tool rules - chain them together with conditional rules
    tool_rules = [
        InitToolRule(tool_name=play_game),
        ConditionalToolRule(
            tool_name=play_game,
            default_child=play_game,  # Keep trying if we can't play
            child_output_mapping={True: coin_flip_name},  # Only allow access when can_play_game returns True
        ),
        ConditionalToolRule(
            tool_name=coin_flip_name,
            default_child=coin_flip_name,  # Empty result: flip again
            child_output_mapping={"hj2hwibbqm": final_tool, "START_OVER": play_game},
        ),
        TerminalToolRule(tool_name=final_tool),
    ]

    # Setup agent with all tools
    tools = [play_game_tool, flip_coin_tool, reveal_secret]
    config_file = "tests/configs/llm_model_configs/claude-3-5-sonnet.json"
    agent_state = setup_agent(client, config_file, agent_uuid=agent_uuid, tool_ids=[t.id for t in tools], tool_rules=tool_rules)

    # Send a neutral message; the tool rules drive which tools get called
    response = client.user_message(agent_id=agent_state.id, message="hi")

    # Make checks
    assert_sanity_checks(response)

    # Assert the entry tool and the final tool were called
    assert_invoked_function_call(response.messages, play_game)
    assert_invoked_function_call(response.messages, final_tool)

    # Every tool call must be one of the two looping tools or the final tool.
    # (The original re-asserted membership inside the branch that had already
    # established it; that no-op assertion is dropped.)
    looping_tools = (play_game, coin_flip_name)
    found_words = []
    for m in response.messages:
        if not isinstance(m, ToolCallMessage):
            continue
        name = m.tool_call.name
        if name in looping_tools:
            continue
        assert name == final_tool, f"Found {name} but expected {final_tool}"
        found_words.append(name)

    # Ensure the final tool was called exactly once.
    # NOTE: this does not check where in the sequence that call happened.
    assert found_words == [final_tool]

    print(f"Got successful response from client: \n\n{response}")
    cleanup(client=client, agent_uuid=agent_uuid)
@pytest.mark.timeout(60)
def test_agent_conditional_tool_without_default_child(mock_e2b_api_key_none):
    """
    Test the agent with a conditional tool that allows any child tool to be called if a function returns None.

    Tool Flow:

        return_none
             |
             v
        any tool...  <-- When output doesn't match mapping, agent can call any tool
    """
    client = create_client()
    cleanup(client=client, agent_uuid=agent_uuid)

    # Create tools - return_none is the entry point, first_secret_word is an extra option
    tool_name = "return_none"
    tool = client.create_or_update_tool(return_none)
    secret_word = client.create_or_update_tool(first_secret_word)

    # The single mapped output is a string return_none never produces, and no
    # default child is given, so the mapping never selects the next tool.
    conditional_rule = ConditionalToolRule(
        tool_name=tool_name,
        default_child=None,  # Allow any tool to be called if output doesn't match
        child_output_mapping={"anything but none": "first_secret_word"},
    )
    tool_rules = [InitToolRule(tool_name=tool_name), conditional_rule]
    tool_ids = [tool.id, secret_word.id]

    # Setup agent with all tools
    agent_state = setup_agent(client, config_file, agent_uuid=agent_uuid, tool_ids=tool_ids, tool_rules=tool_rules)

    # Ask the agent to call something and then send a message
    response = client.user_message(agent_id=agent_state.id, message="call a function, any function. then call send_message")

    # Make checks
    assert_sanity_checks(response)

    # Assert return_none was called
    assert_invoked_function_call(response.messages, tool_name)

    # Assert some other (named) tool was called after the first return_none call
    call_names = [m.tool_call.name for m in response.messages if isinstance(m, ToolCallMessage)]
    found_any_tool = False
    if tool_name in call_names:
        later_calls = call_names[call_names.index(tool_name) + 1 :]
        found_any_tool = any(name and name != tool_name for name in later_calls)

    assert found_any_tool, "Should have called any tool after return_none"

    print(f"Got successful response from client: \n\n{response}")
    cleanup(client=client, agent_uuid=agent_uuid)
@pytest.mark.timeout(60)
def test_agent_reload_remembers_function_response(mock_e2b_api_key_none):
    """
    Test that when an agent is reloaded, it remembers the last function response for conditional tool chaining.

    Tool Flow:

        flip_coin
            |
            v
        fourth_secret_word  <-- Should remember coin flip result after reload
    """
    client = create_client()
    cleanup(client=client, agent_uuid=agent_uuid)

    # Create tools
    flip_coin_name = "flip_coin"
    secret_word = "fourth_secret_word"
    flip_coin_tool = client.create_or_update_tool(flip_coin)
    secret_word_tool = client.create_or_update_tool(fourth_secret_word)

    # Make tool rules - map the password output of flip_coin to fourth_secret_word
    coin_rule = ConditionalToolRule(
        tool_name=flip_coin_name,
        default_child=flip_coin_name,  # Any other output routes back to flip_coin
        child_output_mapping={"hj2hwibbqm": secret_word},
    )
    tool_rules = [InitToolRule(tool_name=flip_coin_name), coin_rule, TerminalToolRule(tool_name=secret_word)]
    tool_ids = [flip_coin_tool.id, secret_word_tool.id]

    # Setup initial agent
    agent_state = setup_agent(client, config_file, agent_uuid=agent_uuid, tool_ids=tool_ids, tool_rules=tool_rules)

    # Run one user turn and check both tools were invoked during it
    response = client.user_message(agent_id=agent_state.id, message="flip a coin")
    assert_invoked_function_call(response.messages, flip_coin_name)
    assert_invoked_function_call(response.messages, secret_word)

    found_fourth_secret = any(isinstance(m, ToolCallMessage) and m.tool_call.name == secret_word for m in response.messages)
    assert found_fourth_secret, "Reloaded agent should remember coin flip result and call fourth_secret_word if True"

    # Reload the agent and check the last function response is still available
    reloaded_agent = client.server.load_agent(agent_id=agent_state.id, actor=client.user)
    assert reloaded_agent.last_function_response is not None

    print(f"Got successful response from client: \n\n{response}")
    cleanup(client=client, agent_uuid=agent_uuid)
@pytest.mark.timeout(60)  # Sets a 60-second timeout for the test since this could loop infinitely
def test_simple_tool_rule(mock_e2b_api_key_none):
    """
    Test a simple tool rule where fourth_secret_word must be called after flip_coin.

    Tool Flow:

        flip_coin
           |
           v
        fourth_secret_word
    """
    client = create_client()
    cleanup(client=client, agent_uuid=agent_uuid)

    # Create tools (the last two are extra tools that no rule refers to)
    flip_coin_name = "flip_coin"
    secret_word = "fourth_secret_word"
    flip_coin_tool = client.create_or_update_tool(flip_coin)
    secret_word_tool = client.create_or_update_tool(fourth_secret_word)
    another_secret_word_tool = client.create_or_update_tool(first_secret_word)
    random_tool = client.create_or_update_tool(can_play_game)
    tools = [flip_coin_tool, secret_word_tool, another_secret_word_tool, random_tool]

    # Create tool rule: after flip_coin, must call fourth_secret_word
    tool_rule = ConditionalToolRule(
        tool_name=flip_coin_name,
        default_child=secret_word,
        child_output_mapping={"*": secret_word},
    )

    # Set up agent with the tool rule
    agent_state = setup_agent(
        client, config_file, agent_uuid, tool_rules=[tool_rule], tool_ids=[t.id for t in tools], include_base_tools=False
    )

    # Start conversation
    response = client.user_message(agent_id=agent_state.id, message="Help me test the tools.")

    # Verify the tool calls
    tool_calls = [msg for msg in response.messages if isinstance(msg, ToolCallMessage)]
    assert len(tool_calls) >= 2  # Should have at least flip_coin and fourth_secret_word calls
    assert_invoked_function_call(response.messages, flip_coin_name)
    assert_invoked_function_call(response.messages, secret_word)

    # Find the first flip_coin call. Explicit assertions replace the
    # ValueError/IndexError the original lookup could raise when flip_coin is
    # missing or is the last call.
    call_names = [call.tool_call.name for call in tool_calls]
    assert flip_coin_name in call_names, "flip_coin should have been called"
    flip_coin_call_index = call_names.index(flip_coin_name)

    # Verify that fourth_secret_word was called right after flip_coin
    assert flip_coin_call_index + 1 < len(call_names), "Fourth secret word should be called after flip_coin"
    assert call_names[flip_coin_call_index + 1] == secret_word, "Fourth secret word should be called after flip_coin"

    # Clean up using the same identifier that was passed to setup_agent, as
    # every other test in this module does (the original passed agent_state.id).
    cleanup(client=client, agent_uuid=agent_uuid)
def test_init_tool_rule_always_fails_one_tool():
    """
    Test an init tool rule that always fails when called. The agent has only one tool available.
    Once that tool fails and the agent removes that tool, the agent should have 0 tools available.
    This means that the agent should return from `step` early.
    """
    client = create_client()
    cleanup(client=client, agent_uuid=agent_uuid)

    # Create tools
    bad_tool = client.create_or_update_tool(auto_error)

    # Create tool rule: InitToolRule
    tool_rule = InitToolRule(
        tool_name=bad_tool.name,
    )

    # Set up agent with the tool rule; auto_error is its only tool
    claude_config = "tests/configs/llm_model_configs/claude-3-5-sonnet.json"
    agent_state = setup_agent(client, claude_config, agent_uuid, tool_rules=[tool_rule], tool_ids=[bad_tool.id], include_base_tools=False)

    # Start conversation
    response = client.user_message(agent_id=agent_state.id, message="blah blah blah")

    # Verify the tool calls
    tool_calls = [msg for msg in response.messages if isinstance(msg, ToolCallMessage)]
    assert len(tool_calls) >= 1  # Should have at least the auto_error call
    assert_invoked_function_call(response.messages, bad_tool.name)

    # Remove the agent afterwards, consistent with the other tests in this module
    cleanup(client=client, agent_uuid=agent_uuid)
def test_init_tool_rule_always_fails_multiple_tools():
    """
    Test an init tool rule that always fails when called. The agent has more than one tool available
    (the failing tool plus the base tools).
    Once that tool fails and the agent removes that tool, the agent should have other tools available.
    """
    client = create_client()
    cleanup(client=client, agent_uuid=agent_uuid)

    # Create tools
    bad_tool = client.create_or_update_tool(auto_error)

    # Create tool rule: InitToolRule
    tool_rule = InitToolRule(
        tool_name=bad_tool.name,
    )

    # Set up agent with the tool rule; base tools are included alongside auto_error
    claude_config = "tests/configs/llm_model_configs/claude-3-5-sonnet.json"
    agent_state = setup_agent(client, claude_config, agent_uuid, tool_rules=[tool_rule], tool_ids=[bad_tool.id], include_base_tools=True)

    # Start conversation
    response = client.user_message(agent_id=agent_state.id, message="blah blah blah")

    # Verify the tool calls
    tool_calls = [msg for msg in response.messages if isinstance(msg, ToolCallMessage)]
    assert len(tool_calls) >= 1  # Should have at least the auto_error call
    assert_invoked_function_call(response.messages, bad_tool.name)

    # Remove the agent afterwards, consistent with the other tests in this module
    cleanup(client=client, agent_uuid=agent_uuid)
def test_continue_tool_rule():
    """Test the continue tool rule by forcing the send_message tool to continue"""
    client = create_client()
    cleanup(client=client, agent_uuid=agent_uuid)

    # send_message must continue the chain; core_memory_append ends it
    continue_tool_rule = ContinueToolRule(
        tool_name="send_message",
    )
    terminal_tool_rule = TerminalToolRule(
        tool_name="core_memory_append",
    )
    rules = [continue_tool_rule, terminal_tool_rule]

    core_memory_append_tool = client.get_tool_id("core_memory_append")
    send_message_tool = client.get_tool_id("send_message")

    # Set up agent with the tool rule
    claude_config = "tests/configs/llm_model_configs/claude-3-5-sonnet.json"
    agent_state = setup_agent(
        client,
        claude_config,
        agent_uuid,
        tool_rules=rules,
        tool_ids=[core_memory_append_tool, send_message_tool],
        include_base_tools=False,
        include_base_tool_rules=False,
    )

    # Start conversation
    response = client.user_message(agent_id=agent_state.id, message="blah blah blah")

    # Verify the tool calls
    tool_calls = [msg for msg in response.messages if isinstance(msg, ToolCallMessage)]
    assert len(tool_calls) >= 1
    assert_invoked_function_call(response.messages, "send_message")
    assert_invoked_function_call(response.messages, "core_memory_append")

    # ensure send_message called before core_memory_append
    # (each index records the LAST occurrence of that tool in the call list)
    send_message_call_index = None
    core_memory_append_call_index = None
    for i, call in enumerate(tool_calls):
        if call.tool_call.name == "send_message":
            send_message_call_index = i
        if call.tool_call.name == "core_memory_append":
            core_memory_append_call_index = i

    # Fail with a readable assertion rather than a TypeError from comparing
    # None with an int if either call is absent from tool_calls.
    assert send_message_call_index is not None, "send_message should have been called"
    assert core_memory_append_call_index is not None, "core_memory_append should have been called"
    assert send_message_call_index < core_memory_append_call_index, "send_message should have been called before core_memory_append"

    # Remove the agent afterwards, consistent with the other tests in this module
    cleanup(client=client, agent_uuid=agent_uuid)