# MemGPT/tests/integration_test_experimental.py
import os
import threading
import time
import uuid
import httpx
import openai
import pytest
from dotenv import load_dotenv
from letta_client import CreateBlock, Letta, MessageCreate, TextContent
from openai.types.chat.chat_completion_chunk import ChatCompletionChunk
from letta.agents.letta_agent import LettaAgent
from letta.schemas.embedding_config import EmbeddingConfig
from letta.schemas.enums import MessageStreamStatus
from letta.schemas.letta_message_content import TextContent as LettaTextContent
from letta.schemas.llm_config import LLMConfig
from letta.schemas.message import MessageCreate as LettaMessageCreate
from letta.schemas.tool import ToolCreate
from letta.schemas.usage import LettaUsageStatistics
from letta.services.agent_manager import AgentManager
from letta.services.block_manager import BlockManager
from letta.services.message_manager import MessageManager
from letta.services.passage_manager import PassageManager
from letta.services.tool_manager import ToolManager
from letta.services.user_manager import UserManager
from letta.settings import model_settings


# --- Server Management --- #


def _run_server():
    """Starts the Letta server in a background thread."""
    load_dotenv()

    from letta.server.rest_api.app import start_server

    start_server(debug=True)


@pytest.fixture(scope="session")
def server_url():
    """Ensures a server is running and returns its base URL."""
    url = os.getenv("LETTA_SERVER_URL", "http://localhost:8283")
    if not os.getenv("LETTA_SERVER_URL"):
        thread = threading.Thread(target=_run_server, daemon=True)
        thread.start()
        time.sleep(5)  # Allow server startup time; see the polling sketch below.
    return url
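

# A more robust alternative to the fixed sleep above: poll a health endpoint
# until the server answers. A minimal sketch; the `/v1/health/` route is an
# assumption about the Letta REST API and may need adjusting.
def _wait_for_server(url: str, timeout: float = 30.0) -> None:
    deadline = time.time() + timeout
    while time.time() < deadline:
        try:
            if httpx.get(f"{url}/v1/health/").status_code == 200:
                return
        except httpx.TransportError:
            pass  # Server not accepting connections yet; retry.
        time.sleep(0.5)
    raise RuntimeError(f"Letta server at {url} did not become ready in {timeout}s")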
# --- Client Setup --- #


@pytest.fixture(scope="session")
def client(server_url):
    """Creates a REST client for testing."""
    client = Letta(base_url=server_url)
    # llm_config = LLMConfig(
    #     model="claude-3-7-sonnet-latest",
    #     model_endpoint_type="anthropic",
    #     model_endpoint="https://api.anthropic.com/v1",
    #     context_window=32000,
    #     handle="anthropic/claude-3-7-sonnet-latest",
    #     put_inner_thoughts_in_kwargs=True,
    #     max_tokens=4096,
    # )
    #
    # client = create_client(base_url=server_url, token=None)
    # client.set_default_llm_config(llm_config)
    # client.set_default_embedding_config(EmbeddingConfig.default_config(provider="openai"))
    yield client
@pytest.fixture(scope="function")
def roll_dice_tool(client):
    def roll_dice():
        """
        Rolls a 6 sided die.

        Returns:
            str: The roll result.
        """
        # Deliberately deterministic (and deliberately not a valid d6 roll) so
        # tests can assert on a fixed string. The import is local because the
        # tool's source is serialized and executed outside this module.
        import time

        time.sleep(1)
        return "Rolled a 10!"

    tool = client.tools.upsert_from_function(func=roll_dice)

    # Yield the created tool
    yield tool
@pytest.fixture(scope="function")
def weather_tool(client):
    def get_weather(location: str) -> str:
        """
        Fetches the current weather for a given location.

        Parameters:
            location (str): The location to get the weather for.

        Returns:
            str: A formatted string describing the weather in the given location.

        Raises:
            RuntimeError: If the request to fetch weather data fails.
        """
        # Local imports: the tool's source is executed outside this module.
        import time

        import requests

        time.sleep(5)
        url = f"https://wttr.in/{location}?format=%C+%t"
        response = requests.get(url, timeout=30)
        if response.status_code == 200:
            weather_data = response.text
            return f"The weather in {location} is {weather_data}."
        else:
            raise RuntimeError(f"Failed to get weather data, status code: {response.status_code}")

    tool = client.tools.upsert_from_function(func=get_weather)

    # Yield the created tool
    yield tool
@pytest.fixture(scope="function")
def rethink_tool(client):
    def rethink_memory(agent_state: "AgentState", new_memory: str, target_block_label: str) -> str:  # type: ignore
        """
        Re-evaluate the memory in block_name, integrating new and updated facts.
        Replace outdated information with the most likely truths, avoiding redundancy with original memories.
        Ensure consistency with other memory blocks.

        Args:
            new_memory (str): The new memory with information integrated from the memory block. If there is no new information, then this should be the same as the content in the source block.
            target_block_label (str): The name of the block to write to.

        Returns:
            str: None is always returned as this function does not produce a response.
        """
        agent_state.memory.update_block_value(label=target_block_label, value=new_memory)
        return None

    tool = client.tools.upsert_from_function(func=rethink_memory)

    # Yield the created tool
    yield tool
@pytest.fixture(scope="function")
def composio_gmail_get_profile_tool(default_user):
    tool_create = ToolCreate.from_composio(action_name="GMAIL_GET_PROFILE")
    tool = ToolManager().create_or_update_composio_tool(tool_create=tool_create, actor=default_user)
    yield tool
@pytest.fixture(scope="function")
def agent_state(client, roll_dice_tool, weather_tool, rethink_tool):
    """Creates an agent and ensures cleanup after tests."""
    # llm_config = LLMConfig(
    #     model="claude-3-7-sonnet-latest",
    #     model_endpoint_type="anthropic",
    #     model_endpoint="https://api.anthropic.com/v1",
    #     context_window=32000,
    #     handle="anthropic/claude-3-7-sonnet-latest",
    #     put_inner_thoughts_in_kwargs=True,
    #     max_tokens=4096,
    # )
    agent_state = client.agents.create(
        name=f"test_compl_{str(uuid.uuid4())[5:]}",
        tool_ids=[roll_dice_tool.id, weather_tool.id, rethink_tool.id],
        include_base_tools=True,
        memory_blocks=[
            {
                "label": "human",
                "value": "Name: Matt",
            },
            {
                "label": "persona",
                "value": "Friendly agent",
            },
        ],
        llm_config=LLMConfig.default_config(model_name="gpt-4o-mini"),
        embedding_config=EmbeddingConfig.default_config(provider="openai"),
    )
    yield agent_state
    client.agents.delete(agent_state.id)
@pytest.fixture(scope="function")
def openai_client(client, roll_dice_tool, weather_tool):
    """Creates an async OpenAI-SDK client pointed at Anthropic's OpenAI-compatible endpoint."""
    client = openai.AsyncClient(
        api_key=model_settings.anthropic_api_key,
        base_url="https://api.anthropic.com/v1/",
        max_retries=0,
        http_client=httpx.AsyncClient(
            timeout=httpx.Timeout(connect=15.0, read=30.0, write=15.0, pool=15.0),
            follow_redirects=True,
            limits=httpx.Limits(
                max_connections=50,
                max_keepalive_connections=50,
                keepalive_expiry=120,
            ),
        ),
    )
    yield client
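

# A minimal usage sketch for the client above, exercising Anthropic's
# OpenAI-compatible chat completions endpoint. Not a test; the model name is
# an assumption borrowed from the configs used elsewhere in this file.
async def _example_openai_compat_stream(async_client: openai.AsyncClient) -> str:
    stream = await async_client.chat.completions.create(
        model="claude-3-5-sonnet-20241022",
        messages=[{"role": "user", "content": "Say hello."}],
        max_tokens=64,
        stream=True,
    )
    text = ""
    async for chunk in stream:  # Each item is a ChatCompletionChunk.
        if chunk.choices and chunk.choices[0].delta.content:
            text += chunk.choices[0].delta.content
    return text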
# --- Helper Functions --- #


def _assert_valid_chunk(chunk, idx, chunks):
    """Validates the structure of each streaming chunk."""
    if isinstance(chunk, ChatCompletionChunk):
        assert chunk.choices, "Each ChatCompletionChunk should have at least one choice."
    elif isinstance(chunk, LettaUsageStatistics):
        assert chunk.completion_tokens > 0, "Completion tokens must be > 0."
        assert chunk.prompt_tokens > 0, "Prompt tokens must be > 0."
        assert chunk.total_tokens > 0, "Total tokens must be > 0."
        assert chunk.step_count == 1, "Step count must be 1."
    elif isinstance(chunk, MessageStreamStatus):
        assert chunk == MessageStreamStatus.done, "Stream should end with 'done' status."
        assert idx == len(chunks) - 1, "The last chunk must be 'done'."
    else:
        pytest.fail(f"Unexpected chunk type: {chunk}")
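

# _assert_valid_chunk is not referenced by the tests below; this wrapper is a
# sketch of how it is meant to be driven once a stream has been collected into
# a list (ChatCompletionChunks, then usage statistics, then a 'done' status).
def _assert_valid_stream(chunks: list) -> None:
    for idx, chunk in enumerate(chunks):
        _assert_valid_chunk(chunk, idx, chunks)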
# --- Test Cases --- #


@pytest.mark.asyncio
@pytest.mark.parametrize("message", ["What is the weather today in SF?"])
async def test_new_agent_loop(disable_e2b_api_key, openai_client, agent_state, message):
    actor = UserManager().get_user_or_default(user_id="asf")
    agent = LettaAgent(
        agent_id=agent_state.id,
        message_manager=MessageManager(),
        agent_manager=AgentManager(),
        block_manager=BlockManager(),
        passage_manager=PassageManager(),
        actor=actor,
    )
    response = await agent.step([LettaMessageCreate(role="user", content=[LettaTextContent(text=message)])])
    assert response is not None
@pytest.mark.asyncio
@pytest.mark.parametrize("message", ["Use your rethink tool to rethink the human memory considering Matt likes chicken."])
async def test_rethink_tool(disable_e2b_api_key, openai_client, agent_state, message):
    actor = UserManager().get_user_or_default(user_id="asf")
    agent = LettaAgent(
        agent_id=agent_state.id,
        message_manager=MessageManager(),
        agent_manager=AgentManager(),
        block_manager=BlockManager(),
        passage_manager=PassageManager(),
        actor=actor,
    )
    assert "chicken" not in AgentManager().get_agent_by_id(agent_state.id, actor).memory.get_block("human").value
    await agent.step([LettaMessageCreate(role="user", content=[LettaTextContent(text=message)])])
    assert "chicken" in AgentManager().get_agent_by_id(agent_state.id, actor).memory.get_block("human").value
@pytest.mark.asyncio
async def test_multi_agent_broadcast(disable_e2b_api_key, client, openai_client, weather_tool):
    actor = UserManager().get_user_or_default(user_id="asf")

    # Clear out any stale agents from previous runs.
    stale_agents = AgentManager().list_agents(actor=actor, limit=300)
    for agent in stale_agents:
        AgentManager().delete_agent(agent_id=agent.id, actor=actor)

    manager_agent_state = client.agents.create(
        name="manager",
        include_base_tools=True,
        include_multi_agent_tools=True,
        tags=["manager"],
        model="openai/gpt-4o",
        embedding="letta/letta-free",
    )
    manager_agent = LettaAgent(
        agent_id=manager_agent_state.id,
        message_manager=MessageManager(),
        agent_manager=AgentManager(),
        block_manager=BlockManager(),
        passage_manager=PassageManager(),
        actor=actor,
    )

    tag = "subagent"
    workers = []
    for idx in range(30):
        workers.append(
            client.agents.create(
                name=f"worker_{idx}",
                include_base_tools=True,
                tags=[tag],
                tool_ids=[weather_tool.id],
                model="openai/gpt-4o",
                embedding="letta/letta-free",
            ),
        )

    await manager_agent.step(
        [
            LettaMessageCreate(
                role="user",
                content=[
                    LettaTextContent(
                        text=(
                            "Use the `send_message_to_agents_matching_tags` tool to send a message to agents with "
                            "tag 'subagent' asking them to check the weather in Seattle."
                        )
                    ),
                ],
            ),
        ]
    )
def test_multi_agent_broadcast_client(client: Letta, weather_tool):
    # delete any existing worker agents
    workers = client.agents.list(tags=["worker"])
    for worker in workers:
        client.agents.delete(agent_id=worker.id)

    # create worker agents
    num_workers = 10
    for idx in range(num_workers):
        client.agents.create(
            name=f"worker_{idx}",
            include_base_tools=True,
            tags=["worker"],
            tool_ids=[weather_tool.id],
            model="anthropic/claude-3-5-sonnet-20241022",
            embedding="letta/letta-free",
        )

    # create supervisor agent
    supervisor = client.agents.create(
        name="supervisor",
        include_base_tools=True,
        include_multi_agent_tools=True,
        model="anthropic/claude-3-5-sonnet-20241022",
        embedding="letta/letta-free",
        tags=["supervisor"],
    )

    # send a message to the supervisor
    start = time.perf_counter()
    response = client.agents.messages.create(
        agent_id=supervisor.id,
        messages=[
            MessageCreate(
                role="user",
                content=[
                    TextContent(
                        text="Use the `send_message_to_agents_matching_tags` tool to send a message to agents with tag 'worker' asking them to check the weather in Seattle."
                    )
                ],
            )
        ],
    )
    end = time.perf_counter()

    print(f"TIME ELAPSED: {end - start:.2f}s")
    for message in response.messages:
        print(message)
def test_call_weather(client: Letta, weather_tool):
    # delete any existing worker agents
    workers = client.agents.list(tags=["worker", "supervisor"])
    for worker in workers:
        client.agents.delete(agent_id=worker.id)

    # create supervisor agent
    supervisor = client.agents.create(
        name="supervisor",
        include_base_tools=True,
        tool_ids=[weather_tool.id],
        model="openai/gpt-4o",
        embedding="letta/letta-free",
        tags=["supervisor"],
    )

    # send a message to the supervisor
    start = time.perf_counter()
    response = client.agents.messages.create(
        agent_id=supervisor.id,
        messages=[
            {
                "role": "user",
                "content": "What's the weather like in Seattle?",
            }
        ],
    )
    end = time.perf_counter()

    print(f"TIME ELAPSED: {end - start:.2f}s")
    for message in response.messages:
        print(message)
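

# The printing loop in test_call_weather could be tightened into a real
# assertion; a sketch, assuming the letta_client response messages expose a
# `message_type` field (as ToolCallMessage does in recent client versions):
#
#     assert any(
#         getattr(message, "message_type", None) == "tool_call_message"
#         for message in response.messages
#     ), "Expected the supervisor to invoke the weather tool."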
def run_supervisor_worker_group(client: Letta, weather_tool, group_id: str):
    # Delete any existing agents for this group (if rerunning)
    existing_workers = client.agents.list(tags=[f"worker-{group_id}"])
    for worker in existing_workers:
        client.agents.delete(agent_id=worker.id)

    # Create worker agents
    num_workers = 50
    for idx in range(num_workers):
        client.agents.create(
            name=f"worker_{group_id}_{idx}",
            include_base_tools=True,
            tags=[f"worker-{group_id}"],
            tool_ids=[weather_tool.id],
            model="anthropic/claude-3-5-sonnet-20241022",
            embedding="letta/letta-free",
        )

    # Create supervisor agent
    supervisor = client.agents.create(
        name=f"supervisor_{group_id}",
        include_base_tools=True,
        include_multi_agent_tools=True,
        model="anthropic/claude-3-5-sonnet-20241022",
        embedding="letta/letta-free",
        tags=[f"supervisor-{group_id}"],
    )

    # Send message to supervisor to broadcast to workers
    response = client.agents.messages.create(
        agent_id=supervisor.id,
        messages=[
            {
                "role": "user",
                "content": "Use the `send_message_to_agents_matching_tags` tool to send a message to agents with tag "
                f"'worker-{group_id}' asking them to check the weather in Seattle.",
            }
        ],
    )
    return response
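

# run_supervisor_worker_group is parameterized by group_id, which suggests
# running several groups side by side. A minimal sketch of driving it
# concurrently with threads; the helper name and group ids are illustrative.
def _run_groups_concurrently(client: Letta, weather_tool, group_ids: list) -> list:
    from concurrent.futures import ThreadPoolExecutor

    with ThreadPoolExecutor(max_workers=len(group_ids)) as pool:
        futures = [pool.submit(run_supervisor_worker_group, client, weather_tool, gid) for gid in group_ids]
        return [future.result() for future in futures]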
def test_anthropic_streaming(client: Letta):
    agent_name = "anthropic_tester"
    existing_agents = client.agents.list(tags=[agent_name])
    for worker in existing_agents:
        client.agents.delete(agent_id=worker.id)

    llm_config = LLMConfig(
        model="claude-3-7-sonnet-20250219",
        model_endpoint_type="anthropic",
        model_endpoint="https://api.anthropic.com/v1",
        context_window=32000,
        handle="anthropic/claude-3-7-sonnet-20250219",
        put_inner_thoughts_in_kwargs=False,
        max_tokens=4096,
        enable_reasoner=True,
        max_reasoning_tokens=1024,
    )
    agent = client.agents.create(
        name=agent_name,
        tags=[agent_name],
        include_base_tools=True,
        embedding="letta/letta-free",
        llm_config=llm_config,
        memory_blocks=[CreateBlock(label="human", value="")],
        # tool_rules=[InitToolRule(tool_name="core_memory_append")]
    )
    response = client.agents.messages.create_stream(
        agent_id=agent.id,
        messages=[
            MessageCreate(
                role="user",
                content=[TextContent(text="Use the core memory append tool to append `banana` to the persona core memory.")],
            ),
        ],
        stream_tokens=True,
    )
    print(list(response))
def test_create_agents_telemetry(client: Letta):
    start_total = time.perf_counter()

    # delete any existing worker agents
    start_delete = time.perf_counter()
    workers = client.agents.list(tags=["worker"])
    for worker in workers:
        client.agents.delete(agent_id=worker.id)
    end_delete = time.perf_counter()
    print(f"[telemetry] Deleted {len(workers)} existing worker agents in {end_delete - start_delete:.2f}s")

    # create worker agents
    num_workers = 1
    agent_times = []
    for idx in range(num_workers):
        start = time.perf_counter()
        client.agents.create(
            name=f"worker_{idx}",
            include_base_tools=True,
            model="anthropic/claude-3-5-sonnet-20241022",
            embedding="letta/letta-free",
        )
        end = time.perf_counter()
        duration = end - start
        agent_times.append(duration)
        print(f"[telemetry] Created worker_{idx} in {duration:.2f}s")

    total_duration = time.perf_counter() - start_total
    avg_duration = sum(agent_times) / len(agent_times)
    print(f"[telemetry] Total time to create {num_workers} agents: {total_duration:.2f}s")
    print(f"[telemetry] Average agent creation time: {avg_duration:.2f}s")
    print(f"[telemetry] Fastest agent: {min(agent_times):.2f}s, Slowest agent: {max(agent_times):.2f}s")