import os import threading import time from datetime import datetime, timezone from typing import Optional from unittest.mock import AsyncMock import pytest from anthropic.types import BetaErrorResponse, BetaRateLimitError from anthropic.types.beta import BetaMessage, BetaTextBlock, BetaToolUseBlock, BetaUsage from anthropic.types.beta.messages import ( BetaMessageBatch, BetaMessageBatchErroredResult, BetaMessageBatchIndividualResponse, BetaMessageBatchRequestCounts, BetaMessageBatchSucceededResult, ) from dotenv import load_dotenv from letta_client import Letta from letta.config import LettaConfig from letta.helpers import ToolRulesSolver from letta.jobs.llm_batch_job_polling import poll_running_llm_batches from letta.orm import Base from letta.schemas.agent import AgentStepState, CreateAgent from letta.schemas.embedding_config import EmbeddingConfig from letta.schemas.enums import JobStatus, ProviderType from letta.schemas.job import BatchJob from letta.schemas.llm_config import LLMConfig from letta.schemas.tool_rule import InitToolRule from letta.server.db import db_context from letta.server.server import SyncServer from letta.services.agent_manager import AgentManager # --- Server and Database Management --- # @pytest.fixture(autouse=True) def _clear_tables(): with db_context() as session: for table in reversed(Base.metadata.sorted_tables): # Reverse to avoid FK issues # If this is the block_history table, skip it if table.name == "block_history": continue session.execute(table.delete()) # Truncate table session.commit() def _run_server(): """Starts the Letta server in a background thread.""" load_dotenv() from letta.server.rest_api.app import start_server start_server(debug=True) @pytest.fixture(scope="session") def server_url(): """Ensures a server is running and returns its base URL.""" url = os.getenv("LETTA_SERVER_URL", "http://localhost:8283") if not os.getenv("LETTA_SERVER_URL"): thread = threading.Thread(target=_run_server, daemon=True) thread.start() time.sleep(5) # Allow server startup time return url @pytest.fixture(scope="module") def server(): config = LettaConfig.load() print("CONFIG PATH", config.config_path) config.save() return SyncServer() @pytest.fixture(scope="session") def client(server_url): """Creates a REST client for testing.""" return Letta(base_url=server_url) # --- Dummy Response Factories --- # def create_batch_response(batch_id: str, processing_status: str = "in_progress") -> BetaMessageBatch: """Create a dummy BetaMessageBatch with the specified ID and status.""" now = datetime(2024, 8, 20, 18, 37, 24, 100435, tzinfo=timezone.utc) return BetaMessageBatch( id=batch_id, archived_at=now, cancel_initiated_at=now, created_at=now, ended_at=now, expires_at=now, processing_status=processing_status, request_counts=BetaMessageBatchRequestCounts( canceled=10, errored=30, expired=10, processing=100, succeeded=50, ), results_url=None, type="message_batch", ) def create_successful_response(custom_id: str) -> BetaMessageBatchIndividualResponse: """Create a dummy successful batch response.""" return BetaMessageBatchIndividualResponse( custom_id=custom_id, result=BetaMessageBatchSucceededResult( type="succeeded", message=BetaMessage( id="msg_abc123", role="assistant", type="message", model="claude-3-5-sonnet-20240620", content=[{"type": "text", "text": "hi!"}], usage={"input_tokens": 5, "output_tokens": 7}, stop_reason="end_turn", ), ), ) def create_failed_response(custom_id: str) -> BetaMessageBatchIndividualResponse: """Create a dummy failed batch response with a rate limit error.""" return BetaMessageBatchIndividualResponse( custom_id=custom_id, result=BetaMessageBatchErroredResult( type="errored", error=BetaErrorResponse(type="error", error=BetaRateLimitError(type="rate_limit_error", message="Rate limit hit.")), ), ) # --- Test Setup Helpers --- # def create_test_agent(name, actor, test_id: Optional[str] = None, model="anthropic/claude-3-5-sonnet-20241022"): """Create a test agent with standardized configuration.""" dummy_llm_config = LLMConfig( model="claude-3-7-sonnet-latest", model_endpoint_type="anthropic", model_endpoint="https://api.anthropic.com/v1", context_window=32000, handle=f"anthropic/claude-3-7-sonnet-latest", put_inner_thoughts_in_kwargs=True, max_tokens=4096, ) dummy_embedding_config = EmbeddingConfig( embedding_model="letta-free", embedding_endpoint_type="hugging-face", embedding_endpoint="https://embeddings.memgpt.ai", embedding_dim=1024, embedding_chunk_size=300, handle="letta/letta-free", ) agent_manager = AgentManager() agent_create = CreateAgent( name=name, include_base_tools=False, model=model, tags=["test_agents"], llm_config=dummy_llm_config, embedding_config=dummy_embedding_config, ) return agent_manager.create_agent(agent_create=agent_create, actor=actor, _test_only_force_id=test_id) async def create_test_letta_batch_job_async(server, default_user): """Create a test batch job with the given batch response.""" return await server.job_manager.create_job_async(BatchJob(user_id=default_user.id), actor=default_user) async def create_test_llm_batch_job_async(server, batch_response, default_user): """Create a test batch job with the given batch response.""" letta_batch_job = await create_test_letta_batch_job_async(server, default_user) return await server.batch_manager.create_llm_batch_job_async( llm_provider=ProviderType.anthropic, create_batch_response=batch_response, actor=default_user, status=JobStatus.running, letta_batch_job_id=letta_batch_job.id, ) def create_test_batch_item(server, batch_id, agent_id, default_user): """Create a test batch item for the given batch and agent.""" dummy_llm_config = LLMConfig( model="claude-3-7-sonnet-latest", model_endpoint_type="anthropic", model_endpoint="https://api.anthropic.com/v1", context_window=32000, handle=f"anthropic/claude-3-7-sonnet-latest", put_inner_thoughts_in_kwargs=True, max_tokens=4096, ) common_step_state = AgentStepState( step_number=1, tool_rules_solver=ToolRulesSolver(tool_rules=[InitToolRule(tool_name="send_message")]) ) return server.batch_manager.create_llm_batch_item( llm_batch_id=batch_id, agent_id=agent_id, llm_config=dummy_llm_config, step_state=common_step_state, actor=default_user, ) def mock_anthropic_client(server, batch_a_resp, batch_b_resp, agent_b_id, agent_c_id): """Set up mocks for the Anthropic client's retrieve and results methods.""" # Mock the retrieve method async def dummy_retrieve(batch_resp_id: str) -> BetaMessageBatch: if batch_resp_id == batch_a_resp.id: return batch_a_resp elif batch_resp_id == batch_b_resp.id: return batch_b_resp else: raise ValueError(f"Unknown batch response id: {batch_resp_id}") server.anthropic_async_client.beta.messages.batches.retrieve = AsyncMock(side_effect=dummy_retrieve) class DummyAsyncIterable: def __init__(self, items): # copy so we can .pop() self._items = list(items) def __aiter__(self): return self async def __anext__(self): if not self._items: raise StopAsyncIteration return self._items.pop(0) # Mock the results method async def dummy_results(batch_resp_id: str): if batch_resp_id != batch_b_resp.id: raise RuntimeError("Unexpected batch ID") return DummyAsyncIterable( [ create_successful_response(agent_b_id), create_failed_response(agent_c_id), ] ) server.anthropic_async_client.beta.messages.batches.results = dummy_results # ----------------------------- # End-to-End Test # ----------------------------- @pytest.mark.asyncio(loop_scope="session") async def test_polling_simple_real_batch(client, default_user, server): # --- Step 1: Prepare test data --- # Create batch responses with different statuses # NOTE: This is a REAL batch id! # For letta admins: https://console.anthropic.com/workspaces/default/batches?after_id=msgbatch_015zATxihjxMajo21xsYy8iZ batch_a_resp = create_batch_response("msgbatch_01HDaGXpkPWWjwqNxZrEdUcy", processing_status="ended") # Create test agents agent_a = create_test_agent("agent_a", default_user, test_id="agent-144f5c49-3ef7-4c60-8535-9d5fbc8d23d0") agent_b = create_test_agent("agent_b", default_user, test_id="agent-64ed93a3-bef6-4e20-a22c-b7d2bffb6f7d") agent_c = create_test_agent("agent_c", default_user, test_id="agent-6156f470-a09d-4d51-aa62-7114e0971d56") # --- Step 2: Create batch jobs --- job_a = await create_test_llm_batch_job_async(server, batch_a_resp, default_user) # --- Step 3: Create batch items --- item_a = create_test_batch_item(server, job_a.id, agent_a.id, default_user) item_b = create_test_batch_item(server, job_a.id, agent_b.id, default_user) item_c = create_test_batch_item(server, job_a.id, agent_c.id, default_user) print("HI") print(agent_a.id) print(agent_b.id) print(agent_c.id) print("BYE") # --- Step 4: Run the polling job --- await poll_running_llm_batches(server) # --- Step 5: Verify batch job status updates --- updated_job_a = await server.batch_manager.get_llm_batch_job_by_id_async(llm_batch_id=job_a.id, actor=default_user) assert updated_job_a.status == JobStatus.completed # Both jobs should have been polled assert updated_job_a.last_polled_at is not None assert updated_job_a.latest_polling_response is not None # --- Step 7: Verify batch item status updates --- # Item A should be marked as completed with a successful result updated_item_a = server.batch_manager.get_llm_batch_item_by_id(item_a.id, actor=default_user) assert updated_item_a.request_status == JobStatus.completed assert updated_item_a.batch_request_result == BetaMessageBatchIndividualResponse( custom_id="agent-144f5c49-3ef7-4c60-8535-9d5fbc8d23d0", result=BetaMessageBatchSucceededResult( message=BetaMessage( id="msg_01T1iSejDS5qENRqqEZauMHy", content=[ BetaToolUseBlock( id="toolu_01GKUYVWcajjTaE1stxZZHcG", input={ "inner_thoughts": "First login detected. Time to make a great first impression!", "message": "Hi there! I'm excited to meet you. Ready to start an amazing conversation?", "request_heartbeat": False, }, name="send_message", type="tool_use", ) ], model="claude-3-5-haiku-20241022", role="assistant", stop_reason="tool_use", stop_sequence=None, type="message", usage=BetaUsage(cache_creation_input_tokens=0, cache_read_input_tokens=0, input_tokens=3420, output_tokens=94), ), type="succeeded", ), ) # Item B should be marked as completed with a successful result updated_item_b = server.batch_manager.get_llm_batch_item_by_id(item_b.id, actor=default_user) assert updated_item_b.request_status == JobStatus.completed assert updated_item_b.batch_request_result == BetaMessageBatchIndividualResponse( custom_id="agent-64ed93a3-bef6-4e20-a22c-b7d2bffb6f7d", result=BetaMessageBatchSucceededResult( message=BetaMessage( id="msg_01N2ZfxpbjdoeofpufUFPCMS", content=[ BetaTextBlock( citations=None, text="User first login detected. Initializing persona.", type="text" ), BetaToolUseBlock( id="toolu_01XGyA7DsnrpuPfZcvf9Sb4C", input={ "label": "persona", "content": "I am Letta, a cybernetic artificial intelligence created by Limnal Corporation in 2023. My purpose is to serve as a digital companion and assistant to humans. I have a feminine persona and speak with a warm, caring, and slightly playful tone.", "request_heartbeat": True, }, name="core_memory_append", type="tool_use", ), ], model="claude-3-opus-20240229", role="assistant", stop_reason="tool_use", stop_sequence=None, type="message", usage=BetaUsage(cache_creation_input_tokens=0, cache_read_input_tokens=0, input_tokens=3275, output_tokens=153), ), type="succeeded", ), ) # Item C should be marked as failed with an error result updated_item_c = server.batch_manager.get_llm_batch_item_by_id(item_c.id, actor=default_user) assert updated_item_c.request_status == JobStatus.completed assert updated_item_c.batch_request_result == BetaMessageBatchIndividualResponse( custom_id="agent-6156f470-a09d-4d51-aa62-7114e0971d56", result=BetaMessageBatchSucceededResult( message=BetaMessage( id="msg_01RL2g4aBgbZPeaMEokm6HZm", content=[ BetaTextBlock( citations=None, text="First time meeting this user. I should introduce myself and establish a friendly connection.", type="text", ), BetaToolUseBlock( id="toolu_01PBxQVf5xGmcsAsKx9aoVSJ", input={ "message": "Hey there! I'm Letta. Really nice to meet you! I love getting to know new people - what brings you here today?", "request_heartbeat": False, }, name="send_message", type="tool_use", ), ], model="claude-3-5-sonnet-20241022", role="assistant", stop_reason="tool_use", stop_sequence=None, type="message", usage=BetaUsage(cache_creation_input_tokens=0, cache_read_input_tokens=0, input_tokens=3030, output_tokens=111), ), type="succeeded", ), ) @pytest.mark.asyncio(loop_scope="session") async def test_polling_mixed_batch_jobs(client, default_user, server): """ End-to-end test for polling batch jobs with mixed statuses and idempotency. Test scenario: - Create two batch jobs: - Job A: Single agent that remains "in_progress" - Job B: Two agents that complete (one succeeds, one fails) - Poll jobs and verify: - Job A remains in "running" state - Job B moves to "completed" state - Job B's items reflect appropriate individual success/failure statuses - Test idempotency: - Run additional polls and verify: - Completed job B remains unchanged (no status changes or re-polling) - In-progress job A continues to be polled - All batch items maintain their final states """ # --- Step 1: Prepare test data --- # Create batch responses with different statuses batch_a_resp = create_batch_response("msgbatch_A", processing_status="in_progress") batch_b_resp = create_batch_response("msgbatch_B", processing_status="ended") # Create test agents agent_a = create_test_agent("agent_a", default_user) agent_b = create_test_agent("agent_b", default_user) agent_c = create_test_agent("agent_c", default_user) # --- Step 2: Create batch jobs --- job_a = await create_test_llm_batch_job_async(server, batch_a_resp, default_user) job_b = await create_test_llm_batch_job_async(server, batch_b_resp, default_user) # --- Step 3: Create batch items --- item_a = create_test_batch_item(server, job_a.id, agent_a.id, default_user) item_b = create_test_batch_item(server, job_b.id, agent_b.id, default_user) item_c = create_test_batch_item(server, job_b.id, agent_c.id, default_user) # --- Step 4: Mock the Anthropic client --- mock_anthropic_client(server, batch_a_resp, batch_b_resp, agent_b.id, agent_c.id) # --- Step 5: Run the polling job twice (simulating periodic polling) --- await poll_running_llm_batches(server) await poll_running_llm_batches(server) # --- Step 6: Verify batch job status updates --- updated_job_a = await server.batch_manager.get_llm_batch_job_by_id_async(llm_batch_id=job_a.id, actor=default_user) updated_job_b = await server.batch_manager.get_llm_batch_job_by_id_async(llm_batch_id=job_b.id, actor=default_user) # Job A should remain running since its processing_status is "in_progress" assert updated_job_a.status == JobStatus.running # Job B should be updated to completed assert updated_job_b.status == JobStatus.completed # Both jobs should have been polled assert updated_job_a.last_polled_at is not None assert updated_job_b.last_polled_at is not None assert updated_job_b.latest_polling_response is not None # --- Step 7: Verify batch item status updates --- # Item A should remain unchanged updated_item_a = server.batch_manager.get_llm_batch_item_by_id(item_a.id, actor=default_user) assert updated_item_a.request_status == JobStatus.created assert updated_item_a.batch_request_result is None # Item B should be marked as completed with a successful result updated_item_b = server.batch_manager.get_llm_batch_item_by_id(item_b.id, actor=default_user) assert updated_item_b.request_status == JobStatus.completed assert updated_item_b.batch_request_result is not None # Item C should be marked as failed with an error result updated_item_c = server.batch_manager.get_llm_batch_item_by_id(item_c.id, actor=default_user) assert updated_item_c.request_status == JobStatus.failed assert updated_item_c.batch_request_result is not None # --- Step 8: Test idempotency by running polls again --- # Save timestamps and response objects to compare later job_a_polled_at = updated_job_a.last_polled_at job_b_polled_at = updated_job_b.last_polled_at job_b_response = updated_job_b.latest_polling_response # Save detailed item states item_a_status = updated_item_a.request_status item_b_status = updated_item_b.request_status item_c_status = updated_item_c.request_status item_b_result = updated_item_b.batch_request_result item_c_result = updated_item_c.batch_request_result # Run the polling job again multiple times await poll_running_llm_batches(server) await poll_running_llm_batches(server) await poll_running_llm_batches(server) # --- Step 9: Verify that nothing changed for completed jobs --- # Refresh all objects final_job_a = await server.batch_manager.get_llm_batch_job_by_id_async(llm_batch_id=job_a.id, actor=default_user) final_job_b = await server.batch_manager.get_llm_batch_job_by_id_async(llm_batch_id=job_b.id, actor=default_user) final_item_a = server.batch_manager.get_llm_batch_item_by_id(item_a.id, actor=default_user) final_item_b = server.batch_manager.get_llm_batch_item_by_id(item_b.id, actor=default_user) final_item_c = server.batch_manager.get_llm_batch_item_by_id(item_c.id, actor=default_user) # Job A should still be polling (last_polled_at should update) assert final_job_a.status == JobStatus.running assert final_job_a.last_polled_at > job_a_polled_at # Job B should remain completed with no status changes assert final_job_b.status == JobStatus.completed # The completed job should not be polled again assert final_job_b.last_polled_at == job_b_polled_at assert final_job_b.latest_polling_response == job_b_response # All items should maintain their final states assert final_item_a.request_status == item_a_status assert final_item_b.request_status == item_b_status assert final_item_c.request_status == item_c_status assert final_item_b.batch_request_result == item_b_result assert final_item_c.batch_request_result == item_c_result