feat: Enable voice agent with anthropic models (#1994)

Matthew Zhou 2025-05-02 15:00:25 -07:00 committed by GitHub
parent 219e2fcc3d
commit 1e638e2ce5
3 changed files with 47 additions and 11 deletions


@@ -46,6 +46,7 @@ from letta.services.message_manager import MessageManager
 from letta.services.passage_manager import PassageManager
 from letta.services.summarizer.enums import SummarizationMode
 from letta.services.summarizer.summarizer import Summarizer
+from letta.settings import model_settings
 from letta.utils import united_diff

 logger = get_logger(__name__)
@@ -125,6 +126,12 @@ class VoiceAgent(BaseAgent):
         agent_state = self.agent_manager.get_agent_by_id(self.agent_id, actor=self.actor)

+        # TODO: Refactor this so it uses our in-house clients
+        # TODO: For now, piggyback off of OpenAI client for ease
+        if agent_state.llm_config.model_endpoint_type == "anthropic":
+            self.openai_client.api_key = model_settings.anthropic_api_key
+            self.openai_client.base_url = "https://api.anthropic.com/v1/"
+
         # Safety check
         if agent_state.agent_type != AgentType.voice_convo_agent:
             raise IncompatibleAgentType(expected_type=AgentType.voice_convo_agent, actual_type=agent_state.agent_type)
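
This piggybacking works because Anthropic exposes an OpenAI-compatible chat completions endpoint at https://api.anthropic.com/v1/, so the agent's existing OpenAI SDK client can be re-pointed rather than replaced. A minimal standalone sketch of the same technique (the model name, prompt, and environment variable here are illustrative, not taken from this commit):

import asyncio
import os

from openai import AsyncOpenAI

# Re-point the OpenAI SDK at Anthropic's OpenAI-compatible endpoint.
client = AsyncOpenAI(
    api_key=os.environ["ANTHROPIC_API_KEY"],
    base_url="https://api.anthropic.com/v1/",
)

async def main() -> None:
    stream = await client.chat.completions.create(
        model="claude-3-5-sonnet-20241022",
        messages=[{"role": "user", "content": "Say hello."}],
        stream=True,
    )
    async for chunk in stream:
        # Some compatibility layers emit chunks with an empty choices list,
        # hence the guard (see the streaming interface change below).
        if chunk.choices and chunk.choices[0].delta.content:
            print(chunk.choices[0].delta.content, end="")

asyncio.run(main())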


@@ -35,18 +35,19 @@ class OpenAIChatCompletionsStreamingInterface:
         """
         async with stream:
             async for chunk in stream:
-                choice = chunk.choices[0]
-                delta = choice.delta
-                finish_reason = choice.finish_reason
+                if chunk.choices:
+                    choice = chunk.choices[0]
+                    delta = choice.delta
+                    finish_reason = choice.finish_reason

-                async for sse_chunk in self._process_content(delta, chunk):
-                    yield sse_chunk
+                    async for sse_chunk in self._process_content(delta, chunk):
+                        yield sse_chunk

-                async for sse_chunk in self._process_tool_calls(delta, chunk):
-                    yield sse_chunk
+                    async for sse_chunk in self._process_tool_calls(delta, chunk):
+                        yield sse_chunk

-                if self._handle_finish_reason(finish_reason):
-                    break
+                    if self._handle_finish_reason(finish_reason):
+                        break

     async def _process_content(self, delta: ChoiceDelta, chunk: ChatCompletionChunk) -> AsyncGenerator[str, None]:
         """Processes regular content tokens and streams them."""

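The motivation for the new `if chunk.choices:` guard: OpenAI-compatible providers may emit stream chunks whose choices list is empty (the OpenAI API itself does this for the final usage-only chunk when usage reporting is requested, and compatibility layers such as Anthropic's can do the same), so unconditionally indexing chunk.choices[0] raises an IndexError mid-stream. A tiny self-contained sketch of the failure mode, using hypothetical stand-in objects rather than real SDK types:

from types import SimpleNamespace

# Hypothetical chunks: one normal delta chunk, then a choice-less trailer.
chunks = [
    SimpleNamespace(choices=[SimpleNamespace(delta=SimpleNamespace(content="Hi"), finish_reason=None)]),
    SimpleNamespace(choices=[]),  # e.g. a usage-only trailer chunk
]

for chunk in chunks:
    if chunk.choices:  # the guard introduced in this commit
        print(chunk.choices[0].delta.content)
    # Without the guard, chunk.choices[0] would raise IndexError
    # on the second chunk.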

@@ -216,6 +216,34 @@ def voice_agent(server, actor):
     return main_agent


+@pytest.fixture(scope="function")
+def voice_agent_anthropic(server, actor):
+    server.tool_manager.upsert_base_tools(actor=actor)
+
+    main_agent = server.create_agent(
+        request=CreateAgent(
+            agent_type=AgentType.voice_convo_agent,
+            name="main_agent",
+            memory_blocks=[
+                CreateBlock(
+                    label="persona",
+                    value="You are a personal assistant that helps users with requests.",
+                ),
+                CreateBlock(
+                    label="human",
+                    value="My favorite plant is the fiddle leaf\nMy favorite color is lavender",
+                ),
+            ],
+            model="anthropic/claude-3-5-sonnet-20241022",
+            embedding="openai/text-embedding-ada-002",
+            enable_sleeptime=True,
+        ),
+        actor=actor,
+    )
+
+    return main_agent
+
+
 @pytest.fixture
 def group_id(voice_agent):
     return voice_agent.multi_agent_group.id
@@ -285,7 +313,7 @@ async def test_voice_recall_memory(disable_e2b_api_key, client, voice_agent, mes
 @pytest.mark.asyncio
 @pytest.mark.parametrize("endpoint", ["v1/voice-beta"])
-async def test_trigger_summarization(disable_e2b_api_key, client, server, voice_agent, group_id, endpoint, actor):
+async def test_trigger_summarization(disable_e2b_api_key, client, server, voice_agent_anthropic, group_id, endpoint, actor):
     server.group_manager.modify_group(
         group_id=group_id,
         group_update=GroupUpdate(
@@ -299,7 +327,7 @@ async def test_trigger_summarization(disable_e2b_api_key, client, server, voice_
     )

     request = _get_chat_request("How are you?")
-    async_client = AsyncOpenAI(base_url=f"http://localhost:8283/{endpoint}/{voice_agent.id}", max_retries=0)
+    async_client = AsyncOpenAI(base_url=f"http://localhost:8283/{endpoint}/{voice_agent_anthropic.id}", max_retries=0)
     stream = await async_client.chat.completions.create(**request.model_dump(exclude_none=True))

     async with stream:
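
The diff cuts off inside the `async with stream:` block. For reference, a sketch of how a consumer of this stream might continue from here, mirroring the server-side guard added above (this body is an assumption for illustration, not part of the commit):

async with stream:
    async for chunk in stream:
        # Tolerate choice-less chunks, matching the streaming interface fix.
        if chunk.choices and chunk.choices[0].delta.content:
            print(chunk.choices[0].delta.content, end="", flush=True)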