mirror of
https://github.com/cpacker/MemGPT.git
synced 2025-06-03 04:30:22 +00:00
feat: add sonnet 3.7 support (#1302)
This commit is contained in:
parent
de16a17f65
commit
100431dce8
@ -424,7 +424,7 @@ class Agent(BaseAgent):
|
||||
self.logger.debug(f"Function call message: {messages[-1]}")
|
||||
|
||||
nonnull_content = False
|
||||
if response_message.content:
|
||||
if response_message.content or response_message.reasoning_content or response_message.redacted_reasoning_content:
|
||||
# The content is then internal monologue, not chat
|
||||
self.interface.internal_monologue(response_message.content, msg_obj=messages[-1])
|
||||
# Flag to avoid printing a duplicate if inner thoughts get popped from the function call
|
||||
|
@ -9,7 +9,7 @@ from letta.constants import OPENAI_CONTEXT_WINDOW_ERROR_SUBSTRING
|
||||
from letta.errors import LLMError
|
||||
from letta.log import get_logger
|
||||
from letta.schemas.enums import MessageStreamStatus
|
||||
from letta.schemas.letta_message import AssistantMessage, ReasoningMessage, ToolCallMessage, ToolReturnMessage
|
||||
from letta.schemas.letta_message import AssistantMessage, HiddenReasoningMessage, ReasoningMessage, ToolCallMessage, ToolReturnMessage
|
||||
from letta.schemas.letta_response import LettaStreamingResponse
|
||||
from letta.schemas.usage import LettaUsageStatistics
|
||||
|
||||
@ -57,6 +57,8 @@ def _sse_post(url: str, data: dict, headers: dict) -> Generator[Union[LettaStrea
|
||||
yield ReasoningMessage(**chunk_data)
|
||||
elif chunk_data.get("message_type") == "assistant_message":
|
||||
yield AssistantMessage(**chunk_data)
|
||||
elif "hidden_reasoning" in chunk_data:
|
||||
yield HiddenReasoningMessage(**chunk_data)
|
||||
elif "tool_call" in chunk_data:
|
||||
yield ToolCallMessage(**chunk_data)
|
||||
elif "tool_return" in chunk_data:
|
||||
|
@ -13,7 +13,9 @@ from anthropic.types.beta import (
|
||||
BetaRawMessageDeltaEvent,
|
||||
BetaRawMessageStartEvent,
|
||||
BetaRawMessageStopEvent,
|
||||
BetaRedactedThinkingBlock,
|
||||
BetaTextBlock,
|
||||
BetaThinkingBlock,
|
||||
BetaToolUseBlock,
|
||||
)
|
||||
|
||||
@ -345,43 +347,32 @@ def convert_anthropic_response_to_chatcompletion(
|
||||
finish_reason = remap_finish_reason(response.stop_reason)
|
||||
|
||||
content = None
|
||||
reasoning_content = None
|
||||
reasoning_content_signature = None
|
||||
redacted_reasoning_content = None
|
||||
tool_calls = None
|
||||
|
||||
if len(response.content) > 1:
|
||||
# inner mono + function call
|
||||
assert len(response.content) == 2
|
||||
text_block = response.content[0]
|
||||
tool_block = response.content[1]
|
||||
assert text_block.type == "text"
|
||||
assert tool_block.type == "tool_use"
|
||||
content = strip_xml_tags(string=text_block.text, tag=inner_thoughts_xml_tag)
|
||||
tool_calls = [
|
||||
ToolCall(
|
||||
id=tool_block.id,
|
||||
type="function",
|
||||
function=FunctionCall(
|
||||
name=tool_block.name,
|
||||
arguments=json.dumps(tool_block.input, indent=2),
|
||||
),
|
||||
)
|
||||
]
|
||||
elif len(response.content) == 1:
|
||||
block = response.content[0]
|
||||
if block.type == "tool_use":
|
||||
# function call only
|
||||
tool_calls = [
|
||||
ToolCall(
|
||||
id=block.id,
|
||||
type="function",
|
||||
function=FunctionCall(
|
||||
name=block.name,
|
||||
arguments=json.dumps(block.input, indent=2),
|
||||
),
|
||||
)
|
||||
]
|
||||
else:
|
||||
# inner mono only
|
||||
content = strip_xml_tags(string=block.text, tag=inner_thoughts_xml_tag)
|
||||
for content_part in response.content:
|
||||
if content_part.type == "text":
|
||||
content = strip_xml_tags(string=content_part.text, tag=inner_thoughts_xml_tag)
|
||||
if content_part.type == "tool_use":
|
||||
tool_calls = [
|
||||
ToolCall(
|
||||
id=content_part.id,
|
||||
type="function",
|
||||
function=FunctionCall(
|
||||
name=content_part.name,
|
||||
arguments=json.dumps(content_part.input, indent=2),
|
||||
),
|
||||
)
|
||||
]
|
||||
if content_part.type == "thinking":
|
||||
reasoning_content = content_part.thinking
|
||||
reasoning_content_signature = content_part.signature
|
||||
if content_part.type == "redacted_thinking":
|
||||
redacted_reasoning_content = content_part.data
|
||||
|
||||
else:
|
||||
raise RuntimeError("Unexpected empty content in response")
|
||||
|
||||
@ -392,6 +383,9 @@ def convert_anthropic_response_to_chatcompletion(
|
||||
message=ChoiceMessage(
|
||||
role=response.role,
|
||||
content=content,
|
||||
reasoning_content=reasoning_content,
|
||||
reasoning_content_signature=reasoning_content_signature,
|
||||
redacted_reasoning_content=redacted_reasoning_content,
|
||||
tool_calls=tool_calls,
|
||||
),
|
||||
)
|
||||
@ -462,7 +456,31 @@ def convert_anthropic_stream_event_to_chatcompletion(
|
||||
"""
|
||||
# Get finish reason
|
||||
finish_reason = None
|
||||
if isinstance(event, BetaRawMessageDeltaEvent):
|
||||
completion_chunk_tokens = 0
|
||||
|
||||
# Get content and tool calls
|
||||
content = None
|
||||
reasoning_content = None
|
||||
reasoning_content_signature = None
|
||||
redacted_reasoning_content = None # NOTE called "data" in the stream
|
||||
tool_calls = None
|
||||
if isinstance(event, BetaRawMessageStartEvent):
|
||||
"""
|
||||
BetaRawMessageStartEvent(
|
||||
message=BetaMessage(
|
||||
content=[],
|
||||
usage=BetaUsage(
|
||||
input_tokens=3086,
|
||||
output_tokens=1,
|
||||
),
|
||||
...,
|
||||
),
|
||||
type='message_start'
|
||||
)
|
||||
"""
|
||||
completion_chunk_tokens += event.message.usage.output_tokens
|
||||
|
||||
elif isinstance(event, BetaRawMessageDeltaEvent):
|
||||
"""
|
||||
BetaRawMessageDeltaEvent(
|
||||
delta=Delta(
|
||||
@ -474,11 +492,9 @@ def convert_anthropic_stream_event_to_chatcompletion(
|
||||
)
|
||||
"""
|
||||
finish_reason = remap_finish_reason(event.delta.stop_reason)
|
||||
completion_chunk_tokens += event.usage.output_tokens
|
||||
|
||||
# Get content and tool calls
|
||||
content = None
|
||||
tool_calls = None
|
||||
if isinstance(event, BetaRawContentBlockDeltaEvent):
|
||||
elif isinstance(event, BetaRawContentBlockDeltaEvent):
|
||||
"""
|
||||
BetaRawContentBlockDeltaEvent(
|
||||
delta=BetaInputJSONDelta(
|
||||
@ -501,9 +517,24 @@ def convert_anthropic_stream_event_to_chatcompletion(
|
||||
)
|
||||
|
||||
"""
|
||||
# ReACT COT
|
||||
if event.delta.type == "text_delta":
|
||||
content = strip_xml_tags_streaming(string=event.delta.text, tag=inner_thoughts_xml_tag)
|
||||
|
||||
# Extended thought COT
|
||||
elif event.delta.type == "thinking_delta":
|
||||
# Redacted doesn't come in the delta chunks, comes all at once
|
||||
# "redacted_thinking blocks will not have any deltas associated and will be sent as a single event."
|
||||
# Thinking might start with ""
|
||||
if len(event.delta.thinking) > 0:
|
||||
reasoning_content = event.delta.thinking
|
||||
|
||||
# Extended thought COT signature
|
||||
elif event.delta.type == "signature_delta":
|
||||
if len(event.delta.signature) > 0:
|
||||
reasoning_content_signature = event.delta.signature
|
||||
|
||||
# Tool calling
|
||||
elif event.delta.type == "input_json_delta":
|
||||
tool_calls = [
|
||||
ToolCallDelta(
|
||||
@ -514,6 +545,9 @@ def convert_anthropic_stream_event_to_chatcompletion(
|
||||
),
|
||||
)
|
||||
]
|
||||
else:
|
||||
warnings.warn("Unexpected delta type: " + event.delta.type)
|
||||
|
||||
elif isinstance(event, BetaRawContentBlockStartEvent):
|
||||
"""
|
||||
BetaRawContentBlockStartEvent(
|
||||
@ -551,6 +585,15 @@ def convert_anthropic_stream_event_to_chatcompletion(
|
||||
]
|
||||
elif isinstance(event.content_block, BetaTextBlock):
|
||||
content = event.content_block.text
|
||||
elif isinstance(event.content_block, BetaThinkingBlock):
|
||||
reasoning_content = event.content_block.thinking
|
||||
elif isinstance(event.content_block, BetaRedactedThinkingBlock):
|
||||
redacted_reasoning_content = event.content_block.data
|
||||
else:
|
||||
warnings.warn("Unexpected content start type: " + str(type(event.content_block)))
|
||||
|
||||
else:
|
||||
warnings.warn("Unexpected event type: " + event.type)
|
||||
|
||||
# Initialize base response
|
||||
choice = ChunkChoice(
|
||||
@ -558,6 +601,9 @@ def convert_anthropic_stream_event_to_chatcompletion(
|
||||
finish_reason=finish_reason,
|
||||
delta=MessageDelta(
|
||||
content=content,
|
||||
reasoning_content=reasoning_content,
|
||||
reasoning_content_signature=reasoning_content_signature,
|
||||
redacted_reasoning_content=redacted_reasoning_content,
|
||||
tool_calls=tool_calls,
|
||||
),
|
||||
)
|
||||
@ -566,6 +612,7 @@ def convert_anthropic_stream_event_to_chatcompletion(
|
||||
choices=[choice],
|
||||
created=get_utc_time(),
|
||||
model=model,
|
||||
output_tokens=completion_chunk_tokens,
|
||||
)
|
||||
|
||||
|
||||
@ -577,8 +624,20 @@ def _prepare_anthropic_request(
|
||||
# if true, put COT inside the tool calls instead of inside the content
|
||||
put_inner_thoughts_in_kwargs: bool = False,
|
||||
bedrock: bool = False,
|
||||
# extended thinking related fields
|
||||
# https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking
|
||||
extended_thinking: bool = False,
|
||||
max_reasoning_tokens: Optional[int] = None,
|
||||
) -> dict:
|
||||
"""Prepare the request data for Anthropic API format."""
|
||||
if extended_thinking:
|
||||
assert (
|
||||
max_reasoning_tokens is not None and max_reasoning_tokens < data.max_tokens
|
||||
), "max tokens must be greater than thinking budget"
|
||||
assert not put_inner_thoughts_in_kwargs, "extended thinking not compatible with put_inner_thoughts_in_kwargs"
|
||||
# assert not prefix_fill, "extended thinking not compatible with prefix_fill"
|
||||
# Silently disable prefix_fill for now
|
||||
prefix_fill = False
|
||||
|
||||
# if needed, put inner thoughts as a kwarg for all tools
|
||||
if data.tools and put_inner_thoughts_in_kwargs:
|
||||
@ -595,6 +654,14 @@ def _prepare_anthropic_request(
|
||||
# pydantic -> dict
|
||||
data = data.model_dump(exclude_none=True)
|
||||
|
||||
if extended_thinking:
|
||||
data["thinking"] = {
|
||||
"type": "enabled",
|
||||
"budget_tokens": max_reasoning_tokens,
|
||||
}
|
||||
# `temperature` may only be set to 1 when thinking is enabled. Please consult our documentation at https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking#important-considerations-when-using-extended-thinking
|
||||
data["temperature"] = 1.0
|
||||
|
||||
if "functions" in data:
|
||||
raise ValueError(f"'functions' unexpected in Anthropic API payload")
|
||||
|
||||
@ -665,6 +732,8 @@ def anthropic_chat_completions_request(
|
||||
data: ChatCompletionRequest,
|
||||
inner_thoughts_xml_tag: Optional[str] = "thinking",
|
||||
put_inner_thoughts_in_kwargs: bool = False,
|
||||
extended_thinking: bool = False,
|
||||
max_reasoning_tokens: Optional[int] = None,
|
||||
betas: List[str] = ["tools-2024-04-04"],
|
||||
) -> ChatCompletionResponse:
|
||||
"""https://docs.anthropic.com/claude/docs/tool-use"""
|
||||
@ -678,6 +747,8 @@ def anthropic_chat_completions_request(
|
||||
data=data,
|
||||
inner_thoughts_xml_tag=inner_thoughts_xml_tag,
|
||||
put_inner_thoughts_in_kwargs=put_inner_thoughts_in_kwargs,
|
||||
extended_thinking=extended_thinking,
|
||||
max_reasoning_tokens=max_reasoning_tokens,
|
||||
)
|
||||
log_event(name="llm_request_sent", attributes=data)
|
||||
response = anthropic_client.beta.messages.create(
|
||||
@ -717,6 +788,8 @@ def anthropic_chat_completions_request_stream(
|
||||
data: ChatCompletionRequest,
|
||||
inner_thoughts_xml_tag: Optional[str] = "thinking",
|
||||
put_inner_thoughts_in_kwargs: bool = False,
|
||||
extended_thinking: bool = False,
|
||||
max_reasoning_tokens: Optional[int] = None,
|
||||
betas: List[str] = ["tools-2024-04-04"],
|
||||
) -> Generator[ChatCompletionChunkResponse, None, None]:
|
||||
"""Stream chat completions from Anthropic API.
|
||||
@ -728,6 +801,8 @@ def anthropic_chat_completions_request_stream(
|
||||
data=data,
|
||||
inner_thoughts_xml_tag=inner_thoughts_xml_tag,
|
||||
put_inner_thoughts_in_kwargs=put_inner_thoughts_in_kwargs,
|
||||
extended_thinking=extended_thinking,
|
||||
max_reasoning_tokens=max_reasoning_tokens,
|
||||
)
|
||||
|
||||
anthropic_override_key = ProviderManager().get_anthropic_override_key()
|
||||
@ -777,6 +852,8 @@ def anthropic_chat_completions_process_stream(
|
||||
stream_interface: Optional[Union[AgentChunkStreamingInterface, AgentRefreshStreamingInterface]] = None,
|
||||
inner_thoughts_xml_tag: Optional[str] = "thinking",
|
||||
put_inner_thoughts_in_kwargs: bool = False,
|
||||
extended_thinking: bool = False,
|
||||
max_reasoning_tokens: Optional[int] = None,
|
||||
create_message_id: bool = True,
|
||||
create_message_datetime: bool = True,
|
||||
betas: List[str] = ["tools-2024-04-04"],
|
||||
@ -839,7 +916,6 @@ def anthropic_chat_completions_process_stream(
|
||||
created=dummy_message.created_at,
|
||||
model=chat_completion_request.model,
|
||||
usage=UsageStatistics(
|
||||
completion_tokens=0,
|
||||
prompt_tokens=prompt_tokens,
|
||||
total_tokens=prompt_tokens,
|
||||
),
|
||||
@ -850,13 +926,15 @@ def anthropic_chat_completions_process_stream(
|
||||
if stream_interface:
|
||||
stream_interface.stream_start()
|
||||
|
||||
n_chunks = 0
|
||||
completion_tokens = 0
|
||||
try:
|
||||
for chunk_idx, chat_completion_chunk in enumerate(
|
||||
anthropic_chat_completions_request_stream(
|
||||
data=chat_completion_request,
|
||||
inner_thoughts_xml_tag=inner_thoughts_xml_tag,
|
||||
put_inner_thoughts_in_kwargs=put_inner_thoughts_in_kwargs,
|
||||
extended_thinking=extended_thinking,
|
||||
max_reasoning_tokens=max_reasoning_tokens,
|
||||
betas=betas,
|
||||
)
|
||||
):
|
||||
@ -868,6 +946,9 @@ def anthropic_chat_completions_process_stream(
|
||||
chat_completion_chunk,
|
||||
message_id=chat_completion_response.id if create_message_id else chat_completion_chunk.id,
|
||||
message_date=chat_completion_response.created if create_message_datetime else chat_completion_chunk.created,
|
||||
# if extended_thinking is on, then reasoning_content will be flowing as chunks
|
||||
# TODO handle emitting redacted reasoning content (e.g. as concat?)
|
||||
expect_reasoning_content=extended_thinking,
|
||||
)
|
||||
elif isinstance(stream_interface, AgentRefreshStreamingInterface):
|
||||
stream_interface.process_refresh(chat_completion_response)
|
||||
@ -908,6 +989,30 @@ def anthropic_chat_completions_process_stream(
|
||||
else:
|
||||
accum_message.content += content_delta
|
||||
|
||||
# NOTE: for extended_thinking mode
|
||||
if extended_thinking and message_delta.reasoning_content is not None:
|
||||
reasoning_content_delta = message_delta.reasoning_content
|
||||
if accum_message.reasoning_content is None:
|
||||
accum_message.reasoning_content = reasoning_content_delta
|
||||
else:
|
||||
accum_message.reasoning_content += reasoning_content_delta
|
||||
|
||||
# NOTE: extended_thinking sends a signature
|
||||
if extended_thinking and message_delta.reasoning_content_signature is not None:
|
||||
reasoning_content_signature_delta = message_delta.reasoning_content_signature
|
||||
if accum_message.reasoning_content_signature is None:
|
||||
accum_message.reasoning_content_signature = reasoning_content_signature_delta
|
||||
else:
|
||||
accum_message.reasoning_content_signature += reasoning_content_signature_delta
|
||||
|
||||
# NOTE: extended_thinking also has the potential for redacted_reasoning_content
|
||||
if extended_thinking and message_delta.redacted_reasoning_content is not None:
|
||||
redacted_reasoning_content_delta = message_delta.redacted_reasoning_content
|
||||
if accum_message.redacted_reasoning_content is None:
|
||||
accum_message.redacted_reasoning_content = redacted_reasoning_content_delta
|
||||
else:
|
||||
accum_message.redacted_reasoning_content += redacted_reasoning_content_delta
|
||||
|
||||
# TODO(charles) make sure this works for parallel tool calling?
|
||||
if message_delta.tool_calls is not None:
|
||||
tool_calls_delta = message_delta.tool_calls
|
||||
@ -966,7 +1071,8 @@ def anthropic_chat_completions_process_stream(
|
||||
chat_completion_response.system_fingerprint = chat_completion_chunk.system_fingerprint
|
||||
|
||||
# increment chunk counter
|
||||
n_chunks += 1
|
||||
if chat_completion_chunk.output_tokens is not None:
|
||||
completion_tokens += chat_completion_chunk.output_tokens
|
||||
|
||||
except Exception as e:
|
||||
if stream_interface:
|
||||
@ -990,8 +1096,8 @@ def anthropic_chat_completions_process_stream(
|
||||
|
||||
# compute token usage before returning
|
||||
# TODO try actually computing the #tokens instead of assuming the chunks is the same
|
||||
chat_completion_response.usage.completion_tokens = n_chunks
|
||||
chat_completion_response.usage.total_tokens = prompt_tokens + n_chunks
|
||||
chat_completion_response.usage.completion_tokens = completion_tokens
|
||||
chat_completion_response.usage.total_tokens = prompt_tokens + completion_tokens
|
||||
|
||||
assert len(chat_completion_response.choices) > 0, chat_completion_response
|
||||
|
||||
|
@ -406,6 +406,8 @@ def create(
|
||||
chat_completion_request=chat_completion_request,
|
||||
put_inner_thoughts_in_kwargs=llm_config.put_inner_thoughts_in_kwargs,
|
||||
stream_interface=stream_interface,
|
||||
extended_thinking=llm_config.enable_reasoner,
|
||||
max_reasoning_tokens=llm_config.max_reasoning_tokens,
|
||||
)
|
||||
|
||||
else:
|
||||
@ -413,6 +415,8 @@ def create(
|
||||
response = anthropic_chat_completions_request(
|
||||
data=chat_completion_request,
|
||||
put_inner_thoughts_in_kwargs=llm_config.put_inner_thoughts_in_kwargs,
|
||||
extended_thinking=llm_config.enable_reasoner,
|
||||
max_reasoning_tokens=llm_config.max_reasoning_tokens,
|
||||
)
|
||||
|
||||
if llm_config.put_inner_thoughts_in_kwargs:
|
||||
|
@ -147,6 +147,14 @@ class CreateAgent(BaseModel, validate_assignment=True): #
|
||||
)
|
||||
context_window_limit: Optional[int] = Field(None, description="The context window limit used by the agent.")
|
||||
embedding_chunk_size: Optional[int] = Field(DEFAULT_EMBEDDING_CHUNK_SIZE, description="The embedding chunk size used by the agent.")
|
||||
max_tokens: Optional[int] = Field(
|
||||
None,
|
||||
description="The maximum number of tokens to generate, including reasoning step. If not set, the model will use its default value.",
|
||||
)
|
||||
max_reasoning_tokens: Optional[int] = Field(
|
||||
None, description="The maximum number of tokens to generate for reasoning step. If not set, the model will use its default value."
|
||||
)
|
||||
enable_reasoner: Optional[bool] = Field(False, description="Whether to enable internal extended thinking step for a reasoner model.")
|
||||
from_template: Optional[str] = Field(None, description="The template id used to configure the agent")
|
||||
template: bool = Field(False, description="Whether the agent is a template")
|
||||
project: Optional[str] = Field(
|
||||
|
@ -88,11 +88,13 @@ class ReasoningMessage(LettaMessage):
|
||||
source (Literal["reasoner_model", "non_reasoner_model"]): Whether the reasoning
|
||||
content was generated natively by a reasoner model or derived via prompting
|
||||
reasoning (str): The internal reasoning of the agent
|
||||
signature (Optional[str]): The model-generated signature of the reasoning step
|
||||
"""
|
||||
|
||||
message_type: Literal["reasoning_message"] = "reasoning_message"
|
||||
source: Literal["reasoner_model", "non_reasoner_model"] = "non_reasoner_model"
|
||||
reasoning: str
|
||||
signature: Optional[str] = None
|
||||
|
||||
|
||||
class HiddenReasoningMessage(LettaMessage):
|
||||
@ -106,12 +108,12 @@ class HiddenReasoningMessage(LettaMessage):
|
||||
name (Optional[str]): The name of the sender of the message
|
||||
state (Literal["redacted", "omitted"]): Whether the reasoning
|
||||
content was redacted by the provider or simply omitted by the API
|
||||
reasoning (str): The internal reasoning of the agent
|
||||
hidden_reasoning (Optional[str]): The internal reasoning of the agent
|
||||
"""
|
||||
|
||||
message_type: Literal["reasoning_message"] = "reasoning_message"
|
||||
message_type: Literal["hidden_reasoning_message"] = "hidden_reasoning_message"
|
||||
state: Literal["redacted", "omitted"]
|
||||
reasoning: str
|
||||
hidden_reasoning: Optional[str] = None
|
||||
|
||||
|
||||
class ToolCall(BaseModel):
|
||||
@ -229,7 +231,7 @@ class AssistantMessage(LettaMessage):
|
||||
|
||||
# NOTE: use Pydantic's discriminated unions feature: https://docs.pydantic.dev/latest/concepts/unions/#discriminated-unions
|
||||
LettaMessageUnion = Annotated[
|
||||
Union[SystemMessage, UserMessage, ReasoningMessage, ToolCallMessage, ToolReturnMessage, AssistantMessage],
|
||||
Union[SystemMessage, UserMessage, ReasoningMessage, HiddenReasoningMessage, ToolCallMessage, ToolReturnMessage, AssistantMessage],
|
||||
Field(discriminator="message_type"),
|
||||
]
|
||||
|
||||
@ -240,6 +242,7 @@ def create_letta_message_union_schema():
|
||||
{"$ref": "#/components/schemas/SystemMessage"},
|
||||
{"$ref": "#/components/schemas/UserMessage"},
|
||||
{"$ref": "#/components/schemas/ReasoningMessage"},
|
||||
{"$ref": "#/components/schemas/HiddenReasoningMessage"},
|
||||
{"$ref": "#/components/schemas/ToolCallMessage"},
|
||||
{"$ref": "#/components/schemas/ToolReturnMessage"},
|
||||
{"$ref": "#/components/schemas/AssistantMessage"},
|
||||
@ -250,6 +253,7 @@ def create_letta_message_union_schema():
|
||||
"system_message": "#/components/schemas/SystemMessage",
|
||||
"user_message": "#/components/schemas/UserMessage",
|
||||
"reasoning_message": "#/components/schemas/ReasoningMessage",
|
||||
"hidden_reasoning_message": "#/components/schemas/HiddenReasoningMessage",
|
||||
"tool_call_message": "#/components/schemas/ToolCallMessage",
|
||||
"tool_return_message": "#/components/schemas/ToolReturnMessage",
|
||||
"assistant_message": "#/components/schemas/AssistantMessage",
|
||||
|
@ -60,6 +60,12 @@ class LLMConfig(BaseModel):
|
||||
4096,
|
||||
description="The maximum number of tokens to generate. If not set, the model will use its default value.",
|
||||
)
|
||||
enable_reasoner: bool = Field(
|
||||
False, description="Whether or not the model should use extended thinking if it is a 'reasoning' style model"
|
||||
)
|
||||
max_reasoning_tokens: int = Field(
|
||||
0, description="Configurable thinking budget for extended thinking, only used if enable_reasoner is True. Minimum value is 1024."
|
||||
)
|
||||
|
||||
# FIXME hack to silence pydantic protected namespace warning
|
||||
model_config = ConfigDict(protected_namespaces=())
|
||||
|
@ -19,6 +19,7 @@ from letta.schemas.enums import MessageRole
|
||||
from letta.schemas.letta_base import OrmMetadataBase
|
||||
from letta.schemas.letta_message import (
|
||||
AssistantMessage,
|
||||
HiddenReasoningMessage,
|
||||
LettaMessage,
|
||||
ReasoningMessage,
|
||||
SystemMessage,
|
||||
@ -27,7 +28,13 @@ from letta.schemas.letta_message import (
|
||||
ToolReturnMessage,
|
||||
UserMessage,
|
||||
)
|
||||
from letta.schemas.letta_message_content import LettaMessageContentUnion, TextContent, get_letta_message_content_union_str_json_schema
|
||||
from letta.schemas.letta_message_content import (
|
||||
LettaMessageContentUnion,
|
||||
ReasoningContent,
|
||||
RedactedReasoningContent,
|
||||
TextContent,
|
||||
get_letta_message_content_union_str_json_schema,
|
||||
)
|
||||
from letta.system import unpack_message
|
||||
|
||||
|
||||
@ -206,23 +213,58 @@ class Message(BaseMessage):
|
||||
assistant_message_tool_kwarg: str = DEFAULT_MESSAGE_TOOL_KWARG,
|
||||
) -> List[LettaMessage]:
|
||||
"""Convert message object (in DB format) to the style used by the original Letta API"""
|
||||
if self.content and len(self.content) == 1 and isinstance(self.content[0], TextContent):
|
||||
text_content = self.content[0].text
|
||||
else:
|
||||
text_content = None
|
||||
|
||||
messages = []
|
||||
|
||||
if self.role == MessageRole.assistant:
|
||||
if text_content is not None:
|
||||
# This is type InnerThoughts
|
||||
messages.append(
|
||||
ReasoningMessage(
|
||||
id=self.id,
|
||||
date=self.created_at,
|
||||
reasoning=text_content,
|
||||
|
||||
# Handle reasoning
|
||||
if self.content:
|
||||
# Check for ReACT-style COT inside of TextContent
|
||||
if len(self.content) == 1 and isinstance(self.content[0], TextContent):
|
||||
messages.append(
|
||||
ReasoningMessage(
|
||||
id=self.id,
|
||||
date=self.created_at,
|
||||
reasoning=self.content[0].text,
|
||||
)
|
||||
)
|
||||
)
|
||||
# Otherwise, we may have a list of multiple types
|
||||
else:
|
||||
# TODO we can probably collapse these two cases into a single loop
|
||||
for content_part in self.content:
|
||||
if isinstance(content_part, TextContent):
|
||||
# COT
|
||||
messages.append(
|
||||
ReasoningMessage(
|
||||
id=self.id,
|
||||
date=self.created_at,
|
||||
reasoning=content_part.text,
|
||||
)
|
||||
)
|
||||
elif isinstance(content_part, ReasoningContent):
|
||||
# "native" COT
|
||||
messages.append(
|
||||
ReasoningMessage(
|
||||
id=self.id,
|
||||
date=self.created_at,
|
||||
reasoning=content_part.reasoning,
|
||||
source="reasoner_model", # TODO do we want to tag like this?
|
||||
signature=content_part.signature,
|
||||
)
|
||||
)
|
||||
elif isinstance(content_part, RedactedReasoningContent):
|
||||
# "native" redacted/hidden COT
|
||||
messages.append(
|
||||
HiddenReasoningMessage(
|
||||
id=self.id,
|
||||
date=self.created_at,
|
||||
state="redacted",
|
||||
hidden_reasoning=content_part.data,
|
||||
)
|
||||
)
|
||||
else:
|
||||
warnings.warn(f"Unrecognized content part in assistant message: {content_part}")
|
||||
|
||||
if self.tool_calls is not None:
|
||||
# This is type FunctionCall
|
||||
for tool_call in self.tool_calls:
|
||||
@ -264,7 +306,11 @@ class Message(BaseMessage):
|
||||
# "message": response_string,
|
||||
# "time": formatted_time,
|
||||
# }
|
||||
assert text_content is not None, self
|
||||
if self.content and len(self.content) == 1 and isinstance(self.content[0], TextContent):
|
||||
text_content = self.content[0].text
|
||||
else:
|
||||
raise ValueError(f"Invalid tool return (no text object on message): {self.content}")
|
||||
|
||||
try:
|
||||
function_return = json.loads(text_content)
|
||||
status = function_return["status"]
|
||||
@ -292,7 +338,11 @@ class Message(BaseMessage):
|
||||
)
|
||||
elif self.role == MessageRole.user:
|
||||
# This is type UserMessage
|
||||
assert text_content is not None, self
|
||||
if self.content and len(self.content) == 1 and isinstance(self.content[0], TextContent):
|
||||
text_content = self.content[0].text
|
||||
else:
|
||||
raise ValueError(f"Invalid user message (no text object on message): {self.content}")
|
||||
|
||||
message_str = unpack_message(text_content)
|
||||
messages.append(
|
||||
UserMessage(
|
||||
@ -303,7 +353,11 @@ class Message(BaseMessage):
|
||||
)
|
||||
elif self.role == MessageRole.system:
|
||||
# This is type SystemMessage
|
||||
assert text_content is not None, self
|
||||
if self.content and len(self.content) == 1 and isinstance(self.content[0], TextContent):
|
||||
text_content = self.content[0].text
|
||||
else:
|
||||
raise ValueError(f"Invalid system message (no text object on system): {self.content}")
|
||||
|
||||
messages.append(
|
||||
SystemMessage(
|
||||
id=self.id,
|
||||
@ -335,6 +389,29 @@ class Message(BaseMessage):
|
||||
assert "role" in openai_message_dict, openai_message_dict
|
||||
assert "content" in openai_message_dict, openai_message_dict
|
||||
|
||||
# TODO(caren) implicit support for only non-parts/list content types
|
||||
if openai_message_dict["content"] is not None and type(openai_message_dict["content"]) is not str:
|
||||
raise ValueError(f"Invalid content type: {type(openai_message_dict['content'])}")
|
||||
content = [TextContent(text=openai_message_dict["content"])] if openai_message_dict["content"] else []
|
||||
|
||||
# TODO(caren) bad assumption here that "reasoning_content" always comes before "redacted_reasoning_content"
|
||||
if "reasoning_content" in openai_message_dict and openai_message_dict["reasoning_content"]:
|
||||
content.append(
|
||||
ReasoningContent(
|
||||
reasoning=openai_message_dict["reasoning_content"],
|
||||
is_native=True,
|
||||
signature=(
|
||||
openai_message_dict["reasoning_content_signature"] if openai_message_dict["reasoning_content_signature"] else None
|
||||
),
|
||||
),
|
||||
)
|
||||
if "redacted_reasoning_content" in openai_message_dict and openai_message_dict["redacted_reasoning_content"]:
|
||||
content.append(
|
||||
RedactedReasoningContent(
|
||||
data=openai_message_dict["redacted_reasoning_content"] if "redacted_reasoning_content" in openai_message_dict else None,
|
||||
),
|
||||
)
|
||||
|
||||
# If we're going from deprecated function form
|
||||
if openai_message_dict["role"] == "function":
|
||||
if not allow_functions_style:
|
||||
@ -348,7 +425,7 @@ class Message(BaseMessage):
|
||||
model=model,
|
||||
# standard fields expected in an OpenAI ChatCompletion message object
|
||||
role=MessageRole.tool, # NOTE
|
||||
content=[TextContent(text=openai_message_dict["content"])] if openai_message_dict["content"] else [],
|
||||
content=content,
|
||||
name=openai_message_dict["name"] if "name" in openai_message_dict else None,
|
||||
tool_calls=openai_message_dict["tool_calls"] if "tool_calls" in openai_message_dict else None,
|
||||
tool_call_id=openai_message_dict["tool_call_id"] if "tool_call_id" in openai_message_dict else None,
|
||||
@ -362,7 +439,7 @@ class Message(BaseMessage):
|
||||
model=model,
|
||||
# standard fields expected in an OpenAI ChatCompletion message object
|
||||
role=MessageRole.tool, # NOTE
|
||||
content=[TextContent(text=openai_message_dict["content"])] if openai_message_dict["content"] else [],
|
||||
content=content,
|
||||
name=openai_message_dict["name"] if "name" in openai_message_dict else None,
|
||||
tool_calls=openai_message_dict["tool_calls"] if "tool_calls" in openai_message_dict else None,
|
||||
tool_call_id=openai_message_dict["tool_call_id"] if "tool_call_id" in openai_message_dict else None,
|
||||
@ -395,7 +472,7 @@ class Message(BaseMessage):
|
||||
model=model,
|
||||
# standard fields expected in an OpenAI ChatCompletion message object
|
||||
role=MessageRole(openai_message_dict["role"]),
|
||||
content=[TextContent(text=openai_message_dict["content"])] if openai_message_dict["content"] else [],
|
||||
content=content,
|
||||
name=openai_message_dict["name"] if "name" in openai_message_dict else None,
|
||||
tool_calls=tool_calls,
|
||||
tool_call_id=None, # NOTE: None, since this field is only non-null for role=='tool'
|
||||
@ -409,7 +486,7 @@ class Message(BaseMessage):
|
||||
model=model,
|
||||
# standard fields expected in an OpenAI ChatCompletion message object
|
||||
role=MessageRole(openai_message_dict["role"]),
|
||||
content=[TextContent(text=openai_message_dict["content"])] if openai_message_dict["content"] else [],
|
||||
content=content,
|
||||
name=openai_message_dict["name"] if "name" in openai_message_dict else None,
|
||||
tool_calls=tool_calls,
|
||||
tool_call_id=None, # NOTE: None, since this field is only non-null for role=='tool'
|
||||
@ -442,7 +519,7 @@ class Message(BaseMessage):
|
||||
model=model,
|
||||
# standard fields expected in an OpenAI ChatCompletion message object
|
||||
role=MessageRole(openai_message_dict["role"]),
|
||||
content=[TextContent(text=openai_message_dict["content"])] if openai_message_dict["content"] else [],
|
||||
content=content,
|
||||
name=openai_message_dict["name"] if "name" in openai_message_dict else None,
|
||||
tool_calls=tool_calls,
|
||||
tool_call_id=openai_message_dict["tool_call_id"] if "tool_call_id" in openai_message_dict else None,
|
||||
@ -456,7 +533,7 @@ class Message(BaseMessage):
|
||||
model=model,
|
||||
# standard fields expected in an OpenAI ChatCompletion message object
|
||||
role=MessageRole(openai_message_dict["role"]),
|
||||
content=[TextContent(text=openai_message_dict["content"])] if openai_message_dict["content"] else [],
|
||||
content=content,
|
||||
name=openai_message_dict["name"] if "name" in openai_message_dict else None,
|
||||
tool_calls=tool_calls,
|
||||
tool_call_id=openai_message_dict["tool_call_id"] if "tool_call_id" in openai_message_dict else None,
|
||||
@ -477,11 +554,25 @@ class Message(BaseMessage):
|
||||
"""Go from Message class to ChatCompletion message object"""
|
||||
|
||||
# TODO change to pydantic casting, eg `return SystemMessageModel(self)`
|
||||
# If we only have one content part and it's text, treat it as COT
|
||||
parse_content_parts = False
|
||||
if self.content and len(self.content) == 1 and isinstance(self.content[0], TextContent):
|
||||
text_content = self.content[0].text
|
||||
# Otherwise, check if we have TextContent and multiple other parts
|
||||
elif self.content and len(self.content) > 1:
|
||||
text = [content for content in self.content if isinstance(self.content[0], TextContent)]
|
||||
if len(text) > 1:
|
||||
assert len(text) == 1, f"multiple text content parts found in a single message: {self.content}"
|
||||
text_content = text[0].text
|
||||
parse_content_parts = True
|
||||
else:
|
||||
text_content = None
|
||||
|
||||
# TODO(caren) we should eventually support multiple content parts here?
|
||||
# ie, actually make dict['content'] type list
|
||||
# But for now, it's OK until we support multi-modal,
|
||||
# since the only "parts" we have are for supporting various COT
|
||||
|
||||
if self.role == "system":
|
||||
assert all([v is not None for v in [self.role]]), vars(self)
|
||||
openai_message = {
|
||||
@ -539,6 +630,15 @@ class Message(BaseMessage):
|
||||
else:
|
||||
raise ValueError(self.role)
|
||||
|
||||
if parse_content_parts:
|
||||
for content in self.content:
|
||||
if isinstance(content, ReasoningContent):
|
||||
openai_message["reasoning_content"] = content.reasoning
|
||||
if content.signature:
|
||||
openai_message["reasoning_content_signature"] = content.signature
|
||||
if isinstance(content, RedactedReasoningContent):
|
||||
openai_message["redacted_reasoning_content"] = content.data
|
||||
|
||||
return openai_message
|
||||
|
||||
def to_anthropic_dict(
|
||||
@ -552,6 +652,8 @@ class Message(BaseMessage):
|
||||
Args:
|
||||
inner_thoughts_xml_tag (str): The XML tag to wrap around inner thoughts
|
||||
"""
|
||||
|
||||
# Check for COT
|
||||
if self.content and len(self.content) == 1 and isinstance(self.content[0], TextContent):
|
||||
text_content = self.content[0].text
|
||||
else:
|
||||
@ -587,7 +689,24 @@ class Message(BaseMessage):
|
||||
}
|
||||
content = []
|
||||
# COT / reasoning / thinking
|
||||
if text_content is not None and not put_inner_thoughts_in_kwargs:
|
||||
if len(self.content) > 1:
|
||||
for content_part in self.content:
|
||||
if isinstance(content_part, ReasoningContent):
|
||||
content.append(
|
||||
{
|
||||
"type": "thinking",
|
||||
"thinking": content_part.reasoning,
|
||||
"signature": content_part.signature,
|
||||
}
|
||||
)
|
||||
if isinstance(content_part, RedactedReasoningContent):
|
||||
content.append(
|
||||
{
|
||||
"type": "redacted_thinking",
|
||||
"data": content_part.data,
|
||||
}
|
||||
)
|
||||
elif text_content is not None:
|
||||
content.append(
|
||||
{
|
||||
"type": "text",
|
||||
|
@ -40,6 +40,8 @@ class Message(BaseModel):
|
||||
role: str
|
||||
function_call: Optional[FunctionCall] = None # Deprecated
|
||||
reasoning_content: Optional[str] = None # Used in newer reasoning APIs
|
||||
reasoning_content_signature: Optional[str] = None # NOTE: for Anthropic
|
||||
redacted_reasoning_content: Optional[str] = None # NOTE: for Anthropic
|
||||
|
||||
|
||||
class Choice(BaseModel):
|
||||
@ -117,6 +119,8 @@ class MessageDelta(BaseModel):
|
||||
|
||||
content: Optional[str] = None
|
||||
reasoning_content: Optional[str] = None
|
||||
reasoning_content_signature: Optional[str] = None # NOTE: for Anthropic
|
||||
redacted_reasoning_content: Optional[str] = None # NOTE: for Anthropic
|
||||
tool_calls: Optional[List[ToolCallDelta]] = None
|
||||
role: Optional[str] = None
|
||||
function_call: Optional[FunctionCallDelta] = None # Deprecated
|
||||
@ -140,3 +144,4 @@ class ChatCompletionChunkResponse(BaseModel):
|
||||
system_fingerprint: Optional[str] = None
|
||||
# object: str = Field(default="chat.completion")
|
||||
object: Literal["chat.completion.chunk"] = "chat.completion.chunk"
|
||||
output_tokens: int = 0
|
||||
|
@ -13,6 +13,7 @@ from letta.local_llm.constants import INNER_THOUGHTS_KWARG
|
||||
from letta.schemas.enums import MessageStreamStatus
|
||||
from letta.schemas.letta_message import (
|
||||
AssistantMessage,
|
||||
HiddenReasoningMessage,
|
||||
LegacyFunctionCallMessage,
|
||||
LegacyLettaMessage,
|
||||
LettaMessage,
|
||||
@ -22,6 +23,7 @@ from letta.schemas.letta_message import (
|
||||
ToolCallMessage,
|
||||
ToolReturnMessage,
|
||||
)
|
||||
from letta.schemas.letta_message_content import ReasoningContent, RedactedReasoningContent, TextContent
|
||||
from letta.schemas.message import Message
|
||||
from letta.schemas.openai.chat_completion_response import ChatCompletionChunkResponse
|
||||
from letta.server.rest_api.optimistic_json_parser import OptimisticJSONParser
|
||||
@ -478,7 +480,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
|
||||
|
||||
if (
|
||||
message_delta.content is None
|
||||
and (expect_reasoning_content and message_delta.reasoning_content is None)
|
||||
and (expect_reasoning_content and message_delta.reasoning_content is None and message_delta.redacted_reasoning_content is None)
|
||||
and message_delta.tool_calls is None
|
||||
and message_delta.function_call is None
|
||||
and choice.finish_reason is None
|
||||
@ -493,6 +495,15 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
|
||||
id=message_id,
|
||||
date=message_date,
|
||||
reasoning=message_delta.reasoning_content,
|
||||
signature=message_delta.reasoning_content_signature,
|
||||
source="reasoner_model" if message_delta.reasoning_content_signature else "non_reasoner_model",
|
||||
)
|
||||
elif expect_reasoning_content and message_delta.redacted_reasoning_content is not None:
|
||||
processed_chunk = HiddenReasoningMessage(
|
||||
id=message_id,
|
||||
date=message_date,
|
||||
hidden_reasoning=message_delta.redacted_reasoning_content,
|
||||
state="redacted",
|
||||
)
|
||||
elif expect_reasoning_content and message_delta.content is not None:
|
||||
# "ignore" content if we expect reasoning content
|
||||
@ -1071,13 +1082,39 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
|
||||
# "id": str(msg_obj.id) if msg_obj is not None else None,
|
||||
# }
|
||||
assert msg_obj is not None, "Internal monologue requires msg_obj references for metadata"
|
||||
processed_chunk = ReasoningMessage(
|
||||
id=msg_obj.id,
|
||||
date=msg_obj.created_at,
|
||||
reasoning=msg,
|
||||
)
|
||||
if msg_obj.content and len(msg_obj.content) == 1 and isinstance(msg_obj.content[0], TextContent):
|
||||
processed_chunk = ReasoningMessage(
|
||||
id=msg_obj.id,
|
||||
date=msg_obj.created_at,
|
||||
reasoning=msg,
|
||||
)
|
||||
|
||||
self._push_to_buffer(processed_chunk)
|
||||
self._push_to_buffer(processed_chunk)
|
||||
else:
|
||||
for content in msg_obj.content:
|
||||
if isinstance(content, TextContent):
|
||||
processed_chunk = ReasoningMessage(
|
||||
id=msg_obj.id,
|
||||
date=msg_obj.created_at,
|
||||
reasoning=content.text,
|
||||
)
|
||||
elif isinstance(content, ReasoningContent):
|
||||
processed_chunk = ReasoningMessage(
|
||||
id=msg_obj.id,
|
||||
date=msg_obj.created_at,
|
||||
source="reasoner_model",
|
||||
reasoning=content.reasoning,
|
||||
signature=content.signature,
|
||||
)
|
||||
elif isinstance(content, RedactedReasoningContent):
|
||||
processed_chunk = HiddenReasoningMessage(
|
||||
id=msg_obj.id,
|
||||
date=msg_obj.created_at,
|
||||
state="redacted",
|
||||
hidden_reasoning=content.data,
|
||||
)
|
||||
|
||||
self._push_to_buffer(processed_chunk)
|
||||
|
||||
return
|
||||
|
||||
|
@ -746,7 +746,13 @@ class SyncServer(Server):
|
||||
if request.llm_config is None:
|
||||
if request.model is None:
|
||||
raise ValueError("Must specify either model or llm_config in request")
|
||||
request.llm_config = self.get_llm_config_from_handle(handle=request.model, context_window_limit=request.context_window_limit)
|
||||
request.llm_config = self.get_llm_config_from_handle(
|
||||
handle=request.model,
|
||||
context_window_limit=request.context_window_limit,
|
||||
max_tokens=request.max_tokens,
|
||||
max_reasoning_tokens=request.max_reasoning_tokens,
|
||||
enable_reasoner=request.enable_reasoner,
|
||||
)
|
||||
|
||||
if request.embedding_config is None:
|
||||
if request.embedding is None:
|
||||
@ -1056,7 +1062,14 @@ class SyncServer(Server):
|
||||
# Merge the two dictionaries, keeping the values from providers_from_db where conflicts occur
|
||||
return {**providers_from_env, **providers_from_db}.values()
|
||||
|
||||
def get_llm_config_from_handle(self, handle: str, context_window_limit: Optional[int] = None) -> LLMConfig:
|
||||
def get_llm_config_from_handle(
|
||||
self,
|
||||
handle: str,
|
||||
context_window_limit: Optional[int] = None,
|
||||
max_tokens: Optional[int] = None,
|
||||
max_reasoning_tokens: Optional[int] = None,
|
||||
enable_reasoner: Optional[bool] = None,
|
||||
) -> LLMConfig:
|
||||
try:
|
||||
provider_name, model_name = handle.split("/", 1)
|
||||
provider = self.get_provider_from_name(provider_name)
|
||||
@ -1078,13 +1091,22 @@ class SyncServer(Server):
|
||||
else:
|
||||
llm_config = llm_configs[0]
|
||||
|
||||
if context_window_limit:
|
||||
if context_window_limit is not None:
|
||||
if context_window_limit > llm_config.context_window:
|
||||
raise ValueError(f"Context window limit ({context_window_limit}) is greater than maximum of ({llm_config.context_window})")
|
||||
llm_config.context_window = context_window_limit
|
||||
else:
|
||||
llm_config.context_window = min(llm_config.context_window, model_settings.global_max_context_window_limit)
|
||||
|
||||
if max_tokens is not None:
|
||||
llm_config.max_tokens = max_tokens
|
||||
if max_reasoning_tokens is not None:
|
||||
if not max_tokens or max_reasoning_tokens > max_tokens:
|
||||
raise ValueError(f"Max reasoning tokens ({max_reasoning_tokens}) must be less than max tokens ({max_tokens})")
|
||||
llm_config.max_reasoning_tokens = max_reasoning_tokens
|
||||
if enable_reasoner is not None:
|
||||
llm_config.enable_reasoner = enable_reasoner
|
||||
|
||||
return llm_config
|
||||
|
||||
def get_embedding_config_from_handle(
|
||||
|
1294
package-lock.json
generated
Normal file
1294
package-lock.json
generated
Normal file
File diff suppressed because it is too large
Load Diff
497
poetry.lock
generated
497
poetry.lock
generated
File diff suppressed because it is too large
Load Diff
@ -56,8 +56,8 @@ nltk = "^3.8.1"
|
||||
jinja2 = "^3.1.5"
|
||||
locust = {version = "^2.31.5", optional = true}
|
||||
wikipedia = {version = "^1.4.0", optional = true}
|
||||
composio-langchain = "^0.7.10"
|
||||
composio-core = "^0.7.10"
|
||||
composio-langchain = "^0.7.7"
|
||||
composio-core = "^0.7.7"
|
||||
alembic = "^1.13.3"
|
||||
pyhumps = "^3.8.0"
|
||||
psycopg2 = {version = "^2.9.10", optional = true}
|
||||
@ -73,7 +73,7 @@ grpcio-tools = "^1.68.1"
|
||||
llama-index = "^0.12.2"
|
||||
llama-index-embeddings-openai = "^0.3.1"
|
||||
e2b-code-interpreter = {version = "^1.0.3", optional = true}
|
||||
anthropic = "^0.43.0"
|
||||
anthropic = "^0.49.0"
|
||||
letta_client = "^0.1.65"
|
||||
openai = "^1.60.0"
|
||||
opentelemetry-api = "1.30.0"
|
||||
|
Loading…
Reference in New Issue
Block a user