Merge branch 'main' into bump-7-17

This commit is contained in:
Caren Thomas 2025-05-16 01:36:43 -07:00
commit 472811a563
33 changed files with 359 additions and 87 deletions

View File

@ -11,20 +11,25 @@ assignees: ''
A clear and concise description of what the bug is. A clear and concise description of what the bug is.
**Please describe your setup** **Please describe your setup**
- [ ] How did you install letta? - [ ] How are you running Letta?
- `pip install letta`? `pip install letta-nightly`? `git clone`? - Docker
- pip (legacy)
- From source
- Desktop
- [ ] Describe your setup - [ ] Describe your setup
- What's your OS (Windows/MacOS/Linux)? - What's your OS (Windows/MacOS/Linux)?
- How are you running `letta`? (`cmd.exe`/Powershell/Anaconda Shell/Terminal) - What is your `docker run ...` command (if applicable)
**Screenshots** **Screenshots**
If applicable, add screenshots to help explain your problem. If applicable, add screenshots to help explain your problem.
**Additional context** **Additional context**
Add any other context about the problem here. Add any other context about the problem here.
- What model you are using
**Agent File (optional)**
Please attach your `.af` file, as this helps with reproducing issues.
**Letta Config**
Please attach your `~/.letta/config` file or copy paste it below.
--- ---

View File

@ -1,19 +0,0 @@
name: Notify Letta Cloud
on:
push:
branches:
- main
jobs:
notify:
runs-on: ubuntu-latest
if: ${{ !contains(github.event.head_commit.message, '[sync-skip]') }}
steps:
- name: Trigger repository_dispatch
run: |
curl -X POST \
-H "Authorization: token ${{ secrets.SYNC_PAT }}" \
-H "Accept: application/vnd.github.v3+json" \
https://api.github.com/repos/letta-ai/letta-cloud/dispatches \
-d '{"event_type":"oss-update"}'

View File

@ -0,0 +1,155 @@
name: Send Message SDK Tests
on:
pull_request_target:
# branches: [main] # TODO: uncomment before merge
types: [labeled]
paths:
- 'letta/**'
jobs:
send-messages:
# Only run when the "safe to test" label is applied
if: contains(github.event.pull_request.labels.*.name, 'safe to test')
runs-on: ubuntu-latest
timeout-minutes: 15
strategy:
fail-fast: false
matrix:
config_file:
- "openai-gpt-4o-mini.json"
- "azure-gpt-4o-mini.json"
- "claude-3-5-sonnet.json"
- "claude-3-7-sonnet.json"
- "claude-3-7-sonnet-extended.json"
- "gemini-pro.json"
- "gemini-vertex.json"
services:
qdrant:
image: qdrant/qdrant
ports:
- 6333:6333
postgres:
image: pgvector/pgvector:pg17
ports:
- 5432:5432
env:
POSTGRES_HOST_AUTH_METHOD: trust
POSTGRES_DB: postgres
POSTGRES_USER: postgres
options: >-
--health-cmd pg_isready
--health-interval 10s
--health-timeout 5s
--health-retries 5
steps:
# Ensure secrets don't leak
- name: Configure git to hide secrets
run: |
git config --global core.logAllRefUpdates false
git config --global log.hideCredentials true
- name: Set up secret masking
run: |
# Automatically mask any environment variable ending with _KEY
for var in $(env | grep '_KEY=' | cut -d= -f1); do
value="${!var}"
if [[ -n "$value" ]]; then
# Mask the full value
echo "::add-mask::$value"
# Also mask partial values (first and last several characters)
# This helps when only parts of keys appear in logs
if [[ ${#value} -gt 8 ]]; then
echo "::add-mask::${value:0:8}"
echo "::add-mask::${value:(-8)}"
fi
# Also mask with common formatting changes
# Some logs might add quotes or other characters
echo "::add-mask::\"$value\""
echo "::add-mask::$value\""
echo "::add-mask::\"$value"
echo "Masked secret: $var (length: ${#value})"
fi
done
# Check out base repository code, not the PR's code (for security)
- name: Checkout base repository
uses: actions/checkout@v4 # No ref specified means it uses base branch
# Only extract relevant files from the PR (for security, specifically prevent modification of workflow files)
- name: Extract PR schema files
run: |
# Fetch PR without checking it out
git fetch origin pull/${{ github.event.pull_request.number }}/head:pr-${{ github.event.pull_request.number }}
# Extract ONLY the schema files
git checkout pr-${{ github.event.pull_request.number }} -- letta/
- name: Set up python 3.12
id: setup-python
uses: actions/setup-python@v5
with:
python-version: 3.12
- name: Load cached Poetry Binary
id: cached-poetry-binary
uses: actions/cache@v4
with:
path: ~/.local
key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-1.8.3
- name: Install Poetry
uses: snok/install-poetry@v1
with:
version: 1.8.3
virtualenvs-create: true
virtualenvs-in-project: true
- name: Load cached venv
id: cached-poetry-dependencies
uses: actions/cache@v4
with:
path: .venv
key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/poetry.lock') }}${{ inputs.install-args || '-E dev -E postgres -E external-tools -E tests -E cloud-tool-sandbox' }}
# Restore cache with this prefix if not exact match with key
# Note cache-hit returns false in this case, so the below step will run
restore-keys: |
venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-
- name: Install dependencies
if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true'
shell: bash
run: poetry install --no-interaction --no-root ${{ inputs.install-args || '-E dev -E postgres -E external-tools -E tests -E cloud-tool-sandbox -E google' }}
- name: Install letta packages via Poetry
run: |
poetry run pip install --upgrade letta-client letta
- name: Migrate database
env:
LETTA_PG_PORT: 5432
LETTA_PG_USER: postgres
LETTA_PG_PASSWORD: postgres
LETTA_PG_DB: postgres
LETTA_PG_HOST: localhost
run: |
psql -h localhost -U postgres -d postgres -c 'CREATE EXTENSION vector'
poetry run alembic upgrade head
- name: Run integration tests for ${{ matrix.config_file }}
env:
LLM_CONFIG_FILE: ${{ matrix.config_file }}
LETTA_PG_PORT: 5432
LETTA_PG_USER: postgres
LETTA_PG_PASSWORD: postgres
LETTA_PG_DB: postgres
LETTA_PG_HOST: localhost
LETTA_SERVER_PASS: test_server_token
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
AZURE_API_KEY: ${{ secrets.AZURE_API_KEY }}
AZURE_BASE_URL: ${{ secrets.AZURE_BASE_URL }}
GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
COMPOSIO_API_KEY: ${{ secrets.COMPOSIO_API_KEY }}
DEEPSEEK_API_KEY: ${{ secrets.DEEPSEEK_API_KEY }}
GOOGLE_CLOUD_PROJECT: ${{ secrets.GOOGLE_CLOUD_PROJECT }}
GOOGLE_CLOUD_LOCATION: ${{ secrets.GOOGLE_CLOUD_LOCATION }}
run: |
poetry run pytest \
-s -vv \
tests/integration_test_send_message.py \
--maxfail=1 --durations=10

View File

@ -28,7 +28,7 @@ First, install Poetry using [the official instructions here](https://python-poet
Once Poetry is installed, navigate to the letta directory and install the Letta project with Poetry: Once Poetry is installed, navigate to the letta directory and install the Letta project with Poetry:
```shell ```shell
cd letta cd letta
poetry shell eval $(poetry env activate)
poetry install --all-extras poetry install --all-extras
``` ```
#### Setup PostgreSQL environment (optional) #### Setup PostgreSQL environment (optional)

View File

@ -66,7 +66,6 @@ ENV LETTA_ENVIRONMENT=${LETTA_ENVIRONMENT} \
POSTGRES_DB=letta \ POSTGRES_DB=letta \
COMPOSIO_DISABLE_VERSION_CHECK=true COMPOSIO_DISABLE_VERSION_CHECK=true
WORKDIR /app WORKDIR /app
# Copy virtual environment and app from builder # Copy virtual environment and app from builder

View File

@ -8,26 +8,13 @@
<div align="center"> <div align="center">
<h1>Letta (previously MemGPT)</h1> <h1>Letta (previously MemGPT)</h1>
**☄️ New release: Letta Agent Development Environment (_read more [here](#-access-the-ade-agent-development-environment)_) ☄️**
<p align="center">
<picture>
<source media="(prefers-color-scheme: dark)" srcset="https://raw.githubusercontent.com/letta-ai/letta/refs/heads/main/assets/example_ade_screenshot.png">
<source media="(prefers-color-scheme: light)" srcset="https://raw.githubusercontent.com/letta-ai/letta/refs/heads/main/assets/example_ade_screenshot_light.png">
<img alt="Letta logo" src="https://raw.githubusercontent.com/letta-ai/letta/refs/heads/main/assets/example_ade_screenshot.png" width="800">
</picture>
</p>
---
<h3> <h3>
[Homepage](https://letta.com) // [Documentation](https://docs.letta.com) // [ADE](https://docs.letta.com/agent-development-environment) // [Letta Cloud](https://forms.letta.com/early-access) [Homepage](https://letta.com) // [Documentation](https://docs.letta.com) // [ADE](https://docs.letta.com/agent-development-environment) // [Letta Cloud](https://forms.letta.com/early-access)
</h3> </h3>
**👾 Letta** is an open source framework for building stateful LLM applications. You can use Letta to build **stateful agents** with advanced reasoning capabilities and transparent long-term memory. The Letta framework is white box and model-agnostic. **👾 Letta** is an open source framework for building **stateful agents** with advanced reasoning capabilities and transparent long-term memory. The Letta framework is white box and model-agnostic.
[![Discord](https://img.shields.io/discord/1161736243340640419?label=Discord&logo=discord&logoColor=5865F2&style=flat-square&color=5865F2)](https://discord.gg/letta) [![Discord](https://img.shields.io/discord/1161736243340640419?label=Discord&logo=discord&logoColor=5865F2&style=flat-square&color=5865F2)](https://discord.gg/letta)
[![Twitter Follow](https://img.shields.io/badge/Follow-%40Letta__AI-1DA1F2?style=flat-square&logo=x&logoColor=white)](https://twitter.com/Letta_AI) [![Twitter Follow](https://img.shields.io/badge/Follow-%40Letta__AI-1DA1F2?style=flat-square&logo=x&logoColor=white)](https://twitter.com/Letta_AI)
@ -157,7 +144,7 @@ No, the data in your Letta server database stays on your machine. The Letta ADE
> _"Do I have to use your ADE? Can I build my own?"_ > _"Do I have to use your ADE? Can I build my own?"_
The ADE is built on top of the (fully open source) Letta server and Letta Agents API. You can build your own application like the ADE on top of the REST API (view the documention [here](https://docs.letta.com/api-reference)). The ADE is built on top of the (fully open source) Letta server and Letta Agents API. You can build your own application like the ADE on top of the REST API (view the documentation [here](https://docs.letta.com/api-reference)).
> _"Can I interact with Letta agents via the CLI?"_ > _"Can I interact with Letta agents via the CLI?"_

View File

@ -28,7 +28,6 @@ services:
- "8083:8083" - "8083:8083"
- "8283:8283" - "8283:8283"
environment: environment:
- SERPAPI_API_KEY=${SERPAPI_API_KEY}
- LETTA_PG_DB=${LETTA_PG_DB:-letta} - LETTA_PG_DB=${LETTA_PG_DB:-letta}
- LETTA_PG_USER=${LETTA_PG_USER:-letta} - LETTA_PG_USER=${LETTA_PG_USER:-letta}
- LETTA_PG_PASSWORD=${LETTA_PG_PASSWORD:-letta} - LETTA_PG_PASSWORD=${LETTA_PG_PASSWORD:-letta}

View File

@ -8,6 +8,7 @@ If you're using Letta Cloud, replace 'baseURL' with 'token'
See: https://docs.letta.com/api-reference/overview See: https://docs.letta.com/api-reference/overview
Execute this script using `poetry run python3 example.py` Execute this script using `poetry run python3 example.py`
This will install `letta_client` and other dependencies.
""" """
client = Letta( client = Letta(
base_url="http://localhost:8283", base_url="http://localhost:8283",

View File

@ -2,22 +2,33 @@ from pprint import pprint
from letta_client import Letta from letta_client import Letta
# Connect to Letta server
client = Letta(base_url="http://localhost:8283") client = Letta(base_url="http://localhost:8283")
# Use the "everything" mcp server:
# https://github.com/modelcontextprotocol/servers/tree/main/src/everything
mcp_server_name = "everything" mcp_server_name = "everything"
mcp_tool_name = "echo" mcp_tool_name = "echo"
# List all McpTool belonging to the "everything" mcp server.
mcp_tools = client.tools.list_mcp_tools_by_server( mcp_tools = client.tools.list_mcp_tools_by_server(
mcp_server_name=mcp_server_name, mcp_server_name=mcp_server_name,
) )
# We can see that "echo" is one of the tools, but it's not
# a letta tool that can be added to a client (it has no tool id).
for tool in mcp_tools: for tool in mcp_tools:
pprint(tool) pprint(tool)
# Create a Tool (with a tool id) using the server and tool names.
mcp_tool = client.tools.add_mcp_tool( mcp_tool = client.tools.add_mcp_tool(
mcp_server_name=mcp_server_name, mcp_server_name=mcp_server_name,
mcp_tool_name=mcp_tool_name mcp_tool_name=mcp_tool_name
) )
# Create an agent with the tool, using tool.id -- note that
# this is the ONLY tool in the agent, you typically want to
# also include the default tools.
agent = client.agents.create( agent = client.agents.create(
memory_blocks=[ memory_blocks=[
{ {
@ -31,6 +42,7 @@ agent = client.agents.create(
) )
print(f"Created agent id {agent.id}") print(f"Created agent id {agent.id}")
# Ask the agent to call the tool.
response = client.agents.messages.create( response = client.agents.messages.create(
agent_id=agent.id, agent_id=agent.id,
messages=[ messages=[

View File

@ -253,15 +253,18 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 11, "execution_count": null,
"id": "7808912f-831b-4cdc-8606-40052eb809b4", "id": "7808912f-831b-4cdc-8606-40052eb809b4",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"from typing import Optional, List\n", "from typing import Optional, List, TYPE_CHECKING\n",
"import json\n", "import json\n",
"\n", "\n",
"def task_queue_push(self: \"Agent\", task_description: str):\n", "if TYPE_CHECKING:\n",
" from letta import AgentState\n",
"\n",
"def task_queue_push(agent_state: \"AgentState\", task_description: str):\n",
" \"\"\"\n", " \"\"\"\n",
" Push to a task queue stored in core memory. \n", " Push to a task queue stored in core memory. \n",
"\n", "\n",
@ -273,12 +276,12 @@
" does not produce a response.\n", " does not produce a response.\n",
" \"\"\"\n", " \"\"\"\n",
" import json\n", " import json\n",
" tasks = json.loads(self.memory.get_block(\"tasks\").value)\n", " tasks = json.loads(agent_state.memory.get_block(\"tasks\").value)\n",
" tasks.append(task_description)\n", " tasks.append(task_description)\n",
" self.memory.update_block_value(\"tasks\", json.dumps(tasks))\n", " agent_state.memory.update_block_value(\"tasks\", json.dumps(tasks))\n",
" return None\n", " return None\n",
"\n", "\n",
"def task_queue_pop(self: \"Agent\"):\n", "def task_queue_pop(agent_state: \"AgentState\"):\n",
" \"\"\"\n", " \"\"\"\n",
" Get the next task from the task queue \n", " Get the next task from the task queue \n",
"\n", "\n",
@ -288,12 +291,12 @@
" None (the task queue is empty)\n", " None (the task queue is empty)\n",
" \"\"\"\n", " \"\"\"\n",
" import json\n", " import json\n",
" tasks = json.loads(self.memory.get_block(\"tasks\").value)\n", " tasks = json.loads(agent_state.memory.get_block(\"tasks\").value)\n",
" if len(tasks) == 0: \n", " if len(tasks) == 0: \n",
" return None\n", " return None\n",
" task = tasks[0]\n", " task = tasks[0]\n",
" print(\"CURRENT TASKS: \", tasks)\n", " print(\"CURRENT TASKS: \", tasks)\n",
" self.memory.update_block_value(\"tasks\", json.dumps(tasks[1:]))\n", " agent_state.memory.update_block_value(\"tasks\", json.dumps(tasks[1:]))\n",
" return task\n", " return task\n",
"\n", "\n",
"push_task_tool = client.tools.upsert_from_function(func=task_queue_push)\n", "push_task_tool = client.tools.upsert_from_function(func=task_queue_push)\n",
@ -310,7 +313,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 12, "execution_count": null,
"id": "135fcf3e-59c4-4da3-b86b-dbffb21aa343", "id": "135fcf3e-59c4-4da3-b86b-dbffb21aa343",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
@ -336,10 +339,12 @@
" ),\n", " ),\n",
" CreateBlock(\n", " CreateBlock(\n",
" label=\"tasks\",\n", " label=\"tasks\",\n",
" value=\"\",\n", " value=\"[]\",\n",
" ),\n", " ),\n",
" ],\n", " ],\n",
" tool_ids=[push_task_tool.id, pop_task_tool.id],\n", " tool_ids=[push_task_tool.id, pop_task_tool.id],\n",
" model=\"letta/letta-free\",\n",
" embedding=\"letta/letta-free\",\n",
")" ")"
] ]
}, },

View File

@ -1,9 +1,9 @@
__version__ = "0.7.14" __version__ = "0.7.16"
# import clients # import clients
from letta.client.client import LocalClient, RESTClient, create_client from letta.client.client import LocalClient, RESTClient, create_client
# # imports for easier access # imports for easier access
from letta.schemas.agent import AgentState from letta.schemas.agent import AgentState
from letta.schemas.block import Block from letta.schemas.block import Block
from letta.schemas.embedding_config import EmbeddingConfig from letta.schemas.embedding_config import EmbeddingConfig

View File

@ -483,7 +483,7 @@ class Agent(BaseAgent):
response_message.function_call if response_message.function_call is not None else response_message.tool_calls[0].function response_message.function_call if response_message.function_call is not None else response_message.tool_calls[0].function
) )
function_name = function_call.name function_name = function_call.name
self.logger.info(f"Request to call function {function_name} with tool_call_id: {tool_call_id}") self.logger.debug(f"Request to call function {function_name} with tool_call_id: {tool_call_id}")
# Failure case 1: function name is wrong (not in agent_state.tools) # Failure case 1: function name is wrong (not in agent_state.tools)
target_letta_tool = None target_letta_tool = None

View File

@ -235,7 +235,9 @@ def embedding_model(config: EmbeddingConfig, user_id: Optional[uuid.UUID] = None
if endpoint_type == "openai": if endpoint_type == "openai":
return OpenAIEmbeddings( return OpenAIEmbeddings(
api_key=model_settings.openai_api_key, model=config.embedding_model, base_url=model_settings.openai_api_base api_key=model_settings.openai_api_key,
model=config.embedding_model,
base_url=model_settings.openai_api_base,
) )
elif endpoint_type == "azure": elif endpoint_type == "azure":

View File

@ -34,6 +34,19 @@ def resolve_type(annotation: str):
return BUILTIN_TYPES[annotation] return BUILTIN_TYPES[annotation]
try: try:
if annotation.startswith("list["):
inner_type = annotation[len("list[") : -1]
resolve_type(inner_type)
return list
elif annotation.startswith("dict["):
inner_types = annotation[len("dict[") : -1]
key_type, value_type = inner_types.split(",")
return dict
elif annotation.startswith("tuple["):
inner_types = annotation[len("tuple[") : -1]
[resolve_type(t.strip()) for t in inner_types.split(",")]
return tuple
parsed = ast.literal_eval(annotation) parsed = ast.literal_eval(annotation)
if isinstance(parsed, type): if isinstance(parsed, type):
return parsed return parsed

View File

@ -46,7 +46,7 @@ def conversation_search(self: "Agent", query: str, page: Optional[int] = 0) -> O
count = RETRIEVAL_QUERY_DEFAULT_PAGE_SIZE count = RETRIEVAL_QUERY_DEFAULT_PAGE_SIZE
# TODO: add paging by page number. currently cursor only works with strings. # TODO: add paging by page number. currently cursor only works with strings.
# original: start=page * count # original: start=page * count
messages = self.message_manager.list_user_messages_for_agent( messages = self.message_manager.list_messages_for_agent(
agent_id=self.agent_state.id, agent_id=self.agent_state.id,
actor=self.user, actor=self.user,
query_text=query, query_text=query,

View File

@ -3,14 +3,19 @@ from typing import Any, Dict, List
from anthropic import AnthropicBedrock from anthropic import AnthropicBedrock
from letta.log import get_logger
from letta.settings import model_settings from letta.settings import model_settings
logger = get_logger(__name__)
def has_valid_aws_credentials() -> bool: def has_valid_aws_credentials() -> bool:
""" """
Check if AWS credentials are properly configured. Check if AWS credentials are properly configured.
""" """
valid_aws_credentials = os.getenv("AWS_ACCESS_KEY") and os.getenv("AWS_SECRET_ACCESS_KEY") and os.getenv("AWS_REGION") valid_aws_credentials = (
os.getenv("AWS_ACCESS_KEY") is not None and os.getenv("AWS_SECRET_ACCESS_KEY") is not None and os.getenv("AWS_REGION") is not None
)
return valid_aws_credentials return valid_aws_credentials
@ -20,6 +25,7 @@ def get_bedrock_client():
""" """
import boto3 import boto3
logger.debug(f"Getting Bedrock client for {model_settings.aws_region}")
sts_client = boto3.client( sts_client = boto3.client(
"sts", "sts",
aws_access_key_id=model_settings.aws_access_key, aws_access_key_id=model_settings.aws_access_key,
@ -51,12 +57,13 @@ def bedrock_get_model_list(region_name: str) -> List[dict]:
""" """
import boto3 import boto3
logger.debug(f"Getting model list for {region_name}")
try: try:
bedrock = boto3.client("bedrock", region_name=region_name) bedrock = boto3.client("bedrock", region_name=region_name)
response = bedrock.list_inference_profiles() response = bedrock.list_inference_profiles()
return response["inferenceProfileSummaries"] return response["inferenceProfileSummaries"]
except Exception as e: except Exception as e:
print(f"Error getting model list: {str(e)}") logger.exception(f"Error getting model list: {str(e)}", e)
raise e raise e
@ -67,6 +74,7 @@ def bedrock_get_model_details(region_name: str, model_id: str) -> Dict[str, Any]
import boto3 import boto3
from botocore.exceptions import ClientError from botocore.exceptions import ClientError
logger.debug(f"Getting model details for {model_id}")
try: try:
bedrock = boto3.client("bedrock", region_name=region_name) bedrock = boto3.client("bedrock", region_name=region_name)
response = bedrock.get_foundation_model(modelIdentifier=model_id) response = bedrock.get_foundation_model(modelIdentifier=model_id)

View File

@ -55,7 +55,6 @@ def openai_check_valid_api_key(base_url: str, api_key: Union[str, None]) -> None
else: else:
raise ValueError("No API key provided") raise ValueError("No API key provided")
def openai_get_model_list(url: str, api_key: Optional[str] = None, fix_url: bool = False, extra_params: Optional[dict] = None) -> dict: def openai_get_model_list(url: str, api_key: Optional[str] = None, fix_url: bool = False, extra_params: Optional[dict] = None) -> dict:
"""https://platform.openai.com/docs/api-reference/models/list""" """https://platform.openai.com/docs/api-reference/models/list"""
from letta.utils import printd from letta.utils import printd

View File

@ -75,7 +75,8 @@ class LLMConfig(BaseModel):
description="The reasoning effort to use when generating text reasoning models", description="The reasoning effort to use when generating text reasoning models",
) )
max_reasoning_tokens: int = Field( max_reasoning_tokens: int = Field(
0, description="Configurable thinking budget for extended thinking, only used if enable_reasoner is True. Minimum value is 1024." 0,
description="Configurable thinking budget for extended thinking. Used for enable_reasoner and also for Google Vertex models like Gemini 2.5 Flash. Minimum value is 1024 when used with enable_reasoner.",
) )
# FIXME hack to silence pydantic protected namespace warning # FIXME hack to silence pydantic protected namespace warning

View File

@ -30,9 +30,7 @@ logger = get_logger(__name__)
responses={ responses={
200: { 200: {
"description": "Successful response", "description": "Successful response",
"content": { "content": {"text/event-stream": {}},
"text/event-stream": {"description": "Server-Sent Events stream"},
},
} }
}, },
) )

View File

@ -669,9 +669,7 @@ async def send_message(
responses={ responses={
200: { 200: {
"description": "Successful response", "description": "Successful response",
"content": { "content": {"text/event-stream": {}},
"text/event-stream": {"description": "Server-Sent Events stream"},
},
} }
}, },
) )
@ -696,7 +694,7 @@ async def send_message_streaming(
feature_enabled = settings.use_experimental or experimental_header.lower() == "true" feature_enabled = settings.use_experimental or experimental_header.lower() == "true"
model_compatible = agent.llm_config.model_endpoint_type in ["anthropic", "openai"] model_compatible = agent.llm_config.model_endpoint_type in ["anthropic", "openai"]
if agent_eligible and feature_enabled and model_compatible: if agent_eligible and feature_enabled and model_compatible and request.stream_tokens:
experimental_agent = LettaAgent( experimental_agent = LettaAgent(
agent_id=agent_id, agent_id=agent_id,
message_manager=server.message_manager, message_manager=server.message_manager,

View File

@ -78,6 +78,17 @@ def list_sources(
return server.list_all_sources(actor=actor) return server.list_all_sources(actor=actor)
@router.get("/count", response_model=int, operation_id="count_sources")
def count_sources(
server: "SyncServer" = Depends(get_letta_server),
actor_id: Optional[str] = Header(None, alias="user_id"), # Extract user_id from header, default to None if not present
):
"""
Count all data sources created by a user.
"""
return server.source_manager.size(actor=server.user_manager.get_user_or_default(user_id=actor_id))
@router.post("/", response_model=Source, operation_id="create_source") @router.post("/", response_model=Source, operation_id="create_source")
def create_source( def create_source(
source_create: SourceCreate, source_create: SourceCreate,

View File

@ -98,6 +98,21 @@ async def list_tools(
raise HTTPException(status_code=500, detail=str(e)) raise HTTPException(status_code=500, detail=str(e))
@router.get("/count", response_model=int, operation_id="count_tools")
def count_tools(
server: SyncServer = Depends(get_letta_server),
actor_id: Optional[str] = Header(None, alias="user_id"),
):
"""
Get a count of all tools available to agents belonging to the org of the user
"""
try:
return server.tool_manager.size(actor=server.user_manager.get_user_or_default(user_id=actor_id))
except Exception as e:
print(f"Error occurred: {e}")
raise HTTPException(status_code=500, detail=str(e))
@router.post("/", response_model=Tool, operation_id="create_tool") @router.post("/", response_model=Tool, operation_id="create_tool")
def create_tool( def create_tool(
request: ToolCreate = Body(...), request: ToolCreate = Body(...),

View File

@ -26,9 +26,7 @@ logger = get_logger(__name__)
responses={ responses={
200: { 200: {
"description": "Successful response", "description": "Successful response",
"content": { "content": {"text/event-stream": {}},
"text/event-stream": {"description": "Server-Sent Events stream"},
},
} }
}, },
) )

View File

@ -1191,8 +1191,13 @@ class AgentManager:
@enforce_types @enforce_types
async def get_in_context_messages_async(self, agent_id: str, actor: PydanticUser) -> List[PydanticMessage]: async def get_in_context_messages_async(self, agent_id: str, actor: PydanticUser) -> List[PydanticMessage]:
<<<<<<< HEAD
agent = await self.get_agent_by_id_async(agent_id=agent_id, actor=actor) agent = await self.get_agent_by_id_async(agent_id=agent_id, actor=actor)
return await self.message_manager.get_messages_by_ids_async(message_ids=agent.message_ids, actor=actor) return await self.message_manager.get_messages_by_ids_async(message_ids=agent.message_ids, actor=actor)
=======
message_ids = self.get_agent_by_id(agent_id=agent_id, actor=actor).message_ids
return await self.message_manager.get_messages_by_ids_async(message_ids=message_ids, actor=actor)
>>>>>>> main
@enforce_types @enforce_types
def get_system_message(self, agent_id: str, actor: PydanticUser) -> PydanticMessage: def get_system_message(self, agent_id: str, actor: PydanticUser) -> PydanticMessage:

View File

@ -373,17 +373,24 @@ class MessageManager:
if group_id: if group_id:
query = query.filter(MessageModel.group_id == group_id) query = query.filter(MessageModel.group_id == group_id)
# If query_text is provided, filter messages using subquery + json_array_elements. # If query_text is provided, filter messages by matching any "text" type content block
# whose text includes the query string (case-insensitive).
if query_text: if query_text:
content_element = func.json_array_elements(MessageModel.content).alias("content_element") dialect_name = session.bind.dialect.name
query = query.filter(
exists( if dialect_name == "postgresql": # using subquery + json_array_elements.
select(1) content_element = func.json_array_elements(MessageModel.content).alias("content_element")
.select_from(content_element) subquery_sql = text("content_element->>'type' = 'text' AND content_element->>'text' ILIKE :query_text")
.where(text("content_element->>'type' = 'text' AND content_element->>'text' ILIKE :query_text")) subquery = select(1).select_from(content_element).where(subquery_sql)
.params(query_text=f"%{query_text}%")
elif dialect_name == "sqlite": # using `json_each` and JSON path expressions
json_item = func.json_each(MessageModel.content).alias("json_item")
subquery_sql = text(
"json_extract(value, '$.type') = 'text' AND lower(json_extract(value, '$.text')) LIKE lower(:query_text)"
) )
) subquery = select(1).select_from(json_item).where(subquery_sql)
query = query.filter(exists(subquery.params(query_text=f"%{query_text}%")))
# If role(s) are provided, filter messages by those roles. # If role(s) are provided, filter messages by those roles.
if roles: if roles:

View File

@ -1,6 +1,6 @@
[tool.poetry] [tool.poetry]
name = "letta" name = "letta"
version = "0.7.14" version = "0.7.16"
packages = [ packages = [
{include = "letta"}, {include = "letta"},
] ]
@ -106,6 +106,7 @@ google = ["google-genai"]
desktop = ["pgvector", "pg8000", "psycopg2-binary", "psycopg2", "pyright", "websockets", "fastapi", "uvicorn", "docker", "langchain", "wikipedia", "langchain-community", "locust"] desktop = ["pgvector", "pg8000", "psycopg2-binary", "psycopg2", "pyright", "websockets", "fastapi", "uvicorn", "docker", "langchain", "wikipedia", "langchain-community", "locust"]
all = ["pgvector", "pg8000", "psycopg2-binary", "psycopg2", "pytest", "pytest-asyncio", "pexpect", "black", "pre-commit", "pyright", "pytest-order", "autoflake", "isort", "websockets", "fastapi", "uvicorn", "docker", "langchain", "wikipedia", "langchain-community", "locust"] all = ["pgvector", "pg8000", "psycopg2-binary", "psycopg2", "pytest", "pytest-asyncio", "pexpect", "black", "pre-commit", "pyright", "pytest-order", "autoflake", "isort", "websockets", "fastapi", "uvicorn", "docker", "langchain", "wikipedia", "langchain-community", "locust"]
[tool.poetry.group.dev.dependencies] [tool.poetry.group.dev.dependencies]
black = "^24.4.2" black = "^24.4.2"
ipykernel = "^6.29.5" ipykernel = "^6.29.5"

View File

@ -0,0 +1,32 @@
version: '3.7'
services:
redis:
image: redis:alpine
container_name: redis
healthcheck:
test: ['CMD-SHELL', 'redis-cli ping | grep PONG']
interval: 1s
timeout: 3s
retries: 5
ports:
- '6379:6379'
volumes:
- ./data/redis:/data
command: redis-server --appendonly yes
postgres:
image: ankane/pgvector
container_name: postgres
healthcheck:
test: ['CMD-SHELL', 'pg_isready -U postgres']
interval: 1s
timeout: 3s
retries: 5
ports:
- '5432:5432'
environment:
POSTGRES_USER: postgres
POSTGRES_PASSWORD: postgres
POSTGRES_DB: letta
volumes:
- ./data/postgres:/var/lib/postgresql/data
- ./scripts/postgres-db-init/init.sql:/docker-entrypoint-initdb.d/init.sql

View File

@ -155,6 +155,7 @@ async def test_sleeptime_group_chat(server, actor):
# 6. Verify run status after sleep # 6. Verify run status after sleep
time.sleep(2) time.sleep(2)
for run_id in run_ids: for run_id in run_ids:
job = server.job_manager.get_job_by_id(job_id=run_id, actor=actor) job = server.job_manager.get_job_by_id(job_id=run_id, actor=actor)
assert job.status == JobStatus.running or job.status == JobStatus.completed assert job.status == JobStatus.running or job.status == JobStatus.completed

View File

@ -564,7 +564,6 @@ def _modify(group_id, server, actor, max_val, min_val):
actor=actor, actor=actor,
) )
def test_valid_buffer_lengths_above_four(group_id, server, actor): def test_valid_buffer_lengths_above_four(group_id, server, actor):
# both > 4 and max > min # both > 4 and max > min
updated = _modify(group_id, server, actor, max_val=10, min_val=5) updated = _modify(group_id, server, actor, max_val=10, min_val=5)

View File

@ -127,13 +127,14 @@ def test_archival(agent_obj):
pass pass
def test_recall(client, agent_obj): def test_recall_self(client, agent_obj):
# keyword # keyword
keyword = "banana" keyword = "banana"
keyword_backwards = "".join(reversed(keyword))
# Send messages to agent # Send messages to agent
client.send_message(agent_id=agent_obj.agent_state.id, role="user", message="hello") client.send_message(agent_id=agent_obj.agent_state.id, role="user", message="hello")
client.send_message(agent_id=agent_obj.agent_state.id, role="user", message=keyword) client.send_message(agent_id=agent_obj.agent_state.id, role="user", message="what word is '{}' backwards?".format(keyword_backwards))
client.send_message(agent_id=agent_obj.agent_state.id, role="user", message="tell me a fun fact") client.send_message(agent_id=agent_obj.agent_state.id, role="user", message="tell me a fun fact")
# Conversation search # Conversation search

View File

@ -1612,6 +1612,46 @@ def test_modify_letta_message(server: SyncServer, sarah_agent, default_user):
# TODO: tool calls/responses # TODO: tool calls/responses
def test_list_messages_with_query_text_filter(server: SyncServer, sarah_agent, default_user):
    """
    Ensure that list_messages_for_agent correctly filters messages by query_text.

    Seeds five user messages with known contents, then checks that:
    - a query for "unicorns" returns exactly the two messages containing it,
    - a query for "dragons" returns exactly the one message containing it,
    - a query with no occurrences returns an empty list.
    """
    # Fixed corpus: "unicorns" appears in two messages, "dragons" in one.
    test_contents = [
        "This is a message about unicorns and rainbows.",
        "Another message discussing dragons in the sky.",
        "Plain message with no magical beasts.",
        "Mentioning unicorns again for good measure.",
        "Something unrelated entirely.",
    ]
    created_messages = []
    for content in test_contents:
        # Content is given as a raw text-chunk dict; presumably coerced into
        # a content-part model by PydanticMessage validation — TODO confirm.
        message = PydanticMessage(
            agent_id=sarah_agent.id,
            role=MessageRole.user,
            content=[{"type": "text", "text": content}],
        )
        created = server.message_manager.create_message(pydantic_msg=message, actor=default_user)
        created_messages.append(created)
    # Query messages that include "unicorns" — expect exactly the two seeded above.
    unicorn_messages = server.message_manager.list_messages_for_agent(agent_id=sarah_agent.id, actor=default_user, query_text="unicorns")
    assert len(unicorn_messages) == 2
    for msg in unicorn_messages:
        # Match is case-insensitive here; NOTE(review): assumes query_text
        # matching on the server side is also case-insensitive — verify.
        assert any(chunk.type == "text" and "unicorns" in chunk.text.lower() for chunk in msg.content or [])
    # Query messages that include "dragons" — expect exactly the one seeded above.
    dragon_messages = server.message_manager.list_messages_for_agent(agent_id=sarah_agent.id, actor=default_user, query_text="dragons")
    assert len(dragon_messages) == 1
    assert any(chunk.type == "text" and "dragons" in chunk.text.lower() for chunk in dragon_messages[0].content or [])
    # Query with a word that shouldn't match any message.
    no_match_messages = server.message_manager.list_messages_for_agent(
        agent_id=sarah_agent.id, actor=default_user, query_text="nonexistentcreature"
    )
    assert len(no_match_messages) == 0
# ====================================================================================================================== # ======================================================================================================================
# AgentManager Tests - Blocks Relationship # AgentManager Tests - Blocks Relationship
# ====================================================================================================================== # ======================================================================================================================

View File

@ -115,7 +115,7 @@ def test_shared_blocks(client: LettaSDKClient):
) )
assert ( assert (
"charles" in client.agents.blocks.retrieve(agent_id=agent_state2.id, block_label="human").value.lower() "charles" in client.agents.blocks.retrieve(agent_id=agent_state2.id, block_label="human").value.lower()
), f"Shared block update failed {client.agents.blocks.retrieve(agent_id=agent_state2.id, block_label="human").value}" ), f"Shared block update failed {client.agents.blocks.retrieve(agent_id=agent_state2.id, block_label='human').value}"
# cleanup # cleanup
client.agents.delete(agent_state1.id) client.agents.delete(agent_state1.id)

View File

@ -8,10 +8,9 @@ def adjust_menu_prices(percentage: float) -> str:
str: A formatted string summarizing the price adjustments. str: A formatted string summarizing the price adjustments.
""" """
import cowsay import cowsay
from tqdm import tqdm
from core.menu import Menu, MenuItem # Import a class from the codebase from core.menu import Menu, MenuItem # Import a class from the codebase
from core.utils import format_currency # Use a utility function to test imports from core.utils import format_currency # Use a utility function to test imports
from tqdm import tqdm
if not isinstance(percentage, (int, float)): if not isinstance(percentage, (int, float)):
raise TypeError("percentage must be a number") raise TypeError("percentage must be a number")