chore: support alembic (#1867)

Co-authored-by: Shubham Naik <shub@memgpt.ai>
Co-authored-by: Sarah Wooders <sarahwooders@gmail.com>
Shubham Naik authored 2024-10-11 15:51:14 -07:00, committed by GitHub
parent d7340eaa4d
commit 8fc8c55f14
15 changed files with 334 additions and 112 deletions

.github/workflows/migration-test.yml (vendored, new file, 34 lines)

@@ -0,0 +1,34 @@
name: Alembic Migration Tester
on:
pull_request:
paths:
- '**.py'
workflow_dispatch:
jobs:
test:
runs-on: ubuntu-latest
timeout-minutes: 15
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Build and run container
run: bash db/run_postgres.sh
- name: "Setup Python, Poetry and Dependencies"
uses: packetcoders/action-setup-cache-python-poetry@main
with:
python-version: "3.12"
poetry-version: "1.8.2"
install-args: "--all-extras"
- name: Test alembic migration
env:
LETTA_PG_PORT: 8888
LETTA_PG_USER: letta
LETTA_PG_PASSWORD: letta
LETTA_PG_DB: letta
LETTA_PG_HOST: localhost
LETTA_SERVER_PASS: test_server_token
run: |
poetry run alembic upgrade head
poetry run alembic check
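
The workflow drives Alembic through its CLI, but the same two steps can be reproduced through Alembic's Python API, which is convenient when debugging a failing run of this job locally. A minimal sketch, assuming an `alembic.ini` at the repository root and the same `LETTA_PG_*` variables exported:

```python
# Sketch: mirror the CI steps via Alembic's command API (assumes alembic.ini in cwd).
from alembic import command
from alembic.config import Config

cfg = Config("alembic.ini")
command.upgrade(cfg, "head")  # same as `poetry run alembic upgrade head`
command.check(cfg)            # raises AutogenerateDiffsDetected if models and schema drifted
```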

CONTRIBUTING.md

@@ -65,6 +65,7 @@ $ . venv/bin/activate
If you are having dependency issues using `pip`, we recommend you install the package using Poetry. Installing Letta from source using Poetry will ensure that you are using exact package versions that have been tested for the production build.
#### (Optional) Installing pre-commit
We recommend installing pre-commit to ensure proper formatting during development:
```
@@ -86,6 +87,21 @@ git checkout -b feature/your-feature
Now, the world is your oyster! Go ahead and craft your fabulous changes. 🎨
#### Handling Database Migrations
If you are running Letta for the first time, your database will be set up automatically. If you are updating Letta, you may need to run migrations. To run migrations, use the following command:
```shell
poetry run alembic upgrade head
```
#### Creating a New Database Migration
If you have made changes to the database models, you will need to create a new migration. To do so, use the following command:
```shell
poetry run alembic revision --autogenerate -m "Your migration message here"
```
Visit the [Alembic documentation](https://alembic.sqlalchemy.org/en/latest/tutorial.html) for more information on creating and running migrations.
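For reference, an autogenerated revision is an ordinary Python file written to `alembic/versions/`, with paired `upgrade`/`downgrade` functions built from `op` calls. A hypothetical example of what `--autogenerate` might emit (the `agents` table and `description` column here are illustrative, not part of this change):

```python
# Hypothetical autogenerated revision body; table and column names are made up.
import sqlalchemy as sa
from alembic import op


def upgrade() -> None:
    op.add_column("agents", sa.Column("description", sa.String(), nullable=True))


def downgrade() -> None:
    op.drop_column("agents", "description")
```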
## 3. ✅ Testing
Before we hit the 'Wow, I'm Done' button, let's make sure everything works as expected. Run tests and make sure the existing ones don't throw a fit. And if needed, create new tests. 🕵️

alembic.ini (new file, 116 lines)

@@ -0,0 +1,116 @@
# A generic, single database configuration.
[alembic]
# path to migration scripts
# Use forward slashes (/) also on Windows to provide an OS-agnostic path
script_location = alembic
# template used to generate migration file names; The default value is %%(rev)s_%%(slug)s
# Uncomment the line below if you want the files to be prepended with date and time
# see https://alembic.sqlalchemy.org/en/latest/tutorial.html#editing-the-ini-file
# for all available tokens
# file_template = %%(year)d_%%(month).2d_%%(day).2d_%%(hour).2d%%(minute).2d-%%(rev)s_%%(slug)s
# sys.path path, will be prepended to sys.path if present.
# defaults to the current working directory.
prepend_sys_path = .
# timezone to use when rendering the date within the migration file
# as well as the filename.
# If specified, requires the python>=3.9 or backports.zoneinfo library.
# Any required deps can be installed by adding `alembic[tz]` to the pip requirements
# string value is passed to ZoneInfo()
# leave blank for localtime
# timezone =
# max length of characters to apply to the "slug" field
# truncate_slug_length = 40
# set to 'true' to run the environment during
# the 'revision' command, regardless of autogenerate
# revision_environment = false
# set to 'true' to allow .pyc and .pyo files without
# a source .py file to be detected as revisions in the
# versions/ directory
# sourceless = false
# version location specification; This defaults
# to alembic/versions. When using multiple version
# directories, initial revisions must be specified with --version-path.
# The path separator used here should be the separator specified by "version_path_separator" below.
# version_locations = %(here)s/bar:%(here)s/bat:alembic/versions
# version path separator; As mentioned above, this is the character used to split
# version_locations. The default within new alembic.ini files is "os", which uses os.pathsep.
# If this key is omitted entirely, it falls back to the legacy behavior of splitting on spaces and/or commas.
# Valid values for version_path_separator are:
#
# version_path_separator = :
# version_path_separator = ;
# version_path_separator = space
version_path_separator = os # Use os.pathsep. Default configuration used for new projects.
# set to 'true' to search source files recursively
# in each "version_locations" directory
# new in Alembic version 1.10
# recursive_version_locations = false
# the output encoding used when revision files
# are written from script.py.mako
# output_encoding = utf-8
sqlalchemy.url = driver://user:pass@localhost/dbname
[post_write_hooks]
# post_write_hooks defines scripts or Python functions that are run
# on newly generated revision scripts. See the documentation for further
# detail and examples
# format using "black" - use the console_scripts runner, against the "black" entrypoint
# hooks = black
# black.type = console_scripts
# black.entrypoint = black
# black.options = -l 79 REVISION_SCRIPT_FILENAME
# lint with attempts to fix using "ruff" - use the exec runner, execute a binary
# hooks = ruff
# ruff.type = exec
# ruff.executable = %(here)s/.venv/bin/ruff
# ruff.options = --fix REVISION_SCRIPT_FILENAME
# Logging configuration
[loggers]
keys = root,sqlalchemy,alembic
[handlers]
keys = console
[formatters]
keys = generic
[logger_root]
level = WARN
handlers = console
qualname =
[logger_sqlalchemy]
level = WARN
handlers =
qualname = sqlalchemy.engine
[logger_alembic]
level = INFO
handlers =
qualname = alembic
[handler_console]
class = StreamHandler
args = (sys.stderr,)
level = NOTSET
formatter = generic
[formatter_generic]
format = %(levelname)-5.5s [%(name)s] %(message)s
datefmt = %H:%M:%S
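
Note that `sqlalchemy.url` above is left as the stock placeholder: `alembic/env.py` (added below) overwrites it at runtime from Letta's settings. If you need to inspect what Alembic actually sees, the ini values are exposed through its `Config` object; a small sketch:

```python
# Sketch: read values from alembic.ini the way Alembic does.
from alembic.config import Config

cfg = Config("alembic.ini")
print(cfg.get_main_option("script_location"))  # "alembic"
print(cfg.get_main_option("sqlalchemy.url"))   # placeholder until env.py overrides it
```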

alembic/README (new file, 1 line)

@@ -0,0 +1 @@
Generic single-database configuration.

alembic/env.py (new file, 84 lines)

@@ -0,0 +1,84 @@
import os
from logging.config import fileConfig
from sqlalchemy import engine_from_config, pool
from alembic import context
from letta.base import Base
from letta.config import LettaConfig
from letta.settings import settings
# this is the Alembic Config object, which provides
# access to the values within the .ini file in use.
config = context.config
# Prefer the configured Postgres URI; otherwise fall back to the local SQLite
# database, mirroring the engine selection in server.py.
if settings.letta_pg_uri_no_default:
    config.set_main_option("sqlalchemy.url", settings.letta_pg_uri)
else:
    # the sqlite path lives on LettaConfig, not on Alembic's Config object
    letta_config = LettaConfig.load()
    config.set_main_option("sqlalchemy.url", "sqlite:///" + os.path.join(letta_config.recall_storage_path, "sqlite.db"))
# Interpret the config file for Python logging.
# This sets up the loggers.
if config.config_file_name is not None:
fileConfig(config.config_file_name)
# add your model's MetaData object here
# for 'autogenerate' support
# from myapp import mymodel
# target_metadata = mymodel.Base.metadata
target_metadata = Base.metadata
# other values from the config, defined by the needs of env.py,
# can be acquired:
# my_important_option = config.get_main_option("my_important_option")
# ... etc.
def run_migrations_offline() -> None:
"""Run migrations in 'offline' mode.
This configures the context with just a URL
and not an Engine, though an Engine is acceptable
here as well. By skipping the Engine creation
we don't even need a DBAPI to be available.
Calls to context.execute() here emit the given string to the
script output.
"""
url = config.get_main_option("sqlalchemy.url")
context.configure(
url=url,
target_metadata=target_metadata,
literal_binds=True,
dialect_opts={"paramstyle": "named"},
)
with context.begin_transaction():
context.run_migrations()
def run_migrations_online() -> None:
"""Run migrations in 'online' mode.
In this scenario we need to create an Engine
and associate a connection with the context.
"""
connectable = engine_from_config(
config.get_section(config.config_ini_section, {}),
prefix="sqlalchemy.",
poolclass=pool.NullPool,
)
with connectable.connect() as connection:
context.configure(connection=connection, target_metadata=target_metadata, include_schemas=True)
with context.begin_transaction():
context.run_migrations()
if context.is_offline_mode():
run_migrations_offline()
else:
run_migrations_online()
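
Because the offline path configures the context with only a URL, migrations can be rendered to SQL without a reachable database, which is useful for reviewing exactly what `upgrade head` will execute. A sketch using the command API (equivalent to `alembic upgrade head --sql`):

```python
# Sketch: emit migration SQL instead of executing it (offline mode).
from alembic import command
from alembic.config import Config

cfg = Config("alembic.ini")
command.upgrade(cfg, "head", sql=True)  # prints the SQL to stdout
```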

alembic/script.py.mako (new file, 26 lines)

@@ -0,0 +1,26 @@
"""${message}
Revision ID: ${up_revision}
Revises: ${down_revision | comma,n}
Create Date: ${create_date}
"""
from typing import Sequence, Union
from alembic import op
import sqlalchemy as sa
${imports if imports else ""}
# revision identifiers, used by Alembic.
revision: str = ${repr(up_revision)}
down_revision: Union[str, None] = ${repr(down_revision)}
branch_labels: Union[str, Sequence[str], None] = ${repr(branch_labels)}
depends_on: Union[str, Sequence[str], None] = ${repr(depends_on)}
def upgrade() -> None:
${upgrades if upgrades else "pass"}
def downgrade() -> None:
${downgrades if downgrades else "pass"}

alembic/versions/9a505cc7eca9_create_a_baseline_migrations.py (new file, 27 lines)

@@ -0,0 +1,27 @@
"""Create a baseline migrations
Revision ID: 9a505cc7eca9
Revises:
Create Date: 2024-10-11 14:19:19.875656
"""
from typing import Sequence, Union
# revision identifiers, used by Alembic.
revision: str = "9a505cc7eca9"
down_revision: Union[str, None] = None
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
# ### commands auto generated by Alembic - please adjust! ###
pass
# ### end Alembic commands ###
def downgrade() -> None:
# ### commands auto generated by Alembic - please adjust! ###
pass
# ### end Alembic commands ###
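
This baseline revision is intentionally empty: it gives the migration history a starting point without touching tables that `create_all` already built. A database created by an earlier Letta version therefore has the tables but no `alembic_version` row; one way to adopt it into the history is to stamp it at head, which records the revision without running any operations. This is a sketch of a manual step, not something the commit performs automatically:

```python
# Sketch: mark an existing, already-correct database as being at the latest revision.
from alembic import command
from alembic.config import Config

cfg = Config("alembic.ini")
command.stamp(cfg, "head")  # writes the head revision id into alembic_version
```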

letta/agent_store/db.py

@@ -18,13 +18,14 @@ from sqlalchemy import (
select,
text,
)
-from sqlalchemy.orm import declarative_base, mapped_column
+from sqlalchemy.orm import mapped_column
from sqlalchemy.orm.session import close_all_sessions
from sqlalchemy.sql import func
from sqlalchemy_json import MutableJson
from tqdm import tqdm
from letta.agent_store.storage import StorageConnector, TableType
+from letta.base import Base
from letta.config import LettaConfig
from letta.constants import MAX_EMBEDDING_DIM
from letta.metadata import EmbeddingConfigColumn, ToolCallColumn
@@ -35,7 +36,6 @@ from letta.schemas.openai.chat_completions import ToolCall
from letta.schemas.passage import Passage
from letta.settings import settings
-Base = declarative_base()
config = LettaConfig()
@@ -560,3 +560,9 @@ class SQLLiteStorageConnector(SQLStorageConnector):
# Commit the changes to the database
session.commit()
+def attach_base():
+    # This should be invoked in server.py to make sure Base gets initialized properly
+    # DO NOT REMOVE
+    print("Initializing database...")

letta/base.py (new file, 3 lines)

@@ -0,0 +1,3 @@
from sqlalchemy.orm import declarative_base
Base = declarative_base()
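
This three-line module is the crux of the refactor: previously `agent_store/db.py` and `metadata.py` each called `declarative_base()`, so their models were registered on two unrelated `MetaData` objects, and neither `create_all` nor Alembic's autogenerate could see the full schema from one place. A minimal standalone sketch of that failure mode, with throwaway model names:

```python
# Sketch: two separate declarative bases means two separate MetaData registries.
from sqlalchemy import Column, Integer, create_engine, inspect
from sqlalchemy.orm import declarative_base

BaseA = declarative_base()  # stand-in for the old Base in agent_store/db.py
BaseB = declarative_base()  # stand-in for the old Base in metadata.py


class Foo(BaseA):
    __tablename__ = "foo"
    id = Column(Integer, primary_key=True)


class Bar(BaseB):
    __tablename__ = "bar"
    id = Column(Integer, primary_key=True)


engine = create_engine("sqlite://")
BaseA.metadata.create_all(engine)        # creates only "foo"
print(inspect(engine).has_table("bar"))  # False: "bar" is registered on BaseB.metadata
```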

letta/metadata.py

@@ -14,11 +14,10 @@ from sqlalchemy import (
String,
TypeDecorator,
desc,
-    func,
)
-from sqlalchemy.orm import declarative_base
from sqlalchemy.sql import func
+from letta.base import Base
from letta.config import LettaConfig
from letta.schemas.agent import AgentState
from letta.schemas.api_key import APIKey
@@ -28,6 +27,8 @@ from letta.schemas.enums import JobStatus
from letta.schemas.job import Job
from letta.schemas.llm_config import LLMConfig
from letta.schemas.memory import Memory
# from letta.schemas.message import Message, Passage, Record, RecordType, ToolCall
from letta.schemas.openai.chat_completions import ToolCall, ToolCallFunction
from letta.schemas.organization import Organization
from letta.schemas.source import Source
@@ -36,8 +37,6 @@ from letta.schemas.user import User
from letta.settings import settings
from letta.utils import enforce_types, get_utc_time, printd
-Base = declarative_base()
class LLMConfigColumn(TypeDecorator):
"""Custom type for storing LLMConfig as JSON"""

letta/server/server.py

@@ -14,8 +14,8 @@ import letta.constants as constants
import letta.server.utils as server_utils
import letta.system as system
from letta.agent import Agent, save_agent
+from letta.agent_store.db import attach_base
from letta.agent_store.storage import StorageConnector, TableType
from letta.config import LettaConfig
from letta.credentials import LettaCredentials
from letta.data_sources.connectors import DataConnector, load_data
@@ -41,7 +41,7 @@ from letta.interface import AgentInterface # abstract
from letta.interface import CLIInterface # for printing to terminal
from letta.log import get_logger
from letta.memory import get_memory_functions
-from letta.metadata import MetadataStore
+from letta.metadata import Base, MetadataStore
from letta.prompts import gpt_system
from letta.providers import (
AnthropicProvider,
@@ -150,23 +150,11 @@ class Server(object):
from sqlalchemy import create_engine
-from sqlalchemy.orm import declarative_base, sessionmaker
+from sqlalchemy.orm import sessionmaker
from letta.agent_store.db import MessageModel, PassageModel
from letta.config import LettaConfig
-# NOTE: hack to see if single session management works
-from letta.metadata import (
-    AgentModel,
-    AgentSourceMappingModel,
-    APIKeyModel,
-    BlockModel,
-    JobModel,
-    OrganizationModel,
-    SourceModel,
-    ToolModel,
-    UserModel,
-)
from letta.settings import model_settings, settings
config = LettaConfig.load()
@@ -183,24 +171,12 @@ else:
# TODO: don't rely on config storage
engine = create_engine("sqlite:///" + os.path.join(config.recall_storage_path, "sqlite.db"))
-Base = declarative_base()
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
-Base.metadata.create_all(
-    engine,
-    tables=[
-        UserModel.__table__,
-        AgentModel.__table__,
-        SourceModel.__table__,
-        AgentSourceMappingModel.__table__,
-        APIKeyModel.__table__,
-        BlockModel.__table__,
-        ToolModel.__table__,
-        JobModel.__table__,
-        PassageModel.__table__,
-        MessageModel.__table__,
-        OrganizationModel.__table__,
-    ],
-)
+attach_base()
+Base.metadata.create_all(bind=engine)
# Dependency
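
With every model on the shared `Base`, a single `create_all` now covers the whole schema, but `create_all` only creates missing tables; it never alters existing ones, which is precisely the gap Alembic fills. To see where a given database stands in the migration history, you can read its `alembic_version` table; a sketch (the SQLite URL is illustrative):

```python
# Sketch: query the revision recorded in a database's alembic_version table.
from alembic.migration import MigrationContext
from sqlalchemy import create_engine

engine = create_engine("sqlite:///sqlite.db")  # illustrative URL
with engine.connect() as conn:
    ctx = MigrationContext.configure(conn)
    print(ctx.get_current_revision())  # e.g. "9a505cc7eca9", or None if never stamped
```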

poetry.lock (generated, 12 lines changed)

@@ -139,13 +139,13 @@ frozenlist = ">=1.1.0"
[[package]]
name = "alembic"
version = "1.13.2"
version = "1.13.3"
description = "A database migration tool for SQLAlchemy."
-optional = true
+optional = false
python-versions = ">=3.8"
files = [
{file = "alembic-1.13.2-py3-none-any.whl", hash = "sha256:6b8733129a6224a9a711e17c99b08462dbf7cc9670ba8f2e2ae9af860ceb1953"},
{file = "alembic-1.13.2.tar.gz", hash = "sha256:1ff0ae32975f4fd96028c39ed9bb3c867fe3af956bd7bb37343b54c9fe7445ef"},
{file = "alembic-1.13.3-py3-none-any.whl", hash = "sha256:908e905976d15235fae59c9ac42c4c5b75cfcefe3d27c0fbf7ae15a37715d80e"},
{file = "alembic-1.13.3.tar.gz", hash = "sha256:203503117415561e203aa14541740643a611f641517f0209fcae63e9fa09f1a2"},
]
[package.dependencies]
@@ -3814,7 +3814,7 @@ Werkzeug = ">=2.0.0"
name = "mako"
version = "1.3.5"
description = "A super-fast templating language that borrows the best ideas from the existing templating languages."
-optional = true
+optional = false
python-versions = ">=3.8"
files = [
{file = "Mako-1.3.5-py3-none-any.whl", hash = "sha256:260f1dbc3a519453a9c856dedfe4beb4e50bd5a26d96386cb6c80856556bb91a"},
@@ -8354,4 +8354,4 @@ tests = ["wikipedia"]
[metadata]
lock-version = "2.0"
python-versions = "<3.13,>=3.10"
content-hash = "aa0bbf5825741bdc9c06388e7e27c1d9a2d85d517abb7f51cca71cc8349d1170"
content-hash = "2302d430ae353f5453bbf4223e9e00be38fcca45259de2924b38b14e36ab8024"

pyproject.toml

@@ -76,6 +76,7 @@ langchain = {version = "^0.2.16", optional = true}
langchain-community = {version = "^0.2.17", optional = true}
composio-langchain = "^0.5.28"
composio-core = "^0.5.28"
+alembic = "^1.13.3"
[tool.poetry.extras]
#local = ["llama-index-embeddings-huggingface"]

tests/test_client.py

@@ -385,7 +385,7 @@ def test_sources(client: Union[LocalClient, RESTClient], agent: AgentState):
# list archival memory
archival_memories = client.get_archival_memory(agent_id=agent.id)
# print(archival_memories)
-    assert len(archival_memories) == created_passages
+    assert len(archival_memories) == created_passages, f"Mismatched length {len(archival_memories)} vs. {created_passages}"
# check number of passages
sources = client.list_sources()

tests/test_tools.py

@@ -405,70 +405,3 @@ def test_tool_creation_langchain_missing_imports(client):
# Intentionally missing {"langchain_community.utilities": "WikipediaAPIWrapper"}
with pytest.raises(RuntimeError):
Tool.from_langchain(langchain_tool)
-def test_sources(client, agent):
-    # list sources (empty)
-    sources = client.list_sources()
-    assert len(sources) == 0
-
-    # create a source
-    test_source_name = "test_source"
-    source = client.create_source(name=test_source_name)
-
-    # list sources
-    sources = client.list_sources()
-    assert len(sources) == 1
-    assert sources[0].metadata_["num_passages"] == 0
-    assert sources[0].metadata_["num_documents"] == 0
-
-    # update the source
-    original_id = source.id
-    original_name = source.name
-    new_name = original_name + "_new"
-    client.update_source(source_id=source.id, name=new_name)
-
-    # get the source name (check that it's been updated)
-    source = client.get_source(source_id=source.id)
-    assert source.name == new_name
-    assert source.id == original_id
-
-    # get the source id (make sure that it's the same)
-    assert str(original_id) == client.get_source_id(source_name=new_name)
-
-    # check agent archival memory size
-    archival_memories = client.get_archival_memory(agent_id=agent.id)
-    print(archival_memories)
-    assert len(archival_memories) == 0
-
-    # load a file into a source
-    filename = "CONTRIBUTING.md"
-    upload_job = client.load_file_into_source(filename=filename, source_id=source.id)
-    print("Upload job", upload_job, upload_job.status, upload_job.metadata_)
-
-    # TODO: make sure things run in the right order
-    archival_memories = client.get_archival_memory(agent_id=agent.id)
-    assert len(archival_memories) == 0
-
-    # attach a source
-    client.attach_source_to_agent(source_id=source.id, agent_id=agent.id)
-
-    # list archival memory
-    archival_memories = client.get_archival_memory(agent_id=agent.id)
-    # print(archival_memories)
-    assert len(archival_memories) == 20 or len(archival_memories) == 21
-
-    # check number of passages
-    sources = client.list_sources()
-    # TODO: do we want to add this metadata back?
-    # assert sources[0].metadata_["num_passages"] > 0
-    # assert sources[0].metadata_["num_documents"] == 0  # TODO: fix this once document store added
-    print(sources)
-
-    # detach the source
-    # TODO: add when implemented
-    # client.detach_source(source.name, agent.id)
-
-    # delete the source
-    client.delete_source(source.id)