chore: support alembic (#1867)

Co-authored-by: Shubham Naik <shub@memgpt.ai>
Co-authored-by: Sarah Wooders <sarahwooders@gmail.com>
Shubham Naik authored 2024-10-11 15:51:14 -07:00, committed by GitHub
parent d7340eaa4d
commit 8fc8c55f14
15 changed files with 334 additions and 112 deletions

.github/workflows/migration-test.yml (vendored, new file, 34 lines)

@@ -0,0 +1,34 @@
name: Alembic Migration Tester
on:
pull_request:
paths:
- '**.py'
workflow_dispatch:
jobs:
test:
runs-on: ubuntu-latest
timeout-minutes: 15
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Build and run container
run: bash db/run_postgres.sh
- name: "Setup Python, Poetry and Dependencies"
uses: packetcoders/action-setup-cache-python-poetry@main
with:
python-version: "3.12"
poetry-version: "1.8.2"
install-args: "--all-extras"
- name: Test alembic migration
env:
LETTA_PG_PORT: 8888
LETTA_PG_USER: letta
LETTA_PG_PASSWORD: letta
LETTA_PG_DB: letta
LETTA_PG_HOST: localhost
LETTA_SERVER_PASS: test_server_token
run: |
poetry run alembic upgrade head
poetry run alembic check
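
The workflow drives Alembic through its CLI, but the same two steps can be reproduced through Alembic's Python API, which is convenient when debugging a failing run of this job locally. A minimal sketch, assuming an `alembic.ini` at the repository root and the same `LETTA_PG_*` variables exported:

```python
# Sketch: mirror the CI steps via Alembic's command API (assumes alembic.ini in cwd).
from alembic import command
from alembic.config import Config

cfg = Config("alembic.ini")
command.upgrade(cfg, "head")  # same as `poetry run alembic upgrade head`
command.check(cfg)            # raises AutogenerateDiffsDetected if models and schema drifted
```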

CONTRIBUTING.md

@@ -65,6 +65,7 @@ $ . venv/bin/activate
If you are having dependency issues using `pip`, we recommend you install the package using Poetry. Installing Letta from source using Poetry will ensure that you are using exact package versions that have been tested for the production build.
#### (Optional) Installing pre-commit
We recommend installing pre-commit to ensure proper formatting during development:
```
@@ -86,6 +87,21 @@ git checkout -b feature/your-feature
Now, the world is your oyster! Go ahead and craft your fabulous changes. 🎨
#### Handling Database Migrations
If you are running Letta for the first time, your database will be set up automatically. If you are updating Letta, you may need to run migrations. To run migrations, use the following command:
```shell
poetry run alembic upgrade head
```
#### Creating a New Database Migration
If you have made changes to the database models, you will need to create a new migration. To do so, use the following command:
```shell
poetry run alembic revision --autogenerate -m "Your migration message here"
```
Visit the [Alembic documentation](https://alembic.sqlalchemy.org/en/latest/tutorial.html) for more information on creating and running migrations.
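For reference, an autogenerated revision is an ordinary Python file written to `alembic/versions/`, with paired `upgrade`/`downgrade` functions built from `op` calls. A hypothetical example of what `--autogenerate` might emit (the `agents` table and `description` column here are illustrative, not part of this change):

```python
# Hypothetical autogenerated revision body; table and column names are made up.
import sqlalchemy as sa
from alembic import op


def upgrade() -> None:
    op.add_column("agents", sa.Column("description", sa.String(), nullable=True))


def downgrade() -> None:
    op.drop_column("agents", "description")
```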
## 3. ✅ Testing
Before we hit the 'Wow, I'm Done' button, let's make sure everything works as expected. Run tests and make sure the existing ones don't throw a fit. And if needed, create new tests. 🕵️

alembic.ini (new file, 116 lines)

@@ -0,0 +1,116 @@
# A generic, single database configuration.
[alembic]
# path to migration scripts
# Use forward slashes (/) also on Windows to provide an OS-agnostic path
script_location = alembic
# template used to generate migration file names; The default value is %%(rev)s_%%(slug)s
# Uncomment the line below if you want the files to be prepended with date and time
# see https://alembic.sqlalchemy.org/en/latest/tutorial.html#editing-the-ini-file
# for all available tokens
# file_template = %%(year)d_%%(month).2d_%%(day).2d_%%(hour).2d%%(minute).2d-%%(rev)s_%%(slug)s
# sys.path path, will be prepended to sys.path if present.
# defaults to the current working directory.
prepend_sys_path = .
# timezone to use when rendering the date within the migration file
# as well as the filename.
# If specified, requires the python>=3.9 or backports.zoneinfo library.
# Any required deps can be installed by adding `alembic[tz]` to the pip requirements
# string value is passed to ZoneInfo()
# leave blank for localtime
# timezone =
# max length of characters to apply to the "slug" field
# truncate_slug_length = 40
# set to 'true' to run the environment during
# the 'revision' command, regardless of autogenerate
# revision_environment = false
# set to 'true' to allow .pyc and .pyo files without
# a source .py file to be detected as revisions in the
# versions/ directory
# sourceless = false
# version location specification; This defaults
# to alembic/versions. When using multiple version
# directories, initial revisions must be specified with --version-path.
# The path separator used here should be the separator specified by "version_path_separator" below.
# version_locations = %(here)s/bar:%(here)s/bat:alembic/versions
# version path separator; As mentioned above, this is the character used to split
# version_locations. The default within new alembic.ini files is "os", which uses os.pathsep.
# If this key is omitted entirely, it falls back to the legacy behavior of splitting on spaces and/or commas.
# Valid values for version_path_separator are:
#
# version_path_separator = :
# version_path_separator = ;
# version_path_separator = space
version_path_separator = os # Use os.pathsep. Default configuration used for new projects.
# set to 'true' to search source files recursively
# in each "version_locations" directory
# new in Alembic version 1.10
# recursive_version_locations = false
# the output encoding used when revision files
# are written from script.py.mako
# output_encoding = utf-8
sqlalchemy.url = driver://user:pass@localhost/dbname
[post_write_hooks]
# post_write_hooks defines scripts or Python functions that are run
# on newly generated revision scripts. See the documentation for further
# detail and examples
# format using "black" - use the console_scripts runner, against the "black" entrypoint
# hooks = black
# black.type = console_scripts
# black.entrypoint = black
# black.options = -l 79 REVISION_SCRIPT_FILENAME
# lint with attempts to fix using "ruff" - use the exec runner, execute a binary
# hooks = ruff
# ruff.type = exec
# ruff.executable = %(here)s/.venv/bin/ruff
# ruff.options = --fix REVISION_SCRIPT_FILENAME
# Logging configuration
[loggers]
keys = root,sqlalchemy,alembic
[handlers]
keys = console
[formatters]
keys = generic
[logger_root]
level = WARN
handlers = console
qualname =
[logger_sqlalchemy]
level = WARN
handlers =
qualname = sqlalchemy.engine
[logger_alembic]
level = INFO
handlers =
qualname = alembic
[handler_console]
class = StreamHandler
args = (sys.stderr,)
level = NOTSET
formatter = generic
[formatter_generic]
format = %(levelname)-5.5s [%(name)s] %(message)s
datefmt = %H:%M:%S
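
Note that `sqlalchemy.url` above is left as the stock placeholder: `alembic/env.py` (added below) overwrites it at runtime from Letta's settings. If you need to inspect what Alembic actually sees, the ini values are exposed through its `Config` object; a small sketch:

```python
# Sketch: read values from alembic.ini the way Alembic does.
from alembic.config import Config

cfg = Config("alembic.ini")
print(cfg.get_main_option("script_location"))  # "alembic"
print(cfg.get_main_option("sqlalchemy.url"))   # placeholder until env.py overrides it
```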

alembic/README (new file, 1 line)

@@ -0,0 +1 @@
Generic single-database configuration.

alembic/env.py (new file, 84 lines)

@@ -0,0 +1,84 @@
import os
from logging.config import fileConfig
from sqlalchemy import engine_from_config, pool
from alembic import context
from letta.base import Base
from letta.config import LettaConfig
from letta.settings import settings
# this is the Alembic Config object, which provides
# access to the values within the .ini file in use.
config = context.config
# Prefer the configured Postgres URI; otherwise fall back to the local SQLite
# database, mirroring the engine selection in server.py.
if settings.letta_pg_uri_no_default:
    config.set_main_option("sqlalchemy.url", settings.letta_pg_uri)
else:
    # the sqlite path lives on LettaConfig, not on Alembic's Config object
    letta_config = LettaConfig.load()
    config.set_main_option("sqlalchemy.url", "sqlite:///" + os.path.join(letta_config.recall_storage_path, "sqlite.db"))
# Interpret the config file for Python logging.
# This sets up the loggers.
if config.config_file_name is not None:
fileConfig(config.config_file_name)
# add your model's MetaData object here
# for 'autogenerate' support
# from myapp import mymodel
# target_metadata = mymodel.Base.metadata
target_metadata = Base.metadata
# other values from the config, defined by the needs of env.py,
# can be acquired:
# my_important_option = config.get_main_option("my_important_option")
# ... etc.
def run_migrations_offline() -> None:
"""Run migrations in 'offline' mode.
This configures the context with just a URL
and not an Engine, though an Engine is acceptable
here as well. By skipping the Engine creation
we don't even need a DBAPI to be available.
Calls to context.execute() here emit the given string to the
script output.
"""
url = config.get_main_option("sqlalchemy.url")
context.configure(
url=url,
target_metadata=target_metadata,
literal_binds=True,
dialect_opts={"paramstyle": "named"},
)
with context.begin_transaction():
context.run_migrations()
def run_migrations_online() -> None:
"""Run migrations in 'online' mode.
In this scenario we need to create an Engine
and associate a connection with the context.
"""
connectable = engine_from_config(
config.get_section(config.config_ini_section, {}),
prefix="sqlalchemy.",
poolclass=pool.NullPool,
)
with connectable.connect() as connection:
context.configure(connection=connection, target_metadata=target_metadata, include_schemas=True)
with context.begin_transaction():
context.run_migrations()
if context.is_offline_mode():
run_migrations_offline()
else:
run_migrations_online()
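
Because the offline path configures the context with only a URL, migrations can be rendered to SQL without a reachable database, which is useful for reviewing exactly what `upgrade head` will execute. A sketch using the command API (equivalent to `alembic upgrade head --sql`):

```python
# Sketch: emit migration SQL instead of executing it (offline mode).
from alembic import command
from alembic.config import Config

cfg = Config("alembic.ini")
command.upgrade(cfg, "head", sql=True)  # prints the SQL to stdout
```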

alembic/script.py.mako (new file, 26 lines)

@@ -0,0 +1,26 @@
"""${message}
Revision ID: ${up_revision}
Revises: ${down_revision | comma,n}
Create Date: ${create_date}
"""
from typing import Sequence, Union
from alembic import op
import sqlalchemy as sa
${imports if imports else ""}
# revision identifiers, used by Alembic.
revision: str = ${repr(up_revision)}
down_revision: Union[str, None] = ${repr(down_revision)}
branch_labels: Union[str, Sequence[str], None] = ${repr(branch_labels)}
depends_on: Union[str, Sequence[str], None] = ${repr(depends_on)}
def upgrade() -> None:
${upgrades if upgrades else "pass"}
def downgrade() -> None:
${downgrades if downgrades else "pass"}

alembic/versions/9a505cc7eca9_create_a_baseline_migrations.py (new file, 27 lines)

@@ -0,0 +1,27 @@
"""Create a baseline migrations
Revision ID: 9a505cc7eca9
Revises:
Create Date: 2024-10-11 14:19:19.875656
"""
from typing import Sequence, Union
# revision identifiers, used by Alembic.
revision: str = "9a505cc7eca9"
down_revision: Union[str, None] = None
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
# ### commands auto generated by Alembic - please adjust! ###
pass
# ### end Alembic commands ###
def downgrade() -> None:
# ### commands auto generated by Alembic - please adjust! ###
pass
# ### end Alembic commands ###
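
This baseline revision is intentionally empty: it gives the migration history a starting point without touching tables that `create_all` already built. A database created by an earlier Letta version therefore has the tables but no `alembic_version` row; one way to adopt it into the history is to stamp it at head, which records the revision without running any operations. This is a sketch of a manual step, not something the commit performs automatically:

```python
# Sketch: mark an existing, already-correct database as being at the latest revision.
from alembic import command
from alembic.config import Config

cfg = Config("alembic.ini")
command.stamp(cfg, "head")  # writes the head revision id into alembic_version
```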

letta/agent_store/db.py

@@ -18,13 +18,14 @@ from sqlalchemy import (
select,
text,
)
-from sqlalchemy.orm import declarative_base, mapped_column
+from sqlalchemy.orm import mapped_column
from sqlalchemy.orm.session import close_all_sessions
from sqlalchemy.sql import func
from sqlalchemy_json import MutableJson
from tqdm import tqdm
from letta.agent_store.storage import StorageConnector, TableType
+from letta.base import Base
from letta.config import LettaConfig
from letta.constants import MAX_EMBEDDING_DIM
from letta.metadata import EmbeddingConfigColumn, ToolCallColumn
@@ -35,7 +36,6 @@ from letta.schemas.openai.chat_completions import ToolCall
from letta.schemas.passage import Passage
from letta.settings import settings
-Base = declarative_base()
config = LettaConfig()
@@ -560,3 +560,9 @@ class SQLLiteStorageConnector(SQLStorageConnector):
# Commit the changes to the database
session.commit()
+def attach_base():
+    # This should be invoked in server.py to make sure Base gets initialized properly
+    # DO NOT REMOVE
+    print("Initializing database...")

letta/base.py (new file, 3 lines)

@@ -0,0 +1,3 @@
from sqlalchemy.orm import declarative_base
Base = declarative_base()
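
This three-line module is the crux of the refactor: previously `agent_store/db.py` and `metadata.py` each called `declarative_base()`, so their models were registered on two unrelated `MetaData` objects, and neither `create_all` nor Alembic's autogenerate could see the full schema from one place. A minimal standalone sketch of that failure mode, with throwaway model names:

```python
# Sketch: two separate declarative bases means two separate MetaData registries.
from sqlalchemy import Column, Integer, create_engine, inspect
from sqlalchemy.orm import declarative_base

BaseA = declarative_base()  # stand-in for the old Base in agent_store/db.py
BaseB = declarative_base()  # stand-in for the old Base in metadata.py


class Foo(BaseA):
    __tablename__ = "foo"
    id = Column(Integer, primary_key=True)


class Bar(BaseB):
    __tablename__ = "bar"
    id = Column(Integer, primary_key=True)


engine = create_engine("sqlite://")
BaseA.metadata.create_all(engine)        # creates only "foo"
print(inspect(engine).has_table("bar"))  # False: "bar" is registered on BaseB.metadata
```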

letta/metadata.py

@@ -14,11 +14,10 @@ from sqlalchemy import (
String,
TypeDecorator,
desc,
-    func,
)
-from sqlalchemy.orm import declarative_base
from sqlalchemy.sql import func
+from letta.base import Base
from letta.config import LettaConfig
from letta.schemas.agent import AgentState
from letta.schemas.api_key import APIKey
@@ -28,6 +27,8 @@ from letta.schemas.enums import JobStatus
from letta.schemas.job import Job
from letta.schemas.llm_config import LLMConfig
from letta.schemas.memory import Memory
# from letta.schemas.message import Message, Passage, Record, RecordType, ToolCall
from letta.schemas.openai.chat_completions import ToolCall, ToolCallFunction
from letta.schemas.organization import Organization
from letta.schemas.source import Source
@@ -36,8 +37,6 @@ from letta.schemas.user import User
from letta.settings import settings
from letta.utils import enforce_types, get_utc_time, printd
-Base = declarative_base()
class LLMConfigColumn(TypeDecorator):
"""Custom type for storing LLMConfig as JSON"""

letta/server/server.py

@@ -14,8 +14,8 @@ import letta.constants as constants
import letta.server.utils as server_utils
import letta.system as system
from letta.agent import Agent, save_agent
+from letta.agent_store.db import attach_base
from letta.agent_store.storage import StorageConnector, TableType
from letta.config import LettaConfig
from letta.credentials import LettaCredentials
from letta.data_sources.connectors import DataConnector, load_data
@@ -41,7 +41,7 @@ from letta.interface import AgentInterface # abstract
from letta.interface import CLIInterface # for printing to terminal
from letta.log import get_logger
from letta.memory import get_memory_functions
-from letta.metadata import MetadataStore
+from letta.metadata import Base, MetadataStore
from letta.prompts import gpt_system
from letta.providers import (
AnthropicProvider,
@@ -150,23 +150,11 @@ class Server(object):
from sqlalchemy import create_engine
-from sqlalchemy.orm import declarative_base, sessionmaker
+from sqlalchemy.orm import sessionmaker
from letta.agent_store.db import MessageModel, PassageModel
from letta.config import LettaConfig
-# NOTE: hack to see if single session management works
-from letta.metadata import (
-    AgentModel,
-    AgentSourceMappingModel,
-    APIKeyModel,
-    BlockModel,
-    JobModel,
-    OrganizationModel,
-    SourceModel,
-    ToolModel,
-    UserModel,
-)
from letta.settings import model_settings, settings
config = LettaConfig.load()
@@ -183,24 +171,12 @@ else:
# TODO: don't rely on config storage
engine = create_engine("sqlite:///" + os.path.join(config.recall_storage_path, "sqlite.db"))
-Base = declarative_base()
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
-Base.metadata.create_all(
-    engine,
-    tables=[
-        UserModel.__table__,
-        AgentModel.__table__,
-        SourceModel.__table__,
-        AgentSourceMappingModel.__table__,
-        APIKeyModel.__table__,
-        BlockModel.__table__,
-        ToolModel.__table__,
-        JobModel.__table__,
-        PassageModel.__table__,
-        MessageModel.__table__,
-        OrganizationModel.__table__,
-    ],
-)
+attach_base()
+Base.metadata.create_all(bind=engine)
# Dependency
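
With every model on the shared `Base`, a single `create_all` now covers the whole schema, but `create_all` only creates missing tables; it never alters existing ones, which is precisely the gap Alembic fills. To see where a given database stands in the migration history, you can read its `alembic_version` table; a sketch (the SQLite URL is illustrative):

```python
# Sketch: query the revision recorded in a database's alembic_version table.
from alembic.migration import MigrationContext
from sqlalchemy import create_engine

engine = create_engine("sqlite:///sqlite.db")  # illustrative URL
with engine.connect() as conn:
    ctx = MigrationContext.configure(conn)
    print(ctx.get_current_revision())  # e.g. "9a505cc7eca9", or None if never stamped
```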

poetry.lock (generated, 12 lines changed)

@@ -139,13 +139,13 @@ frozenlist = ">=1.1.0"
[[package]]
name = "alembic"
version = "1.13.2"
version = "1.13.3"
description = "A database migration tool for SQLAlchemy."
-optional = true
+optional = false
python-versions = ">=3.8"
files = [
{file = "alembic-1.13.2-py3-none-any.whl", hash = "sha256:6b8733129a6224a9a711e17c99b08462dbf7cc9670ba8f2e2ae9af860ceb1953"},
{file = "alembic-1.13.2.tar.gz", hash = "sha256:1ff0ae32975f4fd96028c39ed9bb3c867fe3af956bd7bb37343b54c9fe7445ef"},
{file = "alembic-1.13.3-py3-none-any.whl", hash = "sha256:908e905976d15235fae59c9ac42c4c5b75cfcefe3d27c0fbf7ae15a37715d80e"},
{file = "alembic-1.13.3.tar.gz", hash = "sha256:203503117415561e203aa14541740643a611f641517f0209fcae63e9fa09f1a2"},
]
[package.dependencies]
@@ -3814,7 +3814,7 @@ Werkzeug = ">=2.0.0"
name = "mako"
version = "1.3.5"
description = "A super-fast templating language that borrows the best ideas from the existing templating languages."
-optional = true
+optional = false
python-versions = ">=3.8"
files = [
{file = "Mako-1.3.5-py3-none-any.whl", hash = "sha256:260f1dbc3a519453a9c856dedfe4beb4e50bd5a26d96386cb6c80856556bb91a"},
@@ -8354,4 +8354,4 @@ tests = ["wikipedia"]
[metadata]
lock-version = "2.0"
python-versions = "<3.13,>=3.10"
content-hash = "aa0bbf5825741bdc9c06388e7e27c1d9a2d85d517abb7f51cca71cc8349d1170"
content-hash = "2302d430ae353f5453bbf4223e9e00be38fcca45259de2924b38b14e36ab8024"

pyproject.toml

@@ -76,6 +76,7 @@ langchain = {version = "^0.2.16", optional = true}
langchain-community = {version = "^0.2.17", optional = true}
composio-langchain = "^0.5.28"
composio-core = "^0.5.28"
+alembic = "^1.13.3"
[tool.poetry.extras]
#local = ["llama-index-embeddings-huggingface"]

tests/test_client.py

@@ -385,7 +385,7 @@ def test_sources(client: Union[LocalClient, RESTClient], agent: AgentState):
# list archival memory
archival_memories = client.get_archival_memory(agent_id=agent.id)
# print(archival_memories)
-    assert len(archival_memories) == created_passages
+    assert len(archival_memories) == created_passages, f"Mismatched length {len(archival_memories)} vs. {created_passages}"
# check number of passages
sources = client.list_sources()

tests/test_tools.py

@@ -405,70 +405,3 @@ def test_tool_creation_langchain_missing_imports(client):
# Intentionally missing {"langchain_community.utilities": "WikipediaAPIWrapper"}
with pytest.raises(RuntimeError):
Tool.from_langchain(langchain_tool)
-def test_sources(client, agent):
-    # list sources (empty)
-    sources = client.list_sources()
-    assert len(sources) == 0
-
-    # create a source
-    test_source_name = "test_source"
-    source = client.create_source(name=test_source_name)
-
-    # list sources
-    sources = client.list_sources()
-    assert len(sources) == 1
-    assert sources[0].metadata_["num_passages"] == 0
-    assert sources[0].metadata_["num_documents"] == 0
-
-    # update the source
-    original_id = source.id
-    original_name = source.name
-    new_name = original_name + "_new"
-    client.update_source(source_id=source.id, name=new_name)
-
-    # get the source name (check that it's been updated)
-    source = client.get_source(source_id=source.id)
-    assert source.name == new_name
-    assert source.id == original_id
-
-    # get the source id (make sure that it's the same)
-    assert str(original_id) == client.get_source_id(source_name=new_name)
-
-    # check agent archival memory size
-    archival_memories = client.get_archival_memory(agent_id=agent.id)
-    print(archival_memories)
-    assert len(archival_memories) == 0
-
-    # load a file into a source
-    filename = "CONTRIBUTING.md"
-    upload_job = client.load_file_into_source(filename=filename, source_id=source.id)
-    print("Upload job", upload_job, upload_job.status, upload_job.metadata_)
-
-    # TODO: make sure things run in the right order
-    archival_memories = client.get_archival_memory(agent_id=agent.id)
-    assert len(archival_memories) == 0
-
-    # attach a source
-    client.attach_source_to_agent(source_id=source.id, agent_id=agent.id)
-
-    # list archival memory
-    archival_memories = client.get_archival_memory(agent_id=agent.id)
-    # print(archival_memories)
-    assert len(archival_memories) == 20 or len(archival_memories) == 21
-
-    # check number of passages
-    sources = client.list_sources()
-    # TODO: do we want to add this metadata back?
-    # assert sources[0].metadata_["num_passages"] > 0
-    # assert sources[0].metadata_["num_documents"] == 0  # TODO: fix this once document store added
-    print(sources)
-
-    # detach the source
-    # TODO: add when implemented
-    # client.detach_source(source.name, agent.id)
-
-    # delete the source
-    client.delete_source(source.id)