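"""Migration utilities for moving pre-0.2.12 MemGPT agents, data sources, and
config files over to the database-backed storage format introduced in 0.2.12."""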
import configparser
import glob
import json
import os
import pickle
import shutil
import sys
import traceback
import uuid
from datetime import datetime
from typing import List, Optional
import pytz
import questionary
import typer
from tqdm import tqdm
from memgpt.agent import Agent, save_agent
from memgpt.agent_store.storage import StorageConnector, TableType
from memgpt.cli.cli_config import configure
from memgpt.config import MemGPTConfig
from memgpt.data_types import AgentState, Message, Passage, Source, User
from memgpt.metadata import MetadataStore
from memgpt.persistence_manager import LocalStateManager
from memgpt.utils import (
MEMGPT_DIR,
OpenAIBackcompatUnpickler,
annotate_message_json_list_with_tool_calls,
get_utc_time,
parse_formatted_time,
version_less_than,
)
# This is the version where the breaking change was made
VERSION_CUTOFF = "0.2.12"
# Migration backup dir (where we'll dump old agents that we successfully migrated)
MIGRATION_BACKUP_FOLDER = "migration_backups"
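# e.g. ~/.memgpt/migration_backups/config_backup_<timestamp> for the old config,
# and ~/.memgpt/migration_backups/agents/<agent_name> for migrated agent folders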
def wipe_config_and_reconfigure(data_dir: str = MEMGPT_DIR, run_configure=True, create_config=True):
"""Wipe (backup) the config file, and launch `memgpt configure`"""
if not os.path.exists(os.path.join(data_dir, MIGRATION_BACKUP_FOLDER)):
os.makedirs(os.path.join(data_dir, MIGRATION_BACKUP_FOLDER))
os.makedirs(os.path.join(data_dir, MIGRATION_BACKUP_FOLDER, "agents"))
# Get the current timestamp in a readable format (e.g., YYYYMMDD_HHMMSS)
timestamp = get_utc_time().strftime("%Y%m%d_%H%M%S")
# Construct the new backup directory name with the timestamp
backup_filename = os.path.join(data_dir, MIGRATION_BACKUP_FOLDER, f"config_backup_{timestamp}")
existing_filename = os.path.join(data_dir, "config")
# Check if the existing file exists before moving
if os.path.exists(existing_filename):
# shutil should work cross-platform
shutil.move(existing_filename, backup_filename)
typer.secho(f"Deleted config file ({existing_filename}) and saved as backup ({backup_filename})", fg=typer.colors.GREEN)
else:
typer.secho(f"Couldn't find an existing config file to delete", fg=typer.colors.RED)
if run_configure:
# Either run configure
configure()
elif create_config:
# Or create a new config with defaults
MemGPTConfig.load()
def config_is_compatible(data_dir: str = MEMGPT_DIR, allow_empty=False, echo=False) -> bool:
"""Check if the config is OK to use with 0.2.12, or if it needs to be deleted"""
# NOTE: don't use built-in load(), since that will apply defaults
# memgpt_config = MemGPTConfig.load()
memgpt_config_file = os.path.join(data_dir, "config")
if not os.path.exists(memgpt_config_file):
        return allow_empty
parser = configparser.ConfigParser()
parser.read(memgpt_config_file)
if "version" in parser and "memgpt_version" in parser["version"]:
version = parser["version"]["memgpt_version"]
else:
version = None
if version is None:
        # no version field in the config -- assume it does not need to be migrated
return True
elif version_less_than(version, VERSION_CUTOFF):
if echo:
typer.secho(f"Current config version ({version}) is older than migration cutoff ({VERSION_CUTOFF})", fg=typer.colors.RED)
return False
else:
if echo:
typer.secho(f"Current config version {version} is compatible!", fg=typer.colors.GREEN)
return True
def agent_is_migrateable(agent_name: str, data_dir: str = MEMGPT_DIR) -> bool:
"""Determine whether or not the agent folder is a migration target"""
agent_folder = os.path.join(data_dir, "agents", agent_name)
if not os.path.exists(agent_folder):
raise ValueError(f"Folder {agent_folder} does not exist")
agent_config_file = os.path.join(agent_folder, "config.json")
if not os.path.exists(agent_config_file):
raise ValueError(f"Agent folder {agent_folder} does not have a config file")
try:
with open(agent_config_file, "r", encoding="utf-8") as fh:
agent_config = json.load(fh)
except Exception as e:
raise ValueError(f"Failed to load agent config file ({agent_config_file}), error = {e}")
    # NOTE: json.load returns a dict, so use key membership (hasattr would always be False here)
    if "memgpt_version" not in agent_config or version_less_than(agent_config["memgpt_version"], VERSION_CUTOFF):
        return True
    else:
        return False
def migrate_source(source_name: str, data_dir: str = MEMGPT_DIR, ms: Optional[MetadataStore] = None):
"""
Migrate an old source folder (`~/.memgpt/sources/{source_name}`).
"""
# 1. Load the VectorIndex from ~/.memgpt/sources/{source_name}/index
# TODO
source_path = os.path.join(data_dir, "archival", source_name, "nodes.pkl")
assert os.path.exists(source_path), f"Source {source_name} does not exist at {source_path}"
# load state from old checkpoint file
# 2. Create a new AgentState using the agent config + agent internal state
config = MemGPTConfig.load()
if ms is None:
ms = MetadataStore(config)
# gets default user
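    # config.anon_clientid holds the default user's UUID (stored as a string)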
user_id = uuid.UUID(config.anon_clientid)
user = ms.get_user(user_id=user_id)
if user is None:
ms.create_user(User(id=user_id))
user = ms.get_user(user_id=user_id)
if user is None:
typer.secho(f"Failed to create default user in database.", fg=typer.colors.RED)
sys.exit(1)
# raise ValueError(
# f"Failed to load user {str(user_id)} from database. Please make sure to migrate your config before migrating agents."
# )
# insert source into metadata store
source = Source(user_id=user.id, name=source_name)
ms.create_source(source)
    try:
        try:
            with open(source_path, "rb") as f:
                nodes = pickle.load(f)
        except ModuleNotFoundError as e:
            if "No module named 'llama_index.schema'" in str(e):
                # cannot load the source at all, so throw an error
                raise ValueError(
                    "Failed to load archival memory due to llama_index's breaking changes. Please downgrade to MemGPT version 0.3.3 or earlier to migrate this source."
                )
            else:
                raise e
passages = []
        for node in nodes:
            # TODO: make sure embedding config matches embedding size?
            if len(node.embedding) != config.default_embedding_config.embedding_dim:
                raise ValueError(
                    f"Cannot migrate source {source_name} due to incompatible embedding dimensions. Please re-load this source with `memgpt load`."
                )
passages.append(
Passage(
user_id=user.id,
data_source=source_name,
text=node.text,
embedding=node.embedding,
embedding_dim=config.default_embedding_config.embedding_dim,
embedding_model=config.default_embedding_config.embedding_model,
)
)
assert len(passages) > 0, f"Source {source_name} has no passages"
conn = StorageConnector.get_storage_connector(TableType.PASSAGES, config=config, user_id=user_id)
conn.insert_many(passages)
# print(f"Inserted {len(passages)} to {source_name}")
except Exception as e:
# delete from metadata store
ms.delete_source(source.id)
raise ValueError(f"Failed to migrate {source_name}: {str(e)}")
# basic checks
source = ms.get_source(user_id=user.id, source_name=source_name)
assert source is not None, f"Failed to load source {source_name} from database after migration"
def migrate_agent(agent_name: str, data_dir: str = MEMGPT_DIR, ms: Optional[MetadataStore] = None) -> List[str]:
"""Migrate an old agent folder (`~/.memgpt/agents/{agent_name}`)
Steps:
1. Load the agent state JSON from the old folder
2. Create a new AgentState using the agent config + agent internal state
3. Instantiate a new Agent by passing AgentState to Agent.__init__
(This will automatically run into a new database)
If success, returns empty list
If warning, returns a list of strings (warning message)
If error, raises an Exception
"""
warnings = []
# 1. Load the agent state JSON from the old folder
# TODO
agent_folder = os.path.join(data_dir, "agents", agent_name)
# migration_file = os.path.join(agent_folder, MIGRATION_FILE_NAME)
# load state from old checkpoint file
agent_ckpt_directory = os.path.join(agent_folder, "agent_state")
    json_files = glob.glob(os.path.join(agent_ckpt_directory, "*.json"))  # all saved .json checkpoints for this agent
    if not json_files:
        raise ValueError(f"Cannot load {agent_name} - no saved checkpoints found in {agent_ckpt_directory}")
    # Use the most recently modified checkpoint file
    state_filename = max(json_files, key=os.path.getmtime)
    with open(state_filename, "r") as f:
        state_dict = json.load(f)
    # state_dict keys: ['model', 'system', 'functions', 'messages', 'messages_total', 'memory']
# load old data from the persistence manager
persistence_filename = os.path.basename(state_filename).replace(".json", ".persistence.pickle")
persistence_filename = os.path.join(agent_folder, "persistence_manager", persistence_filename)
archival_filename = os.path.join(agent_folder, "persistence_manager", "index", "nodes.pkl")
if not os.path.exists(persistence_filename):
raise ValueError(f"Cannot load {agent_name} - no saved persistence pickle found at {persistence_filename}")
# return [f"Cannot load {agent_name} - no saved persistence pickle found at {persistence_filename}"]
try:
with open(persistence_filename, "rb") as f:
data = pickle.load(f)
except ModuleNotFoundError:
# Patch for stripped openai package
# ModuleNotFoundError: No module named 'openai.openai_object'
with open(persistence_filename, "rb") as f:
unpickler = OpenAIBackcompatUnpickler(f)
data = unpickler.load()
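        # OpenAIBackcompatUnpickler remaps the removed `openai.openai_object` module onto
        # MemGPT's local shim so old pickles can still be deserialized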
from memgpt.openai_backcompat.openai_object import OpenAIObject
def convert_openai_objects_to_dict(obj):
if isinstance(obj, OpenAIObject):
# Convert to dict or handle as needed
# print(f"detected OpenAIObject on {obj}")
return obj.to_dict_recursive()
elif isinstance(obj, dict):
return {k: convert_openai_objects_to_dict(v) for k, v in obj.items()}
elif isinstance(obj, list):
return [convert_openai_objects_to_dict(v) for v in obj]
else:
return obj
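        # normalize the unpickled payload so downstream code only sees plain dicts/lists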
data = convert_openai_objects_to_dict(data)
    # `data` holds the old persistence manager state:
    #   data["all_messages"], data["messages"], data["recall_memory"]
agent_config_filename = os.path.join(agent_folder, "config.json")
with open(agent_config_filename, "r", encoding="utf-8") as fh:
agent_config = json.load(fh)
# 2. Create a new AgentState using the agent config + agent internal state
config = MemGPTConfig.load()
if ms is None:
ms = MetadataStore(config)
# gets default user
user_id = uuid.UUID(config.anon_clientid)
user = ms.get_user(user_id=user_id)
if user is None:
ms.create_user(User(id=user_id))
user = ms.get_user(user_id=user_id)
if user is None:
typer.secho(f"Failed to create default user in database.", fg=typer.colors.RED)
sys.exit(1)
# raise ValueError(
# f"Failed to load user {str(user_id)} from database. Please make sure to migrate your config before migrating agents."
# )
# create an agent_id ahead of time
agent_id = uuid.uuid4()
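    # the pre-allocated agent_id lets us attach converted Messages/Passages to this agent before it exists in the DB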
    # (Message objects are created further below, once the recall history has been aligned with the in-context cache)
agent_state = AgentState(
id=agent_id,
name=agent_config["name"],
user_id=user.id,
# persona_name=agent_config["persona"], # eg 'sam_pov'
# human_name=agent_config["human"], # eg 'basic'
persona=state_dict["memory"]["persona"], # NOTE: hacky (not init, but latest)
human=state_dict["memory"]["human"], # NOTE: hacky (not init, but latest)
preset=agent_config["preset"], # eg 'memgpt_chat'
state=dict(
human=state_dict["memory"]["human"],
persona=state_dict["memory"]["persona"],
system=state_dict["system"],
functions=state_dict["functions"], # this shouldn't matter, since Agent.__init__ will re-link
# messages=[str(m.id) for m in message_objs], # this is a list of uuids, not message dicts
),
llm_config=config.default_llm_config,
embedding_config=config.default_embedding_config,
)
persistence_manager = LocalStateManager(agent_state=agent_state)
# First clean up the recall message history to add tool call ids
# allow_tool_roles in case some of the old messages were actually already in tool call format (for whatever reason)
full_message_history_buffer = annotate_message_json_list_with_tool_calls(
[d["message"] for d in data["all_messages"]], allow_tool_roles=True
)
for i in range(len(data["all_messages"])):
data["all_messages"][i]["message"] = full_message_history_buffer[i]
# Figure out what messages in recall are in-context, and which are out-of-context
agent_message_cache = state_dict["messages"]
recall_message_full = data["all_messages"]
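    # Example: with 10 recall messages and a 4-message agent cache, only recall
    # indices 6..9 can possibly be in-context; each is matched against the cache below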
def messages_are_equal(msg1, msg2):
if msg1["role"] != msg2["role"]:
return False
if msg1["content"] != msg2["content"]:
return False
if "function_call" in msg1 and "function_call" in msg2 and msg1["function_call"] != msg2["function_call"]:
return False
if "name" in msg1 and "name" in msg2 and msg1["name"] != msg2["name"]:
return False
# otherwise checks pass, ~= equal
return True
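    # NOTE: messages_are_equal ignores fields present in only one of the two messages, so matches are approximate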
in_context_messages = []
out_of_context_messages = []
assert len(agent_message_cache) <= len(recall_message_full), (len(agent_message_cache), len(recall_message_full))
for i, d in enumerate(recall_message_full):
# unpack into "timestamp" and "message"
recall_message = d["message"]
recall_timestamp = str(d["timestamp"])
try:
recall_datetime = parse_formatted_time(recall_timestamp.strip()).astimezone(pytz.utc)
except ValueError:
recall_datetime = datetime.strptime(recall_timestamp.strip(), "%Y-%m-%d %I:%M:%S %p").astimezone(pytz.utc)
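        # (older checkpoints store timestamps in the "%Y-%m-%d %I:%M:%S %p" style, e.g. "2023-11-01 09:00:00 PM")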
# message object
message_obj = Message.dict_to_message(
created_at=recall_datetime,
user_id=user.id,
agent_id=agent_id,
openai_message_dict=recall_message,
allow_functions_style=True,
)
# message is either in-context, or out-of-context
if i >= (len(recall_message_full) - len(agent_message_cache)):
# there are len(agent_message_cache) total messages on the agent
# this will correspond to the last N messages in the recall memory (though possibly out-of-order)
message_is_in_context = [messages_are_equal(recall_message, cache_message) for cache_message in agent_message_cache]
            # assert sum(message_is_in_context) <= 1, message_is_in_context
            if not any(message_is_in_context):
                warnings.append(
                    f"Didn't find late buffer recall message (i={i}/{len(recall_message_full)-1}) inside agent context\n{recall_message}"
                )
out_of_context_messages.append(message_obj)
else:
                if sum(message_is_in_context) > 1:
                    warnings.append(
                        f"Found multiple occurrences of recall message (i={i}/{len(recall_message_full)-1}) inside agent context\n{recall_message}"
                    )
in_context_messages.append(message_obj)
else:
# if we're not in the final portion of the recall memory buffer, then it's 100% out-of-context
out_of_context_messages.append(message_obj)
assert len(in_context_messages) > 0, f"Couldn't find any in-context messages (agent_cache = {len(agent_message_cache)})"
# assert len(in_context_messages) == len(agent_message_cache), (len(in_context_messages), len(agent_message_cache))
    if len(in_context_messages) != len(agent_message_cache):
        warnings.append(
            f"Uneven match of new in-context messages vs loaded cache ({len(in_context_messages)} != {len(agent_message_cache)})"
        )
# assert (
# len(in_context_messages) + len(out_of_context_messages) == state_dict["messages_total"]
# ), f"{len(in_context_messages)} + {len(out_of_context_messages)} != {state_dict['messages_total']}"
# Now we can insert the messages into the actual recall database
# So when we construct the agent from the state, they will be available
persistence_manager.recall_memory.insert_many(out_of_context_messages)
persistence_manager.recall_memory.insert_many(in_context_messages)
# Overwrite the agent_state message object
agent_state.state["messages"] = [str(m.id) for m in in_context_messages] # this is a list of uuids, not message dicts
# 3. Instantiate a new Agent by passing AgentState to Agent.__init__
# NOTE: the Agent.__init__ will trigger a save, which will write to the DB
    agent = Agent(
        agent_state=agent_state,
        # messages_total=state_dict["messages_total"],  # TODO: do we need this?
        messages_total=len(in_context_messages) + len(out_of_context_messages),
        interface=None,
    )
    save_agent(agent, ms=ms)
# Wrap the rest in a try-except so that we can cleanup by deleting the agent if we fail
try:
# TODO should we also assign data["messages"] to RecallMemory.messages?
# 5. Insert into archival
if os.path.exists(archival_filename):
            try:
                with open(archival_filename, "rb") as f:
                    nodes = pickle.load(f)
            except ModuleNotFoundError as e:
                if "No module named 'llama_index.schema'" in str(e):
                    print(
                        "Failed to load archival memory due to llama_index's breaking changes. Please downgrade to MemGPT version 0.3.3 or earlier to migrate this agent."
                    )
                    nodes = []
                else:
                    raise e
            passages = []
            failed_inserts = []
            for node in nodes:
                if len(node.embedding) != config.default_embedding_config.embedding_dim:
                    failed_inserts.append(
                        f"Cannot migrate passage due to incompatible embedding dimensions ({len(node.embedding)} != {config.default_embedding_config.embedding_dim}) - content = '{node.text}'."
                    )
                    continue  # don't insert a passage whose embedding doesn't match the configured dimension
passages.append(
Passage(
user_id=user.id,
agent_id=agent_state.id,
text=node.text,
embedding=node.embedding,
embedding_dim=agent_state.embedding_config.embedding_dim,
embedding_model=agent_state.embedding_config.embedding_model,
)
)
if len(passages) > 0:
agent.persistence_manager.archival_memory.storage.insert_many(passages)
# print(f"Inserted {len(passages)} passages into archival memory")
if len(failed_inserts) > 0:
warnings.append(
f"Failed to transfer {len(failed_inserts)}/{len(nodes)} passages from old archival memory: " + ", ".join(failed_inserts)
)
else:
warnings.append("No archival memory found at", archival_filename)
except:
ms.delete_agent(agent_state.id)
raise
    # Move the old agent folder into the migration backups so it isn't migrated twice
    new_agent_folder = os.path.join(data_dir, MIGRATION_BACKUP_FOLDER, "agents", agent_name)
    try:
        shutil.move(agent_folder, new_agent_folder)
    except Exception:
        print(f"Failed to move agent folder from {agent_folder} to {new_agent_folder}")
        raise
return warnings
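# Hypothetical direct usage (normally this module is driven by `memgpt migrate`,
# which runs the migrate_all_* helpers below):
#   ms = MetadataStore(MemGPTConfig.load())
#   migration_warnings = migrate_agent("my_agent", ms=ms)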
def migrate_all_agents(data_dir: str = MEMGPT_DIR, stop_on_fail: bool = False, debug: bool = False) -> dict:
"""Scan over all agent folders in data_dir and migrate each agent."""
if not os.path.exists(os.path.join(data_dir, MIGRATION_BACKUP_FOLDER)):
os.makedirs(os.path.join(data_dir, MIGRATION_BACKUP_FOLDER))
os.makedirs(os.path.join(data_dir, MIGRATION_BACKUP_FOLDER, "agents"))
if not config_is_compatible(data_dir, echo=True):
typer.secho(f"Your current config file is incompatible with MemGPT versions >= {VERSION_CUTOFF}", fg=typer.colors.RED)
if questionary.confirm(
"To migrate old MemGPT agents, you must delete your config file and run `memgpt configure`. Would you like to proceed?"
).ask():
try:
wipe_config_and_reconfigure(data_dir)
except Exception as e:
typer.secho(f"Fresh config generation failed - error:\n{e}", fg=typer.colors.RED)
raise
else:
typer.secho("Migration cancelled (to migrate old agents, run `memgpt migrate`)", fg=typer.colors.RED)
raise KeyboardInterrupt()
agents_dir = os.path.join(data_dir, "agents")
# Ensure the directory exists
if not os.path.exists(agents_dir):
raise ValueError(f"Directory {agents_dir} does not exist.")
# Get a list of all folders in agents_dir
agent_folders = [f for f in os.listdir(agents_dir) if os.path.isdir(os.path.join(agents_dir, f))]
# Iterate over each folder with a tqdm progress bar
count = 0
successes = [] # agents that migrated w/o warnings
warnings = [] # agents that migrated but had warnings
failures = [] # agents that failed to migrate (fatal error)
candidates = []
    config = MemGPTConfig.load()
    ms = MetadataStore(config)
try:
for agent_name in tqdm(agent_folders, desc="Migrating agents"):
            # Only agents whose config predates VERSION_CUTOFF are migration targets
try:
if agent_is_migrateable(agent_name=agent_name, data_dir=data_dir):
candidates.append(agent_name)
migration_warnings = migrate_agent(agent_name, data_dir=data_dir, ms=ms)
if len(migration_warnings) == 0:
successes.append(agent_name)
else:
warnings.append((agent_name, migration_warnings))
count += 1
except Exception as e:
failures.append({"name": agent_name, "reason": str(e)})
# typer.secho(f"Migrating {agent_name} failed with: {str(e)}", fg=typer.colors.RED)
if debug:
traceback.print_exc()
if stop_on_fail:
raise
except KeyboardInterrupt:
typer.secho(f"User cancelled operation", fg=typer.colors.RED)
if len(candidates) == 0:
typer.secho(f"No migration candidates found ({len(agent_folders)} agent folders total)", fg=typer.colors.GREEN)
else:
typer.secho(f"Inspected {len(agent_folders)} agent folders for migration")
if len(warnings) > 0:
typer.secho(f"Migration warnings:", fg=typer.colors.BRIGHT_YELLOW)
for warn in warnings:
typer.secho(f"{warn[0]}: {warn[1]}", fg=typer.colors.BRIGHT_YELLOW)
if len(failures) > 0:
typer.secho(f"Failed migrations:", fg=typer.colors.RED)
for fail in failures:
typer.secho(f"{fail['name']}: {fail['reason']}", fg=typer.colors.RED)
if len(failures) > 0:
typer.secho(
f"🔴 {len(failures)}/{len(candidates)} agents failed to migrate (see reasons above)",
fg=typer.colors.RED,
)
typer.secho(f"{[d['name'] for d in failures]}", fg=typer.colors.RED)
if len(warnings) > 0:
typer.secho(
f"🟠 {len(warnings)}/{len(candidates)} agents successfully migrated with warnings (see reasons above)",
fg=typer.colors.BRIGHT_YELLOW,
)
typer.secho(f"{[t[0] for t in warnings]}", fg=typer.colors.BRIGHT_YELLOW)
if len(successes) > 0:
typer.secho(
f"🟢 {len(successes)}/{len(candidates)} agents successfully migrated with no warnings",
fg=typer.colors.GREEN,
)
typer.secho(f"{successes}", fg=typer.colors.GREEN)
del ms
return {
"agent_folders": len(agent_folders),
"migration_candidates": candidates,
"successful_migrations": len(successes) + len(warnings),
"failed_migrations": failures,
"user_id": uuid.UUID(MemGPTConfig.load().anon_clientid),
}
def migrate_all_sources(data_dir: str = MEMGPT_DIR, stop_on_fail: bool = False, debug: bool = False) -> dict:
"""Scan over all agent folders in data_dir and migrate each agent."""
sources_dir = os.path.join(data_dir, "archival")
# Ensure the directory exists
if not os.path.exists(sources_dir):
raise ValueError(f"Directory {sources_dir} does not exist.")
    # Get a list of all folders in sources_dir
source_folders = [f for f in os.listdir(sources_dir) if os.path.isdir(os.path.join(sources_dir, f))]
# Iterate over each folder with a tqdm progress bar
count = 0
failures = []
candidates = []
config = MemGPTConfig.load()
ms = MetadataStore(config)
try:
for source_name in tqdm(source_folders, desc="Migrating data sources"):
            # Attempt to migrate every source folder; failures are collected rather than fatal
try:
candidates.append(source_name)
migrate_source(source_name, data_dir, ms=ms)
count += 1
except Exception as e:
failures.append({"name": source_name, "reason": str(e)})
if debug:
traceback.print_exc()
if stop_on_fail:
raise
# typer.secho(f"Migrating {agent_name} failed with: {str(e)}", fg=typer.colors.RED)
except KeyboardInterrupt:
typer.secho(f"User cancelled operation", fg=typer.colors.RED)
if len(candidates) == 0:
typer.secho(f"No migration candidates found ({len(source_folders)} source folders total)", fg=typer.colors.GREEN)
else:
typer.secho(f"Inspected {len(source_folders)} source folders")
if len(failures) > 0:
typer.secho(f"Failed migrations:", fg=typer.colors.RED)
for fail in failures:
typer.secho(f"{fail['name']}: {fail['reason']}", fg=typer.colors.RED)
typer.secho(f"{len(failures)}/{len(candidates)} migration targets failed (see reasons above)", fg=typer.colors.RED)
if count > 0:
typer.secho(
f"{count}/{len(candidates)} sources were successfully migrated to the new database format", fg=typer.colors.GREEN
)
del ms
return {
"source_folders": len(source_folders),
"migration_candidates": candidates,
"successful_migrations": count,
"failed_migrations": failures,
"user_id": uuid.UUID(MemGPTConfig.load().anon_clientid),
}