feat: archival memory file upload (#2228)

Co-authored-by: Mindy Long <mindy@letta.com>
Co-authored-by: Sarah Wooders <sarahwooders@gmail.com>
This commit is contained in:
mlong93 2024-12-12 11:36:26 -08:00 committed by GitHub
parent 6970a01cbc
commit 95fc4e7521
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 400 additions and 16 deletions

View File

@ -1371,10 +1371,9 @@ class Agent(BaseAgent):
# TODO: recall memory
raise NotImplementedError()
def attach_source(self, user: PydanticUser, source_id: str, source_manager: SourceManager, ms: MetadataStore):
def attach_source(self, user: PydanticUser, source_id: str, source_manager: SourceManager, ms: MetadataStore, page_size: Optional[int] = None):
"""Attach data with name `source_name` to the agent from source_connector."""
# TODO: eventually, adding a data source should just give access to the retriever the source table, rather than modifying archival memory
page_size = 100
passages = self.passage_manager.list_passages(actor=user, source_id=source_id, limit=page_size)
for passage in passages:

View File

@ -1,4 +1,4 @@
from typing import Dict, Iterator, List, Tuple, Optional
from typing import Dict, Iterator, List, Tuple
import typer
@ -13,8 +13,6 @@ from letta.schemas.passage import Passage
from letta.schemas.source import Source
from letta.services.passage_manager import PassageManager
from letta.services.source_manager import SourceManager
from letta.utils import create_uuid_from_string
class DataConnector:
"""
@ -42,7 +40,7 @@ class DataConnector:
"""
def load_data(connector: DataConnector, source: Source, passage_manager: PassageManager, source_manager: SourceManager, actor: "User", agent_id: Optional[str] = None):
def load_data(connector: DataConnector, source: Source, passage_manager: PassageManager, source_manager: SourceManager, actor: "User"):
"""Load data from a connector (generates file and passages) into a specified source_id, associated with a user_id."""
embedding_config = source.embedding_config
@ -79,10 +77,8 @@ def load_data(connector: DataConnector, source: Source, passage_manager: Passage
continue
passage = Passage(
id=create_uuid_from_string(f"{str(source.id)}_{passage_text}"),
text=passage_text,
file_id=file_metadata.id,
agent_id=agent_id,
source_id=source.id,
metadata_=passage_metadata,
organization_id=source.organization_id,

View File

@ -1584,6 +1584,8 @@ class SyncServer(Server):
from letta.data_sources.connectors import DirectoryConnector
source = self.source_manager.get_source_by_id(source_id=source_id)
if source is None:
raise ValueError(f"Source {source_id} does not exist")
connector = DirectoryConnector(input_files=[file_path])
num_passages, num_documents = self.load_data(user_id=source.created_by_id, source_name=source.name, connector=connector)
@ -1593,6 +1595,15 @@ class SyncServer(Server):
job.metadata_["num_documents"] = num_documents
self.job_manager.update_job_by_id(job_id=job_id, job_update=JobUpdate(**job.model_dump()), actor=actor)
# update all agents who have this source attached
agent_ids = self.ms.list_attached_agents(source_id=source_id)
for agent_id in agent_ids:
agent = self.load_agent(agent_id=agent_id)
curr_passage_size = self.passage_manager.size(actor=actor, agent_id=agent_id, source_id=source_id)
agent.attach_source(user=actor, source_id=source_id, source_manager=self.source_manager, ms=self.ms)
new_passage_size = self.passage_manager.size(actor=actor, agent_id=agent_id, source_id=source_id)
assert new_passage_size >= curr_passage_size # in case empty files are added
return job
def load_data(
@ -1600,7 +1611,6 @@ class SyncServer(Server):
user_id: str,
connector: DataConnector,
source_name: str,
agent_id: Optional[str] = None,
) -> Tuple[int, int]:
"""Load data from a DataConnector into a source for a specified user_id"""
# TODO: this should be implemented as a batch job or at least async, since it may take a long time
@ -1612,9 +1622,7 @@ class SyncServer(Server):
raise ValueError(f"Data source {source_name} does not exist for user {user_id}")
# load data into the document store
passage_count, document_count = load_data(
connector, source, self.passage_manager, self.source_manager, actor=user, agent_id=agent_id
)
passage_count, document_count = load_data(connector, source, self.passage_manager, self.source_manager, actor=user)
return passage_count, document_count
def attach_source_to_agent(

View File

@ -25,13 +25,246 @@ utils.DEBUG = True
from letta.config import LettaConfig
from letta.schemas.agent import CreateAgent
from letta.schemas.embedding_config import EmbeddingConfig
from letta.schemas.job import Job as PydanticJob
from letta.schemas.llm_config import LLMConfig
from letta.schemas.message import Message
from letta.schemas.source import Source
from letta.schemas.source import Source as PydanticSource
from letta.server.server import SyncServer
from .utils import DummyDataConnector
WAR_AND_PEACE = """BOOK ONE: 1805
CHAPTER I
Well, Prince, so Genoa and Lucca are now just family estates of the
Buonapartes. But I warn you, if you don't tell me that this means war,
if you still try to defend the infamies and horrors perpetrated by that
AntichristI really believe he is AntichristI will have nothing
more to do with you and you are no longer my friend, no longer my
'faithful slave,' as you call yourself! But how do you do? I see I
have frightened yousit down and tell me all the news.
It was in July, 1805, and the speaker was the well-known Anna Pávlovna
Schérer, maid of honor and favorite of the Empress Márya Fëdorovna.
With these words she greeted Prince Vasíli Kurágin, a man of high
rank and importance, who was the first to arrive at her reception. Anna
Pávlovna had had a cough for some days. She was, as she said, suffering
from la grippe; grippe being then a new word in St. Petersburg, used
only by the elite.
All her invitations without exception, written in French, and delivered
by a scarlet-liveried footman that morning, ran as follows:
If you have nothing better to do, Count (or Prince), and if the
prospect of spending an evening with a poor invalid is not too terrible,
I shall be very charmed to see you tonight between 7 and 10Annette
Schérer.
Heavens! what a virulent attack! replied the prince, not in the
least disconcerted by this reception. He had just entered, wearing an
embroidered court uniform, knee breeches, and shoes, and had stars on
his breast and a serene expression on his flat face. He spoke in that
refined French in which our grandfathers not only spoke but thought, and
with the gentle, patronizing intonation natural to a man of importance
who had grown old in society and at court. He went up to Anna Pávlovna,
kissed her hand, presenting to her his bald, scented, and shining head,
and complacently seated himself on the sofa.
First of all, dear friend, tell me how you are. Set your friend's
mind at rest, said he without altering his tone, beneath the
politeness and affected sympathy of which indifference and even irony
could be discerned.
Can one be well while suffering morally? Can one be calm in times
like these if one has any feeling? said Anna Pávlovna. You are
staying the whole evening, I hope?
And the fete at the English ambassador's? Today is Wednesday. I
must put in an appearance there, said the prince. My daughter is
coming for me to take me there.
I thought today's fete had been canceled. I confess all these
festivities and fireworks are becoming wearisome.
If they had known that you wished it, the entertainment would have
been put off, said the prince, who, like a wound-up clock, by force
of habit said things he did not even wish to be believed.
Don't tease! Well, and what has been decided about Novosíltsev's
dispatch? You know everything.
What can one say about it? replied the prince in a cold, listless
tone. What has been decided? They have decided that Buonaparte has
burnt his boats, and I believe that we are ready to burn ours.
Prince Vasíli always spoke languidly, like an actor repeating a stale
part. Anna Pávlovna Schérer on the contrary, despite her forty years,
overflowed with animation and impulsiveness. To be an enthusiast had
become her social vocation and, sometimes even when she did not
feel like it, she became enthusiastic in order not to disappoint the
expectations of those who knew her. The subdued smile which, though it
did not suit her faded features, always played round her lips expressed,
as in a spoiled child, a continual consciousness of her charming defect,
which she neither wished, nor could, nor considered it necessary, to
correct.
In the midst of a conversation on political matters Anna Pávlovna burst
out:
Oh, don't speak to me of Austria. Perhaps I don't understand
things, but Austria never has wished, and does not wish, for war. She
is betraying us! Russia alone must save Europe. Our gracious sovereign
recognizes his high vocation and will be true to it. That is the one
thing I have faith in! Our good and wonderful sovereign has to perform
the noblest role on earth, and he is so virtuous and noble that God will
not forsake him. He will fulfill his vocation and crush the hydra of
revolution, which has become more terrible than ever in the person of
this murderer and villain! We alone must avenge the blood of the just
one.... Whom, I ask you, can we rely on?... England with her commercial
spirit will not and cannot understand the Emperor Alexander's
loftiness of soul. She has refused to evacuate Malta. She wanted to
find, and still seeks, some secret motive in our actions. What answer
did Novosíltsev get? None. The English have not understood and cannot
understand the self-abnegation of our Emperor who wants nothing for
himself, but only desires the good of mankind. And what have they
promised? Nothing! And what little they have promised they will not
perform! Prussia has always declared that Buonaparte is invincible, and
that all Europe is powerless before him.... And I don't believe a
word that Hardenburg says, or Haugwitz either. This famous Prussian
neutrality is just a trap. I have faith only in God and the lofty
destiny of our adored monarch. He will save Europe!
She suddenly paused, smiling at her own impetuosity.
I think, said the prince with a smile, that if you had been
sent instead of our dear Wintzingerode you would have captured the King
of Prussia's consent by assault. You are so eloquent. Will you give me
a cup of tea?
In a moment. À propos, she added, becoming calm again, I am
expecting two very interesting men tonight, le Vicomte de Mortemart, who
is connected with the Montmorencys through the Rohans, one of the best
French families. He is one of the genuine émigrés, the good ones. And
also the Abbé Morio. Do you know that profound thinker? He has been
received by the Emperor. Had you heard?
I shall be delighted to meet them, said the prince. But
tell me, he added with studied carelessness as if it had only just
occurred to him, though the question he was about to ask was the chief
motive of his visit, is it true that the Dowager Empress wants
Baron Funke to be appointed first secretary at Vienna? The baron by all
accounts is a poor creature.
Prince Vasíli wished to obtain this post for his son, but others were
trying through the Dowager Empress Márya Fëdorovna to secure it for
the baron.
Anna Pávlovna almost closed her eyes to indicate that neither she nor
anyone else had a right to criticize what the Empress desired or was
pleased with.
Baron Funke has been recommended to the Dowager Empress by her
sister, was all she said, in a dry and mournful tone.
As she named the Empress, Anna Pávlovna's face suddenly assumed an
expression of profound and sincere devotion and respect mingled with
sadness, and this occurred every time she mentioned her illustrious
patroness. She added that Her Majesty had deigned to show Baron Funke
beaucoup d'estime, and again her face clouded over with sadness.
The prince was silent and looked indifferent. But, with the womanly and
courtierlike quickness and tact habitual to her, Anna Pávlovna
wished both to rebuke him (for daring to speak as he had done of a man
recommended to the Empress) and at the same time to console him, so she
said:
Now about your family. Do you know that since your daughter came
out everyone has been enraptured by her? They say she is amazingly
beautiful.
The prince bowed to signify his respect and gratitude.
I often think, she continued after a short pause, drawing nearer
to the prince and smiling amiably at him as if to show that political
and social topics were ended and the time had come for intimate
conversationI often think how unfairly sometimes the joys of life
are distributed. Why has fate given you two such splendid children?
I don't speak of Anatole, your youngest. I don't like him, she
added in a tone admitting of no rejoinder and raising her eyebrows.
Two such charming children. And really you appreciate them less than
anyone, and so you don't deserve to have them.”
And she smiled her ecstatic smile.
I can't help it,” said the prince. “Lavater would have said I
lack the bump of paternity.
Don't joke; I mean to have a serious talk with you. Do you know
I am dissatisfied with your younger son? Between ourselves (and her
face assumed its melancholy expression), he was mentioned at Her
Majesty's and you were pitied....”
The prince answered nothing, but she looked at him significantly,
awaiting a reply. He frowned.
What would you have me do? he said at last. You know I did all
a father could for their education, and they have both turned out fools.
Hippolyte is at least a quiet fool, but Anatole is an active one. That
is the only difference between them. He said this smiling in a way
more natural and animated than usual, so that the wrinkles round
his mouth very clearly revealed something unexpectedly coarse and
unpleasant.
And why are children born to such men as you? If you were not a
father there would be nothing I could reproach you with, said Anna
Pávlovna, looking up pensively.
I am your faithful slave and to you alone I can confess that my
children are the bane of my life. It is the cross I have to bear. That
is how I explain it to myself. It can't be helped!”
He said no more, but expressed his resignation to cruel fate by a
gesture. Anna Pávlovna meditated.
Have you never thought of marrying your prodigal son Anatole? she
asked. They say old maids have a mania for matchmaking, and though I
don't feel that weakness in myself as yet, I know a little person who
is very unhappy with her father. She is a relation of yours, Princess
Mary Bolkónskaya.
Prince Vasíli did not reply, though, with the quickness of memory and
perception befitting a man of the world, he indicated by a movement of
the head that he was considering this information.
Do you know, he said at last, evidently unable to check the sad
current of his thoughts, that Anatole is costing me forty thousand
rubles a year? And, he went on after a pause, what will it be in
five years, if he goes on like this? Presently he added: That's
what we fathers have to put up with.... Is this princess of yours
rich?
Her father is very rich and stingy. He lives in the country. He is
the well-known Prince Bolkónski who had to retire from the army under
the late Emperor, and was nicknamed 'the King of Prussia.' He is
very clever but eccentric, and a bore. The poor girl is very unhappy.
She has a brother; I think you know him, he married Lise Meinen lately.
He is an aide-de-camp of Kutúzov's and will be here tonight.”
Listen, dear Annette, said the prince, suddenly taking Anna
Pávlovna's hand and for some reason drawing it downwards. “Arrange
that affair for me and I shall always be your most devoted slave-slafe
with an f, as a village elder of mine writes in his reports. She is rich
and of good family and that's all I want.”
And with the familiarity and easy grace peculiar to him, he raised the
maid of honor's hand to his lips, kissed it, and swung it to and fro
as he lay back in his armchair, looking in another direction.
Attendez, said Anna Pávlovna, reflecting, I'll speak to
Lise, young Bolkónski's wife, this very evening, and perhaps the
thing can be arranged. It shall be on your family's behalf that I'll
start my apprenticeship as old maid."""
@pytest.fixture(scope="module")
def server():
@ -87,6 +320,24 @@ def agent_id(server, user_id):
# cleanup
server.delete_agent(user_id, agent_state.id)
@pytest.fixture(scope="module")
def other_agent_id(server, user_id):
# create agent
agent_state = server.create_agent(
request=CreateAgent(
name="test_agent_other",
tools=BASE_TOOLS,
memory_blocks=[],
llm_config=LLMConfig.default_config("gpt-4"),
embedding_config=EmbeddingConfig.default_config(provider="openai"),
),
actor=server.get_user_or_default(user_id),
)
print(f"Created agent\n{agent_state}")
yield agent_state.id
# cleanup
server.delete_agent(user_id, agent_state.id)
def test_error_on_nonexistent_agent(server, user_id, agent_id):
try:
@ -121,7 +372,7 @@ def test_load_data(server, user_id, agent_id):
assert len(passages_before) == 0
source = server.source_manager.create_source(
Source(name="test_source", embedding_config=DEFAULT_EMBEDDING_CONFIG), actor=server.default_user
PydanticSource(name="test_source", embedding_config=EmbeddingConfig.default_config(provider="openai")), actor=server.default_user
)
# load data
@ -133,7 +384,7 @@ def test_load_data(server, user_id, agent_id):
"Shishir loves indian food",
]
connector = DummyDataConnector(archival_memories)
server.load_data(user_id, connector, source.name, agent_id=agent_id)
server.load_data(user_id, connector, source.name)
# @pytest.mark.order(3)
# def test_attach_source_to_agent(server, user_id, agent_id):
@ -168,7 +419,6 @@ def test_get_recall_memory(server, org_id, user_id, agent_id):
messages_1 = server.get_agent_recall_cursor(user_id=user_id, agent_id=agent_id, limit=2)
cursor1 = messages_1[-1].id
messages_2 = server.get_agent_recall_cursor(user_id=user_id, agent_id=agent_id, after=cursor1, limit=1000)
messages_2[-1].id
messages_3 = server.get_agent_recall_cursor(user_id=user_id, agent_id=agent_id, limit=1000)
messages_3[-1].id
assert messages_3[-1].created_at >= messages_3[0].created_at
@ -691,3 +941,134 @@ def test_memory_rebuild_count(server, user_id, mock_e2b_api_key_none):
finally:
# cleanup
server.delete_agent(user_id, agent_state.id)
def test_load_file_to_source(server: SyncServer, user_id: str, agent_id: str, other_agent_id: str, tmp_path):
user = server.get_user_or_default(user_id)
# Create a source
source = server.source_manager.create_source(
PydanticSource(
name="timber_source",
embedding_config=EmbeddingConfig.default_config(provider="openai"),
created_by_id=user_id,
),
actor=user
)
# Create a test file with some content
test_file = tmp_path / "test.txt"
test_content = "We have a dog called Timber. He likes to sleep and eat chicken."
test_file.write_text(test_content)
# Attach source to agent first
agent = server.load_agent(agent_id=agent_id)
agent.attach_source(user=user, source_id=source.id, source_manager=server.source_manager, ms=server.ms)
# Get initial passage count
initial_passage_count = server.passage_manager.size(actor=user, agent_id=agent_id, source_id=source.id)
assert initial_passage_count == 0
# Create a job for loading the first file
job = server.job_manager.create_job(
PydanticJob(
user_id=user_id,
metadata_={"type": "embedding", "filename": test_file.name, "source_id": source.id},
),
actor=user
)
# Load the first file to source
server.load_file_to_source(
source_id=source.id,
file_path=str(test_file),
job_id=job.id,
actor=user,
)
# Verify job completed successfully
job = server.job_manager.get_job_by_id(job_id=job.id, actor=user)
assert job.status == "completed"
assert job.metadata_["num_passages"] == 1
assert job.metadata_["num_documents"] == 1
# Verify passages were added
first_file_passage_count = server.passage_manager.size(actor=user, agent_id=agent_id, source_id=source.id)
assert first_file_passage_count > initial_passage_count
# Create a second test file with different content
test_file2 = tmp_path / "test2.txt"
test_file2.write_text(WAR_AND_PEACE)
# Create a job for loading the second file
job2 = server.job_manager.create_job(
PydanticJob(
user_id=user_id,
metadata_={"type": "embedding", "filename": test_file2.name, "source_id": source.id},
),
actor=user
)
# Load the second file to source
server.load_file_to_source(
source_id=source.id,
file_path=str(test_file2),
job_id=job2.id,
actor=user,
)
# Verify second job completed successfully
job2 = server.job_manager.get_job_by_id(job_id=job2.id, actor=user)
assert job2.status == "completed"
assert job2.metadata_["num_passages"] >= 10
assert job2.metadata_["num_documents"] == 1
# Verify passages were appended (not replaced)
final_passage_count = server.passage_manager.size(actor=user, agent_id=agent_id, source_id=source.id)
assert final_passage_count > first_file_passage_count
# Verify both old and new content is searchable
passages = server.passage_manager.list_passages(
actor=user,
agent_id=agent_id,
source_id=source.id,
query_text="what does Timber like to eat",
embedding_config=EmbeddingConfig.default_config(provider="openai"),
embed_query=True,
)
assert len(passages) == final_passage_count
assert any("chicken" in passage.text.lower() for passage in passages)
assert any("Anna".lower() in passage.text.lower() for passage in passages)
# TODO: Add this test back in after separation of `Passage tables` (LET-449)
# # Load second agent
# agent2 = server.load_agent(agent_id=other_agent_id)
# # Initially should have no passages
# initial_agent2_passages = server.passage_manager.size(actor=user, agent_id=other_agent_id, source_id=source.id)
# assert initial_agent2_passages == 0
# # Attach source to second agent
# agent2.attach_source(user=user, source_id=source.id, source_manager=server.source_manager, ms=server.ms)
# # Verify second agent has same number of passages as first agent
# agent2_passages = server.passage_manager.size(actor=user, agent_id=other_agent_id, source_id=source.id)
# agent1_passages = server.passage_manager.size(actor=user, agent_id=agent_id, source_id=source.id)
# assert agent2_passages == agent1_passages
# # Verify second agent can query the same content
# passages2 = server.passage_manager.list_passages(
# actor=user,
# agent_id=other_agent_id,
# source_id=source.id,
# query_text="what does Timber like to eat",
# embedding_config=EmbeddingConfig.default_config(provider="openai"),
# embed_query=True,
# limit=10,
# )
# assert len(passages2) == len(passages)
# assert any("chicken" in passage.text.lower() for passage in passages2)
# assert any("sleep" in passage.text.lower() for passage in passages2)
# # Cleanup
# server.delete_agent(user_id=user_id, agent_id=agent2_state.id)