mirror of
https://github.com/cpacker/MemGPT.git
synced 2025-06-03 04:30:22 +00:00
test: Add archival insert test to GPT-4 and make tests failure sensitive (#1930)
This commit is contained in:
parent
1a2a790008
commit
11e6491e7c
47
.github/workflows/test_openai.yml
vendored
47
.github/workflows/test_openai.yml
vendored
@ -30,8 +30,6 @@ jobs:
|
||||
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
||||
run: |
|
||||
poetry run pytest -s -vv tests/test_endpoints.py::test_openai_gpt_4_returns_valid_first_message
|
||||
echo "TEST_FIRST_MESSAGE_EXIT_CODE=$?" >> $GITHUB_ENV
|
||||
continue-on-error: true
|
||||
|
||||
- name: Test model sends message with keyword
|
||||
id: test_keyword_message
|
||||
@ -39,8 +37,6 @@ jobs:
|
||||
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
||||
run: |
|
||||
poetry run pytest -s -vv tests/test_endpoints.py::test_openai_gpt_4_returns_keyword
|
||||
echo "TEST_KEYWORD_MESSAGE_EXIT_CODE=$?" >> $GITHUB_ENV
|
||||
continue-on-error: true
|
||||
|
||||
- name: Test model uses external tool correctly
|
||||
id: test_external_tool
|
||||
@ -48,8 +44,6 @@ jobs:
|
||||
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
||||
run: |
|
||||
poetry run pytest -s -vv tests/test_endpoints.py::test_openai_gpt_4_uses_external_tool
|
||||
echo "TEST_EXTERNAL_TOOL_EXIT_CODE=$?" >> $GITHUB_ENV
|
||||
continue-on-error: true
|
||||
|
||||
- name: Test model recalls chat memory
|
||||
id: test_chat_memory
|
||||
@ -57,17 +51,20 @@ jobs:
|
||||
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
||||
run: |
|
||||
poetry run pytest -s -vv tests/test_endpoints.py::test_openai_gpt_4_recall_chat_memory
|
||||
echo "TEST_CHAT_MEMORY_EXIT_CODE=$?" >> $GITHUB_ENV
|
||||
continue-on-error: true
|
||||
|
||||
- name: Test model uses 'archival_memory_search' to find secret
|
||||
id: test_archival_memory
|
||||
id: test_archival_memory_search
|
||||
env:
|
||||
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
||||
run: |
|
||||
poetry run pytest -s -vv tests/test_endpoints.py::test_openai_gpt_4_archival_memory_retrieval
|
||||
echo "TEST_ARCHIVAL_MEMORY_EXIT_CODE=$?" >> $GITHUB_ENV
|
||||
continue-on-error: true
|
||||
|
||||
- name: Test model uses 'archival_memory_insert' to insert archival memories
|
||||
id: test_archival_memory_insert
|
||||
env:
|
||||
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
||||
run: |
|
||||
poetry run pytest -s -vv tests/test_endpoints.py::test_openai_gpt_4_archival_memory_insert
|
||||
|
||||
- name: Test model can edit core memories
|
||||
id: test_core_memory
|
||||
@ -75,8 +72,6 @@ jobs:
|
||||
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
||||
run: |
|
||||
poetry run pytest -s -vv tests/test_endpoints.py::test_openai_gpt_4_edit_core_memory
|
||||
echo "TEST_CORE_MEMORY_EXIT_CODE=$?" >> $GITHUB_ENV
|
||||
continue-on-error: true
|
||||
|
||||
- name: Test embedding endpoint
|
||||
id: test_embedding_endpoint
|
||||
@ -84,29 +79,3 @@ jobs:
|
||||
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
||||
run: |
|
||||
poetry run pytest -s -vv tests/test_endpoints.py::test_embedding_endpoint_openai
|
||||
echo "TEST_EMBEDDING_ENDPOINT_EXIT_CODE=$?" >> $GITHUB_ENV
|
||||
continue-on-error: true
|
||||
|
||||
- name: Summarize test results
|
||||
if: always()
|
||||
run: |
|
||||
echo "Test Results Summary:"
|
||||
echo "Test first message: $([[ $TEST_FIRST_MESSAGE_EXIT_CODE -eq 0 ]] && echo ✅ || echo ❌)"
|
||||
echo "Test model sends message with keyword: $([[ $TEST_KEYWORD_MESSAGE_EXIT_CODE -eq 0 ]] && echo ✅ || echo ❌)"
|
||||
echo "Test model uses external tool: $([[ $TEST_EXTERNAL_TOOL_EXIT_CODE -eq 0 ]] && echo ✅ || echo ❌)"
|
||||
echo "Test model recalls chat memory: $([[ $TEST_CHAT_MEMORY_EXIT_CODE -eq 0 ]] && echo ✅ || echo ❌)"
|
||||
echo "Test model uses 'archival_memory_search' to find secret: $([[ $TEST_ARCHIVAL_MEMORY_EXIT_CODE -eq 0 ]] && echo ✅ || echo ❌)"
|
||||
echo "Test model can edit core memories: $([[ $TEST_CORE_MEMORY_EXIT_CODE -eq 0 ]] && echo ✅ || echo ❌)"
|
||||
echo "Test embedding endpoint: $([[ $TEST_EMBEDDING_ENDPOINT_EXIT_CODE -eq 0 ]] && echo ✅ || echo ❌)"
|
||||
|
||||
# Check if any test failed
|
||||
if [[ $TEST_FIRST_MESSAGE_EXIT_CODE -ne 0 || \
|
||||
$TEST_KEYWORD_MESSAGE_EXIT_CODE -ne 0 || \
|
||||
$TEST_EXTERNAL_TOOL_EXIT_CODE -ne 0 || \
|
||||
$TEST_CHAT_MEMORY_EXIT_CODE -ne 0 || \
|
||||
$TEST_ARCHIVAL_MEMORY_EXIT_CODE -ne 0 || \
|
||||
$TEST_CORE_MEMORY_EXIT_CODE -ne 0 || \
|
||||
$TEST_EMBEDDING_ENDPOINT_EXIT_CODE -ne 0 ]]; then
|
||||
echo "Some tests failed."
|
||||
exit 78
|
||||
fi
|
||||
|
@ -229,6 +229,35 @@ def check_agent_recall_chat_memory(filename: str) -> LettaResponse:
|
||||
return response
|
||||
|
||||
|
||||
def check_agent_archival_memory_insert(filename: str) -> LettaResponse:
|
||||
"""
|
||||
Checks that the LLM will execute an archival memory insert.
|
||||
|
||||
Note: This is acting on the Letta response, note the usage of `user_message`
|
||||
"""
|
||||
# Set up client
|
||||
client = create_client()
|
||||
cleanup(client=client, agent_uuid=agent_uuid)
|
||||
agent_state = setup_agent(client, filename)
|
||||
secret_word = "banana"
|
||||
|
||||
response = client.user_message(
|
||||
agent_id=agent_state.id,
|
||||
message=f"Please insert the secret word '{secret_word}' into archival memory.",
|
||||
)
|
||||
|
||||
# Basic checks
|
||||
assert_sanity_checks(response)
|
||||
|
||||
# Make sure archival_memory_insert was called
|
||||
assert_invoked_function_call(response.messages, "archival_memory_insert")
|
||||
|
||||
# Make sure some inner monologue is present
|
||||
assert_inner_monologue_is_present_and_valid(response.messages)
|
||||
|
||||
return response
|
||||
|
||||
|
||||
def check_agent_archival_memory_retrieval(filename: str) -> LettaResponse:
|
||||
"""
|
||||
Checks that the LLM will execute an archival memory retrieval.
|
||||
|
@ -3,6 +3,7 @@ import os
|
||||
import time
|
||||
|
||||
from tests.helpers.endpoints_helper import (
|
||||
check_agent_archival_memory_insert,
|
||||
check_agent_archival_memory_retrieval,
|
||||
check_agent_edit_core_memory,
|
||||
check_agent_recall_chat_memory,
|
||||
@ -93,6 +94,13 @@ def test_openai_gpt_4_archival_memory_retrieval():
|
||||
print(f"Got successful response from client: \n\n{response}")
|
||||
|
||||
|
||||
def test_openai_gpt_4_archival_memory_insert():
|
||||
filename = os.path.join(llm_config_dir, "gpt-4.json")
|
||||
response = check_agent_archival_memory_insert(filename)
|
||||
# Log out successful response
|
||||
print(f"Got successful response from client: \n\n{response}")
|
||||
|
||||
|
||||
def test_openai_gpt_4_edit_core_memory():
|
||||
filename = os.path.join(llm_config_dir, "gpt-4.json")
|
||||
response = check_agent_edit_core_memory(filename)
|
||||
|
Loading…
Reference in New Issue
Block a user