MemGPT/letta/services/tool_executor/tool_execution_sandbox.py

import ast
import base64
import io
import os
import pickle
import subprocess
import sys
import tempfile
import traceback
import uuid
from contextlib import contextmanager
from typing import Any, Dict, Optional
from letta.functions.helpers import generate_model_from_args_json_schema
from letta.log import get_logger
from letta.schemas.agent import AgentState
from letta.schemas.sandbox_config import SandboxConfig, SandboxType
from letta.schemas.tool import Tool
from letta.schemas.tool_execution_result import ToolExecutionResult
from letta.schemas.user import User
from letta.services.helpers.tool_execution_helper import (
add_imports_and_pydantic_schemas_for_args,
create_venv_for_local_sandbox,
find_python_executable,
install_pip_requirements_for_sandbox,
)
from letta.services.organization_manager import OrganizationManager
from letta.services.sandbox_config_manager import SandboxConfigManager
from letta.services.tool_manager import ToolManager
from letta.settings import tool_settings
from letta.tracing import log_event, trace_method
from letta.utils import get_friendly_error_msg
logger = get_logger(__name__)
class ToolExecutionSandbox:
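"""Executes a Letta tool call in a sandbox: either an e2b cloud sandbox (when an e2b API key
is configured and the tool is not privileged) or a local directory sandbox (optionally inside
a virtualenv).

Illustrative usage (the tool name and arguments below are hypothetical; the tool must already
be registered for the user's organization):

    sandbox = ToolExecutionSandbox(tool_name="get_weather", args={"city": "SF"}, user=user)
    result = sandbox.run(agent_state=agent_state)
    print(result.status, result.func_return)
"""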
METADATA_CONFIG_STATE_KEY = "config_state"
REQUIREMENT_TXT_NAME = "requirements.txt"
# For generating long, collision-resistant marker strings (deterministic uuid5 hashes)
NAMESPACE = uuid.NAMESPACE_DNS
LOCAL_SANDBOX_RESULT_START_MARKER = str(uuid.uuid5(NAMESPACE, "local-sandbox-result-start-marker"))
LOCAL_SANDBOX_RESULT_END_MARKER = str(uuid.uuid5(NAMESPACE, "local-sandbox-result-end-marker"))
# This is the variable name in the auto-generated code that contains the function results
# We make this a long random string to avoid collisions with any variables in the user's code
LOCAL_SANDBOX_RESULT_VAR_NAME = "result_ZQqiequkcFwRwwGQMqkt"
def __init__(
self, tool_name: str, args: dict, user: User, force_recreate=True, force_recreate_venv=False, tool_object: Optional[Tool] = None
):
self.tool_name = tool_name
self.args = args
self.user = user
# get organization
self.organization = OrganizationManager().get_organization_by_id(self.user.organization_id)
self.privileged_tools = self.organization.privileged_tools
# If a tool object is provided, we use it directly, otherwise pull via name
if tool_object is not None:
self.tool = tool_object
else:
# Get the tool via name
# TODO: So in theory, it's possible this retrieves a tool not provisioned to the agent
# TODO: That would probably imply that agent_state is incorrectly configured
self.tool = ToolManager().get_tool_by_name(tool_name=tool_name, actor=self.user)
if not self.tool:
raise ValueError(
f"Agent attempted to invoke tool {self.tool_name} that does not exist for organization {self.user.organization_id}"
)
self.sandbox_config_manager = SandboxConfigManager()
self.force_recreate = force_recreate
self.force_recreate_venv = force_recreate_venv
def run(
self,
agent_state: Optional[AgentState] = None,
additional_env_vars: Optional[Dict] = None,
) -> ToolExecutionResult:
"""
Run the tool in a sandbox environment.
Args:
agent_state (Optional[AgentState]): The state of the agent invoking the tool
additional_env_vars (Optional[Dict]): Environment variables to inject into the sandbox
Returns:
ToolExecutionResult: Object containing tool execution outcome (e.g. status, response)
"""
if tool_settings.e2b_api_key and not self.privileged_tools:
logger.debug(f"Using e2b sandbox to execute {self.tool_name}")
result = self.run_e2b_sandbox(agent_state=agent_state, additional_env_vars=additional_env_vars)
else:
logger.debug(f"Using local sandbox to execute {self.tool_name}")
result = self.run_local_dir_sandbox(agent_state=agent_state, additional_env_vars=additional_env_vars)
# Log out any stdout/stderr from the tool run
logger.debug(f"Executed tool '{self.tool_name}', logging output from tool run: \n")
for log_line in (result.stdout or []) + (result.stderr or []):
logger.debug(f"{log_line}")
logger.debug(f"Ending output log from tool run.")
# Return result
return result
# local sandbox specific functions
@contextmanager
def temporary_env_vars(self, env_vars: dict):
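"""Temporarily overlay `env_vars` onto os.environ, restoring the original environment on exit."""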
original_env = os.environ.copy() # Backup original environment variables
os.environ.update(env_vars) # Update with the new variables
try:
yield
finally:
os.environ.clear()
os.environ.update(original_env) # Restore original environment variables
@trace_method
def run_local_dir_sandbox(
self, agent_state: Optional[AgentState] = None, additional_env_vars: Optional[Dict] = None
) -> ToolExecutionResult:
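"""Run the tool in the local directory sandbox: write the generated script to a temp file in
the sandbox dir, then execute it either inside the configured venv (subprocess) or directly
in-process."""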
sbx_config = self.sandbox_config_manager.get_or_create_default_sandbox_config(sandbox_type=SandboxType.LOCAL, actor=self.user)
local_configs = sbx_config.get_local_config()
# Get environment variables for the sandbox
env = os.environ.copy()
env_vars = self.sandbox_config_manager.get_sandbox_env_vars_as_dict(sandbox_config_id=sbx_config.id, actor=self.user, limit=100)
env.update(env_vars)
# Get environment variables for this agent specifically
if agent_state:
env.update(agent_state.get_agent_env_vars_as_dict())
# Finally, get any that are passed explicitly into the `run` function call
if additional_env_vars:
env.update(additional_env_vars)
# Safety checks
if not os.path.exists(local_configs.sandbox_dir) or not os.path.isdir(local_configs.sandbox_dir):
logger.warning(f"Sandbox directory does not exist, creating: {local_configs.sandbox_dir}")
os.makedirs(local_configs.sandbox_dir)
# Write the code to a temp file in the sandbox_dir
with tempfile.NamedTemporaryFile(mode="w", dir=local_configs.sandbox_dir, suffix=".py", delete=False) as temp_file:
if local_configs.use_venv:
# If using venv, wrap the result with special string markers so it can be separated from the rest of the output (everything arrives on stdout)
code = self.generate_execution_script(agent_state=agent_state, wrap_print_with_markers=True)
else:
code = self.generate_execution_script(agent_state=agent_state)
temp_file.write(code)
temp_file.flush()
temp_file_path = temp_file.name
try:
if local_configs.use_venv:
return self.run_local_dir_sandbox_venv(sbx_config, env, temp_file_path)
else:
return self.run_local_dir_sandbox_directly(sbx_config, env, temp_file_path)
except Exception as e:
logger.error(f"Executing tool {self.tool_name} has an unexpected error: {e}")
logger.error(f"Logging out tool {self.tool_name} auto-generated code for debugging: \n\n{code}")
raise e
finally:
# Clean up the temp file
os.remove(temp_file_path)
@trace_method
def run_local_dir_sandbox_venv(
self,
sbx_config: SandboxConfig,
env: Dict[str, str],
temp_file_path: str,
) -> ToolExecutionResult:
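"""Execute the generated script with the sandbox venv's Python interpreter in a subprocess,
creating the venv and installing pip requirements first if needed."""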
local_configs = sbx_config.get_local_config()
sandbox_dir = os.path.expanduser(local_configs.sandbox_dir) # Expand tilde
venv_path = os.path.join(sandbox_dir, local_configs.venv_name)
# Recreate venv if required
if self.force_recreate_venv or not os.path.isdir(venv_path):
logger.warning(f"Virtual environment directory does not exist at: {venv_path}, creating one now...")
log_event(name="start create_venv_for_local_sandbox", attributes={"venv_path": venv_path})
create_venv_for_local_sandbox(
sandbox_dir_path=sandbox_dir, venv_path=venv_path, env=env, force_recreate=self.force_recreate_venv
)
log_event(name="finish create_venv_for_local_sandbox")
log_event(name="start install_pip_requirements_for_sandbox", attributes={"local_configs": local_configs.model_dump_json()})
install_pip_requirements_for_sandbox(local_configs, env=env)
log_event(name="finish install_pip_requirements_for_sandbox", attributes={"local_configs": local_configs.model_dump_json()})
# Ensure Python executable exists
python_executable = find_python_executable(local_configs)
if not os.path.isfile(python_executable):
raise FileNotFoundError(f"Python executable not found in virtual environment: {python_executable}")
# Set up environment variables
env["VIRTUAL_ENV"] = venv_path
env["PATH"] = os.path.join(venv_path, "bin") + ":" + env["PATH"]
env["PYTHONWARNINGS"] = "ignore"
# Execute the code
try:
log_event(name="start subprocess")
result = subprocess.run(
[python_executable, temp_file_path],
env=env,
cwd=sandbox_dir,
timeout=60,
capture_output=True,
text=True,
check=True,  # raise CalledProcessError on non-zero exit so the handler below is reachable
)
log_event(name="finish subprocess")
func_result, stdout = self.parse_out_function_results_markers(result.stdout)
func_return, agent_state = self.parse_best_effort(func_result)
return ToolExecutionResult(
status="success",
func_return=func_return,
agent_state=agent_state,
stdout=[stdout] if stdout else [],
stderr=[result.stderr] if result.stderr else [],
sandbox_config_fingerprint=sbx_config.fingerprint(),
)
except subprocess.CalledProcessError as e:
logger.error(f"Executing tool {self.tool_name} has process error: {e}")
func_return = get_friendly_error_msg(
function_name=self.tool_name,
exception_name=type(e).__name__,
exception_message=str(e),
)
return ToolExecutionResult(
status="error",
func_return=func_return,
agent_state=None,
stdout=[e.stdout] if e.stdout else [],
stderr=[e.stderr] if e.stderr else [],
sandbox_config_fingerprint=sbx_config.fingerprint(),
)
except subprocess.TimeoutExpired:
raise TimeoutError(f"Executing tool {self.tool_name} has timed out.")
except Exception as e:
logger.error(f"Executing tool {self.tool_name} has an unexpected error: {e}")
raise e
@trace_method
def run_local_dir_sandbox_directly(
self,
sbx_config: SandboxConfig,
env: Dict[str, str],
temp_file_path: str,
) -> ToolExecutionResult:
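"""Execute the generated script in-process via exec(), with env vars applied temporarily and
stdout/stderr captured; the result is read back out of the script's globals."""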
status = "success"
func_return, agent_state, stderr = None, None, None
old_stdout = sys.stdout
old_stderr = sys.stderr
captured_stdout, captured_stderr = io.StringIO(), io.StringIO()
sys.stdout = captured_stdout
sys.stderr = captured_stderr
try:
with self.temporary_env_vars(env):
# Read and compile the Python script
with open(temp_file_path, "r", encoding="utf-8") as f:
source = f.read()
code_obj = compile(source, temp_file_path, "exec")
# Provide a dict for globals
globals_dict = dict(env) # or {}
# If you need to mimic `__main__` behavior:
globals_dict["__name__"] = "__main__"
globals_dict["__file__"] = temp_file_path
# Execute the compiled code
log_event(name="start exec", attributes={"temp_file_path": temp_file_path})
exec(code_obj, globals_dict)
log_event(name="finish exec", attributes={"temp_file_path": temp_file_path})
# Get result from the global dict
func_result = globals_dict.get(self.LOCAL_SANDBOX_RESULT_VAR_NAME)
func_return, agent_state = self.parse_best_effort(func_result)
except Exception as e:
func_return = get_friendly_error_msg(
function_name=self.tool_name,
exception_name=type(e).__name__,
exception_message=str(e),
)
traceback.print_exc(file=sys.stderr)
status = "error"
# Restore stdout/stderr
sys.stdout = old_stdout
sys.stderr = old_stderr
stdout_output = [captured_stdout.getvalue()] if captured_stdout.getvalue() else []
stderr_output = [captured_stderr.getvalue()] if captured_stderr.getvalue() else []
return ToolExecutionResult(
status=status,
func_return=func_return,
agent_state=agent_state,
stdout=stdout_output,
stderr=stderr_output,
sandbox_config_fingerprint=sbx_config.fingerprint(),
)
def parse_out_function_results_markers(self, text: str):
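"""Split subprocess stdout into (function_result, remaining_stdout) using the start/end marker
strings, e.g. "abc<START>payload<END>def" -> ("payload", "abcdef"). If no start marker is
present, returns ("", text)."""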
if self.LOCAL_SANDBOX_RESULT_START_MARKER not in text:
return "", text
marker_len = len(self.LOCAL_SANDBOX_RESULT_START_MARKER)
start_index = text.index(self.LOCAL_SANDBOX_RESULT_START_MARKER) + marker_len
end_index = text.index(self.LOCAL_SANDBOX_RESULT_END_MARKER)
return text[start_index:end_index], text[: start_index - marker_len] + text[end_index + marker_len :]
# e2b sandbox specific functions
def run_e2b_sandbox(
self,
agent_state: Optional[AgentState] = None,
additional_env_vars: Optional[Dict] = None,
) -> ToolExecutionResult:
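"""Run the tool inside an e2b cloud sandbox, reusing a running sandbox whose metadata matches
this config's fingerprint when possible (unless force_recreate is set)."""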
sbx_config = self.sandbox_config_manager.get_or_create_default_sandbox_config(sandbox_type=SandboxType.E2B, actor=self.user)
sbx = self.get_running_e2b_sandbox_with_same_state(sbx_config)
if not sbx or self.force_recreate:
if not sbx:
logger.info(f"No running e2b sandbox found with the same state: {sbx_config}")
else:
logger.info(f"Force recreated e2b sandbox with state: {sbx_config}")
sbx = self.create_e2b_sandbox_with_metadata_hash(sandbox_config=sbx_config)
logger.info(f"E2B Sandbox configurations: {sbx_config}")
logger.info(f"E2B Sandbox ID: {sbx.sandbox_id}")
# Since this sandbox was used, we extend its lifecycle by the timeout
sbx.set_timeout(sbx_config.get_e2b_config().timeout)
# Get environment variables for the sandbox
# TODO: We set limit to 100 here, but maybe we want it uncapped? Realistically this should be fine.
env_vars = self.sandbox_config_manager.get_sandbox_env_vars_as_dict(sandbox_config_id=sbx_config.id, actor=self.user, limit=100)
# Get environment variables for this agent specifically
if agent_state:
env_vars.update(agent_state.get_agent_env_vars_as_dict())
# Finally, get any that are passed explicitly into the `run` function call
if additional_env_vars:
env_vars.update(additional_env_vars)
code = self.generate_execution_script(agent_state=agent_state)
execution = sbx.run_code(code, envs=env_vars)
if execution.results:
func_return, agent_state = self.parse_best_effort(execution.results[0].text)
elif execution.error:
logger.error(f"Executing tool {self.tool_name} raised a {execution.error.name} with message: \n{execution.error.value}")
logger.error(f"Traceback from e2b sandbox: \n{execution.error.traceback}")
func_return = get_friendly_error_msg(
function_name=self.tool_name, exception_name=execution.error.name, exception_message=execution.error.value
)
execution.logs.stderr.append(execution.error.traceback)
else:
raise ValueError(f"Tool {self.tool_name} returned execution with None")
return ToolExecutionResult(
status="error" if execution.error else "success",
func_return=func_return,
agent_state=agent_state,
stdout=execution.logs.stdout,
stderr=execution.logs.stderr,
sandbox_config_fingerprint=sbx_config.fingerprint(),
)
def parse_exception_from_e2b_execution(self, e2b_execution: "Execution") -> Exception:
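"""Reconstruct a Python exception instance from an e2b execution error, falling back to
Exception if the error name is not a builtin."""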
builtins_dict = __builtins__ if isinstance(__builtins__, dict) else vars(__builtins__)
# Dynamically fetch the exception class from builtins, defaulting to Exception if not found
exception_class = builtins_dict.get(e2b_execution.error.name, Exception)
return exception_class(e2b_execution.error.value)
def get_running_e2b_sandbox_with_same_state(self, sandbox_config: SandboxConfig) -> Optional["Sandbox"]:
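"""Return a running e2b sandbox whose metadata records the same config fingerprint, or None."""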
from e2b_code_interpreter import Sandbox
# List running sandboxes and access metadata.
running_sandboxes = self.list_running_e2b_sandboxes()
# Hash the config to check the state
state_hash = sandbox_config.fingerprint()
for sandbox in running_sandboxes:
if self.METADATA_CONFIG_STATE_KEY in sandbox.metadata and sandbox.metadata[self.METADATA_CONFIG_STATE_KEY] == state_hash:
return Sandbox.connect(sandbox.sandbox_id)
return None
def create_e2b_sandbox_with_metadata_hash(self, sandbox_config: SandboxConfig) -> "Sandbox":
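"""Create a new e2b sandbox, stamping the config fingerprint into its metadata and installing
any configured pip requirements."""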
from e2b_code_interpreter import Sandbox
state_hash = sandbox_config.fingerprint()
e2b_config = sandbox_config.get_e2b_config()
if e2b_config.template:
sbx = Sandbox(e2b_config.template, metadata={self.METADATA_CONFIG_STATE_KEY: state_hash})
else:
# no template
sbx = Sandbox(metadata={self.METADATA_CONFIG_STATE_KEY: state_hash}, **e2b_config.model_dump(exclude={"pip_requirements"}))
# install pip requirements
if e2b_config.pip_requirements:
for package in e2b_config.pip_requirements:
sbx.commands.run(f"pip install {package}")
return sbx
def list_running_e2b_sandboxes(self):
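"""List the currently running e2b sandboxes (used to find one matching a config fingerprint)."""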
from e2b_code_interpreter import Sandbox
# List running sandboxes and access metadata.
return Sandbox.list()
# general utility functions
def parse_best_effort(self, text: str) -> Any:
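"""Decode the base64-encoded, pickled result payload produced by the generated script and
return a (function_return, agent_state) tuple; returns (None, None) for empty input."""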
if not text:
return None, None
result = pickle.loads(base64.b64decode(text))
agent_state = None
if not result["agent_state"] is None:
agent_state = result["agent_state"]
return result["results"], agent_state
def parse_function_arguments(self, source_code: str, tool_name: str):
"""Get arguments of a function from its source code"""
tree = ast.parse(source_code)
args = []
for node in ast.walk(tree):
if isinstance(node, ast.FunctionDef) and node.name == tool_name:
for arg in node.args.args:
args.append(arg.arg)
return args
def generate_execution_script(self, agent_state: AgentState, wrap_print_with_markers: bool = False) -> str:
"""
Generate code to run inside of execution sandbox.
Passes a serialized agent state into the code, to be accessed by the tool.
Args:
agent_state (AgentState): The agent state
wrap_print_with_markers (bool): If true, write the result to stdout wrapped in special marker strings so it can be separated from the rest of the output
Returns:
code (str): The generated code string
"""
# dump JSON representation of agent state to re-load
code = "from typing import *\n"
code += "import pickle\n"
code += "import sys\n"
code += "import base64\n"
# imports to support agent state
if agent_state:
code += "import letta\n"
code += "from letta import * \n"
if self.tool.args_json_schema:
schema_code = add_imports_and_pydantic_schemas_for_args(self.tool.args_json_schema)
if "from __future__ import annotations" in schema_code:
schema_code = schema_code.replace("from __future__ import annotations", "").lstrip()
code = "from __future__ import annotations\n\n" + code
code += schema_code + "\n"
# load the agent state
if agent_state:
agent_state_pickle = pickle.dumps(agent_state)
code += f"agent_state = pickle.loads({agent_state_pickle})\n"
else:
# agent state is None
code += "agent_state = None\n"
if self.tool.args_json_schema:
args_schema = generate_model_from_args_json_schema(self.tool.args_json_schema)
code += f"args_object = {args_schema.__name__}(**{self.args})\n"
for param in self.args:
code += f"{param} = args_object.{param}\n"
else:
for param in self.args:
code += self.initialize_param(param, self.args[param])
if "agent_state" in self.parse_function_arguments(self.tool.source_code, self.tool.name):
inject_agent_state = True
else:
inject_agent_state = False
code += "\n" + self.tool.source_code + "\n"
# TODO: handle wrapped print
code += (
self.LOCAL_SANDBOX_RESULT_VAR_NAME
+ ' = {"results": '
+ self.invoke_function_call(inject_agent_state=inject_agent_state)
+ ', "agent_state": agent_state}\n'
)
code += (
f"{self.LOCAL_SANDBOX_RESULT_VAR_NAME} = base64.b64encode(pickle.dumps({self.LOCAL_SANDBOX_RESULT_VAR_NAME})).decode('utf-8')\n"
)
if wrap_print_with_markers:
code += f"sys.stdout.write('{self.LOCAL_SANDBOX_RESULT_START_MARKER}')\n"
code += f"sys.stdout.write(str({self.LOCAL_SANDBOX_RESULT_VAR_NAME}))\n"
code += f"sys.stdout.write('{self.LOCAL_SANDBOX_RESULT_END_MARKER}')\n"
else:
code += f"{self.LOCAL_SANDBOX_RESULT_VAR_NAME}\n"
return code
def _convert_param_to_value(self, param_type: str, raw_value: str) -> str:
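"""Render a JSON-schema-typed argument as a Python source snippet: strings are round-tripped
through pickle to avoid quoting/escaping issues, other supported types are emitted as literals."""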
if param_type == "string":
value = "pickle.loads(" + str(pickle.dumps(raw_value)) + ")"
elif param_type == "integer" or param_type == "boolean" or param_type == "number":
value = raw_value
elif param_type == "array":
value = raw_value
elif param_type == "object":
value = raw_value
else:
raise TypeError(f"Unsupported type: {param_type}, raw_value={raw_value}")
return str(value)
def initialize_param(self, name: str, raw_value: str) -> str:
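"""Generate a `name = value` assignment line for a tool argument based on its JSON schema type;
unknown parameters (e.g. 'self') are skipped by returning an empty string."""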
params = self.tool.json_schema["parameters"]["properties"]
spec = params.get(name)
if spec is None:
# ignore extra params (like 'self') for now
return ""
param_type = spec.get("type")
if param_type is None and spec.get("parameters"):
param_type = spec["parameters"].get("type")
value = self._convert_param_to_value(param_type, raw_value)
return name + " = " + value + "\n"
def invoke_function_call(self, inject_agent_state: bool) -> str:
"""
Generate the code string to call the function.
Args:
inject_agent_state (bool): Whether to inject the agent's state as an input into the tool
Returns:
str: Generated code string for calling the tool
"""
kwargs = []
for name in self.args:
if name in self.tool.json_schema["parameters"]["properties"]:
kwargs.append(name)
param_list = [f"{arg}={arg}" for arg in kwargs]
if inject_agent_state:
param_list.append("agent_state=agent_state")
params = ", ".join(param_list)
# if "agent_state" in kwargs:
# params += ", agent_state=agent_state"
# TODO: fix to figure out when to insert agent state or not
# params += "agent_state=agent_state"
func_call_str = self.tool.name + "(" + params + ")"
return func_call_str