fix: add more types to the type_map from python -> json schema (#2095)

Charles Packer 2024-11-22 12:12:40 -08:00 committed by GitHub
parent 03c8c65c20
commit 708feec0a7
2 changed files with 19 additions and 5 deletions

View File

@@ -1,5 +1,5 @@
 import inspect
-from typing import Any, Dict, Optional, Type, Union, get_args, get_origin
+from typing import Any, Dict, List, Optional, Type, Union, get_args, get_origin
 
 from docstring_parser import parse
 from pydantic import BaseModel
@@ -38,15 +38,29 @@ def type_to_json_schema_type(py_type):
     # Mapping of Python types to JSON schema types
     type_map = {
+        # Basic types
         int: "integer",
         str: "string",
         bool: "boolean",
         float: "number",
-        list[str]: "array",
-        # Add more mappings as needed
+        # Collections
+        List[str]: "array",
+        List[int]: "array",
+        list: "array",
+        tuple: "array",
+        set: "array",
+        # Dictionaries
+        dict: "object",
+        Dict[str, Any]: "object",
+        # Special types
+        None: "null",
+        type(None): "null",
+        # Optional types
+        # Optional[str]: "string",  # NOTE: caught above ^
+        Union[str, None]: "string",
     }
     if py_type not in type_map:
-        raise ValueError(f"Python type {py_type} has no corresponding JSON schema type")
+        raise ValueError(f"Python type {py_type} has no corresponding JSON schema type - full map: {type_map}")
     return type_map.get(py_type, "string")  # Default to "string" if type not in map
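
A minimal, self-contained sketch of the resulting behavior (the mapping is copied from the patched function above; the call sites are illustrative, not part of the commit):

    from typing import Any, Dict, List, Union

    def type_to_json_schema_type(py_type):
        # Same mapping as the patched function, reproduced standalone.
        type_map = {
            int: "integer",
            str: "string",
            bool: "boolean",
            float: "number",
            List[str]: "array",
            List[int]: "array",
            list: "array",
            tuple: "array",
            set: "array",
            dict: "object",
            Dict[str, Any]: "object",
            None: "null",
            type(None): "null",
            Union[str, None]: "string",  # i.e. Optional[str]
        }
        if py_type not in type_map:
            raise ValueError(f"Python type {py_type} has no corresponding JSON schema type - full map: {type_map}")
        return type_map[py_type]

    print(type_to_json_schema_type(List[int]))   # "array" (raised ValueError before this commit)
    print(type_to_json_schema_type(set))         # "array"
    print(type_to_json_schema_type(type(None)))  # "null"

Typing generics like List[int] are hashable and compare by origin and type arguments, which is why they work as plain dict keys here.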

View File

@@ -88,7 +88,7 @@ def num_tokens_from_functions(functions: List[dict], model: str = "gpt-4"):
     try:
         encoding = tiktoken.encoding_for_model(model)
     except KeyError:
-        print("Warning: model not found. Using cl100k_base encoding.")
+        warnings.warn("Warning: model not found. Using cl100k_base encoding.")
         encoding = tiktoken.get_encoding("cl100k_base")
 
     num_tokens = 0
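
The practical difference in the print -> warnings.warn swap is that callers can now filter, deduplicate, or escalate the message through the warnings machinery instead of it always landing on stdout. A short sketch (the fallback_encoding helper is hypothetical, mirroring the patched code path above):

    import warnings

    import tiktoken

    def fallback_encoding(model: str):
        # Hypothetical helper mirroring the patched try/except.
        try:
            return tiktoken.encoding_for_model(model)
        except KeyError:
            warnings.warn("Warning: model not found. Using cl100k_base encoding.")
            return tiktoken.get_encoding("cl100k_base")

    # Suppress the fallback notice for a model we know is unmapped:
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        enc = fallback_encoding("my-custom-model")

    # Or escalate it to an exception in strict test setups:
    # warnings.simplefilter("error")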