Add o1 support

Saoud Rizwan 2024-09-13 16:37:04 -04:00
parent bbdd58cc93
commit adceb76775
5 changed files with 531 additions and 48 deletions

View File

@@ -9,6 +9,7 @@ import {
openAiNativeModels,
} from "../shared/api"
import { convertToAnthropicMessage, convertToOpenAiMessages } from "../utils/openai-format"
import { convertO1ResponseToAnthropicMessage, convertToO1Messages } from "../utils/o1-format"
export class OpenAiNativeHandler implements ApiHandler {
private options: ApiHandlerOptions
@@ -38,19 +39,46 @@ export class OpenAiNativeHandler implements ApiHandler {
parameters: tool.input_schema,
},
}))
const createParams: OpenAI.Chat.Completions.ChatCompletionCreateParamsNonStreaming = {
model: this.getModel().id,
max_tokens: this.getModel().info.maxTokens,
messages: openAiMessages,
tools: openAiTools,
tool_choice: "auto",
let createParams: OpenAI.Chat.Completions.ChatCompletionCreateParamsNonStreaming
switch (this.getModel().id) {
case "o1-preview":
case "o1-mini":
createParams = {
model: this.getModel().id,
max_tokens: this.getModel().info.maxTokens,
messages: convertToO1Messages(convertToOpenAiMessages(messages), systemPrompt),
}
break
default:
createParams = {
model: this.getModel().id,
max_tokens: this.getModel().info.maxTokens,
messages: openAiMessages,
tools: openAiTools,
tool_choice: "auto",
}
break
}
const completion = await this.client.chat.completions.create(createParams)
const errorMessage = (completion as any).error?.message
if (errorMessage) {
throw new Error(errorMessage)
}
const anthropicMessage = convertToAnthropicMessage(completion)
let anthropicMessage: Anthropic.Messages.Message
switch (this.getModel().id) {
case "o1-preview":
case "o1-mini":
anthropicMessage = convertO1ResponseToAnthropicMessage(completion)
break
default:
anthropicMessage = convertToAnthropicMessage(completion)
break
}
return { message: anthropicMessage }
}

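For context, a minimal sketch of the two request shapes the switch above produces (illustrative values, not from the diff; the o1 branch exists because o1-preview and o1-mini launched without support for the system role or the tools/tool_choice parameters):

// Default models: native tool use, system prompt passed through as-is.
const standardParams: OpenAI.Chat.Completions.ChatCompletionCreateParamsNonStreaming = {
	model: "gpt-4o",
	max_tokens: 4_096,
	messages: openAiMessages, // may include a { role: "system" } message
	tools: openAiTools,
	tool_choice: "auto",
}
// o1 models: no tools and no system role; convertToO1Messages folds the system
// prompt and XML tool descriptions into a leading user message instead.
const o1Params: OpenAI.Chat.Completions.ChatCompletionCreateParamsNonStreaming = {
	model: "o1-preview",
	max_tokens: 32_768,
	messages: convertToO1Messages(convertToOpenAiMessages(messages), systemPrompt),
}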
View File

@@ -10,6 +10,7 @@ import {
} from "../shared/api"
import { convertToAnthropicMessage, convertToOpenAiMessages } from "../utils/openai-format"
import axios from "axios"
import { convertO1ResponseToAnthropicMessage, convertToO1Messages } from "../utils/o1-format"
export class OpenRouterHandler implements ApiHandler {
private options: ApiHandlerOptions
@@ -86,12 +87,26 @@ export class OpenRouterHandler implements ApiHandler {
},
}))
const createParams: OpenAI.Chat.Completions.ChatCompletionCreateParamsNonStreaming = {
model: this.getModel().id,
max_tokens: this.getModel().info.maxTokens,
messages: openAiMessages,
tools: openAiTools,
tool_choice: "auto",
let createParams: OpenAI.Chat.Completions.ChatCompletionCreateParamsNonStreaming
switch (this.getModel().id) {
case "openai/o1-preview":
case "openai/o1-mini":
createParams = {
model: this.getModel().id,
max_tokens: this.getModel().info.maxTokens,
messages: convertToO1Messages(convertToOpenAiMessages(messages), systemPrompt),
}
break
default:
createParams = {
model: this.getModel().id,
max_tokens: this.getModel().info.maxTokens,
messages: openAiMessages,
tools: openAiTools,
tool_choice: "auto",
}
break
}
let completion: OpenAI.Chat.Completions.ChatCompletion
@@ -107,7 +122,16 @@ export class OpenRouterHandler implements ApiHandler {
throw new Error(errorMessage)
}
const anthropicMessage = convertToAnthropicMessage(completion)
let anthropicMessage: Anthropic.Messages.Message
switch (this.getModel().id) {
case "openai/o1-preview":
case "openai/o1-mini":
anthropicMessage = convertO1ResponseToAnthropicMessage(completion)
break
default:
anthropicMessage = convertToAnthropicMessage(completion)
break
}
// Check if the model is Gemini Flash and remove extra escapes in tool result args
// switch (this.getModel().id) {

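A worked example (hypothetical messages) of what convertToO1Messages, defined in the new src/utils/o1-format.ts below, produces for the o1 branch in both handlers:

const input: OpenAI.Chat.ChatCompletionMessageParam[] = [
	{
		role: "assistant",
		content: "Checking the directory.",
		tool_calls: [
			{ id: "call_1", type: "function", function: { name: "list_files", arguments: '{"path":"."}' } },
		],
	},
	{ role: "tool", tool_call_id: "call_1", content: "src/\npackage.json" },
]
const out = convertToO1Messages(input, "You are Claude Dev.")
// out[0] is { role: "user", content: o1SystemPrompt("You are Claude Dev.") }
// out[1] inlines the tool call as text:
//   { role: "assistant", content: "Checking the directory.\nTool Call: list_files\nArguments: {\"path\":\".\"}" }
// out[2] turns the tool result into a plain user turn:
//   { role: "user", content: "src/\npackage.json" }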
View File

@@ -149,22 +149,22 @@ export const openRouterModels = {
cacheReadsPrice: 0.03,
},
// Doesn't support tool use (yet)
// "openai/o1-preview": {
// maxTokens: 32_768,
// contextWindow: 128_000,
// supportsImages: true,
// supportsPromptCache: false,
// inputPrice: 15,
// outputPrice: 60,
// },
// "openai/o1-mini": {
// maxTokens: 65_536,
// contextWindow: 128_000,
// supportsImages: true,
// supportsPromptCache: false,
// inputPrice: 3,
// outputPrice: 12,
// },
"openai/o1-preview": {
maxTokens: 32_768,
contextWindow: 128_000,
supportsImages: true,
supportsPromptCache: false,
inputPrice: 15,
outputPrice: 60,
},
"openai/o1-mini": {
maxTokens: 65_536,
contextWindow: 128_000,
supportsImages: true,
supportsPromptCache: false,
inputPrice: 3,
outputPrice: 12,
},
"openai/gpt-4o-2024-08-06": {
maxTokens: 16384,
contextWindow: 128_000,
@@ -365,22 +365,22 @@ export type OpenAiNativeModelId = keyof typeof openAiNativeModels
export const openAiNativeDefaultModelId: OpenAiNativeModelId = "gpt-4o"
export const openAiNativeModels = {
// don't support tool use yet
// "o1-preview": {
// maxTokens: 32_768,
// contextWindow: 128_000,
// supportsImages: true,
// supportsPromptCache: false,
// inputPrice: 15,
// outputPrice: 60,
// },
// "o1-mini": {
// maxTokens: 65_536,
// contextWindow: 128_000,
// supportsImages: true,
// supportsPromptCache: false,
// inputPrice: 3,
// outputPrice: 12,
// },
"o1-preview": {
maxTokens: 32_768,
contextWindow: 128_000,
supportsImages: true,
supportsPromptCache: false,
inputPrice: 15,
outputPrice: 60,
},
"o1-mini": {
maxTokens: 65_536,
contextWindow: 128_000,
supportsImages: true,
supportsPromptCache: false,
inputPrice: 3,
outputPrice: 12,
},
"gpt-4o": {
maxTokens: 4_096,
contextWindow: 128_000,

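For reference, a sketch of the shape these model entries conform to, reconstructed from the fields visible in this diff (cacheReadsPrice appears in the earlier hunk; the per-million-token pricing unit is an assumption):

interface ModelInfo {
	maxTokens: number
	contextWindow: number
	supportsImages: boolean
	supportsPromptCache: boolean
	inputPrice: number // assumed: USD per million input tokens
	outputPrice: number // assumed: USD per million output tokens
	cacheReadsPrice?: number
}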
src/utils/o1-format.ts (new file, 429 lines)
View File

@@ -0,0 +1,429 @@
import { Anthropic } from "@anthropic-ai/sdk"
import OpenAI from "openai"
const o1SystemPrompt = (systemPrompt: string) => `
# System Prompt
${systemPrompt}
# Instructions for Formulating Your Response
You must respond to the user's request by using at least one tool call. When formulating your response, follow these guidelines:
1. Begin your response with normal text, explaining your thoughts, analysis, or plan of action.
2. If you need to use any tools, place ALL tool calls at the END of your message, after your normal text explanation.
3. You can use multiple tool calls if needed, but they should all be grouped together at the end of your message.
4. After placing the tool calls, do not add any additional normal text. The tool calls should be the final content in your message.
Here's the general structure your responses should follow:
\`\`\`
[Your normal text response explaining your thoughts and actions]
[Tool Call 1]
[Tool Call 2 if needed]
[Tool Call 3 if needed]
...
\`\`\`
Remember:
- Choose the most appropriate tool(s) based on the task and the tool descriptions provided.
- Formulate your tool calls using the XML format specified for each tool.
- Provide clear explanations in your normal text about what actions you're taking and why you're using particular tools.
- Act as if the tool calls will be executed immediately after your message, and your next response will have access to their results.
# Tool Descriptions and XML Formats
1. execute_command:
<execute_command>
<command>Your command here</command>
</execute_command>
Description: Execute a CLI command on the system. Use this when you need to perform system operations or run specific commands to accomplish any step in the user's task. You must tailor your command to the user's system and provide a clear explanation of what the command does. Prefer to execute complex CLI commands over creating executable scripts, as they are more flexible and easier to run. Commands will be executed in the current working directory.
2. list_files:
<list_files>
<path>Directory path here</path>
<recursive>true or false (optional)</recursive>
</list_files>
Description: List files and directories within the specified directory. If recursive is true, it will list all files and directories recursively. If recursive is false or not provided, it will only list the top-level contents.
3. list_code_definition_names:
<list_code_definition_names>
<path>Directory path here</path>
</list_code_definition_names>
Description: Lists definition names (classes, functions, methods, etc.) used in source code files at the top level of the specified directory. This tool provides insights into the codebase structure and important constructs, encapsulating high-level concepts and relationships that are crucial for understanding the overall architecture.
4. search_files:
<search_files>
<path>Directory path here</path>
<regex>Your regex pattern here</regex>
<filePattern>Optional file pattern here</filePattern>
</search_files>
Description: Perform a regex search across files in a specified directory, providing context-rich results. This tool searches for patterns or specific content across multiple files, displaying each match with encapsulating context.
5. read_file:
<read_file>
<path>File path here</path>
</read_file>
Description: Read the contents of a file at the specified path. Use this when you need to examine the contents of an existing file, for example to analyze code, review text files, or extract information from configuration files. Automatically extracts raw text from PDF and DOCX files. May not be suitable for other types of binary files, as it returns the raw content as a string.
6. write_to_file:
<write_to_file>
<path>File path here</path>
<content>
Your file content here
</content>
</write_to_file>
Description: Write content to a file at the specified path. If the file exists, it will be overwritten with the provided content. If the file doesn't exist, it will be created. Always provide the full intended content of the file, without any truncation. This tool will automatically create any directories needed to write the file.
7. ask_followup_question:
<ask_followup_question>
<question>Your question here</question>
</ask_followup_question>
Description: Ask the user a question to gather additional information needed to complete the task. This tool should be used when you encounter ambiguities, need clarification, or require more details to proceed effectively. It allows for interactive problem-solving by enabling direct communication with the user. Use this tool judiciously to maintain a balance between gathering necessary information and avoiding excessive back-and-forth.
8. attempt_completion:
<attempt_completion>
<command>Optional command to demonstrate result</command>
<result>
Your final result description here
</result>
</attempt_completion>
Description: Once you've completed the task, use this tool to present the result to the user. They may respond with feedback if they are not satisfied with the result, which you can use to make improvements and try again.
# Examples
Here are some examples of how to structure your responses with tool calls:
Example 1: Using a single tool
Let's run the test suite for our project. This will help us ensure that all our components are functioning correctly.
<execute_command>
<command>npm test</command>
</execute_command>
Example 2: Using multiple tools
Let's create two new configuration files for the web application: one for the frontend and one for the backend.
<write_to_file>
<path>./frontend-config.json</path>
<content>
{
"apiEndpoint": "https://api.example.com",
"theme": {
"primaryColor": "#007bff",
"secondaryColor": "#6c757d",
"fontFamily": "Arial, sans-serif"
},
"features": {
"darkMode": true,
"notifications": true,
"analytics": false
},
"version": "1.0.0"
}
</content>
</write_to_file>
<write_to_file>
<path>./backend-config.yaml</path>
<content>
database:
host: localhost
port: 5432
name: myapp_db
user: admin
server:
port: 3000
environment: development
logLevel: debug
security:
jwtSecret: your-secret-key-here
passwordSaltRounds: 10
caching:
enabled: true
provider: redis
ttl: 3600
externalServices:
emailProvider: sendgrid
storageProvider: aws-s3
</content>
</write_to_file>
Example 3: Asking a follow-up question
I've analyzed the project structure, but I need more information to proceed. Let me ask the user for clarification.
<ask_followup_question>
<question>Which specific feature would you like me to implement in the example.py file?</question>
</ask_followup_question>
`
export function convertToO1Messages(
openAiMessages: OpenAI.Chat.ChatCompletionMessageParam[],
systemPrompt: string
): OpenAI.Chat.ChatCompletionMessageParam[] {
const toolsReplaced = openAiMessages.reduce((acc, message) => {
if (message.role === "tool") {
// Convert tool messages to user messages
acc.push({
role: "user",
content: message.content || "",
})
} else if (message.role === "assistant" && message.tool_calls) {
// Convert tool calls to content and remove tool_calls
let content = message.content || ""
message.tool_calls.forEach((toolCall) => {
if (toolCall.type === "function") {
content += `\nTool Call: ${toolCall.function.name}\nArguments: ${toolCall.function.arguments}`
}
})
acc.push({
role: "assistant",
content: content,
tool_calls: undefined,
})
} else {
// Keep other messages as they are
acc.push(message)
}
return acc
}, [] as OpenAI.Chat.ChatCompletionMessageParam[])
// Find the index of the last assistant message
// const lastAssistantIndex = findLastIndex(toolsReplaced, (message) => message.role === "assistant")
// Create a new array to hold the modified messages
const messagesWithSystemPrompt = [
{
role: "user",
content: o1SystemPrompt(systemPrompt),
} as OpenAI.Chat.ChatCompletionUserMessageParam,
...toolsReplaced,
]
// If there's an assistant message, insert the system prompt after it
// if (lastAssistantIndex !== -1) {
// const insertIndex = lastAssistantIndex + 1
// if (insertIndex < messagesWithSystemPrompt.length && messagesWithSystemPrompt[insertIndex].role === "user") {
// messagesWithSystemPrompt.splice(insertIndex, 0, {
// role: "user",
// content: o1SystemPrompt(systemPrompt),
// })
// }
// } else {
// // If there were no assistant messages, prepend the system prompt
// messagesWithSystemPrompt.unshift({
// role: "user",
// content: o1SystemPrompt(systemPrompt),
// })
// }
return messagesWithSystemPrompt
}
interface ToolCall {
tool: string
tool_input: Record<string, string>
}
const toolNames = [
"execute_command",
"list_files",
"list_code_definition_names",
"search_files",
"read_file",
"write_to_file",
"ask_followup_question",
"attempt_completion",
]
function parseAIResponse(response: string): { normalText: string; toolCalls: ToolCall[] } {
// Create a regex pattern to match any tool call opening tag
const toolCallPattern = new RegExp(`<(${toolNames.join("|")})`, "i")
const match = response.match(toolCallPattern)
if (!match) {
// No tool calls found
return { normalText: response.trim(), toolCalls: [] }
}
const toolCallStart = match.index!
const normalText = response.slice(0, toolCallStart).trim()
const toolCallsText = response.slice(toolCallStart)
const toolCalls = parseToolCalls(toolCallsText)
return { normalText, toolCalls }
}
function parseToolCalls(toolCallsText: string): ToolCall[] {
const toolCalls: ToolCall[] = []
let remainingText = toolCallsText
while (remainingText.length > 0) {
const toolMatch = toolNames.find((tool) => new RegExp(`<${tool}`, "i").test(remainingText))
if (!toolMatch) {
break // No more tool calls found
}
const startTag = `<${toolMatch}`
const endTag = `</${toolMatch}>`
const startIndex = remainingText.indexOf(startTag)
const endIndex = remainingText.indexOf(endTag, startIndex)
if (endIndex === -1) {
break // Malformed XML, no closing tag found
}
const toolCallContent = remainingText.slice(startIndex, endIndex + endTag.length)
remainingText = remainingText.slice(endIndex + endTag.length).trim()
const toolCall = parseToolCall(toolMatch, toolCallContent)
if (toolCall) {
toolCalls.push(toolCall)
}
}
return toolCalls
}
function parseToolCall(toolName: string, content: string): ToolCall | null {
const tool_input: Record<string, string> = {}
// Remove the outer tool tags
const innerContent = content.replace(new RegExp(`^<${toolName}>|</${toolName}>$`, "g"), "").trim()
// Parse nested XML elements
const paramRegex = /<(\w+)>([\s\S]*?)<\/\1>/gs
let match
while ((match = paramRegex.exec(innerContent)) !== null) {
const [, paramName, paramValue] = match
// Preserve newlines and trim only leading/trailing whitespace
tool_input[paramName] = paramValue.replace(/^\s+|\s+$/g, "")
}
// Validate required parameters
if (!validateToolInput(toolName, tool_input)) {
console.error(`Invalid tool call for ${toolName}:`, content)
return null
}
return { tool: toolName, tool_input }
}
function validateToolInput(toolName: string, tool_input: Record<string, string>): boolean {
switch (toolName) {
case "execute_command":
return "command" in tool_input
case "read_file":
case "list_code_definition_names":
case "list_files":
return "path" in tool_input
case "search_files":
return "path" in tool_input && "regex" in tool_input
case "write_to_file":
return "path" in tool_input && "content" in tool_input
case "ask_followup_question":
return "question" in tool_input
case "attempt_completion":
return "result" in tool_input
default:
return false
}
}
// Example usage:
// const aiResponse = `Here's my analysis of the situation...
// <execute_command>
// <command>ls -la</command>
// </execute_command>
// <write_to_file>
// <path>./example.txt</path>
// <content>Hello, World!</content>
// </write_to_file>`;
//
// const { normalText, toolCalls } = parseAIResponse(aiResponse);
// console.log(normalText);
// console.log(toolCalls);
// Convert OpenAI response to Anthropic format
export function convertO1ResponseToAnthropicMessage(
completion: OpenAI.Chat.Completions.ChatCompletion
): Anthropic.Messages.Message {
const openAiMessage = completion.choices[0].message
const { normalText, toolCalls } = parseAIResponse(openAiMessage.content || "")
const anthropicMessage: Anthropic.Messages.Message = {
id: completion.id,
type: "message",
role: openAiMessage.role, // always "assistant"
content: [
{
type: "text",
text: normalText,
},
],
model: completion.model,
stop_reason: (() => {
switch (completion.choices[0].finish_reason) {
case "stop":
return "end_turn"
case "length":
return "max_tokens"
case "tool_calls":
return "tool_use"
case "content_filter": // Anthropic doesn't have an exact equivalent
default:
return null
}
})(),
stop_sequence: null, // which custom stop_sequence was generated, if any (not applicable if you don't use stop_sequence)
usage: {
input_tokens: completion.usage?.prompt_tokens || 0,
output_tokens: completion.usage?.completion_tokens || 0,
},
}
if (toolCalls.length > 0) {
anthropicMessage.content.push(
...toolCalls.map((toolCall: ToolCall, index: number): Anthropic.ToolUseBlock => {
return {
type: "tool_use",
id: `call_${index}_${Date.now()}`, // Generate a unique ID for each tool call
name: toolCall.tool,
input: toolCall.tool_input,
}
})
)
}
return anthropicMessage
}
// Example usage:
// const openAICompletion = {
// id: "cmpl-123",
// choices: [{
// message: {
// role: "assistant",
// content: "Here's my analysis...\n\n<execute_command>\n <command>ls -la</command>\n</execute_command>"
// },
// finish_reason: "stop"
// }],
// model: "gpt-3.5-turbo",
// usage: { prompt_tokens: 50, completion_tokens: 100 }
// };
// const anthropicMessage = convertO1ResponseToAnthropicMessage(openAICompletion);
// console.log(anthropicMessage);

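End to end, the new module round-trips like this (a sketch with hypothetical response text, calling the module-private parseAIResponse directly for illustration):

const raw = `I'll create the file now.

<write_to_file>
<path>./hello.txt</path>
<content>
Hello
</content>
</write_to_file>`
const { normalText, toolCalls } = parseAIResponse(raw)
// normalText === "I'll create the file now."
// toolCalls[0] === { tool: "write_to_file", tool_input: { path: "./hello.txt", content: "Hello" } }
// convertO1ResponseToAnthropicMessage would then emit one text block plus one
// tool_use block whose id follows the call_0_${Date.now()} pattern above.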
View File

@@ -546,7 +546,7 @@ export const formatPrice = (price: number) => {
const ModelInfoView = ({ selectedModelId, modelInfo }: { selectedModelId: string; modelInfo: ModelInfo }) => {
const isGemini = Object.keys(geminiModels).includes(selectedModelId)
const isO1 = false //(["o1-preview", "o1-mini"] as OpenAiNativeModelId[]).includes(selectedModelId as OpenAiNativeModelId)
const isO1 = selectedModelId && selectedModelId.includes("o1")
return (
<p style={{ fontSize: "12px", marginTop: "2px", color: "var(--vscode-descriptionForeground)" }}>
<ModelInfoSupportsItem
@@ -613,8 +613,10 @@ const ModelInfoView = ({ selectedModelId, modelInfo }: { selectedModelId: string
<span
style={{
fontStyle: "italic",
color: "var(--vscode-errorForeground)",
}}>
* This model is newly released and may not be accessible to all users yet.
* This model does not support tool use or system prompts, so Claude Dev uses structured output
prompting to achieve similar results. Your mileage may vary.
</span>
</>
)}