Added Cerebras as a Provider (#3810)

* Added Cerebras as a Provider

* prettier fix

* prettier

---------

Co-authored-by: sam <sam@MacBook-Air-3.local>
Kevin Taylor 2025-05-26 20:06:15 -07:00 committed by GitHub
parent 2ca3e9ac82
commit 6fa819a170
11 changed files with 339 additions and 3 deletions

View File

@@ -0,0 +1,15 @@
---
"claude-dev": minor
---
Add Cerebras as a new API provider with comprehensive model support. Features include:
- **5 Cerebras models**: llama3.1-8b, llama-4-scout-17b-16e-instruct, llama-3.3-70b, qwen-3-32b, and deepseek-r1-distill-llama-70b
- **Native Cerebras SDK integration** using @cerebras/cerebras_cloud_sdk
- **Reasoning support** for Qwen and DeepSeek R1 Distill models with `<think>` tag handling (sketched below)
- **Streaming responses** with proper error handling and usage tracking
- **Cost calculation** and token counting
- **UI integration** with API key configuration and model selection
- **Free pricing** for all models (set to $0 input/output costs)
Users can now connect to Cerebras's high-performance inference API using their API key and access fast, efficient LLM services directly from within Cline.
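
As a rough sketch of the `<think>` handling called out above: once a chunk contains `<think>`, output is routed to a reasoning channel until `</think>` arrives, with the tags stripped for display. This mirrors the state machine in the new handler further down in this diff; the names here (`routeChunks`, `channel`) are illustrative, not part of the PR.

function* routeChunks(chunks: string[]): Generator<{ channel: "reasoning" | "text"; content: string }> {
	let reasoning: string | null = null // non-null while inside a <think> block
	for (const content of chunks) {
		if (reasoning !== null || content.includes("<think>")) {
			reasoning = (reasoning ?? "") + content
			const clean = content.replace(/<\/?think>/g, "") // strip tags for display
			if (clean.trim()) {
				yield { channel: "reasoning", content: clean }
			}
			if (reasoning.includes("</think>")) {
				reasoning = null // reasoning finished; later chunks are plain text
			}
		} else {
			yield { channel: "text", content }
		}
	}
}

// A reasoning block split across chunks, followed by the visible answer:
for (const out of routeChunks(["<think>plan ", "steps</think>", "Final answer"])) {
	console.log(out.channel, out.content) // reasoning "plan ", reasoning "steps", text "Final answer"
}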

View File

@@ -164,6 +164,7 @@ Key providers include:
- **OpenRouter**: Meta-provider supporting multiple model providers
- **AWS Bedrock**: Integration with Amazon's AI services
- **Gemini**: Google's AI models
- **Cerebras**: High-performance inference with Llama, Qwen, and DeepSeek models
- **Ollama**: Local model hosting
- **LM Studio**: Local model hosting
- **VSCode LM**: VSCode's built-in language models

View File

@@ -51,7 +51,7 @@ Thanks to [Claude 3.7 Sonnet's agentic coding capabilities](https://www.anthrop
### Use any API and Model
Cline supports API providers like OpenRouter, Anthropic, OpenAI, Google Gemini, AWS Bedrock, Azure, and GCP Vertex. You can also configure any OpenAI compatible API, or use a local model through LM Studio/Ollama. If you're using OpenRouter, the extension fetches their latest model list, allowing you to use the newest models as soon as they're available.
Cline supports API providers like OpenRouter, Anthropic, OpenAI, Google Gemini, AWS Bedrock, Azure, GCP Vertex, and Cerebras. You can also configure any OpenAI compatible API, or use a local model through LM Studio/Ollama. If you're using OpenRouter, the extension fetches their latest model list, allowing you to use the newest models as soon as they're available.
The extension also keeps track of total tokens and API usage cost for the entire task loop and individual requests, keeping you informed of spend every step of the way.

package-lock.json (generated): 53 lines changed
View File

@@ -1,12 +1,12 @@
{
"name": "claude-dev",
"version": "3.17.2",
"version": "3.17.5",
"lockfileVersion": 2,
"requires": true,
"packages": {
"": {
"name": "claude-dev",
"version": "3.17.2",
"version": "3.17.5",
"license": "Apache-2.0",
"dependencies": {
"@anthropic-ai/bedrock-sdk": "^0.12.4",
@@ -14,6 +14,7 @@
"@anthropic-ai/vertex-sdk": "^0.6.4",
"@aws-sdk/client-bedrock-runtime": "^3.758.0",
"@bufbuild/protobuf": "^2.2.5",
"@cerebras/cerebras_cloud_sdk": "^1.35.0",
"@google-cloud/vertexai": "^1.9.3",
"@google/genai": "^0.13.0",
"@grpc/grpc-js": "^1.9.15",
@@ -4089,6 +4090,30 @@
"integrity": "sha512-/g5EzJifw5GF8aren8wZ/G5oMuPoGeS6MQD3ca8ddcvdXR5UELUfdTZITCGNhNXynY/AYl3Z4plmxdj/tRl/hQ==",
"license": "(Apache-2.0 AND BSD-3-Clause)"
},
"node_modules/@cerebras/cerebras_cloud_sdk": {
"version": "1.35.0",
"resolved": "https://registry.npmjs.org/@cerebras/cerebras_cloud_sdk/-/cerebras_cloud_sdk-1.35.0.tgz",
"integrity": "sha512-bQ6KYHmcvudHJ1aLzqkeETn3Y071/8/zpcZho6g4pKZ+VluHvLmIG0buhrwF9qJY5WSLmXR/s4pruxVRmfV7yQ==",
"license": "Apache-2.0",
"dependencies": {
"@types/node": "^18.11.18",
"@types/node-fetch": "^2.6.4",
"abort-controller": "^3.0.0",
"agentkeepalive": "^4.2.1",
"form-data-encoder": "1.7.2",
"formdata-node": "^4.3.2",
"node-fetch": "^2.6.7"
}
},
"node_modules/@cerebras/cerebras_cloud_sdk/node_modules/@types/node": {
"version": "18.19.103",
"resolved": "https://registry.npmjs.org/@types/node/-/node-18.19.103.tgz",
"integrity": "sha512-hHTHp+sEz6SxFsp+SA+Tqrua3AbmlAw+Y//aEwdHrdZkYVRWdvWD3y5uPZ0flYOkgskaFWqZ/YGFm3FaFQ0pRw==",
"license": "MIT",
"dependencies": {
"undici-types": "~5.26.4"
}
},
"node_modules/@changesets/apply-release-plan": {
"version": "7.0.8",
"resolved": "https://registry.npmjs.org/@changesets/apply-release-plan/-/apply-release-plan-7.0.8.tgz",
@@ -28892,6 +28917,30 @@
"resolved": "https://registry.npmjs.org/@bufbuild/protobuf/-/protobuf-2.2.5.tgz",
"integrity": "sha512-/g5EzJifw5GF8aren8wZ/G5oMuPoGeS6MQD3ca8ddcvdXR5UELUfdTZITCGNhNXynY/AYl3Z4plmxdj/tRl/hQ=="
},
"@cerebras/cerebras_cloud_sdk": {
"version": "1.35.0",
"resolved": "https://registry.npmjs.org/@cerebras/cerebras_cloud_sdk/-/cerebras_cloud_sdk-1.35.0.tgz",
"integrity": "sha512-bQ6KYHmcvudHJ1aLzqkeETn3Y071/8/zpcZho6g4pKZ+VluHvLmIG0buhrwF9qJY5WSLmXR/s4pruxVRmfV7yQ==",
"requires": {
"@types/node": "^18.11.18",
"@types/node-fetch": "^2.6.4",
"abort-controller": "^3.0.0",
"agentkeepalive": "^4.2.1",
"form-data-encoder": "1.7.2",
"formdata-node": "^4.3.2",
"node-fetch": "^2.6.7"
},
"dependencies": {
"@types/node": {
"version": "18.19.103",
"resolved": "https://registry.npmjs.org/@types/node/-/node-18.19.103.tgz",
"integrity": "sha512-hHTHp+sEz6SxFsp+SA+Tqrua3AbmlAw+Y//aEwdHrdZkYVRWdvWD3y5uPZ0flYOkgskaFWqZ/YGFm3FaFQ0pRw==",
"requires": {
"undici-types": "~5.26.4"
}
}
}
},
"@changesets/apply-release-plan": {
"version": "7.0.8",
"resolved": "https://registry.npmjs.org/@changesets/apply-release-plan/-/apply-release-plan-7.0.8.tgz",

View File

@@ -349,6 +349,7 @@
"@anthropic-ai/vertex-sdk": "^0.6.4",
"@aws-sdk/client-bedrock-runtime": "^3.758.0",
"@bufbuild/protobuf": "^2.2.5",
"@cerebras/cerebras_cloud_sdk": "^1.35.0",
"@google-cloud/vertexai": "^1.9.3",
"@google/genai": "^0.13.0",
"@grpc/grpc-js": "^1.9.15",

View File

@@ -24,6 +24,7 @@ import { FireworksHandler } from "./providers/fireworks"
import { AskSageHandler } from "./providers/asksage"
import { XAIHandler } from "./providers/xai"
import { SambanovaHandler } from "./providers/sambanova"
import { CerebrasHandler } from "./providers/cerebras"
export interface ApiHandler {
createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream
@@ -84,6 +85,8 @@ export function buildApiHandler(configuration: ApiConfiguration): ApiHandler {
return new XAIHandler(options)
case "sambanova":
return new SambanovaHandler(options)
case "cerebras":
return new CerebrasHandler(options)
default:
return new AnthropicHandler(options)
}
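
For illustration, a call site exercising the new branch might look like the sketch below; the field names match this diff, while the import path and literal values are placeholders.

import { buildApiHandler } from "@api/index" // hypothetical alias; adjust to the real module path

async function demo() {
	const handler = buildApiHandler({
		apiProvider: "cerebras",
		apiModelId: "llama-3.3-70b",
		cerebrasApiKey: process.env.CEREBRAS_API_KEY, // the handler's constructor throws if this is missing
	})
	for await (const chunk of handler.createMessage("You are a coding assistant.", [
		{ role: "user", content: "Hello" },
	])) {
		// chunk.type is "text", "reasoning", or "usage" (see the new provider below)
	}
}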

View File

@@ -0,0 +1,169 @@
import { Anthropic } from "@anthropic-ai/sdk"
import Cerebras from "@cerebras/cerebras_cloud_sdk"
import { withRetry } from "../retry"
import { ApiHandlerOptions, ModelInfo, CerebrasModelId, cerebrasDefaultModelId, cerebrasModels } from "@shared/api"
import { ApiHandler } from "../index"
import { ApiStream } from "@api/transform/stream"
export class CerebrasHandler implements ApiHandler {
private options: ApiHandlerOptions
private client: Cerebras
constructor(options: ApiHandlerOptions) {
this.options = options
// Clean and validate the API key
const cleanApiKey = this.options.cerebrasApiKey?.trim()
if (!cleanApiKey) {
throw new Error("Cerebras API key is required")
}
this.client = new Cerebras({
apiKey: cleanApiKey,
timeout: 30000, // 30 second timeout
})
}
@withRetry()
async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream {
// Convert Anthropic messages to Cerebras format
const cerebrasMessages: Array<{
role: "system" | "user" | "assistant"
content: string
}> = [{ role: "system", content: systemPrompt }]
// Convert Anthropic messages to Cerebras format
for (const message of messages) {
if (message.role === "user") {
const content = Array.isArray(message.content)
? message.content
.map((block) => {
if (block.type === "text") {
return block.text
} else if (block.type === "image") {
return "[Image content not supported in Cerebras]"
}
return ""
})
.join("\n")
: message.content
cerebrasMessages.push({ role: "user", content })
} else if (message.role === "assistant") {
const content = Array.isArray(message.content)
? message.content
.map((block) => {
if (block.type === "text") {
return block.text
}
return ""
})
.join("\n")
: message.content || ""
cerebrasMessages.push({ role: "assistant", content })
}
}
try {
const stream = await this.client.chat.completions.create({
model: this.getModel().id,
messages: cerebrasMessages,
temperature: 0,
stream: true,
})
// Handle streaming response
let reasoning: string | null = null // Track reasoning content for models that support thinking
const modelId = this.getModel().id
const isReasoningModel = modelId.includes("qwen") || modelId.includes("deepseek-r1-distill")
for await (const chunk of stream as any) {
// Type assertion for the streaming chunk
const streamChunk = chunk as any
if (streamChunk.choices?.[0]?.delta?.content) {
const content = streamChunk.choices[0].delta.content
// Handle reasoning models (Qwen and DeepSeek R1 Distill) that use <think> tags
if (isReasoningModel) {
// Check if we're entering or continuing reasoning mode
if (reasoning || content.includes("<think>")) {
reasoning = (reasoning || "") + content
// Clean the content by removing think tags for display
let cleanContent = content.replace(/<think>/g, "").replace(/<\/think>/g, "")
// Only yield reasoning content if there's actual content after cleaning
if (cleanContent.trim()) {
yield {
type: "reasoning",
reasoning: cleanContent,
}
}
// Check if reasoning is complete
if (reasoning.includes("</think>")) {
reasoning = null
}
} else {
// Regular content outside of thinking tags
yield {
type: "text",
text: content,
}
}
} else {
// Non-reasoning models - just yield text content
yield {
type: "text",
text: content,
}
}
}
// Handle usage information from Cerebras API
// Usage is typically only available in the final chunk
if (streamChunk.usage) {
const totalCost = this.calculateCost({
inputTokens: streamChunk.usage.prompt_tokens || 0,
outputTokens: streamChunk.usage.completion_tokens || 0,
})
yield {
type: "usage",
inputTokens: streamChunk.usage.prompt_tokens || 0,
outputTokens: streamChunk.usage.completion_tokens || 0,
cacheReadTokens: 0,
cacheWriteTokens: 0,
totalCost,
}
}
}
} catch (error) {
throw error
}
}
getModel(): { id: string; info: ModelInfo } {
const modelId = this.options.apiModelId
if (modelId && modelId in cerebrasModels) {
const id = modelId as CerebrasModelId
return { id, info: cerebrasModels[id] }
}
return {
id: cerebrasDefaultModelId,
info: cerebrasModels[cerebrasDefaultModelId],
}
}
private calculateCost({ inputTokens, outputTokens }: { inputTokens: number; outputTokens: number }): number {
const model = this.getModel()
const inputPrice = model.info.inputPrice || 0
const outputPrice = model.info.outputPrice || 0
const inputCost = (inputPrice / 1_000_000) * inputTokens
const outputCost = (outputPrice / 1_000_000) * outputTokens
return inputCost + outputCost
}
}
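
A quick standalone check of the arithmetic in calculateCost(): prices are dollars per million tokens, so cost scales linearly with token counts. Every Cerebras model in this PR is priced at $0, so real costs come out to 0; the prices below are hypothetical, purely to exercise the formula.

const inputPrice = 0.6 // hypothetical $ per million input tokens
const outputPrice = 2.4 // hypothetical $ per million output tokens
const inputTokens = 10_000
const outputTokens = 2_000

const totalCost = (inputPrice / 1_000_000) * inputTokens + (outputPrice / 1_000_000) * outputTokens
console.log(totalCost.toFixed(4)) // "0.0108": $0.0060 input + $0.0048 output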

View File

@@ -21,6 +21,7 @@ export type SecretKey =
| "xaiApiKey"
| "nebiusApiKey"
| "sambanovaApiKey"
| "cerebrasApiKey"
export type GlobalStateKey =
| "apiProvider"

View File

@@ -155,6 +155,7 @@ export async function getAllExtensionState(context: vscode.ExtensionContext) {
thinkingBudgetTokens,
reasoningEffort,
sambanovaApiKey,
cerebrasApiKey,
nebiusApiKey,
planActSeparateModelsSettingRaw,
favoritedModelIds,
@@ -244,6 +245,7 @@
getGlobalState(context, "thinkingBudgetTokens") as Promise<number | undefined>,
getGlobalState(context, "reasoningEffort") as Promise<string | undefined>,
getSecret(context, "sambanovaApiKey") as Promise<string | undefined>,
getSecret(context, "cerebrasApiKey") as Promise<string | undefined>,
getSecret(context, "nebiusApiKey") as Promise<string | undefined>,
getGlobalState(context, "planActSeparateModelsSetting") as Promise<boolean | undefined>,
getGlobalState(context, "favoritedModelIds") as Promise<string[] | undefined>,
@@ -357,6 +359,7 @@
asksageApiUrl,
xaiApiKey,
sambanovaApiKey,
cerebrasApiKey,
nebiusApiKey,
favoritedModelIds,
requestTimeoutMs,
@@ -451,6 +454,7 @@ export async function updateApiConfiguration(context: vscode.ExtensionContext, a
reasoningEffort,
clineApiKey,
sambanovaApiKey,
cerebrasApiKey,
nebiusApiKey,
favoritedModelIds,
} = apiConfiguration
@@ -512,6 +516,7 @@
await updateGlobalState(context, "reasoningEffort", reasoningEffort)
await storeSecret(context, "clineApiKey", clineApiKey)
await storeSecret(context, "sambanovaApiKey", sambanovaApiKey)
await storeSecret(context, "cerebrasApiKey", cerebrasApiKey)
await storeSecret(context, "nebiusApiKey", nebiusApiKey)
await updateGlobalState(context, "favoritedModelIds", favoritedModelIds)
await updateGlobalState(context, "requestTimeoutMs", apiConfiguration.requestTimeoutMs)
@@ -542,6 +547,7 @@ export async function resetExtensionState(context: vscode.ExtensionContext) {
"asksageApiKey",
"xaiApiKey",
"sambanovaApiKey",
"cerebrasApiKey",
"nebiusApiKey",
]
for (const key of secretKeys) {
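
The new "cerebrasApiKey" secret rides on VS Code's SecretStorage, assuming storeSecret/getSecret in this file are thin wrappers over context.secrets; the function name below is illustrative.

import * as vscode from "vscode"

async function demoSecretRoundTrip(context: vscode.ExtensionContext) {
	await context.secrets.store("cerebrasApiKey", "csk-placeholder") // saved when the API configuration updates
	const key = await context.secrets.get("cerebrasApiKey") // read back in getAllExtensionState
	await context.secrets.delete("cerebrasApiKey") // what the resetExtensionState loop above does
	return key
}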

View File

@@ -24,6 +24,7 @@ export type ApiProvider =
| "asksage"
| "xai"
| "sambanova"
| "cerebras"
export interface ApiHandlerOptions {
apiModelId?: string
@@ -89,6 +90,7 @@ export interface ApiHandlerOptions {
thinkingBudgetTokens?: number
reasoningEffort?: string
sambanovaApiKey?: string
cerebrasApiKey?: string
requestTimeoutMs?: number
onRetryAttempt?: (attempt: number, maxRetries: number, delay: number, error: any) => void
}
@@ -1928,6 +1930,58 @@ export const sambanovaModels = {
},
} as const satisfies Record<string, ModelInfo>
// Cerebras
// https://inference-docs.cerebras.ai/api-reference/models
export type CerebrasModelId = keyof typeof cerebrasModels
export const cerebrasDefaultModelId: CerebrasModelId = "llama3.1-8b"
export const cerebrasModels = {
"llama-4-scout-17b-16e-instruct": {
maxTokens: 8192,
contextWindow: 8192,
supportsImages: false,
supportsPromptCache: false,
inputPrice: 0,
outputPrice: 0,
description: "Fast inference model with ~2700 tokens/s",
},
"llama3.1-8b": {
maxTokens: 8192,
contextWindow: 8192,
supportsImages: false,
supportsPromptCache: false,
inputPrice: 0,
outputPrice: 0,
description: "Efficient model with ~2100 tokens/s",
},
"llama-3.3-70b": {
maxTokens: 8192,
contextWindow: 8192,
supportsImages: false,
supportsPromptCache: false,
inputPrice: 0,
outputPrice: 0,
description: "Powerful model with ~2600 tokens/s",
},
"qwen-3-32b": {
maxTokens: 16382,
contextWindow: 16382,
supportsImages: false,
supportsPromptCache: false,
inputPrice: 0,
outputPrice: 0,
description: "SOTA coding performance with ~2500 tokens/s",
},
"deepseek-r1-distill-llama-70b": {
maxTokens: 8192,
contextWindow: 8192,
supportsImages: false,
supportsPromptCache: false,
inputPrice: 0,
outputPrice: 0,
description: "Advanced reasoning model with ~2300 tokens/s (private preview)",
},
} as const satisfies Record<string, ModelInfo>
// Requesty
// https://requesty.ai/models
export const requestyDefaultModelId = "anthropic/claude-3-7-sonnet-latest"
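
As an aside, getModel() in the new handler resolves entries from this table with an `in` check that narrows a plain string to CerebrasModelId, falling back to the default. A self-contained sketch (resolveCerebrasModel is an illustrative name):

import { cerebrasModels, cerebrasDefaultModelId, type CerebrasModelId } from "@shared/api"

function resolveCerebrasModel(requested?: string) {
	if (requested && requested in cerebrasModels) {
		const id = requested as CerebrasModelId
		return { id, info: cerebrasModels[id] }
	}
	return { id: cerebrasDefaultModelId, info: cerebrasModels[cerebrasDefaultModelId] }
}

resolveCerebrasModel("qwen-3-32b").info.contextWindow // 16382
resolveCerebrasModel("not-a-model").id // "llama3.1-8b", the default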

View File

@@ -51,6 +51,8 @@ import {
nebiusDefaultModelId,
sambanovaModels,
sambanovaDefaultModelId,
cerebrasModels,
cerebrasDefaultModelId,
doubaoModels,
doubaoDefaultModelId,
liteLlmModelInfoSaneDefaults,
@@ -329,6 +331,7 @@
<VSCodeOption value="asksage">AskSage</VSCodeOption>
<VSCodeOption value="xai">xAI</VSCodeOption>
<VSCodeOption value="sambanova">SambaNova</VSCodeOption>
<VSCodeOption value="cerebras">Cerebras</VSCodeOption>
</VSCodeDropdown>
</DropdownContainer>
@@ -2013,6 +2016,37 @@
</div>
)}
{selectedProvider === "cerebras" && (
<div>
<VSCodeTextField
value={apiConfiguration?.cerebrasApiKey || ""}
style={{ width: "100%" }}
type="password"
onInput={handleInputChange("cerebrasApiKey")}
placeholder="Enter API Key...">
<span style={{ fontWeight: 500 }}>Cerebras API Key</span>
</VSCodeTextField>
<p
style={{
fontSize: "12px",
marginTop: 3,
color: "var(--vscode-descriptionForeground)",
}}>
This key is stored locally and only used to make API requests from this extension.
{!apiConfiguration?.cerebrasApiKey && (
<VSCodeLink
href="https://cloud.cerebras.ai/"
style={{
display: "inline",
fontSize: "inherit",
}}>
You can get a Cerebras API key by signing up here.
</VSCodeLink>
)}
</p>
</div>
)}
{apiErrorMessage && (
<p
style={{
@@ -2130,6 +2164,7 @@
{selectedProvider === "asksage" && createDropdown(askSageModels)}
{selectedProvider === "xai" && createDropdown(xaiModels)}
{selectedProvider === "sambanova" && createDropdown(sambanovaModels)}
{selectedProvider === "cerebras" && createDropdown(cerebrasModels)}
{selectedProvider === "nebius" && createDropdown(nebiusModels)}
</DropdownContainer>
@@ -2565,6 +2600,8 @@ export function normalizeApiConfiguration(apiConfiguration?: ApiConfiguration):
return getProviderData(nebiusModels, nebiusDefaultModelId)
case "sambanova":
return getProviderData(sambanovaModels, sambanovaDefaultModelId)
case "cerebras":
return getProviderData(cerebrasModels, cerebrasDefaultModelId)
default:
return getProviderData(anthropicModels, anthropicDefaultModelId)
}
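
Finally, an illustrative look at what the new branch resolves when no model is configured; the return shape shown here ({ selectedProvider, selectedModelId, selectedModelInfo }) is assumed from how getProviderData is used in this file, not spelled out in the diff.

const { selectedModelId, selectedModelInfo } = normalizeApiConfiguration({
	apiProvider: "cerebras",
})
// selectedModelId === "llama3.1-8b" (cerebrasDefaultModelId)
// selectedModelInfo.inputPrice === 0, since all Cerebras models are free in this PR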