Added Cerebras as a Provider (#3810)

* Added Cerebras as a Provider

* prettier fix

* prettier

---------

Co-authored-by: sam <sam@MacBook-Air-3.local>
Kevin Taylor 2025-05-26 20:06:15 -07:00 committed by GitHub
parent 2ca3e9ac82
commit 6fa819a170
11 changed files with 339 additions and 3 deletions

View File

@@ -0,0 +1,15 @@
---
"claude-dev": minor
---
Add Cerebras as a new API provider with comprehensive model support. Features include:
- **5 Cerebras models**: llama3.1-8b, llama-4-scout-17b-16e-instruct, llama-3.3-70b, qwen-3-32b, and deepseek-r1-distill-llama-70b
- **Native Cerebras SDK integration** using @cerebras/cerebras_cloud_sdk
- **Reasoning support** for Qwen and DeepSeek R1 Distill models with `<think>` tag handling (sketched below)
- **Streaming responses** with proper error handling and usage tracking
- **Cost calculation** and token counting
- **UI integration** with API key configuration and model selection
- **Free pricing** for all models (set to $0 input/output costs)
Users can now connect to Cerebras's high-performance inference API using their API key and access fast, efficient LLM services directly from within Cline.
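
As a rough sketch of the `<think>` handling called out above: once a chunk contains `<think>`, output is routed to a reasoning channel until `</think>` arrives, with the tags stripped for display. This mirrors the state machine in the new handler further down in this diff; the names here (`routeChunks`, `channel`) are illustrative, not part of the PR.

function* routeChunks(chunks: string[]): Generator<{ channel: "reasoning" | "text"; content: string }> {
	let reasoning: string | null = null // non-null while inside a <think> block
	for (const content of chunks) {
		if (reasoning !== null || content.includes("<think>")) {
			reasoning = (reasoning ?? "") + content
			const clean = content.replace(/<\/?think>/g, "") // strip tags for display
			if (clean.trim()) {
				yield { channel: "reasoning", content: clean }
			}
			if (reasoning.includes("</think>")) {
				reasoning = null // reasoning finished; later chunks are plain text
			}
		} else {
			yield { channel: "text", content }
		}
	}
}

// A reasoning block split across chunks, followed by the visible answer:
for (const out of routeChunks(["<think>plan ", "steps</think>", "Final answer"])) {
	console.log(out.channel, out.content) // reasoning "plan ", reasoning "steps", text "Final answer"
}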

View File

@@ -164,6 +164,7 @@ Key providers include:
- **OpenRouter**: Meta-provider supporting multiple model providers
- **AWS Bedrock**: Integration with Amazon's AI services
- **Gemini**: Google's AI models
- **Cerebras**: High-performance inference with Llama, Qwen, and DeepSeek models
- **Ollama**: Local model hosting
- **LM Studio**: Local model hosting
- **VSCode LM**: VSCode's built-in language models

View File

@@ -51,7 +51,7 @@ Thanks to [Claude 3.7 Sonnet's agentic coding capabilities](https://www.anthrop
### Use any API and Model
Cline supports API providers like OpenRouter, Anthropic, OpenAI, Google Gemini, AWS Bedrock, Azure, and GCP Vertex. You can also configure any OpenAI compatible API, or use a local model through LM Studio/Ollama. If you're using OpenRouter, the extension fetches their latest model list, allowing you to use the newest models as soon as they're available.
Cline supports API providers like OpenRouter, Anthropic, OpenAI, Google Gemini, AWS Bedrock, Azure, GCP Vertex, and Cerebras. You can also configure any OpenAI compatible API, or use a local model through LM Studio/Ollama. If you're using OpenRouter, the extension fetches their latest model list, allowing you to use the newest models as soon as they're available.
The extension also keeps track of total tokens and API usage cost for the entire task loop and individual requests, keeping you informed of spend every step of the way.

package-lock.json (generated): 53 lines changed
View File

@@ -1,12 +1,12 @@
{
"name": "claude-dev",
"version": "3.17.2",
"version": "3.17.5",
"lockfileVersion": 2,
"requires": true,
"packages": {
"": {
"name": "claude-dev",
"version": "3.17.2",
"version": "3.17.5",
"license": "Apache-2.0",
"dependencies": {
"@anthropic-ai/bedrock-sdk": "^0.12.4",
@@ -14,6 +14,7 @@
"@anthropic-ai/vertex-sdk": "^0.6.4",
"@aws-sdk/client-bedrock-runtime": "^3.758.0",
"@bufbuild/protobuf": "^2.2.5",
"@cerebras/cerebras_cloud_sdk": "^1.35.0",
"@google-cloud/vertexai": "^1.9.3",
"@google/genai": "^0.13.0",
"@grpc/grpc-js": "^1.9.15",
@@ -4089,6 +4090,30 @@
"integrity": "sha512-/g5EzJifw5GF8aren8wZ/G5oMuPoGeS6MQD3ca8ddcvdXR5UELUfdTZITCGNhNXynY/AYl3Z4plmxdj/tRl/hQ==",
"license": "(Apache-2.0 AND BSD-3-Clause)"
},
"node_modules/@cerebras/cerebras_cloud_sdk": {
"version": "1.35.0",
"resolved": "https://registry.npmjs.org/@cerebras/cerebras_cloud_sdk/-/cerebras_cloud_sdk-1.35.0.tgz",
"integrity": "sha512-bQ6KYHmcvudHJ1aLzqkeETn3Y071/8/zpcZho6g4pKZ+VluHvLmIG0buhrwF9qJY5WSLmXR/s4pruxVRmfV7yQ==",
"license": "Apache-2.0",
"dependencies": {
"@types/node": "^18.11.18",
"@types/node-fetch": "^2.6.4",
"abort-controller": "^3.0.0",
"agentkeepalive": "^4.2.1",
"form-data-encoder": "1.7.2",
"formdata-node": "^4.3.2",
"node-fetch": "^2.6.7"
}
},
"node_modules/@cerebras/cerebras_cloud_sdk/node_modules/@types/node": {
"version": "18.19.103",
"resolved": "https://registry.npmjs.org/@types/node/-/node-18.19.103.tgz",
"integrity": "sha512-hHTHp+sEz6SxFsp+SA+Tqrua3AbmlAw+Y//aEwdHrdZkYVRWdvWD3y5uPZ0flYOkgskaFWqZ/YGFm3FaFQ0pRw==",
"license": "MIT",
"dependencies": {
"undici-types": "~5.26.4"
}
},
"node_modules/@changesets/apply-release-plan": {
"version": "7.0.8",
"resolved": "https://registry.npmjs.org/@changesets/apply-release-plan/-/apply-release-plan-7.0.8.tgz",
@@ -28892,6 +28917,30 @@
"resolved": "https://registry.npmjs.org/@bufbuild/protobuf/-/protobuf-2.2.5.tgz",
"integrity": "sha512-/g5EzJifw5GF8aren8wZ/G5oMuPoGeS6MQD3ca8ddcvdXR5UELUfdTZITCGNhNXynY/AYl3Z4plmxdj/tRl/hQ=="
},
"@cerebras/cerebras_cloud_sdk": {
"version": "1.35.0",
"resolved": "https://registry.npmjs.org/@cerebras/cerebras_cloud_sdk/-/cerebras_cloud_sdk-1.35.0.tgz",
"integrity": "sha512-bQ6KYHmcvudHJ1aLzqkeETn3Y071/8/zpcZho6g4pKZ+VluHvLmIG0buhrwF9qJY5WSLmXR/s4pruxVRmfV7yQ==",
"requires": {
"@types/node": "^18.11.18",
"@types/node-fetch": "^2.6.4",
"abort-controller": "^3.0.0",
"agentkeepalive": "^4.2.1",
"form-data-encoder": "1.7.2",
"formdata-node": "^4.3.2",
"node-fetch": "^2.6.7"
},
"dependencies": {
"@types/node": {
"version": "18.19.103",
"resolved": "https://registry.npmjs.org/@types/node/-/node-18.19.103.tgz",
"integrity": "sha512-hHTHp+sEz6SxFsp+SA+Tqrua3AbmlAw+Y//aEwdHrdZkYVRWdvWD3y5uPZ0flYOkgskaFWqZ/YGFm3FaFQ0pRw==",
"requires": {
"undici-types": "~5.26.4"
}
}
}
},
"@changesets/apply-release-plan": {
"version": "7.0.8",
"resolved": "https://registry.npmjs.org/@changesets/apply-release-plan/-/apply-release-plan-7.0.8.tgz",

View File

@@ -349,6 +349,7 @@
"@anthropic-ai/vertex-sdk": "^0.6.4",
"@aws-sdk/client-bedrock-runtime": "^3.758.0",
"@bufbuild/protobuf": "^2.2.5",
"@cerebras/cerebras_cloud_sdk": "^1.35.0",
"@google-cloud/vertexai": "^1.9.3",
"@google/genai": "^0.13.0",
"@grpc/grpc-js": "^1.9.15",

View File

@@ -24,6 +24,7 @@ import { FireworksHandler } from "./providers/fireworks"
import { AskSageHandler } from "./providers/asksage"
import { XAIHandler } from "./providers/xai"
import { SambanovaHandler } from "./providers/sambanova"
import { CerebrasHandler } from "./providers/cerebras"
export interface ApiHandler {
createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream
@@ -84,6 +85,8 @@ export function buildApiHandler(configuration: ApiConfiguration): ApiHandler {
return new XAIHandler(options)
case "sambanova":
return new SambanovaHandler(options)
case "cerebras":
return new CerebrasHandler(options)
default:
return new AnthropicHandler(options)
}
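
For illustration, a call site exercising the new branch might look like the sketch below; the field names match this diff, while the import path and literal values are placeholders.

import { buildApiHandler } from "@api/index" // hypothetical alias; adjust to the real module path

async function demo() {
	const handler = buildApiHandler({
		apiProvider: "cerebras",
		apiModelId: "llama-3.3-70b",
		cerebrasApiKey: process.env.CEREBRAS_API_KEY, // the handler's constructor throws if this is missing
	})
	for await (const chunk of handler.createMessage("You are a coding assistant.", [
		{ role: "user", content: "Hello" },
	])) {
		// chunk.type is "text", "reasoning", or "usage" (see the new provider below)
	}
}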

View File

@@ -0,0 +1,169 @@
import { Anthropic } from "@anthropic-ai/sdk"
import Cerebras from "@cerebras/cerebras_cloud_sdk"
import { withRetry } from "../retry"
import { ApiHandlerOptions, ModelInfo, CerebrasModelId, cerebrasDefaultModelId, cerebrasModels } from "@shared/api"
import { ApiHandler } from "../index"
import { ApiStream } from "@api/transform/stream"
export class CerebrasHandler implements ApiHandler {
private options: ApiHandlerOptions
private client: Cerebras
constructor(options: ApiHandlerOptions) {
this.options = options
// Clean and validate the API key
const cleanApiKey = this.options.cerebrasApiKey?.trim()
if (!cleanApiKey) {
throw new Error("Cerebras API key is required")
}
this.client = new Cerebras({
apiKey: cleanApiKey,
timeout: 30000, // 30 second timeout
})
}
@withRetry()
async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream {
// Convert Anthropic messages to Cerebras format
const cerebrasMessages: Array<{
role: "system" | "user" | "assistant"
content: string
}> = [{ role: "system", content: systemPrompt }]
// Convert Anthropic messages to Cerebras format
for (const message of messages) {
if (message.role === "user") {
const content = Array.isArray(message.content)
? message.content
.map((block) => {
if (block.type === "text") {
return block.text
} else if (block.type === "image") {
return "[Image content not supported in Cerebras]"
}
return ""
})
.join("\n")
: message.content
cerebrasMessages.push({ role: "user", content })
} else if (message.role === "assistant") {
const content = Array.isArray(message.content)
? message.content
.map((block) => {
if (block.type === "text") {
return block.text
}
return ""
})
.join("\n")
: message.content || ""
cerebrasMessages.push({ role: "assistant", content })
}
}
try {
const stream = await this.client.chat.completions.create({
model: this.getModel().id,
messages: cerebrasMessages,
temperature: 0,
stream: true,
})
// Handle streaming response
let reasoning: string | null = null // Track reasoning content for models that support thinking
const modelId = this.getModel().id
const isReasoningModel = modelId.includes("qwen") || modelId.includes("deepseek-r1-distill")
for await (const chunk of stream as any) {
// Type assertion for the streaming chunk
const streamChunk = chunk as any
if (streamChunk.choices?.[0]?.delta?.content) {
const content = streamChunk.choices[0].delta.content
// Handle reasoning models (Qwen and DeepSeek R1 Distill) that use <think> tags
if (isReasoningModel) {
// Check if we're entering or continuing reasoning mode
if (reasoning || content.includes("<think>")) {
reasoning = (reasoning || "") + content
// Clean the content by removing think tags for display
let cleanContent = content.replace(/<think>/g, "").replace(/<\/think>/g, "")
// Only yield reasoning content if there's actual content after cleaning
if (cleanContent.trim()) {
yield {
type: "reasoning",
reasoning: cleanContent,
}
}
// Check if reasoning is complete
if (reasoning.includes("</think>")) {
reasoning = null
}
} else {
// Regular content outside of thinking tags
yield {
type: "text",
text: content,
}
}
} else {
// Non-reasoning models - just yield text content
yield {
type: "text",
text: content,
}
}
}
// Handle usage information from Cerebras API
// Usage is typically only available in the final chunk
if (streamChunk.usage) {
const totalCost = this.calculateCost({
inputTokens: streamChunk.usage.prompt_tokens || 0,
outputTokens: streamChunk.usage.completion_tokens || 0,
})
yield {
type: "usage",
inputTokens: streamChunk.usage.prompt_tokens || 0,
outputTokens: streamChunk.usage.completion_tokens || 0,
cacheReadTokens: 0,
cacheWriteTokens: 0,
totalCost,
}
}
}
} catch (error) {
throw error
}
}
getModel(): { id: string; info: ModelInfo } {
const modelId = this.options.apiModelId
if (modelId && modelId in cerebrasModels) {
const id = modelId as CerebrasModelId
return { id, info: cerebrasModels[id] }
}
return {
id: cerebrasDefaultModelId,
info: cerebrasModels[cerebrasDefaultModelId],
}
}
private calculateCost({ inputTokens, outputTokens }: { inputTokens: number; outputTokens: number }): number {
const model = this.getModel()
const inputPrice = model.info.inputPrice || 0
const outputPrice = model.info.outputPrice || 0
const inputCost = (inputPrice / 1_000_000) * inputTokens
const outputCost = (outputPrice / 1_000_000) * outputTokens
return inputCost + outputCost
}
}
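
A quick standalone check of the arithmetic in calculateCost(): prices are dollars per million tokens, so cost scales linearly with token counts. Every Cerebras model in this PR is priced at $0, so real costs come out to 0; the prices below are hypothetical, purely to exercise the formula.

const inputPrice = 0.6 // hypothetical $ per million input tokens
const outputPrice = 2.4 // hypothetical $ per million output tokens
const inputTokens = 10_000
const outputTokens = 2_000

const totalCost = (inputPrice / 1_000_000) * inputTokens + (outputPrice / 1_000_000) * outputTokens
console.log(totalCost.toFixed(4)) // "0.0108": $0.0060 input + $0.0048 output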

View File

@@ -21,6 +21,7 @@ export type SecretKey =
| "xaiApiKey"
| "nebiusApiKey"
| "sambanovaApiKey"
| "cerebrasApiKey"
export type GlobalStateKey =
| "apiProvider"

View File

@@ -155,6 +155,7 @@ export async function getAllExtensionState(context: vscode.ExtensionContext) {
thinkingBudgetTokens,
reasoningEffort,
sambanovaApiKey,
cerebrasApiKey,
nebiusApiKey,
planActSeparateModelsSettingRaw,
favoritedModelIds,
@@ -244,6 +245,7 @@
getGlobalState(context, "thinkingBudgetTokens") as Promise<number | undefined>,
getGlobalState(context, "reasoningEffort") as Promise<string | undefined>,
getSecret(context, "sambanovaApiKey") as Promise<string | undefined>,
getSecret(context, "cerebrasApiKey") as Promise<string | undefined>,
getSecret(context, "nebiusApiKey") as Promise<string | undefined>,
getGlobalState(context, "planActSeparateModelsSetting") as Promise<boolean | undefined>,
getGlobalState(context, "favoritedModelIds") as Promise<string[] | undefined>,
@@ -357,6 +359,7 @@
asksageApiUrl,
xaiApiKey,
sambanovaApiKey,
cerebrasApiKey,
nebiusApiKey,
favoritedModelIds,
requestTimeoutMs,
@@ -451,6 +454,7 @@ export async function updateApiConfiguration(context: vscode.ExtensionContext, a
reasoningEffort,
clineApiKey,
sambanovaApiKey,
cerebrasApiKey,
nebiusApiKey,
favoritedModelIds,
} = apiConfiguration
@@ -512,6 +516,7 @@
await updateGlobalState(context, "reasoningEffort", reasoningEffort)
await storeSecret(context, "clineApiKey", clineApiKey)
await storeSecret(context, "sambanovaApiKey", sambanovaApiKey)
await storeSecret(context, "cerebrasApiKey", cerebrasApiKey)
await storeSecret(context, "nebiusApiKey", nebiusApiKey)
await updateGlobalState(context, "favoritedModelIds", favoritedModelIds)
await updateGlobalState(context, "requestTimeoutMs", apiConfiguration.requestTimeoutMs)
@@ -542,6 +547,7 @@ export async function resetExtensionState(context: vscode.ExtensionContext) {
"asksageApiKey",
"xaiApiKey",
"sambanovaApiKey",
"cerebrasApiKey",
"nebiusApiKey",
]
for (const key of secretKeys) {
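
The new "cerebrasApiKey" secret rides on VS Code's SecretStorage, assuming storeSecret/getSecret in this file are thin wrappers over context.secrets; the function name below is illustrative.

import * as vscode from "vscode"

async function demoSecretRoundTrip(context: vscode.ExtensionContext) {
	await context.secrets.store("cerebrasApiKey", "csk-placeholder") // saved when the API configuration updates
	const key = await context.secrets.get("cerebrasApiKey") // read back in getAllExtensionState
	await context.secrets.delete("cerebrasApiKey") // what the resetExtensionState loop above does
	return key
}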

View File

@@ -24,6 +24,7 @@ export type ApiProvider =
| "asksage"
| "xai"
| "sambanova"
| "cerebras"
export interface ApiHandlerOptions {
apiModelId?: string
@@ -89,6 +90,7 @@ export interface ApiHandlerOptions {
thinkingBudgetTokens?: number
reasoningEffort?: string
sambanovaApiKey?: string
cerebrasApiKey?: string
requestTimeoutMs?: number
onRetryAttempt?: (attempt: number, maxRetries: number, delay: number, error: any) => void
}
@@ -1928,6 +1930,58 @@ export const sambanovaModels = {
},
} as const satisfies Record<string, ModelInfo>
// Cerebras
// https://inference-docs.cerebras.ai/api-reference/models
export type CerebrasModelId = keyof typeof cerebrasModels
export const cerebrasDefaultModelId: CerebrasModelId = "llama3.1-8b"
export const cerebrasModels = {
"llama-4-scout-17b-16e-instruct": {
maxTokens: 8192,
contextWindow: 8192,
supportsImages: false,
supportsPromptCache: false,
inputPrice: 0,
outputPrice: 0,
description: "Fast inference model with ~2700 tokens/s",
},
"llama3.1-8b": {
maxTokens: 8192,
contextWindow: 8192,
supportsImages: false,
supportsPromptCache: false,
inputPrice: 0,
outputPrice: 0,
description: "Efficient model with ~2100 tokens/s",
},
"llama-3.3-70b": {
maxTokens: 8192,
contextWindow: 8192,
supportsImages: false,
supportsPromptCache: false,
inputPrice: 0,
outputPrice: 0,
description: "Powerful model with ~2600 tokens/s",
},
"qwen-3-32b": {
maxTokens: 16382,
contextWindow: 16382,
supportsImages: false,
supportsPromptCache: false,
inputPrice: 0,
outputPrice: 0,
description: "SOTA coding performance with ~2500 tokens/s",
},
"deepseek-r1-distill-llama-70b": {
maxTokens: 8192,
contextWindow: 8192,
supportsImages: false,
supportsPromptCache: false,
inputPrice: 0,
outputPrice: 0,
description: "Advanced reasoning model with ~2300 tokens/s (private preview)",
},
} as const satisfies Record<string, ModelInfo>
// Requesty
// https://requesty.ai/models
export const requestyDefaultModelId = "anthropic/claude-3-7-sonnet-latest"
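
As an aside, getModel() in the new handler resolves entries from this table with an `in` check that narrows a plain string to CerebrasModelId, falling back to the default. A self-contained sketch (resolveCerebrasModel is an illustrative name):

import { cerebrasModels, cerebrasDefaultModelId, type CerebrasModelId } from "@shared/api"

function resolveCerebrasModel(requested?: string) {
	if (requested && requested in cerebrasModels) {
		const id = requested as CerebrasModelId
		return { id, info: cerebrasModels[id] }
	}
	return { id: cerebrasDefaultModelId, info: cerebrasModels[cerebrasDefaultModelId] }
}

resolveCerebrasModel("qwen-3-32b").info.contextWindow // 16382
resolveCerebrasModel("not-a-model").id // "llama3.1-8b", the default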

View File

@@ -51,6 +51,8 @@ import {
nebiusDefaultModelId,
sambanovaModels,
sambanovaDefaultModelId,
cerebrasModels,
cerebrasDefaultModelId,
doubaoModels,
doubaoDefaultModelId,
liteLlmModelInfoSaneDefaults,
@@ -329,6 +331,7 @@
<VSCodeOption value="asksage">AskSage</VSCodeOption>
<VSCodeOption value="xai">xAI</VSCodeOption>
<VSCodeOption value="sambanova">SambaNova</VSCodeOption>
<VSCodeOption value="cerebras">Cerebras</VSCodeOption>
</VSCodeDropdown>
</DropdownContainer>
@@ -2013,6 +2016,37 @@
</div>
)}
{selectedProvider === "cerebras" && (
<div>
<VSCodeTextField
value={apiConfiguration?.cerebrasApiKey || ""}
style={{ width: "100%" }}
type="password"
onInput={handleInputChange("cerebrasApiKey")}
placeholder="Enter API Key...">
<span style={{ fontWeight: 500 }}>Cerebras API Key</span>
</VSCodeTextField>
<p
style={{
fontSize: "12px",
marginTop: 3,
color: "var(--vscode-descriptionForeground)",
}}>
This key is stored locally and only used to make API requests from this extension.
{!apiConfiguration?.cerebrasApiKey && (
<VSCodeLink
href="https://cloud.cerebras.ai/"
style={{
display: "inline",
fontSize: "inherit",
}}>
You can get a Cerebras API key by signing up here.
</VSCodeLink>
)}
</p>
</div>
)}
{apiErrorMessage && (
<p
style={{
@@ -2130,6 +2164,7 @@
{selectedProvider === "asksage" && createDropdown(askSageModels)}
{selectedProvider === "xai" && createDropdown(xaiModels)}
{selectedProvider === "sambanova" && createDropdown(sambanovaModels)}
{selectedProvider === "cerebras" && createDropdown(cerebrasModels)}
{selectedProvider === "nebius" && createDropdown(nebiusModels)}
</DropdownContainer>
@@ -2565,6 +2600,8 @@ export function normalizeApiConfiguration(apiConfiguration?: ApiConfiguration):
return getProviderData(nebiusModels, nebiusDefaultModelId)
case "sambanova":
return getProviderData(sambanovaModels, sambanovaDefaultModelId)
case "cerebras":
return getProviderData(cerebrasModels, cerebrasDefaultModelId)
default:
return getProviderData(anthropicModels, anthropicDefaultModelId)
}
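
Finally, an illustrative look at what the new branch resolves when no model is configured; the return shape shown here ({ selectedProvider, selectedModelId, selectedModelInfo }) is assumed from how getProviderData is used in this file, not spelled out in the diff.

const { selectedModelId, selectedModelInfo } = normalizeApiConfiguration({
	apiProvider: "cerebras",
})
// selectedModelId === "llama3.1-8b" (cerebrasDefaultModelId)
// selectedModelInfo.inputPrice === 0, since all Cerebras models are free in this PR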