Mirror of https://github.com/cline/cline.git, synced 2025-06-03 03:59:07 +00:00
Fix deepseek/openai pricing and token counting (#2646)
* Fix deepseek/openai pricing and token counting
* Create ten-zebras-tie.md
This commit is contained in:
parent 8310a3dc23
commit cac0309579
.changeset/ten-zebras-tie.md (Normal file, 5 lines added)
@@ -0,0 +1,5 @@
+---
+"claude-dev": patch
+---
+
+Fix issue with DeepSeek API provider token counting + context management
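As the changeset hints, the problem was double counting: Cline's downstream accounting follows the Anthropic convention, in which inputTokens excludes cache reads and writes and the three values are summed. DeepSeek (and OpenAI) instead report prompt_tokens as the sum of cache hits and misses, so passing it through unchanged counted the cached prompt twice. A minimal sketch of the arithmetic, with hypothetical token counts (the downstream consumer code is not part of this diff):

// Hypothetical DeepSeek usage report: 50k prompt tokens,
// split into 40k cache hits and 10k cache misses.
const promptTokens = 50_000 // prompt_tokens: already hits + misses
const cacheReadTokens = 40_000 // prompt_cache_hit_tokens
const cacheWriteTokens = 10_000 // prompt_cache_miss_tokens

// Anthropic-style total assumed downstream: input + writes + reads.
const beforeFix = promptTokens + cacheWriteTokens + cacheReadTokens // 100_000 (double-counted)
const afterFix =
	Math.max(0, promptTokens - cacheReadTokens - cacheWriteTokens) + // 0 for DeepSeek
	cacheWriteTokens +
	cacheReadTokens // 50_000 (correct)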
@@ -36,14 +36,15 @@ export class DeepSeekHandler implements ApiHandler {
 		}
 		const deepUsage = usage as DeepSeekUsage
 
-		const inputTokens = deepUsage?.prompt_tokens || 0
+		const inputTokens = deepUsage?.prompt_tokens || 0 // sum of cache hits and misses
 		const outputTokens = deepUsage?.completion_tokens || 0
 		const cacheReadTokens = deepUsage?.prompt_cache_hit_tokens || 0
 		const cacheWriteTokens = deepUsage?.prompt_cache_miss_tokens || 0
 		const totalCost = calculateApiCostOpenAI(info, inputTokens, outputTokens, cacheWriteTokens, cacheReadTokens)
+		const nonCachedInputTokens = Math.max(0, inputTokens - cacheReadTokens - cacheWriteTokens) // this will always be 0
 		yield {
 			type: "usage",
-			inputTokens: inputTokens,
+			inputTokens: nonCachedInputTokens,
 			outputTokens: outputTokens,
 			cacheWriteTokens: cacheWriteTokens,
 			cacheReadTokens: cacheReadTokens,
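Note that the hunk above leaves the calculateApiCostOpenAI call itself untouched; only the inputTokens value reported upward changes. For reference, an OpenAI-style cost helper is expected to bill only the non-cached remainder at inputPrice, with cache reads and writes billed at their own rates. A minimal sketch under that assumption (illustrative only; the real helper and ModelInfo type live elsewhere in the repo):

// Illustrative sketch; field names mirror the model entries below.
// All prices are in dollars per million tokens.
interface ModelInfoSketch {
	inputPrice?: number // non-cached input tokens
	outputPrice?: number // completion tokens
	cacheWritesPrice?: number // cache-miss (write) tokens
	cacheReadsPrice?: number // cache-hit (read) tokens
}

function calculateApiCostOpenAISketch(
	info: ModelInfoSketch,
	inputTokens: number, // sum of cache hits and misses, per the handlers above
	outputTokens: number,
	cacheWriteTokens = 0,
	cacheReadTokens = 0,
): number {
	const nonCachedInput = Math.max(0, inputTokens - cacheWriteTokens - cacheReadTokens)
	return (
		(nonCachedInput * (info.inputPrice ?? 0) +
			cacheWriteTokens * (info.cacheWritesPrice ?? 0) +
			cacheReadTokens * (info.cacheReadsPrice ?? 0) +
			outputTokens * (info.outputPrice ?? 0)) /
		1_000_000
	)
}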
@@ -26,14 +26,15 @@ export class OpenAiNativeHandler implements ApiHandler {
 	}
 
 	private async *yieldUsage(info: ModelInfo, usage: OpenAI.Completions.CompletionUsage | undefined): ApiStream {
-		const inputTokens = usage?.prompt_tokens || 0
+		const inputTokens = usage?.prompt_tokens || 0 // sum of cache hits and misses
 		const outputTokens = usage?.completion_tokens || 0
 		const cacheReadTokens = usage?.prompt_tokens_details?.cached_tokens || 0
 		const cacheWriteTokens = 0
 		const totalCost = calculateApiCostOpenAI(info, inputTokens, outputTokens, cacheWriteTokens, cacheReadTokens)
+		const nonCachedInputTokens = Math.max(0, inputTokens - cacheReadTokens - cacheWriteTokens)
 		yield {
 			type: "usage",
-			inputTokens: inputTokens,
+			inputTokens: nonCachedInputTokens,
 			outputTokens: outputTokens,
 			cacheWriteTokens: cacheWriteTokens,
 			cacheReadTokens: cacheReadTokens,
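Unlike DeepSeek, OpenAI's usage object only reports cache reads (prompt_tokens_details.cached_tokens) and never cache writes, so cacheWriteTokens is pinned to 0 and nonCachedInputTokens is generally positive here rather than always zero. A worked example with hypothetical numbers:

// Hypothetical OpenAI usage report.
const usage = {
	prompt_tokens: 12_000, // includes cached tokens
	completion_tokens: 500,
	prompt_tokens_details: { cached_tokens: 8_000 },
}

const inputTokens = usage.prompt_tokens // 12_000
const cacheReadTokens = usage.prompt_tokens_details.cached_tokens // 8_000
const cacheWriteTokens = 0 // OpenAI does not report cache writes
const nonCachedInputTokens = Math.max(0, inputTokens - cacheReadTokens - cacheWriteTokens) // 4_000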
@@ -648,8 +648,8 @@ export const deepSeekModels = {
 		maxTokens: 8_000,
 		contextWindow: 64_000,
 		supportsImages: false,
-		supportsPromptCache: true,
-		inputPrice: 0.27,
+		supportsPromptCache: true, // supports context caching, but not in the way anthropic does it (deepseek reports input tokens and reads/writes in the same usage report) FIXME: we need to show users cache stats how deepseek does it
+		inputPrice: 0, // technically there is no input price, it's all either a cache hit or miss (ApiOptions will not show this). Input is the sum of cache reads and writes
 		outputPrice: 1.1,
 		cacheWritesPrice: 0.27,
 		cacheReadsPrice: 0.07,
@@ -658,8 +658,8 @@ export const deepSeekModels = {
 		maxTokens: 8_000,
 		contextWindow: 64_000,
 		supportsImages: false,
-		supportsPromptCache: true,
-		inputPrice: 0.55,
+		supportsPromptCache: true, // supports context caching, but not in the way anthropic does it (deepseek reports input tokens and reads/writes in the same usage report) FIXME: we need to show users cache stats how deepseek does it
+		inputPrice: 0, // technically there is no input price, it's all either a cache hit or miss (ApiOptions will not show this)
 		outputPrice: 2.19,
 		cacheWritesPrice: 0.55,
 		cacheReadsPrice: 0.14,
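With inputPrice set to 0, every DeepSeek prompt token is billed as either a cache read or a cache write. A worked cost check for deepseek-chat under the entry above (prices per million tokens, token counts hypothetical):

// deepseek-chat: cacheReadsPrice 0.07, cacheWritesPrice 0.27, outputPrice 1.1
const cacheReadTokens = 40_000
const cacheWriteTokens = 10_000
const outputTokens = 2_000

const totalCost =
	(cacheReadTokens * 0.07 + // $0.0028
		cacheWriteTokens * 0.27 + // $0.0027
		outputTokens * 1.1) / // $0.0022
	1_000_000 // => $0.0077 total

The deepseek-reasoner entry works the same way with cacheReadsPrice 0.14, cacheWritesPrice 0.55, and outputPrice 2.19.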