Mirror of https://github.com/cline/cline.git, synced 2025-06-03 03:59:07 +00:00
Fix deepseek/openai pricing and token counting (#2646)
* Fix deepseek/openai pricing and token counting
* Create ten-zebras-tie.md
This commit is contained in:
parent 8310a3dc23
commit cac0309579
.changeset/ten-zebras-tie.md
Normal file (5 lines added)
@@ -0,0 +1,5 @@
+---
+"claude-dev": patch
+---
+
+Fix issue with DeepSeek API provider token counting + context management
@@ -36,14 +36,15 @@ export class DeepSeekHandler implements ApiHandler {
 		}
 
 		const deepUsage = usage as DeepSeekUsage
 
-		const inputTokens = deepUsage?.prompt_tokens || 0
+		const inputTokens = deepUsage?.prompt_tokens || 0 // sum of cache hits and misses
 		const outputTokens = deepUsage?.completion_tokens || 0
 		const cacheReadTokens = deepUsage?.prompt_cache_hit_tokens || 0
 		const cacheWriteTokens = deepUsage?.prompt_cache_miss_tokens || 0
 		const totalCost = calculateApiCostOpenAI(info, inputTokens, outputTokens, cacheWriteTokens, cacheReadTokens)
+		const nonCachedInputTokens = Math.max(0, inputTokens - cacheReadTokens - cacheWriteTokens) // this will always be 0
 		yield {
 			type: "usage",
-			inputTokens: inputTokens,
+			inputTokens: nonCachedInputTokens,
 			outputTokens: outputTokens,
 			cacheWriteTokens: cacheWriteTokens,
 			cacheReadTokens: cacheReadTokens,
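Note on the cast above: the handler reads prompt_cache_hit_tokens and prompt_cache_miss_tokens off the OpenAI-compatible usage object, which implies a DeepSeekUsage shape roughly like the sketch below (the exact interface in the source file is not shown in this diff). Because DeepSeek reports prompt_tokens as hits plus misses, the nonCachedInputTokens subtraction lands on 0, as the inline comment says.

import OpenAI from "openai"

// Sketch only, inferred from the fields read in the usage-handling code above.
interface DeepSeekUsage extends OpenAI.Completions.CompletionUsage {
	prompt_cache_hit_tokens?: number // prompt tokens served from the cache (reads)
	prompt_cache_miss_tokens?: number // prompt tokens processed fresh (writes)
}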
@@ -26,14 +26,15 @@ export class OpenAiNativeHandler implements ApiHandler {
 	}
 
 	private async *yieldUsage(info: ModelInfo, usage: OpenAI.Completions.CompletionUsage | undefined): ApiStream {
-		const inputTokens = usage?.prompt_tokens || 0
+		const inputTokens = usage?.prompt_tokens || 0 // sum of cache hits and misses
 		const outputTokens = usage?.completion_tokens || 0
 		const cacheReadTokens = usage?.prompt_tokens_details?.cached_tokens || 0
 		const cacheWriteTokens = 0
 		const totalCost = calculateApiCostOpenAI(info, inputTokens, outputTokens, cacheWriteTokens, cacheReadTokens)
+		const nonCachedInputTokens = Math.max(0, inputTokens - cacheReadTokens - cacheWriteTokens)
 		yield {
 			type: "usage",
-			inputTokens: inputTokens,
+			inputTokens: nonCachedInputTokens,
 			outputTokens: outputTokens,
 			cacheWriteTokens: cacheWriteTokens,
 			cacheReadTokens: cacheReadTokens,
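Both handlers now pass the full prompt_tokens count into calculateApiCostOpenAI and report only the non-cached remainder upstream. The helper itself is not part of this diff; below is a minimal sketch of a cost calculation with the same call shape, using per-million-token price fields like those in the model entries further down. The PricingInfo interface and function name are hypothetical stand-ins, not the repo's implementation.

// Hypothetical stand-in for calculateApiCostOpenAI; same parameter order as
// the call sites above. Prices are USD per million tokens.
interface PricingInfo {
	inputPrice?: number
	outputPrice?: number
	cacheWritesPrice?: number
	cacheReadsPrice?: number
}

function estimateOpenAIStyleCost(
	info: PricingInfo,
	inputTokens: number, // total prompt tokens, cache hits and misses included
	outputTokens: number,
	cacheWriteTokens: number,
	cacheReadTokens: number,
): number {
	// Carve cached traffic out of the prompt before applying the base input
	// price, so cached tokens are not billed twice.
	const nonCachedInput = Math.max(0, inputTokens - cacheReadTokens - cacheWriteTokens)
	const per = (price: number | undefined, tokens: number) => ((price ?? 0) / 1_000_000) * tokens
	return (
		per(info.inputPrice, nonCachedInput) +
		per(info.outputPrice, outputTokens) +
		per(info.cacheWritesPrice, cacheWriteTokens) +
		per(info.cacheReadsPrice, cacheReadTokens)
	)
}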
@@ -648,8 +648,8 @@ export const deepSeekModels = {
 		maxTokens: 8_000,
 		contextWindow: 64_000,
 		supportsImages: false,
-		supportsPromptCache: true,
-		inputPrice: 0.27,
+		supportsPromptCache: true, // supports context caching, but not in the way anthropic does it (deepseek reports input tokens and reads/writes in the same usage report) FIXME: we need to show users cache stats how deepseek does it
+		inputPrice: 0, // technically there is no input price, it's all either a cache hit or miss (ApiOptions will not show this). Input is the sum of cache reads and writes
 		outputPrice: 1.1,
 		cacheWritesPrice: 0.27,
 		cacheReadsPrice: 0.07,
@@ -658,8 +658,8 @@ export const deepSeekModels = {
 		maxTokens: 8_000,
 		contextWindow: 64_000,
 		supportsImages: false,
-		supportsPromptCache: true,
-		inputPrice: 0.55,
+		supportsPromptCache: true, // supports context caching, but not in the way anthropic does it (deepseek reports input tokens and reads/writes in the same usage report) FIXME: we need to show users cache stats how deepseek does it
+		inputPrice: 0, // technically there is no input price, it's all either a cache hit or miss (ApiOptions will not show this)
 		outputPrice: 2.19,
 		cacheWritesPrice: 0.55,
 		cacheReadsPrice: 0.14,
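With inputPrice set to 0, every prompt token for these models is billed as either a cache write (miss) or a cache read (hit), never under a separate input rate. A worked example with the deepseek-chat prices above and made-up token counts:

// Hypothetical request against deepseek-chat (prices are USD per million tokens).
const cacheMissTokens = 10_000 // prompt_cache_miss_tokens -> cacheWritesPrice 0.27
const cacheHitTokens = 40_000 // prompt_cache_hit_tokens   -> cacheReadsPrice 0.07
const outputTokens = 2_000 //                               -> outputPrice 1.1

const totalCost =
	(0.27 / 1_000_000) * cacheMissTokens + // $0.0027
	(0.07 / 1_000_000) * cacheHitTokens + // $0.0028
	(1.1 / 1_000_000) * outputTokens // $0.0022
// totalCost ≈ $0.0077; prompt_tokens (50_000 here) never multiplies a separate input price.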