Fix deepseek/openai pricing and token counting (#2646)

* Fix deepseek/openai pricing and token counting

* Create ten-zebras-tie.md
Saoud Rizwan 2025-04-02 22:41:09 -07:00 committed by GitHub
parent 8310a3dc23
commit cac0309579
4 changed files with 15 additions and 8 deletions


@@ -0,0 +1,5 @@
+---
+"claude-dev": patch
+---
+
+Fix issue with DeepSeek API provider token counting + context management


@@ -36,14 +36,15 @@ export class DeepSeekHandler implements ApiHandler {
}
const deepUsage = usage as DeepSeekUsage
-const inputTokens = deepUsage?.prompt_tokens || 0
+const inputTokens = deepUsage?.prompt_tokens || 0 // sum of cache hits and misses
const outputTokens = deepUsage?.completion_tokens || 0
const cacheReadTokens = deepUsage?.prompt_cache_hit_tokens || 0
const cacheWriteTokens = deepUsage?.prompt_cache_miss_tokens || 0
const totalCost = calculateApiCostOpenAI(info, inputTokens, outputTokens, cacheWriteTokens, cacheReadTokens)
+const nonCachedInputTokens = Math.max(0, inputTokens - cacheReadTokens - cacheWriteTokens) // always 0 for DeepSeek, since prompt_tokens is already the sum of cache hits and misses
yield {
type: "usage",
-inputTokens: inputTokens,
+inputTokens: nonCachedInputTokens,
outputTokens: outputTokens,
cacheWriteTokens: cacheWriteTokens,
cacheReadTokens: cacheReadTokens,

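Aside: calculateApiCostOpenAI is defined elsewhere in the repo; a minimal sketch consistent with this call site (which passes the total prompt_tokens, cache hits and misses included) might look like the following. The ModelInfo field names here are assumptions for illustration, not the repo's actual implementation.

// Sketch (assumption): an OpenAI-style cost calculation must subtract cached
// tokens itself, because the inputTokens argument already includes them.
function calculateApiCostOpenAI(
	info: { inputPrice?: number; outputPrice?: number; cacheWritesPrice?: number; cacheReadsPrice?: number },
	inputTokens: number, // total prompt tokens (cache hits + misses included)
	outputTokens: number,
	cacheWriteTokens: number,
	cacheReadTokens: number,
): number {
	// Derive the non-cached portion, since OpenAI-style usage reports fold
	// cached tokens into prompt_tokens.
	const nonCachedInput = Math.max(0, inputTokens - cacheReadTokens - cacheWriteTokens)
	return (
		(nonCachedInput * (info.inputPrice ?? 0) +
			outputTokens * (info.outputPrice ?? 0) +
			cacheWriteTokens * (info.cacheWritesPrice ?? 0) +
			cacheReadTokens * (info.cacheReadsPrice ?? 0)) /
		1_000_000 // prices are dollars per million tokens
	)
}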

@@ -26,14 +26,15 @@ export class OpenAiNativeHandler implements ApiHandler {
}
private async *yieldUsage(info: ModelInfo, usage: OpenAI.Completions.CompletionUsage | undefined): ApiStream {
-const inputTokens = usage?.prompt_tokens || 0
+const inputTokens = usage?.prompt_tokens || 0 // sum of cache hits and misses
const outputTokens = usage?.completion_tokens || 0
const cacheReadTokens = usage?.prompt_tokens_details?.cached_tokens || 0
const cacheWriteTokens = 0
const totalCost = calculateApiCostOpenAI(info, inputTokens, outputTokens, cacheWriteTokens, cacheReadTokens)
+const nonCachedInputTokens = Math.max(0, inputTokens - cacheReadTokens - cacheWriteTokens)
yield {
type: "usage",
-inputTokens: inputTokens,
+inputTokens: nonCachedInputTokens,
outputTokens: outputTokens,
cacheWriteTokens: cacheWriteTokens,
cacheReadTokens: cacheReadTokens,

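A quick worked example of the derivation above, using a hypothetical usage payload shaped like OpenAI's CompletionUsage (numbers invented for illustration):

const usage = {
	prompt_tokens: 12_000, // includes the cached portion
	completion_tokens: 800,
	prompt_tokens_details: { cached_tokens: 10_000 },
}

const cacheReadTokens = usage.prompt_tokens_details?.cached_tokens || 0
const cacheWriteTokens = 0 // OpenAI does not report a separate cache-write count
const nonCachedInputTokens = Math.max(0, usage.prompt_tokens - cacheReadTokens - cacheWriteTokens)
console.log(nonCachedInputTokens) // 2000 — only these tokens are billed at the full input rate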

@@ -648,8 +648,8 @@ export const deepSeekModels = {
maxTokens: 8_000,
contextWindow: 64_000,
supportsImages: false,
-supportsPromptCache: true,
-inputPrice: 0.27,
+supportsPromptCache: true, // supports context caching, but not the way Anthropic does it (DeepSeek reports input tokens and cache reads/writes in the same usage report). FIXME: we need to show users cache stats the way DeepSeek reports them
+inputPrice: 0, // technically there is no input price: every input token is either a cache hit or a cache miss (ApiOptions will not show this). Input is the sum of cache reads and writes
outputPrice: 1.1,
cacheWritesPrice: 0.27,
cacheReadsPrice: 0.07,
@@ -658,8 +658,8 @@ export const deepSeekModels = {
maxTokens: 8_000,
contextWindow: 64_000,
supportsImages: false,
-supportsPromptCache: true,
-inputPrice: 0.55,
+supportsPromptCache: true, // supports context caching, but not the way Anthropic does it (DeepSeek reports input tokens and cache reads/writes in the same usage report). FIXME: we need to show users cache stats the way DeepSeek reports them
+inputPrice: 0, // technically there is no input price: every input token is either a cache hit or a cache miss (ApiOptions will not show this)
outputPrice: 2.19,
cacheWritesPrice: 0.55,
cacheReadsPrice: 0.14,
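
To see why inputPrice can be 0, here is the cost arithmetic under the second pricing entry above (request sizes invented for illustration):

// A request with 9,000 cache-hit tokens, 1,000 cache-miss tokens, and 500
// output tokens. inputPrice never enters the sum: every prompt token is
// billed as either a cache read or a cache write.
const cost = (9_000 * 0.14 + 1_000 * 0.55 + 500 * 2.19) / 1_000_000
console.log(cost.toFixed(6)) // 0.002905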