Mirror of https://github.com/cline/cline.git, synced 2025-06-03 03:59:07 +00:00
Fix deepseek/openai pricing and token counting (#2646)
* Fix deepseek/openai pricing and token counting
* Create ten-zebras-tie.md
This commit is contained in:
parent 8310a3dc23
commit cac0309579
.changeset/ten-zebras-tie.md (Normal file, 5 lines added)
@@ -0,0 +1,5 @@
+---
+"claude-dev": patch
+---
+
+Fix issue with DeepSeek API provider token counting + context management
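As the changeset hints, the problem was double counting: Cline's downstream accounting follows the Anthropic convention, in which inputTokens excludes cache reads and writes and the three values are summed. DeepSeek (and OpenAI) instead report prompt_tokens as the sum of cache hits and misses, so passing it through unchanged counted the cached prompt twice. A minimal sketch of the arithmetic, with hypothetical token counts (the downstream consumer code is not part of this diff):

// Hypothetical DeepSeek usage report: 50k prompt tokens,
// split into 40k cache hits and 10k cache misses.
const promptTokens = 50_000 // prompt_tokens: already hits + misses
const cacheReadTokens = 40_000 // prompt_cache_hit_tokens
const cacheWriteTokens = 10_000 // prompt_cache_miss_tokens

// Anthropic-style total assumed downstream: input + writes + reads.
const beforeFix = promptTokens + cacheWriteTokens + cacheReadTokens // 100_000 (double-counted)
const afterFix =
	Math.max(0, promptTokens - cacheReadTokens - cacheWriteTokens) + // 0 for DeepSeek
	cacheWriteTokens +
	cacheReadTokens // 50_000 (correct)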
@@ -36,14 +36,15 @@ export class DeepSeekHandler implements ApiHandler {
 		}
 		const deepUsage = usage as DeepSeekUsage
 
-		const inputTokens = deepUsage?.prompt_tokens || 0
+		const inputTokens = deepUsage?.prompt_tokens || 0 // sum of cache hits and misses
 		const outputTokens = deepUsage?.completion_tokens || 0
 		const cacheReadTokens = deepUsage?.prompt_cache_hit_tokens || 0
 		const cacheWriteTokens = deepUsage?.prompt_cache_miss_tokens || 0
 		const totalCost = calculateApiCostOpenAI(info, inputTokens, outputTokens, cacheWriteTokens, cacheReadTokens)
+		const nonCachedInputTokens = Math.max(0, inputTokens - cacheReadTokens - cacheWriteTokens) // this will always be 0
 		yield {
 			type: "usage",
-			inputTokens: inputTokens,
+			inputTokens: nonCachedInputTokens,
 			outputTokens: outputTokens,
 			cacheWriteTokens: cacheWriteTokens,
 			cacheReadTokens: cacheReadTokens,
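Note that the hunk above leaves the calculateApiCostOpenAI call itself untouched; only the inputTokens value reported upward changes. For reference, an OpenAI-style cost helper is expected to bill only the non-cached remainder at inputPrice, with cache reads and writes billed at their own rates. A minimal sketch under that assumption (illustrative only; the real helper and ModelInfo type live elsewhere in the repo):

// Illustrative sketch; field names mirror the model entries below.
// All prices are in dollars per million tokens.
interface ModelInfoSketch {
	inputPrice?: number // non-cached input tokens
	outputPrice?: number // completion tokens
	cacheWritesPrice?: number // cache-miss (write) tokens
	cacheReadsPrice?: number // cache-hit (read) tokens
}

function calculateApiCostOpenAISketch(
	info: ModelInfoSketch,
	inputTokens: number, // sum of cache hits and misses, per the handlers above
	outputTokens: number,
	cacheWriteTokens = 0,
	cacheReadTokens = 0,
): number {
	const nonCachedInput = Math.max(0, inputTokens - cacheWriteTokens - cacheReadTokens)
	return (
		(nonCachedInput * (info.inputPrice ?? 0) +
			cacheWriteTokens * (info.cacheWritesPrice ?? 0) +
			cacheReadTokens * (info.cacheReadsPrice ?? 0) +
			outputTokens * (info.outputPrice ?? 0)) /
		1_000_000
	)
}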
@@ -26,14 +26,15 @@ export class OpenAiNativeHandler implements ApiHandler {
 	}
 
 	private async *yieldUsage(info: ModelInfo, usage: OpenAI.Completions.CompletionUsage | undefined): ApiStream {
-		const inputTokens = usage?.prompt_tokens || 0
+		const inputTokens = usage?.prompt_tokens || 0 // sum of cache hits and misses
 		const outputTokens = usage?.completion_tokens || 0
 		const cacheReadTokens = usage?.prompt_tokens_details?.cached_tokens || 0
 		const cacheWriteTokens = 0
 		const totalCost = calculateApiCostOpenAI(info, inputTokens, outputTokens, cacheWriteTokens, cacheReadTokens)
+		const nonCachedInputTokens = Math.max(0, inputTokens - cacheReadTokens - cacheWriteTokens)
 		yield {
 			type: "usage",
-			inputTokens: inputTokens,
+			inputTokens: nonCachedInputTokens,
 			outputTokens: outputTokens,
 			cacheWriteTokens: cacheWriteTokens,
 			cacheReadTokens: cacheReadTokens,
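Unlike DeepSeek, OpenAI's usage object only reports cache reads (prompt_tokens_details.cached_tokens) and never cache writes, so cacheWriteTokens is pinned to 0 and nonCachedInputTokens is generally positive here rather than always zero. A worked example with hypothetical numbers:

// Hypothetical OpenAI usage report.
const usage = {
	prompt_tokens: 12_000, // includes cached tokens
	completion_tokens: 500,
	prompt_tokens_details: { cached_tokens: 8_000 },
}

const inputTokens = usage.prompt_tokens // 12_000
const cacheReadTokens = usage.prompt_tokens_details.cached_tokens // 8_000
const cacheWriteTokens = 0 // OpenAI does not report cache writes
const nonCachedInputTokens = Math.max(0, inputTokens - cacheReadTokens - cacheWriteTokens) // 4_000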
@@ -648,8 +648,8 @@ export const deepSeekModels = {
 		maxTokens: 8_000,
 		contextWindow: 64_000,
 		supportsImages: false,
-		supportsPromptCache: true,
-		inputPrice: 0.27,
+		supportsPromptCache: true, // supports context caching, but not in the way anthropic does it (deepseek reports input tokens and reads/writes in the same usage report) FIXME: we need to show users cache stats how deepseek does it
+		inputPrice: 0, // technically there is no input price, it's all either a cache hit or miss (ApiOptions will not show this). Input is the sum of cache reads and writes
 		outputPrice: 1.1,
 		cacheWritesPrice: 0.27,
 		cacheReadsPrice: 0.07,
@@ -658,8 +658,8 @@ export const deepSeekModels = {
 		maxTokens: 8_000,
 		contextWindow: 64_000,
 		supportsImages: false,
-		supportsPromptCache: true,
-		inputPrice: 0.55,
+		supportsPromptCache: true, // supports context caching, but not in the way anthropic does it (deepseek reports input tokens and reads/writes in the same usage report) FIXME: we need to show users cache stats how deepseek does it
+		inputPrice: 0, // technically there is no input price, it's all either a cache hit or miss (ApiOptions will not show this)
 		outputPrice: 2.19,
 		cacheWritesPrice: 0.55,
 		cacheReadsPrice: 0.14,
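With inputPrice set to 0, every DeepSeek prompt token is billed as either a cache read or a cache write. A worked cost check for deepseek-chat under the entry above (prices per million tokens, token counts hypothetical):

// deepseek-chat: cacheReadsPrice 0.07, cacheWritesPrice 0.27, outputPrice 1.1
const cacheReadTokens = 40_000
const cacheWriteTokens = 10_000
const outputTokens = 2_000

const totalCost =
	(cacheReadTokens * 0.07 + // $0.0028
		cacheWriteTokens * 0.27 + // $0.0027
		outputTokens * 1.1) / // $0.0022
	1_000_000 // => $0.0077 total

The deepseek-reasoner entry works the same way with cacheReadsPrice 0.14, cacheWritesPrice 0.55, and outputPrice 2.19.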