move remaining context management out of Cline.ts (#2367)

* move context management out

* changeset
Toshii 2025-03-24 16:49:11 -07:00 committed by GitHub
parent 4a0a40ead1
commit ec01e1f19d
3 changed files with 90 additions and 56 deletions

View File

@@ -0,0 +1,5 @@
+---
+"claude-dev": patch
+---
+
+updated move context management out of cline

View File

@@ -1373,58 +1373,20 @@ export class Cline {
 				})
 			}
-			// If the previous API request's total token usage is close to the context window, truncate the conversation history to free up space for the new request
-			if (previousApiReqIndex >= 0) {
-				const previousRequest = this.clineMessages[previousApiReqIndex]
-				if (previousRequest && previousRequest.text) {
-					const { tokensIn, tokensOut, cacheWrites, cacheReads }: ClineApiReqInfo = JSON.parse(previousRequest.text)
-					const totalTokens = (tokensIn || 0) + (tokensOut || 0) + (cacheWrites || 0) + (cacheReads || 0)
-					let contextWindow = this.api.getModel().info.contextWindow || 128_000
-					// FIXME: hack to get anyone using openai compatible with deepseek to have the proper context window instead of the default 128k. We need a way for the user to specify the context window for models they input through openai compatible
-					if (this.api instanceof OpenAiHandler && this.api.getModel().id.toLowerCase().includes("deepseek")) {
-						contextWindow = 64_000
-					}
-					let maxAllowedSize: number
-					switch (contextWindow) {
-						case 64_000: // deepseek models
-							maxAllowedSize = contextWindow - 27_000
-							break
-						case 128_000: // most models
-							maxAllowedSize = contextWindow - 30_000
-							break
-						case 200_000: // claude models
-							maxAllowedSize = contextWindow - 40_000
-							break
-						default:
-							maxAllowedSize = Math.max(contextWindow - 40_000, contextWindow * 0.8) // for deepseek, 80% of 64k meant only ~10k buffer which was too small and resulted in users getting context window errors.
-					}
-					// This is the most reliable way to know when we're close to hitting the context window.
-					if (totalTokens >= maxAllowedSize) {
-						// Since the user may switch between models with different context windows, truncating half may not be enough (ie if switching from claude 200k to deepseek 64k, half truncation will only remove 100k tokens, but we need to remove much more)
-						// So if totalTokens/2 is greater than maxAllowedSize, we truncate 3/4 instead of 1/2
-						// FIXME: truncating the conversation in a way that is optimal for prompt caching AND takes into account multi-context window complexity is something we need to improve
-						const keep = totalTokens / 2 > maxAllowedSize ? "quarter" : "half"
-						// NOTE: it's okay that we overwriteConversationHistory in resume task since we're only ever removing the last user message and not anything in the middle which would affect this range
-						this.conversationHistoryDeletedRange = this.contextManager.getNextTruncationRange(
-							this.apiConversationHistory,
-							this.conversationHistoryDeletedRange,
-							keep,
-						)
-						await this.saveClineMessages() // saves task history item which we use to keep track of conversation history deleted range
-						// await this.overwriteApiConversationHistory(truncatedMessages)
-					}
-				}
-			}
-			// conversationHistoryDeletedRange is updated only when we're close to hitting the context window, so we don't continuously break the prompt cache
-			const truncatedConversationHistory = this.contextManager.getTruncatedMessages(
-				this.apiConversationHistory,
-				this.conversationHistoryDeletedRange,
-			)
-			let stream = this.api.createMessage(systemPrompt, truncatedConversationHistory)
+			const contextManagementMetadata = this.contextManager.getNewContextMessagesAndMetadata(
+				this.apiConversationHistory,
+				this.clineMessages,
+				this.api,
+				this.conversationHistoryDeletedRange,
+				previousApiReqIndex,
+			)
+			if (contextManagementMetadata.updatedConversationHistoryDeletedRange) {
+				this.conversationHistoryDeletedRange = contextManagementMetadata.conversationHistoryDeletedRange
+				await this.saveClineMessages() // saves task history item which we use to keep track of conversation history deleted range
+			}
+			let stream = this.api.createMessage(systemPrompt, contextManagementMetadata.truncatedConversationHistory)
 			const iterator = stream[Symbol.asyncIterator]()
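
On the Cline.ts side, the refactor reduces the call site to a single method call plus a conditional save. The object it returns is constructed in ContextManager.ts below; for orientation, here is a rough sketch of that shape written out as an explicit interface. The interface name is illustrative only (the real return type is inferred by TypeScript from the returned object literal), so treat this as an assumption rather than a type that exists in the codebase:

import { Anthropic } from "@anthropic-ai/sdk"

// Illustrative sketch of the value returned by getNewContextMessagesAndMetadata,
// inferred from how the new Cline.ts code consumes it. The type name is hypothetical.
interface ContextManagementMetadata {
	// The (possibly recomputed) range of messages removed from the history
	conversationHistoryDeletedRange: [number, number] | undefined
	// True only when this call recomputed the range, i.e. when the caller must persist it
	updatedConversationHistoryDeletedRange: boolean
	// The history with the deleted range already cut out, ready for api.createMessage
	truncatedConversationHistory: Anthropic.Messages.MessageParam[]
}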

View File

@@ -1,10 +1,77 @@
 import { Anthropic } from "@anthropic-ai/sdk"
+import { ClineApiReqInfo, ClineMessage } from "../../shared/ExtensionMessage"
+import { ApiHandler } from "../../api"
+import { OpenAiHandler } from "../../api/providers/openai"
 
 export class ContextManager {
-	getNextTruncationRange(
-		messages: Anthropic.Messages.MessageParam[],
-		currentDeletedRange: [number, number] | undefined = undefined,
-		keep: "half" | "quarter" = "half",
+	getNewContextMessagesAndMetadata(
+		apiConversationHistory: Anthropic.Messages.MessageParam[],
+		clineMessages: ClineMessage[],
+		api: ApiHandler,
+		conversationHistoryDeletedRange: [number, number] | undefined,
+		previousApiReqIndex: number,
+	) {
+		let updatedConversationHistoryDeletedRange = false
+
+		// If the previous API request's total token usage is close to the context window, truncate the conversation history to free up space for the new request
+		if (previousApiReqIndex >= 0) {
+			const previousRequest = clineMessages[previousApiReqIndex]
+			if (previousRequest && previousRequest.text) {
+				const { tokensIn, tokensOut, cacheWrites, cacheReads }: ClineApiReqInfo = JSON.parse(previousRequest.text)
+				const totalTokens = (tokensIn || 0) + (tokensOut || 0) + (cacheWrites || 0) + (cacheReads || 0)
+				let contextWindow = api.getModel().info.contextWindow || 128_000
+				// FIXME: hack to get anyone using openai compatible with deepseek to have the proper context window instead of the default 128k. We need a way for the user to specify the context window for models they input through openai compatible
+				if (api instanceof OpenAiHandler && api.getModel().id.toLowerCase().includes("deepseek")) {
+					contextWindow = 64_000
+				}
+				let maxAllowedSize: number
+				switch (contextWindow) {
+					case 64_000: // deepseek models
+						maxAllowedSize = contextWindow - 27_000
+						break
+					case 128_000: // most models
+						maxAllowedSize = contextWindow - 30_000
+						break
+					case 200_000: // claude models
+						maxAllowedSize = contextWindow - 40_000
+						break
+					default:
+						maxAllowedSize = Math.max(contextWindow - 40_000, contextWindow * 0.8) // for deepseek, 80% of 64k meant only ~10k buffer which was too small and resulted in users getting context window errors.
+				}
+				// This is the most reliable way to know when we're close to hitting the context window.
+				if (totalTokens >= maxAllowedSize) {
+					// Since the user may switch between models with different context windows, truncating half may not be enough (ie if switching from claude 200k to deepseek 64k, half truncation will only remove 100k tokens, but we need to remove much more)
+					// So if totalTokens/2 is greater than maxAllowedSize, we truncate 3/4 instead of 1/2
+					// FIXME: truncating the conversation in a way that is optimal for prompt caching AND takes into account multi-context window complexity is something we need to improve
+					const keep = totalTokens / 2 > maxAllowedSize ? "quarter" : "half"
+					// NOTE: it's okay that we overwriteConversationHistory in resume task since we're only ever removing the last user message and not anything in the middle which would affect this range
+					conversationHistoryDeletedRange = this.getNextTruncationRange(
+						apiConversationHistory,
+						conversationHistoryDeletedRange,
+						keep,
+					)
+					updatedConversationHistoryDeletedRange = true
+				}
+			}
+		}
+
+		// conversationHistoryDeletedRange is updated only when we're close to hitting the context window, so we don't continuously break the prompt cache
+		const truncatedConversationHistory = this.getTruncatedMessages(apiConversationHistory, conversationHistoryDeletedRange)
+
+		return {
+			conversationHistoryDeletedRange: conversationHistoryDeletedRange,
+			updatedConversationHistoryDeletedRange: updatedConversationHistoryDeletedRange,
+			truncatedConversationHistory: truncatedConversationHistory,
+		}
+	}
+
+	public getNextTruncationRange(
+		apiMessages: Anthropic.Messages.MessageParam[],
+		currentDeletedRange: [number, number] | undefined,
+		keep: "half" | "quarter",
 	): [number, number] {
 		// Since we always keep the first message, currentDeletedRange[0] will always be 1 (for now until we have a smarter truncation algorithm)
 		const rangeStartIndex = 1
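
A note on the buffer sizes in the hunk above: each known context window reserves a fixed amount of headroom for the next request, and unknown windows fall back to whichever is larger of contextWindow - 40_000 or 80% of the window. The standalone sketch below (not part of this commit) just exercises those branches with concrete numbers:

// Standalone sketch of the maxAllowedSize selection above; the function name is illustrative.
function getMaxAllowedSize(contextWindow: number): number {
	switch (contextWindow) {
		case 64_000: // deepseek models
			return contextWindow - 27_000
		case 128_000: // most models
			return contextWindow - 30_000
		case 200_000: // claude models
			return contextWindow - 40_000
		default:
			// large windows keep a flat 40k buffer; small unknown windows keep 20% free
			return Math.max(contextWindow - 40_000, contextWindow * 0.8)
	}
}

console.log(getMaxAllowedSize(64_000)) // 37000: truncation triggers once usage reaches ~37k tokens
console.log(getMaxAllowedSize(200_000)) // 160000: 40k reserved for the next request
console.log(getMaxAllowedSize(32_000)) // 25600: the 80% branch wins for small windows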
@@ -16,20 +83,20 @@ export class ContextManager {
 			// We first calculate half of the messages then divide by 2 to get the number of pairs.
 			// After flooring, we multiply by 2 to get the number of messages.
 			// Note that this will also always be an even number.
-			messagesToRemove = Math.floor((messages.length - startOfRest) / 4) * 2 // Keep even number
+			messagesToRemove = Math.floor((apiMessages.length - startOfRest) / 4) * 2 // Keep even number
 		} else {
 			// Remove 3/4 of remaining user-assistant pairs
 			// We calculate 3/4ths of the messages then divide by 2 to get the number of pairs.
 			// After flooring, we multiply by 2 to get the number of messages.
 			// Note that this will also always be an even number.
-			messagesToRemove = Math.floor(((messages.length - startOfRest) * 3) / 4 / 2) * 2
+			messagesToRemove = Math.floor(((apiMessages.length - startOfRest) * 3) / 4 / 2) * 2
 		}
 
 		let rangeEndIndex = startOfRest + messagesToRemove - 1
 
 		// Make sure the last message being removed is a user message, so that the next message after the initial task message is an assistant message. This preservers the user-assistant-user-assistant structure.
 		// NOTE: anthropic format messages are always user-assistant-user-assistant, while openai format messages can have multiple user messages in a row (we use anthropic format throughout cline)
-		if (messages[rangeEndIndex].role !== "user") {
+		if (apiMessages[rangeEndIndex].role !== "user") {
 			rangeEndIndex -= 1
 		}

@@ -37,7 +104,7 @@ export class ContextManager {
 		return [rangeStartIndex, rangeEndIndex]
 	}
 
-	getTruncatedMessages(
+	public getTruncatedMessages(
 		messages: Anthropic.Messages.MessageParam[],
 		deletedRange: [number, number] | undefined,
 	): Anthropic.Messages.MessageParam[] {
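
The range arithmetic in getNextTruncationRange is easiest to verify with concrete numbers. The sketch below is standalone (not part of the commit) and assumes the simplest case, where no range has been deleted yet, so the removable span is assumed to start right after the first task message; it also omits the final adjustment that nudges the end index back by one when it lands on an assistant message.

// Standalone sketch of the range math, assuming startOfRest = 1 (no prior deleted range).
function nextTruncationRange(messageCount: number, keep: "half" | "quarter"): [number, number] {
	const rangeStartIndex = 1 // index 0, the original task message, is always kept
	const startOfRest = 1
	const messagesToRemove =
		keep === "half"
			? Math.floor((messageCount - startOfRest) / 4) * 2 // half the remaining messages, rounded down to whole pairs
			: Math.floor(((messageCount - startOfRest) * 3) / 4 / 2) * 2 // three quarters, rounded down to whole pairs
	return [rangeStartIndex, startOfRest + messagesToRemove - 1]
}

// A 13-message history: the task message plus 12 alternating assistant/user messages.
console.log(nextTruncationRange(13, "half")) // [1, 6]  -> 6 messages (3 pairs) removed
console.log(nextTruncationRange(13, "quarter")) // [1, 8] -> 8 messages (4 pairs) removed

Both end indices (6 and 8) already fall on user messages in an alternating history, so the user-role check in the real method would leave these results unchanged.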