move remaining context management out of Cline.ts (#2367)

* move context management out

* changeset
Toshii 2025-03-24 16:49:11 -07:00 committed by GitHub
parent 4a0a40ead1
commit ec01e1f19d
3 changed files with 90 additions and 56 deletions

View File

@@ -0,0 +1,5 @@
+---
+"claude-dev": patch
+---
+
+updated move context management out of cline

View File

@@ -1373,58 +1373,20 @@ export class Cline {
 				})
 			}
-			// If the previous API request's total token usage is close to the context window, truncate the conversation history to free up space for the new request
-			if (previousApiReqIndex >= 0) {
-				const previousRequest = this.clineMessages[previousApiReqIndex]
-				if (previousRequest && previousRequest.text) {
-					const { tokensIn, tokensOut, cacheWrites, cacheReads }: ClineApiReqInfo = JSON.parse(previousRequest.text)
-					const totalTokens = (tokensIn || 0) + (tokensOut || 0) + (cacheWrites || 0) + (cacheReads || 0)
-					let contextWindow = this.api.getModel().info.contextWindow || 128_000
-					// FIXME: hack to get anyone using openai compatible with deepseek to have the proper context window instead of the default 128k. We need a way for the user to specify the context window for models they input through openai compatible
-					if (this.api instanceof OpenAiHandler && this.api.getModel().id.toLowerCase().includes("deepseek")) {
-						contextWindow = 64_000
-					}
-					let maxAllowedSize: number
-					switch (contextWindow) {
-						case 64_000: // deepseek models
-							maxAllowedSize = contextWindow - 27_000
-							break
-						case 128_000: // most models
-							maxAllowedSize = contextWindow - 30_000
-							break
-						case 200_000: // claude models
-							maxAllowedSize = contextWindow - 40_000
-							break
-						default:
-							maxAllowedSize = Math.max(contextWindow - 40_000, contextWindow * 0.8) // for deepseek, 80% of 64k meant only ~10k buffer which was too small and resulted in users getting context window errors.
-					}
-					// This is the most reliable way to know when we're close to hitting the context window.
-					if (totalTokens >= maxAllowedSize) {
-						// Since the user may switch between models with different context windows, truncating half may not be enough (ie if switching from claude 200k to deepseek 64k, half truncation will only remove 100k tokens, but we need to remove much more)
-						// So if totalTokens/2 is greater than maxAllowedSize, we truncate 3/4 instead of 1/2
-						// FIXME: truncating the conversation in a way that is optimal for prompt caching AND takes into account multi-context window complexity is something we need to improve
-						const keep = totalTokens / 2 > maxAllowedSize ? "quarter" : "half"
-						// NOTE: it's okay that we overwriteConversationHistory in resume task since we're only ever removing the last user message and not anything in the middle which would affect this range
-						this.conversationHistoryDeletedRange = this.contextManager.getNextTruncationRange(
-							this.apiConversationHistory,
-							this.conversationHistoryDeletedRange,
-							keep,
-						)
-						await this.saveClineMessages() // saves task history item which we use to keep track of conversation history deleted range
-						// await this.overwriteApiConversationHistory(truncatedMessages)
-					}
-				}
-			}
-			// conversationHistoryDeletedRange is updated only when we're close to hitting the context window, so we don't continuously break the prompt cache
-			const truncatedConversationHistory = this.contextManager.getTruncatedMessages(
-				this.apiConversationHistory,
-				this.conversationHistoryDeletedRange,
-			)
-			let stream = this.api.createMessage(systemPrompt, truncatedConversationHistory)
+			const contextManagementMetadata = this.contextManager.getNewContextMessagesAndMetadata(
+				this.apiConversationHistory,
+				this.clineMessages,
+				this.api,
+				this.conversationHistoryDeletedRange,
+				previousApiReqIndex,
+			)
+			if (contextManagementMetadata.updatedConversationHistoryDeletedRange) {
+				this.conversationHistoryDeletedRange = contextManagementMetadata.conversationHistoryDeletedRange
+				await this.saveClineMessages() // saves task history item which we use to keep track of conversation history deleted range
+			}
+			let stream = this.api.createMessage(systemPrompt, contextManagementMetadata.truncatedConversationHistory)
 			const iterator = stream[Symbol.asyncIterator]()
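
On the Cline.ts side, the refactor reduces the call site to a single method call plus a conditional save. The object it returns is constructed in ContextManager.ts below; for orientation, here is a rough sketch of that shape written out as an explicit interface. The interface name is illustrative only (the real return type is inferred by TypeScript from the returned object literal), so treat this as an assumption rather than a type that exists in the codebase:

import { Anthropic } from "@anthropic-ai/sdk"

// Illustrative sketch of the value returned by getNewContextMessagesAndMetadata,
// inferred from how the new Cline.ts code consumes it. The type name is hypothetical.
interface ContextManagementMetadata {
	// The (possibly recomputed) range of messages removed from the history
	conversationHistoryDeletedRange: [number, number] | undefined
	// True only when this call recomputed the range, i.e. when the caller must persist it
	updatedConversationHistoryDeletedRange: boolean
	// The history with the deleted range already cut out, ready for api.createMessage
	truncatedConversationHistory: Anthropic.Messages.MessageParam[]
}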

View File

@@ -1,10 +1,77 @@
 import { Anthropic } from "@anthropic-ai/sdk"
+import { ClineApiReqInfo, ClineMessage } from "../../shared/ExtensionMessage"
+import { ApiHandler } from "../../api"
+import { OpenAiHandler } from "../../api/providers/openai"
 
 export class ContextManager {
-	getNextTruncationRange(
-		messages: Anthropic.Messages.MessageParam[],
-		currentDeletedRange: [number, number] | undefined = undefined,
-		keep: "half" | "quarter" = "half",
+	getNewContextMessagesAndMetadata(
+		apiConversationHistory: Anthropic.Messages.MessageParam[],
+		clineMessages: ClineMessage[],
+		api: ApiHandler,
+		conversationHistoryDeletedRange: [number, number] | undefined,
+		previousApiReqIndex: number,
+	) {
+		let updatedConversationHistoryDeletedRange = false
+
+		// If the previous API request's total token usage is close to the context window, truncate the conversation history to free up space for the new request
+		if (previousApiReqIndex >= 0) {
+			const previousRequest = clineMessages[previousApiReqIndex]
+			if (previousRequest && previousRequest.text) {
+				const { tokensIn, tokensOut, cacheWrites, cacheReads }: ClineApiReqInfo = JSON.parse(previousRequest.text)
+				const totalTokens = (tokensIn || 0) + (tokensOut || 0) + (cacheWrites || 0) + (cacheReads || 0)
+				let contextWindow = api.getModel().info.contextWindow || 128_000
+				// FIXME: hack to get anyone using openai compatible with deepseek to have the proper context window instead of the default 128k. We need a way for the user to specify the context window for models they input through openai compatible
+				if (api instanceof OpenAiHandler && api.getModel().id.toLowerCase().includes("deepseek")) {
+					contextWindow = 64_000
+				}
+				let maxAllowedSize: number
+				switch (contextWindow) {
+					case 64_000: // deepseek models
+						maxAllowedSize = contextWindow - 27_000
+						break
+					case 128_000: // most models
+						maxAllowedSize = contextWindow - 30_000
+						break
+					case 200_000: // claude models
+						maxAllowedSize = contextWindow - 40_000
+						break
+					default:
+						maxAllowedSize = Math.max(contextWindow - 40_000, contextWindow * 0.8) // for deepseek, 80% of 64k meant only ~10k buffer which was too small and resulted in users getting context window errors.
+				}
+				// This is the most reliable way to know when we're close to hitting the context window.
+				if (totalTokens >= maxAllowedSize) {
+					// Since the user may switch between models with different context windows, truncating half may not be enough (ie if switching from claude 200k to deepseek 64k, half truncation will only remove 100k tokens, but we need to remove much more)
+					// So if totalTokens/2 is greater than maxAllowedSize, we truncate 3/4 instead of 1/2
+					// FIXME: truncating the conversation in a way that is optimal for prompt caching AND takes into account multi-context window complexity is something we need to improve
+					const keep = totalTokens / 2 > maxAllowedSize ? "quarter" : "half"
+					// NOTE: it's okay that we overwriteConversationHistory in resume task since we're only ever removing the last user message and not anything in the middle which would affect this range
+					conversationHistoryDeletedRange = this.getNextTruncationRange(
+						apiConversationHistory,
+						conversationHistoryDeletedRange,
+						keep,
+					)
+					updatedConversationHistoryDeletedRange = true
+				}
+			}
+		}
+
+		// conversationHistoryDeletedRange is updated only when we're close to hitting the context window, so we don't continuously break the prompt cache
+		const truncatedConversationHistory = this.getTruncatedMessages(apiConversationHistory, conversationHistoryDeletedRange)
+
+		return {
+			conversationHistoryDeletedRange: conversationHistoryDeletedRange,
+			updatedConversationHistoryDeletedRange: updatedConversationHistoryDeletedRange,
+			truncatedConversationHistory: truncatedConversationHistory,
+		}
+	}
+
+	public getNextTruncationRange(
+		apiMessages: Anthropic.Messages.MessageParam[],
+		currentDeletedRange: [number, number] | undefined,
+		keep: "half" | "quarter",
 	): [number, number] {
 		// Since we always keep the first message, currentDeletedRange[0] will always be 1 (for now until we have a smarter truncation algorithm)
 		const rangeStartIndex = 1
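
A note on the buffer sizes in the hunk above: each known context window reserves a fixed amount of headroom for the next request, and unknown windows fall back to whichever is larger of contextWindow - 40_000 or 80% of the window. The standalone sketch below (not part of this commit) just exercises those branches with concrete numbers:

// Standalone sketch of the maxAllowedSize selection above; the function name is illustrative.
function getMaxAllowedSize(contextWindow: number): number {
	switch (contextWindow) {
		case 64_000: // deepseek models
			return contextWindow - 27_000
		case 128_000: // most models
			return contextWindow - 30_000
		case 200_000: // claude models
			return contextWindow - 40_000
		default:
			// large windows keep a flat 40k buffer; small unknown windows keep 20% free
			return Math.max(contextWindow - 40_000, contextWindow * 0.8)
	}
}

console.log(getMaxAllowedSize(64_000)) // 37000: truncation triggers once usage reaches ~37k tokens
console.log(getMaxAllowedSize(200_000)) // 160000: 40k reserved for the next request
console.log(getMaxAllowedSize(32_000)) // 25600: the 80% branch wins for small windows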
@@ -16,20 +83,20 @@ export class ContextManager {
 			// We first calculate half of the messages then divide by 2 to get the number of pairs.
 			// After flooring, we multiply by 2 to get the number of messages.
 			// Note that this will also always be an even number.
-			messagesToRemove = Math.floor((messages.length - startOfRest) / 4) * 2 // Keep even number
+			messagesToRemove = Math.floor((apiMessages.length - startOfRest) / 4) * 2 // Keep even number
 		} else {
 			// Remove 3/4 of remaining user-assistant pairs
 			// We calculate 3/4ths of the messages then divide by 2 to get the number of pairs.
 			// After flooring, we multiply by 2 to get the number of messages.
 			// Note that this will also always be an even number.
-			messagesToRemove = Math.floor(((messages.length - startOfRest) * 3) / 4 / 2) * 2
+			messagesToRemove = Math.floor(((apiMessages.length - startOfRest) * 3) / 4 / 2) * 2
 		}
 
 		let rangeEndIndex = startOfRest + messagesToRemove - 1
 
 		// Make sure the last message being removed is a user message, so that the next message after the initial task message is an assistant message. This preservers the user-assistant-user-assistant structure.
 		// NOTE: anthropic format messages are always user-assistant-user-assistant, while openai format messages can have multiple user messages in a row (we use anthropic format throughout cline)
-		if (messages[rangeEndIndex].role !== "user") {
+		if (apiMessages[rangeEndIndex].role !== "user") {
 			rangeEndIndex -= 1
 		}

@@ -37,7 +104,7 @@ export class ContextManager {
 		return [rangeStartIndex, rangeEndIndex]
 	}
 
-	getTruncatedMessages(
+	public getTruncatedMessages(
 		messages: Anthropic.Messages.MessageParam[],
 		deletedRange: [number, number] | undefined,
 	): Anthropic.Messages.MessageParam[] {
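
The range arithmetic in getNextTruncationRange is easiest to verify with concrete numbers. The sketch below is standalone (not part of the commit) and assumes the simplest case, where no range has been deleted yet, so the removable span is assumed to start right after the first task message; it also omits the final adjustment that nudges the end index back by one when it lands on an assistant message.

// Standalone sketch of the range math, assuming startOfRest = 1 (no prior deleted range).
function nextTruncationRange(messageCount: number, keep: "half" | "quarter"): [number, number] {
	const rangeStartIndex = 1 // index 0, the original task message, is always kept
	const startOfRest = 1
	const messagesToRemove =
		keep === "half"
			? Math.floor((messageCount - startOfRest) / 4) * 2 // half the remaining messages, rounded down to whole pairs
			: Math.floor(((messageCount - startOfRest) * 3) / 4 / 2) * 2 // three quarters, rounded down to whole pairs
	return [rangeStartIndex, startOfRest + messagesToRemove - 1]
}

// A 13-message history: the task message plus 12 alternating assistant/user messages.
console.log(nextTruncationRange(13, "half")) // [1, 6]  -> 6 messages (3 pairs) removed
console.log(nextTruncationRange(13, "quarter")) // [1, 8] -> 8 messages (4 pairs) removed

Both end indices (6 and 8) already fall on user messages in an alternating history, so the user-role check in the real method would leave these results unchanged.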