Mirror of https://github.com/cline/cline.git, synced 2025-06-03 03:59:07 +00:00

move remaining context management out of Cline.ts (#2367)

* move context management out
* changeset

This commit is contained in:
parent 4a0a40ead1
commit ec01e1f19d

.changeset/nice-boats-kick.md (new file, 5 lines added)
@@ -0,0 +1,5 @@
+---
+"claude-dev": patch
+---
+
+updated move context management out of cline
@@ -1373,58 +1373,20 @@ export class Cline {
 				})
 			}
 
-			// If the previous API request's total token usage is close to the context window, truncate the conversation history to free up space for the new request
-			if (previousApiReqIndex >= 0) {
-				const previousRequest = this.clineMessages[previousApiReqIndex]
-				if (previousRequest && previousRequest.text) {
-					const { tokensIn, tokensOut, cacheWrites, cacheReads }: ClineApiReqInfo = JSON.parse(previousRequest.text)
-					const totalTokens = (tokensIn || 0) + (tokensOut || 0) + (cacheWrites || 0) + (cacheReads || 0)
-					let contextWindow = this.api.getModel().info.contextWindow || 128_000
-					// FIXME: hack to get anyone using openai compatible with deepseek to have the proper context window instead of the default 128k. We need a way for the user to specify the context window for models they input through openai compatible
-					if (this.api instanceof OpenAiHandler && this.api.getModel().id.toLowerCase().includes("deepseek")) {
-						contextWindow = 64_000
-					}
-					let maxAllowedSize: number
-					switch (contextWindow) {
-						case 64_000: // deepseek models
-							maxAllowedSize = contextWindow - 27_000
-							break
-						case 128_000: // most models
-							maxAllowedSize = contextWindow - 30_000
-							break
-						case 200_000: // claude models
-							maxAllowedSize = contextWindow - 40_000
-							break
-						default:
-							maxAllowedSize = Math.max(contextWindow - 40_000, contextWindow * 0.8) // for deepseek, 80% of 64k meant only ~10k buffer which was too small and resulted in users getting context window errors.
-					}
-
-					// This is the most reliable way to know when we're close to hitting the context window.
-					if (totalTokens >= maxAllowedSize) {
-						// Since the user may switch between models with different context windows, truncating half may not be enough (ie if switching from claude 200k to deepseek 64k, half truncation will only remove 100k tokens, but we need to remove much more)
-						// So if totalTokens/2 is greater than maxAllowedSize, we truncate 3/4 instead of 1/2
-						// FIXME: truncating the conversation in a way that is optimal for prompt caching AND takes into account multi-context window complexity is something we need to improve
-						const keep = totalTokens / 2 > maxAllowedSize ? "quarter" : "half"
-
-						// NOTE: it's okay that we overwriteConversationHistory in resume task since we're only ever removing the last user message and not anything in the middle which would affect this range
-						this.conversationHistoryDeletedRange = this.contextManager.getNextTruncationRange(
-							this.apiConversationHistory,
-							this.conversationHistoryDeletedRange,
-							keep,
-						)
-						await this.saveClineMessages() // saves task history item which we use to keep track of conversation history deleted range
-						// await this.overwriteApiConversationHistory(truncatedMessages)
-					}
-				}
-			}
-
-			// conversationHistoryDeletedRange is updated only when we're close to hitting the context window, so we don't continuously break the prompt cache
-			const truncatedConversationHistory = this.contextManager.getTruncatedMessages(
+			const contextManagementMetadata = this.contextManager.getNewContextMessagesAndMetadata(
 				this.apiConversationHistory,
+				this.clineMessages,
+				this.api,
 				this.conversationHistoryDeletedRange,
+				previousApiReqIndex,
 			)
 
-			let stream = this.api.createMessage(systemPrompt, truncatedConversationHistory)
+			if (contextManagementMetadata.updatedConversationHistoryDeletedRange) {
+				this.conversationHistoryDeletedRange = contextManagementMetadata.conversationHistoryDeletedRange
+				await this.saveClineMessages() // saves task history item which we use to keep track of conversation history deleted range
+			}
+
+			let stream = this.api.createMessage(systemPrompt, contextManagementMetadata.truncatedConversationHistory)
 
 			const iterator = stream[Symbol.asyncIterator]()
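For readers skimming the diff: Cline.ts now consumes a single metadata object from the ContextManager instead of computing truncation inline. The sketch below is not part of the commit; the interface name ContextManagementMetadata is invented for illustration, while the three fields mirror the return value added to getNewContextMessagesAndMetadata in the ContextManager hunk that follows.

// Illustrative only; field names and types follow the returned object in the diff below.
import { Anthropic } from "@anthropic-ai/sdk"

interface ContextManagementMetadata {
	// The (possibly unchanged) range of messages dropped from the API conversation history.
	conversationHistoryDeletedRange: [number, number] | undefined
	// True only when this call moved the range, signalling the caller to persist it.
	updatedConversationHistoryDeletedRange: boolean
	// The history with the deleted range removed; this is what gets sent to the model.
	truncatedConversationHistory: Anthropic.Messages.MessageParam[]
}

The boolean flag is what lets Cline call saveClineMessages() only when the range actually changed, so the persisted task history stays in sync without redundant writes.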
@@ -1,10 +1,77 @@
 import { Anthropic } from "@anthropic-ai/sdk"
+import { ClineApiReqInfo, ClineMessage } from "../../shared/ExtensionMessage"
+import { ApiHandler } from "../../api"
+import { OpenAiHandler } from "../../api/providers/openai"
 
 export class ContextManager {
-	getNextTruncationRange(
-		messages: Anthropic.Messages.MessageParam[],
-		currentDeletedRange: [number, number] | undefined = undefined,
-		keep: "half" | "quarter" = "half",
+	getNewContextMessagesAndMetadata(
+		apiConversationHistory: Anthropic.Messages.MessageParam[],
+		clineMessages: ClineMessage[],
+		api: ApiHandler,
+		conversationHistoryDeletedRange: [number, number] | undefined,
+		previousApiReqIndex: number,
+	) {
+		let updatedConversationHistoryDeletedRange = false
+
+		// If the previous API request's total token usage is close to the context window, truncate the conversation history to free up space for the new request
+		if (previousApiReqIndex >= 0) {
+			const previousRequest = clineMessages[previousApiReqIndex]
+			if (previousRequest && previousRequest.text) {
+				const { tokensIn, tokensOut, cacheWrites, cacheReads }: ClineApiReqInfo = JSON.parse(previousRequest.text)
+				const totalTokens = (tokensIn || 0) + (tokensOut || 0) + (cacheWrites || 0) + (cacheReads || 0)
+				let contextWindow = api.getModel().info.contextWindow || 128_000
+				// FIXME: hack to get anyone using openai compatible with deepseek to have the proper context window instead of the default 128k. We need a way for the user to specify the context window for models they input through openai compatible
+				if (api instanceof OpenAiHandler && api.getModel().id.toLowerCase().includes("deepseek")) {
+					contextWindow = 64_000
+				}
+				let maxAllowedSize: number
+				switch (contextWindow) {
+					case 64_000: // deepseek models
+						maxAllowedSize = contextWindow - 27_000
+						break
+					case 128_000: // most models
+						maxAllowedSize = contextWindow - 30_000
+						break
+					case 200_000: // claude models
+						maxAllowedSize = contextWindow - 40_000
+						break
+					default:
+						maxAllowedSize = Math.max(contextWindow - 40_000, contextWindow * 0.8) // for deepseek, 80% of 64k meant only ~10k buffer which was too small and resulted in users getting context window errors.
+				}
+
+				// This is the most reliable way to know when we're close to hitting the context window.
+				if (totalTokens >= maxAllowedSize) {
+					// Since the user may switch between models with different context windows, truncating half may not be enough (ie if switching from claude 200k to deepseek 64k, half truncation will only remove 100k tokens, but we need to remove much more)
+					// So if totalTokens/2 is greater than maxAllowedSize, we truncate 3/4 instead of 1/2
+					// FIXME: truncating the conversation in a way that is optimal for prompt caching AND takes into account multi-context window complexity is something we need to improve
+					const keep = totalTokens / 2 > maxAllowedSize ? "quarter" : "half"
+
+					// NOTE: it's okay that we overwriteConversationHistory in resume task since we're only ever removing the last user message and not anything in the middle which would affect this range
+					conversationHistoryDeletedRange = this.getNextTruncationRange(
+						apiConversationHistory,
+						conversationHistoryDeletedRange,
+						keep,
+					)
+
+					updatedConversationHistoryDeletedRange = true
+				}
+			}
+		}
+
+		// conversationHistoryDeletedRange is updated only when we're close to hitting the context window, so we don't continuously break the prompt cache
+		const truncatedConversationHistory = this.getTruncatedMessages(apiConversationHistory, conversationHistoryDeletedRange)
+
+		return {
+			conversationHistoryDeletedRange: conversationHistoryDeletedRange,
+			updatedConversationHistoryDeletedRange: updatedConversationHistoryDeletedRange,
+			truncatedConversationHistory: truncatedConversationHistory,
+		}
+	}
+
+	public getNextTruncationRange(
+		apiMessages: Anthropic.Messages.MessageParam[],
+		currentDeletedRange: [number, number] | undefined,
+		keep: "half" | "quarter",
 	): [number, number] {
 		// Since we always keep the first message, currentDeletedRange[0] will always be 1 (for now until we have a smarter truncation algorithm)
 		const rangeStartIndex = 1
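To make the buffer arithmetic in the hunk above concrete, here is a standalone sketch (not part of the commit) that reproduces the maxAllowedSize and keep decision with the same constants; the function name truncationDecision is invented for illustration.

// Standalone illustration; the constants are copied from the switch statement in the diff.
function truncationDecision(totalTokens: number, contextWindow: number): string {
	let maxAllowedSize: number
	switch (contextWindow) {
		case 64_000: // deepseek models
			maxAllowedSize = contextWindow - 27_000 // 37k usable
			break
		case 128_000: // most models
			maxAllowedSize = contextWindow - 30_000 // 98k usable
			break
		case 200_000: // claude models
			maxAllowedSize = contextWindow - 40_000 // 160k usable
			break
		default:
			maxAllowedSize = Math.max(contextWindow - 40_000, contextWindow * 0.8)
	}
	if (totalTokens < maxAllowedSize) {
		return "no truncation"
	}
	// If even half the history would still exceed the budget, drop three quarters instead.
	return totalTokens / 2 > maxAllowedSize ? "keep quarter" : "keep half"
}

console.log(truncationDecision(100_000, 200_000)) // "no truncation"
console.log(truncationDecision(180_000, 64_000))  // "keep quarter"
console.log(truncationDecision(40_000, 64_000))   // "keep half"

The quarter path covers the model-switch case called out in the comments: a 180k-token history carried into a 64k-window model still exceeds the 37k budget after a half truncation, so three quarters of the pairs are dropped instead.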
@@ -16,20 +83,20 @@
 			// We first calculate half of the messages then divide by 2 to get the number of pairs.
 			// After flooring, we multiply by 2 to get the number of messages.
 			// Note that this will also always be an even number.
-			messagesToRemove = Math.floor((messages.length - startOfRest) / 4) * 2 // Keep even number
+			messagesToRemove = Math.floor((apiMessages.length - startOfRest) / 4) * 2 // Keep even number
 		} else {
 			// Remove 3/4 of remaining user-assistant pairs
 			// We calculate 3/4ths of the messages then divide by 2 to get the number of pairs.
 			// After flooring, we multiply by 2 to get the number of messages.
 			// Note that this will also always be an even number.
-			messagesToRemove = Math.floor(((messages.length - startOfRest) * 3) / 4 / 2) * 2
+			messagesToRemove = Math.floor(((apiMessages.length - startOfRest) * 3) / 4 / 2) * 2
 		}
 
 		let rangeEndIndex = startOfRest + messagesToRemove - 1
 
 		// Make sure the last message being removed is a user message, so that the next message after the initial task message is an assistant message. This preserves the user-assistant-user-assistant structure.
 		// NOTE: anthropic format messages are always user-assistant-user-assistant, while openai format messages can have multiple user messages in a row (we use anthropic format throughout cline)
-		if (messages[rangeEndIndex].role !== "user") {
+		if (apiMessages[rangeEndIndex].role !== "user") {
 			rangeEndIndex -= 1
 		}
 
@@ -37,7 +104,7 @@
 		return [rangeStartIndex, rangeEndIndex]
 	}
 
-	getTruncatedMessages(
+	public getTruncatedMessages(
 		messages: Anthropic.Messages.MessageParam[],
 		deletedRange: [number, number] | undefined,
 	): Anthropic.Messages.MessageParam[] {
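A hypothetical usage sketch of the two public helpers (not part of the commit): the nine-message history and the import path are invented, and the comments only assert what the hunks above guarantee, namely that the first message is kept and the removed span ends on a user message.

import { Anthropic } from "@anthropic-ai/sdk"
import { ContextManager } from "./ContextManager" // illustrative path

// A toy history: the initial task message followed by alternating assistant/user turns.
const history: Anthropic.Messages.MessageParam[] = Array.from({ length: 9 }, (_, i) => ({
	role: i % 2 === 0 ? ("user" as const) : ("assistant" as const),
	content: `message ${i}`,
}))

const manager = new ContextManager()

// Nothing deleted yet; ask for the range that drops half of the remaining user-assistant pairs.
const range = manager.getNextTruncationRange(history, undefined, "half")
// The range starts after the first (task) message and ends on a user message,
// so the remaining history keeps its user-assistant alternation.

const truncated = manager.getTruncatedMessages(history, range)
console.log(range, truncated.length) // the truncated history omits the messages inside the range

Because the range is only recomputed when the previous request neared the context window, getNewContextMessagesAndMetadata usually returns the history unchanged, which keeps the prompt cache intact between requests.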