mirror of
https://github.com/cline/cline.git
synced 2025-06-03 03:59:07 +00:00

* Add github action to codespell main on push and PRs * Add rudimentary codespell config * run codespell throughout fixing typos automagically (but ignoring overall fail due to ambiguous ones) === Do not change lines below === { "chain": [], "cmd": "codespell -w || :", "exit": 0, "extra_inputs": [], "inputs": [], "outputs": [], "pwd": "." } ^^^ Do not change lines above ^^^ * Do interactive fixing of some ambiguous typos === Do not change lines below === { "chain": [], "cmd": "codespell -w -i 3 -C 4", "exit": 0, "extra_inputs": [], "inputs": [], "outputs": [], "pwd": "." } ^^^ Do not change lines above ^^^ * Fix Formatting --------- Co-authored-by: Dennis Bartlett <bartlett.dc.1@gmail.com>
835 lines
32 KiB
TypeScript
835 lines
32 KiB
TypeScript
import { getContextWindowInfo } from "./context-window-utils"
|
|
import { formatResponse } from "../../prompts/responses"
|
|
import { GlobalFileNames } from "../../storage/disk"
|
|
import { fileExistsAtPath } from "../../../utils/fs"
|
|
import * as path from "path"
|
|
import fs from "fs/promises"
|
|
import cloneDeep from "clone-deep"
|
|
import { ClineApiReqInfo, ClineMessage } from "../../../shared/ExtensionMessage"
|
|
import { ApiHandler } from "../../../api"
|
|
import { Anthropic } from "@anthropic-ai/sdk"
|
|
|
|
/**
 * Identifies which kind of message a stored context edit applies to.
 *
 * The numeric value is what gets written into the serialized context history
 * (see the `EditType` slot of the per-message tuple), so the explicit values
 * below must stay stable across versions.
 */
enum EditType {
	/** no specific message type; used for the truncation notice placed on the first assistant message */
	UNDEFINED = 0,
	/** reserved value; not assigned anywhere in this file */
	NO_FILE_READ = 1,
	/** result text of a `read_file` tool call */
	READ_FILE_TOOL = 2,
	/** result text of a `write_to_file` / `replace_in_file` tool call */
	ALTER_FILE_TOOL = 3,
	/** user text containing one or more `<file_content path="...">` mentions */
	FILE_MENTION = 4,
}
|
|
|
|
// array of string values allows us to cover all changes for message types currently supported
// (for "text" updates, element 0 is the full replacement text of the block)
type MessageContent = string[]

// extra bookkeeping attached to an update; empty for everything except file mentions, where it is
// [filesAlreadyReplacedInThisText, allFilesMentionedInThisText]
type MessageMetadata = string[][]

// Type for a single context update
type ContextUpdate = [number, string, MessageContent, MessageMetadata] // [timestamp, updateType, update, metadata]

// Type for the serialized format of our nested maps
// (each Map is flattened into an array of [key, value] entries so it survives JSON.stringify / JSON.parse)
type SerializedContextHistory = Array<
	[
		number, // messageIndex
		[
			number, // EditType (message type)
			Array<
				[
					number, // blockIndex
					ContextUpdate[], // updates array (now with 4 elements including metadata)
				]
			>,
		],
	]
>
|
|
|
|
/**
 * Keeps track of non-destructive edits to the API conversation history (truncation notices and
 * replacement of duplicated file contents) and decides when the history must be truncated.
 *
 * The original `apiConversationHistory` array is never modified; edits are recorded in
 * `contextHistoryUpdates` and applied to copies of the affected messages when the
 * in-context message list is built.
 */
export class ContextManager {
	// mapping from the apiMessages outer index to the inner message index to a list of actual changes, ordered by timestamp
	// timestamp is required in order to support full checkpointing, where the changes we apply need to be able to be undone when
	// moving to an earlier conversation history checkpoint - this ordering intuitively allows for binary search on truncation
	// there is also a number stored for each (EditType) which defines which message type it is, for custom handling

	// format: { outerIndex => [EditType, { innerIndex => [[timestamp, updateType, update, metadata], ...] }] }
	// example: { 1 => [0, { 0 => [[<timestamp>, "text", ["[NOTE] Some previous conversation history with the user has been removed ..."], []], ...] }] }
	// the above example would be how we update the first assistant message to indicate we truncated text
	private contextHistoryUpdates: Map<number, [number, Map<number, ContextUpdate[]>]>

	constructor() {
		this.contextHistoryUpdates = new Map()
	}

	/**
	 * public function for loading contextHistoryUpdates from disk, if it exists
	 *
	 * @param taskDirectory directory that holds the task's context history file
	 */
	async initializeContextHistory(taskDirectory: string) {
		this.contextHistoryUpdates = await this.getSavedContextHistory(taskDirectory)
	}

	/**
	 * get the stored context history updates from disk
	 *
	 * @returns the deserialized nested map, or an empty map when the file is missing or cannot be read/parsed
	 */
	private async getSavedContextHistory(taskDirectory: string): Promise<Map<number, [number, Map<number, ContextUpdate[]>]>> {
		try {
			const filePath = path.join(taskDirectory, GlobalFileNames.contextHistory)
			if (await fileExistsAtPath(filePath)) {
				const data = await fs.readFile(filePath, "utf8")
				const serializedUpdates = JSON.parse(data) as SerializedContextHistory

				// rebuild the [EditType, Map] tuple for every message index
				return new Map(
					serializedUpdates.map(([messageIndex, [numberValue, innerMapArray]]) => [
						messageIndex,
						[numberValue, new Map(innerMapArray)],
					]),
				)
			}
		} catch (error) {
			// best effort: a broken history file must not prevent the task from loading
			console.error("Failed to load context history:", error)
		}
		return new Map()
	}

	/**
	 * save the context history updates to disk
	 *
	 * Failures are logged and otherwise ignored.
	 */
	private async saveContextHistory(taskDirectory: string) {
		try {
			// Maps are not JSON-serializable, so flatten both levels into entry arrays
			const serializedUpdates: SerializedContextHistory = Array.from(this.contextHistoryUpdates.entries()).map(
				([messageIndex, [numberValue, innerMap]]) => [messageIndex, [numberValue, Array.from(innerMap.entries())]],
			)

			await fs.writeFile(
				path.join(taskDirectory, GlobalFileNames.contextHistory),
				JSON.stringify(serializedUpdates),
				"utf8",
			)
		} catch (error) {
			console.error("Failed to save context history:", error)
		}
	}

	/**
	 * primary entry point for getting up to date context & truncating when required
	 *
	 * @param apiConversationHistory full, unmodified API conversation history
	 * @param clineMessages UI messages; the entry at `previousApiReqIndex` carries the token usage of the previous request as JSON text
	 * @param api handler used to look up the context window limits
	 * @param conversationHistoryDeletedRange inclusive range of messages already removed from context, if any
	 * @param previousApiReqIndex index into `clineMessages` of the previous API request, or a negative number when there is none
	 * @param taskDirectory directory where the context history is persisted
	 * @returns the (possibly extended) deleted range, whether it changed, and the messages to send
	 */
	async getNewContextMessagesAndMetadata(
		apiConversationHistory: Anthropic.Messages.MessageParam[],
		clineMessages: ClineMessage[],
		api: ApiHandler,
		conversationHistoryDeletedRange: [number, number] | undefined,
		previousApiReqIndex: number,
		taskDirectory: string,
	) {
		let updatedConversationHistoryDeletedRange = false

		// If the previous API request's total token usage is close to the context window, truncate the conversation history to free up space for the new request
		if (previousApiReqIndex >= 0) {
			const previousRequest = clineMessages[previousApiReqIndex]
			if (previousRequest && previousRequest.text) {
				const timestamp = previousRequest.ts
				const { tokensIn, tokensOut, cacheWrites, cacheReads }: ClineApiReqInfo = JSON.parse(previousRequest.text)
				const totalTokens = (tokensIn || 0) + (tokensOut || 0) + (cacheWrites || 0) + (cacheReads || 0)
				const { maxAllowedSize } = getContextWindowInfo(api)

				// This is the most reliable way to know when we're close to hitting the context window.
				if (totalTokens >= maxAllowedSize) {
					// Since the user may switch between models with different context windows, truncating half may not be enough (ie if switching from claude 200k to deepseek 64k, half truncation will only remove 100k tokens, but we need to remove much more)
					// So if totalTokens/2 is greater than maxAllowedSize, we truncate 3/4 instead of 1/2
					const keep = totalTokens / 2 > maxAllowedSize ? "quarter" : "half"

					// we later check how many chars we trim to determine if we should still truncate history
					const [optimizationsApplied, uniqueFileReadIndices] = this.applyContextOptimizations(
						apiConversationHistory,
						conversationHistoryDeletedRange ? conversationHistoryDeletedRange[1] + 1 : 2,
						timestamp,
					)
					let anyContextUpdates = optimizationsApplied

					let needToTruncate = true
					if (anyContextUpdates) {
						// determine whether we've saved enough chars to not truncate
						const charactersSavedPercentage = this.calculateContextOptimizationMetrics(
							apiConversationHistory,
							conversationHistoryDeletedRange,
							uniqueFileReadIndices,
						)
						if (charactersSavedPercentage >= 0.3) {
							needToTruncate = false
						}
					}

					if (needToTruncate) {
						// go ahead with truncation
						anyContextUpdates = this.applyStandardContextTruncationNoticeChange(timestamp) || anyContextUpdates

						// NOTE: it's okay that we overwriteConversationHistory in resume task since we're only ever removing the last user message and not anything in the middle which would affect this range
						conversationHistoryDeletedRange = this.getNextTruncationRange(
							apiConversationHistory,
							conversationHistoryDeletedRange,
							keep,
						)

						updatedConversationHistoryDeletedRange = true
					}

					// if we alter the context history, save the updated version to disk
					if (anyContextUpdates) {
						await this.saveContextHistory(taskDirectory)
					}
				}
			}
		}

		const truncatedConversationHistory = this.getAndAlterTruncatedMessages(
			apiConversationHistory,
			conversationHistoryDeletedRange,
		)

		return {
			conversationHistoryDeletedRange: conversationHistoryDeletedRange,
			updatedConversationHistoryDeletedRange: updatedConversationHistoryDeletedRange,
			truncatedConversationHistory: truncatedConversationHistory,
		}
	}

	/**
	 * get truncation range
	 *
	 * @param apiMessages full API conversation history
	 * @param currentDeletedRange inclusive range already removed from context, if any
	 * @param keep how much of the remaining history to keep: "half" removes about 1/2, "quarter" removes about 3/4
	 * @returns the new inclusive [start, end] range to remove; start is always 2. When nothing can be
	 * removed the end is `startOfRest - 1` (an empty range when there was no previous range)
	 */
	public getNextTruncationRange(
		apiMessages: Anthropic.Messages.MessageParam[],
		currentDeletedRange: [number, number] | undefined,
		keep: "half" | "quarter",
	): [number, number] {
		// We always keep the first user-assistant pairing, and truncate an even number of messages from there
		const rangeStartIndex = 2 // index 0 and 1 are kept
		const startOfRest = currentDeletedRange ? currentDeletedRange[1] + 1 : 2 // inclusive starting index

		// never negative: a history shorter than startOfRest simply has nothing left to remove
		const remainingMessages = Math.max(0, apiMessages.length - startOfRest)

		let messagesToRemove: number
		if (keep === "half") {
			// Remove half of remaining user-assistant pairs
			// We first calculate half of the messages then divide by 2 to get the number of pairs.
			// After flooring, we multiply by 2 to get the number of messages.
			// Note that this will also always be an even number.
			messagesToRemove = Math.floor(remainingMessages / 4) * 2 // Keep even number
		} else {
			// Remove 3/4 of remaining user-assistant pairs
			// We calculate 3/4ths of the messages then divide by 2 to get the number of pairs.
			// After flooring, we multiply by 2 to get the number of messages.
			// Note that this will also always be an even number.
			messagesToRemove = Math.floor((remainingMessages * 3) / 4 / 2) * 2
		}

		let rangeEndIndex = startOfRest + messagesToRemove - 1 // inclusive ending index

		// Make sure that the last message being removed is an assistant message, so the next message after the initial user-assistant pair is a user message. This preserves the user-assistant-user-assistant structure.
		// NOTE: anthropic format messages are always user-assistant-user-assistant, while openai format messages can have multiple user messages in a row (we use anthropic format throughout cline)
		// The existence check avoids dereferencing a message beyond the end of a short history.
		const lastRemovedMessage = apiMessages[rangeEndIndex]
		if (lastRemovedMessage && lastRemovedMessage.role !== "assistant") {
			rangeEndIndex -= 1
		}

		// this is an inclusive range that will be removed from the conversation history
		return [rangeStartIndex, rangeEndIndex]
	}

	/**
	 * external interface to support old calls
	 */
	public getTruncatedMessages(
		messages: Anthropic.Messages.MessageParam[],
		deletedRange: [number, number] | undefined,
	): Anthropic.Messages.MessageParam[] {
		return this.getAndAlterTruncatedMessages(messages, deletedRange)
	}

	/**
	 * apply all required truncation methods to the messages in context
	 *
	 * @returns `messages` itself when it has at most one entry, otherwise a new array holding the
	 * first pair plus everything after `deletedRange`, with stored edits applied
	 */
	private getAndAlterTruncatedMessages(
		messages: Anthropic.Messages.MessageParam[],
		deletedRange: [number, number] | undefined,
	): Anthropic.Messages.MessageParam[] {
		if (messages.length <= 1) {
			return messages
		}

		const updatedMessages = this.applyContextHistoryUpdates(messages, deletedRange ? deletedRange[1] + 1 : 2)

		// OLD NOTE: if you try to console log these, don't forget that logging a reference to an array may not provide the same result as logging a slice() snapshot of that array at that exact moment. The following DOES in fact include the latest assistant message.
		return updatedMessages
	}

	/**
	 * applies deletedRange truncation and other alterations based on changes in this.contextHistoryUpdates
	 *
	 * @param messages full API conversation history (left unmodified)
	 * @param startFromIndex first index after the deleted range
	 */
	private applyContextHistoryUpdates(
		messages: Anthropic.Messages.MessageParam[],
		startFromIndex: number,
	): Anthropic.Messages.MessageParam[] {
		// runtime is linear in length of user messages, if expecting a limited number of alterations, could be more optimal to loop over alterations

		const firstChunk = messages.slice(0, 2) // get first user-assistant pair
		const secondChunk = messages.slice(startFromIndex) // get remaining messages within context
		const messagesToUpdate = [...firstChunk, ...secondChunk]

		// we need the mapping from the local indices in messagesToUpdate to the global array of updates in this.contextHistoryUpdates
		const originalIndices = [
			...Array(2).keys(),
			...Array(secondChunk.length)
				.fill(0)
				.map((_, i) => i + startFromIndex),
		]

		for (let arrayIndex = 0; arrayIndex < messagesToUpdate.length; arrayIndex++) {
			const messageIndex = originalIndices[arrayIndex]

			const innerTuple = this.contextHistoryUpdates.get(messageIndex)
			if (!innerTuple) {
				continue
			}

			// because we are altering this, we need a deep copy
			messagesToUpdate[arrayIndex] = cloneDeep(messagesToUpdate[arrayIndex])

			// Extract the map from the tuple
			const innerMap = innerTuple[1]
			for (const [blockIndex, changes] of innerMap) {
				// apply the latest change among n changes - [timestamp, updateType, update, metadata]
				const latestChange = changes[changes.length - 1]
				if (!latestChange) {
					// an empty update list (e.g. from a damaged history file) means there is nothing to apply
					continue
				}

				if (latestChange[1] === "text") {
					// only altering text for now
					const message = messagesToUpdate[arrayIndex]

					if (Array.isArray(message.content)) {
						const block = message.content[blockIndex]
						if (block && block.type === "text") {
							block.text = latestChange[2][0]
						}
					}
				}
			}
		}

		return messagesToUpdate
	}

	/**
	 * removes all context history updates that occurred after the specified timestamp and saves to disk
	 */
	async truncateContextHistory(timestamp: number, taskDirectory: string): Promise<void> {
		this.truncateContextHistoryAtTimestamp(this.contextHistoryUpdates, timestamp)

		// save the modified context history to disk
		await this.saveContextHistory(taskDirectory)
	}

	/**
	 * alters the context history to remove all alterations after a given timestamp
	 * removes the index if there are no alterations there anymore, both outer and inner indices
	 *
	 * `contextHistory` is modified in place; updates with a timestamp equal to `timestamp` are kept.
	 */
	private truncateContextHistoryAtTimestamp(
		contextHistory: Map<number, [number, Map<number, ContextUpdate[]>]>,
		timestamp: number,
	): void {
		for (const [messageIndex, [_, innerMap]] of contextHistory) {
			// track which blockIndices to delete
			const blockIndicesToDelete: number[] = []

			// loop over the innerIndices of the messages in this block
			for (const [blockIndex, updates] of innerMap) {
				// updates ordered by timestamp, so find cutoff point by iterating from right to left
				let cutoffIndex = updates.length - 1
				while (cutoffIndex >= 0 && updates[cutoffIndex][0] > timestamp) {
					cutoffIndex--
				}

				// If we found updates to remove
				if (cutoffIndex < updates.length - 1) {
					// Modify the array in place to keep only updates up to cutoffIndex
					updates.length = cutoffIndex + 1

					// If no updates left after truncation, mark this block for deletion
					if (updates.length === 0) {
						blockIndicesToDelete.push(blockIndex)
					}
				}
			}

			// Remove empty blocks from inner map
			for (const blockIndex of blockIndicesToDelete) {
				innerMap.delete(blockIndex)
			}

			// If inner map is now empty, remove the message index from outer map
			if (innerMap.size === 0) {
				contextHistory.delete(messageIndex)
			}
		}
	}

	/**
	 * applies the context optimization steps and returns whether any changes were made
	 *
	 * @returns [whether anything changed, outer indices of the messages altered in this round]
	 */
	private applyContextOptimizations(
		apiMessages: Anthropic.Messages.MessageParam[],
		startFromIndex: number,
		timestamp: number,
	): [boolean, Set<number>] {
		const [fileReadUpdatesBool, uniqueFileReadIndices] = this.findAndPotentiallySaveFileReadContextHistoryUpdates(
			apiMessages,
			startFromIndex,
			timestamp,
		)

		// true if any context optimization steps alter state
		const contextHistoryUpdated = fileReadUpdatesBool

		return [contextHistoryUpdated, uniqueFileReadIndices]
	}

	/**
	 * if there is any truncation and there is no other alteration already set, alter the assistant message to indicate this occurred
	 *
	 * @returns true when the notice was newly recorded, false when message index 1 already had an entry
	 */
	private applyStandardContextTruncationNoticeChange(timestamp: number): boolean {
		if (!this.contextHistoryUpdates.has(1)) {
			// first assistant message always at index 1
			const innerMap = new Map<number, ContextUpdate[]>()
			innerMap.set(0, [[timestamp, "text", [formatResponse.contextTruncationNotice()], []]])
			this.contextHistoryUpdates.set(1, [0, innerMap]) // EditType is undefined for first assistant message
			return true
		}
		return false
	}

	/**
	 * wraps the logic for determining file reads to overwrite, and altering state
	 * returns whether any updates were made (bool) and indices where updates were made
	 */
	private findAndPotentiallySaveFileReadContextHistoryUpdates(
		apiMessages: Anthropic.Messages.MessageParam[],
		startFromIndex: number,
		timestamp: number,
	): [boolean, Set<number>] {
		const [fileReadIndices, messageFilePaths] = this.getPossibleDuplicateFileReads(apiMessages, startFromIndex)
		return this.applyFileReadContextHistoryUpdates(fileReadIndices, messageFilePaths, apiMessages, timestamp)
	}

	/**
	 * generate a mapping from unique file reads from multiple tool calls to their outer index position(s)
	 * also return additional metadata to support multiple file reads in file mention text blocks
	 */
	private getPossibleDuplicateFileReads(
		apiMessages: Anthropic.Messages.MessageParam[],
		startFromIndex: number,
	): [Map<string, [number, number, string, string][]>, Map<number, string[]>] {
		// fileReadIndices: { fileName => [outerIndex, EditType, searchText, replaceText] }
		// messageFilePaths: { outerIndex => [fileRead1, fileRead2, ..] }
		// searchText in fileReadIndices is only required for file mention file-reads since there can be more than one file in the text
		// searchText will be the empty string "" in the case that it's not required, for non-file mentions
		// messageFilePaths is only used for file mentions as there can be multiple files read in the same text chunk

		// for all text blocks per file, has info for updating the block
		const fileReadIndices = new Map<string, [number, number, string, string][]>()

		// for file mention text blocks, track all the unique files read
		const messageFilePaths = new Map<number, string[]>()

		for (let i = startFromIndex; i < apiMessages.length; i++) {
			let thisExistingFileReads: string[] = []

			const innerTuple = this.contextHistoryUpdates.get(i)
			if (innerTuple) {
				const editType = innerTuple[0]

				if (editType === EditType.FILE_MENTION) {
					const innerMap = innerTuple[1]

					const blockIndex = 1 // file mention blocks assumed to be at index 1
					const blockUpdates = innerMap.get(blockIndex)

					// if we have updated this text previously, we want to check whether the lists of files in the metadata are the same
					if (blockUpdates && blockUpdates.length > 0) {
						const latestMetadata = blockUpdates[blockUpdates.length - 1][3]

						// the first list indicates the files we have replaced in this text, second list indicates all unique files in this text
						// if they are equal then we have replaced all the files in this text already, and can ignore further processing
						if (latestMetadata[0].length === latestMetadata[1].length) {
							continue
						}

						// otherwise there are still file reads here we can overwrite, so still need to process this text chunk
						// to do so we need to keep track of which files we've already replaced so we don't replace them again
						thisExistingFileReads = latestMetadata[0]
					}
				} else {
					// for all other cases we can assume that we dont need to check this again
					continue
				}
			}

			const message = apiMessages[i]
			if (message.role === "user" && Array.isArray(message.content) && message.content.length > 0) {
				const firstBlock = message.content[0]
				if (firstBlock.type === "text") {
					const matchTup = this.parsePotentialToolCall(firstBlock.text)
					let foundNormalFileRead = false
					if (matchTup) {
						if (matchTup[0] === "read_file") {
							this.handleReadFileToolCall(i, matchTup[1], fileReadIndices)
							foundNormalFileRead = true
						} else if (matchTup[0] === "replace_in_file" || matchTup[0] === "write_to_file") {
							if (message.content.length > 1) {
								const secondBlock = message.content[1]
								if (secondBlock.type === "text") {
									this.handlePotentialFileChangeToolCalls(i, matchTup[1], secondBlock.text, fileReadIndices)
									foundNormalFileRead = true
								}
							}
						}
					}

					// file mentions can happen in most other user message blocks
					if (!foundNormalFileRead) {
						if (message.content.length > 1) {
							const secondBlock = message.content[1]
							if (secondBlock.type === "text") {
								const [hasFileRead, filePaths] = this.handlePotentialFileMentionCalls(
									i,
									secondBlock.text,
									fileReadIndices,
									thisExistingFileReads, // file reads we've already replaced in this text in the latest version of this updated text
								)
								if (hasFileRead) {
									messageFilePaths.set(i, filePaths) // all file paths in this string
								}
							}
						}
					}
				}
			}
		}

		return [fileReadIndices, messageFilePaths]
	}

	/**
	 * handles potential file content mentions in text blocks
	 * there will not be more than one of the same file read in a text block
	 *
	 * @param i outer index of the message being scanned
	 * @param secondBlockText text of the block that may contain `<file_content>` mentions
	 * @param fileReadIndices accumulator that receives one entry per mention not yet replaced
	 * @param thisExistingFileReads paths already replaced in the latest stored version of this text
	 * @returns [whether any mention was found, every path mentioned in the text]
	 */
	private handlePotentialFileMentionCalls(
		i: number,
		secondBlockText: string,
		fileReadIndices: Map<string, [number, number, string, string][]>,
		thisExistingFileReads: string[],
	): [boolean, string[]] {
		const pattern = new RegExp(`<file_content path="([^"]*)">([\\s\\S]*?)</file_content>`, "g")

		let foundMatch = false
		const filePaths: string[] = []

		let match
		while ((match = pattern.exec(secondBlockText)) !== null) {
			foundMatch = true

			const filePath = match[1]
			filePaths.push(filePath) // we will record all unique paths from file mentions in this text

			// we can assume that thisExistingFileReads does not have many entries
			if (!thisExistingFileReads.includes(filePath)) {
				// meaning we haven't already replaced this file read

				const entireMatch = match[0] // The entire matched string

				// Create the replacement text - keep the tags but replace the content
				const replacementText = `<file_content path="${filePath}">${formatResponse.duplicateFileReadNotice()}</file_content>`

				const indices = fileReadIndices.get(filePath) || []
				indices.push([i, EditType.FILE_MENTION, entireMatch, replacementText])
				fileReadIndices.set(filePath, indices)
			}
		}

		return [foundMatch, filePaths]
	}

	/**
	 * parses specific tool call formats, returns null if no acceptable format is found
	 *
	 * @returns [toolName, quotedArgument] for text of the exact form `[tool for 'arg'] Result:`
	 */
	private parsePotentialToolCall(text: string): [string, string] | null {
		const match = text.match(/^\[([^\s]+) for '([^']+)'\] Result:$/)

		if (!match) {
			return null
		}

		return [match[1], match[2]]
	}

	/**
	 * file_read tool call always pastes the file, so this is always a hit
	 */
	private handleReadFileToolCall(
		i: number,
		filePath: string,
		fileReadIndices: Map<string, [number, number, string, string][]>,
	) {
		const indices = fileReadIndices.get(filePath) || []
		indices.push([i, EditType.READ_FILE_TOOL, "", formatResponse.duplicateFileReadNotice()])
		fileReadIndices.set(filePath, indices)
	}

	/**
	 * write_to_file and replace_in_file tool output are handled similarly
	 */
	private handlePotentialFileChangeToolCalls(
		i: number,
		filePath: string,
		secondBlockText: string,
		fileReadIndices: Map<string, [number, number, string, string][]>,
	) {
		const pattern = new RegExp(`(<final_file_content path="[^"]*">)[\\s\\S]*?(</final_file_content>)`)

		// check if this exists in the text, it won't exist if the user rejects the file change for example
		if (pattern.test(secondBlockText)) {
			// $1 / $2 keep the opening and closing tags; only the content between them is swapped out
			const replacementText = secondBlockText.replace(pattern, `$1 ${formatResponse.duplicateFileReadNotice()} $2`)
			const indices = fileReadIndices.get(filePath) || []
			indices.push([i, EditType.ALTER_FILE_TOOL, "", replacementText])
			fileReadIndices.set(filePath, indices)
		}
	}

	/**
	 * alter all occurrences of file read operations and track which messages were updated
	 * returns the outer index of messages we alter, to count number of changes
	 *
	 * @param fileReadIndices per file path, every place it was read: [outerIndex, EditType, searchText, replaceText]
	 * @param messageFilePaths per outer index, all file paths mentioned in that message's text
	 * @param apiMessages full API conversation history (only read)
	 * @param timestamp timestamp recorded on every update created here
	 * @returns [whether anything changed, outer indices of the messages altered]
	 */
	private applyFileReadContextHistoryUpdates(
		fileReadIndices: Map<string, [number, number, string, string][]>,
		messageFilePaths: Map<number, string[]>,
		apiMessages: Anthropic.Messages.MessageParam[],
		timestamp: number,
	): [boolean, Set<number>] {
		let didUpdate = false
		const updatedMessageIndices = new Set<number>() // track which messages we update on this round
		const fileMentionUpdates = new Map<number, [string, string[]]>()

		for (const [filePath, indices] of fileReadIndices.entries()) {
			// Only process if there are multiple reads of the same file, else we will want to keep the latest read of the file
			if (indices.length > 1) {
				// Process all but the last index, as we will keep that instance of the file read
				for (let i = 0; i < indices.length - 1; i++) {
					const messageIndex = indices[i][0]
					const messageType = indices[i][1] // EditType value
					const searchText = indices[i][2] // search text (for file mentions, else empty string)
					const messageString = indices[i][3] // what we will replace the string with

					didUpdate = true
					updatedMessageIndices.add(messageIndex)

					// for single-fileread text we can set the updates here
					// for potential multi-fileread text we need to determine all changes & iteratively update the text prior to saving the final change
					if (messageType === EditType.FILE_MENTION) {
						if (!fileMentionUpdates.has(messageIndex)) {
							// Get base text either from existing updates or from apiMessages
							let baseText = ""
							let prevFilesReplaced: string[] = []

							const innerTuple = this.contextHistoryUpdates.get(messageIndex)
							if (innerTuple) {
								const blockUpdates = innerTuple[1].get(1) // assumed index=1 for file mention filereads
								if (blockUpdates && blockUpdates.length > 0) {
									const latestUpdate = blockUpdates[blockUpdates.length - 1]
									baseText = latestUpdate[2][0] // index 0 of MessageContent
									// copy the list: pushing into the stored array would rewrite the metadata of the
									// earlier update, which must stay intact for checkpoint restores
									prevFilesReplaced = [...(latestUpdate[3][0] ?? [])] // previously overwritten file reads in this text
								}
							}

							// can assume that this content will exist, otherwise it would not have been in fileReadIndices
							const messageContent = apiMessages[messageIndex]?.content
							if (!baseText && Array.isArray(messageContent) && messageContent.length > 1) {
								const contentBlock = messageContent[1] // assume index=1 for all text to replace for file mention filereads
								if (contentBlock.type === "text") {
									baseText = contentBlock.text
								}
							}

							// prevFilesReplaced keeps track of the previous file reads we've replaced in this string, empty array if none
							fileMentionUpdates.set(messageIndex, [baseText, prevFilesReplaced])
						}

						// Replace searchText with messageString for all file reads we need to replace in this text
						if (searchText) {
							const currentTuple = fileMentionUpdates.get(messageIndex) || ["", []]
							if (currentTuple[0]) {
								// safety check
								// replace this text chunk; a replacer function is used so that "$" sequences in
								// messageString (it embeds the file path) are inserted literally
								const updatedText = currentTuple[0].replace(searchText, () => messageString)

								// add the newly added filePath read
								const updatedFileReads = currentTuple[1]
								updatedFileReads.push(filePath)

								fileMentionUpdates.set(messageIndex, [updatedText, updatedFileReads])
							}
						}
					} else {
						const innerTuple = this.contextHistoryUpdates.get(messageIndex)
						let innerMap: Map<number, ContextUpdate[]>

						if (!innerTuple) {
							innerMap = new Map<number, ContextUpdate[]>()
							this.contextHistoryUpdates.set(messageIndex, [messageType, innerMap])
						} else {
							innerMap = innerTuple[1]
						}

						// block index for file reads from read_file, write_to_file, replace_in_file tools is 1
						const blockIndex = 1

						const updates = innerMap.get(blockIndex) || []

						// metadata array is empty for non-file mention occurrences
						updates.push([timestamp, "text", [messageString], []])

						innerMap.set(blockIndex, updates)
					}
				}
			}
		}

		// apply file mention updates to contextHistoryUpdates
		// in fileMentionUpdates, filePathsUpdated includes all the file paths which are updated in the latest version of this altered text
		for (const [messageIndex, [updatedText, filePathsUpdated]] of fileMentionUpdates.entries()) {
			const innerTuple = this.contextHistoryUpdates.get(messageIndex)
			let innerMap: Map<number, ContextUpdate[]>

			if (!innerTuple) {
				innerMap = new Map<number, ContextUpdate[]>()
				this.contextHistoryUpdates.set(messageIndex, [EditType.FILE_MENTION, innerMap])
			} else {
				innerMap = innerTuple[1]
			}

			const blockIndex = 1 // we only consider the block index of 1 for file mentions
			const updates = innerMap.get(blockIndex) || []

			// filePathsUpdated includes changes done previously to this timestamp, and right now
			const allFileReads = messageFilePaths.get(messageIndex)
			if (allFileReads) {
				// safety check
				// we gather all the file reads possible in this text from messageFilePaths
				// filePathsUpdated from fileMentionUpdates stores all the files reads we have replaced now & previously
				updates.push([timestamp, "text", [updatedText], [filePathsUpdated, allFileReads]])
				innerMap.set(blockIndex, updates)
			}
		}

		return [didUpdate, updatedMessageIndices]
	}

	/**
	 * count total characters in messages and total savings within this range
	 *
	 * Only array-form message content is counted: text blocks by text length, base64 image
	 * blocks by data length.
	 *
	 * @param startIndex first outer index to count (inclusive)
	 * @param endIndex last outer index to count (exclusive)
	 * @param uniqueFileReadIndices outer indices of the messages altered in the current round
	 */
	private countCharactersAndSavingsInRange(
		apiMessages: Anthropic.Messages.MessageParam[],
		startIndex: number,
		endIndex: number,
		uniqueFileReadIndices: Set<number>,
	): { totalCharacters: number; charactersSaved: number } {
		let totalCharCount = 0
		let totalCharactersSaved = 0

		for (let i = startIndex; i < endIndex; i++) {
			// looping over the outer indices of messages
			const message = apiMessages[i]

			// the range may extend past a very short history, so tolerate missing messages
			if (!message || !message.content) {
				continue
			}

			// hasExistingAlterations checks whether the outer index has any changes
			// hasExistingAlterations will also include the alterations we just made
			const hasExistingAlterations = this.contextHistoryUpdates.has(i)
			const hasNewAlterations = uniqueFileReadIndices.has(i)

			if (Array.isArray(message.content)) {
				for (let blockIndex = 0; blockIndex < message.content.length; blockIndex++) {
					// looping over inner indices of messages
					const block = message.content[blockIndex]

					if (block.type === "text" && block.text) {
						// true if we just altered it, or it was altered before
						if (hasExistingAlterations) {
							const innerTuple = this.contextHistoryUpdates.get(i)
							const updates = innerTuple?.[1].get(blockIndex) // updated text for this inner index

							if (updates && updates.length > 0) {
								// exists if we have an update for the message at this index
								const latestUpdate = updates[updates.length - 1]

								// if block was just altered, then calculate savings
								if (hasNewAlterations) {
									let originalTextLength
									if (updates.length > 1) {
										originalTextLength = updates[updates.length - 2][2][0].length // handles case if we have multiple updates for same text block
									} else {
										originalTextLength = block.text.length
									}

									const newTextLength = latestUpdate[2][0].length // replacement text
									totalCharactersSaved += originalTextLength - newTextLength

									totalCharCount += originalTextLength
								} else {
									// meaning there was an update to this text previously, but we didn't just alter it
									totalCharCount += latestUpdate[2][0].length
								}
							} else {
								// reach here if there was one inner index with an update, but now we are at a different index, so updates is not defined
								totalCharCount += block.text.length
							}
						} else {
							// reach here if there's no alterations for this outer index, meaning each inner index won't have any changes either
							totalCharCount += block.text.length
						}
					} else if (block.type === "image" && block.source) {
						if (block.source.type === "base64" && block.source.data) {
							totalCharCount += block.source.data.length
						}
					}
				}
			}
		}

		return { totalCharacters: totalCharCount, charactersSaved: totalCharactersSaved }
	}

	/**
	 * count total percentage character savings across in-range conversation
	 *
	 * @returns saved characters divided by total characters (a fraction, 0 when nothing was counted)
	 */
	private calculateContextOptimizationMetrics(
		apiMessages: Anthropic.Messages.MessageParam[],
		conversationHistoryDeletedRange: [number, number] | undefined,
		uniqueFileReadIndices: Set<number>,
	): number {
		// count for first user-assistant message pair
		const firstChunkResult = this.countCharactersAndSavingsInRange(apiMessages, 0, 2, uniqueFileReadIndices)

		// count for the remaining in-range messages
		const secondChunkResult = this.countCharactersAndSavingsInRange(
			apiMessages,
			conversationHistoryDeletedRange ? conversationHistoryDeletedRange[1] + 1 : 2,
			apiMessages.length,
			uniqueFileReadIndices,
		)

		const totalCharacters = firstChunkResult.totalCharacters + secondChunkResult.totalCharacters
		const totalCharactersSaved = firstChunkResult.charactersSaved + secondChunkResult.charactersSaved

		const percentCharactersSaved = totalCharacters === 0 ? 0 : totalCharactersSaved / totalCharacters

		return percentCharactersSaved
	}
}
|