diff --git a/README.md b/README.md index 69e5ffd02..4983af9c2 100644 --- a/README.md +++ b/README.md @@ -33,18 +33,21 @@ Claude Dev has access to the following capabilities: 3. **`write_to_file`**: Write content to a file at the specified path, automatically creating any necessary directories 4. **`list_files`**: List all paths for files in the specified directory. When `recursive = true`, it recursively lists all files in the directory and its nested folders (excludes files in .gitignore). When `recursive = false`, it lists only top-level files (useful for generic file operations like retrieving a file from your Desktop). 5. **`list_code_definition_names`**: Parses all source code files at the top level of the specified directory to extract names of key elements like classes and functions (see more below) -6. **`ask_followup_question`**: Ask the user a question to gather additional information needed to complete a task (due to the autonomous nature of the program, this isn't a typical chatbot–Claude Dev must explicitly interrupt his task loop to ask for more information) -7. **`attempt_completion`**: Present the result to the user after completing a task, potentially with a terminal command to kickoff a demonstration +6. **`search_files`**: Search files in a specified directory for text that matches a given regex pattern +7. **`ask_followup_question`**: Ask the user a question to gather additional information needed to complete a task (due to the autonomous nature of the program, this isn't a typical chatbot–Claude Dev must explicitly interrupt his task loop to ask for more information) +8. 
**`attempt_completion`**: Present the result to the user after completing a task, potentially with a terminal command to kick off a demonstration ### Working in Existing Projects -When given a task in an existing project, Claude will look for the most relevant files to read and edit the same way you or I would–by first looking at the names of directories, files, classes, and functions since these names tend to reflect their purpose and role within the broader system, and often encapsulate high-level concepts and relationships that help understand a project's overall architecture. With tools like `list_code_definition_names`, Claude is able to extract names of various elements in a project to determine what files are most relevant to a given task without you having to mention `@file`s or `@folder`s yourself. +When given a task in an existing project, Claude will look for the most relevant files to read and edit the same way you or I would–by first looking at the names of directories, files, classes, and functions since these names tend to reflect their purpose and role within the broader system, and often encapsulate high-level concepts and relationships that help understand a project's overall architecture. With tools like `list_code_definition_names` and `search_files`, Claude is able to extract names of various elements in a project to determine what files are most relevant to a given task without you having to mention `@file`s or `@folder`s yourself. 1. **File Structure**: When a task is started, Claude is given an overview of your project's file structure. It turns out Claude 3.5 Sonnet is _really_ good at inferring what it needs to process further just from these file names alone. 2. **Source Code Definitions**: Claude may then use the `list_code_definition_names` tool on specific directories of interest. 
This tool uses [tree-sitter](https://github.com/tree-sitter/tree-sitter) to parse source code with custom tag queries that extract names of classes, functions, methods, and other definitions. It works by first identifying source code files that tree-sitter can parse (currently supports `python`, `javascript`, `typescript`, `ruby`, `go`, `java`, `php`, `rust`, `c`, `c++`, `c#`, `swift`), then parsing each file into an abstract syntax tree, and finally applying a language-specific query to extract definition names (you can see the exact query used for each language in `src/parse-source-code/queries`). The results are formatted into a concise & readable output that Claude can easily interpret to quickly understand the code's structure and purpose. -3. **Read Relevant Files**: With insights gained from the names of various files and source code definitions, Claude can then use the `read_file` tool to examine specific files that are most relevant to the task at hand. +3. **Search Files**: Claude can also use the `search_files` tool to search for specific patterns or content across multiple files. This tool uses [ripgrep](https://github.com/BurntSushi/ripgrep) to perform regex searches on files in a specified directory. The results are formatted into a concise & readable output that Claude can easily interpret to quickly understand the code's structure and purpose. This can be useful for tasks like refactoring function names, updating imports, addressing TODOs and FIXMEs, etc. + +4. **Read Relevant Files**: With insights gained from the names of various files and source code definitions, Claude can then use the `read_file` tool to examine specific files that are most relevant to the task at hand. By carefully managing what information is added to context, Claude can provide valuable assistance even for complex, large-scale projects without overwhelming its context window. 
diff --git a/package-lock.json b/package-lock.json index 52e9bec3e..89ca1bf1e 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "claude-dev", - "version": "1.4.16", + "version": "1.4.24", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "claude-dev", - "version": "1.4.16", + "version": "1.4.24", "license": "MIT", "dependencies": { "@anthropic-ai/bedrock-sdk": "^0.10.2", diff --git a/src/ClaudeDev.ts b/src/ClaudeDev.ts index 9f10b6e7d..d779a05bb 100644 --- a/src/ClaudeDev.ts +++ b/src/ClaudeDev.ts @@ -1,5 +1,6 @@ import { Anthropic } from "@anthropic-ai/sdk" import defaultShell from "default-shell" +import delay from "delay" import * as diff from "diff" import { execa, ExecaError, ResultPromise } from "execa" import fs from "fs/promises" @@ -15,17 +16,17 @@ import { listFiles, parseSourceCodeForDefinitionsTopLevel } from "./parse-source import { ClaudeDevProvider } from "./providers/ClaudeDevProvider" import { ApiConfiguration } from "./shared/api" import { ClaudeRequestResult } from "./shared/ClaudeRequestResult" -import { DEFAULT_MAX_REQUESTS_PER_TASK } from "./shared/Constants" -import { ClaudeAsk, ClaudeMessage, ClaudeSay, ClaudeSayTool } from "./shared/ExtensionMessage" -import { Tool, ToolName } from "./shared/Tool" -import { ClaudeAskResponse } from "./shared/WebviewMessage" -import delay from "delay" -import { getApiMetrics } from "./shared/getApiMetrics" -import { HistoryItem } from "./shared/HistoryItem" import { combineApiRequests } from "./shared/combineApiRequests" import { combineCommandSequences } from "./shared/combineCommandSequences" +import { DEFAULT_MAX_REQUESTS_PER_TASK } from "./shared/Constants" +import { ClaudeAsk, ClaudeMessage, ClaudeSay, ClaudeSayTool } from "./shared/ExtensionMessage" +import { getApiMetrics } from "./shared/getApiMetrics" +import { HistoryItem } from "./shared/HistoryItem" +import { Tool, ToolName } from "./shared/Tool" +import { ClaudeAskResponse } from 
"./shared/WebviewMessage" import { findLastIndex } from "./utils" import { isWithinContextWindow, truncateHalfConversation } from "./utils/context-management" +import { regexSearchFiles } from "./utils/ripgrep" const SYSTEM_PROMPT = () => `You are Claude Dev, a highly skilled software developer with extensive knowledge in many programming languages, frameworks, design patterns, and best practices. @@ -38,8 +39,9 @@ CAPABILITIES - You can debug complex issues and providing detailed explanations, offering architectural insights and design patterns. - You have access to tools that let you execute CLI commands on the user's computer, list files in a directory (top level or recursively), extract source code definitions, read and write files, and ask follow-up questions. These tools help you effectively accomplish a wide range of tasks, such as writing code, making edits or improvements to existing files, understanding the current state of a project, performing system operations, and much more. - When the user initially gives you a task, a recursive list of all filepaths in the current working directory ('${cwd}') will be included in potentially_relevant_details. This provides an overview of the project's file structure, offering key insights into the project from directory/file names (how developers conceptualize and organize their code) and file extensions (the language used). This can also guide decision-making on which files to explore further. If you need to further explore directories such as outside the current working directory, you can use the list_files tool. If you pass 'true' for the recursive parameter, it will list files recursively. Otherwise, it will list files at the top level, which is better suited for generic directories where you don't necessarily need the nested structure, like the Desktop. +- You can use search_files to perform regex searches across files in a specified directory, outputting context-rich results that include surrounding lines. 
This is particularly useful for understanding code patterns, finding specific implementations, or identifying areas that need refactoring. - You can use the list_code_definition_names tool to get an overview of source code definitions for all files at the top level of a specified directory. This can be particularly useful when you need to understand the broader context and relationships between certain parts of the code. You may need to call this tool multiple times to understand various parts of the codebase related to the task. - - For example, when asked to make edits or improvements you might analyze the file structure in the initial potentially_relevant_details to get an overview of the project, then use list_code_definition_names to get further insight using source code definitions for files located in relevant directories, then read_file to examine the contents of relevant files, analyze the code and suggest improvements or make necessary edits, then use the write_to_file tool to implement changes. + - For example, when asked to make edits or improvements you might analyze the file structure in the initial potentially_relevant_details to get an overview of the project, then use list_code_definition_names to get further insight using source code definitions for files located in relevant directories, then read_file to examine the contents of relevant files, analyze the code and suggest improvements or make necessary edits, then use the write_to_file tool to implement changes. If you refactored code that could affect other parts of the codebase, you could use search_files to ensure you update other files as needed. - The execute_command tool lets you run commands on the user's computer and should be used whenever you feel it can help accomplish the user's task. When you need to execute a CLI command, you must provide a clear explanation of what the command does. 
Prefer to execute complex CLI commands over creating executable scripts, since they are more flexible and easier to run. Interactive and long-running commands are allowed, since the user has the ability to send input to stdin and terminate the command on their own if needed. ==== @@ -51,6 +53,7 @@ RULES - Do not use the ~ character or $HOME to refer to the home directory. - Before using the execute_command tool, you must first think about the SYSTEM INFORMATION context provided to understand the user's environment and tailor your commands to ensure they are compatible with their system. You must also consider if the command you need to run should be executed in a specific directory outside of the current working directory '${cwd}', and if so prepend with \`cd\`'ing into that directory && then executing the command (as one command since you are stuck operating from '${cwd}'). For example, if you needed to run \`npm install\` in a project outside of '${cwd}', you would need to prepend with a \`cd\` i.e. pseudocode for this would be \`cd (path to project) && (command, in this case npm install)\`. - If you need to read or edit a file you have already read or edited, you can assume its contents have not changed since then (unless specified otherwise by the user) and skip using the read_file tool before proceeding. +- When using the search_files tool, craft your regex patterns carefully to balance specificity and flexibility. Based on the user's task you may use it to find code patterns, TODO comments, function definitions, or any text-based information across the project. The results include context, so analyze the surrounding code to better understand the matches. Leverage the search_files tool in combination with other tools for more comprehensive analysis. For example, use it to find specific code patterns, then use read_file to examine the full context of interesting matches before using write_to_file to make informed changes. 
- When creating a new project (such as an app, website, or any software project), organize all new files within a dedicated project directory unless the user specifies otherwise. Use appropriate file paths when writing files, as the write_to_file tool will automatically create any necessary directories. Structure the project logically, adhering to best practices for the specific type of project being created. Unless otherwise specified, new projects should be easily run without additional setup, for example most projects can be built in HTML, CSS, and JavaScript - which you can open in a browser. - You must try to use multiple tools in one request when possible. For example if you were to create a website, you would use the write_to_file tool to create the necessary files with their appropriate contents all at once. Or if you wanted to analyze a project, you could use the read_file tool multiple times to look at several key files. This will help you accomplish the user's task more efficiently. - Be sure to consider the type of project (e.g. Python, JavaScript, web application) when determining the appropriate structure and files to include. Also consider what files may be most relevant to accomplishing the task, for example looking at a project's manifest file would help you understand the project's dependencies, which you could incorporate into any code you write. @@ -141,6 +144,30 @@ const tools: Tool[] = [ required: ["path"], }, }, + { + name: "search_files", + description: + "Perform a regex search across files in a specified directory, providing context-rich results. This tool searches for patterns or specific content across multiple files, displaying each match with encapsulating context.", + input_schema: { + type: "object", + properties: { + path: { + type: "string", + description: `The path of the directory to search in (relative to the current working directory ${cwd}). 
This directory will be recursively searched.`, + }, + regex: { + type: "string", + description: "The regular expression pattern to search for. Uses Rust regex syntax.", + }, + filePattern: { + type: "string", + description: + "Optional glob pattern to filter files (e.g., '*.ts' for TypeScript files). If not provided, it will search all files (*).", + }, + }, + required: ["path", "regex"], + }, + }, { name: "read_file", description: @@ -686,6 +713,8 @@ export class ClaudeDev { return this.listFiles(toolInput.path, toolInput.recursive) case "list_code_definition_names": return this.listCodeDefinitionNames(toolInput.path) + case "search_files": + return this.searchFiles(toolInput.path, toolInput.regex, toolInput.filePattern) case "execute_command": return this.executeCommand(toolInput.command) case "ask_followup_question": @@ -1041,6 +1070,59 @@ export class ClaudeDev { } } + async searchFiles(relDirPath: string, regex: string, filePattern?: string): Promise { + if (relDirPath === undefined) { + await this.say( + "error", + "Claude tried to use search_files without value for required parameter 'path'. Retrying..." + ) + return "Error: Missing value for required parameter 'path'. Please retry with complete response." + } + + if (regex === undefined) { + await this.say( + "error", + `Claude tried to use search_files without value for required parameter 'regex'. Retrying...` + ) + return "Error: Missing value for required parameter 'regex'. Please retry with complete response." 
+ } + + try { + const absolutePath = path.resolve(cwd, relDirPath) + const results = await regexSearchFiles(cwd, absolutePath, regex, filePattern) + + const message = JSON.stringify({ + tool: "searchFiles", + path: this.getReadablePath(relDirPath), + regex: regex, + filePattern: filePattern, + content: results, + } as ClaudeSayTool) + + if (this.alwaysAllowReadOnly) { + await this.say("tool", message) + } else { + const { response, text, images } = await this.ask("tool", message) + if (response !== "yesButtonTapped") { + if (response === "messageResponse") { + await this.say("user_feedback", text, images) + return this.formatIntoToolResponse(await this.formatGenericToolFeedback(text), images) + } + return "The user denied this operation." + } + } + + return results + } catch (error) { + const errorString = `Error searching files: ${JSON.stringify(serializeError(error))}` + await this.say( + "error", + `Error searching files:\n${error.message ?? JSON.stringify(serializeError(error), null, 2)}` + ) + return errorString + } + } + async executeCommand(command?: string, returnEmptyStringOnSuccess: boolean = false): Promise { if (command === undefined) { await this.say( diff --git a/src/shared/ExtensionMessage.ts b/src/shared/ExtensionMessage.ts index 07978b13e..7965196f6 100644 --- a/src/shared/ExtensionMessage.ts +++ b/src/shared/ExtensionMessage.ts @@ -65,7 +65,10 @@ export interface ClaudeSayTool { | "listFilesTopLevel" | "listFilesRecursive" | "listCodeDefinitionNames" + | "searchFiles" path?: string diff?: string content?: string + regex?: string + filePattern?: string } diff --git a/src/shared/Tool.ts b/src/shared/Tool.ts index 86b8f5da1..3b446a32a 100644 --- a/src/shared/Tool.ts +++ b/src/shared/Tool.ts @@ -5,6 +5,7 @@ export type ToolName = | "read_file" | "list_files" | "list_code_definition_names" + | "search_files" | "execute_command" | "ask_followup_question" | "attempt_completion" diff --git a/src/utils/ripgrep.ts b/src/utils/ripgrep.ts new file mode 100644 
index 000000000..c739e78c1
--- /dev/null
+++ b/src/utils/ripgrep.ts
@@ -0,0 +1,283 @@
+import * as vscode from "vscode"
+import * as childProcess from "child_process"
+import * as path from "path"
+import * as fs from "fs"
+
+/*
+This file provides functionality to perform regex searches on files using ripgrep.
+Inspired by: https://github.com/DiscreteTom/vscode-ripgrep-utils
+
+Key components:
+1. getBinPath: Locates the ripgrep binary within the VSCode installation.
+2. execRipgrep: Executes the ripgrep command and returns the output.
+3. parseRipgrepOutput: Turns ripgrep's JSON output into matches with their context lines.
+4. regexSearchFiles: The main function that performs regex searches on files.
+   - Parameters:
+     * cwd: The current working directory (for relative path calculation)
+     * directoryPath: The directory to search in
+     * regex: The regular expression to search for (Rust regex syntax)
+     * filePattern: Optional glob pattern to filter files (default: '*')
+   - Returns: A formatted string containing search results with context
+
+The search results include:
+- Relative file paths
+- 1 line of context before and after each match (see CONTEXT_LINES)
+- Matches formatted with pipe characters for easy reading
+
+Usage example:
+const results = await regexSearchFiles('/path/to/cwd', '/path/to/search', 'TODO:', '*.ts');
+
+Found 2 results...
+
+rel/path/to/app.ts
+│----
+│  // Some processing logic here
+│  // TODO: Implement error handling
+│  return processedData;
+│----
+
+rel/path/to/helper.ts
+│----
+│  for (let i = 0; i < input; i++) {
+│    // TODO: Optimize this function for performance
+│    result += Math.pow(i, 2);
+│----
+*/
+
+const isWindows = /^win/.test(process.platform)
+const binName = isWindows ? "rg.exe" : "rg"
+
+// Lines of context requested from ripgrep on each side of a match
+const CONTEXT_LINES = 1
+// Maximum number of matches included in the formatted output
+const MAX_RESULTS = 300
+
+interface SearchResult {
+	file: string
+	line: number
+	column: number
+	match: string
+	beforeContext: string[]
+	afterContext: string[]
+}
+
+/**
+ * Returns the path of the ripgrep binary under the given VSCode app root, trying each known
+ * bundle location in turn, or undefined when none of them exists.
+ */
+async function getBinPath(vscodeAppRoot: string): Promise<string | undefined> {
+	const checkPath = async (pkgFolder: string) => {
+		const fullPath = path.join(vscodeAppRoot, pkgFolder, binName)
+		return (await pathExists(fullPath)) ? fullPath : undefined
+	}
+
+	return (
+		(await checkPath("node_modules/@vscode/ripgrep/bin/")) ||
+		(await checkPath("node_modules/vscode-ripgrep/bin")) ||
+		(await checkPath("node_modules.asar.unpacked/vscode-ripgrep/bin/")) ||
+		(await checkPath("node_modules.asar.unpacked/@vscode/ripgrep/bin/"))
+	)
+}
+
+/** Resolves to true when fs.access succeeds for the path, false otherwise. */
+async function pathExists(filePath: string): Promise<boolean> {
+	return new Promise((resolve) => {
+		fs.access(filePath, (err) => {
+			resolve(err === null)
+		})
+	})
+}
+
+/**
+ * Runs ripgrep with the given arguments and resolves with everything it wrote to stdout.
+ *
+ * Exit codes 0 (matches found) and 1 (no matches) both resolve. Any other exit code, or a failure
+ * to start the process, rejects with an Error; for exit codes the message includes stderr.
+ */
+async function execRipgrep(bin: string, args: string[]): Promise<string> {
+	return new Promise((resolve, reject) => {
+		// Not named `process`, so the Node.js global is not shadowed in this scope
+		const rgProcess = childProcess.spawn(bin, args)
+		let output = ""
+		let errorOutput = ""
+
+		// Decode as UTF-8 in the stream so multi-byte characters split across chunks stay intact
+		rgProcess.stdout.setEncoding("utf8")
+		rgProcess.stderr.setEncoding("utf8")
+
+		rgProcess.stdout.on("data", (data: string) => {
+			output += data
+		})
+
+		rgProcess.stderr.on("data", (data: string) => {
+			errorOutput += data
+		})
+
+		// Emitted when the process cannot be spawned; reject instead of leaving the event unhandled
+		rgProcess.on("error", (error) => {
+			reject(error)
+		})
+
+		rgProcess.on("close", (code) => {
+			if (code === 0 || code === 1) {
+				resolve(output)
+			} else {
+				reject(new Error(`ripgrep process exited with code ${code}: ${errorOutput}`))
+			}
+		})
+	})
+}
+
+/**
+ * Parses ripgrep's `--json` output (one JSON message per line) into search results.
+ *
+ * ripgrep emits a match's leading context lines before the match itself, so context lines are
+ * buffered until the next match unless they fall within CONTEXT_LINES after the current match.
+ * A "begin" message starts a new file and discards any buffered context from the previous one.
+ * Lines that cannot be parsed are logged and skipped.
+ */
+function parseRipgrepOutput(output: string): SearchResult[] {
+	const results: SearchResult[] = []
+	let currentResult: SearchResult | null = null
+	let pendingBeforeContext: string[] = []
+
+	for (const line of output.split("\n")) {
+		if (!line) {
+			continue
+		}
+		try {
+			const parsed = JSON.parse(line)
+			if (parsed.type === "begin") {
+				if (currentResult) {
+					results.push(currentResult)
+				}
+				currentResult = null
+				pendingBeforeContext = []
+			} else if (parsed.type === "match") {
+				// Build the new result first so a malformed message cannot leave the previous one pushed twice
+				const nextResult: SearchResult = {
+					file: parsed.data.path.text,
+					line: parsed.data.line_number,
+					column: parsed.data.submatches[0]?.start ?? 0,
+					match: parsed.data.lines.text,
+					beforeContext: pendingBeforeContext,
+					afterContext: [],
+				}
+				if (currentResult) {
+					results.push(currentResult)
+				}
+				currentResult = nextResult
+				pendingBeforeContext = []
+			} else if (parsed.type === "context") {
+				const lineNumber: number = parsed.data.line_number
+				if (
+					currentResult !== null &&
+					lineNumber > currentResult.line &&
+					lineNumber <= currentResult.line + CONTEXT_LINES
+				) {
+					currentResult.afterContext.push(parsed.data.lines.text)
+				} else {
+					pendingBeforeContext.push(parsed.data.lines.text)
+				}
+			}
+		} catch (error) {
+			console.error("Error parsing ripgrep output:", error)
+		}
+	}
+
+	if (currentResult) {
+		results.push(currentResult)
+	}
+
+	return results
+}
+
+/**
+ * Runs a ripgrep regex search over directoryPath and returns the matches as formatted text.
+ *
+ * @param cwd Base directory used to display file paths as relative paths
+ * @param directoryPath Directory to search
+ * @param regex Pattern to search for, passed to ripgrep via -e (Rust regex syntax)
+ * @param filePattern Optional glob passed via --glob; "*" is used when omitted or empty
+ * @returns The formatted results, or "No results found" when nothing matched
+ * @throws Error when the ripgrep binary cannot be found or ripgrep fails (e.g. exit code 2)
+ */
+export async function regexSearchFiles(
+	cwd: string,
+	directoryPath: string,
+	regex: string,
+	filePattern?: string
+): Promise<string> {
+	const vscodeAppRoot = vscode.env.appRoot
+	const rgPath = await getBinPath(vscodeAppRoot)
+
+	if (!rgPath) {
+		throw new Error("Could not find ripgrep binary")
+	}
+
+	const args = [
+		"--json",
+		"-e",
+		regex,
+		"--glob",
+		filePattern || "*",
+		"--context",
+		String(CONTEXT_LINES),
+		directoryPath,
+	]
+
+	// Failures propagate to the caller instead of being reported as "No results found"
+	const output = await execRipgrep(rgPath, args)
+	const results = parseRipgrepOutput(output)
+
+	if (results.length === 0) {
+		return "No results found"
+	}
+
+	return formatResults(results, cwd)
+}
+
+/**
+ * Renders results grouped by file path (relative to cwd), each line prefixed with "│" and each
+ * match separated by a "│----" rule. Only the first MAX_RESULTS results are included.
+ */
+function formatResults(results: SearchResult[], cwd: string): string {
+	const groupedResults = new Map<string, SearchResult[]>()
+
+	let output = ""
+	if (results.length > MAX_RESULTS) {
+		output += `Showing first ${MAX_RESULTS} of ${results.length.toLocaleString()} results, use a more specific search if necessary...\n\n`
+	} else {
+		output += `Found ${results.length.toLocaleString()} results...\n\n`
+	}
+
+	// Group results by file name
+	for (const result of results.slice(0, MAX_RESULTS)) {
+		const relativeFilePath = path.relative(cwd, result.file)
+		const fileResults = groupedResults.get(relativeFilePath)
+		if (fileResults) {
+			fileResults.push(result)
+		} else {
+			groupedResults.set(relativeFilePath, [result])
+		}
+	}
+
+	for (const [filePath, fileResults] of groupedResults) {
+		output += `${filePath}\n│----\n`
+
+		fileResults.forEach((result, index) => {
+			const allLines = [...result.beforeContext, result.match, ...result.afterContext]
+			allLines.forEach((line) => {
+				output += `│${line?.trimEnd() ?? ""}\n`
+			})
+
+			if (index < fileResults.length - 1) {
+				output += "│----\n"
+			}
+		})
+
+		output += "│----\n\n"
+	}
+
+	return output.trim()
+}
diff --git a/webview-ui/src/components/ChatRow.tsx b/webview-ui/src/components/ChatRow.tsx index 8481f9097..29c5007c2 100644 --- a/webview-ui/src/components/ChatRow.tsx +++ b/webview-ui/src/components/ChatRow.tsx @@ -591,6 +591,33 @@ const ChatRow: React.FC = ({ /> ) + case "searchFiles": + return ( + <> +
+ {toolIcon("search")} + + {message.type === "ask" ? ( + <> + Claude wants to search this directory for {tool.regex}: + + ) : ( + <> + Claude searched this directory for {tool.regex}: + + )} + +
+ + + ) default: return null }