-
Notifications
You must be signed in to change notification settings - Fork 1.9k
Expand file tree
/
Copy pathfindTextInFilesTool.tsx
More file actions
385 lines (338 loc) · 17.9 KB
/
findTextInFilesTool.tsx
File metadata and controls
385 lines (338 loc) · 17.9 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
/*---------------------------------------------------------------------------------------------
* Copyright (c) Microsoft Corporation. All rights reserved.
* Licensed under the MIT License. See License.txt in the project root for license information.
*--------------------------------------------------------------------------------------------*/
import * as l10n from '@vscode/l10n';
import { BasePromptElementProps, PromptElement, PromptElementProps, PromptPiece, PromptReference, PromptSizing, TextChunk } from '@vscode/prompt-tsx';
import type * as vscode from 'vscode';
import { IConfigurationService } from '../../../platform/configuration/common/configurationService';
import { OffsetLineColumnConverter } from '../../../platform/editing/common/offsetLineColumnConverter';
import { IEndpointProvider } from '../../../platform/endpoint/common/endpointProvider';
import { IPromptPathRepresentationService } from '../../../platform/prompts/common/promptPathRepresentationService';
import { ISearchService } from '../../../platform/search/common/searchService';
import { ITelemetryService } from '../../../platform/telemetry/common/telemetry';
import { IWorkspaceService } from '../../../platform/workspace/common/workspaceService';
import { raceTimeoutAndCancellationError } from '../../../util/common/racePromise';
import { asArray } from '../../../util/vs/base/common/arrays';
import { CancellationToken } from '../../../util/vs/base/common/cancellation';
import { isAbsolute } from '../../../util/vs/base/common/path';
import { count } from '../../../util/vs/base/common/strings';
import { URI } from '../../../util/vs/base/common/uri';
import { Position as EditorPosition } from '../../../util/vs/editor/common/core/position';
import { IInstantiationService } from '../../../util/vs/platform/instantiation/common/instantiation';
import { ExcludeSettingOptions, ExtendedLanguageModelToolResult, LanguageModelPromptTsxPart, Location, MarkdownString, Range } from '../../../vscodeTypes';
import { IBuildPromptContext } from '../../prompt/common/intents';
import { renderPromptElementJSON } from '../../prompts/node/base/promptRenderer';
import { Tag } from '../../prompts/node/base/tag';
import { ToolName } from '../common/toolNames';
import { CopilotToolMode, ICopilotTool, ToolRegistry } from '../common/toolsRegistry';
import { checkCancellation, InputGlobResult, inputGlobToPattern, patternContainsWorkspaceFolderPath } from './toolUtils';
/** Input object the language model supplies when invoking the find-text-in-files tool. */
interface IFindTextInFilesToolParams {
/** The text or regular expression to search for; it is passed to the search service as the search pattern. */
query: string;
/** Whether `query` should be treated as a regular expression. The tool treats an omitted value as `true`. */
isRegexp?: boolean;
/** Optional glob restricting which files are searched. */
includePattern?: string;
/** Maximum number of matches to return. The tool falls back to 20 when omitted and clamps the value to `MaxResultsCap`. */
maxResults?: number;
/** Whether to include files that would normally be ignored according to .gitignore, other ignore files and `files.exclude` and `search.exclude` settings. */
includeIgnoredFiles?: boolean;
}
/** Upper bound for the `maxResults` tool input; larger requests are clamped to this value and the model is told about the cap. */
const MaxResultsCap = 200;
/**
 * Language-model tool that searches files for a text or regex query.
 *
 * The matches are returned twice: rendered through {@link FindTextInFilesResult} as a prompt-tsx
 * part for the model, and as a list of `Location`s (plus a short Markdown summary) attached to
 * the tool result.
 */
export class FindTextInFilesTool implements ICopilotTool<IFindTextInFilesToolParams> {
	public static readonly toolName = ToolName.FindTextInFiles;
	public static readonly nonDeferred = true;

	constructor(
		@IInstantiationService private readonly instantiationService: IInstantiationService,
		@ISearchService private readonly searchService: ISearchService,
		@IWorkspaceService private readonly workspaceService: IWorkspaceService,
		@IEndpointProvider private readonly endpointProvider: IEndpointProvider,
		@IConfigurationService private readonly configurationService: IConfigurationService,
		@ITelemetryService private readonly telemetryService: ITelemetryService,
	) { }

	/**
	 * Runs the search described by `options.input`.
	 *
	 * The query is searched in the requested mode first (regex unless `isRegexp` is `false`).
	 * When that yields nothing and the query compiles as a regular expression, the opposite
	 * mode is tried once. Each attempt is raced against a 20s timeout and the cancellation token.
	 *
	 * @param options Tool invocation options carrying the model-provided input.
	 * @param token Cancellation token for the whole invocation.
	 * @returns The tool result containing the rendered prompt part, a summary message and match locations.
	 * @throws Error when the input carries the unsupported `pattern` property.
	 */
	async invoke(options: vscode.LanguageModelToolInvocationOptions<IFindTextInFilesToolParams>, token: CancellationToken) {
		// TODO strict input validation
		// Certain models just really want to pass incorrect input
		if ((options.input as unknown as Record<string, string>).pattern) {
			throw new Error('The property "pattern" is not supported, please use "query"');
		}

		const endpoint = options.model && (await this.endpointProvider.getChatEndpoint(options.model));
		const modelFamily = endpoint?.family;

		// The input _should_ be a pattern matching inside a workspace, folder, but sometimes we get absolute paths, so try to resolve them
		const globResult = options.input.includePattern ? inputGlobToPattern(options.input.includePattern, this.workspaceService, modelFamily) : undefined;
		const patterns = globResult?.patterns;

		// Telemetry is fire-and-forget and best-effort: a failure while collecting it must not
		// surface as an unhandled promise rejection or affect the search itself.
		void this.sendSearchToolTelemetry(options, globResult).catch(() => undefined);

		checkCancellation(token);

		// A missing maxResults can never exceed the cap, so this is always a real boolean.
		const askedForTooManyResults = (options.input.maxResults ?? 0) > MaxResultsCap;
		const maxResults = Math.min(options.input.maxResults ?? 20, MaxResultsCap);
		const isRegExp = options.input.isRegexp ?? true;
		const queryIsValidRegex = this.isValidRegex(options.input.query);
		const includeIgnoredFiles = options.input.includeIgnoredFiles ?? false;

		// try find text with a timeout of 20s
		const timeoutInMs = 20_000;
		let results = await raceTimeoutAndCancellationError(
			(searchToken) => this.searchAndCollectResults(options.input.query, isRegExp, patterns, maxResults, includeIgnoredFiles, searchToken),
			token,
			timeoutInMs,
			// embed message to give LLM hint about what to do next
			`Timeout in searching text in files with ${isRegExp ? 'regex' : 'literal'} search, try a more specific search pattern or change regex/literal mode`
		);

		// If we still have no results, we need to try the opposite regex mode
		if (!results.length && queryIsValidRegex) {
			results = await raceTimeoutAndCancellationError(
				(searchToken) => this.searchAndCollectResults(options.input.query, !isRegExp, patterns, maxResults, includeIgnoredFiles, searchToken),
				token,
				timeoutInMs,
				// embed message to give LLM hint about what to do next
				`Find ${results.length} results in searching text in files with ${isRegExp ? 'regex' : 'literal'} search, and then another searching hits timeout in with ${!isRegExp ? 'regex' : 'literal'} search, try a more specific search pattern`
			);
		}

		let noMatchInstructions: string | undefined = undefined;
		if (!results.length && !includeIgnoredFiles) {
			// Get the search.exclude configuration and keep only the patterns that are switched on
			const excludeSettings = this.configurationService.getNonExtensionConfig<Record<string, boolean>>('search.exclude');
			const excludePaths: string[] = [];
			if (excludeSettings) {
				for (const [path, isExcluded] of Object.entries(excludeSettings)) {
					if (isExcluded) {
						excludePaths.push(path);
					}
				}
			}

			// Joined explicitly so the text sent to the model does not depend on how this file is indented.
			noMatchInstructions = [
				'Your search pattern might be excluded completely by either the search.exclude settings or .*ignore files.',
				`If you believe that it should have results, you can check into the .*ignore files and the exclude setting (here are some excluded patterns for reference:[${excludePaths.join(',')}]).`,
				'Then if you want to include those files you can call the tool again by setting "includeIgnoredFiles" to true.',
			].join('\n');
		}

		const prompt = await renderPromptElementJSON(this.instantiationService,
			FindTextInFilesResult,
			{ textResults: results, maxResults, askedForTooManyResults, noMatchInstructions },
			options.tokenizationOptions,
			token);

		const result = new ExtendedLanguageModelToolResult([new LanguageModelPromptTsxPart(prompt)]);

		// One location per matched range; results without ranges contribute nothing.
		// The search asks for maxResults + 1 items, so trim back to maxResults here.
		const textMatches = results.flatMap(r => {
			if ('ranges' in r) {
				return asArray(r.ranges).map(rangeInfo => new Location(r.uri, rangeInfo.sourceRange));
			}
			return [];
		}).slice(0, maxResults);

		const query = this.formatQueryString(options.input, globResult);
		result.toolResultMessage = this.getResultMessage(isRegExp, query, textMatches.length);
		result.toolResultDetails = textMatches;
		return result;
	}

	/**
	 * Sends the `findTextInFilesToolInvoked` telemetry event describing how the include pattern
	 * relates to the workspace folders.
	 *
	 * @param options The invocation options (request id, model and input are read).
	 * @param globResult The resolved include pattern, if an include pattern was given.
	 */
	private async sendSearchToolTelemetry(options: vscode.LanguageModelToolInvocationOptions<IFindTextInFilesToolParams>, globResult: InputGlobResult | undefined): Promise<void> {
		const model = options.model && (await this.endpointProvider.getChatEndpoint(options.model)).model;
		const isMultiRoot = this.workspaceService.getWorkspaceFolders().length > 1;
		const includePattern = options.input.includePattern;

		/* __GDPR__
			"findTextInFilesToolInvoked" : {
				"owner": "roblourens",
				"comment": "Telemetry for the findTextInFiles tool in multi-root workspaces",
				"requestId": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The id of the current request turn." },
				"model": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The model that invoked the tool" },
				"isMultiRoot": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Whether the workspace has multiple root folders" },
				"patternScopedToFolder": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Whether the includePattern was resolved to a specific workspace folder" },
				"patternStartsWithFolderPath": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Whether the raw includePattern starts with a workspace folder absolute path" },
				"patternContainsFolderPath": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Whether the raw includePattern contains a workspace folder absolute path anywhere" }
			}
		*/
		this.telemetryService.sendMSFTTelemetryEvent('findTextInFilesToolInvoked', {
			requestId: options.chatRequestId,
			model,
			isMultiRoot: String(isMultiRoot),
			patternScopedToFolder: String(!!globResult?.folderName),
			patternStartsWithFolderPath: String(!!includePattern && isAbsolute(includePattern) && !!this.workspaceService.getWorkspaceFolder(URI.file(includePattern))),
			patternContainsFolderPath: String(patternContainsWorkspaceFolderPath(includePattern, this.workspaceService)),
		});
	}

	/**
	 * Builds the localized summary shown for a finished search.
	 *
	 * @param isRegExp Whether the search is described as a regex search (otherwise as a text search).
	 * @param query The already Markdown-formatted query string.
	 * @param count Number of matches to report.
	 */
	private getResultMessage(isRegExp: boolean, query: string, count: number): MarkdownString {
		if (count === 0) {
			return isRegExp
				? new MarkdownString(l10n.t`Searched for regex ${query}, no results`)
				: new MarkdownString(l10n.t`Searched for text ${query}, no results`);
		} else if (count === 1) {
			return isRegExp
				? new MarkdownString(l10n.t`Searched for regex ${query}, 1 result`)
				: new MarkdownString(l10n.t`Searched for text ${query}, 1 result`);
		} else {
			return isRegExp
				? new MarkdownString(l10n.t`Searched for regex ${query}, ${count} results`)
				: new MarkdownString(l10n.t`Searched for text ${query}, ${count} results`);
		}
	}

	/** Returns whether `pattern` can be compiled by the JavaScript `RegExp` constructor. */
	private isValidRegex(pattern: string): boolean {
		try {
			new RegExp(pattern);
			return true;
		} catch {
			return false;
		}
	}

	/**
	 * Runs a single case-insensitive search and drains its result stream into an array.
	 *
	 * @param query The search pattern.
	 * @param isRegExp Whether `query` is interpreted as a regular expression.
	 * @param patterns Include globs, or `undefined` for no include restriction.
	 * @param maxResults Requested number of results; one extra is asked for so callers can tell that more exist.
	 * @param includeIgnoredFiles When truthy, exclude settings and ignore files are not applied.
	 * @param token Cancellation token, checked after every streamed item.
	 */
	private async searchAndCollectResults(query: string, isRegExp: boolean, patterns: vscode.GlobPattern[] | undefined, maxResults: number, includeIgnoredFiles: boolean | undefined, token: CancellationToken): Promise<vscode.TextSearchResult2[]> {
		const findOptions: vscode.FindTextInFilesOptions2 = {
			include: patterns ? patterns : undefined,
			maxResults: maxResults + 1,
			useExcludeSettings: includeIgnoredFiles ? ExcludeSettingOptions.None : ExcludeSettingOptions.SearchAndFilesExclude,
			useIgnoreFiles: includeIgnoredFiles ? { local: false, parent: false, global: false } : undefined,
			caseInsensitive: true,
		};

		const searchResult = this.searchService.findTextInFiles2(
			{
				pattern: query,
				isRegExp,
			},
			findOptions,
			token);

		const results: vscode.TextSearchResult2[] = [];
		for await (const item of searchResult.results) {
			checkCancellation(token);
			results.push(item);
		}

		// Necessary in case it was rejected
		await searchResult.complete;

		return results;
	}

	/**
	 * Produces the progress message shown while the tool runs.
	 *
	 * @param options Prepare options carrying the tool input.
	 * @param token Unused.
	 */
	prepareInvocation(options: vscode.LanguageModelToolInvocationPrepareOptions<IFindTextInFilesToolParams>, token: vscode.CancellationToken): vscode.ProviderResult<vscode.PreparedToolInvocation> {
		const isRegExp = options.input.isRegexp ?? true;
		const globResult = options.input.includePattern ? inputGlobToPattern(options.input.includePattern, this.workspaceService, undefined) : undefined;
		const query = this.formatQueryString(options.input, globResult);
		return {
			invocationMessage: isRegExp ?
				new MarkdownString(l10n.t`Searching for regex ${query}`) :
				new MarkdownString(l10n.t`Searching for text ${query}`),
		};
	}

	/**
	 * Formats text as a Markdown inline code span that is resilient to backticks within the text.
	 * It chooses a backtick fence one longer than the longest run of backticks in the content,
	 * and pads with a space when the content begins or ends with a backtick as per CommonMark.
	 */
	private formatCodeSpan(text: string): string {
		const matches = text.match(/`+/g);
		const maxRun = matches ? matches.reduce((m, s) => Math.max(m, s.length), 0) : 0;
		const fence = '`'.repeat(maxRun + 1);
		const needsPadding = text.startsWith('`') || text.endsWith('`');
		const inner = needsPadding ? ` ${text} ` : text;
		return `${fence}${inner}${fence}`;
	}

	/**
	 * Builds the Markdown description of a search: the query as a code span, optionally followed
	 * in parentheses by the folder and/or glob the search is scoped to.
	 *
	 * @param input The tool input (query and raw include pattern are read).
	 * @param globResult The resolved include pattern; when it names a folder, that takes precedence over the raw pattern.
	 */
	private formatQueryString(input: IFindTextInFilesToolParams, globResult?: InputGlobResult): string {
		const querySpan = this.formatCodeSpan(input.query);
		if (globResult?.folderName) {
			// Use the backtick-safe formatter for the folder name too, so a name containing
			// a backtick cannot break out of its code span.
			// NOTE(review): assumes folderName is a string - confirm against InputGlobResult.
			const folderSpan = this.formatCodeSpan(globResult.folderName);
			if (globResult.folderRelativePattern && globResult.folderRelativePattern !== '**') {
				return `${querySpan} (${folderSpan} \u00B7 ${this.formatCodeSpan(globResult.folderRelativePattern)})`;
			}
			return `${querySpan} (${folderSpan})`;
		}

		if (input.includePattern && input.includePattern !== '**/*') {
			const patternSpan = this.formatCodeSpan(input.includePattern);
			return `${querySpan} (${patternSpan})`;
		}

		return querySpan;
	}

	/**
	 * Normalizes the tool input before invocation.
	 *
	 * - An include pattern of exactly `**` is dropped.
	 * - A pattern that does not start with `**` + `/` or `/` and contains no `:` is prefixed with `**` + `/`.
	 * - A pattern ending in `/` gets `**` appended.
	 * - `maxResults` defaults to 200 in full-context mode and 20 otherwise; a value in `input` wins.
	 */
	async resolveInput(input: IFindTextInFilesToolParams, _promptContext: IBuildPromptContext, mode: CopilotToolMode): Promise<IFindTextInFilesToolParams> {
		let includePattern = input.includePattern;
		if (includePattern === '**') {
			includePattern = undefined;
		}

		if (includePattern && !includePattern.startsWith('**/') && !includePattern.startsWith('/') && !includePattern.includes(':')) {
			includePattern = `**/${includePattern}`;
		}

		if (includePattern && includePattern.endsWith('/')) {
			includePattern = `${includePattern}**`;
		}

		return {
			maxResults: mode === CopilotToolMode.FullContext ? 200 : 20,
			...input,
			includePattern,
		};
	}
}
// Register the tool class with the shared ToolRegistry as a side effect of loading this module.
ToolRegistry.registerTool(FindTextInFilesTool);
/** Props for {@link FindTextInFilesResult}. */
export interface FindTextInFilesResultProps extends BasePromptElementProps {
/** Raw search results; only entries that carry `ranges` are rendered. */
textResults: vscode.TextSearchResult2[];
/** Result limit used for the reported match count and for the "more results are available" hint. */
maxResults: number;
/** When true, the output notes that `maxResults` was capped. */
askedForTooManyResults?: boolean;
/** Extra guidance appended to the "No matches found." message when nothing matched. */
noMatchInstructions?: string;
}
/** Max number of preview characters kept before and after a match; anything beyond is cut and replaced by `...`. */
const MAX_CHARS_BETWEEN_MATCHES = 500;
/** Start priority for the preview lines of a match; each line's priority drops by its distance from the middle preview line so that context is gradually trimmed. */
const FIND_FILES_START_PRIORITY = 1000;
/**
 * Prompt element that renders text-search results for the model: a one-line summary
 * followed by one {@link FindMatch} per matched range.
 */
export class FindTextInFilesResult extends PromptElement<FindTextInFilesResultProps> {
	/**
	 * Renders either a "No matches found." message (plus any `noMatchInstructions`) or the
	 * match summary and the individual matches.
	 *
	 * @param state Unused.
	 * @param sizing Unused.
	 */
	override async render(state: void, sizing: PromptSizing): Promise<PromptPiece> {
		// Results without `ranges` are not matches and are skipped.
		const textMatches = this.props.textResults.filter(isTextSearchMatch);
		if (textMatches.length === 0) {
			const noMatchInstructions = this.props.noMatchInstructions ?? '';
			return <>No matches found.{noMatchInstructions}</>;
		}

		const numResults = textMatches.reduce((acc, result) => acc + result.ranges.length, 0);
		const resultCountToDisplay = Math.min(numResults, this.props.maxResults);
		// Pluralize on the number that is actually printed. Keying on numResults produced
		// "1 matches" whenever maxResults was 1 and more than one range had been found.
		const numResultsText = resultCountToDisplay === 1 ? '1 match' : `${resultCountToDisplay} matches`;
		const maxResultsText = numResults > this.props.maxResults ? ` (more results are available)` : '';
		const maxResultsTooLargeText = this.props.askedForTooManyResults ? ` (maxResults capped at ${MaxResultsCap})` : '';
		return <>
			{<TextChunk priority={20}>{numResultsText}{maxResultsText}{maxResultsTooLargeText}</TextChunk>}
			{textMatches.flatMap(result => {
				// The result preview line always ends in a newline, I think that makes sense but don't display an extra empty line
				const previewText = result.previewText.replace(/\n$/, '');
				return result.ranges.map(rangeInfo => {
					return <FindMatch
						passPriority
						preview={previewText}
						rangeInPreview={rangeInfo.previewRange}
						rangeInDocument={rangeInfo.sourceRange}
						uri={result.uri}
					/>;
				});
			})}
		</>;
	}
}
/** Props for {@link FindMatch}: one matched range together with its preview text. */
interface IFindMatchProps extends BasePromptElementProps {
/** Preview text that contains the match. */
preview: string;
/** Location of the match inside `preview`; its line and character values are used as zero-based. */
rangeInPreview: Range;
/** Location of the match inside the document; reported to the model as a one-based line number. */
rangeInDocument: Range;
/** The file that contains the match. */
uri: URI;
}
/**
 * Renders a single search match as a `<match>` tag:
 * 1. Trims the preview to at most MAX_CHARS_BETWEEN_MATCHES characters on either side of
 *    the match (marking cuts with `...`), e.g. to avoid giant minified lines
 * 2. Attaches the file path and the one-based line of the match as tag attributes
 * 3. Emits every preview line as its own chunk, with priority decreasing by distance from
 *    the middle preview line
 */
export class FindMatch extends PromptElement<IFindMatchProps> {
	constructor(
		props: PromptElementProps<IFindMatchProps>,
		@IPromptPathRepresentationService private readonly promptPathRepresentationService: IPromptPathRepresentationService,
	) {
		super(props);
	}

	override render(): PromptPiece {
		const { uri, preview, rangeInDocument, rangeInPreview } = this.props;

		// Translate the zero-based preview range into character offsets within `preview`.
		const converter = new OffsetLineColumnConverter(preview);
		const offsetOf = (position: { line: number; character: number }) =>
			converter.positionToOffset(new EditorPosition(position.line + 1, position.character + 1));
		const matchStartOffset = offsetOf(rangeInPreview.start);
		const matchEndOffset = offsetOf(rangeInPreview.end);

		// Line number of the first (possibly clipped) preview line. It is computed but not
		// consumed by the rendering below.
		let lineStartsAt = (rangeInDocument.start.line + 1) - count(preview.slice(0, matchStartOffset), '\n');

		// Clip the tail first: that leaves the offsets in front of the match untouched, so the
		// head can still be clipped using the original start offset.
		let clippedPreview = preview;
		const hasLongTail = preview.length - matchEndOffset > MAX_CHARS_BETWEEN_MATCHES;
		if (hasLongTail) {
			clippedPreview = preview.slice(0, matchEndOffset + MAX_CHARS_BETWEEN_MATCHES) + '...';
		}

		const hasLongHead = matchStartOffset > MAX_CHARS_BETWEEN_MATCHES;
		if (hasLongHead) {
			const headCut = matchStartOffset - MAX_CHARS_BETWEEN_MATCHES;
			lineStartsAt += count(preview.slice(0, headCut), '\n');
			clippedPreview = '...' + clippedPreview.slice(headCut);
		}

		const previewLines = clippedPreview.split('\n');
		const middleIndex = Math.floor(previewLines.length / 2);
		const matchLocation = new Location(this.props.uri, rangeInDocument);

		return <Tag name='match' attrs={{
			path: this.promptPathRepresentationService.getFilePath(uri),
			line: rangeInDocument.start.line + 1,
		}}>
			<references value={[new PromptReference(matchLocation, undefined, { isFromTool: true })]} />
			{previewLines.map((line, index) =>
				<TextChunk priority={FIND_FILES_START_PRIORITY - Math.abs(index - middleIndex)}>
					{line}
				</TextChunk>
			)}
		</Tag>;
	}
}
/**
 * Type guard telling actual matches apart from the other kinds of text search results.
 * A result counts as a match when it has a `ranges` property (own or inherited).
 */
export function isTextSearchMatch(obj: vscode.TextSearchResult2): obj is vscode.TextSearchMatch2 {
	const hasRanges = Reflect.has(obj, 'ranges');
	return hasRanges;
}