豆豆友情提示:这是一个非官方 GitHub 代理镜像,主要用于网络测试或访问加速。请勿在此进行登录、注册或处理任何敏感信息。进行这些操作请务必访问官方网站 github.com。 Raw 内容也通过此代理提供。
Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 1 addition & 3 deletions src/extension/conversation/vscode-node/feedbackReporter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -161,14 +161,12 @@ export class FeedbackReporter extends Disposable implements IFeedbackReporter {
"rankResultsCount": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "Count of the results from copilot search ranking." },
"combinedResultsCount": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "Count of combined results from copilot search." },
"chunkSearchDuration": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "isMeasurement": true, "comment": "Duration of the chunk search" },
"llmFilteringDuration": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "isMeasurement": true, "comment": "Duration of the LLM filtering" },
"strategy": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Indicates the strategy used for the search." }
"llmFilteringDuration": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "isMeasurement": true, "comment": "Duration of the LLM filtering" }
}
*/
this.telemetryService.sendMSFTTelemetryEvent('copilot.search.feedback', {
kind,
rankResult: SemanticSearchTextSearchProvider.feedBackTelemetry.rankResult,
strategy: SemanticSearchTextSearchProvider.feedBackTelemetry.strategy,
}, {
chunkCount: SemanticSearchTextSearchProvider.feedBackTelemetry.chunkCount,
rankResultsCount: SemanticSearchTextSearchProvider.feedBackTelemetry.rankResultsCount,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ export interface ISearchFeedbackTelemetry {
llmSelectedCount?: number;
rawLlmRankingResultsCount?: number;
parseResult?: string;
strategy?: string;

llmBestInRerank?: number;
llmWorstInRerank?: number;
}
Expand Down Expand Up @@ -170,7 +170,6 @@ export class SemanticSearchTextSearchProvider implements vscode.AITextSearchProv
);
SemanticSearchTextSearchProvider.feedBackTelemetry.chunkSearchDuration = Date.now() - chunkSearchDuration;
SemanticSearchTextSearchProvider.feedBackTelemetry.chunkCount = result.chunks.length;
SemanticSearchTextSearchProvider.feedBackTelemetry.strategy = result.strategy;
this.treeSitterAIKeywords(query, progress, result.chunks.map(chunk => chunk.chunk), token);

const chunkResults = result.chunks.map(c => c.chunk);
Expand Down Expand Up @@ -317,15 +316,13 @@ export class SemanticSearchTextSearchProvider implements vscode.AITextSearchProv
"llmSelectedCount": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "Number of chunks selected by LLM from the initial retrieval." },
"rawLlmRankingResultsCount": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "Number of raw results returned by the LLM." },
"parseResult": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Indicates the result of parsing the LLM response." },
"strategy": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Indicates the strategy used for the search." },
"llmBestInRerank": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "Best rank (lowest index) among LLM-selected chunks in the reranked results." },
"llmWorstInRerank": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "Worst rank (highest index) among LLM-selected chunks in the reranked results." }
}
*/
this._telemetryService.sendMSFTTelemetryEvent('copilot.search.request', {
rankResult: SemanticSearchTextSearchProvider.feedBackTelemetry.rankResult,
parseResult: SemanticSearchTextSearchProvider.feedBackTelemetry.parseResult,
strategy: SemanticSearchTextSearchProvider.feedBackTelemetry.strategy,
}, {
chunkCount: SemanticSearchTextSearchProvider.feedBackTelemetry.chunkCount,
rankResultsCount: SemanticSearchTextSearchProvider.feedBackTelemetry.rankResultsCount,
Expand Down
38 changes: 0 additions & 38 deletions src/platform/workspaceChunkSearch/common/workspaceChunkSearch.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@

import type * as vscode from 'vscode';
import { GlobIncludeOptions } from '../../../util/common/glob';
import { TelemetryCorrelationId } from '../../../util/common/telemetryCorrelationId';
import { CancellationToken } from '../../../util/vs/base/common/cancellation';
import { FileChunkAndScore } from '../../chunking/common/chunk';
import { Embedding } from '../../embeddings/common/embeddingsComputer';
Expand All @@ -22,14 +21,6 @@ export interface WorkspaceChunkQueryWithEmbeddings extends WorkspaceChunkQuery {
resolveQueryEmbeddings(token: CancellationToken): Promise<Embedding>;
}

/**
* Internal ids used to identify strategies in telemetry.
*/
export enum WorkspaceChunkSearchStrategyId {
Embeddings = 'ada',// Do not change value as it's used for telemetry
CodeSearch = 'codesearch',
}

/**
* Sizing hints for the search strategy.
*/
Expand All @@ -53,32 +44,3 @@ export type WorkspaceSearchAlert =
| vscode.ChatResponseWarningPart
| vscode.ChatResponseCommandButtonPart
| vscode.ChatResponseMarkdownPart;

export interface IWorkspaceChunkSearchStrategy {
readonly id: WorkspaceChunkSearchStrategyId;

/**
* Invoked before the search is performed.
*
* This can be used to prompt the user or perform other actions.
*
* Unlike time spent in `searchWorkspace`, this method will not count towards timeouts
*/
prepareSearchWorkspace?(
telemetryInfo: TelemetryCorrelationId,
token: CancellationToken,
): Promise<void>;

/**
* Takes search queries and returns the chunks of text that are most semantically similar to any of the queries.
*
* @return Either the result (which may have zero chunks) or undefined if the search could not be performed.
*/
searchWorkspace(
sizing: StrategySearchSizing,
query: WorkspaceChunkQueryWithEmbeddings,
options: WorkspaceChunkSearchOptions,
telemetryInfo: TelemetryCorrelationId,
token: CancellationToken
): Promise<StrategySearchResult | undefined>;
}
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ import { isGitHubRemoteRepository } from '../../../remoteRepositories/common/uti
import { IExperimentationService } from '../../../telemetry/common/nullExperimentationService';
import { ITelemetryService } from '../../../telemetry/common/telemetry';
import { IWorkspaceService } from '../../../workspace/common/workspaceService';
import { IWorkspaceChunkSearchStrategy, StrategySearchResult, StrategySearchSizing, WorkspaceChunkQueryWithEmbeddings, WorkspaceChunkSearchOptions, WorkspaceChunkSearchStrategyId } from '../../common/workspaceChunkSearch';
import { StrategySearchResult, StrategySearchSizing, WorkspaceChunkQueryWithEmbeddings, WorkspaceChunkSearchOptions } from '../../common/workspaceChunkSearch';
import { EmbeddingsChunkSearch } from '../embeddingsChunkSearch';

import { WorkspaceChunkEmbeddingsIndex } from '../workspaceChunkEmbeddingsIndex';
Expand Down Expand Up @@ -87,9 +87,7 @@ interface AvailableFailureMetadata {
* ChunkSearch strategy that first calls the Github code search API to get a context window of files that are similar to the query.
* Then it uses the embeddings index to find the most similar chunks in the context window.
*/
export class CodeSearchChunkSearch extends Disposable implements IWorkspaceChunkSearchStrategy {

readonly id = WorkspaceChunkSearchStrategyId.CodeSearch;
export class CodeSearchChunkSearch extends Disposable {

/**
* Maximum number of files that have changed from what code search has indexed.
Expand Down Expand Up @@ -680,7 +678,7 @@ export class CodeSearchChunkSearch extends Disposable implements IWorkspaceChunk
if (diffArray.length <= embeddingsMaxFiles) {
const batchInfo = new ComputeBatchInfo();
const result = await this._embeddingsChunkSearch.searchSubsetOfFiles(sizing, query, diffArray, subSearchOptions, { info: innerTelemetryInfo, batchInfo }, token);
return { ...result, strategyId: this._embeddingsChunkSearch.id, embeddingsComputeInfo: batchInfo };
return { ...result, strategyId: 'localEmbeddings', embeddingsComputeInfo: batchInfo };
} else {
// No way to search out-of-sync files; caller will use code search results alone and warn the user
this._logService.debug(`CodeSearchChunkSearch.searchLocalDiff: ${diffArray.length} out-of-sync files exceeds threshold (${embeddingsMaxFiles}), skipping local diff search`);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ import { ICodeSearchAuthenticationService } from '../../remoteCodeSearch/node/co
import { ISimulationTestContext } from '../../simulationTestContext/common/simulationTestContext';
import { IExperimentationService } from '../../telemetry/common/nullExperimentationService';
import { ITelemetryService } from '../../telemetry/common/telemetry';
import { IWorkspaceChunkSearchStrategy, StrategySearchResult, StrategySearchSizing, WorkspaceChunkQueryWithEmbeddings, WorkspaceChunkSearchOptions, WorkspaceChunkSearchStrategyId } from '../common/workspaceChunkSearch';
import { StrategySearchResult, StrategySearchSizing, WorkspaceChunkQueryWithEmbeddings, WorkspaceChunkSearchOptions } from '../common/workspaceChunkSearch';
import { BuildIndexTriggerReason } from './codeSearch/codeSearchRepo';
import { WorkspaceChunkEmbeddingsIndex } from './workspaceChunkEmbeddingsIndex';
import { IWorkspaceFileIndex } from './workspaceFileIndex';
Expand All @@ -41,9 +41,7 @@ export enum LocalEmbeddingsIndexStatus {
*
* This can be costly so it is only available for smaller workspaces.
*/
export class EmbeddingsChunkSearch extends Disposable implements IWorkspaceChunkSearchStrategy {

readonly id = WorkspaceChunkSearchStrategyId.Embeddings;
export class EmbeddingsChunkSearch extends Disposable {

/** Max workspace size that will be automatically indexed. */
private static readonly defaultAutomaticIndexingFileCap = 750;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ import { StopWatch } from '../../../util/vs/base/common/stopwatch';
import { IInstantiationService } from '../../../util/vs/platform/instantiation/common/instantiation';
import { ChatResponseProgressPart2, ChatResponseWarningPart } from '../../../vscodeTypes';
import { IAuthenticationService } from '../../authentication/common/authentication';
import { IAuthenticationChatUpgradeService } from '../../authentication/common/authenticationUpgrade';
import { FileChunk, FileChunkAndScore } from '../../chunking/common/chunk';
import { MAX_CHUNK_SIZE_TOKENS } from '../../chunking/node/naiveChunker';
import { distance, Embedding, EmbeddingDistance, Embeddings, EmbeddingType, IEmbeddingsComputer } from '../../embeddings/common/embeddingsComputer';
Expand All @@ -29,14 +28,13 @@ import { logExecTime, LogExecTime } from '../../log/common/logExecTime';
import { ILogService } from '../../log/common/logService';
import { IChatEndpoint } from '../../networking/common/networking';
import { ISimulationTestContext } from '../../simulationTestContext/common/simulationTestContext';
import { IExperimentationService } from '../../telemetry/common/nullExperimentationService';
import { ITelemetryService } from '../../telemetry/common/telemetry';
import { getWorkspaceFileDisplayPath, IWorkspaceService } from '../../workspace/common/workspaceService';
import { IGithubAvailableEmbeddingTypesService } from '../common/githubAvailableEmbeddingTypes';
import { IRerankerService } from '../common/rerankerService';
import { StrategySearchResult, StrategySearchSizing, WorkspaceChunkQuery, WorkspaceChunkQueryWithEmbeddings, WorkspaceChunkSearchOptions, WorkspaceChunkSearchStrategyId, WorkspaceSearchAlert } from '../common/workspaceChunkSearch';
import { StrategySearchResult, StrategySearchSizing, WorkspaceChunkQuery, WorkspaceChunkQueryWithEmbeddings, WorkspaceChunkSearchOptions, WorkspaceSearchAlert } from '../common/workspaceChunkSearch';
import { CodeSearchChunkSearch, CodeSearchRemoteIndexState } from './codeSearch/codeSearchChunkSearch';
import { BuildIndexTriggerReason, CodeSearchRepoStatus, TriggerIndexingError } from './codeSearch/codeSearchRepo';
import { BuildIndexTriggerReason, TriggerIndexingError } from './codeSearch/codeSearchRepo';
import { IWorkspaceFileIndex } from './workspaceFileIndex';

const maxEmbeddingSpread = 0.65;
Expand All @@ -49,7 +47,6 @@ interface ScoredFileChunk<T extends FileChunk = FileChunk> {
export interface WorkspaceChunkSearchResult {
readonly chunks: readonly FileChunkAndScore[];
readonly alerts?: readonly WorkspaceSearchAlert[];
readonly strategy?: string;
}

export interface WorkspaceChunkSearchSizing {
Expand Down Expand Up @@ -88,17 +85,12 @@ export interface IWorkspaceChunkSearchService extends IDisposable {
}


interface StrategySearchOk {
readonly strategy: WorkspaceChunkSearchStrategyId;
readonly result: StrategySearchResult;
}

interface StrategySearchErr {
readonly errorDiagMessage: string;
alerts?: readonly WorkspaceSearchAlert[];
}

type StrategySearchOutcome = Result<StrategySearchOk, StrategySearchErr>;
type StrategySearchOutcome = Result<StrategySearchResult, StrategySearchErr>;

export class WorkspaceChunkSearchService extends Disposable implements IWorkspaceChunkSearchService {
declare readonly _serviceBrand: undefined;
Expand Down Expand Up @@ -213,9 +205,7 @@ class WorkspaceChunkSearchServiceImpl extends Disposable implements IWorkspaceCh
constructor(
private readonly _embeddingType: EmbeddingType,
@IInstantiationService instantiationService: IInstantiationService,
@IAuthenticationChatUpgradeService private readonly _authUpgradeService: IAuthenticationChatUpgradeService,
@IEmbeddingsComputer private readonly _embeddingsComputer: IEmbeddingsComputer,
@IExperimentationService private readonly _experimentationService: IExperimentationService,
@IIgnoreService private readonly _ignoreService: IIgnoreService,
@ILogService private readonly _logService: ILogService,
@IRerankerService private readonly _rerankerService: IRerankerService,
Expand All @@ -236,14 +226,6 @@ class WorkspaceChunkSearchServiceImpl extends Disposable implements IWorkspaceCh
250
)(() => this._onDidChangeIndexState.fire()));

this._register(this._authUpgradeService.onDidGrantAuthUpgrade(() => {
if (this._experimentationService.getTreatmentVariable<boolean>('copilotchat.workspaceChunkSearch.shouldRemoteIndexOnAuthUpgrade') ?? true) {
void this.triggerRemoteIndexing('auto', () => { }, new TelemetryCorrelationId('onDidGrantAuthUpgrade'), CancellationToken.None).catch(e => {
// noop
});
}
}));

/* __GDPR__
"workspaceChunkSearch.created" : {
"owner": "mjbvz",
Expand All @@ -263,12 +245,7 @@ class WorkspaceChunkSearchServiceImpl extends Disposable implements IWorkspaceCh
}

async isAvailable(): Promise<boolean> {
if (this._experimentationService.getTreatmentVariable<boolean>('copilotchat.workspaceChunkSearch.markAllSearchesSlow')) {
return false;
}

const indexState = await this.getIndexState();
return (indexState.remoteIndexState.status === 'loaded' && indexState.remoteIndexState.repos.length > 0 && indexState.remoteIndexState.repos.every(repo => repo.status === CodeSearchRepoStatus.Ready));
return this._codeSearchChunkSearch.isAvailable(new TelemetryCorrelationId('WorkspaceChunkSearchServiceImpl.isAvailable'), false, CancellationToken.None);
}

triggerRemoteIndexing(trigger: BuildIndexTriggerReason, onProgress: (message: string) => void, telemetryInfo: TelemetryCorrelationId, token: CancellationToken): Promise<Result<true, TriggerIndexingError>> {
Expand Down Expand Up @@ -325,7 +302,7 @@ class WorkspaceChunkSearchServiceImpl extends Disposable implements IWorkspaceCh
}
*/
this._telemetryService.sendMSFTTelemetryEvent('workspaceChunkSearchStrategy', {
strategy: searchResult.isOk() ? searchResult.val.strategy : 'none',
strategy: searchResult.isOk() ? 'codesearch' : 'none', // For backwards compatibility with existing telemetry only
errorDiagMessage: searchResult.isError() ? searchResult.err.errorDiagMessage : undefined,
embeddingType: this._embeddingType.id,
workspaceSearchSource: telemetryInfo.callTracker.toString(),
Expand All @@ -348,9 +325,9 @@ class WorkspaceChunkSearchServiceImpl extends Disposable implements IWorkspaceCh
};
}

this._logService.trace(`WorkspaceChunkSearch.searchFileChunks: found ${searchResult.val.result.chunks.length} chunks using '${searchResult.val.strategy}'`);
this._logService.trace(`WorkspaceChunkSearch.searchFileChunks: found ${searchResult.val.chunks.length} chunks'`);
Comment thread
mjbvz marked this conversation as resolved.
Outdated

const filteredChunks = await raceCancellationError(this.filterIgnoredChunks(searchResult.val.result.chunks), token);
const filteredChunks = await raceCancellationError(this.filterIgnoredChunks(searchResult.val.chunks), token);
if (this._simulationTestContext.isInSimulationTests) {
if (!filteredChunks.length) {
throw new Error('No chunks returned');
Expand All @@ -360,7 +337,7 @@ class WorkspaceChunkSearchServiceImpl extends Disposable implements IWorkspaceCh
const filteredResult = {
...searchResult.val,
result: {
alerts: searchResult.val.result.alerts,
alerts: searchResult.val.alerts,
chunks: filteredChunks,
}
};
Comment thread
mjbvz marked this conversation as resolved.
Expand All @@ -372,7 +349,6 @@ class WorkspaceChunkSearchServiceImpl extends Disposable implements IWorkspaceCh
return {
chunks: reranked.slice(0, this.getMaxChunks(sizing)),
alerts: filteredResult.result.alerts,
strategy: filteredResult.strategy,
};
} catch (e) {
this._logService.error(e, 'Reranker service failed; falling back to local rerank');
Expand Down Expand Up @@ -433,17 +409,14 @@ class WorkspaceChunkSearchServiceImpl extends Disposable implements IWorkspaceCh

const result = await raceCancellationError(this._codeSearchChunkSearch.searchWorkspace(sizing, query, options, telemetryInfo, token), token);
if (result) {
return Result.ok<StrategySearchOk>({
strategy: this._codeSearchChunkSearch.id,
result: result,
});
return Result.ok<StrategySearchResult>(result);
}
} catch (e) {
if (isCancellationError(e)) {
throw e;
}

this._logService.error(e, `Error during ${this._codeSearchChunkSearch.id} search`);
this._logService.error(e, `Error during code search chunk search`);
}

return Result.error<StrategySearchErr>({
Expand Down Expand Up @@ -477,13 +450,12 @@ class WorkspaceChunkSearchServiceImpl extends Disposable implements IWorkspaceCh
}

@LogExecTime(self => self._logService, 'WorkspaceChunkSearch::rerankResultIfNeeded')
private async rerankResultIfNeeded(query: WorkspaceChunkQueryWithEmbeddings, result: StrategySearchOk, maxResults: number, telemetryInfo: TelemetryCorrelationId, progress: vscode.Progress<vscode.ChatResponsePart> | undefined, token: CancellationToken): Promise<WorkspaceChunkSearchResult> {
const chunks = result.result.chunks;
private async rerankResultIfNeeded(query: WorkspaceChunkQueryWithEmbeddings, result: StrategySearchResult, maxResults: number, telemetryInfo: TelemetryCorrelationId, progress: vscode.Progress<vscode.ChatResponsePart> | undefined, token: CancellationToken): Promise<WorkspaceChunkSearchResult> {
const chunks = result.chunks;
const orderedChunks = await this.rerankChunks(query, chunks, maxResults, telemetryInfo, progress, token);
return {
chunks: orderedChunks,
alerts: result.result.alerts,
strategy: result.strategy,
alerts: result.alerts,
};
}

Expand Down
Loading