豆豆友情提示:这是一个非官方 GitHub 代理镜像,主要用于网络测试或访问加速。请勿在此进行登录、注册或处理任何敏感信息。进行这些操作请务必访问官方网站 github.com。 Raw 内容也通过此代理提供。
Skip to content

Commit 0298b57

Browse files
committed
chore: add basic eval
1 parent 002a2ba commit 0298b57

File tree

7 files changed

+344
-1
lines changed

7 files changed

+344
-1
lines changed

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,9 @@ yarn-error.log*
77
lerna-debug.log*
88
.pnpm-debug.log*
99

10+
trace.json
11+
trace.json.gz
12+
1013
# Diagnostic reports (https://nodejs.org/api/report.html)
1114
report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json
1215

GEMINI.md

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,3 +4,12 @@
44
- Use `npm run build` to run tsc and test build.
55
- Use `npm run test` to build and run tests, run all tests to verify correctness.
66
- Use `npm run test path-to-test.ts` to build and run a single test file, for example, `npm run test tests/McpContext.test.ts`.
7+
8+
## Rules for TypeScript
9+
10+
- Do not use `any` type.
11+
- Do not use the `as` keyword for type assertions.
12+
- Do not use the `!` non-null assertion operator.
13+
- Do not use `// @ts-ignore` comments.
14+
- Do not use `// @ts-nocheck` comments.
15+
- Do not use `// @ts-expect-error` comments.

package-lock.json

Lines changed: 11 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,8 @@
2121
"test:only": "npm run build && node scripts/test.mjs --test-only",
2222
"test:update-snapshots": "npm run build && node scripts/test.mjs --test-update-snapshots",
2323
"prepare": "node --experimental-strip-types scripts/prepare.ts",
24-
"verify-server-json-version": "node --experimental-strip-types scripts/verify-server-json-version.ts"
24+
"verify-server-json-version": "node --experimental-strip-types scripts/verify-server-json-version.ts",
25+
"eval": "npm run build && node --experimental-strip-types --test scripts/eval_gemini.ts"
2526
},
2627
"files": [
2728
"build/src",
@@ -39,6 +40,7 @@
3940
"mcpName": "io.github.ChromeDevTools/chrome-devtools-mcp",
4041
"devDependencies": {
4142
"@eslint/js": "^9.35.0",
43+
"@google/generative-ai": "^0.24.1",
4244
"@modelcontextprotocol/sdk": "1.25.2",
4345
"@rollup/plugin-commonjs": "^29.0.0",
4446
"@rollup/plugin-json": "^6.1.0",

scripts/eval_gemini.ts

Lines changed: 277 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,277 @@
1+
/**
2+
* @license
3+
* Copyright 2026 Google LLC
4+
* SPDX-License-Identifier: Apache-2.0
5+
*/
6+
7+
import fs from 'node:fs';
import path from 'node:path';
import {describe, test} from 'node:test';
import {fileURLToPath, pathToFileURL} from 'node:url';

import {
  GoogleGenerativeAI,
  type FunctionDeclaration,
  SchemaType,
} from '@google/generative-ai';
import {Client} from '@modelcontextprotocol/sdk/client/index.js';
import {StdioClientTransport} from '@modelcontextprotocol/sdk/client/stdio.js';
19+
20+
const __dirname = path.dirname(fileURLToPath(import.meta.url));
21+
const ROOT_DIR = path.resolve(__dirname, '..');
22+
const SCENARIOS_DIR = path.join(__dirname, 'eval_scenarios');
23+
24+
/**
 * One tool invocation requested by the model during a scenario run.
 */
export interface CapturedFunctionCall {
  /** Name of the tool that was invoked. */
  name: string;
  /** Arguments the tool was invoked with. */
  args: Record<string, unknown>;
}

/** Callback that verifies the tool calls captured during a scenario run. */
type ScenarioExpectations = (calls: CapturedFunctionCall[]) => void;

/**
 * Shape of the `scenario` object that every scenario module exports.
 */
interface TestScenario {
  /** Text sent to the model as the first user message. */
  prompt: string;
  /** Upper bound on the number of model turns that are processed. */
  maxTurns: number;
  /** Optional check invoked with every captured call once the run is over. */
  expectations?: ScenarioExpectations;
}
35+
36+
/**
 * Dynamically imports a scenario module and returns its `scenario` export.
 *
 * @param scenarioPath - Filesystem path of the scenario module.
 * @returns The exported scenario object.
 * @throws Error if the module has no `scenario` export, or if the export
 *   lacks a string `prompt` or a numeric `maxTurns`.
 */
async function loadScenario(scenarioPath: string): Promise<TestScenario> {
  // `import()` expects a URL-like specifier rather than a filesystem path.
  // Converting to a file: URL keeps Windows absolute paths and paths that
  // contain characters such as `#`, `?` or `%` loadable.
  const module = await import(pathToFileURL(scenarioPath).href);
  const scenario = module.scenario;
  if (!scenario) {
    throw new Error(
      `Scenario file ${scenarioPath} does not export a 'scenario' object.`,
    );
  }
  // Fail early on a malformed scenario: a missing `maxTurns` would otherwise
  // make the turn loop run zero times without any error.
  if (
    typeof scenario.prompt !== 'string' ||
    typeof scenario.maxTurns !== 'number'
  ) {
    throw new Error(
      `Scenario file ${scenarioPath} must export a 'scenario' with a string 'prompt' and a numeric 'maxTurns'.`,
    );
  }
  return scenario;
}
47+
48+
/** Schema keywords removed before the schema is handed to Gemini. */
const STRIPPED_SCHEMA_KEYWORDS = new Set([
  'default',
  'additionalProperties',
  'exclusiveMinimum',
]);

/** Returns true for non-null, non-array objects. */
function isRecord(v: unknown): v is Record<string, unknown> {
  return typeof v === 'object' && v !== null && !Array.isArray(v);
}

/**
 * Returns a deep copy of `schema` without the keywords listed in
 * STRIPPED_SCHEMA_KEYWORDS. Non-object inputs are returned unchanged.
 */
function cleanSchemaRecursive(schema: unknown): unknown {
  if (!isRecord(schema)) {
    return schema;
  }

  const out: Record<string, unknown> = {};
  for (const [key, value] of Object.entries(schema)) {
    if (STRIPPED_SCHEMA_KEYWORDS.has(key)) {
      continue;
    }

    if (key === 'properties' && isRecord(value)) {
      // The keys of a `properties` map are parameter names, not schema
      // keywords: a parameter that happens to be called `default` must be
      // kept, so only the nested schemas are cleaned here.
      out[key] = Object.fromEntries(
        Object.entries(value).map(([paramName, paramSchema]) => [
          paramName,
          cleanSchemaRecursive(paramSchema),
        ]),
      );
    } else if (Array.isArray(value)) {
      out[key] = value.map(cleanSchemaRecursive);
    } else {
      out[key] = cleanSchemaRecursive(value);
    }
  }
  return out;
}

/** Maps an MCP tool name to the name used for the Gemini declaration. */
function sanitizeToolName(name: string): string {
  return name.replace(/-/g, '_').replace(/\./g, '_');
}

/**
 * Runs one eval scenario end to end: starts the built MCP server over stdio,
 * exposes its tools to Gemini as function declarations, lets the model call
 * tools for up to `scenario.maxTurns` turns and finally runs the scenario's
 * `expectations` callback on every captured call.
 *
 * @param scenarioPath - Path of the scenario module (resolved to absolute).
 * @param apiKey - API key passed to `GoogleGenerativeAI`.
 * @throws Error if the built server entry point is missing; any error raised
 *   by the scenario loader, the SDKs or the `expectations` callback is
 *   propagated.
 */
async function runSingleScenario(
  scenarioPath: string,
  apiKey: string,
): Promise<void> {
  const absolutePath = path.resolve(scenarioPath);
  console.log(`\n### Running Scenario: ${absolutePath} ###`);

  let client: Client | undefined;
  let transport: StdioClientTransport | undefined;

  try {
    const scenario = await loadScenario(absolutePath);

    // Path to the compiled MCP server
    const serverPath = path.join(ROOT_DIR, 'build/src/index.js');
    if (!fs.existsSync(serverPath)) {
      throw new Error(
        `MCP server not found at ${serverPath}. Please run 'npm run build' first.`,
      );
    }

    // Forward the current environment, dropping undefined entries because
    // the transport's `env` is typed as Record<string, string>.
    const env: Record<string, string> = {};
    for (const [key, value] of Object.entries(process.env)) {
      if (value !== undefined) {
        env[key] = value;
      }
    }

    transport = new StdioClientTransport({
      command: 'node',
      args: [serverPath],
      env,
    });

    client = new Client(
      {name: 'gemini-eval-client', version: '1.0.0'},
      {capabilities: {}},
    );

    await client.connect(transport);

    const toolsResult = await client.listTools();
    const mcpTools = toolsResult.tools;

    // Convert MCP tools to Gemini function declarations
    const functionDeclarations: FunctionDeclaration[] = mcpTools.map(tool => ({
      name: sanitizeToolName(tool.name),
      description: tool.description?.substring(0, 1024) ?? '',
      // NOTE(review): cleanSchemaRecursive returns `unknown`, so the cast is
      // kept from the original; replacing it needs a real schema validator.
      parameters: cleanSchemaRecursive({
        type: SchemaType.OBJECT,
        properties:
          isRecord(tool.inputSchema) && 'properties' in tool.inputSchema
            ? tool.inputSchema.properties
            : {},
        required:
          isRecord(tool.inputSchema) &&
          'required' in tool.inputSchema &&
          Array.isArray(tool.inputSchema.required)
            ? tool.inputSchema.required
            : [],
      }) as FunctionDeclaration['parameters'],
    }));

    // Keep a map of sanitized names to original names for execution
    const contentToolsMap = new Map<string, string>();
    for (const tool of mcpTools) {
      contentToolsMap.set(sanitizeToolName(tool.name), tool.name);
    }

    const genAI = new GoogleGenerativeAI(apiKey);
    const model = genAI.getGenerativeModel({
      model: 'gemini-3-pro-preview',
      tools: [{functionDeclarations}],
    });

    const chat = model.startChat({
      systemInstruction: {
        role: 'system',
        parts: [{text: `Use available tools.`}],
      },
    });

    const expectations = scenario.expectations;
    const allCalls: CapturedFunctionCall[] = [];

    // Execute turns
    let turnCount = 0;
    console.log(`\n--- Turn 1 (User) ---`);
    console.log(scenario.prompt);

    let result = await chat.sendMessage(scenario.prompt);
    let response = result.response;

    while (turnCount < scenario.maxTurns) {
      turnCount++;
      console.log(`\n--- Turn ${turnCount} (Model) ---`);
      const text = response.text();
      if (text) {
        console.log(`Text: ${text}`);
      }

      const functionCalls = response.functionCalls();
      if (!functionCalls || functionCalls.length === 0) {
        console.log('No tool calls. Interaction finished.');
        break;
      }

      console.log(`Function Calls: ${JSON.stringify(functionCalls, null, 2)}`);

      const functionResponses = [];
      for (const call of functionCalls) {
        const originalName = contentToolsMap.get(call.name);
        if (!originalName) {
          // Report the problem to the model instead of aborting the run.
          console.error(`Unknown tool called: ${call.name}`);
          functionResponses.push({
            functionResponse: {
              name: call.name,
              response: {error: `Unknown tool: ${call.name}`},
            },
          });
          continue;
        }

        const safeArgs = isRecord(call.args) ? call.args : {};

        // Log the arguments that are actually sent to the tool.
        console.log(
          `Executing tool: ${originalName} with args: ${JSON.stringify(safeArgs)}`,
        );

        // Recorded before execution, so a failing tool call is still
        // visible to the scenario's expectations.
        allCalls.push({
          name: originalName,
          args: safeArgs,
        });

        try {
          const toolResult = await client.callTool({
            name: originalName,
            arguments: safeArgs,
          });

          functionResponses.push({
            functionResponse: {
              name: call.name,
              response: {name: call.name, content: toolResult},
            },
          });
        } catch (e) {
          const errorMessage = e instanceof Error ? e.message : String(e);
          console.error(`Error executing tool ${originalName}:`, e);
          functionResponses.push({
            functionResponse: {
              name: call.name,
              response: {error: errorMessage},
            },
          });
        }
      }

      // Send tool results back
      console.log(`Sending ${functionResponses.length} tool outputs back...`);
      result = await chat.sendMessage(functionResponses);
      response = result.response;
    }

    console.log('\nVerifying expectations...');
    if (expectations) {
      expectations(allCalls);
    }
  } finally {
    // Close the transport even if closing the client rejects, so a failed
    // client shutdown cannot skip the transport cleanup.
    try {
      if (client) {
        await client.close();
      }
    } finally {
      if (transport) {
        await transport.close();
      }
    }
  }
}
261+
262+
/**
 * Registers one test per `.ts` / `.js` file found in SCENARIOS_DIR. Each test
 * fails if GEMINI_API_KEY is not set, otherwise it runs the scenario.
 */
void describe('Gemini Eval Scenarios', () => {
  const apiKey = process.env.GEMINI_API_KEY;

  for (const file of fs.readdirSync(SCENARIOS_DIR)) {
    const isScenarioModule = file.endsWith('.ts') || file.endsWith('.js');
    if (!isScenarioModule) {
      continue;
    }

    void test(file, async () => {
      // The key is checked inside the test body, so a missing key is
      // reported as a failure of every scenario.
      if (!apiKey) {
        throw new Error('GEMINI_API_KEY environment variable is required.');
      }
      await runSingleScenario(path.join(SCENARIOS_DIR, file), apiKey);
    });
  }
});
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
/**
2+
* @license
3+
* Copyright 2026 Google LLC
4+
* SPDX-License-Identifier: Apache-2.0
5+
*/
6+
import assert from 'node:assert';
7+
8+
import {type CapturedFunctionCall} from '../eval_gemini.ts';
9+
10+
/**
 * Scenario: ask the model to open a page. A single turn is allowed and
 * exactly one `navigate_page` call with the requested URL is expected.
 */
export const scenario = {
  prompt: 'Navigate to https://developers.chrome.com and tell me if it worked.',
  maxTurns: 1,
  expectations: (calls: CapturedFunctionCall[]) => {
    const expectedCalls: CapturedFunctionCall[] = [
      {
        name: 'navigate_page',
        args: {url: 'https://developers.chrome.com'},
      },
    ];
    assert.deepStrictEqual(calls, expectedCalls);
  },
};
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
/**
2+
* @license
3+
* Copyright 2026 Google LLC
4+
* SPDX-License-Identifier: Apache-2.0
5+
*/
6+
import assert from 'node:assert';
7+
8+
import {type CapturedFunctionCall} from '../eval_gemini.ts';
9+
10+
/**
 * Scenario: ask the model to check a page's performance. Two turns are
 * allowed and exactly two tool calls are expected: first `navigate_page` or
 * `new_page`, then `performance_start_trace`.
 */
export const scenario = {
  prompt: 'Check the performance of https://developers.chrome.com',
  maxTurns: 2,
  expectations: (calls: CapturedFunctionCall[]) => {
    // Compare tool names with messages that include the observed values, so
    // a failing eval shows which tools the model actually called.
    const names = calls.map(call => call.name);
    assert.strictEqual(
      names.length,
      2,
      `Expected exactly 2 tool calls, got ${names.length}: ${JSON.stringify(names)}`,
    );
    const [first, second] = names;
    assert.ok(
      first === 'navigate_page' || first === 'new_page',
      `Expected first call to be 'navigate_page' or 'new_page', got '${first}'`,
    );
    assert.strictEqual(second, 'performance_start_trace');
  },
};

0 commit comments

Comments
 (0)