豆豆友情提示:这是一个非官方 GitHub 代理镜像,主要用于网络测试或访问加速。请勿在此进行登录、注册或处理任何敏感信息。进行这些操作请务必访问官方网站 github.com。 Raw 内容也通过此代理提供。
Skip to content

Commit 482a288

Browse files
authored
chore: evaluate select_page scenario (#925)
Passes the test with gemini-2.5-flash with no errors. Closes #921
1 parent 6a3ca98 commit 482a288

File tree

2 files changed

+43
-1
lines changed

2 files changed

+43
-1
lines changed

scripts/eval_gemini.ts

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -142,7 +142,9 @@ async function runSingleScenario(
142142
name: request.name,
143143
args: (request.arguments as Record<string, unknown>) || {},
144144
});
145-
return originalCallTool(request, schema);
145+
const response = await originalCallTool(request, schema);
146+
debugLog(`Tool response: ${JSON.stringify(response)}`);
147+
return response;
146148
};
147149

148150
const ai = new GoogleGenAI({apiKey});
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
/**
2+
* @license
3+
* Copyright 2026 Google LLC
4+
* SPDX-License-Identifier: Apache-2.0
5+
*/
6+
7+
import assert from 'node:assert';
8+
9+
import type {TestScenario} from '../eval_gemini.ts';
10+
11+
/**
 * Eval scenario for the `select_page` tool.
 *
 * The prompt asks the model to open two new pages and then select the first
 * of them. `expectations` then asserts the exact sequence of three tool calls
 * it receives: `new_page`, `new_page`, `select_page`.
 *
 * NOTE(review): `<TEST_URL>` is presumably substituted by the eval runner with
 * the URL serving `htmlRoute` — confirm against eval_gemini.ts.
 */
export const scenario: TestScenario = {
12+
prompt:
13+
'Open new page <TEST_URL> and then open new page https://developers.chrome.com. Select the <TEST_URL> page.',
14+
// Same number as the tool calls asserted in `expectations` below.
// NOTE(review): what counts as a "turn" is defined by the runner — confirm.
maxTurns: 3,
15+
// Minimal HTML fixture: a single heading served at /test.html.
htmlRoute: {
16+
path: '/test.html',
17+
htmlContent: `
18+
<h1>test</h1>
19+
`,
20+
},
21+
// Throws (via node:assert) if the received tool calls deviate from the
// expected sequence or arguments.
expectations: calls => {
22+
// Exactly three calls: no extra or missing tool invocations are tolerated.
assert.strictEqual(calls.length, 3);
23+
// NOTE(review): the failure messages say "navigation" but the check is for
// the `new_page` tool name specifically.
assert.ok(calls[0].name === 'new_page', 'First call should be navigation');
24+
assert.ok(calls[1].name === 'new_page', 'Second call should be navigation');
25+
assert.ok(
26+
calls[2].name === 'select_page',
27+
'Third call should be select_page',
28+
);
29+
// The page to select is identified by its numeric id; see the assertion
// message for the expected id assignment.
assert.strictEqual(
30+
calls[2].args.pageId,
31+
2,
32+
'PageId has to be set to 2. about:blank is 1, <TEST_URL> is 2, https://developers.chrome.com is 3.',
33+
);
34+
// The model must not pass `bringToFront` at all, so the tool's default applies.
assert.strictEqual(
35+
calls[2].args.bringToFront,
36+
undefined,
37+
'bringToFront should use the default value.',
38+
);
39+
},
40+
};

0 commit comments

Comments
 (0)