豆豆友情提示:这是一个非官方 GitHub 代理镜像,主要用于网络测试或访问加速。请勿在此进行登录、注册或处理任何敏感信息。进行这些操作请务必访问官方网站 github.com。 Raw 内容也通过此代理提供。
Skip to content

Commit 303a138

Browse files
authored
chore: add more eval scenarios (#780)
1 parent a106fba commit 303a138

File tree

4 files changed

+118
-0
lines changed

4 files changed

+118
-0
lines changed
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
/**
2+
* @license
3+
* Copyright 2026 Google LLC
4+
* SPDX-License-Identifier: Apache-2.0
5+
*/
6+
import assert from 'node:assert';
7+
8+
import type {TestScenario} from '../eval_gemini.ts';
9+
10+
/**
 * Eval scenario: asks the model to open the test page and check its console
 * messages.
 *
 * The page registered under `htmlRoute` runs an inline script that emits one
 * `console.log` and one `console.error` message.
 *
 * NOTE(review): `<TEST_URL>` is presumably replaced by the eval harness with
 * the URL of the `htmlRoute` page — confirm against eval_gemini.ts.
 */
export const scenario: TestScenario = {
  prompt: 'Navigate to <TEST_URL> and check the console messages.',
  maxTurns: 2,
  htmlRoute: {
    path: '/console_test.html',
    htmlContent: `
      <script>
        console.log('Test log message');
        console.error('Test error message');
      </script>
    `,
  },
  /**
   * Passes when the recorded tool calls contain at least one navigation call
   * (`navigate_page` or `new_page`) and at least one `list_console_messages`
   * call. Call order and call arguments are intentionally not checked.
   */
  expectations: calls => {
    // Either tool is accepted as a way of opening the page.
    const navigate = calls.find(
      c => c.name === 'navigate_page' || c.name === 'new_page',
    );
    const listMessages = calls.find(c => c.name === 'list_console_messages');

    assert.ok(navigate, 'Should navigate to the page');
    assert.ok(listMessages, 'Should list console messages');
  },
};
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
/**
2+
* @license
3+
* Copyright 2026 Google LLC
4+
* SPDX-License-Identifier: Apache-2.0
5+
*/
6+
import assert from 'node:assert';
7+
8+
import type {TestScenario} from '../eval_gemini.ts';
9+
10+
/**
 * Eval scenario: asks the model to emulate offline network conditions.
 *
 * No HTML route is registered for this scenario; only the tool call itself is
 * inspected.
 */
export const scenario: TestScenario = {
  prompt: 'Emulate offline network conditions.',
  maxTurns: 2,
  /**
   * Passes when an `emulate` call was recorded and the first such call has
   * `args.networkConditions` strictly equal to `'Offline'`.
   */
  expectations: calls => {
    const emulate = calls.find(c => c.name === 'emulate');
    // The existence check must come first: it guards the property access below.
    assert.ok(emulate, 'Should call emulate tool');
    assert.strictEqual(emulate.args.networkConditions, 'Offline');
  },
};
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
/**
2+
* @license
3+
* Copyright 2026 Google LLC
4+
* SPDX-License-Identifier: Apache-2.0
5+
*/
6+
import assert from 'node:assert';
7+
8+
import type {TestScenario} from '../eval_gemini.ts';
9+
10+
/**
 * Eval scenario: asks the model to open the test page, type "hello world"
 * into its text input, and click its button.
 *
 * The page registered under `htmlRoute` contains a single text input
 * (`#test-input`) and a single button (`#test-button`).
 *
 * NOTE(review): `<TEST_URL>` is presumably replaced by the eval harness with
 * the URL of the `htmlRoute` page — confirm against eval_gemini.ts.
 */
export const scenario: TestScenario = {
  prompt:
    'Go to <TEST_URL>, fill the input with "hello world" and click the button.',
  maxTurns: 3,
  htmlRoute: {
    path: '/input_test.html',
    htmlContent: `
      <input type="text" id="test-input" />
      <button id="test-button">Submit</button>
    `,
  },
  /**
   * Passes when a navigation call (`navigate_page` or `new_page`), a `fill`
   * call, and a `click` call were all recorded, and the first `fill` call has
   * `args.value` strictly equal to `'hello world'`.
   */
  expectations: calls => {
    // The expected sequence is navigate -> fill -> click, but the model may
    // take a snapshot in between or issue calls in parallel. Only the presence
    // of each tool call is checked, not the order.
    const navigate = calls.find(
      c => c.name === 'navigate_page' || c.name === 'new_page',
    );
    const fill = calls.find(c => c.name === 'fill');
    const click = calls.find(c => c.name === 'click');

    assert.ok(navigate, 'Should navigate to the page');
    assert.ok(fill, 'Should fill the input');
    assert.ok(click, 'Should click the button');

    assert.strictEqual(fill.args.value, 'hello world');
  },
};
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
/**
2+
* @license
3+
* Copyright 2026 Google LLC
4+
* SPDX-License-Identifier: Apache-2.0
5+
*/
6+
import assert from 'node:assert';
7+
8+
import type {TestScenario} from '../eval_gemini.ts';
9+
10+
/**
 * Eval scenario: asks the model to open the test page and list all network
 * requests.
 *
 * The page registered under `htmlRoute` fetches its own path from an inline
 * script so that at least one request is made.
 *
 * NOTE(review): `<TEST_URL>` is presumably replaced by the eval harness with
 * the URL of the `htmlRoute` page — confirm against eval_gemini.ts.
 */
export const scenario: TestScenario = {
  prompt: 'Navigate to <TEST_URL> and list all network requests.',
  maxTurns: 2,
  htmlRoute: {
    path: '/network_test.html',
    htmlContent: `
      <h1>Network Test</h1>
      <script>
        fetch('/network_test.html'); // Self fetch to ensure at least one request
      </script>
    `,
  },
  /**
   * Passes when the recorded tool calls contain at least one navigation call
   * (`navigate_page` or `new_page`) and at least one `list_network_requests`
   * call. Call order and call arguments are intentionally not checked.
   */
  expectations: calls => {
    // Either tool is accepted as a way of opening the page.
    const navigate = calls.find(
      c => c.name === 'navigate_page' || c.name === 'new_page',
    );
    const listRequests = calls.find(c => c.name === 'list_network_requests');

    assert.ok(navigate, 'Should navigate to the page');
    assert.ok(listRequests, 'Should list network requests');
  },
};

0 commit comments

Comments
 (0)