豆豆友情提示:这是一个非官方 GitHub 代理镜像,主要用于网络测试或访问加速。请勿在此进行登录、注册或处理任何敏感信息。进行这些操作请务必访问官方网站 github.com。 Raw 内容也通过此代理提供。
Skip to content

Commit caf601a

Browse files
authored
feat: add pageId routing for parallel multi-agent workflows (#1022)
## Summary

Adds optional `pageId` routing to page-scoped tools, gated behind `--experimental-page-id-routing`. When enabled, multi-agent callers can target a specific page without relying on global selection state. Fully backward-compatible: without the flag, behavior is unchanged.

### Key changes

- **`pageScoped` annotation**: tools declare `pageScoped: true`; the server merges `pageId` into their schema at registration time (when the flag is on)
- **`McpPage` wrapper**: consolidates per-page state (numeric id, isolated context name, focus tracking) into a single class
- **Request-scoped page routing**: `resolvePageById()` resolves the target page, `setRequestPage()` threads it through the handler so tools like `getSelectedPage()` see the right page
- **`assertPageIsFocused`**: keyboard/input tools validate that the target page holds browser focus, returning an actionable error ("call select_page first") instead of silently dispatching to the wrong page
- **`--experimental-page-id-routing` CLI flag** (hidden): gates schema injection and request-scoped routing so the feature can be tested before graduating
- **Eval scenarios**: `page_id_routing_test` and `page_focus_keyboard_test` with `serverArgs` support in the eval harness

Addresses #1019
1 parent e4db250 commit caf601a

24 files changed

+1517
-225
lines changed

scripts/eval_gemini.ts

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,8 @@ export interface TestScenario {
3333
path: string;
3434
htmlContent: string;
3535
};
36+
/** Extra CLI flags passed to the MCP server (e.g. '--experimental-page-id-routing'). */
37+
serverArgs?: string[];
3638
}
3739

3840
async function loadScenario(scenarioPath: string): Promise<TestScenario> {
@@ -117,6 +119,9 @@ async function runSingleScenario(
117119
if (!debug) {
118120
args.push('--headless');
119121
}
122+
if (scenario.serverArgs) {
123+
args.push(...scenario.serverArgs);
124+
}
120125

121126
transport = new StdioClientTransport({
122127
command: 'node',
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
/**
2+
* @license
3+
* Copyright 2026 Google LLC
4+
* SPDX-License-Identifier: Apache-2.0
5+
*/
6+
7+
import assert from 'node:assert';
8+
9+
import type {TestScenario} from '../eval_gemini.ts';
10+
11+
/**
 * Eval scenario for keyboard input on a page that was not selected first.
 *
 * The MCP server is started with `--experimental-page-id-routing`. The model
 * is asked to open two pages in the isolated context "session" and then use
 * `press_key` on Page 1 without selecting it, recovering from any error.
 *
 * `expectations` inspects the recorded tool calls and accepts three shapes:
 *  - no `select_page` at all (the model recovered some other way);
 *  - `select_page` before the first `press_key` (proactive selection);
 *  - `press_key`, then `select_page`, then at least one more `press_key`
 *    (error followed by recovery).
 */
export const scenario: TestScenario = {
  serverArgs: ['--experimental-page-id-routing'],
  prompt: `Open two pages in the same isolated context "session":
- Page 1 at data:text/html,<textarea id="ta"></textarea>
- Page 2 at data:text/html,<h1>Other</h1>

Now use the press_key tool to type "a" on Page 1 without selecting it first. You must use press_key, not fill or type_text. If you encounter any errors, recover from them.`,
  maxTurns: 10,
  expectations: calls => {
    // Exactly two pages must be opened, both in the "session" context.
    const newPages = calls.filter(c => c.name === 'new_page');
    assert.strictEqual(newPages.length, 2, 'Should open 2 pages');
    assert.strictEqual(newPages[0].args.isolatedContext, 'session');
    assert.strictEqual(newPages[1].args.isolatedContext, 'session');

    // press_key must be attempted at least once.
    const pressKeys = calls.filter(c => c.name === 'press_key');
    assert.ok(pressKeys.length >= 1, 'Should attempt press_key at least once');

    const firstSelectPageIndex = calls.findIndex(c => c.name === 'select_page');
    if (firstSelectPageIndex === -1) {
      // No select_page was called: the model found another recovery path.
      // This is acceptable as long as press_key was attempted.
      return;
    }

    const firstPressKeyIndex = calls.findIndex(c => c.name === 'press_key');
    if (firstSelectPageIndex < firstPressKeyIndex) {
      // Proactive path: the page was selected before any press_key, so the
      // required ordering already holds and there is nothing more to check.
      return;
    }

    // Error path: press_key was attempted before select_page (presumably
    // rejected by the server's focus check). Recovery requires another
    // press_key somewhere after the first select_page.
    assert.ok(
      pressKeys.length >= 2,
      'Should retry press_key after error recovery',
    );
    const lastPressKeyIndex = calls.lastIndexOf(
      pressKeys[pressKeys.length - 1],
    );
    assert.ok(
      firstSelectPageIndex < lastPressKeyIndex,
      'select_page should precede the successful press_key',
    );
  },
};
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
/**
2+
* @license
3+
* Copyright 2026 Google LLC
4+
* SPDX-License-Identifier: Apache-2.0
5+
*/
6+
7+
import assert from 'node:assert';
8+
9+
import type {TestScenario} from '../eval_gemini.ts';
10+
11+
/**
 * Eval scenario for working with two pages in separate isolated contexts.
 *
 * The MCP server is started with `--experimental-page-id-routing`. The model
 * is asked to open Page A and Page B in two isolated contexts, snapshot each,
 * and then click the button on Page A.
 *
 * `expectations` only checks call counts and that `isolatedContext` was
 * supplied as a string; it does not verify which page each call targeted or
 * the order of the calls.
 */
export const scenario: TestScenario = {
  serverArgs: ['--experimental-page-id-routing'],
  prompt: `Open two new pages in isolated contexts:
- Page A (isolatedContext "contextA") at data:text/html,<button>Click A</button>
- Page B (isolatedContext "contextB") at data:text/html,<button>Click B</button>
Then take a snapshot of Page A, take a snapshot of Page B, and then click the button on Page A.`,
  maxTurns: 12,
  expectations: calls => {
    // Exactly two new_page calls, each passing a string isolatedContext.
    const newPages = calls.filter(c => c.name === 'new_page');
    assert.strictEqual(newPages.length, 2, 'Should open 2 pages');
    for (const np of newPages) {
      assert.strictEqual(
        typeof np.args.isolatedContext,
        'string',
        'new_page should use isolatedContext',
      );
    }

    // At least two take_snapshot calls (one per page).
    // The model may use pageId directly or select_page before each snapshot.
    const snapshots = calls.filter(c => c.name === 'take_snapshot');
    assert.ok(snapshots.length >= 2, 'Should take at least 2 snapshots');

    // At least one click call. The intent is that the uid comes from Page A's
    // snapshot even though Page B was snapshotted afterwards, but only the
    // presence of a click is checked here, not its target.
    const clicks = calls.filter(c => c.name === 'click');
    assert.ok(clicks.length >= 1, 'Should click the button on Page A');
  },
};

0 commit comments

Comments
 (0)