From 19ee6e2b24882625f1f6901d826e4090906b5f3a Mon Sep 17 00:00:00 2001 From: "Jason A. Novak" Date: Sat, 2 May 2026 14:14:19 -0700 Subject: [PATCH] feat: add press_key MCP tool for keyboard input simulation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Lets the model send keyboard input to the browser — named keys (Enter, Escape, Tab, arrows, F1–F12), modifier combinations (ctrl+l, ctrl+shift+t, alt+F4), or plain characters — directed at a specific snapshot element (uid) or at the currently focused element. Implementation: - dom.ts: add KEY_MAP and MODIFIER_MAP (named keys and modifier aliases mapped to Selenium Key.* unicode constants) and a pressKey(key, uid?) method that parses "ctrl+shift+t"-style combos. With a uid it routes through Key.chord + sendKeys on the element; without one it drives selenium's W3C Actions keyboard source directly so the key goes to the active element. - firefox/index.ts: expose FirefoxClient.pressKey with a not-connected guard. - tools/input.ts: define pressKeyTool schema and handlePressKey, with the same UID error mapping used by the other input tools. - tools/index.ts, src/index.ts: register the tool and handler. Also drops an orphaned yaml dev-dependency entry from package-lock. 
Co-Authored-By: Claude Opus 4.7
---
 package-lock.json    |  18 ------
 src/firefox/dom.ts   | 145 +++++++++++++++++++++++++++++++++++++++++++
 src/firefox/index.ts |   7 +++
 src/index.ts         |   2 +
 src/tools/index.ts   |   2 +
 src/tools/input.ts   |  46 ++++++++++++++
 6 files changed, 202 insertions(+), 18 deletions(-)

diff --git a/package-lock.json b/package-lock.json
index a870b29..b58b14d 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -7040,24 +7040,6 @@
         "node": ">=10"
       }
     },
-    "node_modules/yaml": {
-      "version": "2.8.3",
-      "resolved": "https://registry.npmjs.org/yaml/-/yaml-2.8.3.tgz",
-      "integrity": "sha512-AvbaCLOO2Otw/lW5bmh9d/WEdcDFdQp2Z2ZUH3pX9U2ihyUY0nvLv7J6TrWowklRGPYbB/IuIMfYgxaCPg5Bpg==",
-      "dev": true,
-      "license": "ISC",
-      "optional": true,
-      "peer": true,
-      "bin": {
-        "yaml": "bin.mjs"
-      },
-      "engines": {
-        "node": ">= 14.6"
-      },
-      "funding": {
-        "url": "https://github.com/sponsors/eemeli"
-      }
-    },
     "node_modules/yargs": {
       "version": "17.7.2",
       "resolved": "https://registry.npmjs.org/yargs/-/yargs-17.7.2.tgz",
diff --git a/src/firefox/dom.ts b/src/firefox/dom.ts
index 93f429d..5daef3b 100644
--- a/src/firefox/dom.ts
+++ b/src/firefox/dom.ts
@@ -4,6 +4,55 @@

 import { By, Key, WebDriver, WebElement } from 'selenium-webdriver';

+// Map of lowercase key names → Selenium unicode values
+const KEY_MAP: Record<string, string> = {
+  enter: Key.RETURN,
+  return: Key.RETURN,
+  escape: Key.ESCAPE,
+  esc: Key.ESCAPE,
+  tab: Key.TAB,
+  backspace: Key.BACK_SPACE,
+  delete: Key.DELETE,
+  space: Key.SPACE,
+  ' ': Key.SPACE,
+  arrowup: Key.ARROW_UP,
+  up: Key.ARROW_UP,
+  arrowdown: Key.ARROW_DOWN,
+  down: Key.ARROW_DOWN,
+  arrowleft: Key.ARROW_LEFT,
+  left: Key.ARROW_LEFT,
+  arrowright: Key.ARROW_RIGHT,
+  right: Key.ARROW_RIGHT,
+  home: Key.HOME,
+  end: Key.END,
+  pageup: Key.PAGE_UP,
+  pagedown: Key.PAGE_DOWN,
+  insert: Key.INSERT,
+  f1: Key.F1,
+  f2: Key.F2,
+  f3: Key.F3,
+  f4: Key.F4,
+  f5: Key.F5,
+  f6: Key.F6,
+  f7: Key.F7,
+  f8: Key.F8,
+  f9: Key.F9,
+  f10: Key.F10,
+  f11: Key.F11,
+  f12: Key.F12,
+};
+
+const MODIFIER_MAP: Record<string, string> = {
+  ctrl: Key.CONTROL,
+  control: Key.CONTROL,
+  alt: Key.ALT,
+  shift: Key.SHIFT,
+  meta: Key.META,
+  cmd: Key.META,
+  win: Key.META,
+  super: Key.META,
+};
+
 export class DomInteractions {
   constructor(
     private driver: WebDriver,
@@ -303,6 +352,102 @@ export class DomInteractions {
     await this.waitForEventsAfterAction();
   }

+  /**
+   * Press a key or key combination.
+   *
+   * With a uid the chord is sent to that element via sendKeys; without one it
+   * goes through the W3C Actions keyboard source to the focused element.
+   *
+   * @param key Key name or combo like "Enter", "Escape", "ctrl+l", "ctrl+shift+t", "F5"
+   * @param uid Optional element UID to focus before pressing. If omitted, sends to active element.
+   * @throws Error if key does not name exactly one non-modifier key, or if uid is
+   *   given while the resolveUid callback is not set.
+   */
+  async pressKey(key: string, uid?: string): Promise<void> {
+    const { modifiers, mainKey } = this.parseKeyCombo(key);
+
+    if (uid) {
+      if (!this.resolveUid) {
+        throw new Error(
+          'pressKey: resolveUid callback not set. Ensure snapshot is initialized.'
+        );
+      }
+      const el = await this.resolveUid(uid);
+      // Key.chord concatenates modifiers + key + Key.NULL (releases all modifiers)
+      await el.sendKeys(Key.chord(...modifiers, mainKey));
+    } else {
+      // Send to the currently focused element via W3C Actions keyboard source
+      const actions = this.driver.actions({ async: true });
+      for (const mod of modifiers) {
+        actions.keyDown(mod);
+      }
+      actions.keyDown(mainKey);
+      actions.keyUp(mainKey);
+      for (const mod of [...modifiers].reverse()) {
+        actions.keyUp(mod);
+      }
+      await actions.perform();
+    }
+
+    await this.waitForEventsAfterAction();
+  }
+
+  /**
+   * Split a combo such as "ctrl+shift+t" into held modifiers and one main key.
+   *
+   * - Segments are separated by "+"; surrounding whitespace is ignored and empty
+   *   segments are skipped.
+   * - Modifier and named-key lookup is case-insensitive and limited to the maps'
+   *   own entries, so a name inherited from Object.prototype ("constructor") is
+   *   never mistaken for a key.
+   * - A final "+" that stands alone or follows a separator is the plus key: "+", "ctrl++".
+   * - A whitespace-only segment is the space key: " ", "ctrl+ ".
+   * - An unmapped key keeps its case when pressed on its own ("A" sends "A") and is
+   *   lowercased next to modifiers ("ctrl+L" sends ctrl + "l").
+   *
+   * @throws Error if the combo has no non-modifier key or more than one.
+   */
+  private parseKeyCombo(key: string): { modifiers: string[]; mainKey: string } {
+    const own = (map: Record<string, string>, name: string): string | undefined =>
+      Object.prototype.hasOwnProperty.call(map, name) ? map[name] : undefined;
+
+    // A trailing "+" preceded by a separator (or by nothing) names the plus key itself.
+    const plusKey = /(^|\+)\s*\+\s*$/.exec(key);
+    const head = plusKey ? key.slice(0, plusKey.index) : key;
+    const segments = head === '' ? [] : head.split('+');
+    if (plusKey) {
+      segments.push('+');
+    }
+
+    const modifiers: string[] = [];
+    const mainKeys: string[] = [];
+    for (const segment of segments) {
+      if (segment === '') {
+        continue;
+      }
+      // A non-empty segment that trims to nothing is whitespace, i.e. the space key.
+      const text = segment.trim() || ' ';
+      const modifier = own(MODIFIER_MAP, text.toLowerCase());
+      if (modifier !== undefined) {
+        modifiers.push(modifier);
+      } else {
+        mainKeys.push(text);
+      }
+    }
+
+    const [mainText, ...extraKeys] = mainKeys;
+    if (mainText === undefined) {
+      throw new Error(`pressKey: no key specified in "${key}"`);
+    }
+    if (extraKeys.length > 0) {
+      throw new Error(`pressKey: more than one non-modifier key in "${key}"`);
+    }
+
+    const lowered = mainText.toLowerCase();
+    const mainKey = own(KEY_MAP, lowered) ?? (modifiers.length > 0 ? lowered : mainText);
+    return { modifiers, mainKey };
+  }
+
   /**
    * Wait for events to propagate after user action
    * Gives the page time to respond to interactions
diff --git a/src/firefox/index.ts b/src/firefox/index.ts
index a195e04..96a00d3 100644
--- a/src/firefox/index.ts
+++ b/src/firefox/index.ts
@@ -197,6 +197,13 @@ export class FirefoxClient {
     return await this.dom.uploadFileByUid(uid, filePath);
   }

+  async pressKey(key: string, uid?: string): Promise<void> {
+    if (!this.dom) {
+      throw new Error('Not connected');
+    }
+    return await this.dom.pressKey(key, uid);
+  }
+
   // ============================================================================
   // Console
   // ============================================================================
diff --git a/src/index.ts b/src/index.ts
index 8014b15..c0042f3 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -199,6 +199,7 @@ const toolHandlers = new Map Promise { try {
@@ -307,3 +328,28 @@ export async function handleUploadFileByUid(args: unknown): Promise {
+  try {
+    const { key, uid } = args as { key: string; uid?: string };
+
+    if (!key || typeof key !== 'string') {
+      throw new Error('key parameter is required and must be a string');
+    }
+
+    const { getFirefox } = await import('../index.js');
+    const firefox = await getFirefox();
+
+    try {
+      await firefox.pressKey(key, uid);
+      return successResponse(uid ? `✅ press_key "${key}" on ${uid}` : `✅ press_key "${key}"`);
+    } catch (error) {
+      if (uid) {
+        throw handleUidError(error as Error, uid);
+      }
+      throw error;
+    }
+  } catch (error) {
+    return errorResponse(error as Error);
+  }
+}