chore: 添加 .gitignore 并更新依赖与文档配置
This commit is contained in:
238
src/browser.js
Normal file
238
src/browser.js
Normal file
@@ -0,0 +1,238 @@
|
||||
/**
|
||||
* browser.js — 浏览器生命周期管理(内部模块,不对外暴露)
|
||||
*
|
||||
* 设计思路:
|
||||
* Skill 内部自己管理 Chrome 进程,对外只暴露 getSession()。
|
||||
* 调用方不需要关心 launch/connect/端口/CDP 等细节。
|
||||
*
|
||||
* 流程:
|
||||
* 1. 先检查指定端口是否已有 Chrome 在跑 → 有就 connect
|
||||
* 2. 没有 → 启动新 Chrome(需要 executablePath)
|
||||
* 3. 找到 / 新开 Gemini 标签页
|
||||
* 4. 返回 { browser, page }
|
||||
*/
|
||||
import puppeteerCore from 'puppeteer-core';
|
||||
import { addExtra } from 'puppeteer-extra';
|
||||
import StealthPlugin from 'puppeteer-extra-plugin-stealth';
|
||||
import { homedir } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
import { createConnection } from 'node:net';
|
||||
|
||||
// Wrap puppeteer-core with puppeteer-extra so the stealth plugin can be registered on it.
const puppeteer = addExtra(puppeteerCore);
puppeteer.use(StealthPlugin());

// Module-level singleton: the same browser handle is reused across calls.
let _browser = null;
|
||||
|
||||
/**
 * Default settings used when the caller omits an option.
 *
 * - port:            Chrome remote-debugging port
 * - userDataDir:     Chrome profile directory, under the current user's home directory
 * - headless:        value handed to puppeteer's `headless` launch option
 * - protocolTimeout: value (ms) handed to puppeteer's `protocolTimeout` option
 */
const DEFAULTS = {
  port: 9222,
  userDataDir: join(homedir(), '.gemini-skill', 'chrome-data'),
  headless: false,
  protocolTimeout: 60 * 1000,
};
|
||||
|
||||
/**
 * Check whether something accepts TCP connections on the given port.
 *
 * Only a TCP connect is attempted; nothing is sent, so this does not verify
 * that the listener is actually Chrome. The probe socket is destroyed as soon
 * as the connection succeeds or the timeout elapses.
 *
 * @param {number} port - port to probe
 * @param {string} [host='127.0.0.1'] - host to probe
 * @param {number} [timeout=1500] - how long to wait for the connection, in ms
 * @returns {Promise<boolean>} true when the connection was established,
 *   false on a socket error or when the timeout elapsed first
 */
function isPortAlive(port, host = '127.0.0.1', timeout = 1500) {
  return new Promise((resolve) => {
    const probe = createConnection({ host, port });

    // Give up (and release the socket) if nothing answers in time.
    const giveUp = () => {
      probe.destroy();
      resolve(false);
    };
    const deadline = setTimeout(giveUp, timeout);

    probe.on('connect', () => {
      clearTimeout(deadline);
      probe.destroy();
      resolve(true);
    });

    probe.on('error', () => {
      clearTimeout(deadline);
      resolve(false);
    });
  });
}
|
||||
|
||||
/**
 * Command-line flags passed to Chrome when this module launches it.
 * Grouped by purpose; the groups are flattened into one list in the order shown.
 */
const CHROME_ARGS = [
  // Basics
  ['--no-first-run', '--disable-default-apps', '--disable-popup-blocking'],

  // Rendering stability (headless / GPU-less servers)
  [
    '--disable-gpu',
    '--disable-software-rasterizer',
    '--no-sandbox',
    '--disable-setuid-sandbox',
    '--disable-dev-shm-usage',
  ],

  // Anti-detection (used together with the stealth plugin and ignoreDefaultArgs)
  ['--disable-blink-features=AutomationControlled'],

  // Network / performance
  [
    '--disable-background-networking',
    '--disable-background-timer-throttling',
    '--disable-backgrounding-occluded-windows',
    '--disable-renderer-backgrounding',
  ],

  // UI cleanliness
  [
    '--disable-features=Translate',
    '--no-default-browser-check',
    '--disable-crash-reporter',
    '--hide-crash-restore-bubble',
  ],
].flat();
|
||||
|
||||
/**
 * Attach to a Chrome instance that already exposes a remote-debugging
 * endpoint on 127.0.0.1.
 *
 * @param {number} port - remote-debugging port of the running Chrome
 * @returns {Promise<import('puppeteer-core').Browser>} the connected browser handle
 */
async function connectToChrome(port) {
  const connection = await puppeteer.connect({
    browserURL: `http://127.0.0.1:${port}`,
    defaultViewport: null,
    protocolTimeout: DEFAULTS.protocolTimeout,
  });

  console.log('[browser] connected to existing Chrome on port', port);
  return connection;
}
|
||||
|
||||
/**
 * Start a new Chrome process with remote debugging enabled.
 *
 * The shared CHROME_ARGS flags are used, plus `--remote-debugging-port`, and
 * puppeteer's default `--enable-automation` flag is suppressed.
 *
 * @param {object} opts
 * @param {string} opts.executablePath - path to the Chrome binary
 * @param {number} opts.port - remote-debugging port to expose
 * @param {string} opts.userDataDir - Chrome profile directory
 * @param {boolean} opts.headless - passed through to puppeteer's `headless` option
 * @returns {Promise<import('puppeteer-core').Browser>} the launched browser handle
 */
async function launchChrome({ executablePath, port, userDataDir, headless }) {
  const launchOptions = {
    executablePath,
    headless,
    userDataDir,
    defaultViewport: null,
    args: CHROME_ARGS.concat(`--remote-debugging-port=${port}`),
    ignoreDefaultArgs: ['--enable-automation'],
    protocolTimeout: DEFAULTS.protocolTimeout,
  };

  const browser = await puppeteer.launch(launchOptions);
  const pid = browser.process()?.pid;
  console.log('[browser] launched Chrome, pid:', pid, 'port:', port, 'dataDir:', userDataDir);
  return browser;
}
|
||||
|
||||
/** URLs of tabs that show no content and can therefore be navigated freely. */
const BLANK_TAB_URLS = new Set(['about:blank', 'chrome://newtab/', 'chrome://new-tab-page/']);

/**
 * True when `url` is served from the Gemini host.
 * The hostname is compared instead of searching the raw string, so a URL that
 * merely mentions the host (e.g. in a `continue=` query parameter) is rejected.
 * @param {string} url
 * @returns {boolean}
 */
function isGeminiUrl(url) {
  try {
    return new URL(url).hostname === 'gemini.google.com';
  } catch {
    // Not a parseable URL (e.g. an empty string): cannot be a Gemini tab.
    return false;
  }
}

/**
 * Find a tab that already shows Gemini, or open Gemini in a tab.
 *
 * Selection order:
 *   1. a tab whose URL host is gemini.google.com: brought to front and returned as-is;
 *   2. otherwise a blank tab (about:blank / new-tab page) is navigated to Gemini;
 *   3. otherwise a new tab is created and navigated to Gemini.
 *
 * Tabs showing any other site are never navigated away, so attaching to a
 * Chrome the user is already working in does not clobber their pages.
 *
 * @param {import('puppeteer-core').Browser} browser
 * @returns {Promise<import('puppeteer-core').Page>} the Gemini page
 */
async function findOrCreateGeminiPage(browser) {
  const pages = await browser.pages();

  // Prefer a tab that is already on Gemini.
  const existing = pages.find((candidate) => isGeminiUrl(candidate.url()));
  if (existing) {
    console.log('[browser] reusing existing Gemini tab:', existing.url());
    await existing.bringToFront();
    return existing;
  }

  // None found: reuse a blank tab when there is one, otherwise open a new tab.
  const blank = pages.find((candidate) => BLANK_TAB_URLS.has(candidate.url()));
  const page = blank ?? (await browser.newPage());
  await page.goto('https://gemini.google.com/app', {
    waitUntil: 'networkidle2',
    timeout: 30_000,
  });
  console.log('[browser] opened new Gemini tab');
  return page;
}
|
||||
|
||||
/**
 * Make sure a browser and a Gemini page are available. This is the module's
 * single entry point for browser management.
 *
 * Steps:
 *   1. If the module already holds a connected browser, reuse it.
 *   2. Otherwise, if something is listening on `port`, try to connect to it.
 *   3. Otherwise (or if the connect attempt failed) launch a new Chrome,
 *      which requires `executablePath`.
 *
 * Only the connect attempt itself is allowed to fail softly. Once a connection
 * is established, a failure to find or open the Gemini page is propagated to
 * the caller instead of being reported as a connect failure and followed by an
 * attempt to launch a second Chrome on the same port.
 *
 * @param {object} [opts]
 * @param {string} [opts.executablePath] - Chrome binary path (needed only when launching)
 * @param {number} [opts.port=9222] - remote-debugging port
 * @param {string} [opts.userDataDir] - Chrome profile directory
 * @param {boolean} [opts.headless=false]
 * @returns {Promise<{browser: import('puppeteer-core').Browser, page: import('puppeteer-core').Page}>}
 * @throws {Error} when no Chrome can be connected to and no `executablePath` was given;
 *   errors from launching Chrome or from opening the Gemini page are propagated as well
 */
export async function ensureBrowser(opts = {}) {
  const {
    executablePath,
    port = DEFAULTS.port,
    userDataDir = DEFAULTS.userDataDir,
    headless = DEFAULTS.headless,
  } = opts;

  // 1. Reuse the connection this module already holds.
  if (_browser && _browser.isConnected()) {
    console.log('[browser] reusing existing connection');
    const page = await findOrCreateGeminiPage(_browser);
    return { browser: _browser, page };
  }

  // 2. Try to attach to a Chrome that is already running on the port.
  if (await isPortAlive(port)) {
    let connected = null;
    try {
      connected = await connectToChrome(port);
    } catch (err) {
      console.warn('[browser] connect failed, will try launch:', err.message);
    }

    if (connected) {
      _browser = connected;
      // Deliberately outside the try above: a page/navigation error is not a connect failure.
      const page = await findOrCreateGeminiPage(_browser);
      return { browser: _browser, page };
    }
  }

  // 3. Launch a new Chrome.
  if (!executablePath) {
    throw new Error(
      [
        `[browser] 端口 ${port} 无可用 Chrome,且未提供 executablePath。`,
        `请先手动启动 Chrome:chrome --remote-debugging-port=${port} --user-data-dir="${userDataDir}"`,
        `或传入 executablePath 让 skill 自动启动。`,
      ].join('\n')
    );
  }

  _browser = await launchChrome({ executablePath, port, userDataDir, headless });
  const page = await findOrCreateGeminiPage(_browser);
  return { browser: _browser, page };
}
|
||||
|
||||
/**
 * Detach from the browser without closing it, so the Chrome process stays
 * available for a later connection. Does nothing when no browser is held.
 */
export function disconnect() {
  if (!_browser) {
    return;
  }

  _browser.disconnect();
  _browser = null;
  console.log('[browser] disconnected');
}
|
||||
|
||||
/**
 * Close the browser held by this module (via `Browser.close()`) and drop the
 * reference to it. Does nothing when no browser is held.
 *
 * @returns {Promise<void>}
 */
export async function close() {
  if (!_browser) {
    return;
  }

  await _browser.close();
  _browser = null;
  console.log('[browser] closed');
}
|
||||
47
src/demo.js
Normal file
47
src/demo.js
Normal file
@@ -0,0 +1,47 @@
|
||||
/**
|
||||
* demo.js — 使用示例
|
||||
*
|
||||
* 两种启动方式:
|
||||
*
|
||||
* 方式 1(推荐):先手动启动 Chrome,再运行 demo
|
||||
* chrome --remote-debugging-port=9222 --user-data-dir="$HOME/.gemini-skill/chrome-data"
|
||||
* node src/demo.js
|
||||
*
|
||||
* 方式 2:让 skill 自动启动 Chrome
|
||||
* CHROME_PATH="C:/Program Files/Google/Chrome/Application/chrome.exe" node src/demo.js
|
||||
*/
|
||||
import { createGeminiSession, disconnect } from './index.js';
|
||||
|
||||
/**
 * Demo entry point: open a Gemini session, probe the page, send one prompt,
 * print the outcome, and always disconnect afterwards.
 */
async function main() {
  console.log('=== Gemini Skill Demo ===\n');

  // Create the session (connects to a running Chrome or launches one).
  const session = await createGeminiSession({
    executablePath: process.env.CHROME_PATH || undefined,
  });
  const { ops } = session;

  // Progress callback handed to sendAndWait.
  const reportPoll = (poll) => {
    console.log(` polling... status=${poll.status}`);
  };

  try {
    // 1. Probe the page state.
    console.log('[1] 探测页面元素...');
    const probe = await ops.probe();
    console.log('probe:', JSON.stringify(probe, null, 2));

    // 2. Send a single prompt and wait for the answer.
    console.log('\n[2] 发送提示词...');
    const result = await ops.sendAndWait('Hello Gemini!', {
      timeout: 60_000,
      onPoll: reportPoll,
    });
    console.log('result:', JSON.stringify(result, null, 2));
  } catch (err) {
    console.error('Error:', err);
  } finally {
    disconnect();
    console.log('\n[done]');
  }
}

main().catch(console.error);
|
||||
350
src/gemini-ops.js
Normal file
350
src/gemini-ops.js
Normal file
@@ -0,0 +1,350 @@
|
||||
/**
|
||||
* gemini-ops.js — Gemini 操作高层 API
|
||||
*
|
||||
* 职责:
|
||||
* 基于 operator.js 的底层原子操作,编排 Gemini 特定的业务流程。
|
||||
* 全部通过 CDP 实现,不往页面注入任何对象。
|
||||
*/
|
||||
import { createOperator } from './operator.js';
|
||||
|
||||
/**
 * Selectors for the Gemini page elements this module works with.
 * Each entry is a list of candidates in priority order.
 */
const SELECTORS = {
  // Prompt input box
  promptInput: [
    'div.ql-editor[contenteditable="true"][role="textbox"]',
    '[contenteditable="true"][aria-label*="Gemini"]',
    '[contenteditable="true"][data-placeholder*="Gemini"]',
    'div[contenteditable="true"][role="textbox"]',
  ],

  // Action button inside the send-button container
  actionBtn: [
    '.send-button-container button.send-button',
    '.send-button-container button',
  ],

  // "New chat" button
  newChatBtn: [
    '[data-test-id="new-chat-button"] a',
    '[data-test-id="new-chat-button"]',
    'a[aria-label="发起新对话"]',
    'a[aria-label*="new chat" i]',
  ],

  // Model button (the first candidate uses the non-standard :has-text() form)
  modelBtn: [
    'button:has-text("Gemini")',
    '[role="button"][aria-haspopup="menu"]',
  ],
};
|
||||
|
||||
/**
 * Build the high-level Gemini operations object for one page.
 *
 * Every method drives the page through the operator returned by
 * `createOperator(page)` (plus one direct `page.evaluate` call in
 * `extractImageBase64`). Methods reach their siblings through the closed-over
 * `ops` object rather than `this`, so they keep working when detached, e.g.
 * `const { sendAndWait } = createOps(page)`.
 *
 * Functions handed to `op.query` / `page.evaluate` run in the page context and
 * must be self-contained: they cannot reference variables from this module.
 *
 * @param {import('puppeteer-core').Page} page
 * @returns {object} the operations object (see the individual members below)
 */
export function createOps(page) {
  const op = createOperator(page);

  const ops = {
    /** The underlying operator, exposed for callers that need low-level access. */
    operator: op,

    /** The selector table, exposed for debugging and external extension. */
    selectors: SELECTORS,

    /**
     * Report which of the known page elements can currently be located,
     * together with the action-button status.
     * @returns {Promise<{promptInput: boolean, actionBtn: boolean, newChatBtn: boolean, modelBtn: boolean, status: object}>}
     */
    async probe() {
      const [promptInput, actionBtn, newChatBtn, modelBtn] = await Promise.all([
        op.locate(SELECTORS.promptInput),
        op.locate(SELECTORS.actionBtn),
        op.locate(SELECTORS.newChatBtn),
        op.locate(SELECTORS.modelBtn),
      ]);
      const status = await ops.getStatus();

      return {
        promptInput: promptInput.found,
        actionBtn: actionBtn.found,
        newChatBtn: newChatBtn.found,
        modelBtn: modelBtn.found,
        status,
      };
    },

    /**
     * Click one of the elements named in the selector table.
     * @param {'actionBtn'|'newChatBtn'|'modelBtn'} key - a key of SELECTORS
     * @returns {Promise<object>} the operator's click result, or
     *   `{ ok: false, error: 'unknown_key: …' }` when `key` is not in the table
     */
    async click(key) {
      // Own-property check: inherited names such as "toString" must not pass as selector lists.
      const known = Object.prototype.hasOwnProperty.call(SELECTORS, key);
      const sels = known ? SELECTORS[key] : undefined;
      if (!sels) {
        return { ok: false, error: `unknown_key: ${key}` };
      }
      return op.click(sels);
    },

    /**
     * Fill the prompt input in one go (not typed character by character).
     * @param {string} text
     * @returns {Promise<object>} the operator's fill result
     */
    async fillPrompt(text) {
      return op.fill(SELECTORS.promptInput, text);
    },

    /**
     * Read the state of the action button with a one-shot page query.
     *
     * Result `status` values:
     *   - 'unknown': no visible action button was found (`error: 'btn_not_found'`)
     *   - 'loading': the button's aria-label matches 停止 / Stop
     *   - 'ready':   the button's aria-label matches 发送 / Send / Submit
     *   - 'idle':    any other label
     *
     * @returns {Promise<{status: string, label?: string, disabled?: boolean, error?: string}>}
     */
    async getStatus() {
      return op.query((sels) => {
        const isVisible = (node) => {
          const box = node.getBoundingClientRect();
          const style = getComputedStyle(node);
          return box.width > 0 && box.height > 0
            && style.display !== 'none' && style.visibility !== 'hidden';
        };

        // First visible match of the first selector that yields one.
        let btn = null;
        for (const sel of sels) {
          try {
            btn = [...document.querySelectorAll(sel)].find(isVisible) || null;
          } catch { /* selector not supported by querySelectorAll: skip it */ }
          if (btn) break;
        }

        if (!btn) return { status: 'unknown', error: 'btn_not_found' };

        const label = (btn.getAttribute('aria-label') || '').trim();
        const disabled = btn.getAttribute('aria-disabled') === 'true';

        if (/停止|Stop/i.test(label)) {
          return { status: 'loading', label };
        }
        if (/发送|Send|Submit/i.test(label)) {
          return { status: 'ready', label, disabled };
        }
        return { status: 'idle', label, disabled };
      }, SELECTORS.actionBtn);
    },

    /**
     * Take one status sample: the action-button status plus whether the page
     * is visible (`!document.hidden`) and a timestamp.
     * @returns {Promise<object>} `{ ...status, pageVisible, ts }`
     */
    async pollStatus() {
      const status = await ops.getStatus();
      const pageVisible = await op.query(() => !document.hidden);
      return { ...status, pageVisible, ts: Date.now() };
    },

    /**
     * Describe the last `img.image.loaded` element on the page.
     * @returns {Promise<object>} `{ ok: true, src, alt, width, height, hasDownloadBtn }`,
     *   or `{ ok: false, error: 'no_loaded_images' }`
     */
    async getLatestImage() {
      return op.query(() => {
        const imgs = [...document.querySelectorAll('img.image.loaded')];
        if (!imgs.length) {
          return { ok: false, error: 'no_loaded_images' };
        }
        const img = imgs[imgs.length - 1];

        // Search for the download icon inside the nearest .image-container;
        // when there is none, the search falls back to the whole body.
        const container = img.closest('.image-container') ?? document.body;
        const dlBtn = container.querySelector('mat-icon[fonticon="download"]')
          || container.querySelector('mat-icon[data-mat-icon-name="download"]');

        return {
          ok: true,
          src: img.src || '',
          alt: img.alt || '',
          width: img.naturalWidth || 0,
          height: img.naturalHeight || 0,
          hasDownloadBtn: !!dlBtn,
        };
      });
    },

    /**
     * Extract the last loaded image as a data URL.
     *
     * A canvas copy is tried first; if that throws (e.g. a tainted canvas), the
     * image is fetched again in the page and read through a FileReader.
     *
     * @returns {Promise<object>} `{ ok: true, dataUrl, width, height, method }` with
     *   `method` 'canvas' or 'fetch'; `{ ok: false, error: 'no_loaded_images' }`;
     *   or `{ ok: false, error: 'extract_failed', detail }` when the fallback fails
     */
    async extractImageBase64() {
      const first = await op.query(() => {
        const imgs = [...document.querySelectorAll('img.image.loaded')];
        if (!imgs.length) {
          return { ok: false, error: 'no_loaded_images' };
        }
        const img = imgs[imgs.length - 1];
        const w = img.naturalWidth || img.width;
        const h = img.naturalHeight || img.height;

        // Synchronous canvas extraction.
        try {
          const canvas = document.createElement('canvas');
          canvas.width = w;
          canvas.height = h;
          canvas.getContext('2d').drawImage(img, 0, 0);
          const dataUrl = canvas.toDataURL('image/png');
          return { ok: true, dataUrl, width: w, height: h, method: 'canvas' };
        } catch { /* canvas unusable: ask the caller to run the fetch fallback */ }

        return { ok: false, needFetch: true, src: img.src, width: w, height: h };
      });

      if (first.ok || !first.needFetch) return first;

      // Fetch fallback, executed asynchronously in the page context.
      return page.evaluate(async (src, w, h) => {
        try {
          const r = await fetch(src);
          if (!r.ok) throw new Error(`fetch_status_${r.status}`);
          const blob = await r.blob();
          // Settle on load/error separately: "loadend" also fires after a failed
          // read, which would otherwise be reported as success with a null dataUrl.
          const dataUrl = await new Promise((resolve, reject) => {
            const reader = new FileReader();
            reader.onload = () => resolve(reader.result);
            reader.onerror = () => reject(reader.error || new Error('file_reader_error'));
            reader.readAsDataURL(blob);
          });
          return { ok: true, dataUrl, width: w, height: h, method: 'fetch' };
        } catch (err) {
          return { ok: false, error: 'extract_failed', detail: err.message || String(err) };
        }
      }, first.src, first.width, first.height);
    },

    /**
     * Click the download button that belongs to the last loaded image.
     * @returns {Promise<object>} `{ ok: true, src }`, or `{ ok: false, error }` with
     *   error 'no_loaded_images' or 'download_btn_not_found'
     */
    async downloadLatestImage() {
      return op.query(() => {
        const imgs = [...document.querySelectorAll('img.image.loaded')];
        if (!imgs.length) return { ok: false, error: 'no_loaded_images' };

        const img = imgs[imgs.length - 1];
        // Nearest .image-container, falling back to the whole body when there is none.
        const container = img.closest('.image-container') ?? document.body;
        const dlBtn = container.querySelector('mat-icon[fonticon="download"]')
          || container.querySelector('mat-icon[data-mat-icon-name="download"]');

        if (!dlBtn) return { ok: false, error: 'download_btn_not_found' };

        // The icon may sit inside the actual clickable element.
        const clickable = dlBtn.closest('button,[role="button"],.button-icon-wrapper') || dlBtn;
        clickable.click();
        return { ok: true, src: img.src || '' };
      });
    },

    // ─── High-level composite operations ───

    /**
     * Fill the prompt, click the action button, then poll until the status is
     * 'idle' or the timeout elapses. The first poll happens one `interval`
     * after the click.
     *
     * @param {string} prompt
     * @param {object} [opts]
     * @param {number} [opts.timeout=120000] - maximum polling time in ms
     * @param {number} [opts.interval=8000] - pause before each poll in ms
     * @param {(status: object) => void} [opts.onPoll] - called with every poll result
     * @returns {Promise<{ok: boolean, elapsed: number, finalStatus?: object, error?: string, detail?: object}>}
     *   error is 'fill_failed', 'send_click_failed' or 'timeout'
     */
    async sendAndWait(prompt, opts = {}) {
      const { timeout = 120_000, interval = 8_000, onPoll } = opts;

      // 1. Fill in the prompt.
      const fillResult = await ops.fillPrompt(prompt);
      if (!fillResult.ok) {
        return { ok: false, error: 'fill_failed', detail: fillResult, elapsed: 0 };
      }

      // Give the UI a moment to react.
      await sleep(300);

      // 2. Click send.
      const clickResult = await ops.click('actionBtn');
      if (!clickResult.ok) {
        return { ok: false, error: 'send_click_failed', detail: clickResult, elapsed: 0 };
      }

      // 3. Poll until done.
      const start = Date.now();
      let lastStatus = null;

      while (Date.now() - start < timeout) {
        await sleep(interval);

        const poll = await ops.pollStatus();
        lastStatus = poll;
        onPoll?.(poll);

        if (poll.status === 'idle') {
          return { ok: true, elapsed: Date.now() - start, finalStatus: poll };
        }
        if (poll.status === 'unknown') {
          console.warn('[ops] unknown status, may need screenshot to debug');
        }
      }

      return { ok: false, error: 'timeout', elapsed: Date.now() - start, finalStatus: lastStatus };
    },

    /**
     * Full image-generation flow: optionally start a new chat, send the prompt,
     * wait for completion, then download or extract the latest image.
     *
     * @param {string} prompt
     * @param {object} [opts]
     * @param {number} [opts.timeout=120000] - passed to sendAndWait
     * @param {boolean} [opts.newChat=true] - click the new-chat button first
     * @param {boolean} [opts.highRes=false] - true: click the page's download button;
     *   false: return the image as a data URL
     * @param {(status: object) => void} [opts.onPoll] - passed to sendAndWait
     * @returns {Promise<object>} result object with `ok`, `elapsed` and step-specific fields
     */
    async generateImage(prompt, opts = {}) {
      const { timeout = 120_000, newChat = true, highRes = false, onPoll } = opts;

      // 1. Optionally start a new chat; a failed click is only logged.
      if (newChat) {
        const newChatResult = await ops.click('newChatBtn');
        if (!newChatResult.ok) {
          console.warn('[ops] newChatBtn click failed, continuing anyway');
        }
        await sleep(1500);
      }

      // 2. Send and wait.
      const waitResult = await ops.sendAndWait(prompt, { timeout, onPoll });
      if (!waitResult.ok) {
        return { ...waitResult, step: 'sendAndWait' };
      }

      // 3. Let the image finish rendering.
      await sleep(2000);

      // 4. Look for the image, with one retry after a further pause.
      const imgInfo = await ops.getLatestImage();
      if (!imgInfo.ok) {
        await sleep(3000);
        const retry = await ops.getLatestImage();
        if (!retry.ok) {
          return { ok: false, error: 'no_image_found', elapsed: waitResult.elapsed, imgInfo: retry };
        }
      }

      // 5. Download or extract.
      if (highRes) {
        const dlResult = await ops.downloadLatestImage();
        return { ok: dlResult.ok, method: 'download', elapsed: waitResult.elapsed, ...dlResult };
      }
      const b64Result = await ops.extractImageBase64();
      return { ok: b64Result.ok, method: b64Result.method, elapsed: waitResult.elapsed, ...b64Result };
    },

    /** The underlying page. */
    get page() {
      return page;
    },
  };

  return ops;
}
|
||||
|
||||
/**
 * Pause for the given number of milliseconds.
 * @param {number} ms
 * @returns {Promise<void>} resolves (with no value) once the timer fires
 */
function sleep(ms) {
  return new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
}
|
||||
37
src/index.js
Normal file
37
src/index.js
Normal file
@@ -0,0 +1,37 @@
|
||||
/**
|
||||
* gemini-skill — 统一入口
|
||||
*
|
||||
* 对外只暴露高层 API,浏览器管理在内部自动完成。
|
||||
*
|
||||
* 用法:
|
||||
* import { createGeminiSession, disconnect } from './index.js';
|
||||
*
|
||||
* const { ops } = await createGeminiSession();
|
||||
* await ops.generateImage('画一只猫');
|
||||
* disconnect();
|
||||
*/
|
||||
import { ensureBrowser, disconnect, close } from './browser.js';
|
||||
import { createOps } from './gemini-ops.js';
|
||||
|
||||
export { disconnect, close };
|
||||
|
||||
/**
 * Create a Gemini control session.
 *
 * Browser handling is delegated to `ensureBrowser(opts)`; the page it returns
 * is wrapped with `createOps`.
 *
 * @param {object} [opts] - forwarded unchanged to ensureBrowser
 * @param {string} [opts.executablePath] - Chrome binary path (needed only for auto-launch)
 * @param {number} [opts.port=9222] - remote-debugging port
 * @param {string} [opts.userDataDir] - Chrome profile directory
 * @param {boolean} [opts.headless=false]
 * @returns {Promise<{ops: ReturnType<typeof createOps>, page: import('puppeteer-core').Page, browser: import('puppeteer-core').Browser}>}
 */
export async function createGeminiSession(opts = {}) {
  const session = await ensureBrowser(opts);

  return {
    ops: createOps(session.page),
    page: session.page,
    browser: session.browser,
  };
}
|
||||
326
src/operator.js
Normal file
326
src/operator.js
Normal file
@@ -0,0 +1,326 @@
|
||||
/**
|
||||
* operator.js — 纯 CDP 底层操作封装
|
||||
*
|
||||
* 职责:
|
||||
* 封装最基础的浏览器交互原语(点击、输入、查询、等待等),
|
||||
* 全部通过 CDP 协议实现,不往页面注入任何对象。
|
||||
*
|
||||
* 设计原则:
|
||||
* - 所有 DOM 操作通过 page.evaluate() 一次性执行,执行完即走,不留痕迹
|
||||
* - 鼠标 / 键盘事件通过 CDP Input 域发送,生成 isTrusted=true 的原生事件
|
||||
* - 每个方法都是独立的原子操作,上层 gemini-ops.js 负责编排组合
|
||||
*/
|
||||
|
||||
/**
 * Create an operator: a set of low-level interaction primitives bound to one page.
 *
 * DOM reads and writes go through one-shot `page.evaluate()` calls; mouse and
 * keyboard input goes through puppeteer's `page.mouse` / `page.keyboard`.
 * Functions passed to `page.evaluate()` run in the page context and must be
 * self-contained: they cannot reference variables from this module.
 *
 * @param {import('puppeteer-core').Page} page
 * @returns {object} the operator (see the individual members below)
 */
export function createOperator(page) {
  // ─── Internal helpers ───

  /**
   * Find the first visible element matching a list of CSS selectors and
   * return its centre point and size.
   *
   * Besides plain CSS, a selector of the form `<css>:has-text("<text>")` is
   * supported: it matches elements selected by `<css>` (or any element when
   * `<css>` is empty) whose textContent contains `<text>`.
   * "Visible" means a non-empty bounding box and neither `display: none`
   * nor `visibility: hidden`.
   *
   * @param {string[]} selectors - candidates in priority order
   * @returns {Promise<{found: boolean, x?: number, y?: number, width?: number, height?: number, selector?: string, tagName?: string}>}
   */
  async function locate(selectors) {
    return page.evaluate((sels) => {
      const isVisible = (node) => {
        const box = node.getBoundingClientRect();
        const style = getComputedStyle(node);
        return box.width > 0 && box.height > 0
          && style.display !== 'none' && style.visibility !== 'hidden';
      };

      for (const sel of sels) {
        let el = null;
        try {
          if (sel.includes(':has-text(')) {
            const m = sel.match(/^(.*):has-text\("(.*)"\)$/);
            if (m) {
              el = [...document.querySelectorAll(m[1] || '*')]
                .find((n) => isVisible(n) && n.textContent?.includes(m[2])) ?? null;
            }
          } else {
            el = [...document.querySelectorAll(sel)].find((n) => isVisible(n)) ?? null;
          }
        } catch { /* invalid selector syntax: skip this candidate */ }

        if (el) {
          const rect = el.getBoundingClientRect();
          return {
            found: true,
            x: rect.x + rect.width / 2,
            y: rect.y + rect.height / 2,
            width: rect.width,
            height: rect.height,
            selector: sel,
            tagName: el.tagName.toLowerCase(),
          };
        }
      }
      return { found: false };
    }, selectors);
  }

  /**
   * Shift a point by a random offset of at most `jitter` pixels per axis, so
   * clicks do not always land on the exact centre.
   * @param {number} x
   * @param {number} y
   * @param {number} [jitter=3] - maximum offset in pixels
   * @returns {{x: number, y: number}}
   */
  function humanize(x, y, jitter = 3) {
    return {
      x: x + (Math.random() * 2 - 1) * jitter,
      y: y + (Math.random() * 2 - 1) * jitter,
    };
  }

  /**
   * Wait for a random duration between `min` and `max` milliseconds.
   * @param {number} min
   * @param {number} max
   * @returns {Promise<void>}
   */
  function randomDelay(min, max) {
    const ms = min + Math.random() * (max - min);
    return new Promise((resolve) => setTimeout(resolve, ms));
  }

  /** Accept a single selector or a list and always return a list. */
  const toList = (selectors) => (Array.isArray(selectors) ? selectors : [selectors]);

  // ─── Public API ───

  return {
    /**
     * Locate the first visible element matching any of the selectors.
     * @param {string|string[]} selectors - one selector or a prioritized list
     * @returns {Promise<{found: boolean, x?: number, y?: number, width?: number, height?: number, selector?: string, tagName?: string}>}
     */
    async locate(selectors) {
      return locate(toList(selectors));
    },

    /**
     * Click an element with the mouse: move to it, press, pause, release.
     *
     * @param {string|string[]} selectors - candidate selectors
     * @param {object} [opts]
     * @param {number} [opts.jitter=3] - maximum random offset from the element centre, px
     * @param {number} [opts.delayBeforeClick=50] - nominal pause between move and press, ms
     *   (the actual pause is random within 0.5x–1.5x of this value)
     * @param {number} [opts.clickDuration=80] - nominal pause between press and release, ms
     *   (same randomization)
     * @returns {Promise<{ok: boolean, selector?: string, x?: number, y?: number, error?: string, triedSelectors?: string[]}>}
     */
    async click(selectors, opts = {}) {
      const { jitter = 3, delayBeforeClick = 50, clickDuration = 80 } = opts;

      const sels = toList(selectors);
      const loc = await locate(sels);
      if (!loc.found) {
        return { ok: false, error: 'element_not_found', triedSelectors: sels };
      }

      const { x, y } = humanize(loc.x, loc.y, jitter);

      // Move to the target first.
      await page.mouse.move(x, y);
      await randomDelay(delayBeforeClick * 0.5, delayBeforeClick * 1.5);

      // Press, hold briefly, release.
      await page.mouse.down();
      await randomDelay(clickDuration * 0.5, clickDuration * 1.5);
      await page.mouse.up();

      return { ok: true, selector: loc.selector, x, y };
    },

    /**
     * Enter text into whatever currently has focus.
     *
     * - `'paste'` (default): the whole text is inserted at once through
     *   `page.keyboard.sendCharacter`, without key events per character.
     * - `'typeChar'`: each character is typed separately with a random pause
     *   after it.
     *
     * @param {string} text
     * @param {object} [opts]
     * @param {'paste'|'typeChar'} [opts.mode='paste']
     * @param {number} [opts.minDelay=30] - typeChar: minimum pause per character, ms
     * @param {number} [opts.maxDelay=80] - typeChar: maximum pause per character, ms
     * @returns {Promise<{ok: boolean, length: number, mode: string}>}
     */
    async type(text, opts = {}) {
      const { mode = 'paste', minDelay = 30, maxDelay = 80 } = opts;

      if (mode === 'typeChar') {
        for (const char of text) {
          await page.keyboard.type(char);
          await randomDelay(minDelay, maxDelay);
        }
      } else {
        // Public-API way of inserting a string in one step; avoids the private page._client().
        await page.keyboard.sendCharacter(text);
      }

      return { ok: true, length: text.length, mode };
    },

    /**
     * Replace the content of an input element with `text` in one step.
     *
     * The element is clicked to focus it and then located again inside the page,
     * using the same visibility rule as `locate`. For INPUT / TEXTAREA the value
     * is assigned and an `input` event dispatched; for any other element
     * (contenteditable) `execCommand('selectAll')` followed by
     * `execCommand('insertText')` is used.
     * Note: the in-page lookup uses plain `querySelectorAll`, so `:has-text()`
     * candidates are skipped there.
     *
     * @param {string|string[]} selectors - candidate selectors of the input element
     * @param {string} text - text to put into the element
     * @returns {Promise<{ok: boolean, selector?: string, error?: string, triedSelectors?: string[]}>}
     */
    async fill(selectors, text) {
      const sels = toList(selectors);
      const loc = await locate(sels);
      if (!loc.found) {
        return { ok: false, error: 'element_not_found', triedSelectors: sels };
      }

      // Click to focus the target.
      const { x, y } = humanize(loc.x, loc.y, 2);
      await page.mouse.click(x, y);
      await randomDelay(100, 200);

      const result = await page.evaluate((selsInner, textInner) => {
        // Same visibility rule as locate(), so the element found here is the one that was clicked.
        const isVisible = (node) => {
          const box = node.getBoundingClientRect();
          const style = getComputedStyle(node);
          return box.width > 0 && box.height > 0
            && style.display !== 'none' && style.visibility !== 'hidden';
        };

        // Look the element up again: the DOM may have changed after the click.
        let el = null;
        for (const sel of selsInner) {
          try {
            el = [...document.querySelectorAll(sel)].find((n) => isVisible(n)) ?? null;
          } catch { /* selector not supported by querySelectorAll: skip it */ }
          if (el) break;
        }

        if (!el) return { ok: false, error: 'element_lost_after_click' };

        el.focus();

        if (el.tagName === 'TEXTAREA' || el.tagName === 'INPUT') {
          // Native form control.
          el.value = textInner;
          el.dispatchEvent(new Event('input', { bubbles: true }));
        } else {
          // contenteditable element (e.g. a rich-text prompt box).
          document.execCommand('selectAll', false, null);
          document.execCommand('insertText', false, textInner);
        }
        return { ok: true };
      }, sels, text);

      return { ...result, selector: loc.selector };
    },

    /**
     * Run a function once in the page context and return its result.
     * @param {((...args: any[]) => any)} fn - function to execute in the page
     * @param {...any} args - arguments passed to `fn`
     * @returns {Promise<any>}
     */
    async query(fn, ...args) {
      return page.evaluate(fn, ...args);
    },

    /**
     * Poll a page-side condition until it returns a truthy value or the timeout elapses.
     *
     * Evaluation errors do not abort the wait (the page may still be loading).
     * If the final attempt before the timeout threw, its message is returned as
     * `detail` so that a broken condition is distinguishable from a plain timeout.
     *
     * @param {((...args: any[]) => any)} conditionFn - evaluated in the page; truthy means done
     * @param {object} [opts]
     * @param {number} [opts.timeout=30000] - maximum wait in ms
     * @param {number} [opts.interval=500] - pause between attempts in ms
     * @param {any[]} [opts.args=[]] - arguments passed to `conditionFn`
     * @returns {Promise<{ok: boolean, result?: any, elapsed: number, error?: string, detail?: string}>}
     */
    async waitFor(conditionFn, opts = {}) {
      const { timeout = 30_000, interval = 500, args = [] } = opts;
      const start = Date.now();
      let lastErrorMessage = null;

      while (Date.now() - start < timeout) {
        try {
          const result = await page.evaluate(conditionFn, ...args);
          lastErrorMessage = null;
          if (result) {
            return { ok: true, result, elapsed: Date.now() - start };
          }
        } catch (err) {
          lastErrorMessage = err?.message || String(err);
        }
        await new Promise((resolve) => setTimeout(resolve, interval));
      }

      const outcome = { ok: false, error: 'timeout', elapsed: Date.now() - start };
      if (lastErrorMessage !== null) {
        outcome.detail = lastErrorMessage;
      }
      return outcome;
    },

    /**
     * Wait for a navigation to finish.
     * @param {object} [opts]
     * @param {string} [opts.waitUntil='networkidle2']
     * @param {number} [opts.timeout=30000]
     * @returns {Promise<void>}
     */
    async waitForNavigation(opts = {}) {
      const { waitUntil = 'networkidle2', timeout = 30_000 } = opts;
      await page.waitForNavigation({ waitUntil, timeout });
    },

    /**
     * Press a key.
     * @param {string} key - key name, e.g. 'Enter', 'Tab', 'Escape'
     * @param {object} [opts]
     * @param {number} [opts.delay=50] - passed to `page.keyboard.press` as its `delay` option
     * @returns {Promise<{ok: boolean, key: string}>}
     */
    async press(key, opts = {}) {
      const { delay = 50 } = opts;
      await page.keyboard.press(key, { delay });
      return { ok: true, key };
    },

    /**
     * Take a screenshot (for debugging or state verification).
     * The options are passed to `page.screenshot` unchanged.
     * @param {object} [opts]
     * @param {boolean} [opts.fullPage=false]
     * @param {'png'|'jpeg'|'webp'} [opts.type='png']
     * @param {string} [opts.path] - file to write to
     * @returns {Promise<Buffer>}
     */
    async screenshot(opts = {}) {
      return page.screenshot(opts);
    },

    /**
     * Current URL of the page.
     * @returns {string}
     */
    url() {
      return page.url();
    },

    /** The underlying page. */
    get page() {
      return page;
    },
  };
}
|
||||
Reference in New Issue
Block a user