Files
gemini-skill/src/gemini-ops.js

958 lines
36 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
/**
* gemini-ops.js — Gemini 操作高层 API
*
* 职责:
* 基于 operator.js 的底层原子操作,编排 Gemini 特定的业务流程。
* 全部通过 CDP 实现,不往页面注入任何对象。
*/
import { createOperator } from './operator.js';
import { sleep } from './util.js';
// ── Gemini 页面元素选择器 ──
const SELECTORS = {
promptInput: [
'div.ql-editor[contenteditable="true"][role="textbox"]',
'[contenteditable="true"][aria-label*="Gemini"]',
'[contenteditable="true"][data-placeholder*="Gemini"]',
'div[contenteditable="true"][role="textbox"]',
],
/** 输入区底部按钮的父容器(包裹麦克风 + 发送按钮) */
actionBtnWrapper: [
'div.input-buttons-wrapper-bottom',
],
/** 麦克风容器 — class 带 hidden 时隐藏(表示输入框有文字) */
micContainer: [
'div.mic-button-container',
],
/** 发送按钮容器 — class 带 visible 时可见(输入框有文字),否则隐藏 */
sendBtnContainer: [
'div.send-button-container',
],
/** 发送按钮本身 — class 末尾 submit可发送或 stop加载中 */
sendBtn: [
'.send-button-container button.send-button',
'.send-button-container button',
],
newChatBtn: [
'[data-test-id="new-chat-button"] a',
'[data-test-id="new-chat-button"]',
'a[aria-label="发起新对话"]',
'a[aria-label*="new chat" i]',
],
modelBtn: [
'[data-test-id="bard-mode-menu-button"]', // 测试专属属性
'button[aria-label="打开模式选择器"]', // 中文 aria-label
'button[aria-label*="mode selector" i]', // 英文 aria-label 兜底
'button.mat-mdc-menu-trigger.input-area-switch',// class 组合兜底
],
/** 模型标签文本容器(读取当前选中的模型名,如 "Pro" */
modelLabel: [
'[data-test-id="logo-pill-label-container"] span', // 最内层 span 包含模型名
'div.logo-pill-label-container span', // class 兜底
],
/** 模型选项Pro */
modelOptionPro: [
'[data-test-id="bard-mode-option-pro"]', // 中英文统一
],
/** 模型选项:快速 / Quick */
modelOptionQuick: [
'[data-test-id="bard-mode-option-快速"]', // 中文
'[data-test-id="bard-mode-option-quick"]', // 英文
],
/** 模型选项:思考 / Think */
modelOptionThink: [
'[data-test-id="bard-mode-option-思考"]', // 中文
'[data-test-id="bard-mode-option-think"]', // 英文
'[data-test-id="bard-mode-option-thinking"]', // 英文变体
],
tempChatBtn: [
'[data-test-id="temp-chat-button"]', // 最稳定:测试专属属性
'button[aria-label="临时对话"]', // 中文 aria-label
'button[aria-label*="temporary" i]', // 英文 aria-label 兜底
'button.temp-chat-button', // class 名兜底
'button[mattooltip="临时对话"]', // Angular Material tooltip 属性
],
sidebarContainer: [
'[data-test-id="overflow-container"]', // 测试专属属性
'div.overflow-container', // class 兜底
],
/** 加号面板按钮(点击后弹出上传菜单) */
uploadPanelBtn: [
'button.upload-card-button[aria-haspopup="menu"]', // class + aria 组合
'button[aria-controls="upload-file-u"]', // aria-controls 兜底
'button.upload-card-button', // class 兜底
],
/** 上传文件选项(加号面板展开后的"上传文件"按钮) */
uploadFileBtn: [
'[data-test-id="uploader-images-files-button-advanced"]', // 测试专属属性
'images-files-uploader', // 标签名兜底
],
};
/**
* 创建 GeminiOps 操控实例
* @param {import('puppeteer-core').Page} page
*/
export function createOps(page) {
const op = createOperator(page);
return {
/** 暴露底层 operator供高级用户直接使用 */
operator: op,
/** 暴露选择器定义,方便调试和外部扩展 */
selectors: SELECTORS,
/**
* 探测页面各元素是否就位
* @returns {Promise<{promptInput: boolean, actionBtnWrapper: boolean, newChatBtn: boolean, modelBtn: boolean, modelLabel: boolean, tempChatBtn: boolean, currentModel: string, status: object}>}
*/
async probe() {
const [promptInput, actionBtnWrapper, newChatBtn, modelBtn, modelLabel, tempChatBtn, status, currentModelResult] = await Promise.all([
op.locate(SELECTORS.promptInput),
op.locate(SELECTORS.actionBtnWrapper),
op.locate(SELECTORS.newChatBtn),
op.locate(SELECTORS.modelBtn),
op.locate(SELECTORS.modelLabel),
op.locate(SELECTORS.tempChatBtn),
this.getStatus(),
this.getCurrentModel(),
]);
return {
promptInput: promptInput.found,
actionBtnWrapper: actionBtnWrapper.found,
newChatBtn: newChatBtn.found,
modelBtn: modelBtn.found,
modelLabel: modelLabel.found,
tempChatBtn: tempChatBtn.found,
currentModel: currentModelResult.ok ? currentModelResult.raw : '',
status,
};
},
/**
* 点击指定按钮
* @param {'sendBtn'|'newChatBtn'|'modelBtn'|'tempChatBtn'|'modelOptionPro'|'modelOptionQuick'|'modelOptionThink'} key
*/
async click(key) {
const sels = SELECTORS[key];
if (!sels) {
return { ok: false, error: `unknown_key: ${key}` };
}
return op.click(sels);
},
/**
* 进入临时会话模式
*
* 点击页面上的"临时会话"按钮data-test-id="temp-chat-button"
* 然后等待页面完成导航 / 刷新,确保后续操作在临时会话中进行。
*
* @param {object} [opts]
* @param {number} [opts.timeout=15000] - 等待页面导航完成的超时时间ms
* @returns {Promise<{ok: boolean, error?: string}>}
*/
async clickTempChat(opts = {}) {
const { timeout = 15_000 } = opts;
const clickResult = await this.click('tempChatBtn');
if (!clickResult.ok) {
return { ok: false, error: 'temp_chat_btn_not_found' };
}
// 给一点时间让 UI 稳定
await sleep(500);
console.log('[ops] entered temp chat mode');
return { ok: true };
},
/**
* 获取当前选中的模型名称
*
* 读取模型选择按钮中 logo-pill-label-container 内的 span 文本,
* 返回去除空白后的小写文本(如 "pro"、"快速"、"思考")。
*
* @returns {Promise<{ok: boolean, model: string, raw: string, error?: string}>}
*/
async getCurrentModel() {
return op.query((sels) => {
let el = null;
for (const sel of sels) {
try { el = document.querySelector(sel); } catch { /* skip */ }
if (el) break;
}
if (!el) {
return { ok: false, model: '', raw: '', error: 'model_label_not_found' };
}
const raw = (el.textContent || '').trim();
return { ok: true, model: raw.toLowerCase(), raw };
}, SELECTORS.modelLabel);
},
/**
* 判断当前模型是否为 Pro
*
* @returns {Promise<boolean>}
*/
async isModelPro() {
const result = await this.getCurrentModel();
if (!result.ok) return false;
return result.model === 'pro';
},
/**
* 切换到指定模型
*
* 流程:
* 1. 点击模型选择按钮,打开模型下拉菜单
* 2. 等待菜单出现
* 3. 点击目标模型选项
* 4. 等待 UI 稳定
*
* @param {'pro'|'quick'|'think'} model - 目标模型
* @returns {Promise<{ok: boolean, error?: string, previousModel?: string}>}
*/
async switchToModel(model) {
const selectorMap = {
pro: SELECTORS.modelOptionPro,
quick: SELECTORS.modelOptionQuick,
think: SELECTORS.modelOptionThink,
};
const targetSels = selectorMap[model];
if (!targetSels) {
return { ok: false, error: `unknown_model: ${model}` };
}
// 记录切换前的模型
const before = await this.getCurrentModel();
const previousModel = before.ok ? before.raw : undefined;
// 1. 点击模型选择按钮,打开下拉菜单
const openResult = await this.click('modelBtn');
if (!openResult.ok) {
return { ok: false, error: 'model_menu_open_failed', previousModel };
}
// 2. 等待菜单动画展开
await sleep(250);
// 3. 点击目标模型选项
const selectResult = await op.click(targetSels);
if (!selectResult.ok) {
return { ok: false, error: `model_option_${model}_not_found`, previousModel };
}
// 4. 等待 UI 稳定
await sleep(800);
console.log(`[ops] switched model: ${previousModel || '?'}${model}`);
return { ok: true, previousModel };
},
/**
* 确保当前模型为 Pro如果不是则自动切换
*
* @returns {Promise<{ok: boolean, switched: boolean, previousModel?: string, error?: string}>}
*/
async ensureModelPro() {
const isPro = await this.isModelPro();
if (isPro) {
console.log('[ops] model is already Pro');
return { ok: true, switched: false };
}
console.log('[ops] model is not Pro, switching...');
const result = await this.switchToModel('pro');
if (!result.ok) {
return { ok: false, switched: false, error: result.error, previousModel: result.previousModel };
}
return { ok: true, switched: true, previousModel: result.previousModel };
},
/**
* 填写提示词(快速填充,非逐字输入)
* @param {string} text
*/
async fillPrompt(text) {
return op.fill(SELECTORS.promptInput, text);
},
/**
* 获取输入区 action 按钮的详细状态
*
* 状态模型(基于 DOM class 判断):
*
* ┌──────────────────────────────────────────────────────────────────┐
* │ input-buttons-wrapper-bottom父容器
* │ ┌─────────────────────┐ ┌────────────────────────────────┐ │
* │ │ mic-button-container│ │ send-button-container │ │
* │ │ class 带 hidden │ │ class 带 visible / 无 │ │
* │ │ → 输入框有文字 │ │ ┌──────────────────────────┐ │ │
* │ │ class 无 hidden │ │ │ button.send-button │ │ │
* │ │ → 输入框为空(待命) │ │ │ class 尾 submit → 可发送│ │ │
* │ └─────────────────────┘ │ │ class 尾 stop → 加载中│ │ │
* │ │ └──────────────────────────┘ │ │
* │ └────────────────────────────────┘ │
* └──────────────────────────────────────────────────────────────────┘
*
* 返回值:
* - status: 'mic' — 麦克风态输入框为空Gemini 待命)
* - status: 'submit' — 发送态(输入框有文字,可点击发送)
* - status: 'stop' — 加载态Gemini 正在回答,按钮变为停止)
* - status: 'unknown' — 无法识别
*
* @returns {Promise<{status: 'mic'|'submit'|'stop'|'unknown', micHidden: boolean, sendVisible: boolean, btnClass: string, error?: string}>}
*/
async getStatus() {
return op.query((selectors) => {
const { micContainer: micSels, sendBtnContainer: sendSels, sendBtn: btnSels } = selectors;
// ── 查找麦克风容器 ──
let micEl = null;
for (const sel of micSels) {
try { micEl = document.querySelector(sel); } catch { /* skip */ }
if (micEl) break;
}
// ── 查找发送按钮容器 ──
let sendContainerEl = null;
for (const sel of sendSels) {
try { sendContainerEl = document.querySelector(sel); } catch { /* skip */ }
if (sendContainerEl) break;
}
// ── 查找发送按钮本身 ──
let btnEl = null;
for (const sel of btnSels) {
try { btnEl = document.querySelector(sel); } catch { /* skip */ }
if (btnEl) break;
}
// 都找不到则 unknown
if (!micEl && !sendContainerEl) {
return { status: 'unknown', micHidden: false, sendVisible: false, btnClass: '', error: 'containers_not_found' };
}
const micClass = micEl ? micEl.className : '';
const sendClass = sendContainerEl ? sendContainerEl.className : '';
const btnClass = btnEl ? btnEl.className : '';
const micHidden = /\bhidden\b/.test(micClass);
const sendVisible = /\bvisible\b/.test(sendClass);
// ── 判断状态 ──
// 1. 发送容器可见 → 看按钮 class 是 submit 还是 stop
if (sendVisible) {
if (/\bstop\b/.test(btnClass)) {
return { status: 'stop', micHidden, sendVisible, btnClass };
}
if (/\bsubmit\b/.test(btnClass)) {
return { status: 'submit', micHidden, sendVisible, btnClass };
}
// 发送容器可见但按钮 class 无法识别,降级为 submit
return { status: 'submit', micHidden, sendVisible, btnClass };
}
// 2. 麦克风未隐藏 → 待命态(输入框为空)
if (!micHidden) {
return { status: 'mic', micHidden, sendVisible, btnClass };
}
// 3. 麦克风隐藏但发送容器不可见 → 可能的中间状态,用按钮 class 兜底
if (/\bstop\b/.test(btnClass)) {
return { status: 'stop', micHidden, sendVisible, btnClass };
}
return { status: 'unknown', micHidden, sendVisible, btnClass, error: 'ambiguous_state' };
}, { micContainer: SELECTORS.micContainer, sendBtnContainer: SELECTORS.sendBtnContainer, sendBtn: SELECTORS.sendBtn });
},
/**
* 判断 Gemini 当前的回答状态
*
* 基于 actionBtn 状态推导:
* - 'idle' — 待命(麦克风态 或 发送态Gemini 没在回答)
* - 'answering' — 回答中(按钮为 stop 态Gemini 正在生成)
*
* @returns {Promise<{answering: boolean, status: 'idle'|'answering', detail: object}>}
*/
async getAnswerState() {
const detail = await this.getActionBtnStatus();
const answering = detail.status === 'stop';
return {
answering,
status: answering ? 'answering' : 'idle',
detail,
};
},
/**
* 单次轮询状态(保活式,不阻塞)
*/
async pollStatus() {
const status = await this.getStatus();
const pageVisible = await op.query(() => !document.hidden);
return { ...status, pageVisible, ts: Date.now() };
},
/**
* 检查生成的图片是否加载完成
*
* 通过检测页面中 div.loader.animate 元素判断:
* 存在 → 图片还在加载中
* 不存在 → 加载完毕
*
* @returns {Promise<{loaded: boolean}>}
*/
async checkImageLoaded() {
return isImageLoaded(op);
},
/**
* 获取当前会话中所有 Gemini 的文字回复
*
* 选择器div.response-content
* 直接使用 innerText 提取渲染后的文本,浏览器排版引擎会自动处理换行和格式
*
* @returns {Promise<{ok: boolean, responses: Array<{index: number, text: string}>, total: number, error?: string}>}
*/
async getAllTextResponses() {
return op.query(() => {
const divs = [...document.querySelectorAll('div.response-content')];
if (!divs.length) {
return { ok: false, responses: [], total: 0, error: 'no_responses' };
}
const responses = divs.map((div, i) => ({
index: i,
text: (div.innerText || '').trim(),
}));
return { ok: true, responses, total: responses.length };
});
},
/**
* 获取最新一条 Gemini 文字回复
*
* 取最后一个 div.response-content使用 innerText 提取渲染后的文本
*
* @returns {Promise<{ok: boolean, text?: string, index?: number, error?: string}>}
*/
async getLatestTextResponse() {
return op.query(() => {
const divs = [...document.querySelectorAll('div.response-content')];
if (!divs.length) {
return { ok: false, error: 'no_responses' };
}
const last = divs[divs.length - 1];
return { ok: true, text: (last.innerText || '').trim(), index: divs.length - 1 };
});
},
/**
* 获取本次会话中所有已加载的图片
*
* 选择器逻辑:
* - img.image.loaded — 历史已加载图片(不带 animate
* - img.image.animate.loaded — 最新生成的图片(带入场动画)
* 两者都匹配 img.image.loaded所以用它拿全部。
*
* @returns {Promise<{ok: boolean, images: Array<{src: string, alt: string, width: number, height: number, isNew: boolean, index: number}>, total: number, newCount: number, error?: string}>}
*/
async getAllImages() {
return op.query(() => {
const imgs = [...document.querySelectorAll('img.image.loaded')];
if (!imgs.length) {
return { ok: false, images: [], total: 0, newCount: 0, error: 'no_loaded_images' };
}
const images = imgs.map((img, i) => ({
src: img.src || '',
alt: img.alt || '',
width: img.naturalWidth || 0,
height: img.naturalHeight || 0,
isNew: img.classList.contains('animate'),
index: i,
}));
const newCount = images.filter(i => i.isNew).length;
return { ok: true, images, total: images.length, newCount };
});
},
/**
* 获取最新生成的图片信息
*
* 优先查找带 animate class 的图片(刚生成的),
* 如果没有则回退到最后一张已加载图片。
*
* @returns {Promise<{ok: boolean, src?: string, alt?: string, width?: number, height?: number, isNew?: boolean, hasDownloadBtn?: boolean, error?: string}>}
*/
async getLatestImage() {
return op.query(() => {
// 优先:最新生成的图片(带 animate
const newImgs = [...document.querySelectorAll('img.image.animate.loaded')];
// 回退:所有已加载图片
const allImgs = [...document.querySelectorAll('img.image.loaded')];
if (!allImgs.length) {
return { ok: false, error: 'no_loaded_images' };
}
// 取最新生成的最后一张,没有则取全部的最后一张
const img = newImgs.length > 0
? newImgs[newImgs.length - 1]
: allImgs[allImgs.length - 1];
const isNew = newImgs.length > 0 && newImgs[newImgs.length - 1] === img;
// 查找下载按钮
let container = img;
while (container && container !== document.body) {
if (container.classList?.contains('image-container')) break;
container = container.parentElement;
}
const dlBtn = container
? (container.querySelector('mat-icon[fonticon="download"]')
|| container.querySelector('mat-icon[data-mat-icon-name="download"]'))
: null;
return {
ok: true,
src: img.src || '',
alt: img.alt || '',
width: img.naturalWidth || 0,
height: img.naturalHeight || 0,
isNew,
hasDownloadBtn: !!dlBtn,
};
});
},
/**
* 提取指定图片的 Base64 数据
*
* 降级策略:
* 1. Canvas — 同步提取,最快(但跨域图片会被 taint【已注释留作参考】
* 2. 页面 fetch — 异步读取 blob受 CORS 限制Google 图片通常不可用)【已注释,留作参考】
* 3. CDP loadNetworkResource — 通过 CDP 协议用浏览器网络栈下载,绕过 CORS
*
* @param {string} url - 目标图片的 src URL
* @returns {Promise<{ok: boolean, dataUrl?: string, method?: 'cdp', error?: string}>}
*/
async extractImageBase64(url) {
if (!url) {
console.warn('[extractImageBase64] ❌ 未提供 url 参数');
return { ok: false, error: 'missing_url' };
}
console.log(`[extractImageBase64] 🔍 开始提取, url=${url.slice(0, 120)}...`);
// 无论是fetch还是canvas提取都会失败这里留作学习走CDP的url获取兜底
// // ── 阶段 1: Canvas 提取 ──
// const canvasResult = await op.query((targetUrl) => {
// const imgs = [...document.querySelectorAll('img.image.loaded')];
// const img = imgs.find(i => i.src === targetUrl);
// if (!img) {
// return { ok: false, error: 'img_not_found_by_url', searched: imgs.length };
// }
// const w = img.naturalWidth || img.width;
// const h = img.naturalHeight || img.height;
// try {
// const canvas = document.createElement('canvas');
// canvas.width = w;
// canvas.height = h;
// canvas.getContext('2d').drawImage(img, 0, 0);
// const dataUrl = canvas.toDataURL('image/png');
// return { ok: true, dataUrl, width: w, height: h, method: 'canvas' };
// } catch (e) {
// return { ok: false, needFallback: true, src: img.src, width: w, height: h, canvasError: e.message || String(e) };
// }
// }, url);
// if (canvasResult.ok) {
// console.log(`[extractImageBase64] ✅ Canvas 提取成功 (${canvasResult.width}x${canvasResult.height})`);
// return canvasResult;
// }
// if (!canvasResult.needFallback) {
// console.warn(`[extractImageBase64] ❌ 页面中未找到匹配的 img 元素 (已扫描 ${canvasResult.searched || 0} 张)`);
// return canvasResult;
// }
// console.log(`[extractImageBase64] ⚠ Canvas 被污染 (${canvasResult.canvasError}),尝试页面 fetch...`);
// // ── 阶段 2: 页面 fetch可能被 CORS 拦截) ──
// const fetchResult = await page.evaluate(async (src) => {
// try {
// const r = await fetch(src);
// if (!r.ok) return { ok: false, error: `fetch_status_${r.status}` };
// const blob = await r.blob();
// const mime = blob.type || 'image/png';
// return await new Promise((resolve) => {
// const reader = new FileReader();
// reader.onloadend = () => resolve({ ok: true, dataUrl: reader.result, mime });
// reader.onerror = () => resolve({ ok: false, error: 'filereader_error' });
// reader.readAsDataURL(blob);
// });
// } catch (err) {
// return { ok: false, error: 'fetch_failed', detail: err.message || String(err) };
// }
// }, canvasResult.src);
// if (fetchResult.ok) {
// console.log(`[extractImageBase64] ✅ 页面 fetch 提取成功 (${canvasResult.width}x${canvasResult.height})`);
// return { ...fetchResult, width: canvasResult.width, height: canvasResult.height, method: 'fetch' };
// }
// console.log(`[extractImageBase64] ⚠ 页面 fetch 失败 (${fetchResult.error}${fetchResult.detail ? ' — ' + fetchResult.detail : ''}),尝试 CDP 缓存读取...`);
// ── CDP Network.loadNetworkResource通过 CDP 发请求,绕过 CORS ──
try {
const client = page._client();
const frameId = page.mainFrame()._id;
console.log(`[extractImageBase64] 📡 CDP 请求中... frameId=${frameId}`);
const { resource } = await client.send('Network.loadNetworkResource', {
frameId,
url,
options: { disableCache: false, includeCredentials: true },
});
if (!resource.success) {
const errMsg = `CDP 请求失败: httpStatusCode=${resource.httpStatusCode || 'N/A'}`;
console.warn(`[extractImageBase64] ❌ ${errMsg}`);
return { ok: false, error: 'cdp_request_failed', detail: errMsg };
}
// 通过 IO.read 读取 stream 数据
const streamHandle = resource.stream;
if (!streamHandle) {
console.warn('[extractImageBase64] ❌ CDP 返回无 stream handle');
return { ok: false, error: 'cdp_no_stream' };
}
const chunks = [];
let eof = false;
while (!eof) {
const { data, base64Encoded, eof: done } = await client.send('IO.read', {
handle: streamHandle,
size: 1024 * 1024, // 每次读 1MB
});
if (data) {
chunks.push(base64Encoded ? data : Buffer.from(data).toString('base64'));
}
eof = done;
}
await client.send('IO.close', { handle: streamHandle });
const base64Full = chunks.join('');
// 从 response headers 推断 MIMECDP 有时不提供,默认用 image/png
const mime = (resource.headers?.['content-type'] || resource.headers?.['Content-Type'] || 'image/png').split(';')[0].trim();
const dataUrl = `data:${mime};base64,${base64Full}`;
console.log(`[extractImageBase64] ✅ CDP 提取成功 (mime=${mime}, size=${(base64Full.length * 0.75 / 1024).toFixed(1)}KB)`);
return { ok: true, dataUrl, method: 'cdp' };
} catch (err) {
const errMsg = err.message || String(err);
console.warn(`[extractImageBase64] ❌ CDP 提取异常: ${errMsg}`);
return { ok: false, error: 'cdp_error', detail: errMsg };
}
},
/**
* 点击最新图片的下载按钮
*/
async downloadLatestImage() {
return op.query(() => {
const imgs = [...document.querySelectorAll('img.image.loaded')];
if (!imgs.length) return { ok: false, error: 'no_loaded_images' };
const img = imgs[imgs.length - 1];
let container = img;
while (container && container !== document.body) {
if (container.classList?.contains('image-container')) break;
container = container.parentElement;
}
const dlBtn = container
? (container.querySelector('mat-icon[fonticon="download"]')
|| container.querySelector('mat-icon[data-mat-icon-name="download"]'))
: null;
if (!dlBtn) return { ok: false, error: 'download_btn_not_found' };
const clickable = dlBtn.closest('button,[role="button"],.button-icon-wrapper') || dlBtn;
clickable.click();
return { ok: true, src: img.src || '' };
});
},
// ─── 高层组合操作 ───
/**
* 刷新当前页面
*
* 适用于页面卡住、状态异常等场景。
* 刷新后会等待页面重新加载完成waitUntil: networkidle2
*
* @param {object} [options]
* @param {number} [options.timeout=30000] - 等待页面加载的超时时间ms
* @returns {Promise<{ok: boolean, elapsed?: number, error?: string, detail?: string}>}
*/
async reloadPage({ timeout = 30_000 } = {}) {
try {
const start = Date.now();
await page.reload({ waitUntil: 'networkidle2', timeout });
const elapsed = Date.now() - start;
console.log(`[ops] 页面刷新完成 (${elapsed}ms)`);
return { ok: true, elapsed };
} catch (e) {
return { ok: false, error: 'reload_failed', detail: e.message };
}
},
/**
* 上传图片到 Gemini 输入框
*
* 流程:
* 1. 点击加号面板按钮,展开上传菜单
* 2. 等待 300ms 让菜单动画稳定
* 3. 拦截文件选择器 + 点击"上传文件"按钮Promise.all 并发)
* 4. 向文件选择器塞入指定图片路径
* 5. 轮询等待图片加载完成(.image-preview.loading 消失)
*
* @param {string} filePath - 本地图片的绝对路径
* @returns {Promise<{ok: boolean, elapsed?: number, warning?: string, error?: string, detail?: string}>}
*/
async uploadImage(filePath) {
try {
// 1. 点击加号面板按钮,展开上传菜单
const panelClick = await this.click('uploadPanelBtn');
if (!panelClick.ok) {
return { ok: false, error: 'upload_panel_click_failed', detail: panelClick.error };
}
// 2. 等待菜单动画稳定
await sleep(250);
// 3. Promise.all 是精髓:一边开始监听文件选择器弹窗,一边点击"上传文件"按钮
const [fileChooser] = await Promise.all([
page.waitForFileChooser({ timeout: 3_000 }),
this.click('uploadFileBtn'),
]);
// 4. 弹窗被拦截,塞入文件
await fileChooser.accept([filePath]);
console.log(`[ops] 文件已塞入,等待 Gemini 加载图片...`);
// 5. 等待图片加载完成(.image-preview.loading 消失)
const loadTimeout = 10_000;
const loadInterval = 250;
const loadStart = Date.now();
await sleep(500); // 短暂等待 UI 响应
while (Date.now() - loadStart < loadTimeout) {
const loading = await op.query(() => {
const el = document.querySelector('.image-preview.loading');
return !!el;
});
if (!loading) {
console.log(`[ops] 图片加载完成 (${Date.now() - loadStart}ms): ${filePath}`);
return { ok: true, elapsed: Date.now() - loadStart };
}
await sleep(loadInterval);
}
// 超时了但文件已经塞进去了,不算完全失败
console.warn(`[ops] 图片加载超时 (${loadTimeout}ms),但文件已提交`);
return { ok: true, warning: 'load_timeout', elapsed: Date.now() - loadStart };
} catch (e) {
return { ok: false, error: 'upload_image_failed', detail: e.message };
}
},
/**
* 发送提示词并等待生成完成
* @param {string} prompt
* @param {object} [opts]
* @param {number} [opts.timeout=120000]
* @param {number} [opts.interval=8000]
* @param {(status: object) => void} [opts.onPoll]
* @returns {Promise<{ok: boolean, elapsed: number, finalStatus?: object, error?: string}>}
*/
async sendAndWait(prompt, opts = {}) {
const { timeout = 120_000, interval = 1_000, onPoll } = opts;
// 1. 填写
const fillResult = await this.fillPrompt(prompt);
if (!fillResult.ok) {
return { ok: false, error: 'fill_failed', detail: fillResult, elapsed: 0 };
}
// 短暂等待 UI 响应
await sleep(300);
// 2. 点击发送
const clickResult = await this.click('sendBtn');
if (!clickResult.ok) {
return { ok: false, error: 'send_click_failed', detail: clickResult, elapsed: 0 };
}
// 3. 轮询等待(回到麦克风态 = Gemini 回答完毕)
const start = Date.now();
let lastStatus = null;
while (Date.now() - start < timeout) {
await sleep(interval);
const poll = await this.pollStatus();
lastStatus = poll;
onPoll?.(poll);
if (poll.status === 'mic') {
return { ok: true, elapsed: Date.now() - start, finalStatus: poll };
}
if (poll.status === 'unknown') {
console.warn('[ops] unknown status, may need screenshot to debug');
}
}
return { ok: false, error: 'timeout', elapsed: Date.now() - start, finalStatus: lastStatus };
},
/**
* 完整生图流程:新建会话 → 发送提示词 → 等待 → 提取图片
* @param {string} prompt
* @param {object} [opts]
* @param {number} [opts.timeout=120000]
* @param {boolean} [opts.newChat=true]
* @param {boolean} [opts.highRes=false]
* @param {(status: object) => void} [opts.onPoll]
*/
async generateImage(prompt, opts = {}) {
const { timeout = 120_000, newChat = true, highRes = false, onPoll } = opts;
// 1. 可选:新建会话
if (newChat) {
const newChatResult = await this.click('newChatBtn');
if (!newChatResult.ok) {
console.warn('[ops] newChatBtn click failed, continuing anyway');
}
await sleep(1500);
}
// 2. 发送并等待
const waitResult = await this.sendAndWait(prompt, { timeout, onPoll });
if (!waitResult.ok) {
return { ...waitResult, step: 'sendAndWait' };
}
// 3. 等图片渲染完成
await sleep(2000);
// 4. 获取图片
let imgInfo = await this.getLatestImage();
if (!imgInfo.ok) {
await sleep(3000);
imgInfo = await this.getLatestImage();
if (!imgInfo.ok) {
return { ok: false, error: 'no_image_found', elapsed: waitResult.elapsed, imgInfo };
}
}
// 5. 提取 / 下载
if (highRes) {
const dlResult = await this.downloadLatestImage();
return { ok: dlResult.ok, method: 'download', elapsed: waitResult.elapsed, ...dlResult };
} else {
const b64Result = await this.extractImageBase64(imgInfo.src);
return { ok: b64Result.ok, method: b64Result.method, elapsed: waitResult.elapsed, ...b64Result };
}
},
/** 底层 page 引用 */
get page() {
return page;
},
/**
* 检查是否已登录 Google 账号
*
* @returns {Promise<{ok: boolean, loggedIn: boolean, barText?: string, error?: string}>}
*/
async checkLogin() {
return isLoggedIn(op);
},
};
}
/**
* 判断侧边栏是否处于展开状态(内部工具函数,不对外暴露)
*
* 通过 overflow-container 元素的实际渲染宽度判断:
* - width >= 100px → 展开
* - width < 100px → 折叠
*
* @param {ReturnType<typeof createOperator>} op
* @returns {Promise<{ok: boolean, expanded: boolean, width: number, error?: string}>}
*/
function isSidebarExpanded(op) {
return op.query((sels) => {
let el = null;
for (const sel of sels) {
try { el = document.querySelector(sel); } catch { /* skip */ }
if (el) break;
}
if (!el) {
return { ok: false, expanded: false, width: 0, error: 'sidebar_container_not_found' };
}
const width = el.getBoundingClientRect().width;
return { ok: true, expanded: width >= 100, width };
}, SELECTORS.sidebarContainer);
}
/**
* 检查生成的图片是否加载完成
*
* 判断依据:页面中是否存在 div.loader.animate 元素。
* 存在 → 图片还在加载;不存在 → 加载完毕。
*
* @param {ReturnType<typeof createOperator>} op
* @returns {Promise<{loaded: boolean}>}
*/
function isImageLoaded(op) {
return op.query(() => {
const loader = document.querySelector('div.loader.animate');
return { loaded: !loader };
});
}
/**
* 检查是否已登录 Google 账号
*
* 判断依据:顶部导航栏 div.boqOnegoogleliteOgbOneGoogleBar 的 innerText
* 包含"登录"或"sign in"(不区分大小写)则视为未登录
*
* @param {ReturnType<typeof createOperator>} op
* @returns {Promise<{ok: boolean, loggedIn: boolean, barText?: string, error?: string}>}
*/
function isLoggedIn(op) {
return op.query(() => {
const bar = document.querySelector('div.boqOnegoogleliteOgbOneGoogleBar');
if (!bar) {
return { ok: false, loggedIn: false, error: 'login_bar_not_found' };
}
const text = (bar.innerText || '').trim();
const lower = text.toLowerCase();
const notLoggedIn = lower.includes('登录') || lower.includes('sign in');
return { ok: true, loggedIn: !notLoggedIn, barText: text };
});
}