feat(gemini-ops): 优化图片提取逻辑,增加 CDP 缓存读取机制
This commit is contained in:
@@ -84,6 +84,50 @@ const SELECTORS = {
|
|||||||
export function createOps(page) {
|
export function createOps(page) {
|
||||||
const op = createOperator(page);
|
const op = createOperator(page);
|
||||||
|
|
||||||
|
// ── Image request cache: URL → { requestId, timer } ──
// A Network.responseReceived listener records the requestId of every image
// response here, for use by the getResponseBody cache stage of extractImageBase64.
// Each entry is dropped by its own timer after a 5-minute TTL; per the original
// author, getResponseBody will most likely have stopped working by then as well.
const IMAGE_CACHE_TTL = 5 * 60 * 1000; // 5 min
const imageRequestMap = new Map();
|
||||||
|
|
||||||
|
/**
 * Record the CDP requestId that served the image at `url`.
 *
 * The entry removes itself from `imageRequestMap` after `IMAGE_CACHE_TTL`
 * milliseconds. Recording the same URL again replaces the entry and restarts
 * its expiry.
 *
 * @param {string} url - URL of the image response, used as the cache key.
 * @param {string} requestId - CDP request identifier of that response.
 */
function setImageRequest(url, requestId) {
  // A repeated URL supersedes the earlier entry, so cancel that entry's
  // pending expiry before scheduling a new one.
  const previous = imageRequestMap.get(url);
  if (previous !== undefined) {
    clearTimeout(previous.timer);
  }

  const expiry = setTimeout(() => imageRequestMap.delete(url), IMAGE_CACHE_TTL);
  // unref() keeps the pending expiry timer from holding the process open.
  expiry.unref();

  imageRequestMap.set(url, { requestId, timer: expiry });
}
|
||||||
|
|
||||||
|
/**
 * Look up the CDP requestId recorded for an image URL.
 *
 * @param {string} url - Image URL to look up in `imageRequestMap`.
 * @returns {string|null} The recorded requestId, or `null` when the URL has no entry.
 */
function getImageRequestId(url) {
  const cached = imageRequestMap.get(url);
  if (!cached) {
    return null;
  }
  return cached.requestId;
}
|
||||||
|
|
||||||
|
// Fire-and-forget setup: enable the CDP Network domain and record the requestId
// of every response whose MIME type starts with "image/". Any failure is only
// logged as a warning, so the promise returned by this IIFE never rejects.
(async () => {
  try {
    const cdp = page._client();
    await cdp.send('Network.enable');

    cdp.on('Network.responseReceived', ({ requestId, response }) => {
      const mimeType = response.mimeType || '';
      if (!mimeType.startsWith('image/')) {
        return;
      }
      setImageRequest(response.url, requestId);
    });
  } catch (err) {
    console.warn('[ops] Network 监听初始化失败(不影响核心功能):', err.message);
  }
})();
|
||||||
|
|
||||||
return {
|
return {
|
||||||
/** 暴露底层 operator,供高级用户直接使用 */
|
/** 暴露底层 operator,供高级用户直接使用 */
|
||||||
operator: op,
|
operator: op,
|
||||||
@@ -467,13 +511,14 @@ export function createOps(page) {
|
|||||||
/**
|
/**
|
||||||
* 提取指定图片的 Base64 数据
|
* 提取指定图片的 Base64 数据
|
||||||
*
|
*
|
||||||
* 三级降级策略:
|
* 四级降级策略:
|
||||||
* 1. Canvas — 同步提取,最快(但跨域图片会被 taint)
|
* 1. Canvas — 同步提取,最快(但跨域图片会被 taint)
|
||||||
* 2. 页面 fetch — 异步读取 blob(受 CORS 限制,Google 图片通常不可用)
|
* 2. 页面 fetch — 异步读取 blob(受 CORS 限制,Google 图片通常不可用)
|
||||||
* 3. CDP Network — 通过 CDP 协议用浏览器网络栈下载,绕过 CORS,终极兜底
|
* 3. CDP getResponseBody — 从浏览器内存缓存读取,零网络开销(需要 requestId 命中)
|
||||||
|
* 4. CDP loadNetworkResource — 通过 CDP 协议用浏览器网络栈重新下载,绕过 CORS,终极兜底
|
||||||
*
|
*
|
||||||
* @param {string} url - 目标图片的 src URL
|
* @param {string} url - 目标图片的 src URL
|
||||||
* @returns {Promise<{ok: boolean, dataUrl?: string, width?: number, height?: number, method?: 'canvas'|'fetch'|'cdp', error?: string}>}
|
* @returns {Promise<{ok: boolean, dataUrl?: string, width?: number, height?: number, method?: 'canvas'|'fetch'|'cdp-cache'|'cdp', error?: string}>}
|
||||||
*/
|
*/
|
||||||
async extractImageBase64(url) {
|
async extractImageBase64(url) {
|
||||||
if (!url) {
|
if (!url) {
|
||||||
@@ -539,9 +584,28 @@ export function createOps(page) {
|
|||||||
return { ...fetchResult, width: canvasResult.width, height: canvasResult.height, method: 'fetch' };
|
return { ...fetchResult, width: canvasResult.width, height: canvasResult.height, method: 'fetch' };
|
||||||
}
|
}
|
||||||
|
|
||||||
console.log(`[extractImageBase64] ⚠ 页面 fetch 失败 (${fetchResult.error}${fetchResult.detail ? ' — ' + fetchResult.detail : ''}),降级为 CDP 网络请求...`);
|
console.log(`[extractImageBase64] ⚠ 页面 fetch 失败 (${fetchResult.error}${fetchResult.detail ? ' — ' + fetchResult.detail : ''}),尝试 CDP 缓存读取...`);
|
||||||
|
|
||||||
// ── 阶段 3: CDP Network.loadNetworkResource(终极兜底,绕过 CORS) ──
|
// ── 阶段 3: CDP Network.getResponseBody(从浏览器内存缓存读取,零网络开销) ──
|
||||||
|
const requestId = getImageRequestId(canvasResult.src);
|
||||||
|
if (requestId) {
|
||||||
|
try {
|
||||||
|
const client = page._client();
|
||||||
|
const { body, base64Encoded } = await client.send('Network.getResponseBody', { requestId });
|
||||||
|
const base64Data = base64Encoded ? body : Buffer.from(body, 'utf8').toString('base64');
|
||||||
|
const mime = 'image/png'; // 缓存中无法直接拿 MIME,用 png 兜底
|
||||||
|
const dataUrl = `data:${mime};base64,${base64Data}`;
|
||||||
|
|
||||||
|
console.log(`[extractImageBase64] ✅ CDP 缓存命中 (${canvasResult.width}x${canvasResult.height}, size=${(base64Data.length * 0.75 / 1024).toFixed(1)}KB)`);
|
||||||
|
return { ok: true, dataUrl, width: canvasResult.width, height: canvasResult.height, method: 'cdp-cache' };
|
||||||
|
} catch (e) {
|
||||||
|
console.log(`[extractImageBase64] ⚠ CDP 缓存读取失败 (${e.message}),降级为 CDP 网络请求...`);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
console.log('[extractImageBase64] ⚠ 缓存中无该 URL 的 requestId,降级为 CDP 网络请求...');
|
||||||
|
}
|
||||||
|
|
||||||
|
// ── 阶段 4: CDP Network.loadNetworkResource(终极兜底,重新发请求,绕过 CORS) ──
|
||||||
try {
|
try {
|
||||||
const client = page._client();
|
const client = page._client();
|
||||||
const frameId = page.mainFrame()._id;
|
const frameId = page.mainFrame()._id;
|
||||||
|
|||||||
Reference in New Issue
Block a user