#!/usr/bin/env python3
"""Convert HTML to true vector SVG, keeping text as editable <text> elements.

Primary method: Puppeteer + dom-to-svg
- Puppeteer opens the HTML in a headless browser.
- dom-to-svg converts the DOM tree directly to SVG and keeps <text> elements.
- There is no PDF intermediate, so text is not turned into paths.

Fallback method: Puppeteer PDF + pdf2svg (text becomes paths, not editable).

Dependencies (dom-to-svg, puppeteer, esbuild) are installed automatically on
first run.

Usage:
    python3 html2svg.py <html_dir_or_file> [-o output_dir]
"""

import json
import os
import shutil
import subprocess
import sys
from pathlib import Path

# Node script: Puppeteer loads each page, injects the dom-to-svg bundle,
# rewrites CSS features dom-to-svg cannot handle, then serializes the SVG.
# It receives a JSON config ({bundlePath, files: [{html, svg}]}) as argv[2].
CONVERT_SCRIPT = r"""
const puppeteer = require('puppeteer');
const fs = require('fs');
const path = require('path');
const { pathToFileURL, fileURLToPath } = require('url');

// Extensions whose MIME subtype differs from the extension itself.
const MIME_BY_EXT = { jpg: 'image/jpeg', svg: 'image/svg+xml' };

(async () => {
    const config = JSON.parse(process.argv[2]);
    const browser = await puppeteer.launch({
        headless: 'new',
        args: ['--no-sandbox', '--disable-setuid-sandbox', '--disable-gpu', '--font-render-hinting=none']
    });

    try {
        for (const item of config.files) {
            const page = await browser.newPage();
            await page.setViewport({ width: 1280, height: 720 });
            // pathToFileURL escapes spaces, '#', '?' and handles Windows paths.
            await page.goto(pathToFileURL(item.html).href, { waitUntil: 'networkidle0', timeout: 30000 });
            await new Promise(r => setTimeout(r, 500));

            // 注入预打包的 dom-to-svg bundle
            await page.addScriptTag({ path: config.bundlePath });

            // 预处理:在 Node.js 端读取图片文件转 base64,传给浏览器替换 src
            // (浏览器端 canvas.toDataURL 会因 file:// CORS 被阻止)
            const imgSrcs = await page.evaluate(() => {
                const imgs = document.querySelectorAll('img');
                return Array.from(imgs).map(img => img.getAttribute('src') || '');
            });

            const imgDataMap = {};
            const htmlDir = path.dirname(item.html);  // HTML文件所在目录
            for (const src of imgSrcs) {
                if (!src) continue;
                if (src.startsWith('data:')) continue; // 跳过已内联的
                // 处理 file:// 和绝对/相对路径
                let filePath = src;
                if (filePath.startsWith('file://')) {
                    // Decode through the URL API; fall back to the raw slice on malformed URLs.
                    try {
                        filePath = fileURLToPath(filePath);
                    } catch (e) {
                        filePath = filePath.slice(7);
                    }
                }
                // 相对路径以HTML文件所在目录为基准resolve
                if (!path.isAbsolute(filePath)) {
                    filePath = path.resolve(htmlDir, filePath);
                }
                if (fs.existsSync(filePath)) {
                    const data = fs.readFileSync(filePath);
                    const ext = (path.extname(filePath).slice(1) || 'png').toLowerCase();
                    const mime = MIME_BY_EXT[ext] || `image/${ext}`;
                    imgDataMap[src] = `data:${mime};base64,${data.toString('base64')}`;
                } else {
                    console.warn('Image not found:', filePath, '(src:', src, ')');
                }
            }

            if (Object.keys(imgDataMap).length > 0) {
                await page.evaluate((dataMap) => {
                    const imgs = document.querySelectorAll('img');
                    for (const img of imgs) {
                        const origSrc = img.getAttribute('src');
                        if (origSrc && dataMap[origSrc]) {
                            img.src = dataMap[origSrc];
                        }
                    }
                }, imgDataMap);
                // 等待图片重新渲染
                await new Promise(r => setTimeout(r, 300));
            }

            // === 预处理:将 dom-to-svg 不支持的 CSS 特性转为真实 DOM ===
            await page.evaluate(() => {
                // Computed colours are 'rgb(r, g, b)' or 'rgba(r, g, b, a)'; only alpha 0 is transparent.
                const isTransparent = (c) => c === 'transparent' || /^rgba\(.*,\s*0\)$/.test(c);

                // 1. 物化伪元素 ::before / ::after -> 真实 span
                // dom-to-svg 无法读取 CSS 伪元素,导致箭头/装饰丢失
                const all = document.querySelectorAll('*');
                for (const el of all) {
                    for (const pseudo of ['::before', '::after']) {
                        const style = getComputedStyle(el, pseudo);
                        const content = style.content;
                        if (!content || content === 'none' || content === '""' || content === "''") continue;

                        const w = parseFloat(style.width) || 0;
                        const h = parseFloat(style.height) || 0;
                        const bg = style.backgroundColor;
                        const border = style.borderTopWidth;

                        // 只处理有尺寸或有边框的伪元素(箭头/装饰块)
                        if ((w > 0 || h > 0 || parseFloat(border) > 0) && content !== 'normal') {
                            const span = document.createElement('span');
                            span.style.display = style.display === 'none' ? 'none' : 'inline-block';
                            span.style.position = style.position;
                            span.style.width = style.width;
                            span.style.height = style.height;
                            span.style.backgroundColor = bg;
                            span.style.borderTop = style.borderTop;
                            span.style.borderRight = style.borderRight;
                            span.style.borderBottom = style.borderBottom;
                            span.style.borderLeft = style.borderLeft;
                            span.style.transform = style.transform;
                            span.style.top = style.top;
                            span.style.left = style.left;
                            span.style.right = style.right;
                            span.style.bottom = style.bottom;
                            span.style.borderRadius = style.borderRadius;
                            span.setAttribute('data-pseudo', pseudo);

                            // 文本内容(去掉引号)
                            const textContent = content.replace(/^["']|["']$/g, '');
                            if (textContent && textContent !== 'normal' && textContent !== 'none') {
                                span.textContent = textContent;
                                span.style.color = style.color;
                                span.style.fontSize = style.fontSize;
                                span.style.fontWeight = style.fontWeight;
                            }

                            if (pseudo === '::before') {
                                el.insertBefore(span, el.firstChild);
                            } else {
                                el.appendChild(span);
                            }
                        }
                    }
                }

                // 2. 将 conic-gradient 环形图转为 SVG
                // 查找带有 conic-gradient 背景的元素
                for (const el of document.querySelectorAll('*')) {
                    const computed = getComputedStyle(el);
                    const bgImage = computed.backgroundImage || '';
                    const gradStart = bgImage.indexOf('conic-gradient(');
                    if (gradStart < 0) continue;

                    const rect = el.getBoundingClientRect();
                    const size = Math.min(rect.width, rect.height);
                    if (size <= 0) continue;

                    // 解析 conic-gradient 的百分比和颜色
                    // Scan to the matching ')' so nested rgb()/rgba() colours stay intact;
                    // a plain [^)]+ match would stop inside the first colour.
                    const argStart = gradStart + 'conic-gradient('.length;
                    let depth = 1;
                    let argEnd = -1;
                    for (let i = argStart; i < bgImage.length; i++) {
                        const ch = bgImage[i];
                        if (ch === '(') {
                            depth++;
                        } else if (ch === ')') {
                            depth--;
                            if (depth === 0) { argEnd = i; break; }
                        }
                    }
                    if (argEnd < 0) continue;
                    const gradStr = bgImage.slice(argStart, argEnd);

                    // 提取百分比(典型格式: #color 0% 75%, #color2 75% 100%)
                    const percMatch = gradStr.match(/([\d.]+)%/g);
                    let percentage = 75; // 默认
                    if (percMatch && percMatch.length >= 2) {
                        percentage = parseFloat(percMatch[1]);
                    }

                    // 提取颜色
                    const colorMatch = gradStr.match(/(#[0-9a-fA-F]{3,8}|rgb[a]?\([^)]+\))/g);
                    const mainColor = colorMatch ? colorMatch[0] : '#4CAF50';
                    // The track colour is the first stop colour that differs from the main one
                    // (the serialized gradient may repeat the main colour for both of its stops).
                    const bgColor = (colorMatch && colorMatch.find(c => c !== mainColor)) || '#e0e0e0';

                    // 创建 SVG 替换
                    const svgNS = 'http://www.w3.org/2000/svg';
                    const svg = document.createElementNS(svgNS, 'svg');
                    svg.setAttribute('width', String(size));
                    svg.setAttribute('height', String(size));
                    svg.setAttribute('viewBox', `0 0 ${size} ${size}`);
                    svg.style.display = el.style.display || 'block';
                    svg.style.position = computed.position;
                    svg.style.top = computed.top;
                    svg.style.left = computed.left;

                    const cx = size / 2, cy = size / 2;
                    const r = size * 0.4;
                    const circumference = 2 * Math.PI * r;
                    const strokeWidth = size * 0.15;

                    // 背景圆环
                    const bgCircle = document.createElementNS(svgNS, 'circle');
                    bgCircle.setAttribute('cx', String(cx));
                    bgCircle.setAttribute('cy', String(cy));
                    bgCircle.setAttribute('r', String(r));
                    bgCircle.setAttribute('fill', 'none');
                    bgCircle.setAttribute('stroke', bgColor);
                    bgCircle.setAttribute('stroke-width', String(strokeWidth));

                    // 进度圆环
                    const fgCircle = document.createElementNS(svgNS, 'circle');
                    fgCircle.setAttribute('cx', String(cx));
                    fgCircle.setAttribute('cy', String(cy));
                    fgCircle.setAttribute('r', String(r));
                    fgCircle.setAttribute('fill', 'none');
                    fgCircle.setAttribute('stroke', mainColor);
                    fgCircle.setAttribute('stroke-width', String(strokeWidth));
                    fgCircle.setAttribute('stroke-dasharray', `${circumference * percentage / 100} ${circumference}`);
                    fgCircle.setAttribute('stroke-linecap', 'round');
                    fgCircle.setAttribute('transform', `rotate(-90 ${cx} ${cy})`);

                    svg.appendChild(bgCircle);
                    svg.appendChild(fgCircle);

                    // 保留子元素(如百分比文字),放到 foreignObject 不行
                    // 直接添加 SVG text
                    if (el.textContent && el.textContent.trim()) {
                        const svgText = document.createElementNS(svgNS, 'text');
                        svgText.setAttribute('x', String(cx));
                        svgText.setAttribute('y', String(cy));
                        svgText.setAttribute('text-anchor', 'middle');
                        svgText.setAttribute('dominant-baseline', 'central');
                        svgText.setAttribute('fill', computed.color);
                        svgText.setAttribute('font-size', computed.fontSize);
                        svgText.setAttribute('font-weight', computed.fontWeight);
                        svgText.textContent = el.textContent.trim();
                        svg.appendChild(svgText);
                    }

                    el.style.background = 'none';
                    el.style.backgroundImage = 'none';
                    el.insertBefore(svg, el.firstChild);
                }

                // 3. 将 CSS border 三角形箭头修复
                // 查找宽高为 0 但有 border 的元素(CSS 三角形技巧)
                for (const el of document.querySelectorAll('*')) {
                    const cs = getComputedStyle(el);
                    const w = parseFloat(cs.width);
                    const h = parseFloat(cs.height);
                    if (w !== 0 || h !== 0) continue;

                    const bt = parseFloat(cs.borderTopWidth) || 0;
                    const br = parseFloat(cs.borderRightWidth) || 0;
                    const bb = parseFloat(cs.borderBottomWidth) || 0;
                    const bl = parseFloat(cs.borderLeftWidth) || 0;

                    // 至少两个边框有宽度才是三角形
                    const borders = [bt, br, bb, bl].filter(v => v > 0);
                    if (borders.length < 2) continue;

                    // 找有色边框(非 transparent)
                    const sides = [
                        {dir: 'top', size: bt, color: cs.borderTopColor},
                        {dir: 'right', size: br, color: cs.borderRightColor},
                        {dir: 'bottom', size: bb, color: cs.borderBottomColor},
                        {dir: 'left', size: bl, color: cs.borderLeftColor},
                    ];
                    const nonTransparent = sides.filter(s => s.size > 0 && !isTransparent(s.color));

                    if (nonTransparent.length !== 1) continue;

                    // 用实际尺寸的 div 替换
                    const arrow = nonTransparent[0];
                    const totalW = bl + br;
                    const totalH = bt + bb;

                    el.style.width = totalW + 'px';
                    el.style.height = totalH + 'px';
                    el.style.border = 'none';

                    // 用 SVG 绘制三角形
                    const svgNS = 'http://www.w3.org/2000/svg';
                    const svg = document.createElementNS(svgNS, 'svg');
                    svg.setAttribute('width', String(totalW));
                    svg.setAttribute('height', String(totalH));
                    svg.style.display = 'block';
                    svg.style.overflow = 'visible';

                    const polygon = document.createElementNS(svgNS, 'polygon');
                    let points = '';
                    if (arrow.dir === 'bottom') points = `0,0 ${totalW},0 ${totalW/2},${totalH}`;
                    else if (arrow.dir === 'top') points = `${totalW/2},0 0,${totalH} ${totalW},${totalH}`;
                    else if (arrow.dir === 'right') points = `0,0 ${totalW},${totalH/2} 0,${totalH}`;
                    else if (arrow.dir === 'left') points = `${totalW},0 0,${totalH/2} ${totalW},${totalH}`;

                    polygon.setAttribute('points', points);
                    polygon.setAttribute('fill', arrow.color);
                    svg.appendChild(polygon);
                    el.appendChild(svg);
                }

                // 4. 修复 background-clip: text 渐变文字
                // dom-to-svg 不支持此特性,导致渐变背景变成色块、文字变白
                for (const el of document.querySelectorAll('*')) {
                    const cs = getComputedStyle(el);
                    const bgClip = cs.webkitBackgroundClip || cs.backgroundClip || '';
                    if (bgClip !== 'text') continue;

                    // 提取渐变/背景中的主色作为文字颜色
                    const bgImage = cs.backgroundImage || '';
                    let mainColor = '#FF6900'; // fallback
                    const colorMatch = bgImage.match(/(#[0-9a-fA-F]{3,8}|rgb[a]?\([^)]+\))/);
                    if (colorMatch) mainColor = colorMatch[1];

                    // 清除渐变背景效果,改用直接 color
                    el.style.backgroundImage = 'none';
                    el.style.background = 'none';
                    el.style.webkitBackgroundClip = 'border-box';
                    el.style.backgroundClip = 'border-box';
                    el.style.webkitTextFillColor = 'unset';
                    el.style.color = mainColor;
                    console.warn('html2svg fallback: background-clip:text -> color:' + mainColor, el.tagName);
                }

                // 5. 修复 -webkit-text-fill-color(非 background-clip:text 的独立使用)
                for (const el of document.querySelectorAll('*')) {
                    const cs = getComputedStyle(el);
                    const fillColor = cs.webkitTextFillColor;
                    if (!fillColor || fillColor === cs.color) continue;
                    // 如果 text-fill-color 与 color 不同,SVG 中会丢失
                    // 将 text-fill-color 值应用到 color
                    if (fillColor !== 'rgba(0, 0, 0, 0)' && fillColor !== 'transparent') {
                        el.style.color = fillColor;
                        el.style.webkitTextFillColor = 'unset';
                    }
                }

                // 6. 修复 mask-image / -webkit-mask-image(SVG 不支持)
                // 根据元素层级智能降级:底层图片降透明度,前景元素直接移除蒙版
                for (const el of document.querySelectorAll('*')) {
                    const cs = getComputedStyle(el);
                    const maskImg = cs.maskImage || cs.webkitMaskImage || '';
                    if (!maskImg || maskImg === 'none') continue;

                    // 清除 mask
                    el.style.maskImage = 'none';
                    el.style.webkitMaskImage = 'none';

                    // 判断是否为底层装饰图片(通过 z-index、pointer-events、opacity 推断)
                    const zIndex = parseInt(cs.zIndex) || 0;
                    const pointerEvents = cs.pointerEvents;
                    const isImg = el.tagName === 'IMG';
                    // Keep an explicit opacity of 0; only an unparsable value defaults to 1.
                    const parsedOpacity = parseFloat(cs.opacity);
                    const currentOpacity = Number.isNaN(parsedOpacity) ? 1 : parsedOpacity;

                    if (isImg || pointerEvents === 'none' || zIndex <= 0) {
                        // 底层氛围图:降低透明度 + 限制尺寸,不要遮挡内容
                        const newOpacity = Math.min(currentOpacity, 0.15);
                        el.style.opacity = String(newOpacity);
                        // 如果图片过大,限制为容器的合理比例
                        if (isImg) {
                            const parent = el.parentElement;
                            if (parent) {
                                const parentRect = parent.getBoundingClientRect();
                                const elRect = el.getBoundingClientRect();
                                if (elRect.width > parentRect.width * 0.8) {
                                    el.style.maxWidth = '60%';
                                    el.style.maxHeight = '60%';
                                }
                            }
                        }
                        console.warn('html2svg fallback: mask-image -> opacity:' + newOpacity + ' (background layer)', el.tagName);
                    } else {
                        // 前景元素:只移除蒙版,保持原样
                        console.warn('html2svg fallback: mask-image removed (foreground)', el.tagName);
                    }
                }
            });
            await new Promise(r => setTimeout(r, 300));

            // === 执行 DOM -> SVG 转换 ===
            let svgString = await page.evaluate(async () => {
                const { documentToSVG, inlineResources } = window.__domToSvg;
                const svgDoc = documentToSVG(document);
                await inlineResources(svgDoc.documentElement);

                // 后处理:将 <text> 的 color 属性转为 fill(SVG 标准)
                const texts = svgDoc.querySelectorAll('text');
                for (const t of texts) {
                    const c = t.getAttribute('color');
                    if (c && !t.getAttribute('fill')) {
                        t.setAttribute('fill', c);
                        t.removeAttribute('color');
                    }
                }

                return new XMLSerializer().serializeToString(svgDoc);
            });

            fs.writeFileSync(item.svg, svgString, 'utf-8');
            console.log('SVG: ' + path.basename(item.html));
            await page.close();
        }
    } finally {
        // Always shut the browser down so a failed page does not leak a Chromium process.
        await browser.close();
    }
    console.log('Done: ' + config.files.length + ' SVGs');
})().catch((err) => {
    console.error(err);
    process.exit(1);
});
"""

# Node script for the fallback: Puppeteer prints each page to a PDF.
# It receives a JSON config ({files: [{html, pdf}]}) as argv[2].
FALLBACK_SCRIPT = r"""
const puppeteer = require('puppeteer');
const fs = require('fs');
const path = require('path');
const { pathToFileURL } = require('url');

(async () => {
    const config = JSON.parse(process.argv[2]);
    const browser = await puppeteer.launch({
        headless: 'new',
        args: ['--no-sandbox', '--disable-setuid-sandbox', '--disable-gpu']
    });
    try {
        for (const item of config.files) {
            const page = await browser.newPage();
            await page.setViewport({ width: 1280, height: 720 });
            await page.goto(pathToFileURL(item.html).href, { waitUntil: 'networkidle0', timeout: 30000 });
            await new Promise(r => setTimeout(r, 500));
            await page.pdf({
                path: item.pdf,
                width: '1280px',
                height: '720px',
                printBackground: true,
                preferCSSPageSize: true
            });
            console.log('PDF: ' + path.basename(item.html));
            await page.close();
        }
    } finally {
        await browser.close();
    }
    console.log('Done: ' + config.files.length + ' PDFs');
})().catch((err) => {
    console.error(err);
    process.exit(1);
});
"""

# esbuild entry point: exposes dom-to-svg on window.__domToSvg for the page.
BUNDLE_ENTRY = """
import { documentToSVG, elementToSVG, inlineResources } from 'dom-to-svg';
window.__domToSvg = { documentToSVG, elementToSVG, inlineResources };
"""


def _run(cmd, *, cwd=None, timeout=60, capture=True):
    """Run *cmd* and return the CompletedProcess, or None if it could not run.

    A missing executable or a timeout is reported on stderr and turned into
    None so callers can fall back instead of crashing.
    """
    # shutil.which resolves wrappers such as npm.cmd / npx.cmd on Windows.
    argv = [shutil.which(cmd[0]) or cmd[0], *cmd[1:]]
    try:
        return subprocess.run(
            argv,
            cwd=None if cwd is None else str(cwd),
            capture_output=capture,
            text=True,
            timeout=timeout,
        )
    except FileNotFoundError:
        print(f"Command not found: {cmd[0]}", file=sys.stderr)
    except subprocess.TimeoutExpired:
        print(f"Command timed out after {timeout}s: {cmd[0]}", file=sys.stderr)
    return None


def _node_can_require(module: str, work_dir: Path) -> bool:
    """Return True if `node -e "require('<module>')"` succeeds in *work_dir*."""
    result = _run(["node", "-e", f"require('{module}')"], cwd=work_dir, timeout=10)
    return result is not None and result.returncode == 0


def _remove_file(path: Path) -> None:
    """Delete *path* if it exists."""
    if path.exists():
        path.unlink()


def ensure_deps(work_dir: Path) -> tuple:
    """Install missing Node dependencies and build the browser bundle.

    Args:
        work_dir: Directory used as the npm project root and bundle location.

    Returns:
        ("dom-to-svg", bundle_path_str) when the primary method is usable,
        otherwise ("pdf2svg", None).
    """
    # puppeteer is required by both the primary and the fallback method.
    if not _node_can_require("puppeteer", work_dir):
        print("Installing puppeteer...")
        result = _run(["npm", "install", "puppeteer"], cwd=work_dir, timeout=180)
        if result is None or result.returncode != 0:
            print("puppeteer install failed", file=sys.stderr)

    # dom-to-svg
    if not _node_can_require("dom-to-svg", work_dir):
        print("Installing dom-to-svg...")
        _run(["npm", "install", "dom-to-svg"], cwd=work_dir, timeout=60)
        if not _node_can_require("dom-to-svg", work_dir):
            print("dom-to-svg unavailable, using pdf2svg fallback", file=sys.stderr)
            return ("pdf2svg", None)

    # Bundle dom-to-svg for the browser; the bundle is reused once built.
    bundle_path = work_dir / "dom-to-svg.bundle.js"
    if not bundle_path.exists():
        print("Building dom-to-svg browser bundle...")
        entry_path = work_dir / ".bundle_entry.js"
        entry_path.write_text(BUNDLE_ENTRY, encoding="utf-8")
        try:
            result = _run(
                ["npx", "-y", "esbuild", str(entry_path), "--bundle", "--format=iife",
                 f"--outfile={bundle_path}", "--platform=browser"],
                cwd=work_dir,
                timeout=60,
            )
        finally:
            _remove_file(entry_path)
        if result is None or result.returncode != 0:
            detail = "" if result is None else result.stderr
            print(f"esbuild failed: {detail}", file=sys.stderr)
            return ("pdf2svg", None)

    return ("dom-to-svg", str(bundle_path))


def convert_dom_to_svg(html_files, output_dir, work_dir, bundle_path):
    """Convert *html_files* with the dom-to-svg method.

    Writes one "<stem>.svg" per HTML file into *output_dir*.

    Returns:
        True if the Node script exited successfully, False otherwise.
    """
    config = {
        "bundlePath": bundle_path,
        "files": [
            # Absolute paths: node runs with cwd=work_dir, not the caller's cwd.
            {"html": str(Path(f).resolve()), "svg": str(output_dir / (f.stem + ".svg"))}
            for f in html_files
        ],
    }

    script_path = work_dir / ".dom2svg_tmp.js"
    # The script contains non-ASCII comments, so the encoding must be explicit.
    script_path.write_text(CONVERT_SCRIPT, encoding="utf-8")

    try:
        print(f"Converting {len(html_files)} HTML files (dom-to-svg, text editable)...")
        result = _run(
            ["node", str(script_path), json.dumps(config)],
            cwd=work_dir, timeout=300, capture=False,
        )
        if result is None or result.returncode != 0:
            return False

        # Check whether the first SVG contains <text> elements.
        # NOTE(review): the statements after `content.count(` were lost in the
        # reviewed source; the report line and `return True` are reconstructed.
        first_svg = output_dir / (html_files[0].stem + ".svg")
        if first_svg.exists():
            content = first_svg.read_text(encoding="utf-8", errors="ignore")
            text_count = content.count("<text")
            print(f"  Verified: {text_count} <text> elements in {first_svg.name}")
        return True
    finally:
        _remove_file(script_path)


def convert_pdf2svg(html_files, output_dir, work_dir):
    """Fallback conversion: Puppeteer PDF followed by pdf2svg.

    Text becomes paths and is not editable in the result.

    Returns:
        True if at least one SVG was produced, False otherwise.
    """
    # NOTE(review): the head of this function was lost in the reviewed source;
    # the temp directory name, script file name and config layout are
    # reconstructed from how the surviving code uses them -- confirm.
    if shutil.which("pdf2svg") is None:
        print("pdf2svg not found on PATH", file=sys.stderr)
        return False

    pdf_tmp = output_dir / ".pdf_tmp"
    pdf_tmp.mkdir(parents=True, exist_ok=True)
    config = {
        "files": [
            {"html": str(Path(f).resolve()), "pdf": str(pdf_tmp / (f.stem + ".pdf"))}
            for f in html_files
        ],
    }

    script_path = work_dir / ".pdf2svg_tmp.js"
    script_path.write_text(FALLBACK_SCRIPT, encoding="utf-8")

    try:
        print(f"Step 1/2: HTML -> PDF ({len(html_files)} files)...")
        result = _run(
            ["node", str(script_path), json.dumps(config)],
            cwd=work_dir, timeout=300, capture=False,
        )
        if result is None or result.returncode != 0:
            return False

        print("Step 2/2: PDF -> SVG (WARNING: text becomes paths, NOT editable)...")
        success = 0
        for item in config["files"]:
            svg_name = Path(item["pdf"]).stem + ".svg"
            svg_path = output_dir / svg_name
            result = _run(["pdf2svg", item["pdf"], str(svg_path)], timeout=30)
            if result is not None and result.returncode == 0:
                print(f"  OK {svg_name}")
                success += 1
            else:
                print(f"  FAILED {svg_name}", file=sys.stderr)
        return success > 0
    finally:
        _remove_file(script_path)
        if pdf_tmp.exists():
            shutil.rmtree(pdf_tmp)


def convert(html_dir: Path, output_dir: Path) -> bool:
    """Convert one HTML file, or every *.html file in a directory, to SVG.

    Args:
        html_dir: An HTML file or a directory containing HTML files.
        output_dir: Directory that receives the SVG files (created if needed).

    Returns:
        True if conversion succeeded with either method, False otherwise
        (including when no HTML files are found).
    """
    # Resolve up front: the Node scripts run with a different working directory.
    html_dir = Path(html_dir).resolve()
    output_dir = Path(output_dir).resolve()

    if html_dir.is_file():
        html_files = [html_dir]
        work_dir = html_dir.parent.parent
    else:
        html_files = sorted(html_dir.glob("*.html"))
        work_dir = html_dir.parent

    if not html_files:
        print(f"No HTML files in {html_dir}", file=sys.stderr)
        return False

    output_dir.mkdir(parents=True, exist_ok=True)

    method, bundle_path = ensure_deps(work_dir)

    if method == "dom-to-svg" and bundle_path:
        if convert_dom_to_svg(html_files, output_dir, work_dir, bundle_path):
            print(f"\nDone! {len(html_files)} SVGs -> {output_dir}")
            return True
        print("dom-to-svg failed, falling back to pdf2svg...")

    return convert_pdf2svg(html_files, output_dir, work_dir)


def main():
    """Command-line entry point; exits 0 on success and 1 on failure or bad usage."""
    usage = "Usage: python3 html2svg.py <html_dir_or_file> [-o output_dir]"
    args = sys.argv[1:]

    output_arg = None
    if "-o" in args:
        idx = args.index("-o")
        if idx + 1 >= len(args):
            # "-o" given without a value.
            print(usage)
            sys.exit(1)
        output_arg = args[idx + 1]
        del args[idx:idx + 2]

    if not args:
        print(usage)
        sys.exit(1)

    html_path = Path(args[0]).resolve()
    if output_arg is not None:
        output_dir = Path(output_arg).resolve()
    else:
        # Default: an "svg" directory next to the input file or directory.
        output_dir = html_path.parent / "svg"

    success = convert(html_path, output_dir)
    sys.exit(0 if success else 1)


if __name__ == "__main__":
    main()