#!/usr/bin/env python3
"""HTML packager -- merge multi-page HTML into one paged, single-file preview.

Every slide is embedded in its own ``<iframe srcdoc="...">`` so that the CSS
of one slide can never leak into another slide or into the preview chrome.

Usage:
    python html_packager.py SLIDES_DIR [-o output.html] [--title "Title"]
    python html_packager.py ppt-output/slides/ -o ppt-output/preview.html
"""

import argparse
import base64
import html as html_module
import mimetypes
import os
import re
import sys
from pathlib import Path
from typing import Optional

# Matches src="..." / src='...' attributes (group 2 is the reference).
_SRC_ATTR_RE = re.compile(r'(src=["\'])([^"\']+?)(["\'])')
# Matches CSS url(...) with optional quotes (group 2 is the reference).
_CSS_URL_RE = re.compile(r'(url\(["\']?)([^"\')\s]+?)(["\']?\))')
# A URL scheme such as "data:" or "https:". At least two characters are
# required so that Windows drive letters ("C:\...") are still treated as paths.
_URL_SCHEME_RE = re.compile(r'^[a-zA-Z][a-zA-Z0-9+.\-]+:')

# Extensions whose MIME type is fixed here instead of being looked up, so the
# result does not depend on the platform's mimetypes database.
_IMAGE_MIME_TYPES = {
    'jpg': 'image/jpeg',
    'jpeg': 'image/jpeg',
    'png': 'image/png',
    'gif': 'image/gif',
    'svg': 'image/svg+xml',
    'webp': 'image/webp',
    'bmp': 'image/bmp',
    'ico': 'image/x-icon',
    'avif': 'image/avif',
}

# Styling for the preview chrome. Slide visibility is controlled through the
# inline ``display`` style of each iframe, so it is deliberately not set here.
_PREVIEW_CSS = """
html, body { margin: 0; height: 100%; background: #1e1e1e; font-family: sans-serif; }
#stage { position: absolute; top: 0; right: 0; bottom: 0; left: 0; }
iframe.slide { position: absolute; top: 0; left: 0; width: 100%; height: 100%; border: 0; background: #fff; }
#toolbar { position: fixed; left: 50%; bottom: 12px; transform: translateX(-50%); z-index: 10; padding: 6px 12px; border-radius: 6px; background: rgba(0, 0, 0, 0.6); color: #fff; }
#toolbar button { margin: 0 8px; cursor: pointer; }
"""

# Paging logic: shows exactly one iframe at a time and keeps the counter in sync.
_PREVIEW_JS = """
(function () {
  var frames = document.querySelectorAll('iframe.slide');
  var counter = document.getElementById('counter');
  var current = 0;
  function show(index) {
    if (index < 0 || index >= frames.length) { return; }
    frames[current].style.display = 'none';
    current = index;
    frames[current].style.display = 'block';
    counter.textContent = (current + 1) + ' / ' + frames.length;
  }
  document.getElementById('prev').addEventListener('click', function () { show(current - 1); });
  document.getElementById('next').addEventListener('click', function () { show(current + 1); });
  document.addEventListener('keydown', function (event) {
    if (event.key === 'ArrowLeft' || event.key === 'PageUp') {
      show(current - 1);
    } else if (event.key === 'ArrowRight' || event.key === 'PageDown') {
      show(current + 1);
    }
  });
})();
"""


def _is_external_reference(ref: str) -> bool:
    """Return True for references that are not local file paths."""
    return ref.startswith(('//', '#')) or _URL_SCHEME_RE.match(ref) is not None


def _image_mime_type(path: Path) -> Optional[str]:
    """Return the image MIME type for *path*, or None if it is not an image."""
    known = _IMAGE_MIME_TYPES.get(path.suffix.lower().lstrip('.'))
    if known:
        return known
    guessed, _ = mimetypes.guess_type(path.name)
    if guessed and guessed.startswith('image/'):
        return guessed
    return None


def inline_images(html_content: str, html_dir: Path) -> str:
    """Replace references to local image files with base64 ``data:`` URIs.

    Both ``src="..."`` attributes and CSS ``url(...)`` values are scanned.
    A reference is rewritten only when it points to an existing local file
    whose type is an image; everything else (remote URLs, existing data
    URIs, fragments, scripts, missing or unreadable files) is returned
    unchanged.

    Args:
        html_content: The HTML text to process.
        html_dir: Directory against which relative references are resolved.

    Returns:
        The HTML text with local image references inlined.
    """

    def replace_src(match):
        prefix = match.group(1)   # src=" or url(
        path_str = match.group(2)
        closing = match.group(3)  # " or )

        # Treating "data:..." or "https://..." as a filesystem path is
        # pointless and can raise OSError (name too long) for big data URIs.
        if _is_external_reference(path_str):
            return match.group(0)

        img_path = Path(path_str)
        if not img_path.is_absolute():
            img_path = html_dir / path_str

        # Only images are inlined: rewriting e.g. <script src="app.js"> to an
        # "image/js" data URI would stop the browser from executing it.
        mime = _image_mime_type(img_path)
        if mime is None:
            return match.group(0)

        try:
            if not img_path.is_file():
                return match.group(0)
            data = base64.b64encode(img_path.read_bytes()).decode()
        except OSError:
            # Best effort: an unreadable file keeps its original reference.
            return match.group(0)
        return f'{prefix}data:{mime};base64,{data}{closing}'

    html_content = _SRC_ATTR_RE.sub(replace_src, html_content)
    html_content = _CSS_URL_RE.sub(replace_src, html_content)
    return html_content


def build_preview(slide_files: list, title: str = "PPT Preview") -> str:
    """Build a paged preview document with one isolated iframe per slide.

    Args:
        slide_files: Paths of the slide HTML files, in display order.
        title: Text for the preview's ``<title>`` element.

    Returns:
        A complete HTML document as a string. The first slide is visible and
        the others are hidden until selected with the prev/next controls.
    """
    slides_srcdoc = []
    for slide_file in slide_files:
        slide_path = Path(slide_file)
        content = slide_path.read_text(encoding="utf-8")
        # Inline images so the slide no longer depends on files next to it.
        content = inline_images(content, slide_path.parent)
        # Escape so the whole document fits in a double-quoted srcdoc
        # attribute (& -> &amp;, " -> &quot;, ...).
        slides_srcdoc.append(html_module.escape(content, quote=True))

    total = len(slides_srcdoc)
    escaped_title = html_module.escape(title)

    iframes = []
    for i, srcdoc in enumerate(slides_srcdoc):
        display = "block" if i == 0 else "none"
        iframes.append(
            f'<iframe class="slide" srcdoc="{srcdoc}" '
            f'style="display:{display}"></iframe>'
        )
    iframes_block = '\n'.join(iframes)

    return f"""<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>{escaped_title}</title>
<style>{_PREVIEW_CSS}</style>
</head>
<body>
<div id="toolbar">
<button id="prev" type="button">&#9664;</button>
<span id="counter">1 / {total}</span>
<button id="next" type="button">&#9654;</button>
</div>
<div id="stage">
{iframes_block}
</div>
<script>{_PREVIEW_JS}</script>
</body>
</html>
"""


def main():
    """Command-line entry point: package a directory of slides into one file."""
    parser = argparse.ArgumentParser(description="HTML Packager for PPT Agent")
    parser.add_argument("path", help="Directory containing slide HTML files")
    parser.add_argument("-o", "--output", default=None, help="Output HTML file")
    parser.add_argument("--title", default="PPT Preview", help="Title")
    args = parser.parse_args()

    slides_dir = Path(args.path)
    if not slides_dir.is_dir():
        print(f"Error: {slides_dir} is not a directory", file=sys.stderr)
        sys.exit(1)

    output_path = args.output or str(slides_dir.parent / "preview.html")

    # If the output lives inside the slides directory, a previous run's
    # preview must not be picked up as a slide of the next run.
    output_resolved = Path(output_path).resolve()
    html_files = sorted(
        p for p in slides_dir.glob("*.html") if p.resolve() != output_resolved
    )
    if not html_files:
        print(f"Error: No HTML files in {slides_dir}", file=sys.stderr)
        sys.exit(1)

    result = build_preview(html_files, title=args.title)

    with open(output_path, "w", encoding="utf-8") as f:
        f.write(result)

    print(f"Created: {output_path} ({len(html_files)} slides)")


if __name__ == "__main__":
    main()