#!/usr/bin/env python3
"""HTML 打包工具 -- 将多页 HTML 合并为可翻页的单文件预览
每页 HTML 放在独立的 iframe srcdoc 中,CSS 完全隔离,零冲突。
用法:
python html_packager.py <slides_dir> [-o output.html] [--title "Title"]
python html_packager.py ppt-output/slides/ -o ppt-output/preview.html
"""
import argparse
import base64
import html as html_module
import os
import re
import sys
from pathlib import Path
def inline_images(html_content: str, html_dir: Path) -> str:
    """Inline locally referenced images as base64 ``data:`` URIs.

    Every ``src="..."`` attribute and CSS ``url(...)`` reference is examined.
    A reference that resolves to an existing regular file (absolute, or
    relative to *html_dir*) is replaced with
    ``data:<mime>;base64,<payload>``. Anything else -- references carrying a
    URI scheme (``http:``, ``https:``, ``data:`` ...), missing files, paths
    the OS refuses to probe -- is left exactly as written.

    Args:
        html_content: HTML markup to process.
        html_dir: Directory against which relative references are resolved.

    Returns:
        The markup with every resolvable local reference inlined.

    Raises:
        OSError: If a referenced file exists but cannot be read.
    """
    mime_by_ext = {
        'jpg': 'image/jpeg', 'jpeg': 'image/jpeg',
        'png': 'image/png', 'gif': 'image/gif',
        'svg': 'image/svg+xml', 'webp': 'image/webp',
    }
    # A "scheme:" prefix of two or more characters marks a URI, never a
    # relative file reference (RFC 3986 forbids a colon in the first segment
    # of a relative path). Single-letter prefixes are excluded on purpose so
    # Windows drive paths such as "C:\img.png" still count as local files.
    uri_scheme = re.compile(r'[A-Za-z][A-Za-z0-9+.\-]+:')

    def replace_src(match):
        # Groups: opening (src=" or url( ), the reference, closing (" or ) ).
        attr, path_str, closing = match.groups()

        # Remote URLs and already-inlined data URIs are never local files;
        # probing them is pointless and a long data URI can make the
        # filesystem call fail with ENAMETOOLONG.
        if uri_scheme.match(path_str):
            return match.group(0)

        img_path = Path(path_str)
        if not img_path.is_absolute():
            img_path = html_dir / path_str

        try:
            is_local_file = img_path.is_file()
        except (OSError, ValueError):
            # Path cannot be probed (too long, embedded NUL, ...): keep as is.
            return match.group(0)
        if not is_local_file:
            return match.group(0)

        ext = img_path.suffix.lower().lstrip('.')
        mime = mime_by_ext.get(ext)
        if mime is None:
            # Unknown suffix: keep the historical image/<ext> guess, but never
            # emit the malformed "image/" for files without a suffix.
            mime = f'image/{ext}' if ext else 'application/octet-stream'
        data = base64.b64encode(img_path.read_bytes()).decode()
        return f'{attr}data:{mime};base64,{data}{closing}'

    # First pass: src="..." attributes; second pass: CSS url(...) references.
    html_content = re.sub(
        r'(src=["\'])([^"\']+?)(["\'])',
        replace_src, html_content)
    html_content = re.sub(
        r'(url\(["\']?)([^"\')\s]+?)(["\']?\))',
        replace_src, html_content)
    return html_content
def build_preview(slide_files: list, title: str = "PPT Preview") -> str:
    """Build a single-file, page-through HTML preview of the given slides.

    Each slide file is read as UTF-8, its local images are inlined via
    ``inline_images``, and the result is HTML-escaped and embedded in the
    ``srcdoc`` attribute of its own ``<iframe>`` so the CSS of one slide
    cannot leak into another. Only the first iframe is visible initially; a
    small script switches slides on arrow-key presses and keeps the
    ``current / total`` counter up to date.

    Args:
        slide_files: Paths of the slide HTML files, in presentation order.
        title: Text for the page ``<title>``; HTML-escaped before use.

    Returns:
        The complete preview document as a string.
    """
    slides_srcdoc = []
    for slide_file in slide_files:
        slide_path = Path(slide_file)
        content = slide_path.read_text(encoding="utf-8")
        # Inline images so the preview does not depend on external files.
        content = inline_images(content, slide_path.parent)
        # Escape for use inside a double-quoted srcdoc attribute
        # (& -> &amp;, " -> &quot;, < -> &lt;, ...).
        slides_srcdoc.append(html_module.escape(content, quote=True))

    total = len(slides_srcdoc)
    # Avoid showing "1 / 0" when there are no slides at all.
    first = 1 if total else 0
    escaped_title = html_module.escape(title)

    # One iframe per slide; only the first one starts out visible.
    iframes = []
    for index, srcdoc in enumerate(slides_srcdoc):
        display = "block" if index == 0 else "none"
        iframes.append(
            f'<iframe class="slide" srcdoc="{srcdoc}" '
            f'style="display:{display}"></iframe>'
        )
    iframes_block = '\n'.join(iframes)

    # Kept as plain (non-f) strings so CSS/JS braces need no doubling.
    style = """\
html, body { margin: 0; height: 100%; background: #1a1a1a; }
iframe.slide { position: fixed; top: 0; left: 0; width: 100%; height: 100%; border: 0; }
#counter, #hint { position: fixed; z-index: 10; padding: 4px 10px; border-radius: 4px;
  font: 13px/1.4 sans-serif; color: #fff; background: rgba(0, 0, 0, 0.5); }
#counter { top: 10px; right: 10px; }
#hint { bottom: 10px; right: 10px; }
"""
    script = """\
(function () {
  var slides = document.querySelectorAll('iframe.slide');
  var counter = document.getElementById('counter');
  var current = 0;
  function show(index) {
    if (index < 0 || index >= slides.length) { return; }
    slides[current].style.display = 'none';
    current = index;
    slides[current].style.display = 'block';
    counter.textContent = (current + 1) + ' / ' + slides.length;
  }
  function onKey(event) {
    if (event.key === 'ArrowRight' || event.key === 'ArrowDown') {
      show(current + 1);
    } else if (event.key === 'ArrowLeft' || event.key === 'ArrowUp') {
      show(current - 1);
    }
  }
  document.addEventListener('keydown', onKey);
  // Key presses made while a slide has focus are delivered to the iframe's
  // own document, so listen there as well.
  Array.prototype.forEach.call(slides, function (frame) {
    frame.addEventListener('load', function () {
      try {
        frame.contentDocument.addEventListener('keydown', onKey);
      } catch (err) { /* iframe not accessible; top-level keys still work */ }
    });
  });
})();
"""

    return f"""<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>{escaped_title}</title>
<style>
{style}</style>
</head>
<body>
<div id="counter">{first} / {total}</div>
{iframes_block}
<div id="hint">Arrow keys to navigate</div>
<script>
{script}</script>
</body>
</html>
"""
def _natural_sort_key(path: Path):
    """Return a sort key ordering embedded numbers numerically (2 before 10)."""
    # re.split with a capturing group alternates text and digit runs, so the
    # odd indices are always the digit runs; types therefore line up between
    # any two keys. The raw name breaks ties such as "01.html" vs "1.html".
    parts = re.split(r"(\d+)", path.name)
    key = [int(part) if index % 2 else part for index, part in enumerate(parts)]
    return key, path.name


def main():
    """Command-line entry point: pack a directory of slides into one preview.

    Parses the arguments, collects the ``*.html`` files directly inside the
    given directory, builds the preview with ``build_preview`` and writes it
    to the output path (default: ``preview.html`` next to the slides
    directory). Exits with status 1 when the path is not a directory or
    contains no slide files.
    """
    parser = argparse.ArgumentParser(description="HTML Packager for PPT Agent")
    parser.add_argument("path", help="Directory containing slide HTML files")
    parser.add_argument("-o", "--output", default=None, help="Output HTML file")
    parser.add_argument("--title", default="PPT Preview", help="Title")
    args = parser.parse_args()

    slides_dir = Path(args.path)
    if not slides_dir.is_dir():
        print(f"Error: {slides_dir} is not a directory", file=sys.stderr)
        sys.exit(1)

    output_path = args.output or str(slides_dir.parent / "preview.html")
    output_resolved = Path(output_path).resolve()

    # Skip the output file itself: when it lives inside the slides directory,
    # a re-run would otherwise embed the previous preview as a slide.
    # Natural ordering keeps slide2.html ahead of slide10.html.
    html_files = sorted(
        (p for p in slides_dir.glob("*.html") if p.resolve() != output_resolved),
        key=_natural_sort_key,
    )
    if not html_files:
        print(f"Error: No HTML files in {slides_dir}", file=sys.stderr)
        sys.exit(1)

    result = build_preview(html_files, title=args.title)

    # Make sure the destination directory exists before writing.
    Path(output_path).parent.mkdir(parents=True, exist_ok=True)
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(result)
    print(f"Created: {output_path} ({len(html_files)} slides)")
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()