| 1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567 |
- #!/usr/bin/env python3
- # -*- coding: utf-8 -*-
- """
- 增强版PDF生成器 V2
- 基于HTML文件解析和Markdown内容提取的PDF生成系统
- 支持中英文双语版本,自动生成目录和正文
- """
- import os
- import sys
- import json
- import re
- import tempfile
- import shutil
- import subprocess
- from pathlib import Path
- from typing import List, Dict, Optional, Tuple
- import argparse
- from datetime import datetime
- from bs4 import BeautifulSoup
- import markdown
- from markdown.extensions import codehilite, tables, toc
- class HTMLParser:
- """HTML文件解析器"""
-
- def __init__(self, html_file: Path):
- self.html_file = html_file
- self.soup = None
- self._parse_html()
-
- def _parse_html(self):
- """解析HTML文件"""
- try:
- with open(self.html_file, 'r', encoding='utf-8') as f:
- content = f.read()
- self.soup = BeautifulSoup(content, 'html.parser')
- except Exception as e:
- print(f"警告: 无法解析HTML文件 {self.html_file}: {e}")
- self.soup = None
-
- def extract_title(self) -> str:
- """提取页面标题"""
- if not self.soup:
- return "未知标题"
-
- # 尝试多种方式获取标题
- title_selectors = [
- 'h1',
- '.rst-content h1',
- 'title',
- '.document h1'
- ]
-
- for selector in title_selectors:
- element = self.soup.select_one(selector)
- if element:
- title = element.get_text().strip()
- if title and title != "Titan-Board SDK 1.0.0 文档":
- return title
-
- return "未知标题"
-
- def extract_content(self) -> str:
- """提取主要内容(保留HTML结构,并将资源链接改为绝对路径)"""
- if not self.soup:
- return ""
-
- # 查找主要内容区域
- content_selectors = [
- '.rst-content',
- '.document',
- '.body',
- 'main',
- 'article'
- ]
-
- content_element = None
- for selector in content_selectors:
- content_element = self.soup.select_one(selector)
- if content_element:
- break
-
- if not content_element:
- # 如果没有找到特定容器,使用body
- content_element = self.soup.find('body')
-
- if not content_element:
- return ""
- # 移除不需要的元素(脚本、样式、侧栏、版本菜单、编辑/PDF按钮、搜索框等)
- remove_selectors = [
- 'script', 'style', 'nav',
- '.wy-nav-side', '.rst-versions', '.version-menu',
- '.edit-button', '.pdf-button', '[role="search"]',
- 'a.headerlink'
- ]
- for sel in remove_selectors:
- for node in content_element.select(sel):
- node.decompose()
- # 将相对资源路径转换为绝对 file:// 路径
- base_dir = self.html_file.parent
- def _abs_url(url: str) -> str:
- if not url:
- return url
- u = url.strip()
- if u.startswith(('http://', 'https://', 'data:', 'mailto:', '#')):
- return u
- try:
- return (base_dir / u).resolve().as_uri()
- except Exception:
- return u
- for tag in content_element.select('[src]'):
- tag['src'] = _abs_url(tag.get('src'))
- for tag in content_element.select('a[href]'):
- href = tag.get('href')
- # 锚点保持不变
- if href and not href.startswith('#'):
- tag['href'] = _abs_url(href)
- # 返回主要内容的内部HTML,保留原有结构与样式类名
- return content_element.decode_contents()
-
- def extract_toc(self) -> List[Dict]:
- """提取目录结构"""
- if not self.soup:
- return []
-
- toc_items = []
-
- # 查找目录元素
- toc_selectors = [
- '.wy-menu .toctree-l1',
- '.toctree-wrapper .toctree-l1',
- '.rst-content .toctree-l1'
- ]
-
- for selector in toc_selectors:
- elements = self.soup.select(selector)
- if elements:
- for element in elements:
- link = element.find('a')
- if link:
- title = link.get_text().strip()
- href = link.get('href', '')
- if title and href:
- toc_items.append({
- 'title': title,
- 'href': href,
- 'level': 1
- })
- break
-
- return toc_items
- class MarkdownProcessor:
- """Markdown处理器"""
-
- def __init__(self):
- self.md = markdown.Markdown(
- extensions=[
- 'codehilite',
- 'tables',
- 'toc',
- 'fenced_code',
- 'attr_list',
- 'nl2br',
- 'sane_lists',
- 'footnotes'
- ],
- extension_configs={
- 'codehilite': {
- 'css_class': 'highlight',
- 'use_pygments': False
- },
- 'toc': {
- 'permalink': False,
- 'baselevel': 1
- }
- }
- )
-
- def process_markdown(self, md_content: str) -> str:
- """处理Markdown内容"""
- try:
- # Markdown 实例是有状态的,转换前需重置以避免交叉污染
- self.md.reset()
- html = self.md.convert(md_content)
- return html
- except Exception as e:
- print(f"警告: Markdown处理失败: {e}")
- return md_content
- class DocumentScanner:
- """文档扫描器(从原始Markdown项目目录读取)"""
-
- def __init__(self, html_dir: Path, projects_root: Path):
- self.html_dir = html_dir
- self.projects_root = projects_root
- # 分类与顺序:从 config.yaml 的 generation.output_structure 读取,如果缺省则按默认顺序
- self.categories = {}
- self.category_name_map = {}
- self.category_order: List[str] = []
- try:
- cfg_path = Path(__file__).parent / 'config.yaml'
- if cfg_path.exists():
- import yaml
- with open(cfg_path, 'r', encoding='utf-8') as f:
- cfg = yaml.safe_load(f) or {}
- cfg_cats = (cfg.get('categories') or {})
- out_struct = ((cfg.get('generation', {}) or {}).get('output_structure', []) or [])
- if out_struct:
- self.category_order = list(out_struct)
- else:
- self.category_order = ['start', 'basic', 'driver', 'component', 'multimedia', 'multcore']
- # 名称映射 + 填充 categories 字典用于扫描
- for key in self.category_order:
- node = cfg_cats.get(key) or {}
- name_cn = node.get('name') or key
- name_en = node.get('name_en') or name_cn
- self.category_name_map[key] = {'name': name_cn, 'name_en': name_en}
- # categories 值用中文默认名作占位
- self.categories[key] = name_cn
- except Exception:
- # 回退默认
- self.category_order = ['start', 'basic', 'driver', 'component', 'multimedia', 'multcore']
- for key, default_cn in {
- 'start': '快速上手',
- 'basic': '基础篇',
- 'driver': '驱动篇',
- 'component': '组件篇',
- 'multimedia': '多媒体显示篇',
- 'multcore': '多核通信篇'
- }.items():
- self.category_name_map[key] = {'name': default_cn, 'name_en': default_cn}
- self.categories[key] = default_cn
-
- def scan_documents(self, language: str = 'zh') -> Dict[str, List[Dict]]:
- """扫描文档结构(只使用 Markdown 文件)"""
- documents = {}
-
- for category, category_name in self.categories.items():
- # 用 html_dir 列出该分类下有哪些项目目录(递归),以确定项目名
- category_dir = self.html_dir / category
- if not category_dir.exists():
- continue
-
- category_docs = []
-
- # 递归查找该分类下的所有项目目录:判定标准为目录中存在任意 HTML(README*.html 等)
- try:
- candidates = []
- for html_file in category_dir.rglob('*.html'):
- rel = html_file.parent.relative_to(category_dir) # 支持多级路径
- if str(rel) == '.':
- continue
- candidates.append(rel)
- except Exception:
- candidates = []
- # 去重并按路径稳定排序
- seen = set()
- uniq_candidates = []
- for rel in sorted(candidates, key=lambda p: str(p).lower()):
- key = str(rel).replace('\\','/')
- if key in seen:
- continue
- seen.add(key)
- uniq_candidates.append(rel)
- for rel_path in uniq_candidates:
- project_dir = self.projects_root / rel_path
- if not project_dir.exists():
- # 回退到最后一段匹配
- project_dir = self.projects_root / rel_path.name
- if not project_dir.exists():
- continue
- readme_file = project_dir / ('README_zh.md' if language == 'zh' else 'README.md')
- if not readme_file.exists():
- # 若指定语言缺失,尝试另一种
- alt_readme = project_dir / ('README.md' if language == 'zh' else 'README_zh.md')
- if alt_readme.exists():
- readme_file = alt_readme
- else:
- continue
- title = self._extract_markdown_title(readme_file)
- category_docs.append({
- 'title': title,
- 'file': readme_file,
- 'project_name': str(rel_path).replace('\\','/'),
- 'project_dir': project_dir,
- 'category': category,
- 'category_name': self.category_name_map.get(category, {}).get('name', category),
- 'category_name_en': self.category_name_map.get(category, {}).get('name_en', self.category_name_map.get(category, {}).get('name', category))
- })
- if category_docs:
- documents[category] = category_docs
-
- return documents
-
- def _extract_markdown_title(self, md_file: Path) -> str:
- """从Markdown文件提取标题"""
- try:
- with open(md_file, 'r', encoding='utf-8') as f:
- content = f.read()
-
- # 查找第一个一级标题
- lines = content.split('\n')
- for line in lines:
- line = line.strip()
- if line.startswith('# '):
- title = line[2:].strip()
- # 清理标题中的Markdown格式
- title = re.sub(r'\*\*(.*?)\*\*', r'\1', title) # 移除粗体
- title = re.sub(r'\*(.*?)\*', r'\1', title) # 移除斜体
- title = re.sub(r'`(.*?)`', r'\1', title) # 移除代码格式
- return title
-
- # 如果没有找到一级标题,尝试二级标题
- for line in lines:
- line = line.strip()
- if line.startswith('## '):
- title = line[3:].strip()
- title = re.sub(r'\*\*(.*?)\*\*', r'\1', title)
- title = re.sub(r'\*(.*?)\*', r'\1', title)
- title = re.sub(r'`(.*?)`', r'\1', title)
- return title
-
- return md_file.stem
- except Exception as e:
- print(f"警告: 无法读取Markdown文件 {md_file}: {e}")
- return md_file.stem
- class PDFGeneratorV2:
- """增强版PDF生成器V2"""
-
- def __init__(self, html_dir: Path, output_dir: Path, keep_temp: bool = False, browser_path: Optional[str] = None):
- self.html_dir = html_dir
- self.output_dir = output_dir
- self.temp_dir = Path(tempfile.mkdtemp())
- self.keep_temp = keep_temp
- self.browser_path = browser_path
- self.scanner = DocumentScanner(html_dir, self._derive_projects_root())
- self.md_processor = MarkdownProcessor()
- self.toc_entries = [] # [{'level':1,'title':'1. Title','anchor':'id'}]
- self.assets_dir: Optional[Path] = None
- def __del__(self):
- """清理临时文件"""
- try:
- if not getattr(self, 'keep_temp', False) and self.temp_dir.exists():
- shutil.rmtree(self.temp_dir, ignore_errors=True)
- except Exception:
- pass
-
- def generate_pdf(self, title: str = "SDK文档", language: str = "zh") -> bool:
- """生成PDF的主流程"""
- print("=" * 60)
- print(f"开始生成PDF文档 V2 - 语言: {language}")
- print("=" * 60)
- # 为每次生成创建独立的临时目录,避免跨语言复用导致清理困难
- try:
- import tempfile as _tempfile
- self.temp_dir = Path(_tempfile.mkdtemp())
- # 重置目录收集,避免多语言生成时相互污染
- self.toc_entries = []
- # 0. 加载项目信息(版本、版权等)
- self.project_meta = self._load_project_meta()
- # 1. 扫描文档结构
- print("1. 扫描文档结构...")
- documents = self.scanner.scan_documents(language)
- if not documents:
- print("✗ 未找到文档文件")
- return False
-
- total_docs = sum(len(docs) for docs in documents.values())
- print(f"✓ 找到 {total_docs} 个文档文件")
-
- # 准备资源输出目录(用于相对路径拷贝)
- self.assets_dir = self.temp_dir / 'assets'
- self.assets_dir.mkdir(exist_ok=True)
- # 记录章节结构来源(动态/硬编码)
- order = getattr(self.scanner, 'category_order', None)
- if order:
- print("✓ 章节结构: 动态 (来自 config.yaml:generation.output_structure)")
- print(" 顺序: " + ", ".join(order))
- else:
- print("✓ 章节结构: 硬编码回退 (未在 config.yaml 中找到 output_structure)")
- # 2. 生成正文内容(先生成正文以便收集目录项)
- print("2. 生成正文内容...")
- content_html = self._generate_content(documents, language)
- # 3. 生成目录(依赖已收集的 toc_entries)
- print("3. 生成目录结构...")
- toc_html = self._generate_toc(documents, language)
-
- # 4. 创建完整的HTML文件
- print("4. 创建完整HTML文件...")
- full_html = self._create_full_html(title, toc_html, content_html, language)
-
- # 5. 生成PDF
- print("5. 生成PDF文件...")
- success = self._generate_pdf_from_html(full_html, title, language)
-
- if success:
- print("=" * 60)
- print("PDF生成完成!")
- print(f"📁 输出位置: {self.output_dir}")
- print("=" * 60)
- if getattr(self.scanner, 'category_order', None):
- print("总结: 本次 PDF 章节名称与顺序根据 docs/source/config.yaml 动态生成")
- else:
- print("总结: 本次 PDF 章节名称与顺序使用硬编码回退顺序")
- if getattr(self, 'keep_temp', False):
- try:
- print(f"临时目录保留: {self.temp_dir}")
- except Exception:
- pass
-
- return success
-
- except Exception as e:
- print(f"✗ PDF生成过程出错: {e}")
- return False
- finally:
- # 结束后清理本次生成的临时目录(除非要求保留)
- try:
- if not getattr(self, 'keep_temp', False) and getattr(self, 'temp_dir', None) and self.temp_dir.exists():
- shutil.rmtree(self.temp_dir, ignore_errors=True)
- except Exception:
- pass
-
- def _generate_toc(self, documents: Dict[str, List[Dict]], language: str) -> str:
- """根据已收集的多级标题生成目录HTML(带点线引导)"""
- import re
- list_items = []
- for entry in self.toc_entries:
- indent = (entry['level'] - 1) * 18
- title_text = entry["title"]
- # 清洗标题中的前导列表符号或编号后的星号:"* ", "- ", "• "、以及诸如 "1.2. * Title"
- title_text = re.sub(r'^[\s\*\-\u2022]+', '', title_text)
- title_text = re.sub(r'^(\d+(?:\.\d+)*)\.\s*[\*\-\u2022]+\s*', r'\1. ', title_text)
- # 再次去除可能残留的单独星号包围
- title_text = re.sub(r'\s+[\*\u2022]+\s+', ' ', title_text)
- list_items.append(
- f'<li class="toc-item level-{entry["level"]}" style="margin-left:{indent}px">'
- f'<a href="#{entry["anchor"]}"><span class="toc-text">{title_text}</span><span class="toc-dots"></span><span class="toc-page"></span></a>'
- f'</li>'
- )
- toc_title = '目录' if language == 'zh' else 'Contents'
- return (
- '<div class="toc">'
- f'<h2>{toc_title}</h2>'
- '<ul>' + ''.join(list_items) + '</ul>'
- '</div>'
- )
-
- def _generate_content(self, documents: Dict[str, List[Dict]], language: str) -> str:
- """生成正文内容HTML(分类、文档二级编号 + 文档内多级编号)"""
- content_sections = []
-
- category_index = 0
- # 按配置顺序渲染分类
- for category in getattr(self, 'category_order', []) or documents.keys():
- docs = documents.get(category, [])
- if not docs:
- continue
-
- # 本地化分类名
- # 根据语言选择分类名
- category_name = docs[0]['category_name'] if language == 'zh' else docs[0].get('category_name_en', docs[0]['category_name'])
- if language == 'en':
- cn_to_en = {
- '快速上手': 'Getting Started',
- '基础篇': 'Basics',
- '驱动篇': 'Drivers',
- '组件篇': 'Components',
- '多媒体显示篇': 'Multimedia Display',
- '多核通信篇': 'Multicore Communication',
- }
- category_name = cn_to_en.get(category_name, category_name)
- category_anchor = f'cat-{category}'
- category_index += 1
- category_number_text = f'{category_index}. '
- content_sections.append(f'<h1 id="{category_anchor}" class="category-title">{category_number_text}{category_name}</h1>')
- self.toc_entries.append({'level': 1, 'title': f'{category_number_text}{category_name}', 'anchor': category_anchor})
-
- doc_counter = 0
- for doc in docs:
- doc_id = f"doc-{self._slugify(doc['project_name'])}"
- content_sections.append(f'<section id="{doc_id}" class="document-section">')
-
- # 文档标题(二级)
- doc_counter += 1
- doc_anchor = f'{doc_id}-title'
- doc_number_text = f'{category_index}.{doc_counter}. '
- content_sections.append(f'<h2 id="{doc_anchor}" class="document-title">{doc_number_text}{doc["title"]}</h2>')
- self.toc_entries.append({'level': 2, 'title': f'{doc_number_text}{doc["title"]}', 'anchor': doc_anchor})
-
- # 文档内容与内部编号(三级及以下)
- doc_content = self._extract_document_content(doc, language)
- if doc_content:
- numbered_html = self._auto_number_and_collect_toc(doc_content, base_numbers=[category_index, doc_counter], doc_title=doc["title"])
- content_sections.append(numbered_html)
- else:
- content_sections.append('<p>文档内容加载失败</p>')
-
- content_sections.append('</section>')
-
- return '\n'.join(content_sections)
-
- def _extract_document_content(self, doc_meta: Dict, language: str) -> str:
- """提取文档内容,并修复资源路径"""
- try:
- doc_file: Path = doc_meta['file']
- # 仅从Markdown文件提取内容
- with open(doc_file, 'r', encoding='utf-8') as f:
- md_content = f.read()
-
- # 预处理Markdown内容
- md_content = self._preprocess_markdown(md_content)
-
- # 处理Markdown
- html_content = self.md_processor.process_markdown(md_content)
- # 修复图片/链接等资源路径
- html_content = self._rewrite_resource_paths(html_content, doc_meta)
- return html_content
-
- except Exception as e:
- print(f"警告: 无法提取文档内容 {doc_file}: {e}")
- return ""
-
- def _text_to_html(self, text: str) -> str:
- """将纯文本转换为HTML格式"""
- if not text:
- return ""
-
- # 简单的文本到HTML转换
- lines = text.split('\n')
- html_lines = []
-
- for line in lines:
- line = line.strip()
- if not line:
- html_lines.append('<br>')
- elif line.startswith('#'):
- # 标题
- level = len(line) - len(line.lstrip('#'))
- title_text = line.lstrip('# ').strip()
- html_lines.append(f'<h{level}>{title_text}</h{level}>')
- elif line.startswith('- ') or line.startswith('* '):
- # 列表项
- html_lines.append(f'<li>{line[2:].strip()}</li>')
- elif line.startswith('```'):
- # 代码块
- html_lines.append('<pre><code>')
- elif line.endswith('```'):
- html_lines.append('</code></pre>')
- else:
- # 普通段落
- html_lines.append(f'<p>{line}</p>')
-
- return '\n'.join(html_lines)
- def _derive_projects_root(self) -> Path:
- """从 docs/source/config.yaml 读取项目根目录,找不到则回退到 ../../project"""
- try:
- cfg_path = Path(__file__).parent / 'config.yaml'
- if cfg_path.exists():
- import yaml
- with open(cfg_path, 'r', encoding='utf-8') as f:
- cfg = yaml.safe_load(f) or {}
- pdir = ((cfg.get('repository', {}) or {}).get('projects_dir', '../../project') or '../../project')
- return (Path(__file__).parent / pdir).resolve()
- except Exception:
- pass
- return (Path(__file__).parent / '../../project').resolve()
- def _slugify(self, text: str) -> str:
- slug = re.sub(r'[^\w\-\.]+', '-', text, flags=re.UNICODE).strip('-').lower()
- return slug or 'section'
- def _rewrite_resource_paths(self, html: str, doc_meta: Dict) -> str:
- """将文档内相对资源路径改写为相对于合并HTML的相对路径。
- 策略:把源图片复制到临时目录 temp/assets/<category>/<project>/,并将链接改为相对路径 assets/...。
- """
- try:
- soup = BeautifulSoup(html, 'html.parser')
- project_dir: Path = doc_meta.get('project_dir')
- category: str = doc_meta.get('category', '')
- project_name: str = doc_meta.get('project_name', '')
- docs_source_root = Path(__file__).parent
- # 目标相对目录(相对于合并HTML)
- rel_target_dir = Path('assets') / category / project_name
- abs_target_dir = (self.assets_dir or (self.temp_dir / 'assets')) / category / project_name
- abs_target_dir.mkdir(parents=True, exist_ok=True)
- def resolve_url(u: str) -> str:
- if not u:
- return u
- s = u.strip()
- if s.startswith(('http://', 'https://', 'data:', 'mailto:', '#')):
- return s
- # 尝试项目源码目录
- if project_dir is not None:
- p1 = (project_dir / s)
- try:
- if p1.exists():
- # 复制到目标 assets 目录并返回相对路径
- dest = abs_target_dir / p1.name
- try:
- if p1.is_file():
- import shutil
- shutil.copy2(p1, dest)
- return str((rel_target_dir / p1.name).as_posix())
- except Exception:
- pass
- except Exception:
- pass
- # 尝试 docs/source/<category>/<project_name>/
- p2 = (docs_source_root / category / project_name / s)
- try:
- if p2.exists():
- dest = abs_target_dir / p2.name
- try:
- if p2.is_file():
- import shutil
- shutil.copy2(p2, dest)
- return str((rel_target_dir / p2.name).as_posix())
- except Exception:
- pass
- except Exception:
- pass
- return s
- for tag in soup.select('img[src]'):
- tag['src'] = resolve_url(tag.get('src'))
- for tag in soup.select('a[href]'):
- href = tag.get('href')
- if href and not href.startswith('#'):
- tag['href'] = resolve_url(href)
- return str(soup)
- except Exception:
- return html
- def _auto_number_and_collect_toc(self, html: str, base_numbers: List[int], doc_title: Optional[str] = None) -> str:
- """为文档内部标题自动编号并收集目录(最大深度到第3级)。
- 规则:目录深度仅到 3 级:分类(1) -> 文档(2) -> 文档内首级标题(3)。
- base_numbers: 分类号与文档号作为前缀,例如 [2,1] -> 2.1.x
- """
- try:
- soup = BeautifulSoup(html, 'html.parser')
- # 避免 Sphinx headerlink 图标导致的方框,移除
- for node in soup.select('a.headerlink'):
- node.decompose()
- # 规范化:如果文档首个标题不是 h1,则将其作为 h1 起始
- headers = soup.find_all([f'h{i}' for i in range(1, 4)])
- first_level = None
- for h in headers:
- first_level = int(h.name[1])
- break
- if first_level and first_level > 1:
- for h in headers:
- lvl = int(h.name[1])
- h.name = f'h{max(1, lvl - first_level + 1)}'
- # 如果文档首个 h1 与外层文档标题相同,则删除该 h1,避免目录重复(例如 1.1 与 1.1.1 重复)
- if doc_title:
- # 取第一个 h1
- first_h1 = soup.find('h1')
- if first_h1:
- def _normalize(text: str) -> str:
- import re as _re
- t = (text or '').strip()
- t = _re.sub(r'[\s\*\u2022]+', ' ', t)
- t = _re.sub(r'\s+', ' ', t)
- return t
- if _normalize(first_h1.get_text()) == _normalize(doc_title):
- # 删除该 h1,不计入目录
- first_h1.decompose()
- # 仅对文档内首级标题(h1)编号,保证总深度不超过 3
- local_counter = 0
- for header in soup.find_all('h1'):
- local_counter += 1
- parts = [str(n) for n in base_numbers] + [str(local_counter)]
- number_text = '.'.join(parts) + '. '
- # 原标题文本
- original_text = header.get_text(strip=True)
- # 写回:前置编号
- # 清空子节点,仅保留纯文本(保留强调等会复杂,这里简化)
- header.clear()
- header.string = number_text + original_text
- # 设置锚点 id
- anchor = header.get('id') or self._slugify(original_text)
- header['id'] = anchor
- # 收集目录项:层级等于数字段数
- toc_level = len(parts)
- self.toc_entries.append({'level': toc_level, 'title': number_text + original_text, 'anchor': anchor})
- return str(soup)
- except Exception:
- return html
-
- def _preprocess_markdown(self, md_content: str) -> str:
- """预处理Markdown内容"""
- if not md_content:
- return ""
-
- lines = md_content.split('\n')
- processed_lines = []
- in_code_block = False
- code_block_language = ""
-
- for line in lines:
- # 处理代码块
- if line.strip().startswith('```'):
- if not in_code_block:
- # 开始代码块
- in_code_block = True
- code_block_language = line.strip()[3:].strip()
- processed_lines.append(f'```{code_block_language}')
- else:
- # 结束代码块
- in_code_block = False
- code_block_language = ""
- processed_lines.append('```')
- continue
-
- # 在代码块内,保持原样
- if in_code_block:
- processed_lines.append(line)
- continue
-
- # 处理标题
- if line.strip().startswith('#'):
- # 确保标题前后有空行
- if processed_lines and processed_lines[-1].strip():
- processed_lines.append('')
- processed_lines.append(line)
- processed_lines.append('')
- continue
-
- # 处理列表项
- if line.strip().startswith(('- ', '* ', '+ ')):
- # 确保列表前有空行
- if processed_lines and processed_lines[-1].strip() and not processed_lines[-1].strip().startswith(('- ', '* ', '+ ')):
- processed_lines.append('')
- processed_lines.append(line)
- continue
-
- # 处理数字列表
- if re.match(r'^\s*\d+\.\s+', line):
- # 确保列表前有空行
- if processed_lines and processed_lines[-1].strip() and not re.match(r'^\s*\d+\.\s+', processed_lines[-1]):
- processed_lines.append('')
- processed_lines.append(line)
- continue
-
- # 处理表格
- if '|' in line and line.strip():
- # 确保表格前有空行
- if processed_lines and processed_lines[-1].strip() and '|' not in processed_lines[-1]:
- processed_lines.append('')
- processed_lines.append(line)
- continue
-
- # 处理空行
- if not line.strip():
- # 避免连续的空行
- if processed_lines and processed_lines[-1].strip():
- processed_lines.append('')
- continue
-
- # 处理普通段落
- processed_lines.append(line)
-
- # 清理末尾的空行
- while processed_lines and not processed_lines[-1].strip():
- processed_lines.pop()
-
- return '\n'.join(processed_lines)
-
- def _create_full_html(self, title: str, toc_html: str, content_html: str, language: str) -> Path:
- """创建完整的HTML文件"""
- html_file = self.temp_dir / f"merged_{language}.html"
-
- # 根据语言设置字体
- if language == 'zh':
- # 扩充中文字体回退链,兼容 Linux CI(Noto/WenQuanYi/Droid)
- font_family = (
- '"Microsoft YaHei", "SimSun", '
- '"Noto Sans CJK SC", "Noto Sans CJK", "Noto Sans SC", '
- '"WenQuanYi Zen Hei", "Droid Sans Fallback", sans-serif'
- )
- lang_attr = "zh-CN"
- else:
- font_family = '"Arial", "Helvetica", sans-serif'
- lang_attr = "en"
-
- cover_subtitle = ('开发文档' if language == 'zh' else 'Documentation')
- label_version = ('版本' if language == 'zh' else 'Version')
- label_language = ('语言' if language == 'zh' else 'Language')
- label_generated = ('生成时间' if language == 'zh' else 'Generated on')
-
- # 获取项目描述
- project_description = ''
- if language == 'zh':
- project_description = self.project_meta.get('description', '')
- else:
- project_description = self.project_meta.get('description_en', '') or self.project_meta.get('description', '')
- # 封面信息(徽章 + 详情行)
- badge_lang = ('中文' if language == 'zh' else 'English')
- badge_version = (self.project_meta.get('version') or '1.0.0')
- badge_date = datetime.now().strftime('%Y年%m月%d日' if language == 'zh' else '%B %d, %Y')
- meta_badges_html = (
- '<div class="meta-badges">'
- f'<span class="pill">Version {badge_version}</span>'
- '<span class="sep">|</span>'
- f'<span class="pill">{badge_lang}</span>'
- '<span class="sep">|</span>'
- f'<span class="pill">{badge_date}</span>'
- '</div>'
- )
- detail_items = []
- author = (self.project_meta.get('author') or '').strip()
- website = (self.project_meta.get('website') or '').strip()
- copyright_txt = (self.project_meta.get('copyright') or '').strip()
- if author:
- detail_items.append(( '作者' if language == 'zh' else 'Author', author ))
- if website:
- website_display = website.replace('官网: ', '').replace('Website: ', '')
- detail_items.append(('', website_display)) # 空键名,不显示标签
- if copyright_txt:
- copyright_display = copyright_txt.replace('版权: ', '').replace('Copyright: ', '')
- detail_items.append(('', copyright_display)) # 空键名,不显示标签
- if detail_items:
- lines = []
- for key, val in detail_items:
- if not key: # 如果键名为空,只显示值
- if val.startswith(('http://', 'https://')):
- lines.append(f'<div class="meta-line"><a href="{val}">{val}</a></div>')
- else:
- lines.append(f'<div class="meta-line">{val}</div>')
- else:
- if key.lower().startswith(('官网','website')) and (val.startswith('http://') or val.startswith('https://')):
- lines.append(f'<div class="meta-line"><span class="k">{key}:</span> <a href="{val}">{val}</a></div>')
- else:
- lines.append(f'<div class="meta-line"><span class="k">{key}:</span> {val}</div>')
- meta_details_html = '<div class="meta-details">' + ''.join(lines) + '</div>'
- else:
- meta_details_html = ''
- # 生成项目描述HTML
- project_description_html = ''
- if project_description:
- project_description_html = f'<div class="cover-description">{project_description}</div>'
- html_template = f'''<!DOCTYPE html>
- <html lang="{lang_attr}">
- <head>
- <meta charset="UTF-8">
- <meta name="viewport" content="width=device-width, initial-scale=1.0">
- <title>{title}</title>
- <style>
- @page {{
- size: A4;
- margin-top: 1.50cm;
- margin-bottom: 1.75cm;
- margin-left: 1.10cm;
- margin-right: 1.10cm;
- }}
-
- body {{
- font-family: {font_family};
- line-height: 1.5;
- margin: 0;
- padding: 0;
- color: #222;
- font-size: 12pt;
- }}
-
- /* 封面样式 */
- .cover-page {{
- /* 避免封面后出现空白页 */
- page-break-after: avoid;
- text-align: center;
- padding: 2.2cm 2cm 1.2cm 2cm;
- height: auto;
- min-height: calc(100vh - 3.0cm);
- display: flex;
- flex-direction: column;
- justify-content: flex-start;
- align-items: center;
- margin-bottom: 0.8cm;
- }}
-
- .cover-title {{
- font-size: 2.5em;
- font-weight: bold;
- color: #2c3e50;
- margin-bottom: 1em;
- border-bottom: 3px solid #3498db;
- padding-bottom: 0.5em;
- }}
-
- .cover-subtitle {{
- font-size: 1.2em;
- color: #7f8c8d;
- margin-top: 0.6em;
- margin-bottom: 1.6em;
- }}
-
- .cover-description {{
- font-size: 1em;
- color: #5a6c7d;
- margin: 0 auto 1.5em auto;
- max-width: 80%;
- text-align: center;
- line-height: 1.6;
- font-style: italic;
- display: block;
- width: 100%;
- }}
- .cover-footer {{
- position: absolute;
- left: 0;
- right: 0;
- bottom: 1.8cm;
- text-align: center;
- }}
- .meta-badges {{
- display: inline-flex;
- align-items: center;
- gap: 10px;
- margin-bottom: 0.6em;
- }}
- .meta-badges .pill {{
- display: inline-block;
- padding: 4px 10px;
- border-radius: 9999px;
- font-size: 0.9em;
- color: #2c3e50;
- background: #f3f6fa;
- border: 1px solid #dde5ee;
- }}
- .meta-badges .sep {{
- color: #95a5a6;
- font-size: 0.95em;
- }}
- .meta-details {{
- font-size: 0.95em;
- color: #6b7280;
- }}
- .meta-details .meta-line {{ margin: 4px 0; }}
- .meta-details .k {{
- color: #374151;
- font-weight: 600;
- }}
- .meta-details a {{
- color: #2563eb;
- text-decoration: none;
- }}
- .meta-details a:hover {{
- text-decoration: underline;
- }}
-
- .cover-info {{
- font-size: 1em;
- color: #95a5a6;
- margin-top: auto;
- }}
-
- /* 目录样式 */
- .toc {{
- page-break-after: always;
- padding: 1.3cm 2cm 1.6cm 2cm;
- }}
-
- .toc h2 {{
- font-size: 1.8em;
- color: #2c3e50;
- text-align: left;
- margin: 0 0 0.8em 0;
- border-left: 4px solid #3498db;
- padding-left: 0.6em;
- }}
-
- .toc ul {{
- list-style: none;
- padding: 0;
- margin: 0;
- max-width: 16cm;
- }}
-
- .toc-category {{
- margin: 0.6em 0 0.35em 0;
- font-size: 1.04em;
- color: #334155;
- border-bottom: 1px solid #e2e8f0;
- padding-bottom: 0.25em;
- letter-spacing: 0.2px;
- }}
-
- .toc-item {{
- margin: 0.12em 0;
- padding: 0.12em 0;
- line-height: 1.38;
- }}
-
- .toc a {{
- text-decoration: none;
- color: #2c3e50;
- font-size: 0.95em;
- display: block;
- padding: 0.1em 0;
- }}
- .toc a {{ display: grid; grid-template-columns: auto 1fr auto; align-items: baseline; column-gap: 8px; }}
- .toc a .toc-text {{ white-space: nowrap; overflow: hidden; text-overflow: ellipsis; }}
- .toc a .toc-dots {{ border-bottom: 1px dotted #cbd5e1; height: 0; margin-top: 0.55em; }}
- .toc a .toc-page {{ color: #475569; font-variant-numeric: tabular-nums; padding-left: 4px; }}
-
- .toc a:hover {{
- color: #2563eb;
- background: #f8fafc;
- padding-left: 0.4em;
- transition: all 0.2s ease;
- }}
-
- /* 正文样式 */
- .content {{
- padding: 2cm;
- }}
- /* 脚注样式 */
- .footnote-ref {{
- vertical-align: super;
- font-size: 0.8em;
- }}
- .footnotes {{
- font-size: 0.9em;
- color: #555;
- }}
- .footnotes hr {{
- border: none;
- border-top: 1px solid #ddd;
- margin: 1em 0;
- }}
- .footnotes ol {{
- padding-left: 1.2em;
- }}
- .footnotes li {{
- margin: 0.4em 0;
- }}
-
- .category-title {{
- color: #2c3e50;
- border-bottom: 2px solid #3498db;
- padding-bottom: 10px;
- margin-top: 2em;
- margin-bottom: 1em;
- font-size: 1.8em;
- page-break-before: always;
- }}
-
- .category-title:first-child {{
- page-break-before: auto;
- }}
-
- .document-section {{
- margin-bottom: 1.2em;
- page-break-inside: avoid;
- }}
-
- .document-title {{
- color: #2c3e50;
- border-bottom: 1px solid #bdc3c7;
- padding-bottom: 8px;
- margin-top: 1.5em;
- margin-bottom: 1em;
- font-size: 1.4em;
- }}
-
- h1, h2, h3, h4, h5, h6 {{
- color: #2c3e50;
- margin-top: 1.5em;
- margin-bottom: 0.8em;
- }}
-
- h3 {{
- font-size: 1.2em;
- color: #7f8c8d;
- }}
-
- p {{
- margin: 0.6em 0;
- text-align: justify;
- }}
-
- code {{
- background: #f8f9fa;
- padding: 2px 6px;
- border-radius: 3px;
- font-family: "Consolas", "Monaco", monospace;
- border: 1px solid #e9ecef;
- font-size: 0.9em;
- color: #e83e8c;
- }}
-
- pre {{
- background: #f8f9fa;
- padding: 0.8em;
- border-radius: 5px;
- overflow-x: auto;
- margin: 0.6em 0;
- border: 1px solid #e9ecef;
- font-family: "Consolas", "Monaco", monospace;
- font-size: 0.9em;
- line-height: 1.4;
- }}
-
- pre code {{
- background: none;
- padding: 0;
- border: none;
- color: #333;
- font-size: inherit;
- }}
-
- ul, ol {{
- margin: 0.6em 0;
- padding-left: 2em;
- }}
-
- li {{
- margin: 0.5em 0;
- line-height: 1.6;
- }}
-
- ul ul, ol ol, ul ol, ol ul {{
- margin: 0.5em 0;
- }}
-
- /* 改进列表样式 */
- ul li {{
- list-style-type: disc;
- }}
-
- ul ul li {{
- list-style-type: circle;
- }}
-
- ul ul ul li {{
- list-style-type: square;
- }}
-
- ol li {{
- list-style-type: decimal;
- }}
-
- ol ol li {{
- list-style-type: lower-alpha;
- }}
-
- ol ol ol li {{
- list-style-type: lower-roman;
- }}
-
- img {{
- max-width: 100%;
- height: auto;
- margin: 1em 0;
- border: 1px solid #ddd;
- border-radius: 3px;
- }}
-
- table {{
- border-collapse: collapse;
- width: 100%;
- margin: 1.5em 0;
- border: 1px solid #ddd;
- }}
-
- th, td {{
- border: 1px solid #ddd;
- padding: 0.8em 1em;
- text-align: left;
- }}
-
- th {{
- background: #f8f9fa;
- font-weight: bold;
- color: #2c3e50;
- }}
-
- blockquote {{
- border-left: 4px solid #e74c3c;
- margin: 1.5em 0;
- padding: 1em 1.5em;
- background: #fdf2f2;
- font-style: italic;
- }}
-
- /* 改进表格样式 */
- table {{
- border-collapse: collapse;
- width: 100%;
- margin: 1.5em 0;
- border: 1px solid #ddd;
- font-size: 0.9em;
- }}
-
- th, td {{
- border: 1px solid #ddd;
- padding: 0.8em 1em;
- text-align: left;
- vertical-align: top;
- }}
-
- th {{
- background: #f8f9fa;
- font-weight: bold;
- color: #2c3e50;
- }}
-
- tr:nth-child(even) {{
- background: #f9f9f9;
- }}
-
- /* 改进链接样式 */
- a {{
- color: #3498db;
- text-decoration: none;
- }}
-
- a:hover {{
- text-decoration: underline;
- }}
-
- /* 改进强调样式 */
- strong, b {{
- font-weight: bold;
- color: #2c3e50;
- }}
-
- em, i {{
- font-style: italic;
- color: #7f8c8d;
- }}
-
- /* 改进水平线样式 */
- hr {{
- border: none;
- border-top: 2px solid #bdc3c7;
- margin: 2em 0;
- }}
-
- /* 改进引用块样式 */
- .highlight {{
- background: #f8f9fa;
- border: 1px solid #e9ecef;
- border-radius: 5px;
- padding: 1em;
- margin: 1em 0;
- }}
-
- /* 改进警告框样式 */
- .admonition {{
- border: 1px solid #ddd;
- border-radius: 5px;
- padding: 1em;
- margin: 1em 0;
- background: #f9f9f9;
- }}
-
- .admonition-title {{
- font-weight: bold;
- margin-bottom: 0.5em;
- color: #2c3e50;
- }}
-
- /* 打印样式 */
- @media print {{
- .cover-page {{
- page-break-after: avoid;
- }}
-
- .toc {{
- page-break-after: always;
- }}
-
- .category-title {{
- page-break-before: always;
- }}
-
- .document-section {{
- page-break-inside: avoid;
- }}
-
- h1, h2, h3 {{
- page-break-after: avoid;
- }}
-
- body {{
- margin: 0;
- padding: 0;
- }}
- }}
- </style>
- </head>
- <body>
-
- <!-- 封面页 -->
- <div class="cover-page">
- <div class="cover-title">{title}</div>
- <div class="cover-subtitle">{cover_subtitle}</div>
- <div class="cover-footer">
- {project_description_html}
- {meta_badges_html}
- {meta_details_html}
- </div>
- </div>
-
-
- <!-- 目录页 -->
- {toc_html}
-
- <!-- 正文内容 -->
- <div class="content">
- {content_html}
- </div>
- </body>
- </html>'''
-
- with open(html_file, 'w', encoding='utf-8') as f:
- f.write(html_template)
-
- print(f"✓ 已创建完整HTML文件: {html_file}")
- return html_file
- def _load_project_meta(self) -> Dict[str, str]:
- """从 docs/source/config.yaml 读取项目信息(名称、版本、版权等)"""
- meta = {"name": "", "version": "", "copyright": "", "website": "", "description": "", "description_en": ""}
- try:
- cfg_path = Path(__file__).parent / 'config.yaml'
- if cfg_path.exists():
- import yaml
- with open(cfg_path, 'r', encoding='utf-8') as f:
- cfg = yaml.safe_load(f) or {}
- proj = (cfg.get('project') or {})
- meta["name"] = proj.get('name', '')
- meta["version"] = proj.get('version', '')
- meta["website"] = proj.get('website', '')
- meta["description"] = proj.get('description', '')
- meta["description_en"] = proj.get('description_en', '')
- meta["copyright"] = (proj.get('copyright')
- or (cfg.get('project', {}).get('copyright'))
- or '')
- except Exception:
- pass
- return meta
-
- def _generate_pdf_from_html(self, html_file: Path, title: str, language: str) -> bool:
- """从HTML文件生成PDF"""
- try:
- # 确保输出目录存在
- self.output_dir.mkdir(parents=True, exist_ok=True)
-
- # 生成PDF文件名
- if language == 'zh':
- pdf_filename = f"{title}.pdf"
- else:
- # 英文版:将空格替换为下划线,再追加 _EN
- safe_title = (title or '').replace(' ', '_')
- pdf_filename = f"{safe_title}_EN.pdf"
-
- output_pdf = self.output_dir / pdf_filename
-
- # 尝试使用Chrome/Edge的headless模式生成PDF
- if self._try_chrome_pdf(html_file, output_pdf):
- print(f"✓ PDF生成成功: {output_pdf}")
- return True
-
- # 回退到手动方式
- print("⚠️ 自动PDF生成失败,回退到手动方式...")
- return self._generate_pdf_manual(html_file, output_pdf)
-
- except Exception as e:
- print(f"✗ PDF生成失败: {e}")
- return False
-
- def _try_chrome_pdf(self, html_file: Path, output_pdf: Path) -> bool:
- """尝试使用Chrome生成PDF"""
- try:
- # 优先使用用户提供的浏览器路径
- if getattr(self, 'browser_path', None):
- if os.path.exists(self.browser_path):
- browser_cmd = self.browser_path
- else:
- browser_cmd = None
- else:
- browser_cmd = None
- # 优先使用环境变量指定的浏览器路径
- env_browser = os.environ.get('CHROME_PATH') or os.environ.get('BROWSER')
- browser_paths = [
- r"C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe",
- r"C:\\Program Files (x86)\\Google\\Chrome\\Application\\chrome.exe",
- r"C:\\Program Files\\Microsoft\\Edge\\Application\\msedge.exe",
- r"C:\\Program Files (x86)\\Microsoft\\Edge\\Application\\msedge.exe",
- ]
- if env_browser:
- browser_paths.insert(0, env_browser)
- # 也尝试通过 PATH 查找
- try:
- import shutil as _shutil
- for candidate in [
- "chrome.exe", "msedge.exe", "chrome", "msedge",
- "google-chrome", "google-chrome-stable", "chromium", "chromium-browser"
- ]:
- p = _shutil.which(candidate)
- if p and os.path.exists(p):
- browser_paths.insert(0, p)
- break
- except Exception:
- pass
-
- if not browser_cmd:
- for path in browser_paths:
- if os.path.exists(path):
- browser_cmd = path
- break
-
- if not browser_cmd:
- return False
-
- # 构建命令
- cmd = [
- browser_cmd,
- "--headless=new",
- "--disable-gpu",
- "--no-sandbox",
- "--disable-dev-shm-usage",
- "--no-first-run",
- "--disable-extensions",
- "--disable-background-networking",
- "--disable-default-apps",
- "--disable-sync",
- "--disable-translate",
- "--disable-features=PrintingPdfHeaderFooter,TranslateUI",
- f"--print-to-pdf={str(output_pdf.resolve().as_posix())}",
- "--print-to-pdf-no-header",
- "--no-pdf-header-footer",
- "--disable-print-preview",
- "--run-all-compositor-stages-before-draw",
- "--virtual-time-budget=20000",
- f"file:///{html_file.resolve().as_posix()}"
- ]
-
- # 执行命令(以二进制模式捕获输出,避免控制台编码问题)
- result = subprocess.run(cmd, capture_output=True, text=False, timeout=60)
- ok = result.returncode == 0 and output_pdf.exists()
- if not ok:
- try:
- stderr_text = (result.stderr or b"").decode(errors="ignore")
- stdout_text = (result.stdout or b"").decode(errors="ignore")
- print("Chrome/Edge 无头打印失败: ")
- if stdout_text:
- print(stdout_text[:800])
- if stderr_text:
- print(stderr_text[:800])
- except Exception:
- pass
- return ok
-
- except Exception as e:
- print(f"Chrome PDF生成失败: {e}")
- return False
-
- def _generate_pdf_manual(self, html_file: Path, output_pdf: Path) -> bool:
- """手动生成PDF(回退方案)"""
- try:
- # 打开浏览器
- import webbrowser
- url = f"file:///{html_file.absolute()}"
- webbrowser.open(url)
-
- print("\n" + "="*60)
- print("📄 PDF生成说明:")
- print("1. 浏览器已自动打开HTML页面")
- print("2. 使用浏览器打印(Ctrl+P),目标选择“保存为 PDF”")
- print("3. 重要:取消勾选“页眉和页脚”(Headers and footers)以移除浏览器默认的时间、URL、页码")
- print("4. 如需保持版式,建议边距使用默认或无")
- print(f"5. 保存到: {output_pdf}")
- print("6. 完成后按回车键继续...")
- print("="*60)
-
- try:
- input("按回车键继续...")
- except EOFError:
- # 非交互环境下没有标准输入,直接继续
- pass
- return True
-
- except Exception as e:
- print(f"手动PDF生成失败: {e}")
- return False
- def main():
- """主函数"""
- parser = argparse.ArgumentParser(description="增强版PDF生成器V2")
- parser.add_argument('--html-dir', type=str, default='source_build/html/latest',
- help='HTML文件目录 (默认: source_build/html/latest)')
- parser.add_argument('--output-dir', type=str, default='source_build/html/latest/_static',
- help='PDF输出目录 (默认: source_build/html/latest/_static)')
- parser.add_argument('--title', type=str, default='Titan-Board SDK文档',
- help='PDF标题 (默认: Titan-Board SDK文档)')
- parser.add_argument('--language', type=str, default='zh',
- choices=['zh', 'en'],
- help='文档语言 (默认: zh)')
- parser.add_argument('--both', action='store_true',
- help='同时生成中英文版本')
- parser.add_argument('--keep-temp', action='store_true',
- help='保留临时目录以便调试(输出merged_*.html路径)')
- parser.add_argument('--browser', type=str, default='',
- help='指定 Chrome/Edge 浏览器可执行文件路径')
-
- args = parser.parse_args()
-
- # 转换为Path对象
- html_dir = Path(args.html_dir)
- output_dir = Path(args.output_dir)
-
- if not html_dir.exists():
- print(f"✗ HTML目录不存在: {html_dir}")
- sys.exit(1)
-
- # 创建PDF生成器
- generator = PDFGeneratorV2(html_dir, output_dir, keep_temp=args.keep_temp, browser_path=(args.browser or None))
-
- if args.both:
- # 生成中英文两个版本
- success_zh = generator.generate_pdf(args.title, "zh")
- success_en = generator.generate_pdf(args.title, "en")
- success = success_zh and success_en
- else:
- # 生成指定语言版本
- success = generator.generate_pdf(args.title, args.language)
-
- sys.exit(0 if success else 1)
- if __name__ == "__main__":
- main()
|