| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330 |
- #!/usr/bin/env python3
- # -*- coding: utf-8 -*-
- """
- 国际化配置管理器
- 处理中英文文档的配置、目录结构和内容生成
- """
- import os
- import re
- import yaml
- from pathlib import Path
- from typing import Dict, List, Optional, Tuple, Any
- from dataclasses import dataclass
- @dataclass
- class CategoryConfig:
- """分类配置"""
- name: str
- name_en: str
- description: str
- patterns: List[str]
- @dataclass
- class ProjectConfig:
- """项目配置"""
- name: str
- title: str
- description: str
- description_en: str
- version: str
- author: str
- copyright: str
- website: str
- @dataclass
- class I18nConfig:
- """国际化配置"""
- project: ProjectConfig
- categories: Dict[str, CategoryConfig]
- repository: Dict[str, Any]
- generation: Dict[str, Any]
- sphinx: Dict[str, Any]
- class I18nConfigManager:
- """国际化配置管理器"""
-
- def __init__(self, config_path: Path):
- self.config_path = config_path
- self.config = self._load_config()
-
- def _load_config(self) -> I18nConfig:
- """加载配置文件"""
- with open(self.config_path, 'r', encoding='utf-8') as f:
- data = yaml.safe_load(f) or {}
-
- # 解析项目配置
- project_data = data.get('project', {})
- project = ProjectConfig(
- name=project_data.get('name', ''),
- title=project_data.get('title', ''),
- description=project_data.get('description', ''),
- description_en=project_data.get('description_en', ''),
- version=project_data.get('version', ''),
- author=project_data.get('author', ''),
- copyright=project_data.get('copyright', ''),
- website=project_data.get('website', '')
- )
-
- # 解析分类配置
- categories = {}
- for key, cat_data in data.get('categories', {}).items():
- categories[key] = CategoryConfig(
- name=cat_data.get('name', ''),
- name_en=cat_data.get('name_en', ''),
- description=cat_data.get('description', ''),
- patterns=cat_data.get('patterns', [])
- )
-
- return I18nConfig(
- project=project,
- categories=categories,
- repository=data.get('repository', {}),
- generation=data.get('generation', {}),
- sphinx=data.get('sphinx', {})
- )
-
- def get_category_name(self, category_key: str, language: str = 'zh') -> str:
- """获取分类名称"""
- if category_key not in self.config.categories:
- return category_key
-
- category = self.config.categories[category_key]
- if language == 'en':
- return category.name_en or category.name
- return category.name
-
- def get_project_config(self, language: str = 'zh') -> Dict[str, str]:
- """获取项目配置"""
- project = self.config.project
- if language == 'en':
- return {
- 'name': project.name,
- 'title': f"{project.name} Documentation",
- 'description': project.description_en or project.description,
- 'version': project.version,
- 'author': project.author,
- 'copyright': project.copyright,
- 'website': project.website,
- 'language': 'en'
- }
- else:
- return {
- 'name': project.name,
- 'title': project.title,
- 'description': project.description,
- 'version': project.version,
- 'author': project.author,
- 'copyright': project.copyright,
- 'website': project.website,
- 'language': 'zh_CN'
- }
-
- def get_sphinx_config(self, language: str = 'zh') -> Dict[str, Any]:
- """获取Sphinx配置"""
- base_config = self.config.sphinx.copy()
-
- if language == 'en':
- base_config.update({
- 'language': 'en',
- 'html_lang': 'en',
- 'html_title': f"{self.config.project.name} Documentation",
- 'html_short_title': self.config.project.name,
- 'html_search_language': 'en',
- 'html_search_options': {
- 'dict': 'en'
- }
- })
- else:
- base_config.update({
- 'language': 'zh_CN',
- 'html_lang': 'zh-CN',
- 'html_title': self.config.project.title,
- 'html_short_title': self.config.project.title,
- 'html_search_language': 'zh',
- 'html_search_options': {
- 'dict': 'zh'
- }
- })
-
- return base_config
-
- def get_toc_structure(self, language: str = 'zh') -> List[Dict[str, Any]]:
- """获取目录结构"""
- structure = []
-
- for category_key in self.config.generation.get('output_structure', []):
- if category_key in self.config.categories:
- category = self.config.categories[category_key]
- structure.append({
- 'key': category_key,
- 'name': self.get_category_name(category_key, language),
- 'description': category.description,
- 'patterns': category.patterns
- })
-
- return structure
-
- def get_file_suffix(self, language: str = 'zh') -> str:
- """获取文件后缀"""
- return '_zh' if language == 'zh' else ''
-
- def get_index_filename(self, language: str = 'zh') -> str:
- """获取索引文件名"""
- return f"index{self.get_file_suffix(language)}.rst"
-
- def get_html_filename(self, base_name: str, language: str = 'zh') -> str:
- """获取HTML文件名"""
- if language == 'zh':
- return f"{base_name}_zh.html"
- return f"{base_name}.html"
-
- def get_markdown_filename(self, base_name: str, language: str = 'zh') -> str:
- """获取Markdown文件名"""
- if language == 'zh':
- return f"{base_name}_zh.md"
- return f"{base_name}.md"
-
- def extract_headings_from_markdown(self, file_path: Path) -> List[Dict[str, Any]]:
- """从Markdown文件中提取标题结构"""
- headings = []
-
- if not file_path.exists():
- return headings
-
- with open(file_path, 'r', encoding='utf-8') as f:
- content = f.read()
-
- # 匹配标题的正则表达式
- heading_pattern = r'^(#{1,6})\s+(.+)$'
-
- for line_num, line in enumerate(content.split('\n'), 1):
- match = re.match(heading_pattern, line.strip())
- if match:
- level = len(match.group(1))
- title = match.group(2).strip()
-
- # 生成锚点ID
- anchor_id = self._generate_anchor_id(title)
-
- headings.append({
- 'level': level,
- 'title': title,
- 'anchor_id': anchor_id,
- 'line_num': line_num
- })
-
- return headings
-
- def _generate_anchor_id(self, title: str) -> str:
- """生成锚点ID"""
- # 移除特殊字符,转换为小写,用连字符连接
- anchor_id = re.sub(r'[^\w\s-]', '', title.lower())
- anchor_id = re.sub(r'[-\s]+', '-', anchor_id)
- return anchor_id.strip('-')
-
- def generate_toc_content(self, category_key: str, language: str = 'zh') -> str:
- """生成目录内容"""
- category = self.config.categories.get(category_key)
- if not category:
- return ""
-
- category_name = self.get_category_name(category_key, language)
-
- # 查找该分类下的所有项目
- projects = []
- docs_dir = self.config_path.parent
-
- for pattern in category.patterns:
- # 在docs目录下查找匹配的项目
- for item in docs_dir.glob(f"**/{pattern}"):
- if item.is_dir():
- # 查找README文件
- readme_file = None
- if language == 'zh':
- readme_file = item / "README_zh.md"
- if not readme_file.exists():
- readme_file = item / "README.md"
- else:
- readme_file = item / "README.md"
- if not readme_file.exists():
- readme_file = item / "README_zh.md"
-
- if readme_file and readme_file.exists():
- # 提取项目名称
- project_name = self._extract_project_name(readme_file, language)
- projects.append({
- 'name': project_name,
- 'path': readme_file.relative_to(docs_dir),
- 'headings': self.extract_headings_from_markdown(readme_file)
- })
-
- # 生成目录内容
- toc_lines = [f"{category_name}", "=" * len(category_name), ""]
-
- for project in projects:
- toc_lines.append(f".. toctree::")
- toc_lines.append(f" :maxdepth: 3")
- toc_lines.append(f" :caption: {project['name']}")
- toc_lines.append(f"")
- toc_lines.append(f" {project['path'].with_suffix('')}")
- toc_lines.append(f"")
-
- return "\n".join(toc_lines)
-
- def _extract_project_name(self, readme_file: Path, language: str = 'zh') -> str:
- """从README文件中提取项目名称"""
- try:
- with open(readme_file, 'r', encoding='utf-8') as f:
- first_line = f.readline().strip()
-
- # 移除Markdown标题标记
- if first_line.startswith('#'):
- return first_line.lstrip('#').strip()
-
- return first_line
- except Exception:
- return readme_file.parent.name
-
- def get_language_config(self, language: str = 'zh') -> Dict[str, Any]:
- """获取语言特定配置"""
- return {
- 'language': language,
- 'file_suffix': self.get_file_suffix(language),
- 'index_filename': self.get_index_filename(language),
- 'project_config': self.get_project_config(language),
- 'sphinx_config': self.get_sphinx_config(language),
- 'toc_structure': self.get_toc_structure(language)
- }
- def main():
- """测试函数"""
- config_path = Path(__file__).parent.parent / 'config.yaml'
- manager = I18nConfigManager(config_path)
-
- print("中文配置:")
- zh_config = manager.get_language_config('zh')
- print(f" 语言: {zh_config['language']}")
- print(f" 文件后缀: {zh_config['file_suffix']}")
- print(f" 索引文件: {zh_config['index_filename']}")
- print(f" 项目名称: {zh_config['project_config']['title']}")
-
- print("\n英文配置:")
- en_config = manager.get_language_config('en')
- print(f" 语言: {en_config['language']}")
- print(f" 文件后缀: {en_config['file_suffix']}")
- print(f" 索引文件: {en_config['index_filename']}")
- print(f" 项目名称: {en_config['project_config']['title']}")
-
- print("\n分类结构:")
- for category in en_config['toc_structure']:
- print(f" {category['key']}: {category['name']}")
- if __name__ == "__main__":
- main()
|