validate_skills.py 9.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325
  1. #!/usr/bin/env python3
  2. """Validate RyanJson skills metadata, routes, and markdown references."""
  3. from __future__ import annotations
  4. import re
  5. import sys
  6. from pathlib import Path
  7. from typing import Optional
# --- Repository layout -------------------------------------------------
# The script resolves the repo root two levels above its own location
# (i.e. it lives at <repo>/<dir>/<subdir>/validate_skills.py).
REPO_ROOT = Path(__file__).resolve().parents[2]
SKILLS_ROOT = REPO_ROOT / "skills"

# --- Patterns ----------------------------------------------------------
# Skill names: 1-64 characters of lowercase letters, digits, and hyphens.
SKILL_NAME_RE = re.compile(r"^[a-z0-9-]{1,64}$")
# Markdown ATX heading.  NOTE(review): not referenced elsewhere in this
# file — confirm whether it is still needed.
HEADING_RE = re.compile(r"^(#{1,6})\s+(.+?)\s*$")
# YAML frontmatter block ('--- ... ---') anchored at the start of a file.
FRONTMATTER_RE = re.compile(r"^---\n(.*?)\n---\n?", re.DOTALL)
# One simple 'key: value' line inside the frontmatter.
FRONTMATTER_FIELD_RE = re.compile(r"^([A-Za-z0-9_-]+):\s*(.*)$")
# One 'key: value' line in agents/openai.yaml; leading indentation allowed.
OPENAI_FIELD_RE = re.compile(r"^[ \t]*([A-Za-z0-9_]+):[ \t]*(.+)$")
# Case-insensitive whole-word match for "gemini".
GEMINI_RE = re.compile(r"\bgemini\b", re.IGNORECASE)

# Reference prefixes resolved from the repository root.
ROOT_PREFIXES = (
    "skills/",
    "RyanJson/",
    "test/",
    "scripts/",
    ".agent/",
    ".github/",
    "example/",
    "reports/",
    "localLogs/",
    "coverage/",
    "build/",
    "xmake/",
    "run_local_",
)
# Reference prefixes resolved relative to the markdown file that uses them.
LOCAL_PREFIXES = (
    "./",
    "../",
    "references/",
    "agents/",
)
# Bare filenames expected to exist at the repository root.
ROOT_FILES = {
    "AGENTS.md",
    "README.md",
    "Makefile",
    "SConscript",
    "xmake.lua",
}
# File suffixes that count as checkable markdown references.
REFERENCE_SUFFIXES = {
    ".md",
    ".yaml",
    ".yml",
    ".sh",
    ".py",
}
class ValidationError(RuntimeError):
    """Raised when validation fails."""
  53. def load_text(path: Path) -> str:
  54. return path.read_text(encoding="utf-8")
  55. def parse_frontmatter(skill_file: Path) -> dict[str, str]:
  56. text = load_text(skill_file)
  57. match = FRONTMATTER_RE.match(text)
  58. if match is None:
  59. raise ValidationError(f"{skill_file}: missing YAML frontmatter")
  60. fields: dict[str, str] = {}
  61. for raw_line in match.group(1).splitlines():
  62. line = raw_line.strip()
  63. if not line or line.startswith("#"):
  64. continue
  65. field_match = FRONTMATTER_FIELD_RE.match(line)
  66. if field_match is None:
  67. continue
  68. key = field_match.group(1)
  69. value = field_match.group(2).strip().strip("'\"")
  70. fields[key] = value
  71. return fields
  72. def validate_frontmatter(skill_dir: Path) -> list[str]:
  73. warnings: list[str] = []
  74. skill_file = skill_dir / "SKILL.md"
  75. if not skill_file.exists():
  76. raise ValidationError(f"{skill_dir}: missing SKILL.md")
  77. fields = parse_frontmatter(skill_file)
  78. name = fields.get("name", "")
  79. description = fields.get("description", "")
  80. if not name:
  81. raise ValidationError(f"{skill_file}: missing frontmatter field 'name'")
  82. if not SKILL_NAME_RE.fullmatch(name):
  83. raise ValidationError(
  84. f"{skill_file}: invalid name '{name}' (expected lowercase letters/digits/hyphens, <= 64 chars)"
  85. )
  86. if "anthropic" in name or "claude" in name:
  87. raise ValidationError(f"{skill_file}: invalid reserved substring in name '{name}'")
  88. if skill_dir.name != name:
  89. raise ValidationError(f"{skill_file}: directory name '{skill_dir.name}' does not match skill name '{name}'")
  90. if not description:
  91. raise ValidationError(f"{skill_file}: missing frontmatter field 'description'")
  92. if len(description) > 1024:
  93. raise ValidationError(f"{skill_file}: description exceeds 1024 characters")
  94. if "<" in description or ">" in description:
  95. raise ValidationError(f"{skill_file}: description contains XML-like angle brackets")
  96. body_lines = load_text(skill_file).splitlines()
  97. if len(body_lines) > 500:
  98. warnings.append(f"{skill_file}: body is longer than recommended 500 lines")
  99. if len(description) > 200:
  100. warnings.append(f"{skill_file}: description exceeds recommended 200 characters for Claude.ai compatibility")
  101. return warnings
  102. def validate_openai_yaml(skill_dir: Path) -> None:
  103. skill_file = skill_dir / "SKILL.md"
  104. skill_name = parse_frontmatter(skill_file)["name"]
  105. openai_file = skill_dir / "agents" / "openai.yaml"
  106. if not openai_file.exists():
  107. raise ValidationError(f"{skill_dir}: missing agents/openai.yaml")
  108. fields: dict[str, str] = {}
  109. for raw_line in load_text(openai_file).splitlines():
  110. match = OPENAI_FIELD_RE.match(raw_line)
  111. if match is None:
  112. continue
  113. key = match.group(1)
  114. value = match.group(2).strip().strip("'\"")
  115. fields[key] = value
  116. for key in ("display_name", "short_description", "default_prompt"):
  117. value = fields.get(key, "")
  118. if not value:
  119. raise ValidationError(f"{openai_file}: missing interface field '{key}'")
  120. if f"${skill_name}" not in fields["default_prompt"]:
  121. raise ValidationError(f"{openai_file}: default_prompt must reference ${skill_name}")
  122. def should_check_path(ref: str) -> bool:
  123. if not ref or any(char in ref for char in "*?[]"):
  124. return False
  125. if "://" in ref:
  126. return False
  127. if ref in {"references/", "agents/"}:
  128. return False
  129. if ref in ROOT_FILES:
  130. return True
  131. if ref.startswith(ROOT_PREFIXES) or ref.startswith(LOCAL_PREFIXES):
  132. return True
  133. return Path(ref).suffix.lower() in REFERENCE_SUFFIXES
  134. def iter_reference_candidates(code_span: str) -> list[str]:
  135. candidates: list[str] = []
  136. seen: set[str] = set()
  137. for raw_token in code_span.split():
  138. token = raw_token.strip("`'\"(),;:")
  139. if not should_check_path(token):
  140. continue
  141. if token in seen:
  142. continue
  143. seen.add(token)
  144. candidates.append(token)
  145. if candidates:
  146. return candidates
  147. token = code_span.strip("`'\"(),;:")
  148. if should_check_path(token):
  149. return [token]
  150. return []
  151. def find_repo_matches(ref: str) -> list[Path]:
  152. normalized = ref.rstrip("/")
  153. if normalized.startswith("./"):
  154. normalized = normalized[2:]
  155. while normalized.startswith("../"):
  156. normalized = normalized[3:]
  157. if not normalized:
  158. return []
  159. matches: list[Path] = []
  160. if "/" in normalized:
  161. for path in REPO_ROOT.rglob(Path(normalized).name):
  162. rel = path.relative_to(REPO_ROOT).as_posix()
  163. if rel.endswith(normalized):
  164. matches.append(path)
  165. return sorted(set(matches))
  166. return sorted(set(REPO_ROOT.rglob(normalized)))
  167. def resolve_reference(doc_path: Path, ref: str) -> Optional[Path]:
  168. if ref in ROOT_FILES or ref.startswith(ROOT_PREFIXES):
  169. target = (REPO_ROOT / ref).resolve()
  170. return target if target.exists() else None
  171. if ref.startswith(LOCAL_PREFIXES):
  172. target = (doc_path.parent / ref).resolve()
  173. return target if target.exists() else None
  174. if Path(ref).suffix.lower() in REFERENCE_SUFFIXES:
  175. target = (doc_path.parent / ref).resolve()
  176. if target.exists():
  177. return target
  178. matches = find_repo_matches(ref)
  179. if len(matches) == 1:
  180. return matches[0].resolve()
  181. return None
  182. def validate_markdown_references(doc_path: Path) -> list[str]:
  183. errors: list[str] = []
  184. seen: set[tuple[int, str]] = set()
  185. for lineno, line in enumerate(load_text(doc_path).splitlines(), start=1):
  186. for ref in re.findall(r"`([^`]+)`", line):
  187. for candidate in iter_reference_candidates(ref):
  188. key = (lineno, candidate)
  189. if key in seen:
  190. continue
  191. seen.add(key)
  192. target = resolve_reference(doc_path, candidate)
  193. if target is None and not find_repo_matches(candidate):
  194. rel_doc = doc_path.relative_to(REPO_ROOT)
  195. errors.append(f"{rel_doc}:{lineno}: missing markdown reference `{candidate}`")
  196. return errors
  197. def iter_skill_dirs() -> list[Path]:
  198. return sorted(path for path in SKILLS_ROOT.iterdir() if path.is_dir() and (path / "SKILL.md").exists())
  199. def iter_markdown_files() -> list[Path]:
  200. files = [REPO_ROOT / "AGENTS.md"]
  201. files.extend(sorted(SKILLS_ROOT.rglob("*.md")))
  202. return [path for path in files if path.exists()]
  203. def validate_agents_routes() -> list[str]:
  204. errors: list[str] = []
  205. agents_doc = load_text(REPO_ROOT / "AGENTS.md")
  206. for skill_dir in iter_skill_dirs():
  207. route = f"skills/{skill_dir.name}/SKILL.md"
  208. if route not in agents_doc:
  209. errors.append(f"AGENTS.md: missing route for `{route}`")
  210. return errors
  211. def validate_no_gemini_assets() -> list[str]:
  212. errors: list[str] = []
  213. forbidden_files = [
  214. REPO_ROOT / ".agent" / "rules" / "gemini.md",
  215. ]
  216. forbidden_files.extend(SKILLS_ROOT.rglob("agents/gemini.md"))
  217. forbidden_files.extend(SKILLS_ROOT.rglob("references/geminiCompat.md"))
  218. for path in forbidden_files:
  219. if path.exists():
  220. errors.append(f"{path.relative_to(REPO_ROOT)}: Gemini-specific asset is not allowed")
  221. scan_files = [REPO_ROOT / "AGENTS.md"]
  222. scan_files.extend(sorted(SKILLS_ROOT.rglob("*.md")))
  223. scan_files.extend(sorted((REPO_ROOT / ".agent").rglob("*.md")) if (REPO_ROOT / ".agent").exists() else [])
  224. for path in scan_files:
  225. text = load_text(path)
  226. if GEMINI_RE.search(text):
  227. errors.append(f"{path.relative_to(REPO_ROOT)}: Gemini-specific text is not allowed")
  228. return errors
  229. def main() -> int:
  230. warnings: list[str] = []
  231. errors: list[str] = []
  232. if not SKILLS_ROOT.exists():
  233. print(f"[ERROR] missing skills directory: {SKILLS_ROOT}", file=sys.stderr)
  234. return 1
  235. for skill_dir in iter_skill_dirs():
  236. try:
  237. warnings.extend(validate_frontmatter(skill_dir))
  238. validate_openai_yaml(skill_dir)
  239. except ValidationError as exc:
  240. errors.append(str(exc))
  241. errors.extend(validate_no_gemini_assets())
  242. errors.extend(validate_agents_routes())
  243. for doc_path in iter_markdown_files():
  244. errors.extend(validate_markdown_references(doc_path))
  245. for warning in warnings:
  246. print(f"[WARN] {warning}")
  247. if errors:
  248. for error in errors:
  249. print(f"[ERROR] {error}", file=sys.stderr)
  250. return 1
  251. print(f"[OK] validated {len(iter_skill_dirs())} skills and markdown references")
  252. return 0
  253. if __name__ == "__main__":
  254. sys.exit(main())