#!/usr/bin/env python3
"""
extract_api.py

Scan C++ headers in ./src for public interfaces (public class/struct methods
and free function declarations) and emit markdown snippets mirroring the src/
tree into ./docs.

Each interface line is rendered as:
### `signature`

The extraction is line-based and heuristic: it does not parse C++. Only
declarations that fit on a single line are recognised.

Usage:
  cd dofs
  python3 tools/extract_api.py
"""
import re
import sys
from pathlib import Path
from typing import List, Optional, Tuple

REPO_ROOT = Path(__file__).resolve().parents[1]
SRC_DIR = REPO_ROOT / "src"
DOCS_DIR = REPO_ROOT / "docs"

HEADER_EXTS = {".h", ".hh", ".hpp", ".hxx"}


# ---------- Utilities ----------

def read_text(p: Path) -> str:
    """Return the contents of *p* as text, or "" if it cannot be read.

    Undecodable bytes are dropped (``errors="ignore"``). A read failure is
    reported on stderr and treated as an empty file so one bad header does
    not abort the whole run.
    """
    try:
        return p.read_text(encoding="utf-8", errors="ignore")
    except (OSError, ValueError) as e:
        print(f"[WARN] failed reading {p}: {e}", file=sys.stderr)
        return ""


def strip_comments(code: str) -> str:
    """Remove ``/* ... */`` (including multiline) and ``// ...`` comments.

    Block comments are replaced by a single space so that the tokens on
    either side are not glued together (``int/*c*/x`` -> ``int x``).
    This is purely textual: comment markers inside string literals are
    stripped as well.
    """
    no_block = re.sub(r"/\*.*?\*/", " ", code, flags=re.S)
    return re.sub(r"//.*?$", "", no_block, flags=re.M)


def collapse_ws(s: str) -> str:
    """Collapse every run of whitespace in *s* to one space and trim the ends."""
    return re.sub(r"\s+", " ", s).strip()


def ensure_dir(path: Path) -> None:
    """Create the parent directory of *path* (and any ancestors) if missing."""
    path.parent.mkdir(parents=True, exist_ok=True)


# ---------- Heuristic extractors ----------

# A line consisting only of an access specifier, e.g. "public:".
ACCESS_RE = re.compile(r"^\s*(public|private|protected)\s*:\s*$")

# A line that starts a class/struct (definition or forward declaration).
CLASS_START_RE = re.compile(r"^\s*(class|struct)\s+([A-Za-z_]\w*)\b")

# A single-line declaration ending in ';', optionally followed by
# qualifiers such as const / noexcept / = 0 / = default / = delete.
POSSIBLE_FUNC_DECL_RE = re.compile(
    r"""^[^;{}()]*\b
    (?!typedef\b)(?!using\b)(?!friend\b)
    [A-Za-z_~]\w*\s*
    \(
      [^;]*     # params
    \)
    (?:\s*(?:const|noexcept|override|final|=\s*0|=\s*default|=\s*delete))*\s*
    ;
    \s*$""",
    re.X,
)

# A single-line signature whose body opens with '{' on the same line.
POSSIBLE_INLINE_DEF_RE = re.compile(
    r"""^[^;{}()]*\b
    (?!typedef\b)(?!using\b)(?!friend\b)
    [A-Za-z_~]\w*\s*
    \(
      [^{;]*    # params
    \)
    (?:\s*(?:const|noexcept|override|final))*\s*
    \{""",
    re.X,
)

# Lines starting with any of these are never treated as function signatures.
SKIP_PREFIXES = ("#define", "#include", "static_assert", "enum ", "enum class ",
                 "template<", "namespace ", "using ", "typedef ", "friend ",
                 "struct ", "class ")


def extract_public_methods(lines, is_struct_default_public: bool) -> List[str]:
    """Return the public method signatures found in a class body.

    Args:
        lines: The lines between the class's opening and closing brace.
        is_struct_default_public: True when members are public until an
            access specifier says otherwise (``struct``), False for ``class``.

    Returns:
        Whitespace-normalised signatures in source order. Declarations are
        kept as written; inline definitions are rendered as
        ``signature { ... }``.
    """
    public = is_struct_default_public
    out = []
    depth = 0
    for raw in lines:
        line = raw.strip()
        if not line:
            continue
        # Depth *before* this line decides whether the line sits directly in
        # the class scope. Anything deeper belongs to an inline function body
        # or a nested type and must neither be reported nor be allowed to
        # change the current access level.
        depth_before = depth
        # Clamp at zero so a stray '}' cannot disable the rest of the class.
        depth = max(depth + raw.count("{") - raw.count("}"), 0)
        if depth_before > 0:
            continue
        m = ACCESS_RE.match(line)
        if m:
            public = m.group(1) == "public"
            continue
        if not public:
            continue
        if line.startswith(SKIP_PREFIXES) or line.endswith(":"):
            continue
        if POSSIBLE_FUNC_DECL_RE.match(line):
            out.append(collapse_ws(line))
        elif POSSIBLE_INLINE_DEF_RE.match(line):
            sig = line.split("{", 1)[0].rstrip()
            out.append(collapse_ws(sig) + " { ... }")
    return out


def extract_free_function_decls(code: str) -> List[str]:
    """Return signatures of functions declared outside any class/struct.

    Class and struct bodies are blanked out first so their members are not
    mistaken for free functions. Statements inside a multi-line inline
    function body (opened on the signature line) are skipped as well.

    Args:
        code: Header source, ideally with comments already stripped.

    Returns:
        Whitespace-normalised signatures in source order.
    """
    # Pass 1: blank out class/struct declarations and bodies.
    kept = []
    in_class = False
    body_opened = False
    balance = 0
    for line in code.splitlines():
        if not in_class:
            if not CLASS_START_RE.match(line):
                kept.append(line)
                continue
            in_class, body_opened, balance = True, False, 0
        opens = line.count("{")
        balance += opens - line.count("}")
        body_opened = body_opened or opens > 0
        kept.append("")
        if body_opened:
            # Body fully closed (possibly on the very line that opened it).
            if balance <= 0:
                in_class = False
        elif ";" in line:
            # Terminated before any '{': a forward declaration, no body follows.
            in_class = False

    # Pass 2: pick out function-looking lines from what is left.
    out = []
    body_depth = 0
    for raw in kept:
        line = raw.strip()
        if body_depth > 0:
            # Inside an inline function body: only track where it ends.
            body_depth = max(body_depth + line.count("{") - line.count("}"), 0)
            continue
        if not line or line.startswith(SKIP_PREFIXES):
            continue
        if POSSIBLE_FUNC_DECL_RE.match(line):
            out.append(collapse_ws(line))
        elif POSSIBLE_INLINE_DEF_RE.match(line):
            sig = line.split("{", 1)[0].rstrip()
            out.append(collapse_ws(sig) + " { ... }")
            body_depth = max(line.count("{") - line.count("}"), 0)
    return out


def split_top_level_classes(code: str) -> List[Tuple[str, bool, List[str]]]:
    """Locate class/struct definitions and return their body lines.

    Forward declarations (``class Foo;``) are ignored. Classes nested inside
    another class are not reported separately; they stay part of the
    enclosing body.

    Args:
        code: Header source, ideally with comments already stripped.

    Returns:
        A list of ``(name, is_struct, body_lines)`` tuples in source order,
        where ``body_lines`` is the text between the outermost braces.
    """
    lines = code.splitlines()
    total = len(lines)
    results = []
    i = 0
    while i < total:
        m = CLASS_START_RE.match(lines[i])
        if not m:
            i += 1
            continue
        kind, name = m.group(1), m.group(2)

        # Find the opening brace on this or a following line. Hitting ';'
        # first means there is no body (forward declaration), so the next
        # '{' in the file belongs to something else.
        j = i
        while j < total and "{" not in lines[j] and ";" not in lines[j]:
            j += 1
        if j >= total or "{" not in lines[j]:
            i += 1
            continue

        # Capture lines until the braces balance again.
        depth = 0
        body = []
        while j < total:
            depth += lines[j].count("{") - lines[j].count("}")
            body.append(lines[j])
            if depth <= 0 and "}" in lines[j]:
                break
            j += 1

        first, last = body[0], body[-1]
        if len(body) == 1:
            # Whole definition on one line: keep what sits between the braces.
            open_at = first.find("{")
            close_at = first.rfind("}")
            inner = first[open_at + 1:close_at] if close_at > open_at else first[open_at + 1:]
            body_inner = [inner] if inner.strip() else []
        else:
            # Keep any code sharing a line with the opening or closing brace.
            head = first.split("{", 1)[1]
            tail = last.rsplit("}", 1)[0] if "}" in last else last
            body_inner = body[1:-1]
            if head.strip():
                body_inner = [head] + body_inner
            if tail.strip():
                body_inner = body_inner + [tail]

        results.append((name, kind == "struct", body_inner))
        i = j + 1
    return results


# ---------- Main per-file processing ----------

def process_header(path: Path) -> Optional[str]:
    """Build the markdown page for one header.

    Args:
        path: Header file located under ``SRC_DIR``.

    Returns:
        The markdown text, or None when the file is empty/unreadable or no
        public interface was detected.
    """
    raw = read_text(path)
    if not raw:
        return None
    code = strip_comments(raw)

    # Collect classes that expose at least one public method.
    class_entries = []
    for cname, is_struct, body in split_top_level_classes(code):
        methods = extract_public_methods(body, is_struct_default_public=is_struct)
        if methods:
            class_entries.append((cname, methods))

    # Collect free function declarations.
    free_funcs = extract_free_function_decls(code)

    if not class_entries and not free_funcs:
        return None

    # Build markdown with ### `signature` items.
    rel = path.relative_to(SRC_DIR)
    md_lines = [f"# {rel.as_posix()}", ""]

    if free_funcs:
        md_lines.append("## Free functions")
        md_lines.append("")
        for sig in free_funcs:
            md_lines.append(f"### `{sig}`")
            md_lines.append("")

    for cname, methods in class_entries:
        md_lines.append(f"## class {cname} — public interface")
        md_lines.append("")
        for sig in methods:
            md_lines.append(f"### `{sig}`")
            md_lines.append("")

    return "\n".join(md_lines)


def write_markdown(src_header: Path, content: str) -> Path:
    """Write *content* to the ``.md`` file mirroring *src_header* under docs/.

    Args:
        src_header: Header path located under ``SRC_DIR``.
        content: Markdown text to write.

    Returns:
        The path of the file that was written.
    """
    rel = src_header.relative_to(SRC_DIR)
    out_path = (DOCS_DIR / rel).with_suffix(".md")
    ensure_dir(out_path)
    out_path.write_text(content, encoding="utf-8")
    return out_path


def main() -> None:
    """Generate docs for every header under src/; exit with 1 if src/ is missing."""
    if not SRC_DIR.exists():
        print(f"[ERR] src/ not found at {SRC_DIR}", file=sys.stderr)
        sys.exit(1)

    generated = 0
    # Sorted so the processing order and log output are deterministic.
    for path in sorted(SRC_DIR.rglob("*")):
        if not path.is_file() or path.suffix.lower() not in HEADER_EXTS:
            continue
        result = process_header(path)
        if result:
            out = write_markdown(path, result)
            generated += 1
            print(f"[OK] {out.relative_to(REPO_ROOT)}")

    if generated == 0:
        print("[INFO] no public interfaces detected (heuristics may have filtered everything)")


if __name__ == "__main__":
    main()