Files
dofs/tools/extract_api.py

256 lines
7.1 KiB
Python
Executable File

#!/usr/bin/env python3
"""
extract_api.py
Scan C++ headers in ./src for public interfaces (public class/struct methods
and free function declarations) and emit markdown snippets mirroring the src/
tree into ./docs.
Each interface line is rendered as:
### `signature`
Usage:
cd dofs
python3 tools/extract_api.py
"""
import re
import sys
from pathlib import Path
# Repository root: the parent of the directory that contains this script
# (parents[0] is the script's own directory, parents[1] is one level above it).
REPO_ROOT = Path(__file__).resolve().parents[1]
# Directory that is scanned recursively for C++ headers.
SRC_DIR = REPO_ROOT / "src"
# Directory that receives the generated markdown, mirroring SRC_DIR's layout.
DOCS_DIR = REPO_ROOT / "docs"
# File suffixes treated as headers; compared against the lower-cased suffix.
HEADER_EXTS = {".h", ".hh", ".hpp", ".hxx"}
# ---------- Utilities ----------
def read_text(p: Path) -> str:
    """Return the contents of *p* decoded as UTF-8, or "" if it cannot be read.

    Undecodable bytes are dropped (``errors="ignore"``). Any failure is
    reported on stderr as a warning and turned into an empty string, so the
    caller can simply skip the file.
    """
    try:
        with p.open("r", encoding="utf-8", errors="ignore") as handle:
            contents = handle.read()
    except Exception as e:
        # Best effort: one unreadable header must not abort the whole scan.
        print(f"[WARN] failed reading {p}: {e}", file=sys.stderr)
        return ""
    return contents
# One left-to-right scan that recognises comments AND literals, so that comment
# markers appearing inside a string or character literal are left alone.
_COMMENT_OR_LITERAL_RE = re.compile(
    r"/\*.*?\*/"                      # /* ... */ block comment (may span lines)
    r"|//[^\n]*"                      # // line comment, up to end of line
    r'|"(?:\\.|[^"\\\n])*"'           # "string literal" with backslash escapes
    r"|'(?:\\.[^'\n]*|[^'\\\n])'",    # 'c' or '\n' style character literal
    re.S,
)


def strip_comments(code: str) -> str:
    """Remove C/C++ comments from *code* while leaving literals untouched.

    * ``/* ... */`` comments (including multi-line ones) are replaced by a
      single space, so the tokens on either side are not glued together
      (``int/**/x`` becomes ``int x``, not ``intx``).
    * ``// ...`` comments are removed up to, but not including, the newline.
    * String and character literals are copied through verbatim, so text such
      as ``"http://host"`` is not mistaken for the start of a comment.

    Raw string literals (``R"(...)"``) and backslash-continued line comments
    are not given special treatment.
    """

    def _replacement(match):
        token = match.group(0)
        if token.startswith("/*"):
            return " "
        if token.startswith("//"):
            return ""
        # A string or character literal: keep it exactly as written.
        return token

    return _COMMENT_OR_LITERAL_RE.sub(_replacement, code)
def collapse_ws(s: str) -> str:
    """Return *s* with every whitespace run squeezed to one space and the ends trimmed."""
    # str.split() with no argument splits on whitespace runs and discards
    # leading/trailing empties, so re-joining with one space normalises the text.
    words = s.split()
    return " ".join(words)
def ensure_dir(path: Path):
    """Create the directory that will contain *path* (a file path), if missing.

    Intermediate directories are created as needed; an already existing
    directory is not an error. The file itself is not created.
    """
    containing_dir = path.parent
    containing_dir.mkdir(exist_ok=True, parents=True)
# ---------- Heuristic extractors ----------
# Matches a line that consists solely of an access specifier such as
# "public:" (optional whitespace around it); group 1 is the keyword.
ACCESS_RE = re.compile(r"^\s*(public|private|protected)\s*:\s*$")
# Matches a line that begins (after optional whitespace) with "class" or
# "struct" followed by an identifier; group 1 is the keyword, group 2 the name.
# NOTE: nothing after the name is inspected, so forward declarations such as
# "class Foo;" match as well.
CLASS_START_RE = re.compile(r"^\s*(class|struct)\s+([A-Za-z_]\w*)\b")
# Heuristic for a single-line function/method *declaration* (no body):
#   <prefix without ; { } ( )> <name> ( <params> ) <qualifiers...> ;
# The name is the word immediately before the first "(" and must not be
# "typedef", "using" or "friend". Trailing qualifiers may be any sequence of
# const / noexcept / override / final / "= 0" / "= default" / "= delete";
# whitespace after "=" is optional in all three "=" forms, so both "=0" and
# the more common "= 0" spelling of a pure virtual function are accepted.
POSSIBLE_FUNC_DECL_RE = re.compile(
    r"""^[^;{}()]*\b
        (?!typedef\b)(?!using\b)(?!friend\b)
        [A-Za-z_~]\w*\s*
        \(
        [^;]*       # params
        \)
        (?:\s*(?:const|noexcept|override|final|=\s*0|=\s*default|=\s*delete))*\s*
        ;
        \s*$""",
    re.X,
)
# Heuristic for a line that starts an inline function/method *definition*
# (verbose-mode pattern, so whitespace inside the pattern is ignored):
#   <prefix without ; { } ( )> <name> ( <params without { or ;> )
#   <const|noexcept|override|final ...> {
# The name is the word immediately before the first "(" and must not be
# "typedef", "using" or "friend". The pattern is not anchored at the end, so
# anything may follow the opening brace on the same line.
POSSIBLE_INLINE_DEF_RE = re.compile(
r"""^[^;{}()]*\b
(?!typedef\b)(?!using\b)(?!friend\b)
[A-Za-z_~]\w*\s*
\(
[^{;]* # params
\)
(?:\s*(?:const|noexcept|override|final))*\s*
\{""",
re.X,
)
# Prefixes of (already stripped) lines that are never considered as function
# candidates. Passed as a tuple to str.startswith(), so a line is skipped when
# it begins with any one of them. Entries are literal text: e.g. "template<"
# does not cover "template <" written with a space.
SKIP_PREFIXES = ("#define", "#include", "static_assert", "enum ", "enum class ",
"template<", "namespace ", "using ", "typedef ", "friend ",
"struct ", "class ")
def extract_public_methods(lines, is_struct_default_public: bool):
    """Collect public method signatures from the body lines of one class/struct.

    Args:
        lines: The comment-stripped source lines between the class's opening
            and closing brace.
        is_struct_default_public: Initial visibility; True for ``struct``
            (members public by default), False for ``class``.

    Returns:
        A list of whitespace-normalised signatures in source order.
        Declarations are returned as written; inline definitions are returned
        as ``<signature> { ... }``.

    Only lines that start at the class's own brace level are examined. Lines
    inside a nested scope (a multi-line inline body, a nested class/struct/
    enum, a braced initialiser) are skipped, so statements such as
    ``helper(1);`` inside a method body are not reported as declarations and
    access specifiers of nested types do not change the outer visibility.
    """
    public = is_struct_default_public
    out = []
    depth = 0
    for raw in lines:
        line = raw.strip()
        if not line:
            continue
        # Remember the nesting level this line *starts* at, then account for
        # the braces it contains. Braces are counted textually, so braces in
        # string or character literals are counted too.
        depth_before = depth
        depth += raw.count("{") - raw.count("}")
        if depth_before > 0:
            # Inside a nested scope: not part of this class's direct interface.
            continue
        m = ACCESS_RE.match(line)
        if m:
            public = m.group(1) == "public"
            continue
        if not public:
            continue
        # Lines ending in ":" are labels or split constructor heads, not
        # complete signatures.
        if line.startswith(SKIP_PREFIXES) or line.endswith(":"):
            continue
        if POSSIBLE_FUNC_DECL_RE.match(line):
            out.append(collapse_ws(line))
        elif POSSIBLE_INLINE_DEF_RE.match(line):
            # Keep only the signature; the body is replaced by a placeholder.
            sig = line.split("{", 1)[0].rstrip()
            out.append(collapse_ws(sig) + " { ... }")
    return out
def extract_free_function_decls(code: str):
    """Collect signatures of free (non-member) functions from header text.

    Args:
        code: Comment-stripped header source.

    Returns:
        A list of whitespace-normalised signatures in source order.
        Declarations are returned as written; inline definitions are returned
        as ``<signature> { ... }``.

    Class/struct declarations are removed first so that their members are not
    reported as free functions. A declaration starts on a line matching
    CLASS_START_RE and ends when its braces balance again. A declaration that
    ends without a body (forward declaration such as ``class Foo;``) or that
    opens and closes on a single line consumes only its own line, so a
    function declared on the following line is still seen.

    Known limitation: statements inside multi-line bodies of free inline
    functions are not filtered out and may match the declaration heuristic.
    """
    kept = []
    in_class = False        # currently inside a class/struct declaration
    body_open = False       # that declaration's opening brace has been seen
    brace_balance = 0
    for line in code.splitlines():
        starts_class = False
        if not in_class:
            if not CLASS_START_RE.match(line):
                kept.append(line)
                continue
            in_class, body_open, brace_balance = True, False, 0
            starts_class = True
        opens = line.count("{")
        if not body_open:
            if opens:
                body_open = True
            elif ";" in line or "}" in line:
                # The head ended before any body was opened. If that happened
                # on a later line (e.g. a multi-line template parameter list
                # followed by a function), the line is ordinary code: keep it.
                in_class = False
                if not starts_class:
                    kept.append(line)
                continue
        brace_balance += opens - line.count("}")
        if body_open and brace_balance <= 0:
            in_class = False
        # Lines belonging to a class declaration are dropped.

    out = []
    for raw in kept:
        line = raw.strip()
        if not line or line.startswith(SKIP_PREFIXES):
            continue
        if POSSIBLE_FUNC_DECL_RE.match(line):
            out.append(collapse_ws(line))
        elif POSSIBLE_INLINE_DEF_RE.match(line):
            # Keep only the signature; the body is replaced by a placeholder.
            sig = line.split("{", 1)[0].rstrip()
            out.append(collapse_ws(sig) + " { ... }")
    return out
def split_top_level_classes(code: str):
    """Locate class/struct definitions and return their body lines.

    Args:
        code: Comment-stripped header source.

    Returns:
        A list of ``(name, is_struct, body_lines)`` tuples in source order.
        ``body_lines`` are the lines strictly between the line holding the
        opening brace and the line on which the braces balance again, so
        members written on either of those two lines are not included.

    Scanning resumes after the closing line of each definition, which means
    classes nested inside another class are not returned separately.
    Declarations that end with ``;`` before any ``{`` (forward declarations
    such as ``class Foo;``) are skipped rather than paired with whatever brace
    block happens to follow them.
    """
    lines = code.splitlines()
    total = len(lines)
    results = []
    i = 0
    while i < total:
        m = CLASS_START_RE.match(lines[i])
        if not m:
            i += 1
            continue
        kind, name = m.group(1), m.group(2)

        # Find the opening brace on this or a following line, giving up as
        # soon as the declaration is terminated without a body.
        open_at = None
        for j in range(i, total):
            if "{" in lines[j]:
                open_at = j
                break
            if ";" in lines[j]:
                break
        if open_at is None:
            i += 1
            continue

        # Capture lines until the braces opened on line open_at are balanced.
        depth = 0
        body = []
        end = open_at
        for end in range(open_at, total):
            current = lines[end]
            depth += current.count("{") - current.count("}")
            body.append(current)
            if depth <= 0 and "}" in current:
                break

        # Drop the first (opening-brace) and last (closing-brace) lines.
        results.append((name, kind == "struct", body[1:-1]))
        i = end + 1
    return results
# ---------- Main per-file processing ----------
def process_header(path: Path):
    """Build the markdown document for one header file.

    Args:
        path: Header file located under SRC_DIR.

    Returns:
        The markdown text, or None when the file is empty/unreadable or no
        class methods and no free functions were detected in it.

    Layout of the result: a ``# <relative path>`` title, then an optional
    "Free functions" section, then one section per class that has detected
    public methods. Every signature is a ``###`` heading in backticks,
    followed by a blank line.
    """
    source = read_text(path)
    if not source:
        return None
    code = strip_comments(source)

    # Classes first, keeping only those with at least one detected method.
    per_class = []
    for cname, is_struct, body in split_top_level_classes(code):
        found = extract_public_methods(body, is_struct_default_public=is_struct)
        if found:
            per_class.append((cname, found))

    free_funcs = extract_free_function_decls(code)
    if not (per_class or free_funcs):
        return None

    def section(title, signatures):
        # A section is its title, a blank line, then one heading per signature.
        block = [title, ""]
        for sig in signatures:
            block += [f"### `{sig}`", ""]
        return block

    doc = [f"# {path.relative_to(SRC_DIR).as_posix()}", ""]
    if free_funcs:
        doc += section("## Free functions", free_funcs)
    for cname, methods in per_class:
        doc += section(f"## class {cname} — public interface", methods)
    return "\n".join(doc)
def write_markdown(src_header: Path, content: str):
    """Write *content* to the markdown file that mirrors *src_header* under DOCS_DIR.

    The destination keeps the header's path relative to SRC_DIR, with the
    final suffix replaced by ``.md``. Missing parent directories are created.

    Returns:
        The path of the file that was written.
    """
    target = (DOCS_DIR / src_header.relative_to(SRC_DIR)).with_suffix(".md")
    ensure_dir(target)
    target.write_text(content, encoding="utf-8")
    return target
def main():
    """Generate markdown for every header under SRC_DIR.

    Exits with status 1 (after an error message on stderr) when SRC_DIR does
    not exist. Otherwise each file whose lower-cased suffix is in HEADER_EXTS
    is processed; one ``[OK]`` line is printed per markdown file written, and
    an ``[INFO]`` line is printed if nothing was generated at all.
    """
    if not SRC_DIR.exists():
        print(f"[ERR] src/ not found at {SRC_DIR}", file=sys.stderr)
        sys.exit(1)
    generated = 0
    # Sorted so that processing and log order are reproducible between runs;
    # the directory walk itself does not promise any particular order.
    for path in sorted(SRC_DIR.rglob("*")):
        if not path.is_file() or path.suffix.lower() not in HEADER_EXTS:
            continue
        result = process_header(path)
        if result:
            out = write_markdown(path, result)
            generated += 1
            print(f"[OK] {out.relative_to(REPO_ROOT)}")
    if generated == 0:
        print("[INFO] no public interfaces detected (heuristics may have filtered everything)")


if __name__ == "__main__":
    main()