#!/usr/bin/env python3
# dofs/tools/extract_api.py
#
# Extract DOFS public API from headers and emit:
#   - Per-header Markdown files under docs/<same path>.md (mirrors src tree)
#   - One JSON index at docs/index.json
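#
# Typical invocations (illustrative; the flags mirror the argparse options
# defined in main() below, and paths assume you run from the repo root):
#   python tools/extract_api.py                  # write docs/index.json + per-header Markdown
#   python tools/extract_api.py --stdout         # print the JSON index instead of writing files
#   python tools/extract_api.py --src src --out-dir docs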

from __future__ import annotations

import argparse
import json
import re
from dataclasses import dataclass, asdict
from pathlib import Path
from typing import List, Optional, Tuple, Dict


# -------- Repo roots --------
def _detect_repo_root() -> Path:
    p = Path(__file__).resolve()
    for anc in [p.parent, *p.parents]:
        if (anc / "src").is_dir():
            return anc
    return p.parent


REPO_ROOT = _detect_repo_root()  # .../dofs
SRC_ROOT = REPO_ROOT / "src"
OUT_DIR_DEFAULT = REPO_ROOT / "docs"  # mirror into docs/


# -------- IO helpers --------
def read_text(p: Path) -> str:
    return p.read_text(encoding="utf-8", errors="ignore")


def iter_headers(root: Path) -> List[Path]:
    return sorted(root.rglob("*.h"))


def strip_comments_and_literals(code: str) -> str:
    string_re = r'("([^"\\]|\\.)*")|(\'([^\'\\]|\\.)*\')'
    slc_re = r'//[^\n]*'
    mlc_re = r'/\*.*?\*/'

    def _keep_nls(m):  # keep line count stable
        return re.sub(r'[^\n]', ' ', m.group(0))

    code = re.sub(mlc_re, _keep_nls, code, flags=re.S)
    code = re.sub(string_re, _keep_nls, code, flags=re.S)
    code = re.sub(slc_re, _keep_nls, code)
    return code
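
# Example (illustrative, not a doctest): for the input
#   "int x = 0; // count\n/* note */ char c;"
# both comments are replaced by same-length runs of spaces (string and char
# literals get the same treatment), so offsets and newline counts used by
# Parser still line up with the original header.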


# -------- Data model --------
@dataclass
class Symbol:
    kind: str            # "free_function" | "method" | "ctor" | "dtor" | "conversion" | "macro"
    qualified: str
    signature: str
    file: str            # e.g., "src/core/simulator.h"
    line: int
    static: bool = False
    const: bool = False
    ref_qual: str = ""
    template_params: str = ""
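
# A parsed method ends up roughly like this (illustrative values; the dict form
# produced by asdict() is what to_json() writes into docs/index.json):
#   Symbol(kind="method", qualified="dofs::Simulator::step",
#          signature="void step(double dt)", file="src/core/simulator.h",
#          line=42, const=False)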


# -------- Parser --------
class Parser:
    def __init__(self, text: str, relpath: str):
        self.text = text; self.relpath = relpath
        self.i = 0; self.n = len(text); self.line = 1
        self.ns_stack: List[str] = []
        self.class_stack: List[dict] = []
        self.depth_brace = 0
        self.pending_template: Optional[str] = None
        self.syms: List[Symbol] = []

        # simple guard for bogus names when we fall into bodies
        self._kw_block = {
            "if", "for", "while", "switch", "return", "case", "default",
            "do", "else", "break", "continue", "goto", "try", "catch"
        }

    def peek(self, k=0): j=self.i+k; return self.text[j] if 0<=j<self.n else ""

    def advance(self, k=1):
        for _ in range(k):
            if self.i>=self.n: return
            ch=self.text[self.i]; self.i+=1
            if ch=="\n": self.line+=1

    def skip_ws(self):
        while self.i<self.n and self.text[self.i].isspace(): self.advance(1)

    def run(self):
        while self.i < self.n:
            self.skip_ws()
            if self.i >= self.n: break
            if self.text.startswith("namespace", self.i): self._parse_namespace(); continue
            if self.text.startswith("class ", self.i) or self.text.startswith("struct ", self.i): self._parse_record(); continue
            if self.text.startswith("template", self.i):
                self.pending_template = self._parse_template_intro(); continue
            if self.text.startswith("public:", self.i): self._set_access("public"); self.advance(len("public:")); continue
            if self.text.startswith("private:", self.i): self._set_access("private"); self.advance(len("private:")); continue
            if self.text.startswith("protected:", self.i): self._set_access("protected"); self.advance(len("protected:")); continue
            ch=self.peek()
            if ch=="{": self.depth_brace+=1; self.advance(1); continue
            if ch=="}": self.depth_brace-=1; self.advance(1); self._maybe_pop(); continue
            self._maybe_decl_or_def()
        return self.syms
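
    # How the scan fits together: run() makes a single pass over the
    # comment-stripped text, tracking raw brace depth in depth_brace and the
    # enclosing namespace/class context in ns_stack/class_stack. Anything that
    # is not a block opener, closer, or access specifier is handed to
    # _maybe_decl_or_def(), which decides whether the upcoming head should be
    # recorded as a Symbol.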

    def _skip_balanced_block(self):
        """
        Consume a balanced {...} block starting at the current position
        (which must be at '{'). This does NOT touch self.depth_brace /
        class_stack, so it won't confuse outer block tracking.
        """
        if self.peek() != "{":
            return
        depth = 0
        # consume the first '{'
        self.advance(1)
        depth += 1
        while self.i < self.n and depth > 0:
            ch = self.peek()
            if ch == "{":
                depth += 1
            elif ch == "}":
                depth -= 1
            self.advance(1)

    # --- blocks ---
    def _parse_namespace(self):
        self.advance(len("namespace")); self.skip_ws()
        if self.text.startswith("inline", self.i):
            self.advance(len("inline")); self.skip_ws()
        m = re.match(r'([A-Za-z_]\w*(::[A-Za-z_]\w*)*)?', self.text[self.i:])
        name = ""
        if m: name = m.group(0) or ""; self.advance(len(name))
        self.skip_ws()
        if self.peek() == "{":
            self.advance(1); self.depth_brace += 1
            self.ns_stack.append(name if name else "")

    def _parse_record(self):
        kw = "class" if self.text.startswith("class ", self.i) else "struct"
        self.advance(len(kw)); self.skip_ws()
        name = self._read_word()
        if not name: return
        while self.i<self.n and self.peek() not in "{;":
            if self.peek()=="<": self._read_balanced("<", ">")
            else: self.advance(1)
        if self.peek()=="{":
            self.advance(1); self.depth_brace += 1
            self.class_stack.append({"name": name, "access": "public" if kw=="struct" else "private", "brace_depth": self.depth_brace})
        else:
            self.advance(1)  # forward decl

    def _parse_template_intro(self) -> str:
        self.advance(len("template")); self.skip_ws()
        params = self._read_balanced("<", ">") if self.peek()=="<" else ""
        return f"template{params}"

    def _set_access(self, acc: str):
        if self.class_stack: self.class_stack[-1]["access"]=acc

    def _maybe_pop(self):
        if self.class_stack and self.class_stack[-1]["brace_depth"] == self.depth_brace + 1:
            self.class_stack.pop(); return
        if self.ns_stack: self.ns_stack.pop()

    # --- helpers ---
    def _read_word(self) -> str:
        self.skip_ws()
        m = re.match(r'[A-Za-z_]\w*', self.text[self.i:])
        if not m: return ""
        w = m.group(0); self.advance(len(w)); return w

    def _read_balanced(self, o: str, c: str) -> str:
        depth=1; out=o; self.advance(1)
        while self.i<self.n and depth>0:
            ch=self.peek(); out+=ch; self.advance(1)
            if ch==o: depth+=1
            elif ch==c: depth-=1
        return out

    def _current_ns_is_dofs(self) -> bool:
        if not self.ns_stack: return False
        chain=[p for p in self.ns_stack if p]
        return bool(chain) and chain[0]=="dofs"

    def _read_one_head(self) -> Tuple[str, str]:
        par=ang=sq=0; start=self.i
        while self.i<self.n:
            ch=self.peek()
            if ch=="(": par+=1
            elif ch==")": par=max(0,par-1)
            elif ch=="<": ang+=1
            elif ch==">": ang=max(0,ang-1)
            elif ch=="[": sq+=1
            elif ch=="]": sq=max(0,sq-1)
            elif ch==";" and par==0 and ang==0 and sq==0:
                end=self.i; self.advance(1)
                return self.text[start:end].strip(), ";"
            elif ch=="{" and par==0 and ang==0 and sq==0:
                end=self.i
                return self.text[start:end].strip(), "{"
            self.advance(1)
        return "", ""

    def _skip_brace_block(self):
        """Assumes current char is '{'; skips the balanced block."""
        if self.peek() != "{":
            return
        brace = 0
        while self.i < self.n:
            ch = self.peek()
            self.advance(1)
            if ch == "{":
                brace += 1
            elif ch == "}":
                brace -= 1
                if brace == 0:
                    break

    def _consume_until_sep(self):
        par=ang=sq=0
        while self.i<self.n:
            ch=self.peek(); self.advance(1)
            if ch=="(": par+=1
            elif ch==")": par=max(0,par-1)
            elif ch=="<": ang+=1
            elif ch==">": ang=max(0,ang-1)
            elif ch=="[": sq+=1
            elif ch=="]": sq=max(0,sq-1)
            elif ch==";" and par==0 and ang==0 and sq==0: return
            elif ch=="{" and par==0 and ang==0 and sq==0:
                brace=1
                while self.i<self.n and brace>0:
                    c2=self.peek(); self.advance(1)
                    if c2=="{": brace+=1
                    elif c2=="}": brace-=1
                return

    def _maybe_decl_or_def(self):
        start_line = self.line
        # skip obvious non-function starts
        for bs in ("using ", "typedef ", "enum ", "namespace ", "static_assert"):
            if self.text.startswith(bs, self.i):
                self._consume_until_sep(); return
        if self.text.startswith("template ", self.i):
            self.pending_template = self._parse_template_intro(); return

        decl, endch = self._read_one_head()
        if not decl.strip(): return

        tparams = self.pending_template or ""
        self.pending_template = None

        if "friend" in decl: return
        if "(" not in decl or ")" not in decl: return

        recorded = False
        # classify: method vs free fn (inside dofs)
        in_class = bool(self.class_stack)
        if in_class:
            if self.class_stack[-1]["access"] != "public": return
            self._record_method(decl, start_line, tparams)
            recorded = True
        else:
            if self._current_ns_is_dofs():
                self._record_free_function(decl, start_line, tparams)
                recorded = True

        # If we just read a function head with a body, skip the body **after** recording
        if endch == "{":
            self._skip_brace_block()
            return

        # If it wasn't recorded (e.g., not in dofs namespace for free function),
        # just continue; declarations ending with ';' need no additional skipping.
        if recorded:
            return
        else:
            return

    # --- symbol building ---
    def _normalize(self, s: str) -> str:
        return re.sub(r'\s+', ' ', s).strip()

    def _name_from_decl(self, decl: str) -> str:
        """
        Find the function/method name robustly:
          - choose the '(' that starts the *parameter list* (angle-depth == 0)
          - then take the identifier immediately to its left as the name
        Avoids mistaking template args like 'std::function<void()>' for a function.
        """
        # Strip trailing qualifiers after param list for stability
        head = re.split(r'\b(noexcept|requires)\b', decl)[0]

        # Scan to find the '(' that begins the parameter list at angle-depth 0
        ang = 0
        par_open_idx = -1
        for idx, ch in enumerate(head):
            if ch == '<':
                ang += 1
            elif ch == '>':
                ang = max(0, ang - 1)
            elif ch == '(' and ang == 0:
                par_open_idx = idx
                break
        if par_open_idx == -1:
            return ""

        # Walk left from par_open_idx to find the start of the name token
        j = par_open_idx - 1
        # Skip whitespace
        while j >= 0 and head[j].isspace():
            j -= 1
        # Collect identifier (and allow operator forms)
        # First, try operator names
        m_op = re.search(r'(operator\s*""\s*_[A-Za-z_]\w*|operator\s*[^\s(]+)\s*$', head[:par_open_idx])
        if m_op:
            name = m_op.group(1)
        else:
            # Regular identifier (possibly destructor)
            m_id = re.search(r'(~?[A-Za-z_]\w*)\s*$', head[:par_open_idx])
            name = m_id.group(1) if m_id else ""

        if not name or name in self._kw_block:
            return ""
        return name
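
    # Example (illustrative): for the head
    #   'std::function<void()> make_callback(int id)'
    # the '<...>' span keeps the angle depth non-zero, so the chosen '(' is the
    # one after make_callback and the extracted name is "make_callback" rather
    # than "void" from the template argument.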

    def _qualify(self, name: str) -> str:
        ns = [p for p in self.ns_stack if p]
        q = "::".join(ns) + "::" if ns else ""
        if self.class_stack:
            q += "::".join([c["name"] for c in self.class_stack]) + "::"
        return (q + name) if q else name

    def _kind_for_method(self, name: str, cls: str) -> str:
        if name == cls: return "ctor"
        if name == f"~{cls}": return "dtor"
        if name.startswith("operator"):
            if re.match(r'operator\s+[^(\s]+', name) and "<" not in name and name != "operator()":
                return "conversion"
            return "method"
        return "method"

    def _cvref_static(self, decl: str) -> Tuple[bool,bool,str]:
        is_static = bool(re.search(r'(^|\s)static\s', decl))
        r = decl.rfind(")")
        tail = decl[r+1:] if r!=-1 else ""
        is_const = bool(re.search(r'\bconst\b', tail))
        refq = "&&" if "&&" in tail else ("&" if re.search(r'(^|\s)&(\s|$)', tail) else "")
        return is_static, is_const, refq
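
    # Example (illustrative): for 'int value() const &' the tail after the
    # closing ')' is ' const &', so this returns (False, True, "&");
    # for 'static Simulator make()' it returns (True, False, "").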

    def _record_method(self, decl: str, start_line: int, tparams: str):
        cls = self.class_stack[-1]["name"]
        name = self._name_from_decl(decl)
        if not name: return
        qualified = self._qualify(name)
        is_static, is_const, refq = self._cvref_static(decl)
        kind = self._kind_for_method(name, cls)
        sig = self._normalize((tparams + " " + decl).strip() if tparams else decl)
        self.syms.append(Symbol(kind=kind, qualified=qualified, signature=sig,
                                file=self.relpath, line=start_line,
                                static=is_static, const=is_const, ref_qual=refq,
                                template_params=tparams or ""))

    def _record_free_function(self, decl: str, start_line: int, tparams: str):
        name = self._name_from_decl(decl)
        if not name: return
        qualified = self._qualify(name)
        sig = self._normalize((tparams + " " + decl).strip() if tparams else decl)
        self.syms.append(Symbol(kind="free_function", qualified=qualified, signature=sig,
                                file=self.relpath, line=start_line,
                                template_params=tparams or ""))


# -------- Rendering --------
def to_json(symbols: List[Symbol]) -> str:
    items = [asdict(s) for s in symbols]
    items.sort(key=lambda s: (s["file"], s["line"], s["qualified"], s["signature"]))
    return json.dumps({"version": 1, "symbols": items}, indent=2)


def _markdown_for_file(rel_repo_file: str, symbols: List[Symbol]) -> str:
    """
    Build per-header Markdown for exactly the symbols whose s.file == rel_repo_file.
    """
    title = rel_repo_file.replace("src/", "", 1)
    lines = [f"# {title}\n"]
    file_syms = [s for s in symbols if s.file == rel_repo_file]
    if not file_syms:
        lines.append("_No public API symbols found in this header._")
        lines.append("")
        return "\n".join(l.rstrip() for l in lines)

    # Group macros last; keep deterministic order
    def _order(s: Symbol):
        k = {"macro": 2}.get(s.kind, 1)
        return (k, s.qualified, s.signature)

    for s in sorted(file_syms, key=_order):
        tprefix = (s.template_params + " ") if s.template_params else ""
        if s.kind == "macro":
            # H2 with macro name, then macro head; no line numbers, no bullets
            lines.append(f"## `{s.qualified}`")
            lines.append(f"`{s.signature}`\n")
        else:
            # H2 with fully qualified name (namespace::[class::]func)
            # Contract/signature on the next line
            fqname = s.qualified
            if tprefix:
                lines.append(f"## `{fqname}`")
                lines.append(f"`{tprefix.strip()} {s.signature}`\n".replace("  ", " ").strip())
            else:
                lines.append(f"## `{fqname}`")
                lines.append(f"`{s.signature}`\n")

    return "\n".join(l.rstrip() for l in lines)
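
# Illustrative entry emitted for one symbol (values are made up; the layout
# matches what the loop above appends):
#   ## `dofs::Simulator::step`
#   `void step(double dt)`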


# -------- Robust multi-line free-function extraction --------
# Matches things like:
#   inline void foo(A a,
#                   B b = std::nullopt) noexcept;
#   std::mutex &error_mutex() noexcept;
_FREE_FN_RE = re.compile(r"""
    (?P<prefix> ^ | [;\}\n] )            # anchor
    (?P<head>
        (?:\s*(?:inline|constexpr|consteval|constinit|static|extern)\s+)*  # storage/attrs
        (?:[\w:\<\>\*\&\s]+\s+)?         # return type (optional for constructors, but we only accept when present)
        (?P<name>[A-Za-z_]\w*)\s*        # function name
        \(
            (?P<params>
                [^()]* (?:\([^()]*\)[^()]*)*   # balanced parens inside params
            )
        \)
        (?:\s*noexcept(?:\s*\([^)]*\))?)?      # optional noexcept/noexcept(expr)
        (?:\s*->\s*[^;{\n]+)?                  # optional trailing return type
    )
    \s*
    (?P<ender> [;{] )                    # prototype or definition
    """, re.VERBOSE | re.DOTALL | re.MULTILINE)


def _collapse_ws(s: str) -> str:
    # Collapse all whitespace runs to a single space for clean signatures
    return " ".join(s.split())


def extract_free_functions_multiline(clean_text: str, relpath: str) -> List[Symbol]:
    """
    Walk the file tracking namespace blocks and pick out free-function
    heads that can span multiple lines. Avoid class/struct/enum bodies.
    """
    syms: List[Symbol] = []
    ns_stack: List[str] = []
    class_depth = 0  # crude guard: skip when inside class/struct/enum body

    # Token-ish scan to maintain simple block context
    i = 0
    n = len(clean_text)
    while i < n:
        # namespace enter
        if clean_text.startswith("namespace", i):
            j = i + len("namespace")
            while j < n and clean_text[j].isspace():
                j += 1
            # Parse namespace name (could be 'dofs' or anonymous)
            k = j
            while k < n and (clean_text[k].isalnum() or clean_text[k] in "_:"):
                k += 1
            ns_name = clean_text[j:k].strip()
            # Find the next '{'
            m = clean_text.find("{", k)
            if m != -1:
                if ns_name:
                    ns_stack.append(ns_name)
                else:
                    ns_stack.append("")  # anonymous
                i = m + 1
                continue

        # class/struct/enum guard
        if clean_text.startswith("class ", i) or clean_text.startswith("struct ", i) or clean_text.startswith("enum ", i):
            # Enter body at next '{'
            m = clean_text.find("{", i)
            if m != -1:
                class_depth += 1
                i = m + 1
                continue

        if clean_text[i] == '}':
            if class_depth > 0:
                class_depth -= 1
            elif ns_stack:
                ns_stack.pop()
            i += 1
            continue

        # Try a function head only if not inside a class-like body
        if class_depth == 0:
            m = _FREE_FN_RE.match(clean_text, i)
            if m:
                name = m.group("name")
                head = m.group("head")
                # filter obvious false positives: require a return type before name
                # (very rough: there must be at least one space before name inside head)
                if re.search(r"\S\s+" + re.escape(name) + r"\s*\(", head):
                    qualified = "::".join([ns for ns in ns_stack if ns])  # drop anonymous
                    qualified = f"{qualified}::{name}" if qualified else name
                    # Build a tidy signature
                    ender = m.group("ender")
                    signature = _collapse_ws(head) + ender
                    line = clean_text.count("\n", 0, m.start("head")) + 1
                    syms.append(Symbol(kind="free_function",
                                       qualified=qualified,
                                       signature=signature,
                                       file=relpath,
                                       line=line))
                i = m.end()
                continue

        i += 1
    return syms
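
# Illustrative match (names are made up, not from the repo): inside
# 'namespace dofs { ... }' the two-line prototype
#   std::error_code write_frame(const Frame &f,
#                               int flags = 0) noexcept;
# is collapsed by _collapse_ws into the single signature
#   "std::error_code write_frame(const Frame &f, int flags = 0) noexcept;"
# and recorded under the qualified name "dofs::write_frame".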


# -------- Macro extraction (function-like only) --------
_MACRO_HEAD_RE = re.compile(r'^\s*#\s*define\s+([A-Za-z_]\w*)\s*\((.*)$')


def extract_function_like_macros(text: str, relpath: str) -> List[Symbol]:
    """
    Capture lines of the form:
        #define NAME(args) <body...>
    with multi-line bodies using backslash continuations.
    We record: kind="macro", qualified=NAME, signature="#define NAME(args)".
    """
    syms: List[Symbol] = []
    lines = text.splitlines()
    i = 0
    while i < len(lines):
        line = lines[i]
        m = _MACRO_HEAD_RE.match(line)
        if not m:
            i += 1
            continue
        name = m.group(1)
        args_part = m.group(2)  # may or may not contain closing ')'
        start_line = i + 1
        # Collect continuation lines while trailing backslash exists.
        body_lines = [line]
        i += 1
        while i < len(lines) and body_lines[-1].rstrip().endswith("\\"):
            body_lines.append(lines[i])
            i += 1
        # Reconstruct just the macro head (name + (...) args text).
        head = "".join(body_lines)
        # Try to extract the argument list reliably (balanced parens from first '(')
        # without being confused by body parentheses.
        head_from_paren = head[head.find("("):] if "(" in head else ""
        # Minimal balanced scan to the first matching ')'
        par = 0
        arg_end = -1
        for idx, ch in enumerate(head_from_paren):
            if ch == "(":
                par += 1
            elif ch == ")":
                par -= 1
                if par == 0:
                    arg_end = idx
                    break
        if arg_end != -1:
            arg_text = head_from_paren[1:arg_end]  # inside (...)
        else:
            # Fallback: whatever we saw on the first line
            arg_text = args_part.split(")")[0]
        signature = f"#define {name}({arg_text.strip()})"
        syms.append(Symbol(kind="macro",
                           qualified=name,
                           signature=signature,
                           file=relpath,
                           line=start_line))
    return syms
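
# Illustrative input/output (the macro name is made up): a header containing
#   #define DOFS_ASSERT(cond, msg) \
#       do { ... } while (0)
# yields Symbol(kind="macro", qualified="DOFS_ASSERT",
#               signature="#define DOFS_ASSERT(cond, msg)").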


# -------- Driver --------
def main():
    ap = argparse.ArgumentParser(description="Extract DOFS public API (per-header docs).")
    ap.add_argument("--src", default=str(SRC_ROOT), help="Source root (default: repo/src)")
    ap.add_argument("--out-dir", default=str(OUT_DIR_DEFAULT), help="Docs root to mirror into (default: docs)")
    ap.add_argument("--stdout", action="store_true", help="Print JSON to stdout instead of writing files")
    args = ap.parse_args()

    src_root = Path(args.src).resolve()
    out_root = Path(args.out_dir).resolve()

    all_symbols: List[Symbol] = []
    header_paths = iter_headers(src_root)

    for hp in header_paths:
        rel_repo = hp.relative_to(REPO_ROOT).as_posix()  # e.g., src/core/simulator.h
        raw = read_text(hp)
        clean = strip_comments_and_literals(raw)
        p = Parser(clean, rel_repo)
        # C++ functions/methods (public) inside namespace dofs
        parsed = p.run()
        all_symbols.extend(parsed)
        # Multi-line free functions (e.g., log_error in error.h)
        extra_fns = extract_free_functions_multiline(clean, rel_repo)
        # De-duplicate by (kind, qualified, signature, file, line)
        seen = {(s.kind, s.qualified, s.signature, s.file, s.line) for s in all_symbols}
        for s in extra_fns:
            key = (s.kind, s.qualified, s.signature, s.file, s.line)
            if key not in seen:
                all_symbols.append(s)
                seen.add(key)
        # Function-like macros (global, regardless of namespace)
        all_symbols.extend(extract_function_like_macros(raw, rel_repo))

    if args.stdout:
        print(to_json(all_symbols))
        return

    # Write index.json under docs/
    out_root.mkdir(parents=True, exist_ok=True)
    (out_root / "index.json").write_text(to_json(all_symbols), encoding="utf-8")

    # Emit one markdown per header, mirroring src/ -> docs/
    # src/<subpath>.h => docs/<subpath>.md
    for hp in header_paths:
        rel_from_repo = hp.relative_to(REPO_ROOT).as_posix()  # src/...
        rel_from_src = hp.relative_to(src_root).with_suffix(".md")  # core/simulator.md
        target_path = out_root / rel_from_src
        target_path.parent.mkdir(parents=True, exist_ok=True)
        md = _markdown_for_file(rel_from_repo, all_symbols)
        target_path.write_text(md, encoding="utf-8")

    print(f"[extract_api] Wrote JSON index: {out_root/'index.json'}")
    print(f"[extract_api] Wrote per-header Markdown under: {out_root}")


if __name__ == "__main__":
    main()