fixed bug in attaching nics to hosts, changed documentation format and generator (buggy but usable)
This commit is contained in:
@@ -1,255 +1,626 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
extract_api.py
|
||||
Scan C++ headers in ./src for public interfaces (public class/struct methods
|
||||
and free function declarations) and emit markdown snippets mirroring the src/
|
||||
tree into ./docs.
|
||||
|
||||
Each interface line is rendered as:
|
||||
### `signature`
|
||||
|
||||
Usage:
|
||||
cd dofs
|
||||
python3 tools/extract_api.py
|
||||
"""
|
||||
# dofs/tools/extract_api.py
|
||||
# Extract DOFS public API from headers and emit:
|
||||
# - Per-header Markdown files under docs/<same path>.md (mirrors src tree)
|
||||
# - One JSON index at docs/index.json
|
||||
|
||||
from __future__ import annotations
|
||||
import argparse
|
||||
import json
|
||||
import re
|
||||
import sys
|
||||
from dataclasses import dataclass, asdict
|
||||
from pathlib import Path
|
||||
from typing import List, Optional, Tuple, Dict
|
||||
|
||||
REPO_ROOT = Path(__file__).resolve().parents[1]
|
||||
SRC_DIR = REPO_ROOT / "src"
|
||||
DOCS_DIR = REPO_ROOT / "docs"
|
||||
# -------- Repo roots --------
|
||||
def _detect_repo_root() -> Path:
    """Walk upward from this file looking for a directory that contains src/.

    Falls back to this file's own directory when no ancestor qualifies.
    """
    here = Path(__file__).resolve()
    candidates = [here.parent]
    candidates.extend(here.parents)
    for candidate in candidates:
        if (candidate / "src").is_dir():
            return candidate
    return here.parent
|
||||
|
||||
HEADER_EXTS = {".h", ".hh", ".hpp", ".hxx"}
|
||||
|
||||
# ---------- Utilities ----------
|
||||
REPO_ROOT = _detect_repo_root() # .../dofs
|
||||
SRC_ROOT = REPO_ROOT / "src"
|
||||
OUT_DIR_DEFAULT = REPO_ROOT / "docs" # mirror into docs/
|
||||
|
||||
# -------- IO helpers --------
|
||||
def read_text(p: Path) -> str:
    """Read *p* as UTF-8 (undecodable bytes ignored); warn and return "" on any error.

    Fix: removed an unreachable duplicate ``return p.read_text(...)`` that sat
    after the try/except (merge residue) — both branches already return.
    """
    try:
        return p.read_text(encoding="utf-8", errors="ignore")
    except Exception as e:
        print(f"[WARN] failed reading {p}: {e}", file=sys.stderr)
        return ""
|
||||
|
||||
def strip_comments(code: str) -> str:
    """Delete C/C++ comments: /*...*/ blocks first, then //-to-end-of-line."""
    without_blocks = re.sub(r"/\*.*?\*/", "", code, flags=re.S)
    without_lines = re.sub(r"//.*?$", "", without_blocks, flags=re.M)
    return without_lines
|
||||
def iter_headers(root: Path) -> List[Path]:
    """Return every C/C++ header under *root*, sorted.

    Fix: previously only ``*.h`` was globbed, silently skipping the other
    extensions this tool claims to support (.hh/.hpp/.hxx — see HEADER_EXTS
    and the suffix filter in the old driver loop).
    """
    patterns = ("*.h", "*.hh", "*.hpp", "*.hxx")
    found = {p for pattern in patterns for p in root.rglob(pattern)}
    return sorted(found)
|
||||
|
||||
def collapse_ws(s: str) -> str:
    """Squash every whitespace run in *s* to one space and trim both ends."""
    squashed = re.sub(r"\s+", " ", s)
    return squashed.strip()
|
||||
def strip_comments_and_literals(code: str) -> str:
    """Blank out comments and string/char literals, preserving line numbers.

    Each non-newline character of a matched span becomes a space, so byte
    offsets later in the pipeline still map onto the original lines.
    """
    string_re = r'("([^"\\]|\\.)*")|(\'([^\'\\]|\\.)*\')'
    slc_re = r'//[^\n]*'
    mlc_re = r'/\*.*?\*/'
    def _keep_nls(m): # keep line count stable
        return re.sub(r'[^\n]', ' ', m.group(0))
    # Pass order: block comments, then strings, then line comments.
    # NOTE(review): a "/*" inside a string literal would be blanked as a
    # comment by the first pass — confirm the scanned headers never do that.
    code = re.sub(mlc_re, _keep_nls, code, flags=re.S)
    code = re.sub(string_re, _keep_nls, code, flags=re.S)
    code = re.sub(slc_re, _keep_nls, code)
    return code
|
||||
|
||||
def ensure_dir(path: Path):
    """Create the parent directory of *path* (and any ancestors) if missing."""
    parent = path.parent
    parent.mkdir(parents=True, exist_ok=True)
|
||||
# -------- Data model --------
|
||||
@dataclass
class Symbol:
    """One extracted API entry (function, method, or macro) plus its location."""
    kind: str  # "free_function" | "method" | "ctor" | "dtor" | "conversion" | "macro"
    qualified: str  # fully qualified name, e.g. "dofs::Sim::run"
    signature: str  # normalized one-line declaration text
    file: str  # e.g., "src/core/simulator.h"
    line: int  # 1-based line where the declaration head starts
    static: bool = False  # member declared static
    const: bool = False  # const member function
    ref_qual: str = ""  # "&", "&&", or "" (ref-qualifier after the param list)
    template_params: str = ""  # "template<...>" intro text, if any
|
||||
|
||||
# ---------- Heuristic extractors ----------
|
||||
# -------- Parser (same core as before; trimmed comments) --------
|
||||
class Parser:
|
||||
def __init__(self, text: str, relpath: str):
|
||||
self.text = text; self.relpath = relpath
|
||||
self.i = 0; self.n = len(text); self.line = 1
|
||||
self.ns_stack: List[str] = []
|
||||
self.class_stack: List[dict] = []
|
||||
self.depth_brace = 0
|
||||
self.pending_template: Optional[str] = None
|
||||
self.syms: List[Symbol] = []
|
||||
|
||||
ACCESS_RE = re.compile(r"^\s*(public|private|protected)\s*:\s*$")
|
||||
CLASS_START_RE = re.compile(r"^\s*(class|struct)\s+([A-Za-z_]\w*)\b")
|
||||
POSSIBLE_FUNC_DECL_RE = re.compile(
|
||||
r"""^[^;{}()]*\b
|
||||
(?!typedef\b)(?!using\b)(?!friend\b)
|
||||
[A-Za-z_~]\w*\s*
|
||||
\(
|
||||
[^;]* # params
|
||||
\)
|
||||
(?:\s*(?:const|noexcept|override|final|=0|=\s*default|=\s*delete))*\s*
|
||||
;
|
||||
\s*$""",
|
||||
re.X,
|
||||
)
|
||||
POSSIBLE_INLINE_DEF_RE = re.compile(
|
||||
r"""^[^;{}()]*\b
|
||||
(?!typedef\b)(?!using\b)(?!friend\b)
|
||||
[A-Za-z_~]\w*\s*
|
||||
\(
|
||||
[^{;]* # params
|
||||
\)
|
||||
(?:\s*(?:const|noexcept|override|final))*\s*
|
||||
\{""",
|
||||
re.X,
|
||||
)
|
||||
SKIP_PREFIXES = ("#define", "#include", "static_assert", "enum ", "enum class ",
|
||||
"template<", "namespace ", "using ", "typedef ", "friend ",
|
||||
"struct ", "class ")
|
||||
# simple guard for bogus names when we fall into bodies
|
||||
self._kw_block = {
|
||||
"if", "for", "while", "switch", "return", "case", "default",
|
||||
"do", "else", "break", "continue", "goto", "try", "catch"
|
||||
}
|
||||
|
||||
def extract_public_methods(lines, is_struct_default_public: bool):
|
||||
public = is_struct_default_public
|
||||
out = []
|
||||
depth = 0
|
||||
for raw in lines:
|
||||
line = raw.strip()
|
||||
if not line:
|
||||
continue
|
||||
def peek(self, k=0): j=self.i+k; return self.text[j] if 0<=j<self.n else ""
|
||||
def advance(self, k=1):
|
||||
for _ in range(k):
|
||||
if self.i>=self.n: return
|
||||
ch=self.text[self.i]; self.i+=1
|
||||
if ch=="\n": self.line+=1
|
||||
def skip_ws(self):
|
||||
while self.i<self.n and self.text[self.i].isspace(): self.advance(1)
|
||||
|
||||
# Track nested braces to avoid confusing nested scopes
|
||||
depth += raw.count("{")
|
||||
depth -= raw.count("}")
|
||||
    def run(self):
        """Scan the whole text; return the list of collected Symbols."""
        while self.i < self.n:
            self.skip_ws()
            if self.i >= self.n: break
            # Structural keywords update scope tracking before anything else.
            if self.text.startswith("namespace", self.i): self._parse_namespace(); continue
            if self.text.startswith("class ", self.i) or self.text.startswith("struct ", self.i): self._parse_record(); continue
            if self.text.startswith("template", self.i):
                self.pending_template = self._parse_template_intro(); continue
            if self.text.startswith("public:", self.i): self._set_access("public"); self.advance(len("public:")); continue
            if self.text.startswith("private:", self.i): self._set_access("private"); self.advance(len("private:")); continue
            if self.text.startswith("protected:", self.i): self._set_access("protected"); self.advance(len("protected:")); continue
            ch=self.peek()
            # Bare braces adjust depth; a '}' may close a class or a namespace.
            if ch=="{": self.depth_brace+=1; self.advance(1); continue
            if ch=="}": self.depth_brace-=1; self.advance(1); self._maybe_pop(); continue
            # Anything else is a candidate declaration/definition head.
            self._maybe_decl_or_def()
        return self.syms
|
||||
|
||||
m = ACCESS_RE.match(line)
|
||||
if m and depth >= 0:
|
||||
public = (m.group(1) == "public")
|
||||
continue
|
||||
    def _skip_balanced_block(self):
        """
        Consume a balanced {...} block starting at the current position
        (which must be at '{'). This does NOT touch self.depth_brace /
        class_stack, so it won't confuse outer block tracking.
        """
        if self.peek() != "{":
            return
        depth = 0
        # consume the first '{'
        self.advance(1)
        depth += 1
        while self.i < self.n and depth > 0:
            ch = self.peek()
            if ch == "{":
                depth += 1
            elif ch == "}":
                depth -= 1
            self.advance(1)
|
||||
|
||||
if not public:
|
||||
continue
|
||||
|
||||
if line.startswith(SKIP_PREFIXES) or line.endswith(":"):
|
||||
continue
|
||||
|
||||
if POSSIBLE_FUNC_DECL_RE.match(line):
|
||||
out.append(collapse_ws(line))
|
||||
continue
|
||||
|
||||
if POSSIBLE_INLINE_DEF_RE.match(line):
|
||||
sig = line.split("{", 1)[0].rstrip()
|
||||
out.append(collapse_ws(sig) + " { ... }")
|
||||
continue
|
||||
|
||||
return out
|
||||
|
||||
def extract_free_function_decls(code: str):
|
||||
# Remove class/struct bodies to avoid capturing methods
|
||||
scrubbed = []
|
||||
toks = code.splitlines()
|
||||
in_class = False
|
||||
brace_balance = 0
|
||||
for line in toks:
|
||||
if not in_class:
|
||||
if CLASS_START_RE.match(line):
|
||||
in_class = True
|
||||
brace_balance = line.count("{") - line.count("}")
|
||||
scrubbed.append("")
|
||||
continue
|
||||
# --- blocks ---
|
||||
    def _parse_namespace(self):
        """Enter a (possibly inline) namespace and push its name on ns_stack."""
        self.advance(len("namespace")); self.skip_ws()
        if self.text.startswith("inline", self.i):
            self.advance(len("inline")); self.skip_ws()
        # Name may be qualified ('a::b', C++17) or empty (anonymous namespace).
        m = re.match(r'([A-Za-z_]\w*(::[A-Za-z_]\w*)*)?', self.text[self.i:])
        name = "";
        if m: name = m.group(0) or ""; self.advance(len(name))
        self.skip_ws()
        if self.peek() == "{":
            self.advance(1); self.depth_brace += 1
            # NOTE(review): 'namespace dofs::detail' is pushed as one
            # "dofs::detail" entry, which _current_ns_is_dofs will not match
            # (it compares the whole entry to "dofs") — confirm intended.
            self.ns_stack.append(name if name else "")
|
||||
def _parse_record(self):
|
||||
kw = "class" if self.text.startswith("class ", self.i) else "struct"
|
||||
self.advance(len(kw)); self.skip_ws()
|
||||
name = self._read_word()
|
||||
if not name: return
|
||||
while self.i<self.n and self.peek() not in "{;":
|
||||
if self.peek()=="<": self._read_balanced("<", ">")
|
||||
else: self.advance(1)
|
||||
if self.peek()=="{":
|
||||
self.advance(1); self.depth_brace += 1
|
||||
self.class_stack.append({"name": name, "access": "public" if kw=="struct" else "private", "brace_depth": self.depth_brace})
|
||||
else:
|
||||
brace_balance += line.count("{") - line.count("}")
|
||||
if brace_balance <= 0:
|
||||
in_class = False
|
||||
scrubbed.append("")
|
||||
self.advance(1) # forward decl
|
||||
|
||||
def _parse_template_intro(self) -> str:
|
||||
self.advance(len("template")); self.skip_ws()
|
||||
params = self._read_balanced("<", ">") if self.peek()=="<" else ""
|
||||
return f"template{params}"
|
||||
|
||||
def _set_access(self, acc: str):
|
||||
if self.class_stack: self.class_stack[-1]["access"]=acc
|
||||
|
||||
    def _maybe_pop(self):
        """After a '}' was consumed, close the matching class or namespace scope."""
        # depth_brace was already decremented by the caller, so the innermost
        # class matches when its recorded opening depth equals depth_brace + 1.
        if self.class_stack and self.class_stack[-1]["brace_depth"] == self.depth_brace + 1:
            self.class_stack.pop(); return
        # Otherwise attribute the closing brace to the innermost namespace.
        # NOTE(review): a stray '{...}' block at namespace scope that reaches
        # run() would wrongly pop a namespace here — confirm it can't happen.
        if self.ns_stack: self.ns_stack.pop()
|
||||
|
||||
# --- helpers ---
|
||||
def _read_word(self) -> str:
|
||||
self.skip_ws()
|
||||
m = re.match(r'[A-Za-z_]\w*', self.text[self.i:])
|
||||
if not m: return ""
|
||||
w = m.group(0); self.advance(len(w)); return w
|
||||
    def _read_balanced(self, o: str, c: str) -> str:
        """Read and return a balanced o...c group; the cursor must sit on *o*."""
        depth=1; out=o; self.advance(1)
        while self.i<self.n and depth>0:
            ch=self.peek(); out+=ch; self.advance(1)
            if ch==o: depth+=1
            elif ch==c: depth-=1
        return out
|
||||
|
||||
def _current_ns_is_dofs(self) -> bool:
|
||||
if not self.ns_stack: return False
|
||||
chain=[p for p in self.ns_stack if p]
|
||||
return bool(chain) and chain[0]=="dofs"
|
||||
|
||||
    def _read_one_head(self) -> Tuple[str, str]:
        """Read text up to the next top-level ';' or '{'.

        Returns (head, ";") with the ';' consumed, (head, "{") with the '{'
        left in place for the caller, or ("", "") at end of input.
        """
        # par/ang/sq track (), <>, [] nesting so separators inside them are ignored.
        par=ang=sq=0; start=self.i
        while self.i<self.n:
            ch=self.peek()
            if ch=="(": par+=1
            elif ch==")": par=max(0,par-1)
            elif ch=="<": ang+=1
            elif ch==">": ang=max(0,ang-1)
            elif ch=="[": sq+=1
            elif ch=="]": sq=max(0,sq-1)
            elif ch==";" and par==0 and ang==0 and sq==0:
                end=self.i; self.advance(1)
                return self.text[start:end].strip(), ";"
            elif ch=="{" and par==0 and ang==0 and sq==0:
                end=self.i
                return self.text[start:end].strip(), "{"
            self.advance(1)
        return "", ""
|
||||
|
||||
    def _skip_brace_block(self):
        """Assumes current char is '{'; skips the balanced brace block."""
        if self.peek() != "{":
            return
        brace = 0
        while self.i < self.n:
            ch = self.peek()
            self.advance(1)
            if ch == "{":
                brace += 1
            elif ch == "}":
                brace -= 1
                if brace == 0:
                    break
|
||||
|
||||
|
||||
    def _consume_until_sep(self):
        """Discard input through the next top-level ';', or past a top-level {...} block."""
        # par/ang/sq track (), <>, [] nesting; separators inside them don't count.
        par=ang=sq=0
        while self.i<self.n:
            ch=self.peek(); self.advance(1)
            if ch=="(": par+=1
            elif ch==")": par=max(0,par-1)
            elif ch=="<": ang+=1
            elif ch==">": ang=max(0,ang-1)
            elif ch=="[": sq+=1
            elif ch=="]": sq=max(0,sq-1)
            elif ch==";" and par==0 and ang==0 and sq==0: return
            elif ch=="{" and par==0 and ang==0 and sq==0:
                # Swallow the entire brace block, then stop.
                brace=1
                while self.i<self.n and brace>0:
                    c2=self.peek(); self.advance(1)
                    if c2=="{": brace+=1
                    elif c2=="}": brace-=1
                return
|
||||
|
||||
def _maybe_decl_or_def(self):
|
||||
start_line = self.line
|
||||
# skip obvious non-function starts
|
||||
for bs in ("using ", "typedef ", "enum ", "namespace ", "static_assert"):
|
||||
if self.text.startswith(bs, self.i):
|
||||
self._consume_until_sep(); return
|
||||
if self.text.startswith("template ", self.i):
|
||||
self.pending_template = self._parse_template_intro(); return
|
||||
|
||||
decl, endch = self._read_one_head()
|
||||
if not decl.strip(): return
|
||||
|
||||
tparams = self.pending_template or ""
|
||||
self.pending_template = None
|
||||
|
||||
if "friend" in decl: return
|
||||
if "(" not in decl or ")" not in decl: return
|
||||
|
||||
recorded = False
|
||||
# classify: method vs free fn (inside dofs)
|
||||
in_class = bool(self.class_stack)
|
||||
if in_class:
|
||||
if self.class_stack[-1]["access"] != "public": return
|
||||
self._record_method(decl, start_line, tparams)
|
||||
recorded = True
|
||||
else:
|
||||
if self._current_ns_is_dofs():
|
||||
self._record_free_function(decl, start_line, tparams)
|
||||
recorded = True
|
||||
|
||||
# If we just read a function head with a body, skip the body **after** recording
|
||||
if endch == "{":
|
||||
self._skip_brace_block()
|
||||
return
|
||||
|
||||
# If it wasn't recorded (e.g., not in dofs namespace for free function),
|
||||
# just continue; declarations ending with ';' need no additional skipping.
|
||||
if recorded:
|
||||
return
|
||||
else:
|
||||
return
|
||||
|
||||
# --- symbol building ---
|
||||
def _normalize(self, s: str) -> str:
|
||||
return re.sub(r'\s+', ' ', s).strip()
|
||||
|
||||
    def _name_from_decl(self, decl: str) -> str:
        """
        Find the function/method name robustly:
        - choose the '(' that starts the *parameter list* (angle-depth == 0)
        - then take the identifier immediately to its left as the name
        Avoids mistaking template args like 'std::function<void()>' for a function.
        Returns "" when no plausible name is found.
        """
        # Strip trailing qualifiers after param list for stability
        head = re.split(r'\b(noexcept|requires)\b', decl)[0]

        # Scan to find the '(' that begins the parameter list at angle-depth 0
        ang = 0
        par_open_idx = -1
        for idx, ch in enumerate(head):
            if ch == '<':
                ang += 1
            elif ch == '>':
                ang = max(0, ang - 1)
            elif ch == '(' and ang == 0:
                par_open_idx = idx
                break
        if par_open_idx == -1:
            return ""

        # Walk left from par_open_idx to find the start of the name token
        j = par_open_idx - 1
        # Skip whitespace
        # NOTE(review): j is computed but never used below — the regexes on
        # head[:par_open_idx] re-handle trailing whitespace themselves.
        while j >= 0 and head[j].isspace():
            j -= 1
        # Collect identifier (and allow operator forms)
        # First, try operator names (incl. user-defined literal operators)
        m_op = re.search(r'(operator\s*""\s*_[A-Za-z_]\w*|operator\s*[^\s(]+)\s*$', head[:par_open_idx])
        if m_op:
            name = m_op.group(1)
        else:
            # Regular identifier (possibly destructor)
            m_id = re.search(r'(~?[A-Za-z_]\w*)\s*$', head[:par_open_idx])
            name = m_id.group(1) if m_id else ""

        # Reject statement keywords ('if', 'while', ...) that look like calls.
        if not name or name in self._kw_block:
            return ""
        return name
|
||||
|
||||
def _qualify(self, name: str) -> str:
|
||||
ns = [p for p in self.ns_stack if p]
|
||||
q = "::".join(ns) + "::" if ns else ""
|
||||
if self.class_stack:
|
||||
q += "::".join([c["name"] for c in self.class_stack]) + "::"
|
||||
return (q + name) if q else name
|
||||
|
||||
    def _kind_for_method(self, name: str, cls: str) -> str:
        """Classify a member of class *cls*: ctor/dtor/conversion, else plain method."""
        if name == cls: return "ctor"
        if name == f"~{cls}": return "dtor"
        if name.startswith("operator"):
            # A conversion operator ('operator T') has a space-separated target
            # type, no '<', and is not the call operator.
            if re.match(r'operator\s+[^(\s]+', name) and "<" not in name and name != "operator()":
                return "conversion"
            return "method"
        return "method"
|
||||
|
||||
    def _cvref_static(self, decl: str) -> Tuple[bool,bool,str]:
        """Return (is_static, is_const, ref_qualifier) read from a declaration head."""
        is_static = bool(re.search(r'(^|\s)static\s', decl))
        # Only the text after the last ')' can carry const / & / && qualifiers.
        r = decl.rfind(")")
        tail = decl[r+1:] if r!=-1 else ""
        is_const = bool(re.search(r'\bconst\b', tail))
        # NOTE(review): the lone-'&' pattern requires surrounding whitespace,
        # so a tail written 'const&' would be missed — confirm acceptable.
        refq = "&&" if "&&" in tail else ("&" if re.search(r'(^|\s)&(\s|$)', tail) else "")
        return is_static, is_const, refq
|
||||
|
||||
    def _record_method(self, decl: str, start_line: int, tparams: str):
        """Append a Symbol for a public member declared in the innermost open class."""
        cls = self.class_stack[-1]["name"]
        name = self._name_from_decl(decl)
        if not name: return
        qualified = self._qualify(name)
        is_static, is_const, refq = self._cvref_static(decl)
        kind = self._kind_for_method(name, cls)
        # Prepend the pending template<...> intro, if the member had one.
        sig = self._normalize((tparams + " " + decl).strip() if tparams else decl)
        self.syms.append(Symbol(kind=kind, qualified=qualified, signature=sig,
                                file=self.relpath, line=start_line,
                                static=is_static, const=is_const, ref_qual=refq,
                                template_params=tparams or ""))
|
||||
|
||||
    def _record_free_function(self, decl: str, start_line: int, tparams: str):
        """Append a Symbol for a free function declared inside namespace dofs."""
        name = self._name_from_decl(decl)
        if not name: return
        qualified = self._qualify(name)
        # Prepend the pending template<...> intro, if the function had one.
        sig = self._normalize((tparams + " " + decl).strip() if tparams else decl)
        self.syms.append(Symbol(kind="free_function", qualified=qualified, signature=sig,
                                file=self.relpath, line=start_line,
                                template_params=tparams or ""))
|
||||
|
||||
# -------- Rendering --------
|
||||
def to_json(symbols: List[Symbol]) -> str:
    """Serialize *symbols* to the versioned JSON index, in a stable order."""
    entries = [asdict(sym) for sym in symbols]
    entries.sort(key=lambda e: (e["file"], e["line"], e["qualified"], e["signature"]))
    payload = {"version": 1, "symbols": entries}
    return json.dumps(payload, indent=2)
|
||||
|
||||
def _markdown_for_file(rel_repo_file: str, symbols: List[Symbol]) -> str:
|
||||
"""
|
||||
Build per-header Markdown for exactly the symbols whose s.file == rel_repo_file.
|
||||
"""
|
||||
title = rel_repo_file.replace("src/", "", 1)
|
||||
lines = [f"# {title}\n"]
|
||||
file_syms = [s for s in symbols if s.file == rel_repo_file]
|
||||
if not file_syms:
|
||||
lines.append("_No public API symbols found in this header._")
|
||||
lines.append("")
|
||||
return "\n".join(l.rstrip() for l in lines)
|
||||
|
||||
# Group macros last; keep deterministic order
|
||||
def _order(s: Symbol):
|
||||
k = {"macro": 2}.get(s.kind, 1)
|
||||
return (k, s.qualified, s.signature)
|
||||
|
||||
for s in sorted(file_syms, key=_order):
|
||||
tprefix = (s.template_params + " ") if s.template_params else ""
|
||||
if s.kind == "macro":
|
||||
# H2 with macro name, then macro head; no line numbers, no bullets
|
||||
lines.append(f"## `{s.qualified}`")
|
||||
lines.append(f"`{s.signature}`\n")
|
||||
else:
|
||||
# H2 with fully qualified name (namespace::[class::]func)
|
||||
# Contract/signature on the next line
|
||||
fqname = s.qualified
|
||||
if tprefix:
|
||||
lines.append(f"## `{fqname}`")
|
||||
lines.append(f"`{tprefix.strip()} {s.signature}`\n".replace(" ", " ").strip())
|
||||
else:
|
||||
lines.append(f"## `{fqname}`")
|
||||
lines.append(f"`{s.signature}`\n")
|
||||
|
||||
return "\n".join(l.rstrip() for l in lines)
|
||||
|
||||
# -------- Robust multi-line free-function extraction --------
|
||||
# Matches things like:
|
||||
# inline void foo(A a,
|
||||
# B b = std::nullopt) noexcept;
|
||||
# std::mutex &error_mutex() noexcept;
|
||||
_FREE_FN_RE = re.compile(r"""
|
||||
(?P<prefix> ^ | [;\}\n] ) # anchor
|
||||
(?P<head>
|
||||
(?:\s*(?:inline|constexpr|consteval|constinit|static|extern)\s+)* # storage/attrs
|
||||
(?:[\w:\<\>\*\&\s]+\s+)? # return type (optional for constructors, but we only accept when present)
|
||||
(?P<name>[A-Za-z_]\w*)\s* # function name
|
||||
\(
|
||||
(?P<params>
|
||||
[^()]* (?:\([^()]*\)[^()]*)* # balanced parens inside params
|
||||
)
|
||||
\)
|
||||
(?:\s*noexcept(?:\s*\([^)]*\))?)? # optional noexcept/noexcept(expr)
|
||||
(?:\s*->\s*[^;{\n]+)? # optional trailing return type
|
||||
)
|
||||
\s*
|
||||
(?P<ender> [;{] ) # prototype or definition
|
||||
""", re.VERBOSE | re.DOTALL | re.MULTILINE)
|
||||
|
||||
def _collapse_ws(s: str) -> str:
|
||||
# Collapse all whitespace runs to a single space for clean signatures
|
||||
return " ".join(s.split())
|
||||
|
||||
def extract_free_functions_multiline(clean_text: str, relpath: str) -> List[Symbol]:
    """
    Walk the file tracking namespace blocks and pick out free-function
    heads that can span multiple lines. Avoid class/struct/enum bodies.

    Fix: fragments of the retired line-based extractor (``scrubbed`` /
    ``out`` loops referencing names undefined here) had been merged into
    this function's body; they are removed, the original logic is kept.
    """
    syms: List[Symbol] = []
    ns_stack: List[str] = []
    class_depth = 0  # crude guard: skip when inside class/struct/enum body

    # Token-ish scan to maintain simple block context
    i = 0
    n = len(clean_text)
    while i < n:
        # namespace enter
        if clean_text.startswith("namespace", i):
            j = i + len("namespace")
            while j < n and clean_text[j].isspace():
                j += 1
            # Parse namespace name (could be 'dofs' or anonymous)
            k = j
            while k < n and (clean_text[k].isalnum() or clean_text[k] in "_:"):
                k += 1
            ns_name = clean_text[j:k].strip()
            # Find the next '{'
            m = clean_text.find("{", k)
            if m != -1:
                ns_stack.append(ns_name if ns_name else "")  # "" = anonymous
                i = m + 1
                continue

        # class/struct/enum guard: enter their bodies with class_depth raised
        if clean_text.startswith(("class ", "struct ", "enum "), i):
            m = clean_text.find("{", i)
            if m != -1:
                class_depth += 1
                i = m + 1
                continue

        if clean_text[i] == '}':
            # Closing brace ends the innermost class-like body first,
            # otherwise the innermost namespace.
            if class_depth > 0:
                class_depth -= 1
            elif ns_stack:
                ns_stack.pop()
            i += 1
            continue

        # Try a function head only if not inside a class-like body
        if class_depth == 0:
            m = _FREE_FN_RE.match(clean_text, i)
            if m:
                name = m.group("name")
                head = m.group("head")
                # filter obvious false positives: require a return type before name
                # (very rough: there must be at least one space before name inside head)
                if re.search(r"\S\s+" + re.escape(name) + r"\s*\(", head):
                    qualified = "::".join(ns for ns in ns_stack if ns)  # drop anonymous
                    qualified = f"{qualified}::{name}" if qualified else name
                    # Build a tidy signature
                    ender = m.group("ender")
                    signature = _collapse_ws(head) + ender
                    line = clean_text.count("\n", 0, m.start("head")) + 1
                    syms.append(Symbol(kind="free_function",
                                       qualified=qualified,
                                       signature=signature,
                                       file=relpath,
                                       line=line))
                i = m.end()
                continue

        i += 1
    return syms
|
||||
|
||||
def split_top_level_classes(code: str):
    """Return (name, is_struct, body_lines) per top-level class/struct in *code*.

    Fix: this function and extract_function_like_macros had their bodies
    interleaved line-by-line by a bad merge; both are restored here, complete.
    """
    lines = code.splitlines()
    results = []
    i = 0
    while i < len(lines):
        m = CLASS_START_RE.match(lines[i])
        if not m:
            i += 1
            continue
        kind, name = m.group(1), m.group(2)
        # Find opening brace on same or subsequent lines
        j = i
        if "{" not in lines[j]:
            j += 1
            while j < len(lines) and "{" not in lines[j]:
                j += 1
            if j >= len(lines):
                i += 1
                continue
        # Capture until matching close
        depth = 0
        body = []
        while j < len(lines):
            depth += lines[j].count("{")
            depth -= lines[j].count("}")
            body.append(lines[j])
            if depth <= 0 and "}" in lines[j]:
                break
            j += 1
        # Drop the brace lines themselves; keep only the interior.
        body_inner = body[1:-1] if body else []
        results.append((name, kind == "struct", body_inner))
        i = j + 1
    return results


# -------- Macro extraction (function-like only) --------
_MACRO_HEAD_RE = re.compile(r'^\s*#\s*define\s+([A-Za-z_]\w*)\s*\((.*)$')


def extract_function_like_macros(text: str, relpath: str) -> List[Symbol]:
    """
    Capture lines of the form:
        #define NAME(args) <body...>
    with multi-line bodies using backslash continuations.
    We record: kind="macro", qualified=NAME, signature="#define NAME(args)".
    """
    syms: List[Symbol] = []
    lines = text.splitlines()
    i = 0
    while i < len(lines):
        line = lines[i]
        m = _MACRO_HEAD_RE.match(line)
        if not m:
            i += 1
            continue
        name = m.group(1)
        args_part = m.group(2)  # may or may not contain the closing ')'
        start_line = i + 1
        # Collect continuation lines while a trailing backslash exists.
        body_lines = [line]
        i += 1
        while i < len(lines) and body_lines[-1].rstrip().endswith("\\"):
            body_lines.append(lines[i])
            i += 1
        # Reconstruct just the macro head (name + (...) args text), then pull
        # a balanced argument list so body parentheses can't confuse us.
        head = "".join(body_lines)
        head_from_paren = head[head.find("("):] if "(" in head else ""
        par = 0
        arg_end = -1
        for idx, ch in enumerate(head_from_paren):
            if ch == "(":
                par += 1
            elif ch == ")":
                par -= 1
                if par == 0:
                    arg_end = idx
                    break
        if arg_end != -1:
            arg_text = head_from_paren[1:arg_end]  # inside (...)
        else:
            # Fallback: whatever we saw on the first line
            arg_text = args_part.split(")")[0]
        signature = f"#define {name}({arg_text.strip()})"
        syms.append(Symbol(kind="macro",
                           qualified=name,
                           signature=signature,
                           file=relpath,
                           line=start_line))
    return syms
|
||||
|
||||
# ---------- Main per-file processing ----------
|
||||
|
||||
def process_header(path: Path):
    """Old-generator pipeline: render one header to markdown, or None if empty.

    NOTE(review): depends on split_top_level_classes / extract_public_methods /
    extract_free_function_decls from the pre-rewrite generator.
    """
    raw = read_text(path)
    if not raw:
        return None

    code = strip_comments(raw)

    # Collect classes
    class_entries = []
    for cname, is_struct, body in split_top_level_classes(code):
        methods = extract_public_methods(body, is_struct_default_public=is_struct)
        if methods:
            class_entries.append((cname, methods))

    # Collect free function decls
    free_funcs = extract_free_function_decls(code)

    if not class_entries and not free_funcs:
        return None

    # Build markdown with ### `signature` items
    rel = path.relative_to(SRC_DIR)
    md_lines = []
    md_lines.append(f"# {rel.as_posix()}")
    md_lines.append("")

    if free_funcs:
        md_lines.append("## Free functions")
        md_lines.append("")
        for sig in free_funcs:
            md_lines.append(f"### `{sig}`")
            md_lines.append("")

    for cname, methods in class_entries:
        md_lines.append(f"## class {cname} — public interface")
        md_lines.append("")
        for sig in methods:
            md_lines.append(f"### `{sig}`")
            md_lines.append("")

    return "\n".join(md_lines)
|
||||
|
||||
def write_markdown(src_header: Path, content: str):
    """Write *content* to docs/<same relative path>.md; return the output path."""
    relative = src_header.relative_to(SRC_DIR)
    destination = (DOCS_DIR / relative).with_suffix(".md")
    ensure_dir(destination)
    destination.write_text(content, encoding="utf-8")
    return destination
|
||||
# (removed) An orphaned, mis-indented duplicate of the extract_function_like_macros
# body — residue of a bad merge — previously sat here at module level.
|
||||
|
||||
# -------- Driver --------
def main():
    """CLI driver: parse headers under --src, emit docs/index.json plus one
    Markdown file per header mirrored into --out-dir.

    Fixes: interleaved remnants of the retired per-header pipeline (the
    process_header loop, `generated` counter, and its messages — including a
    stray ")]" typo) are removed; the old pipeline's existence check on the
    source root is preserved; the de-dup `seen` set is maintained
    incrementally instead of being rebuilt from all_symbols per header.
    """
    ap = argparse.ArgumentParser(description="Extract DOFS public API (per-header docs).")
    ap.add_argument("--src", default=str(SRC_ROOT), help="Source root (default: repo/src)")
    ap.add_argument("--out-dir", default=str(OUT_DIR_DEFAULT), help="Docs root to mirror into (default: docs)")
    ap.add_argument("--stdout", action="store_true", help="Print JSON to stdout instead of writing files")
    args = ap.parse_args()

    src_root = Path(args.src).resolve()
    out_root = Path(args.out_dir).resolve()
    if not src_root.is_dir():
        print(f"[ERR] src/ not found at {src_root}", file=sys.stderr)
        sys.exit(1)

    all_symbols: List[Symbol] = []
    seen = set()  # (kind, qualified, signature, file, line) de-dup keys
    header_paths = iter_headers(src_root)

    for hp in header_paths:
        rel_repo = hp.relative_to(REPO_ROOT).as_posix()  # e.g., src/core/simulator.h
        raw = read_text(hp)
        clean = strip_comments_and_literals(raw)

        # C++ functions/methods (public) inside namespace dofs
        parsed = Parser(clean, rel_repo).run()
        all_symbols.extend(parsed)
        seen.update((s.kind, s.qualified, s.signature, s.file, s.line) for s in parsed)

        # Multi-line free functions (e.g., log_error in error.h), de-duplicated
        for s in extract_free_functions_multiline(clean, rel_repo):
            key = (s.kind, s.qualified, s.signature, s.file, s.line)
            if key not in seen:
                all_symbols.append(s)
                seen.add(key)

        # Function-like macros (global, regardless of namespace); scanned on the
        # raw text because stripping would blank the #define lines' contents.
        all_symbols.extend(extract_function_like_macros(raw, rel_repo))

    if args.stdout:
        print(to_json(all_symbols))
        return

    # Write index.json under docs/
    out_root.mkdir(parents=True, exist_ok=True)
    (out_root / "index.json").write_text(to_json(all_symbols), encoding="utf-8")

    # Emit one markdown per header, mirroring src/<subpath>.h => docs/<subpath>.md
    for hp in header_paths:
        rel_from_repo = hp.relative_to(REPO_ROOT).as_posix()
        target_path = out_root / hp.relative_to(src_root).with_suffix(".md")
        target_path.parent.mkdir(parents=True, exist_ok=True)
        target_path.write_text(_markdown_for_file(rel_from_repo, all_symbols), encoding="utf-8")

    print(f"[extract_api] Wrote JSON index: {out_root/'index.json'}")
    print(f"[extract_api] Wrote per-header Markdown under: {out_root}")


if __name__ == "__main__":
    main()
|
||||
|
||||
Reference in New Issue
Block a user