"""Markdown helpers: extract a title heading and render Markdown to HTML."""

from __future__ import annotations

import re
import subprocess

import markdown as md_lib

from .errors import ValidationIssue

# One ATX heading on a single line: the run of 1-6 ``#`` characters, then the
# rest of the line (beginning with the whitespace that separates the two).
_HEADING_RE = re.compile(r"^(#{1,6})(\s+.*)$")


def extract_title(
    markdown_text: str,
    level: int,
    strict: bool,
    context: str,
    issues: list[ValidationIssue],
) -> tuple[str, str] | None:
    """Pull the first heading of the given level out of a Markdown document.

    The heading line is removed from the text and the remaining headings are
    promoted by one level (see ``_promote_headings``).

    Detection is purely line based: any line that starts with exactly
    ``level`` ``#`` characters followed by whitespace counts as a heading.
    Lines inside fenced code blocks are not treated specially.

    Args:
        markdown_text: The Markdown source.
        level: Number of ``#`` characters the title heading must have.
        strict: When true, the document must contain exactly one heading of
            that level; any other count is reported as an issue.
        context: Passed through to every ``ValidationIssue`` that is created.
        issues: List that receives a ``ValidationIssue`` for each problem.

    Returns:
        ``(title, body)`` on success, where ``body`` is the text without the
        heading line, joined with ``"\\n"``. ``None`` when an issue was
        appended to ``issues``.
    """
    lines = markdown_text.splitlines()

    # Match line by line instead of running a MULTILINE regex over the whole
    # text. ``\s+`` can then never run across a line break (which used to make
    # an empty heading adopt the following line as its title), and the index of
    # the heading is by construction an index into ``lines`` (counting "\n"
    # disagreed with ``splitlines()`` for other line-boundary characters).
    pattern = re.compile(rf"^{'#' * level}\s+(.*)$")
    matches = []
    for index, line in enumerate(lines):
        found = pattern.match(line)
        if found:
            matches.append((index, found.group(1).strip()))

    if strict and len(matches) != 1:
        issues.append(
            ValidationIssue(
                f"Expected exactly one level-{level} heading, found {len(matches)}",
                context=context,
            )
        )
        return None
    if not matches:
        issues.append(ValidationIssue(f"Missing level-{level} heading", context=context))
        return None

    line_index, title = matches[0]
    if not title:
        issues.append(ValidationIssue("Heading title cannot be empty", context=context))
        return None

    del lines[line_index]
    body = _promote_headings("\n".join(lines))
    return title, body


def _promote_headings(text: str) -> str:
    """Raise every heading in ``text`` by one level (``##`` becomes ``#``).

    Level-1 headings are left unchanged because there is no higher level.
    Lines are matched individually against ``_HEADING_RE``; lines inside fenced
    code blocks are not treated specially. The result is joined with ``"\\n"``.
    """
    promoted_lines = []
    for line in text.splitlines():
        match = _HEADING_RE.match(line)
        if match is None:
            promoted_lines.append(line)
            continue
        hashes, rest = match.groups()
        # Never go below one ``#``: a level-1 heading stays level 1.
        level = max(len(hashes) - 1, 1)
        promoted_lines.append("#" * level + rest)
    return "\n".join(promoted_lines)


def convert_markdown(
    markdown_text: str,
    context: str,
    issues: list[ValidationIssue],
    renderer: str = "default",
    hard_line_breaks: bool = False,
) -> str | None:
    """Render Markdown to HTML5 with the selected renderer.

    Args:
        markdown_text: The Markdown source.
        context: Passed through to every ``ValidationIssue`` that is created.
        issues: List that receives a ``ValidationIssue`` when rendering fails.
        renderer: ``"default"`` (the ``markdown`` package with the ``extra``
            extension), ``"py-gfm"`` (the ``markdown`` package with
            ``mdx_gfm.GithubFlavoredMarkdownExtension``) or ``"pandoc"`` (the
            external ``pandoc`` executable).
        hard_line_breaks: Adds the ``nl2br`` extension for the two
            ``markdown``-based renderers, or selects the
            ``markdown+hard_line_breaks`` input format for pandoc.

    Returns:
        The rendered HTML, or ``None`` after appending an issue to ``issues``
        (conversion failure, missing dependency or unknown renderer).
    """
    if renderer == "default":
        try:
            extensions = ["extra"]
            if hard_line_breaks:
                extensions.append("nl2br")
            return md_lib.markdown(markdown_text, extensions=extensions, output_format="html5")
        except Exception as exc:  # pragma: no cover - depends on markdown internals
            issues.append(ValidationIssue(f"Markdown conversion failed: {exc}", context=context))
            return None

    if renderer == "py-gfm":
        # Imported lazily so the optional dependency is only needed when this
        # renderer is actually requested.
        try:
            import mdx_gfm
        except Exception as exc:  # pragma: no cover - dependency missing
            issues.append(ValidationIssue(f"py-gfm is not available: {exc}", context=context))
            return None
        extension_class = getattr(mdx_gfm, "GithubFlavoredMarkdownExtension", None)
        if extension_class is None:
            issues.append(
                ValidationIssue(
                    "py-gfm extension not found: GithubFlavoredMarkdownExtension",
                    context=context,
                )
            )
            return None
        try:
            extensions = [extension_class()]
            if hard_line_breaks:
                extensions.append("nl2br")
            return md_lib.markdown(markdown_text, extensions=extensions, output_format="html5")
        except Exception as exc:  # pragma: no cover - depends on markdown internals
            issues.append(ValidationIssue(f"Markdown conversion failed: {exc}", context=context))
            return None

    if renderer == "pandoc":
        input_format = "markdown+hard_line_breaks" if hard_line_breaks else "markdown"
        try:
            result = subprocess.run(
                ["pandoc", f"--from={input_format}", "--to=html5"],
                input=markdown_text,
                text=True,
                # Pandoc reads and writes UTF-8 regardless of the locale, so do
                # not let the pipes fall back to the locale's encoding.
                encoding="utf-8",
                capture_output=True,
                check=True,
            )
            return result.stdout
        except FileNotFoundError as exc:
            issues.append(ValidationIssue(f"pandoc is not available: {exc}", context=context))
            return None
        except subprocess.CalledProcessError as exc:
            stderr = exc.stderr.strip() if exc.stderr else ""
            issues.append(ValidationIssue(f"Pandoc conversion failed: {stderr}", context=context))
            return None

    issues.append(ValidationIssue(f"Unknown renderer: {renderer}", context=context))
    return None