# wp-materialize/src/markdown_utils.py

from __future__ import annotations

import re

import markdown as md_lib
import subprocess

from .errors import ValidationIssue


# Matches an ATX heading line: one to six leading '#' characters followed by
# whitespace. Group 1 is the run of hashes; group 2 is the rest of the line,
# including the whitespace that separates it from the hashes.
_HEADING_RE = re.compile(r"^(#{1,6})(\s+.*)$")

# Matches a code-fence line: up to three spaces of indentation, then a run of
# at least three backticks or tildes. Group 1 is the fence run; group 2 is
# whatever follows it (the info string on an opening fence).
_FENCE_RE = re.compile(r"^ {0,3}(`{3,}|~{3,})(.*)$")


def _fenced_line_flags(lines: list[str]) -> list[bool]:
    """Return one flag per line: True when the line is part of a fenced code block.

    The opening and closing fence lines themselves are flagged too. A fence
    that is never closed extends to the end of the input.
    """
    flags: list[bool] = []
    open_fence: str | None = None  # fence run that opened the current block
    for line in lines:
        match = _FENCE_RE.match(line)
        if open_fence is None:
            # A backtick fence whose trailing text contains a backtick is
            # inline code (e.g. ```x```), not the start of a block.
            is_opener = bool(match) and not (
                match.group(1)[0] == "`" and "`" in match.group(2)
            )
            if is_opener:
                open_fence = match.group(1)
            flags.append(is_opener)
            continue
        flags.append(True)
        # A block is closed by a fence of the same character, at least as long
        # as the opener, with nothing but whitespace after it.
        if (
            match
            and match.group(1)[0] == open_fence[0]
            and len(match.group(1)) >= len(open_fence)
            and not match.group(2).strip()
        ):
            open_fence = None
    return flags


def extract_title(markdown_text: str, level: int, strict: bool, context: str, issues: list[ValidationIssue]) -> tuple[str, str] | None:
    """Split a Markdown document into its title heading and the remaining body.

    The title is the first heading made of exactly ``level`` '#' characters.
    Lines inside fenced code blocks are never treated as headings, so shell
    or Python comments in code samples are ignored.

    Args:
        markdown_text: The Markdown source.
        level: Number of '#' characters the title heading must have.
        strict: When true, the document must contain exactly one heading of
            that level; otherwise the first one found is used.
        context: Passed through to every ``ValidationIssue`` that is recorded.
        issues: List that validation problems are appended to.

    Returns:
        ``(title, body)`` where ``body`` is the document without the title
        line and with its remaining headings promoted by one level, or
        ``None`` when a problem was appended to ``issues``.
    """
    # Match line by line (no MULTILINE) so the whitespace after the hashes can
    # never run across a newline and pick up the following line as the title.
    pattern = re.compile(rf"^{'#' * level}\s+(.*)$")
    lines = markdown_text.splitlines()
    fenced = _fenced_line_flags(lines)

    matches = []
    for index, line in enumerate(lines):
        if fenced[index]:
            continue
        match = pattern.match(line)
        if match:
            matches.append((index, match))

    if strict and len(matches) != 1:
        issues.append(
            ValidationIssue(
                f"Expected exactly one level-{level} heading, found {len(matches)}",
                context=context,
            )
        )
        return None
    if not matches:
        issues.append(ValidationIssue(f"Missing level-{level} heading", context=context))
        return None

    line_index, match = matches[0]
    title = match.group(1).strip()
    if not title:
        issues.append(ValidationIssue("Heading title cannot be empty", context=context))
        return None

    # The index comes from the same splitlines() result the line is removed
    # from, so it stays correct for every kind of line break.
    del lines[line_index]
    body = _promote_headings("\n".join(lines))
    return title, body


def _promote_headings(text: str) -> str:
    """Raise every heading in ``text`` by one level (``##`` becomes ``#``).

    Level-1 headings are left unchanged, and lines inside fenced code blocks
    are copied through untouched.
    """
    lines = text.splitlines()
    promoted_lines = []
    for line, in_fence in zip(lines, _fenced_line_flags(lines)):
        match = None if in_fence else _HEADING_RE.match(line)
        if not match:
            promoted_lines.append(line)
            continue
        hashes, rest = match.groups()
        level = len(hashes)
        if level > 1:
            level -= 1
        promoted_lines.append("#" * level + rest)
    return "\n".join(promoted_lines)


def convert_markdown(
    markdown_text: str,
    context: str,
    issues: list[ValidationIssue],
    renderer: str = "default",
    hard_line_breaks: bool = False,
) -> str | None:
    """Render Markdown to HTML5 with the selected renderer.

    Args:
        markdown_text: The Markdown source to convert.
        context: Passed through to every ``ValidationIssue`` that is recorded.
        issues: List that conversion problems are appended to.
        renderer: ``"default"`` (python-markdown with the ``extra``
            extension), ``"py-gfm"`` (python-markdown with the
            ``mdx_gfm`` GitHub-flavored extension) or ``"pandoc"`` (the
            external ``pandoc`` executable).
        hard_line_breaks: When true, single newlines become line breaks
            (``nl2br`` for python-markdown, ``+hard_line_breaks`` for pandoc).

    Returns:
        The HTML string, or ``None`` when a problem was appended to ``issues``.
    """
    if renderer == "pandoc":
        source_format = "markdown+hard_line_breaks" if hard_line_breaks else "markdown"
        try:
            result = subprocess.run(
                ["pandoc", f"--from={source_format}", "--to=html5"],
                input=markdown_text,
                text=True,
                # pandoc reads and writes UTF-8 regardless of the locale, so
                # do not let the pipes fall back to the locale encoding.
                encoding="utf-8",
                capture_output=True,
                check=True,
            )
        except OSError as exc:
            # Covers a missing binary (FileNotFoundError) as well as other
            # launch failures such as a non-executable file.
            issues.append(ValidationIssue(f"pandoc is not available: {exc}", context=context))
            return None
        except subprocess.CalledProcessError as exc:
            stderr = exc.stderr.strip() if exc.stderr else ""
            issues.append(ValidationIssue(f"Pandoc conversion failed: {stderr}", context=context))
            return None
        return result.stdout

    if renderer == "default":
        extensions: list = ["extra"]
    elif renderer == "py-gfm":
        try:
            import mdx_gfm
        except Exception as exc:  # pragma: no cover - dependency missing
            issues.append(ValidationIssue(f"py-gfm is not available: {exc}", context=context))
            return None
        extension_class = getattr(mdx_gfm, "GithubFlavoredMarkdownExtension", None)
        if extension_class is None:
            issues.append(ValidationIssue("py-gfm extension not found: GithubFlavoredMarkdownExtension", context=context))
            return None
        try:
            extensions = [extension_class()]
        except Exception as exc:  # pragma: no cover - depends on markdown internals
            issues.append(ValidationIssue(f"Markdown conversion failed: {exc}", context=context))
            return None
    else:
        issues.append(ValidationIssue(f"Unknown renderer: {renderer}", context=context))
        return None

    # Both python-markdown based renderers share the same conversion call.
    if hard_line_breaks:
        extensions.append("nl2br")
    try:
        return md_lib.markdown(markdown_text, extensions=extensions, output_format="html5")
    except Exception as exc:  # pragma: no cover - depends on markdown internals
        issues.append(ValidationIssue(f"Markdown conversion failed: {exc}", context=context))
        return None