111 lines
4.0 KiB
Python
111 lines
4.0 KiB
Python
from __future__ import annotations
|
|
|
|
import re
|
|
|
|
import markdown as md_lib
|
|
import subprocess
|
|
|
|
from .errors import ValidationIssue
|
|
|
|
|
|
# Matches one ATX heading line: group 1 is the run of one to six "#"
# characters, group 2 is the rest of the line including the whitespace that
# separates the hashes from the heading text.
_HEADING_RE = re.compile(r"^(#{1,6})(\s+.*)$")
|
|
|
|
|
|
def extract_title(markdown_text: str, level: int, strict: bool, context: str, issues: list[ValidationIssue]) -> tuple[str, str] | None:
    """Split *markdown_text* into its title heading and the remaining body.

    The first ATX heading written with exactly ``level`` hash characters
    supplies the title. That line is removed from the text and the remainder
    is passed through ``_promote_headings`` before being returned.

    Args:
        markdown_text: Markdown source to scan.
        level: Number of ``#`` characters the title heading must have.
        strict: When true, anything other than exactly one matching heading is
            recorded as an issue.
        context: Forwarded to every ``ValidationIssue`` that is recorded.
        issues: List that receives one ``ValidationIssue`` per failure.

    Returns:
        ``(title, body)`` on success, or ``None`` after appending to *issues*.
    """
    # The separator is "any whitespace except a newline". A plain ``\s+`` also
    # matches line breaks, so an empty heading such as "#\n\ntext" would swallow
    # the break and report the following paragraph as the title.
    # NOTE(review): the scan covers the whole text, so heading-like lines inside
    # fenced code blocks are matched as well.
    pattern = re.compile(rf"^{'#' * level}[^\S\n]+(.*)$", re.MULTILINE)
    matches = list(pattern.finditer(markdown_text))

    if strict and len(matches) != 1:
        issues.append(
            ValidationIssue(
                f"Expected exactly one level-{level} heading, found {len(matches)}",
                context=context,
            )
        )
        return None
    if not matches:
        issues.append(ValidationIssue(f"Missing level-{level} heading", context=context))
        return None

    match = matches[0]
    title = match.group(1).strip()
    if not title:
        issues.append(ValidationIssue("Heading title cannot be empty", context=context))
        return None

    lines = markdown_text.splitlines()
    # Locate the heading with the same splitter that produced ``lines``.
    # ``splitlines`` also breaks on "\r", "\f", "\v" and several Unicode
    # separators, so counting "\n" alone can point at the wrong line.
    line_index = len(markdown_text[: match.start()].splitlines())
    del lines[line_index]

    body = "\n".join(lines)
    return title, _promote_headings(body)
|
|
|
|
|
|
def _promote_headings(text: str) -> str:
    """Return *text* with every ATX heading raised by one level.

    A heading with two or more hashes loses one hash; level-1 headings are
    left unchanged. Lines inside fenced code blocks (three or more backticks
    or tildes, indented at most three spaces) are copied verbatim, so
    hash-prefixed code such as shell comments is not rewritten. A fence that is
    never closed is treated as running to the end of the text.

    Lines are split with ``str.splitlines`` and re-joined with a single
    newline, so a trailing newline is not preserved.
    """
    fence_re = re.compile(r" {0,3}(`{3,}|~{3,})(.*)$")
    promoted_lines = []
    fence_char = ""  # "`" or "~" while inside a fenced code block, else ""
    fence_len = 0

    for line in text.splitlines():
        fence = fence_re.match(line)

        if fence_char:
            # Inside a code block: copy the line untouched and look for the
            # closing fence (same character, at least as long, nothing after).
            promoted_lines.append(line)
            if fence:
                marker, trailer = fence.groups()
                if marker[0] == fence_char and len(marker) >= fence_len and not trailer.strip():
                    fence_char = ""
            continue

        if fence:
            marker, info = fence.groups()
            # A backtick fence may not carry backticks in its info string;
            # such a line is inline code, not the start of a block.
            if marker[0] != "`" or "`" not in info:
                fence_char, fence_len = marker[0], len(marker)
                promoted_lines.append(line)
                continue

        match = _HEADING_RE.match(line)
        if not match:
            promoted_lines.append(line)
            continue

        hashes, rest = match.groups()
        level = len(hashes)
        if level > 1:
            level -= 1
        promoted_lines.append("#" * level + rest)

    return "\n".join(promoted_lines)
|
|
|
|
|
|
def convert_markdown(
    markdown_text: str,
    context: str,
    issues: list[ValidationIssue],
    renderer: str = "default",
    hard_line_breaks: bool = False,
) -> str | None:
    """Render *markdown_text* to HTML5 with the selected renderer.

    Supported ``renderer`` values:

    * ``"default"`` - Python-Markdown with the ``extra`` extension.
    * ``"py-gfm"`` - Python-Markdown with
      ``mdx_gfm.GithubFlavoredMarkdownExtension``.
    * ``"pandoc"`` - the ``pandoc`` executable, fed through stdin.

    Args:
        markdown_text: Markdown source to convert.
        context: Forwarded to every ``ValidationIssue`` that is recorded.
        issues: List that receives one ``ValidationIssue`` per failure.
        renderer: Name of the rendering backend.
        hard_line_breaks: When true, the ``nl2br`` extension (Python-Markdown
            backends) or the ``hard_line_breaks`` extension (pandoc) is enabled.

    Returns:
        The rendered HTML, or ``None`` after appending to *issues*.
    """
    if renderer in ("default", "py-gfm"):
        extension_class = None
        if renderer == "py-gfm":
            try:
                import mdx_gfm
            except Exception as exc:  # pragma: no cover - dependency missing
                issues.append(ValidationIssue(f"py-gfm is not available: {exc}", context=context))
                return None
            extension_class = getattr(mdx_gfm, "GithubFlavoredMarkdownExtension", None)
            if extension_class is None:
                issues.append(ValidationIssue("py-gfm extension not found: GithubFlavoredMarkdownExtension", context=context))
                return None

        try:
            # The extension is instantiated inside the try block so that a
            # failing constructor is reported as a conversion failure.
            extensions = ["extra"] if extension_class is None else [extension_class()]
            if hard_line_breaks:
                extensions.append("nl2br")
            return md_lib.markdown(markdown_text, extensions=extensions, output_format="html5")
        except Exception as exc:  # pragma: no cover - depends on markdown internals
            issues.append(ValidationIssue(f"Markdown conversion failed: {exc}", context=context))
            return None

    if renderer == "pandoc":
        source_format = "markdown+hard_line_breaks" if hard_line_breaks else "markdown"
        try:
            result = subprocess.run(
                ["pandoc", f"--from={source_format}", "--to=html5"],
                input=markdown_text,
                text=True,
                # pandoc reads and writes UTF-8 regardless of the locale; without
                # an explicit encoding the pipes use the locale's codec, which
                # corrupts or rejects non-ASCII text on non-UTF-8 systems.
                encoding="utf-8",
                capture_output=True,
                check=True,
            )
        except subprocess.CalledProcessError as exc:
            stderr = exc.stderr.strip() if exc.stderr else ""
            issues.append(ValidationIssue(f"Pandoc conversion failed: {stderr}", context=context))
            return None
        except OSError as exc:
            # Covers a missing executable (FileNotFoundError) as well as other
            # launch failures such as PermissionError.
            issues.append(ValidationIssue(f"pandoc is not available: {exc}", context=context))
            return None
        return result.stdout

    issues.append(ValidationIssue(f"Unknown renderer: {renderer}", context=context))
    return None
|