From 6565a8546fe71e9e6561073563b3bebab7202bc9 Mon Sep 17 00:00:00 2001 From: Peisong Xiao Date: Sun, 8 Feb 2026 05:30:55 -0500 Subject: [PATCH 01/12] added local export support and refined program logic --- README.md | 10 +++ configurations.md | 31 +++++++-- examples.md | 10 ++- src/apply.py | 14 +++- src/cli.py | 95 ++++++++++++++++++++++++--- src/evaluation.py | 110 +++++++++++++++++++++++++++---- src/git_utils.py | 15 +++++ src/local_export.py | 157 ++++++++++++++++++++++++++++++++++++++++++++ src/manifest.py | 28 +++++++- src/models.py | 14 +++- src/wp_cli.py | 36 ++++++++++ 11 files changed, 488 insertions(+), 32 deletions(-) create mode 100644 src/local_export.py diff --git a/README.md b/README.md index c260a37..c821142 100644 --- a/README.md +++ b/README.md @@ -96,6 +96,16 @@ Skip git sync: wp-materialize apply --no-sync ``` +Local export (writes per-post directories with HTML, metadata, and WP command): + +```bash +wp-materialize local /path/to/output +``` + +Notes: +1. The local export assumes every post is new and generates create commands. +2. Categories must already exist in WordPress for exact commands. + ## Manifests Each managed directory must contain a `.wp-materialize.json` manifest. See `configurations.md` for the manifest guide. diff --git a/configurations.md b/configurations.md index 0e9775a..1645406 100644 --- a/configurations.md +++ b/configurations.md @@ -48,21 +48,28 @@ Top-level fields: Inherited category paths for this directory and its children. 2. `tags` (object, optional) Inherited tags for this directory and its children. -3. `subdirectories` (object, optional) +3. `author` (object, optional) + Inherited author for this directory and its children. Must resolve to a single author. +4. `subdirectories` (object, optional) Explicit list of subdirectories to traverse. -4. `files` (object, optional) +5. `files` (object, optional) Mapping of Markdown file names to file-level configuration. -`categories`, `tags`, and `subdirectories` objects: +`categories`, `tags`, `author`, and `subdirectories` objects: 1. `content` (array of strings, optional) List of values for the given field. For `categories`, each string is a hierarchical path such as `Systems/Infrastructure`. For `subdirectories`, each string is a directory name under the current directory. + For `author`, exactly one string must remain after inheritance is applied. 2. `inherit` (boolean, optional, default `true`) If `true`, append to the parent effective list. If `false`, replace the parent list entirely. +Note: Root directory manifests do not need to specify `inherit` for these top-level +fields (the default is `true`). File-level overrides inside `files` still support +inheritance via their own `inherit` fields. + `files` entries: Each key is a Markdown file name (relative to the manifest directory). @@ -73,10 +80,14 @@ Each value is an object with the following fields: 2. `use_heading_as_title` (object, optional) Extracts a heading from the Markdown as the title and removes that heading from the body while promoting remaining headings by one level. -3. `categories` (object, optional) +3. `created_on` (string, optional) + Manual override for the post creation time in `YYYY-MM-DD hh:mm` format. +4. `last_modified` (string, optional) + Manual override for the post modified time in `YYYY-MM-DD hh:mm` format. +5. `categories` (object, optional) Overrides categories for this file. Uses the same `content` and `inherit` fields as the top-level `categories` object. -4. `tags` (object, optional) +6. `tags` (object, optional) Overrides tags for this file. Uses the same `content` and `inherit` fields as the top-level `tags` object. @@ -87,6 +98,11 @@ Each value is an object with the following fields: 2. `strict` (boolean, optional, default `true`) If `true`, exactly one matching heading must exist. +If `created_on` or `last_modified` is not provided, the system infers the value. +For `git_repositories` sources it uses git commit timestamps; for `directories` +sources it uses filesystem timestamps. The system does not auto-detect git for +entries declared under `directories`, even if the path is inside a git repo. + ## Post Identity Each post is identified with: @@ -97,3 +113,8 @@ _wp_materialize_source = : `source_name` is the `name` from the global config entry, and `relative_path` is relative to the repo or directory root used for identity resolution. + +## Tag and Category Creation + +Missing categories and tags are created automatically during apply, after a successful +dry-run evaluation and before any post updates. diff --git a/examples.md b/examples.md index 2fc4e74..63d37d9 100644 --- a/examples.md +++ b/examples.md @@ -10,6 +10,7 @@ Root directory manifest (`.wp-materialize.json`): { "categories": { "content": ["Systems", "Infrastructure"], "inherit": true }, "tags": { "content": ["automation", "wordpress"], "inherit": true }, + "author": { "content": ["editorial"], "inherit": true }, "subdirectories": { "content": ["design", "notes"], "inherit": true }, "files": { "post.md": { @@ -18,7 +19,9 @@ Root directory manifest (`.wp-materialize.json`): "tags": { "content": ["extra"], "inherit": true } }, "essay.md": { - "use_heading_as_title": { "level": 1, "strict": true } + "use_heading_as_title": { "level": 1, "strict": true }, + "created_on": "2025-01-10 09:30", + "last_modified": "2025-02-14 16:45" } } } @@ -103,3 +106,8 @@ Subdirectory manifest (`design/.wp-materialize.json`): "directories": [] } ``` + +## Timestamp Behavior Example + +- `git_repositories` entries use git commit timestamps for `created_on`/`last_modified` inference. +- `directories` entries use filesystem timestamps even if the path is inside a git repo. diff --git a/src/apply.py b/src/apply.py index e1cc99b..cacd5e4 100644 --- a/src/apply.py +++ b/src/apply.py @@ -19,6 +19,7 @@ def apply_changes( category_map = _build_category_map(categories) _create_missing_categories(result, wp, category_map) + _create_missing_tags(result, wp) successes: Set[str] = set() try: @@ -48,7 +49,7 @@ def _create_missing_categories( wp: WordPressCLI, category_map: Dict[tuple[int, str], int], ) -> None: - paths = result.categories_to_create.missing_paths + paths = result.taxonomy_to_create.missing_categories paths = sorted(paths, key=len) seen: Set[tuple[str, ...]] = set() for segments in paths: @@ -67,6 +68,11 @@ def _create_missing_categories( parent = new_id +def _create_missing_tags(result: EvaluationResult, wp: WordPressCLI) -> None: + for tag in result.taxonomy_to_create.missing_tags: + wp.create_tag(tag) + + def _apply_post(post: PostPlan, wp: WordPressCLI, category_map: Dict[tuple[int, str], int]) -> None: category_ids: List[int] = [] for path in post.categories: @@ -89,6 +95,9 @@ def _apply_post(post: PostPlan, wp: WordPressCLI, category_map: Dict[tuple[int, categories=category_ids, tags=post.tags, source_identity=post.identity, + created_on=post.created_on, + last_modified=post.last_modified, + author=post.author, ) return @@ -98,4 +107,7 @@ def _apply_post(post: PostPlan, wp: WordPressCLI, category_map: Dict[tuple[int, content=post.html, categories=category_ids, tags=post.tags, + created_on=post.created_on, + last_modified=post.last_modified, + author=post.author, ) diff --git a/src/cli.py b/src/cli.py index d64132f..2bed1a9 100644 --- a/src/cli.py +++ b/src/cli.py @@ -9,24 +9,81 @@ from .apply import apply_changes from .config import load_config from .errors import ConfigurationError, MaterializeError, ValidationError from .evaluation import evaluate +from .local_export import export_local from .state import load_state from .wp_cli import WordPressCLI def main() -> int: - parser = argparse.ArgumentParser(description="wp-materialize") - parser.add_argument("command", nargs="?", choices=["evaluate", "apply"], default="evaluate") - parser.add_argument("--config", type=Path, default=_default_config_path()) - parser.add_argument("--state", type=Path, default=_default_state_path()) - parser.add_argument("--no-sync", action="store_true", help="Skip git clone/pull") - parser.add_argument("--json", action="store_true", help="Output evaluation summary as JSON") + parser = argparse.ArgumentParser( + description="wp-materialize: compile Markdown manifests into WordPress posts", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + epilog=("Command-specific help: wp-materialize --help"), + ) + common = argparse.ArgumentParser(add_help=False) + common.add_argument( + "--config", + type=Path, + default=_default_config_path(), + help="Path to the global config JSON file.", + ) + common.add_argument( + "--state", + type=Path, + default=_default_state_path(), + help="Path to the state JSON file used for incremental tracking.", + ) + common.add_argument( + "--no-sync", + action="store_true", + help="Skip git clone/pull for git_repositories entries.", + ) + common.add_argument( + "--force-new", + action="store_true", + help="Force all posts to be treated as new (ignore incremental timestamps).", + ) + common.add_argument( + "--json", + action="store_true", + help="Output evaluation summary as JSON.", + ) + + subparsers = parser.add_subparsers(dest="command", metavar="command") + + subparsers.add_parser( + "evaluate", + parents=[common], + help="Validate config/manifests and plan changes (no WP writes).", + description="Validate config/manifests, convert Markdown, and plan changes without writing to WordPress.", + ) + subparsers.add_parser( + "apply", + parents=[common], + help="Evaluate then create/update WordPress posts and taxonomy.", + description="Evaluate, then create categories/tags and create or update posts in WordPress.", + ) + local_parser = subparsers.add_parser( + "local", + parents=[common], + help="Export per-post folders with HTML, metadata, and wp command.", + description="Export per-post folders with HTML, metadata, and the exact wp command.", + ) + local_parser.add_argument( + "output_dir", + help="Output directory for local export (required).", + ) args = parser.parse_args() + if args.command is None: + parser.print_help() + return 1 + try: config = load_config(args.config) state = load_state(args.state) - result = evaluate(config, state, sync_repos=not args.no_sync) + result = evaluate(config, state, sync_repos=not args.no_sync, force_new=args.force_new) except ValidationError as exc: _print_validation_error(exc) return 1 @@ -39,6 +96,20 @@ def main() -> int: else: print(_evaluation_summary(result)) + if args.command == "local": + output_dir = getattr(args, "output_dir", None) + if not output_dir: + print("Error: local command requires an output directory", file=sys.stderr) + return 1 + wp = WordPressCLI(config.wordpress_root) + try: + export_local(result, Path(output_dir), wp) + except MaterializeError as exc: + print(f"Error: {exc}", file=sys.stderr) + return 1 + print("Local export complete") + return 0 + if args.command == "apply": wp = WordPressCLI(config.wordpress_root) try: @@ -62,11 +133,13 @@ def _default_state_path() -> Path: def _evaluation_summary(result) -> str: total = len(result.posts) updates = sum(1 for post in result.posts if post.should_update) - categories = len(result.categories_to_create.missing_paths) + categories = len(result.taxonomy_to_create.missing_categories) + tags = len(result.taxonomy_to_create.missing_tags) lines = [ f"Posts: {total}", f"Posts to update: {updates}", f"Categories to create: {categories}", + f"Tags to create: {tags}", ] return "\n".join(lines) @@ -83,10 +156,14 @@ def _evaluation_json(result) -> str: "should_update": post.should_update, "categories": post.categories, "tags": post.tags, + "created_on": post.created_on, + "last_modified": post.last_modified, + "author": post.author, } for post in result.posts ], - "categories_to_create": result.categories_to_create.missing_paths, + "categories_to_create": result.taxonomy_to_create.missing_categories, + "tags_to_create": result.taxonomy_to_create.missing_tags, } return json.dumps(payload, indent=2) diff --git a/src/evaluation.py b/src/evaluation.py index af98e48..899ced2 100644 --- a/src/evaluation.py +++ b/src/evaluation.py @@ -1,16 +1,17 @@ from __future__ import annotations from dataclasses import dataclass +from datetime import datetime from pathlib import Path import shutil from typing import Dict, List, Optional, Set from .config import Config from .errors import ValidationError, ValidationIssue -from .git_utils import ensure_repo, git_timestamp +from .git_utils import ensure_repo, git_first_timestamp, git_timestamp from .manifest import load_manifest from .markdown_utils import convert_markdown, extract_title -from .models import CategoryPlan, EvaluationResult, InheritList, Manifest, PostPlan, Source +from .models import EvaluationResult, InheritList, PostPlan, Source, TaxonomyPlan from .state import State from .wp_cli import WordPressCLI @@ -19,11 +20,12 @@ from .wp_cli import WordPressCLI class _Context: categories: InheritList tags: InheritList + author: InheritList subdirectories: InheritList manifest_chain: List[Path] -def evaluate(config: Config, state: State, sync_repos: bool) -> EvaluationResult: +def evaluate(config: Config, state: State, sync_repos: bool, force_new: bool = False) -> EvaluationResult: issues: List[ValidationIssue] = [] sources = _load_sources(config, sync_repos, issues) @@ -36,6 +38,7 @@ def evaluate(config: Config, state: State, sync_repos: bool) -> EvaluationResult context=_Context( categories=InheritList(), tags=InheritList(), + author=InheritList(), subdirectories=InheritList(), manifest_chain=[], ), @@ -56,12 +59,15 @@ def evaluate(config: Config, state: State, sync_repos: bool) -> EvaluationResult except Exception as exc: issues.append(ValidationIssue(str(exc), context=str(config.wordpress_root))) - missing_categories = _plan_categories(posts, categories, issues, tag_names) + missing_categories, missing_tags = _plan_taxonomy(posts, categories, tag_names) if issues: raise ValidationError(issues) - return EvaluationResult(posts=posts, categories_to_create=CategoryPlan(missing_paths=missing_categories)) + return EvaluationResult( + posts=posts, + taxonomy_to_create=TaxonomyPlan(missing_categories=missing_categories, missing_tags=missing_tags), + ) def _load_sources( @@ -123,6 +129,7 @@ def _evaluate_directory( effective_categories = _merge_inherit(context.categories, manifest.categories) effective_tags = _merge_inherit(context.tags, manifest.tags) + effective_author = _merge_inherit(context.author, manifest.author) effective_subdirs = _merge_inherit(context.subdirectories, manifest.subdirectories) manifest_chain = context.manifest_chain + [manifest.path] @@ -161,6 +168,7 @@ def _evaluate_directory( resolved_categories = _normalize_list(resolved_categories, "category", str(file_path), issues) resolved_tags = _normalize_list(resolved_tags, "tag", str(file_path), issues) + resolved_author = _resolve_author(effective_author.content, str(file_path), issues) html = convert_markdown(markdown_body, context=str(file_path), issues=issues) if html is None: @@ -189,7 +197,14 @@ def _evaluate_directory( identity = f"{source.name}:{relative_path}" cached_entry = state.posts.get(identity) cached_ts = cached_entry.source_timestamp if cached_entry else None - should_update = cached_ts is None or source_timestamp > cached_ts + should_update = True if force_new else (cached_ts is None or source_timestamp > cached_ts) + created_on, last_modified = _resolve_post_datetimes( + source=source, + identity_root=source.identity_root, + relative_path=relative_path, + spec=spec, + issues=issues, + ) posts.append( PostPlan( @@ -201,9 +216,12 @@ def _evaluate_directory( html=html, categories=resolved_categories, tags=resolved_tags, + author=resolved_author, source_timestamp=source_timestamp, cached_timestamp=cached_ts, should_update=should_update, + created_on=created_on, + last_modified=last_modified, ) ) @@ -218,6 +236,7 @@ def _evaluate_directory( context=_Context( categories=effective_categories, tags=effective_tags, + author=effective_author, subdirectories=effective_subdirs, manifest_chain=manifest_chain, ), @@ -263,6 +282,16 @@ def _normalize_list(values: List[str], label: str, context: str, issues: List[Va return normalized +def _resolve_author(values: List[str], context: str, issues: List[ValidationIssue]) -> Optional[str]: + normalized = _normalize_list(values, "author", context, issues) + if not normalized: + return None + if len(normalized) > 1: + issues.append(ValidationIssue("Multiple authors specified; only one is allowed", context=context)) + return None + return normalized[0] + + def _relative_path(path: Path, root: Path, issues: List[ValidationIssue]) -> Optional[str]: try: return str(path.relative_to(root)) @@ -290,25 +319,84 @@ def _timestamp_for_path( return None -def _plan_categories( +def _resolve_post_datetimes( + source: Source, + identity_root: Path, + relative_path: str, + spec, + issues: List[ValidationIssue], +) -> tuple[Optional[str], Optional[str]]: + created_dt = spec.created_on + modified_dt = spec.last_modified + + if created_dt is None or modified_dt is None: + inferred = _infer_file_timestamps(source, identity_root, relative_path, issues) + if inferred is None: + return None, None + inferred_created, inferred_modified = inferred + if created_dt is None: + created_dt = datetime.fromtimestamp(inferred_created) + if modified_dt is None: + modified_dt = datetime.fromtimestamp(inferred_modified) + + if created_dt and modified_dt and modified_dt < created_dt: + issues.append( + ValidationIssue("last_modified cannot be earlier than created_on", context=relative_path) + ) + return None, None + + created_on = _format_wp_datetime(created_dt) if created_dt else None + last_modified = _format_wp_datetime(modified_dt) if modified_dt else None + return created_on, last_modified + + +def _infer_file_timestamps( + source: Source, + identity_root: Path, + relative_path: str, + issues: List[ValidationIssue], +) -> Optional[tuple[int, int]]: + if source.kind == "git": + try: + created_ts = git_first_timestamp(identity_root, relative_path) + modified_ts = git_timestamp(identity_root, relative_path) + return created_ts, modified_ts + except Exception: + pass + try: + stat = (identity_root / relative_path).stat() + return int(stat.st_ctime), int(stat.st_mtime) + except Exception as exc: + issues.append(ValidationIssue(f"Timestamp lookup failed: {exc}", context=relative_path)) + return None + + +def _format_wp_datetime(value: datetime) -> str: + return value.strftime("%Y-%m-%d %H:%M:%S") + + +def _plan_taxonomy( posts: List[PostPlan], categories, # list of CategoryTerm - issues: List[ValidationIssue], existing_tags: Set[str], -) -> List[List[str]]: +) -> tuple[List[List[str]], List[str]]: category_map: Dict[tuple[int, str], int] = {} for category in categories: category_map[(category.parent, category.name)] = category.term_id missing_paths: List[List[str]] = [] seen_missing: Set[tuple[str, ...]] = set() + missing_tags: List[str] = [] + seen_tags: Set[str] = set() for post in posts: if not post.should_update: continue for tag in post.tags: if tag not in existing_tags: - issues.append(ValidationIssue(f"Tag does not exist: {tag}", context=post.relative_path)) + if tag not in seen_tags: + seen_tags.add(tag) + missing_tags.append(tag) for path in post.categories: segments = [segment for segment in path.split("/") if segment] if not segments: @@ -328,4 +416,4 @@ def _plan_categories( seen_missing.add(key) missing_paths.append(list(segments)) - return missing_paths + return missing_paths, missing_tags diff --git a/src/git_utils.py b/src/git_utils.py index 9d79917..ef141f8 100644 --- a/src/git_utils.py +++ b/src/git_utils.py @@ -38,6 +38,21 @@ def git_timestamp(repo_root: Path, relative_path: str) -> int: raise ConfigurationError(f"Invalid git timestamp for {relative_path}: {output}") from exc +def git_first_timestamp(repo_root: Path, relative_path: str) -> int: + result = _run( + ["git", "log", "--reverse", "-1", "--format=%ct", "--", relative_path], + cwd=repo_root, + capture_output=True, + ) + output = result.stdout.strip() + if not output: + raise ConfigurationError(f"No git timestamp for {relative_path}") + try: + return int(output) + except ValueError as exc: + raise ConfigurationError(f"Invalid git timestamp for {relative_path}: {output}") from exc + + def _run(cmd: list[str], cwd: Path, capture_output: bool = False) -> subprocess.CompletedProcess: try: return subprocess.run( diff --git a/src/local_export.py b/src/local_export.py new file mode 100644 index 0000000..868b0a1 --- /dev/null +++ b/src/local_export.py @@ -0,0 +1,157 @@ +from __future__ import annotations + +import json +import re +import shlex +import unicodedata +from pathlib import Path +from typing import Dict, List, Set + +from .errors import MaterializeError, WordPressError +from .models import EvaluationResult, PostPlan +from .wp_cli import CategoryTerm, WordPressCLI + + +def export_local(result: EvaluationResult, output_dir: Path, wp: WordPressCLI) -> None: + if not output_dir.exists(): + output_dir.mkdir(parents=True, exist_ok=True) + if not output_dir.is_dir(): + raise MaterializeError(f"Output path is not a directory: {output_dir}") + + categories = wp.list_categories() + category_map = _build_category_map(categories) + missing_categories = _find_missing_categories(result.posts, category_map) + if missing_categories: + raise MaterializeError( + "Cannot build exact wp commands with missing categories. " + "Run apply to create categories first." + ) + + used_names: Set[str] = set() + for post in result.posts: + category_ids = _resolve_category_ids(post, category_map) + metadata = _build_metadata(post, category_ids) + command = _build_wp_command(post, category_ids) + + base_name = _normalize_name(f"{post.source.name}/{post.relative_path}") + title_name = _normalize_name(post.title) + if title_name: + dir_name = f"{base_name}-{title_name}" + else: + dir_name = base_name + dir_name = _dedupe_name(dir_name, used_names) + used_names.add(dir_name) + + target_dir = output_dir / dir_name + target_dir.mkdir(parents=True, exist_ok=True) + + (target_dir / "post.html").write_text(post.html, encoding="utf-8") + (target_dir / "metadata.json").write_text( + json.dumps(metadata, indent=2, sort_keys=True), + encoding="utf-8", + ) + (target_dir / "wp-command.txt").write_text(command + "\n", encoding="utf-8") + + +def _build_category_map(categories: List[CategoryTerm]) -> Dict[tuple[int, str], int]: + return {(category.parent, category.name): category.term_id for category in categories} + + +def _resolve_category_ids(post: PostPlan, category_map: Dict[tuple[int, str], int]) -> List[int]: + category_ids: List[int] = [] + for path in post.categories: + segments = [segment for segment in path.split("/") if segment] + if not segments: + continue + parent = 0 + for segment in segments: + map_key = (parent, segment) + if map_key not in category_map: + raise WordPressError(f"Missing category during local export: {path}") + parent = category_map[map_key] + category_ids.append(parent) + return category_ids + + +def _find_missing_categories(posts: List[PostPlan], category_map: Dict[tuple[int, str], int]) -> List[str]: + missing: Set[str] = set() + for post in posts: + for path in post.categories: + segments = [segment for segment in path.split("/") if segment] + if not segments: + continue + parent = 0 + for segment in segments: + map_key = (parent, segment) + if map_key not in category_map: + missing.add(path) + break + parent = category_map[map_key] + return sorted(missing) + + +def _build_metadata(post: PostPlan, category_ids: List[int]) -> dict: + metadata = { + "post_type": "post", + "post_status": "publish", + "post_title": post.title, + "post_content": post.html, + "post_category": category_ids, + "tags_input": post.tags, + "meta_input": {"_wp_materialize_source": post.identity}, + } + if post.created_on: + metadata["post_date"] = post.created_on + if post.last_modified: + metadata["post_modified"] = post.last_modified + if post.author: + metadata["post_author"] = post.author + return metadata + + +def _build_wp_command(post: PostPlan, category_ids: List[int]) -> str: + payload = json.dumps({"_wp_materialize_source": post.identity}) + args = [ + "wp", + "post", + "create", + "--post_type=post", + "--post_status=publish", + f"--post_title={post.title}", + f"--post_content={post.html}", + f"--post_category={','.join(str(cat) for cat in category_ids)}", + f"--tags_input={','.join(post.tags)}", + f"--meta_input={payload}", + "--porcelain", + ] + if post.created_on: + args.append(f"--post_date={post.created_on}") + if post.last_modified: + args.append(f"--post_modified={post.last_modified}") + if post.author: + args.append(f"--post_author={post.author}") + return " ".join(shlex.quote(arg) for arg in args) + + +def _normalize_name(value: str) -> str: + text = value.strip() + text = text.replace("\\", "/") + text = text.replace("/", "-") + text = unicodedata.normalize("NFKD", text) + text = text.encode("ascii", "ignore").decode("ascii") + text = text.lower() + text = re.sub(r"[^a-z0-9._-]+", "-", text) + text = re.sub(r"-+", "-", text) + text = text.strip("-_.") + return text or "post" + + +def _dedupe_name(name: str, used: Set[str]) -> str: + if name not in used: + return name + index = 2 + while True: + candidate = f"{name}-{index}" + if candidate not in used: + return candidate + index += 1 diff --git a/src/manifest.py b/src/manifest.py index 94bdba1..97ea5ca 100644 --- a/src/manifest.py +++ b/src/manifest.py @@ -1,6 +1,7 @@ from __future__ import annotations import json +from datetime import datetime from pathlib import Path from typing import Dict @@ -23,7 +24,7 @@ def load_manifest(path: Path, issues: list[ValidationIssue]) -> Manifest | None: issues.append(ValidationIssue("Manifest must be a JSON object", context=str(path))) return None - allowed = {"categories", "tags", "subdirectories", "files"} + allowed = {"categories", "tags", "author", "subdirectories", "files"} extra = set(data.keys()) - allowed if extra: issues.append(ValidationIssue(f"Unexpected keys: {sorted(extra)}", context=str(path))) @@ -31,6 +32,7 @@ def load_manifest(path: Path, issues: list[ValidationIssue]) -> Manifest | None: categories = _parse_inherit_list(data.get("categories"), issues, f"{path}:categories") tags = _parse_inherit_list(data.get("tags"), issues, f"{path}:tags") + author = _parse_inherit_list(data.get("author"), issues, f"{path}:author") subdirectories = _parse_inherit_list(data.get("subdirectories"), issues, f"{path}:subdirectories") files: Dict[str, FileSpec] = {} @@ -46,7 +48,7 @@ def load_manifest(path: Path, issues: list[ValidationIssue]) -> Manifest | None: if not isinstance(file_cfg, dict): issues.append(ValidationIssue(f"{file_name} must be an object", context=str(path))) continue - extra_file = set(file_cfg.keys()) - {"title", "use_heading_as_title", "categories", "tags"} + extra_file = set(file_cfg.keys()) - {"title", "use_heading_as_title", "categories", "tags", "created_on", "last_modified"} if extra_file: issues.append( ValidationIssue(f"{file_name} has unexpected keys: {sorted(extra_file)}", context=str(path)) @@ -89,6 +91,12 @@ def load_manifest(path: Path, issues: list[ValidationIssue]) -> Manifest | None: categories_override = _parse_inherit_list(file_cfg.get("categories"), issues, f"{path}:{file_name}:categories") tags_override = _parse_inherit_list(file_cfg.get("tags"), issues, f"{path}:{file_name}:tags") + created_on = _parse_datetime_field(file_cfg.get("created_on"), issues, f"{path}:{file_name}:created_on") + last_modified = _parse_datetime_field(file_cfg.get("last_modified"), issues, f"{path}:{file_name}:last_modified") + if created_on and last_modified and last_modified < created_on: + issues.append( + ValidationIssue("last_modified cannot be earlier than created_on", context=str(path)) + ) files[file_name] = FileSpec( title=title, @@ -96,12 +104,15 @@ def load_manifest(path: Path, issues: list[ValidationIssue]) -> Manifest | None: use_heading_strict=use_strict, categories=categories_override, tags=tags_override, + created_on=created_on, + last_modified=last_modified, ) return Manifest( path=path, categories=categories, tags=tags, + author=author, subdirectories=subdirectories, files=files, ) @@ -129,3 +140,16 @@ def _parse_inherit_list(value: object, issues: list[ValidationIssue], context: s inherit = True return InheritList(content=[item for item in content if isinstance(item, str)], inherit=inherit) + + +def _parse_datetime_field(value: object, issues: list[ValidationIssue], context: str) -> datetime | None: + if value is None: + return None + if not isinstance(value, str) or not value.strip(): + issues.append(ValidationIssue("Must be a non-empty string", context=context)) + return None + try: + return datetime.strptime(value.strip(), "%Y-%m-%d %H:%M") + except ValueError: + issues.append(ValidationIssue("Invalid datetime format (expected YYYY-MM-DD hh:mm)", context=context)) + return None diff --git a/src/models.py b/src/models.py index c0f0288..c010b27 100644 --- a/src/models.py +++ b/src/models.py @@ -1,6 +1,7 @@ from __future__ import annotations from dataclasses import dataclass, field +from datetime import datetime from pathlib import Path from typing import Dict, List, Optional @@ -18,6 +19,8 @@ class FileSpec: use_heading_strict: bool categories: Optional[InheritList] tags: Optional[InheritList] + created_on: Optional[datetime] + last_modified: Optional[datetime] @dataclass(frozen=True) @@ -25,6 +28,7 @@ class Manifest: path: Path categories: InheritList tags: InheritList + author: InheritList subdirectories: InheritList files: Dict[str, FileSpec] @@ -47,17 +51,21 @@ class PostPlan: html: str categories: List[str] tags: List[str] + author: Optional[str] source_timestamp: int cached_timestamp: Optional[int] should_update: bool + created_on: Optional[str] + last_modified: Optional[str] @dataclass -class CategoryPlan: - missing_paths: List[List[str]] +class TaxonomyPlan: + missing_categories: List[List[str]] + missing_tags: List[str] @dataclass class EvaluationResult: posts: List[PostPlan] - categories_to_create: CategoryPlan + taxonomy_to_create: TaxonomyPlan diff --git a/src/wp_cli.py b/src/wp_cli.py index 6e029d7..4264776 100644 --- a/src/wp_cli.py +++ b/src/wp_cli.py @@ -60,6 +60,24 @@ class WordPressCLI: tags.append(TagTerm(term_id=int(entry["term_id"]), name=entry["name"])) return tags + def create_tag(self, name: str) -> int: + result = self._run( + [ + "wp", + "term", + "create", + "post_tag", + name, + "--porcelain", + ], + capture_output=True, + ) + output = result.stdout.strip() + try: + return int(output) + except ValueError as exc: + raise WordPressError(f"Invalid tag id from wp cli: {output}") from exc + def create_category(self, name: str, parent: int) -> int: result = self._run( [ @@ -107,6 +125,9 @@ class WordPressCLI: categories: List[int], tags: List[str], source_identity: str, + created_on: Optional[str] = None, + last_modified: Optional[str] = None, + author: Optional[str] = None, ) -> int: payload = json.dumps({"_wp_materialize_source": source_identity}) args = [ @@ -122,6 +143,12 @@ class WordPressCLI: f"--meta_input={payload}", "--porcelain", ] + if created_on: + args.append(f"--post_date={created_on}") + if last_modified: + args.append(f"--post_modified={last_modified}") + if author: + args.append(f"--post_author={author}") result = self._run(args, capture_output=True) output = result.stdout.strip() try: @@ -136,6 +163,9 @@ class WordPressCLI: content: str, categories: List[int], tags: List[str], + created_on: Optional[str] = None, + last_modified: Optional[str] = None, + author: Optional[str] = None, ) -> None: args = [ "wp", @@ -147,6 +177,12 @@ class WordPressCLI: f"--post_category={','.join(str(cat) for cat in categories)}", f"--tags_input={','.join(tags)}", ] + if created_on: + args.append(f"--post_date={created_on}") + if last_modified: + args.append(f"--post_modified={last_modified}") + if author: + args.append(f"--post_author={author}") self._run(args) def _run_json(self, cmd: List[str]): -- 2.43.0 From 122b7ea34873ee71592fde9aa60dd9a079fe5f3d Mon Sep 17 00:00:00 2001 From: Peisong Xiao Date: Sun, 8 Feb 2026 15:18:49 -0500 Subject: [PATCH 02/12] added new and add commands for local logic --- .gitignore | 1 + README.md | 27 +++++++- examples.md | 31 ++++++++++ src/cli.py | 100 +++++++++++++++++++++++++++++- src/evaluation.py | 38 ++++++++---- src/local_export.py | 66 +++----------------- src/scaffold.py | 148 ++++++++++++++++++++++++++++++++++++++++++++ 7 files changed, 337 insertions(+), 74 deletions(-) create mode 100644 src/scaffold.py diff --git a/.gitignore b/.gitignore index e8709ef..36a45d3 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,4 @@ __pycache__/ *.egg-info/ .env .venv/ +testing/**/* diff --git a/README.md b/README.md index c821142..ebed2b4 100644 --- a/README.md +++ b/README.md @@ -104,7 +104,27 @@ wp-materialize local /path/to/output Notes: 1. The local export assumes every post is new and generates create commands. -2. Categories must already exist in WordPress for exact commands. +2. The local export does not call WordPress or resolve category IDs. + +Create placeholder config or manifest: + +```bash +wp-materialize new --config +wp-materialize new --config /path/to/config.json +wp-materialize new --manifest /path/to/content +``` + +Add files or subdirectories to a manifest (no evaluation): + +```bash +wp-materialize add-file /path/to/content/post.md +wp-materialize add-file /path/to/content/post.md /path/to/content +wp-materialize add-file /path/to/content/post.md --current + +wp-materialize add-subdir /path/to/content/notes +wp-materialize add-subdir /path/to/content/notes /path/to/content +wp-materialize add-subdir /path/to/content/notes --current +``` ## Manifests @@ -116,6 +136,11 @@ Each managed directory must contain a `.wp-materialize.json` manifest. See `conf 2. Packages: - `Markdown>=3.6` +## System Prerequisites + +1. `wp` CLI must be installed and available in PATH for `apply`. +2. `local` does not require `wp`. + Install dependencies: ```bash diff --git a/examples.md b/examples.md index 63d37d9..c3cb519 100644 --- a/examples.md +++ b/examples.md @@ -111,3 +111,34 @@ Subdirectory manifest (`design/.wp-materialize.json`): - `git_repositories` entries use git commit timestamps for `created_on`/`last_modified` inference. - `directories` entries use filesystem timestamps even if the path is inside a git repo. + +## Scaffold Command Examples + +Create a placeholder config: + +```bash +wp-materialize new --config +wp-materialize new --config /path/to/config.json +``` + +Create a dummy manifest: + +```bash +wp-materialize new --manifest /path/to/content +``` + +Add a file to a manifest: + +```bash +wp-materialize add-file /path/to/content/post.md +wp-materialize add-file /path/to/content/post.md /path/to/content +wp-materialize add-file /path/to/content/post.md --current +``` + +Add a directory to a manifest: + +```bash +wp-materialize add-subdir /path/to/content/notes +wp-materialize add-subdir /path/to/content/notes /path/to/content +wp-materialize add-subdir /path/to/content/notes --current +``` diff --git a/src/cli.py b/src/cli.py index 2bed1a9..c9ea642 100644 --- a/src/cli.py +++ b/src/cli.py @@ -10,6 +10,7 @@ from .config import load_config from .errors import ConfigurationError, MaterializeError, ValidationError from .evaluation import evaluate from .local_export import export_local +from .scaffold import add_dir_to_manifest, add_file_to_manifest, create_config, create_manifest, resolve_manifest_dir from .state import load_state from .wp_cli import WordPressCLI @@ -74,16 +75,110 @@ def main() -> int: help="Output directory for local export (required).", ) + new_parser = subparsers.add_parser( + "new", + help="Create placeholder config or manifest files.", + description="Create a placeholder config file or a dummy manifest.", + ) + new_group = new_parser.add_mutually_exclusive_group(required=True) + new_group.add_argument( + "--config", + nargs="?", + const=str(_default_config_path()), + metavar="file", + help="Create a placeholder config file at or the default config path.", + ) + new_group.add_argument( + "--manifest", + metavar="dir", + help="Create a dummy manifest in the specified directory.", + ) + + add_file_parser = subparsers.add_parser( + "add-file", + help="Add a file entry to a manifest.", + description="Add a file entry to the manifest in a given directory.", + ) + add_file_parser.add_argument("file", help="File path to add to the manifest.") + add_file_parser.add_argument( + "manifest_dir", + nargs="?", + help="Directory containing the manifest (defaults to current directory).", + ) + add_file_parser.add_argument( + "--current", + action="store_true", + help="Find the manifest in the same directory as the file (cannot be used with manifest_dir).", + ) + + add_dir_parser = subparsers.add_parser( + "add-subdir", + help="Add a subdirectory entry to a manifest.", + description="Add a subdirectory entry to the manifest in a given directory.", + ) + add_dir_parser.add_argument("dir", help="Directory path to add to the manifest.") + add_dir_parser.add_argument( + "manifest_dir", + nargs="?", + help="Directory containing the manifest (defaults to current directory).", + ) + add_dir_parser.add_argument( + "--current", + action="store_true", + help="Find the manifest in the same directory as the target (cannot be used with manifest_dir).", + ) + args = parser.parse_args() if args.command is None: parser.print_help() return 1 + if args.command == "new": + try: + if args.config is not None: + create_config(Path(args.config)) + print(f"Created config: {args.config}") + else: + path = create_manifest(Path(args.manifest)) + print(f"Created manifest: {path}") + except MaterializeError as exc: + print(f"Error: {exc}", file=sys.stderr) + return 1 + return 0 + + if args.command == "add-file": + try: + file_path = Path(args.file) + manifest_dir = resolve_manifest_dir(file_path, Path(args.manifest_dir) if args.manifest_dir else None, args.current) + add_file_to_manifest(file_path, manifest_dir) + print(f"Added file to manifest: {file_path}") + except MaterializeError as exc: + print(f"Error: {exc}", file=sys.stderr) + return 1 + return 0 + + if args.command == "add-subdir": + try: + dir_path = Path(args.dir) + manifest_dir = resolve_manifest_dir(dir_path, Path(args.manifest_dir) if args.manifest_dir else None, args.current) + add_dir_to_manifest(dir_path, manifest_dir) + print(f"Added directory to manifest: {dir_path}") + except MaterializeError as exc: + print(f"Error: {exc}", file=sys.stderr) + return 1 + return 0 + try: config = load_config(args.config) state = load_state(args.state) - result = evaluate(config, state, sync_repos=not args.no_sync, force_new=args.force_new) + result = evaluate( + config, + state, + sync_repos=not args.no_sync, + force_new=args.force_new, + skip_wp_checks=args.command == "local", + ) except ValidationError as exc: _print_validation_error(exc) return 1 @@ -101,9 +196,8 @@ def main() -> int: if not output_dir: print("Error: local command requires an output directory", file=sys.stderr) return 1 - wp = WordPressCLI(config.wordpress_root) try: - export_local(result, Path(output_dir), wp) + export_local(result, Path(output_dir)) except MaterializeError as exc: print(f"Error: {exc}", file=sys.stderr) return 1 diff --git a/src/evaluation.py b/src/evaluation.py index 899ced2..2220e10 100644 --- a/src/evaluation.py +++ b/src/evaluation.py @@ -25,7 +25,13 @@ class _Context: manifest_chain: List[Path] -def evaluate(config: Config, state: State, sync_repos: bool, force_new: bool = False) -> EvaluationResult: +def evaluate( + config: Config, + state: State, + sync_repos: bool, + force_new: bool = False, + skip_wp_checks: bool = False, +) -> EvaluationResult: issues: List[ValidationIssue] = [] sources = _load_sources(config, sync_repos, issues) @@ -45,21 +51,25 @@ def evaluate(config: Config, state: State, sync_repos: bool, force_new: bool = F state=state, issues=issues, posts=posts, + force_new=force_new, ) - if shutil.which("wp") is None: - issues.append(ValidationIssue("wp CLI not found in PATH", context=str(config.wordpress_root))) - categories = [] - tag_names: Set[str] = set() - try: - wp = WordPressCLI(config.wordpress_root) - categories = wp.list_categories() - tags = wp.list_tags() - tag_names = {tag.name for tag in tags} - except Exception as exc: - issues.append(ValidationIssue(str(exc), context=str(config.wordpress_root))) + missing_categories: List[List[str]] = [] + missing_tags: List[str] = [] + if not skip_wp_checks: + if shutil.which("wp") is None: + issues.append(ValidationIssue("wp CLI not found in PATH", context=str(config.wordpress_root))) + categories = [] + tag_names: Set[str] = set() + try: + wp = WordPressCLI(config.wordpress_root) + categories = wp.list_categories() + tags = wp.list_tags() + tag_names = {tag.name for tag in tags} + except Exception as exc: + issues.append(ValidationIssue(str(exc), context=str(config.wordpress_root))) - missing_categories, missing_tags = _plan_taxonomy(posts, categories, tag_names) + missing_categories, missing_tags = _plan_taxonomy(posts, categories, tag_names) if issues: raise ValidationError(issues) @@ -121,6 +131,7 @@ def _evaluate_directory( state: State, issues: List[ValidationIssue], posts: List[PostPlan], + force_new: bool, ) -> None: manifest_path = directory / ".wp-materialize.json" manifest = load_manifest(manifest_path, issues) @@ -243,6 +254,7 @@ def _evaluate_directory( state=state, issues=issues, posts=posts, + force_new=force_new, ) diff --git a/src/local_export.py b/src/local_export.py index 868b0a1..31096ff 100644 --- a/src/local_export.py +++ b/src/local_export.py @@ -5,33 +5,22 @@ import re import shlex import unicodedata from pathlib import Path -from typing import Dict, List, Set +from typing import List, Set -from .errors import MaterializeError, WordPressError +from .errors import MaterializeError from .models import EvaluationResult, PostPlan -from .wp_cli import CategoryTerm, WordPressCLI -def export_local(result: EvaluationResult, output_dir: Path, wp: WordPressCLI) -> None: +def export_local(result: EvaluationResult, output_dir: Path) -> None: if not output_dir.exists(): output_dir.mkdir(parents=True, exist_ok=True) if not output_dir.is_dir(): raise MaterializeError(f"Output path is not a directory: {output_dir}") - categories = wp.list_categories() - category_map = _build_category_map(categories) - missing_categories = _find_missing_categories(result.posts, category_map) - if missing_categories: - raise MaterializeError( - "Cannot build exact wp commands with missing categories. " - "Run apply to create categories first." - ) - used_names: Set[str] = set() for post in result.posts: - category_ids = _resolve_category_ids(post, category_map) - metadata = _build_metadata(post, category_ids) - command = _build_wp_command(post, category_ids) + metadata = _build_metadata(post) + command = _build_wp_command(post) base_name = _normalize_name(f"{post.source.name}/{post.relative_path}") title_name = _normalize_name(post.title) @@ -53,50 +42,13 @@ def export_local(result: EvaluationResult, output_dir: Path, wp: WordPressCLI) - (target_dir / "wp-command.txt").write_text(command + "\n", encoding="utf-8") -def _build_category_map(categories: List[CategoryTerm]) -> Dict[tuple[int, str], int]: - return {(category.parent, category.name): category.term_id for category in categories} - - -def _resolve_category_ids(post: PostPlan, category_map: Dict[tuple[int, str], int]) -> List[int]: - category_ids: List[int] = [] - for path in post.categories: - segments = [segment for segment in path.split("/") if segment] - if not segments: - continue - parent = 0 - for segment in segments: - map_key = (parent, segment) - if map_key not in category_map: - raise WordPressError(f"Missing category during local export: {path}") - parent = category_map[map_key] - category_ids.append(parent) - return category_ids - - -def _find_missing_categories(posts: List[PostPlan], category_map: Dict[tuple[int, str], int]) -> List[str]: - missing: Set[str] = set() - for post in posts: - for path in post.categories: - segments = [segment for segment in path.split("/") if segment] - if not segments: - continue - parent = 0 - for segment in segments: - map_key = (parent, segment) - if map_key not in category_map: - missing.add(path) - break - parent = category_map[map_key] - return sorted(missing) - - -def _build_metadata(post: PostPlan, category_ids: List[int]) -> dict: +def _build_metadata(post: PostPlan) -> dict: metadata = { "post_type": "post", "post_status": "publish", "post_title": post.title, "post_content": post.html, - "post_category": category_ids, + "post_category": post.categories, "tags_input": post.tags, "meta_input": {"_wp_materialize_source": post.identity}, } @@ -109,7 +61,7 @@ def _build_metadata(post: PostPlan, category_ids: List[int]) -> dict: return metadata -def _build_wp_command(post: PostPlan, category_ids: List[int]) -> str: +def _build_wp_command(post: PostPlan) -> str: payload = json.dumps({"_wp_materialize_source": post.identity}) args = [ "wp", @@ -119,7 +71,7 @@ def _build_wp_command(post: PostPlan, category_ids: List[int]) -> str: "--post_status=publish", f"--post_title={post.title}", f"--post_content={post.html}", - f"--post_category={','.join(str(cat) for cat in category_ids)}", + f"--post_category={','.join(post.categories)}", f"--tags_input={','.join(post.tags)}", f"--meta_input={payload}", "--porcelain", diff --git a/src/scaffold.py b/src/scaffold.py new file mode 100644 index 0000000..cbafb74 --- /dev/null +++ b/src/scaffold.py @@ -0,0 +1,148 @@ +from __future__ import annotations + +import json +from pathlib import Path +from typing import Any, Dict, List, Optional + +from .errors import MaterializeError + + +def create_config(path: Path) -> None: + _ensure_parent_exists(path) + if path.exists(): + raise MaterializeError(f"Config already exists: {path}") + payload = { + "wordpress_root": "/path/to/wordpress", + "repo_storage_dir": "/path/to/repo-storage", + "git_repositories": [ + { + "name": "example-repo", + "url": "https://example.com/repo.git", + "branch": "main", + "root_subdir": None, + } + ], + "directories": [ + { + "name": "example-dir", + "path": "/path/to/content", + "root_subdir": None, + } + ], + } + path.write_text(json.dumps(payload, indent=2), encoding="utf-8") + + +def create_manifest(directory: Path) -> Path: + if not directory.exists(): + raise MaterializeError(f"Directory does not exist: {directory}") + if not directory.is_dir(): + raise MaterializeError(f"Not a directory: {directory}") + manifest_path = directory / ".wp-materialize.json" + if manifest_path.exists(): + raise MaterializeError(f"Manifest already exists: {manifest_path}") + payload = { + "categories": {"content": [], "inherit": True}, + "tags": {"content": [], "inherit": True}, + "author": {"content": [], "inherit": True}, + "subdirectories": {"content": [], "inherit": True}, + "files": {}, + } + manifest_path.write_text(json.dumps(payload, indent=2), encoding="utf-8") + return manifest_path + + +def add_file_to_manifest(file_path: Path, manifest_dir: Path) -> None: + if not file_path.exists(): + raise MaterializeError(f"File does not exist: {file_path}") + if not file_path.is_file(): + raise MaterializeError(f"Not a file: {file_path}") + manifest_path = _manifest_path(manifest_dir) + data = _load_manifest_json(manifest_path) + + relative = _relative_to(file_path, manifest_dir) + files = data.setdefault("files", {}) + if not isinstance(files, dict): + raise MaterializeError("Manifest files must be an object") + if relative in files: + raise MaterializeError(f"File already exists in manifest: {relative}") + + files[relative] = {"title": "TODO: Title"} + _write_manifest_json(manifest_path, data) + + +def add_dir_to_manifest(dir_path: Path, manifest_dir: Path) -> None: + if not dir_path.exists(): + raise MaterializeError(f"Directory does not exist: {dir_path}") + if not dir_path.is_dir(): + raise MaterializeError(f"Not a directory: {dir_path}") + + manifest_path = _manifest_path(manifest_dir) + data = _load_manifest_json(manifest_path) + + relative = _relative_to(dir_path, manifest_dir) + subdirs = data.setdefault("subdirectories", {"content": [], "inherit": True}) + if not isinstance(subdirs, dict): + raise MaterializeError("Manifest subdirectories must be an object") + content = subdirs.setdefault("content", []) + if not isinstance(content, list) or any(not isinstance(item, str) for item in content): + raise MaterializeError("Manifest subdirectories.content must be a list of strings") + if relative in content: + raise MaterializeError(f"Subdirectory already exists in manifest: {relative}") + + content.append(relative) + _write_manifest_json(manifest_path, data) + + +def resolve_manifest_dir(target_path: Path, manifest_dir: Optional[Path], use_current: bool) -> Path: + if manifest_dir and use_current: + raise MaterializeError("--current cannot be used with an explicit manifest directory") + if manifest_dir: + return manifest_dir + if use_current: + return target_path.parent + return Path.cwd() + + +def _manifest_path(manifest_dir: Path) -> Path: + if not manifest_dir.exists(): + raise MaterializeError(f"Manifest directory does not exist: {manifest_dir}") + if not manifest_dir.is_dir(): + raise MaterializeError(f"Not a directory: {manifest_dir}") + manifest_path = manifest_dir / ".wp-materialize.json" + if not manifest_path.exists(): + raise MaterializeError(f"Manifest not found: {manifest_path}") + return manifest_path + + +def _load_manifest_json(path: Path) -> Dict[str, Any]: + try: + data = json.loads(path.read_text(encoding="utf-8")) + except json.JSONDecodeError as exc: + raise MaterializeError(f"Invalid JSON in manifest: {exc}") from exc + if not isinstance(data, dict): + raise MaterializeError("Manifest must be a JSON object") + return data + + +def _write_manifest_json(path: Path, data: Dict[str, Any]) -> None: + path.write_text(json.dumps(data, indent=2), encoding="utf-8") + + +def _relative_to(path: Path, base: Path) -> str: + try: + relative = path.relative_to(base) + except ValueError as exc: + raise MaterializeError(f"Path is outside manifest directory: {path}") from exc + relative_str = relative.as_posix() + if relative_str in {".", ""}: + raise MaterializeError(f"Path must be inside manifest directory: {path}") + return relative_str + + +def _ensure_parent_exists(path: Path) -> None: + parent = path.parent + if not parent.exists(): + raise MaterializeError(f"Directory does not exist: {parent}") + if not parent.is_dir(): + raise MaterializeError(f"Not a directory: {parent}") -- 2.43.0 From 02bc0ce81d676a8ff20b8ba654bb4418b1d9d469 Mon Sep 17 00:00:00 2001 From: Peisong Xiao Date: Sun, 8 Feb 2026 17:47:35 -0500 Subject: [PATCH 03/12] added renderer selection (py-gfm as alternative) --- README.md | 1 + configurations.md | 28 +++++++++++++++++++++------- examples.md | 5 +++++ pyproject.toml | 1 + requirements.txt | 1 + src/config.py | 16 +++++++++++++++- src/evaluation.py | 12 +++++++++++- src/manifest.py | 29 +++++++++++++++++++++++++++-- src/markdown_utils.py | 35 +++++++++++++++++++++++++++++------ src/models.py | 2 ++ src/scaffold.py | 2 ++ 11 files changed, 115 insertions(+), 17 deletions(-) diff --git a/README.md b/README.md index ebed2b4..ca34fab 100644 --- a/README.md +++ b/README.md @@ -135,6 +135,7 @@ Each managed directory must contain a `.wp-materialize.json` manifest. See `conf 1. Python 3.10+ 2. Packages: - `Markdown>=3.6` + - `py_gfm` (only required when using `renderer: "py-gfm"`) ## System Prerequisites diff --git a/configurations.md b/configurations.md index 1645406..efcb8bd 100644 --- a/configurations.md +++ b/configurations.md @@ -10,9 +10,11 @@ Top-level fields: Path to the WordPress root directory where the `wp` CLI is executed. 2. `repo_storage_dir` (string, required) Directory where git repositories are cloned or updated. -3. `git_repositories` (array, optional) +3. `renderer` (string, optional) + Markdown renderer to use. Allowed values: `default`, `py-gfm`. +4. `git_repositories` (array, optional) List of git repositories to manage. Default is an empty list. -4. `directories` (array, optional) +5. `directories` (array, optional) List of non-git directories to manage. Default is an empty list. `git_repositories` entries: @@ -50,9 +52,12 @@ Top-level fields: Inherited tags for this directory and its children. 3. `author` (object, optional) Inherited author for this directory and its children. Must resolve to a single author. -4. `subdirectories` (object, optional) +4. `renderer` (string, optional) + Markdown renderer to use for this directory. Allowed values: `default`, `py-gfm`. + If omitted, it inherits from the parent scope. +5. `subdirectories` (object, optional) Explicit list of subdirectories to traverse. -5. `files` (object, optional) +6. `files` (object, optional) Mapping of Markdown file names to file-level configuration. `categories`, `tags`, `author`, and `subdirectories` objects: @@ -61,7 +66,8 @@ Top-level fields: List of values for the given field. For `categories`, each string is a hierarchical path such as `Systems/Infrastructure`. For `subdirectories`, each string is a directory name under the current directory. - For `author`, exactly one string must remain after inheritance is applied. + For `author`, exactly one string must remain after inheritance is applied and it should be + a WordPress user ID (integer as a string). 2. `inherit` (boolean, optional, default `true`) If `true`, append to the parent effective list. If `false`, replace the parent list entirely. @@ -70,6 +76,10 @@ Note: Root directory manifests do not need to specify `inherit` for these top-le fields (the default is `true`). File-level overrides inside `files` still support inheritance via their own `inherit` fields. +The `renderer` field inherits implicitly: if omitted, the renderer is inherited +from the parent scope; if specified, it overrides the parent without an explicit +`inherit` flag. + `files` entries: Each key is a Markdown file name (relative to the manifest directory). @@ -84,10 +94,13 @@ Each value is an object with the following fields: Manual override for the post creation time in `YYYY-MM-DD hh:mm` format. 4. `last_modified` (string, optional) Manual override for the post modified time in `YYYY-MM-DD hh:mm` format. -5. `categories` (object, optional) +5. `renderer` (string, optional) + Markdown renderer to use for this file. Allowed values: `default`, `py-gfm`. + If omitted, it inherits from the parent scope. +6. `categories` (object, optional) Overrides categories for this file. Uses the same `content` and `inherit` fields as the top-level `categories` object. -6. `tags` (object, optional) +7. `tags` (object, optional) Overrides tags for this file. Uses the same `content` and `inherit` fields as the top-level `tags` object. @@ -102,6 +115,7 @@ If `created_on` or `last_modified` is not provided, the system infers the value. For `git_repositories` sources it uses git commit timestamps; for `directories` sources it uses filesystem timestamps. The system does not auto-detect git for entries declared under `directories`, even if the path is inside a git repo. +If `created_on` is in the future, WordPress will mark the post as scheduled. ## Post Identity diff --git a/examples.md b/examples.md index c3cb519..163d384 100644 --- a/examples.md +++ b/examples.md @@ -11,6 +11,7 @@ Root directory manifest (`.wp-materialize.json`): "categories": { "content": ["Systems", "Infrastructure"], "inherit": true }, "tags": { "content": ["automation", "wordpress"], "inherit": true }, "author": { "content": ["editorial"], "inherit": true }, + "renderer": "py-gfm", "subdirectories": { "content": ["design", "notes"], "inherit": true }, "files": { "post.md": { @@ -20,6 +21,7 @@ Root directory manifest (`.wp-materialize.json`): }, "essay.md": { "use_heading_as_title": { "level": 1, "strict": true }, + "renderer": "default", "created_on": "2025-01-10 09:30", "last_modified": "2025-02-14 16:45" } @@ -48,6 +50,7 @@ Subdirectory manifest (`design/.wp-materialize.json`): { "wordpress_root": "/var/www/wordpress", "repo_storage_dir": "/home/user/wp-materialize-repos", + "renderer": "default", "git_repositories": [], "directories": [ { @@ -65,6 +68,7 @@ Subdirectory manifest (`design/.wp-materialize.json`): { "wordpress_root": "/var/www/wordpress", "repo_storage_dir": "/home/user/wp-materialize-repos", + "renderer": "default", "git_repositories": [ { "name": "content-repo", @@ -95,6 +99,7 @@ Subdirectory manifest (`design/.wp-materialize.json`): { "wordpress_root": "/var/www/wordpress", "repo_storage_dir": "/home/user/wp-materialize-repos", + "renderer": "default", "git_repositories": [ { "name": "content-repo", diff --git a/pyproject.toml b/pyproject.toml index f8e0499..b99a129 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,6 +10,7 @@ readme = "README.md" requires-python = ">=3.10" dependencies = [ "Markdown>=3.6", + "py_gfm", ] [project.scripts] diff --git a/requirements.txt b/requirements.txt index cb286b3..40d8241 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1 +1,2 @@ Markdown>=3.6 +py_gfm diff --git a/src/config.py b/src/config.py index ae650a0..6471a4d 100644 --- a/src/config.py +++ b/src/config.py @@ -29,6 +29,7 @@ class Config: repo_storage_dir: Path git_repositories: List[GitRepository] directories: List[DirectorySpec] + renderer: Optional[str] def _expect_keys(obj: dict, allowed: set[str], context: str) -> None: @@ -48,10 +49,11 @@ def load_config(path: Path) -> Config: if not isinstance(data, dict): raise ConfigurationError("Config must be a JSON object") - _expect_keys(data, {"wordpress_root", "repo_storage_dir", "git_repositories", "directories"}, "config") + _expect_keys(data, {"wordpress_root", "repo_storage_dir", "git_repositories", "directories", "renderer"}, "config") wordpress_root = _require_path(data, "wordpress_root", required=True) repo_storage_dir = _require_path(data, "repo_storage_dir", required=True) + renderer = _require_renderer(data.get("renderer"), context="config.renderer") git_repositories = [] for idx, repo in enumerate(data.get("git_repositories", []) or []): @@ -85,6 +87,7 @@ def load_config(path: Path) -> Config: repo_storage_dir=repo_storage_dir, git_repositories=git_repositories, directories=directories, + renderer=renderer, ) @@ -102,3 +105,14 @@ def _require_path(data: dict, key: str, required: bool) -> Path: if not isinstance(value, str) or not value.strip(): raise ConfigurationError(f"{key} must be a non-empty string") return Path(value) + + +def _require_renderer(value: object, context: str) -> Optional[str]: + if value is None: + return None + if not isinstance(value, str) or not value.strip(): + raise ConfigurationError(f"{context} must be a non-empty string") + renderer = value.strip() + if renderer not in {"default", "py-gfm"}: + raise ConfigurationError(f"{context} must be one of: default, py-gfm") + return renderer diff --git a/src/evaluation.py b/src/evaluation.py index 2220e10..e411dc3 100644 --- a/src/evaluation.py +++ b/src/evaluation.py @@ -21,6 +21,7 @@ class _Context: categories: InheritList tags: InheritList author: InheritList + renderer: Optional[str] subdirectories: InheritList manifest_chain: List[Path] @@ -45,6 +46,7 @@ def evaluate( categories=InheritList(), tags=InheritList(), author=InheritList(), + renderer=config.renderer, subdirectories=InheritList(), manifest_chain=[], ), @@ -141,6 +143,7 @@ def _evaluate_directory( effective_categories = _merge_inherit(context.categories, manifest.categories) effective_tags = _merge_inherit(context.tags, manifest.tags) effective_author = _merge_inherit(context.author, manifest.author) + effective_renderer = manifest.renderer if manifest.renderer is not None else context.renderer effective_subdirs = _merge_inherit(context.subdirectories, manifest.subdirectories) manifest_chain = context.manifest_chain + [manifest.path] @@ -181,7 +184,13 @@ def _evaluate_directory( resolved_tags = _normalize_list(resolved_tags, "tag", str(file_path), issues) resolved_author = _resolve_author(effective_author.content, str(file_path), issues) - html = convert_markdown(markdown_body, context=str(file_path), issues=issues) + resolved_renderer = spec.renderer if spec.renderer is not None else effective_renderer + html = convert_markdown( + markdown_body, + context=str(file_path), + issues=issues, + renderer=resolved_renderer or "default", + ) if html is None: continue @@ -248,6 +257,7 @@ def _evaluate_directory( categories=effective_categories, tags=effective_tags, author=effective_author, + renderer=effective_renderer, subdirectories=effective_subdirs, manifest_chain=manifest_chain, ), diff --git a/src/manifest.py b/src/manifest.py index 97ea5ca..d2a65a7 100644 --- a/src/manifest.py +++ b/src/manifest.py @@ -24,7 +24,7 @@ def load_manifest(path: Path, issues: list[ValidationIssue]) -> Manifest | None: issues.append(ValidationIssue("Manifest must be a JSON object", context=str(path))) return None - allowed = {"categories", "tags", "author", "subdirectories", "files"} + allowed = {"categories", "tags", "author", "renderer", "subdirectories", "files"} extra = set(data.keys()) - allowed if extra: issues.append(ValidationIssue(f"Unexpected keys: {sorted(extra)}", context=str(path))) @@ -33,6 +33,7 @@ def load_manifest(path: Path, issues: list[ValidationIssue]) -> Manifest | None: categories = _parse_inherit_list(data.get("categories"), issues, f"{path}:categories") tags = _parse_inherit_list(data.get("tags"), issues, f"{path}:tags") author = _parse_inherit_list(data.get("author"), issues, f"{path}:author") + renderer = _parse_renderer_field(data.get("renderer"), issues, f"{path}:renderer") subdirectories = _parse_inherit_list(data.get("subdirectories"), issues, f"{path}:subdirectories") files: Dict[str, FileSpec] = {} @@ -48,7 +49,15 @@ def load_manifest(path: Path, issues: list[ValidationIssue]) -> Manifest | None: if not isinstance(file_cfg, dict): issues.append(ValidationIssue(f"{file_name} must be an object", context=str(path))) continue - extra_file = set(file_cfg.keys()) - {"title", "use_heading_as_title", "categories", "tags", "created_on", "last_modified"} + extra_file = set(file_cfg.keys()) - { + "title", + "use_heading_as_title", + "categories", + "tags", + "created_on", + "last_modified", + "renderer", + } if extra_file: issues.append( ValidationIssue(f"{file_name} has unexpected keys: {sorted(extra_file)}", context=str(path)) @@ -93,6 +102,7 @@ def load_manifest(path: Path, issues: list[ValidationIssue]) -> Manifest | None: tags_override = _parse_inherit_list(file_cfg.get("tags"), issues, f"{path}:{file_name}:tags") created_on = _parse_datetime_field(file_cfg.get("created_on"), issues, f"{path}:{file_name}:created_on") last_modified = _parse_datetime_field(file_cfg.get("last_modified"), issues, f"{path}:{file_name}:last_modified") + renderer_override = _parse_renderer_field(file_cfg.get("renderer"), issues, f"{path}:{file_name}:renderer") if created_on and last_modified and last_modified < created_on: issues.append( ValidationIssue("last_modified cannot be earlier than created_on", context=str(path)) @@ -106,6 +116,7 @@ def load_manifest(path: Path, issues: list[ValidationIssue]) -> Manifest | None: tags=tags_override, created_on=created_on, last_modified=last_modified, + renderer=renderer_override, ) return Manifest( @@ -113,6 +124,7 @@ def load_manifest(path: Path, issues: list[ValidationIssue]) -> Manifest | None: categories=categories, tags=tags, author=author, + renderer=renderer, subdirectories=subdirectories, files=files, ) @@ -153,3 +165,16 @@ def _parse_datetime_field(value: object, issues: list[ValidationIssue], context: except ValueError: issues.append(ValidationIssue("Invalid datetime format (expected YYYY-MM-DD hh:mm)", context=context)) return None + + +def _parse_renderer_field(value: object, issues: list[ValidationIssue], context: str) -> str | None: + if value is None: + return None + if not isinstance(value, str) or not value.strip(): + issues.append(ValidationIssue("Must be a non-empty string", context=context)) + return None + renderer = value.strip() + if renderer not in {"default", "py-gfm"}: + issues.append(ValidationIssue("Must be one of: default, py-gfm", context=context)) + return None + return renderer diff --git a/src/markdown_utils.py b/src/markdown_utils.py index 2a734e2..240b954 100644 --- a/src/markdown_utils.py +++ b/src/markdown_utils.py @@ -54,9 +54,32 @@ def _promote_headings(text: str) -> str: return "\n".join(promoted_lines) -def convert_markdown(markdown_text: str, context: str, issues: list[ValidationIssue]) -> str | None: - try: - return md_lib.markdown(markdown_text, extensions=["extra"], output_format="html5") - except Exception as exc: # pragma: no cover - depends on markdown internals - issues.append(ValidationIssue(f"Markdown conversion failed: {exc}", context=context)) - return None +def convert_markdown( + markdown_text: str, + context: str, + issues: list[ValidationIssue], + renderer: str = "default", +) -> str | None: + if renderer == "default": + try: + return md_lib.markdown(markdown_text, extensions=["extra"], output_format="html5") + except Exception as exc: # pragma: no cover - depends on markdown internals + issues.append(ValidationIssue(f"Markdown conversion failed: {exc}", context=context)) + return None + if renderer == "py-gfm": + try: + import mdx_gfm + except Exception as exc: # pragma: no cover - dependency missing + issues.append(ValidationIssue(f"py-gfm is not available: {exc}", context=context)) + return None + extension_class = getattr(mdx_gfm, "GithubFlavoredMarkdownExtension", None) + if extension_class is None: + issues.append(ValidationIssue("py-gfm extension not found: GithubFlavoredMarkdownExtension", context=context)) + return None + try: + return md_lib.markdown(markdown_text, extensions=[extension_class()], output_format="html5") + except Exception as exc: # pragma: no cover - depends on markdown internals + issues.append(ValidationIssue(f"Markdown conversion failed: {exc}", context=context)) + return None + issues.append(ValidationIssue(f"Unknown renderer: {renderer}", context=context)) + return None diff --git a/src/models.py b/src/models.py index c010b27..23be2f6 100644 --- a/src/models.py +++ b/src/models.py @@ -21,6 +21,7 @@ class FileSpec: tags: Optional[InheritList] created_on: Optional[datetime] last_modified: Optional[datetime] + renderer: Optional[str] @dataclass(frozen=True) @@ -29,6 +30,7 @@ class Manifest: categories: InheritList tags: InheritList author: InheritList + renderer: Optional[str] subdirectories: InheritList files: Dict[str, FileSpec] diff --git a/src/scaffold.py b/src/scaffold.py index cbafb74..77514df 100644 --- a/src/scaffold.py +++ b/src/scaffold.py @@ -14,6 +14,7 @@ def create_config(path: Path) -> None: payload = { "wordpress_root": "/path/to/wordpress", "repo_storage_dir": "/path/to/repo-storage", + "renderer": "default", "git_repositories": [ { "name": "example-repo", @@ -45,6 +46,7 @@ def create_manifest(directory: Path) -> Path: "categories": {"content": [], "inherit": True}, "tags": {"content": [], "inherit": True}, "author": {"content": [], "inherit": True}, + "renderer": "default", "subdirectories": {"content": [], "inherit": True}, "files": {}, } -- 2.43.0 From 8aee2bf81327e0bb3743c5b284b4efc4bd796840 Mon Sep 17 00:00:00 2001 From: Peisong Xiao Date: Sun, 8 Feb 2026 17:52:18 -0500 Subject: [PATCH 04/12] posts will be published immediately --- src/apply.py | 45 ++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 40 insertions(+), 5 deletions(-) diff --git a/src/apply.py b/src/apply.py index cacd5e4..a2e67da 100644 --- a/src/apply.py +++ b/src/apply.py @@ -1,7 +1,8 @@ from __future__ import annotations import time -from typing import Dict, List, Set +from datetime import datetime +from typing import Dict, List, Optional, Set from .errors import WordPressError from .models import EvaluationResult, PostPlan @@ -87,6 +88,8 @@ def _apply_post(post: PostPlan, wp: WordPressCLI, category_map: Dict[tuple[int, parent = category_map[map_key] category_ids.append(parent) + created_on, last_modified = _normalize_post_dates(post.created_on, post.last_modified) + post_id = wp.find_post_id(post.identity) if post_id is None: wp.create_post( @@ -95,8 +98,8 @@ def _apply_post(post: PostPlan, wp: WordPressCLI, category_map: Dict[tuple[int, categories=category_ids, tags=post.tags, source_identity=post.identity, - created_on=post.created_on, - last_modified=post.last_modified, + created_on=created_on, + last_modified=last_modified, author=post.author, ) return @@ -107,7 +110,39 @@ def _apply_post(post: PostPlan, wp: WordPressCLI, category_map: Dict[tuple[int, content=post.html, categories=category_ids, tags=post.tags, - created_on=post.created_on, - last_modified=post.last_modified, + created_on=created_on, + last_modified=last_modified, author=post.author, ) + + +def _normalize_post_dates( + created_on: Optional[str], + last_modified: Optional[str], +) -> tuple[Optional[str], Optional[str]]: + if not created_on and not last_modified: + return created_on, last_modified + + now = datetime.now() + created_dt = _parse_post_date(created_on) + modified_dt = _parse_post_date(last_modified) + + if created_dt and created_dt > now: + created_dt = now + if modified_dt and modified_dt > now: + modified_dt = now + if created_dt and modified_dt and modified_dt < created_dt: + modified_dt = created_dt + + created_str = created_dt.strftime("%Y-%m-%d %H:%M:%S") if created_dt else None + modified_str = modified_dt.strftime("%Y-%m-%d %H:%M:%S") if modified_dt else None + return created_str, modified_str + + +def _parse_post_date(value: Optional[str]) -> Optional[datetime]: + if not value: + return None + try: + return datetime.strptime(value, "%Y-%m-%d %H:%M:%S") + except ValueError: + return None -- 2.43.0 From 7953bd07f7c5d7146f258990518b2acbd550d75c Mon Sep 17 00:00:00 2001 From: Peisong Xiao Date: Sun, 8 Feb 2026 17:56:50 -0500 Subject: [PATCH 05/12] posts will be published immediately fixed --- src/apply.py | 24 ++++++++++++++++-------- src/wp_cli.py | 22 ++++++++++++++++++++++ 2 files changed, 38 insertions(+), 8 deletions(-) diff --git a/src/apply.py b/src/apply.py index a2e67da..cf85d2a 100644 --- a/src/apply.py +++ b/src/apply.py @@ -18,6 +18,7 @@ def apply_changes( ) -> None: categories = wp.list_categories() category_map = _build_category_map(categories) + wp_timezone = wp.get_timezone() _create_missing_categories(result, wp, category_map) _create_missing_tags(result, wp) @@ -27,7 +28,7 @@ def apply_changes( for post in result.posts: if not post.should_update: continue - _apply_post(post, wp, category_map) + _apply_post(post, wp, category_map, wp_timezone) state.posts[post.identity] = PostState( source_timestamp=post.source_timestamp, materialized_at=int(time.time()), @@ -74,7 +75,12 @@ def _create_missing_tags(result: EvaluationResult, wp: WordPressCLI) -> None: wp.create_tag(tag) -def _apply_post(post: PostPlan, wp: WordPressCLI, category_map: Dict[tuple[int, str], int]) -> None: +def _apply_post( + post: PostPlan, + wp: WordPressCLI, + category_map: Dict[tuple[int, str], int], + wp_timezone, +) -> None: category_ids: List[int] = [] for path in post.categories: segments = [segment for segment in path.split("/") if segment] @@ -88,7 +94,7 @@ def _apply_post(post: PostPlan, wp: WordPressCLI, category_map: Dict[tuple[int, parent = category_map[map_key] category_ids.append(parent) - created_on, last_modified = _normalize_post_dates(post.created_on, post.last_modified) + created_on, last_modified = _normalize_post_dates(post.created_on, post.last_modified, wp_timezone) post_id = wp.find_post_id(post.identity) if post_id is None: @@ -119,13 +125,14 @@ def _apply_post(post: PostPlan, wp: WordPressCLI, category_map: Dict[tuple[int, def _normalize_post_dates( created_on: Optional[str], last_modified: Optional[str], + wp_timezone, ) -> tuple[Optional[str], Optional[str]]: if not created_on and not last_modified: return created_on, last_modified - now = datetime.now() - created_dt = _parse_post_date(created_on) - modified_dt = _parse_post_date(last_modified) + now = datetime.now(wp_timezone) + created_dt = _parse_post_date(created_on, wp_timezone) + modified_dt = _parse_post_date(last_modified, wp_timezone) if created_dt and created_dt > now: created_dt = now @@ -139,10 +146,11 @@ def _normalize_post_dates( return created_str, modified_str -def _parse_post_date(value: Optional[str]) -> Optional[datetime]: +def _parse_post_date(value: Optional[str], wp_timezone) -> Optional[datetime]: if not value: return None try: - return datetime.strptime(value, "%Y-%m-%d %H:%M:%S") + parsed = datetime.strptime(value, "%Y-%m-%d %H:%M:%S") + return parsed.replace(tzinfo=wp_timezone) except ValueError: return None diff --git a/src/wp_cli.py b/src/wp_cli.py index 4264776..65f9612 100644 --- a/src/wp_cli.py +++ b/src/wp_cli.py @@ -3,8 +3,10 @@ from __future__ import annotations import json import subprocess from dataclasses import dataclass +from datetime import timedelta, timezone from pathlib import Path from typing import Dict, List, Optional +from zoneinfo import ZoneInfo from .errors import WordPressError @@ -78,6 +80,26 @@ class WordPressCLI: except ValueError as exc: raise WordPressError(f"Invalid tag id from wp cli: {output}") from exc + def get_timezone(self): + tz_name = self._run( + ["wp", "option", "get", "timezone_string"], + capture_output=True, + ).stdout.strip() + if tz_name and tz_name.upper() != "UTC": + try: + return ZoneInfo(tz_name) + except Exception: + pass + offset_value = self._run( + ["wp", "option", "get", "gmt_offset"], + capture_output=True, + ).stdout.strip() + try: + offset = float(offset_value) + except ValueError: + offset = 0.0 + return timezone(timedelta(hours=offset)) + def create_category(self, name: str, parent: int) -> int: result = self._run( [ -- 2.43.0 From a56cd665b02d0b3484fba23556eb540b87f6239e Mon Sep 17 00:00:00 2001 From: Peisong Xiao Date: Sun, 8 Feb 2026 17:59:55 -0500 Subject: [PATCH 06/12] posts will be published immediately fixed --- src/wp_cli.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/wp_cli.py b/src/wp_cli.py index 65f9612..9e9f448 100644 --- a/src/wp_cli.py +++ b/src/wp_cli.py @@ -194,6 +194,7 @@ class WordPressCLI: "post", "update", str(post_id), + "--post_status=publish", f"--post_title={title}", f"--post_content={content}", f"--post_category={','.join(str(cat) for cat in categories)}", -- 2.43.0 From 164cb5d980d166f382ebc92157a48b67bfbf3cc2 Mon Sep 17 00:00:00 2001 From: Peisong Xiao Date: Sun, 8 Feb 2026 18:03:05 -0500 Subject: [PATCH 07/12] added pandoc as renderer --- README.md | 1 + configurations.md | 11 ++++++++--- examples.md | 4 ++-- src/config.py | 4 ++-- src/manifest.py | 4 ++-- src/markdown_utils.py | 18 ++++++++++++++++++ 6 files changed, 33 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index ca34fab..c08c86c 100644 --- a/README.md +++ b/README.md @@ -141,6 +141,7 @@ Each managed directory must contain a `.wp-materialize.json` manifest. See `conf 1. `wp` CLI must be installed and available in PATH for `apply`. 2. `local` does not require `wp`. +3. `pandoc` must be installed and available in PATH when using `renderer: "pandoc"`. Install dependencies: diff --git a/configurations.md b/configurations.md index efcb8bd..d8759b1 100644 --- a/configurations.md +++ b/configurations.md @@ -11,7 +11,7 @@ Top-level fields: 2. `repo_storage_dir` (string, required) Directory where git repositories are cloned or updated. 3. `renderer` (string, optional) - Markdown renderer to use. Allowed values: `default`, `py-gfm`. + Markdown renderer to use. Allowed values: `default`, `py-gfm`, `pandoc`. 4. `git_repositories` (array, optional) List of git repositories to manage. Default is an empty list. 5. `directories` (array, optional) @@ -53,7 +53,7 @@ Top-level fields: 3. `author` (object, optional) Inherited author for this directory and its children. Must resolve to a single author. 4. `renderer` (string, optional) - Markdown renderer to use for this directory. Allowed values: `default`, `py-gfm`. + Markdown renderer to use for this directory. Allowed values: `default`, `py-gfm`, `pandoc`. If omitted, it inherits from the parent scope. 5. `subdirectories` (object, optional) Explicit list of subdirectories to traverse. @@ -80,6 +80,11 @@ The `renderer` field inherits implicitly: if omitted, the renderer is inherited from the parent scope; if specified, it overrides the parent without an explicit `inherit` flag. +Renderer dependencies: +1. `default` uses the Python `Markdown` library. +2. `py-gfm` requires the `py_gfm` package (imported as `mdx_gfm`). +3. `pandoc` requires the `pandoc` binary to be available on PATH. + `files` entries: Each key is a Markdown file name (relative to the manifest directory). @@ -95,7 +100,7 @@ Each value is an object with the following fields: 4. `last_modified` (string, optional) Manual override for the post modified time in `YYYY-MM-DD hh:mm` format. 5. `renderer` (string, optional) - Markdown renderer to use for this file. Allowed values: `default`, `py-gfm`. + Markdown renderer to use for this file. Allowed values: `default`, `py-gfm`, `pandoc`. If omitted, it inherits from the parent scope. 6. `categories` (object, optional) Overrides categories for this file. Uses the same `content` and `inherit` fields diff --git a/examples.md b/examples.md index 163d384..c607b02 100644 --- a/examples.md +++ b/examples.md @@ -11,7 +11,7 @@ Root directory manifest (`.wp-materialize.json`): "categories": { "content": ["Systems", "Infrastructure"], "inherit": true }, "tags": { "content": ["automation", "wordpress"], "inherit": true }, "author": { "content": ["editorial"], "inherit": true }, - "renderer": "py-gfm", + "renderer": "pandoc", "subdirectories": { "content": ["design", "notes"], "inherit": true }, "files": { "post.md": { @@ -21,7 +21,7 @@ Root directory manifest (`.wp-materialize.json`): }, "essay.md": { "use_heading_as_title": { "level": 1, "strict": true }, - "renderer": "default", + "renderer": "py-gfm", "created_on": "2025-01-10 09:30", "last_modified": "2025-02-14 16:45" } diff --git a/src/config.py b/src/config.py index 6471a4d..56ab02f 100644 --- a/src/config.py +++ b/src/config.py @@ -113,6 +113,6 @@ def _require_renderer(value: object, context: str) -> Optional[str]: if not isinstance(value, str) or not value.strip(): raise ConfigurationError(f"{context} must be a non-empty string") renderer = value.strip() - if renderer not in {"default", "py-gfm"}: - raise ConfigurationError(f"{context} must be one of: default, py-gfm") + if renderer not in {"default", "py-gfm", "pandoc"}: + raise ConfigurationError(f"{context} must be one of: default, py-gfm, pandoc") return renderer diff --git a/src/manifest.py b/src/manifest.py index d2a65a7..0ab44a1 100644 --- a/src/manifest.py +++ b/src/manifest.py @@ -174,7 +174,7 @@ def _parse_renderer_field(value: object, issues: list[ValidationIssue], context: issues.append(ValidationIssue("Must be a non-empty string", context=context)) return None renderer = value.strip() - if renderer not in {"default", "py-gfm"}: - issues.append(ValidationIssue("Must be one of: default, py-gfm", context=context)) + if renderer not in {"default", "py-gfm", "pandoc"}: + issues.append(ValidationIssue("Must be one of: default, py-gfm, pandoc", context=context)) return None return renderer diff --git a/src/markdown_utils.py b/src/markdown_utils.py index 240b954..e51533f 100644 --- a/src/markdown_utils.py +++ b/src/markdown_utils.py @@ -3,6 +3,7 @@ from __future__ import annotations import re import markdown as md_lib +import subprocess from .errors import ValidationIssue @@ -81,5 +82,22 @@ def convert_markdown( except Exception as exc: # pragma: no cover - depends on markdown internals issues.append(ValidationIssue(f"Markdown conversion failed: {exc}", context=context)) return None + if renderer == "pandoc": + try: + result = subprocess.run( + ["pandoc", "--from=markdown", "--to=html5"], + input=markdown_text, + text=True, + capture_output=True, + check=True, + ) + return result.stdout + except FileNotFoundError as exc: + issues.append(ValidationIssue(f"pandoc is not available: {exc}", context=context)) + return None + except subprocess.CalledProcessError as exc: + stderr = exc.stderr.strip() if exc.stderr else "" + issues.append(ValidationIssue(f"Pandoc conversion failed: {stderr}", context=context)) + return None issues.append(ValidationIssue(f"Unknown renderer: {renderer}", context=context)) return None -- 2.43.0 From 00d44090a8ecccd2b88f955769762903649ed0d9 Mon Sep 17 00:00:00 2001 From: Peisong Xiao Date: Sun, 8 Feb 2026 18:09:54 -0500 Subject: [PATCH 08/12] added hard line breaks support --- configurations.md | 23 +++++++++++++++++------ examples.md | 5 +++++ src/config.py | 17 ++++++++++++++++- src/evaluation.py | 14 ++++++++++++++ src/manifest.py | 20 +++++++++++++++++++- src/markdown_utils.py | 13 ++++++++++--- src/models.py | 2 ++ src/scaffold.py | 2 ++ 8 files changed, 85 insertions(+), 11 deletions(-) diff --git a/configurations.md b/configurations.md index d8759b1..660565d 100644 --- a/configurations.md +++ b/configurations.md @@ -12,9 +12,11 @@ Top-level fields: Directory where git repositories are cloned or updated. 3. `renderer` (string, optional) Markdown renderer to use. Allowed values: `default`, `py-gfm`, `pandoc`. -4. `git_repositories` (array, optional) +4. `hard_line_breaks` (boolean, optional) + If `true`, treat single newlines as hard line breaks. +5. `git_repositories` (array, optional) List of git repositories to manage. Default is an empty list. -5. `directories` (array, optional) +6. `directories` (array, optional) List of non-git directories to manage. Default is an empty list. `git_repositories` entries: @@ -55,9 +57,12 @@ Top-level fields: 4. `renderer` (string, optional) Markdown renderer to use for this directory. Allowed values: `default`, `py-gfm`, `pandoc`. If omitted, it inherits from the parent scope. -5. `subdirectories` (object, optional) +5. `hard_line_breaks` (boolean, optional) + If `true`, treat single newlines as hard line breaks. If omitted, it inherits + from the parent scope. +6. `subdirectories` (object, optional) Explicit list of subdirectories to traverse. -6. `files` (object, optional) +7. `files` (object, optional) Mapping of Markdown file names to file-level configuration. `categories`, `tags`, `author`, and `subdirectories` objects: @@ -79,6 +84,9 @@ inheritance via their own `inherit` fields. The `renderer` field inherits implicitly: if omitted, the renderer is inherited from the parent scope; if specified, it overrides the parent without an explicit `inherit` flag. +The `hard_line_breaks` field inherits implicitly: if omitted, the value is inherited +from the parent scope; if specified, it overrides the parent without an explicit +`inherit` flag. Renderer dependencies: 1. `default` uses the Python `Markdown` library. @@ -102,10 +110,13 @@ Each value is an object with the following fields: 5. `renderer` (string, optional) Markdown renderer to use for this file. Allowed values: `default`, `py-gfm`, `pandoc`. If omitted, it inherits from the parent scope. -6. `categories` (object, optional) +6. `hard_line_breaks` (boolean, optional) + If `true`, treat single newlines as hard line breaks. If omitted, it inherits + from the parent scope. +7. `categories` (object, optional) Overrides categories for this file. Uses the same `content` and `inherit` fields as the top-level `categories` object. -7. `tags` (object, optional) +8. `tags` (object, optional) Overrides tags for this file. Uses the same `content` and `inherit` fields as the top-level `tags` object. diff --git a/examples.md b/examples.md index c607b02..a3d2580 100644 --- a/examples.md +++ b/examples.md @@ -12,6 +12,7 @@ Root directory manifest (`.wp-materialize.json`): "tags": { "content": ["automation", "wordpress"], "inherit": true }, "author": { "content": ["editorial"], "inherit": true }, "renderer": "pandoc", + "hard_line_breaks": true, "subdirectories": { "content": ["design", "notes"], "inherit": true }, "files": { "post.md": { @@ -22,6 +23,7 @@ Root directory manifest (`.wp-materialize.json`): "essay.md": { "use_heading_as_title": { "level": 1, "strict": true }, "renderer": "py-gfm", + "hard_line_breaks": false, "created_on": "2025-01-10 09:30", "last_modified": "2025-02-14 16:45" } @@ -51,6 +53,7 @@ Subdirectory manifest (`design/.wp-materialize.json`): "wordpress_root": "/var/www/wordpress", "repo_storage_dir": "/home/user/wp-materialize-repos", "renderer": "default", + "hard_line_breaks": false, "git_repositories": [], "directories": [ { @@ -69,6 +72,7 @@ Subdirectory manifest (`design/.wp-materialize.json`): "wordpress_root": "/var/www/wordpress", "repo_storage_dir": "/home/user/wp-materialize-repos", "renderer": "default", + "hard_line_breaks": false, "git_repositories": [ { "name": "content-repo", @@ -100,6 +104,7 @@ Subdirectory manifest (`design/.wp-materialize.json`): "wordpress_root": "/var/www/wordpress", "repo_storage_dir": "/home/user/wp-materialize-repos", "renderer": "default", + "hard_line_breaks": false, "git_repositories": [ { "name": "content-repo", diff --git a/src/config.py b/src/config.py index 56ab02f..7675f70 100644 --- a/src/config.py +++ b/src/config.py @@ -30,6 +30,7 @@ class Config: git_repositories: List[GitRepository] directories: List[DirectorySpec] renderer: Optional[str] + hard_line_breaks: bool def _expect_keys(obj: dict, allowed: set[str], context: str) -> None: @@ -49,11 +50,16 @@ def load_config(path: Path) -> Config: if not isinstance(data, dict): raise ConfigurationError("Config must be a JSON object") - _expect_keys(data, {"wordpress_root", "repo_storage_dir", "git_repositories", "directories", "renderer"}, "config") + _expect_keys( + data, + {"wordpress_root", "repo_storage_dir", "git_repositories", "directories", "renderer", "hard_line_breaks"}, + "config", + ) wordpress_root = _require_path(data, "wordpress_root", required=True) repo_storage_dir = _require_path(data, "repo_storage_dir", required=True) renderer = _require_renderer(data.get("renderer"), context="config.renderer") + hard_line_breaks = _require_bool_optional(data.get("hard_line_breaks"), context="config.hard_line_breaks") git_repositories = [] for idx, repo in enumerate(data.get("git_repositories", []) or []): @@ -88,6 +94,7 @@ def load_config(path: Path) -> Config: git_repositories=git_repositories, directories=directories, renderer=renderer, + hard_line_breaks=False if hard_line_breaks is None else hard_line_breaks, ) @@ -116,3 +123,11 @@ def _require_renderer(value: object, context: str) -> Optional[str]: if renderer not in {"default", "py-gfm", "pandoc"}: raise ConfigurationError(f"{context} must be one of: default, py-gfm, pandoc") return renderer + + +def _require_bool_optional(value: object, context: str) -> Optional[bool]: + if value is None: + return None + if not isinstance(value, bool): + raise ConfigurationError(f"{context} must be a boolean") + return value diff --git a/src/evaluation.py b/src/evaluation.py index e411dc3..d011181 100644 --- a/src/evaluation.py +++ b/src/evaluation.py @@ -22,6 +22,7 @@ class _Context: tags: InheritList author: InheritList renderer: Optional[str] + hard_line_breaks: bool subdirectories: InheritList manifest_chain: List[Path] @@ -47,6 +48,7 @@ def evaluate( tags=InheritList(), author=InheritList(), renderer=config.renderer, + hard_line_breaks=config.hard_line_breaks, subdirectories=InheritList(), manifest_chain=[], ), @@ -144,6 +146,11 @@ def _evaluate_directory( effective_tags = _merge_inherit(context.tags, manifest.tags) effective_author = _merge_inherit(context.author, manifest.author) effective_renderer = manifest.renderer if manifest.renderer is not None else context.renderer + effective_hard_line_breaks = ( + manifest.hard_line_breaks + if manifest.hard_line_breaks is not None + else context.hard_line_breaks + ) effective_subdirs = _merge_inherit(context.subdirectories, manifest.subdirectories) manifest_chain = context.manifest_chain + [manifest.path] @@ -185,11 +192,17 @@ def _evaluate_directory( resolved_author = _resolve_author(effective_author.content, str(file_path), issues) resolved_renderer = spec.renderer if spec.renderer is not None else effective_renderer + resolved_hard_line_breaks = ( + spec.hard_line_breaks + if spec.hard_line_breaks is not None + else effective_hard_line_breaks + ) html = convert_markdown( markdown_body, context=str(file_path), issues=issues, renderer=resolved_renderer or "default", + hard_line_breaks=resolved_hard_line_breaks, ) if html is None: continue @@ -258,6 +271,7 @@ def _evaluate_directory( tags=effective_tags, author=effective_author, renderer=effective_renderer, + hard_line_breaks=effective_hard_line_breaks, subdirectories=effective_subdirs, manifest_chain=manifest_chain, ), diff --git a/src/manifest.py b/src/manifest.py index 0ab44a1..e6a53e4 100644 --- a/src/manifest.py +++ b/src/manifest.py @@ -24,7 +24,7 @@ def load_manifest(path: Path, issues: list[ValidationIssue]) -> Manifest | None: issues.append(ValidationIssue("Manifest must be a JSON object", context=str(path))) return None - allowed = {"categories", "tags", "author", "renderer", "subdirectories", "files"} + allowed = {"categories", "tags", "author", "renderer", "hard_line_breaks", "subdirectories", "files"} extra = set(data.keys()) - allowed if extra: issues.append(ValidationIssue(f"Unexpected keys: {sorted(extra)}", context=str(path))) @@ -34,6 +34,7 @@ def load_manifest(path: Path, issues: list[ValidationIssue]) -> Manifest | None: tags = _parse_inherit_list(data.get("tags"), issues, f"{path}:tags") author = _parse_inherit_list(data.get("author"), issues, f"{path}:author") renderer = _parse_renderer_field(data.get("renderer"), issues, f"{path}:renderer") + hard_line_breaks = _parse_bool_field(data.get("hard_line_breaks"), issues, f"{path}:hard_line_breaks") subdirectories = _parse_inherit_list(data.get("subdirectories"), issues, f"{path}:subdirectories") files: Dict[str, FileSpec] = {} @@ -57,6 +58,7 @@ def load_manifest(path: Path, issues: list[ValidationIssue]) -> Manifest | None: "created_on", "last_modified", "renderer", + "hard_line_breaks", } if extra_file: issues.append( @@ -103,6 +105,11 @@ def load_manifest(path: Path, issues: list[ValidationIssue]) -> Manifest | None: created_on = _parse_datetime_field(file_cfg.get("created_on"), issues, f"{path}:{file_name}:created_on") last_modified = _parse_datetime_field(file_cfg.get("last_modified"), issues, f"{path}:{file_name}:last_modified") renderer_override = _parse_renderer_field(file_cfg.get("renderer"), issues, f"{path}:{file_name}:renderer") + hard_line_breaks_override = _parse_bool_field( + file_cfg.get("hard_line_breaks"), + issues, + f"{path}:{file_name}:hard_line_breaks", + ) if created_on and last_modified and last_modified < created_on: issues.append( ValidationIssue("last_modified cannot be earlier than created_on", context=str(path)) @@ -117,6 +124,7 @@ def load_manifest(path: Path, issues: list[ValidationIssue]) -> Manifest | None: created_on=created_on, last_modified=last_modified, renderer=renderer_override, + hard_line_breaks=hard_line_breaks_override, ) return Manifest( @@ -125,6 +133,7 @@ def load_manifest(path: Path, issues: list[ValidationIssue]) -> Manifest | None: tags=tags, author=author, renderer=renderer, + hard_line_breaks=hard_line_breaks, subdirectories=subdirectories, files=files, ) @@ -178,3 +187,12 @@ def _parse_renderer_field(value: object, issues: list[ValidationIssue], context: issues.append(ValidationIssue("Must be one of: default, py-gfm, pandoc", context=context)) return None return renderer + + +def _parse_bool_field(value: object, issues: list[ValidationIssue], context: str) -> bool | None: + if value is None: + return None + if not isinstance(value, bool): + issues.append(ValidationIssue("Must be a boolean", context=context)) + return None + return value diff --git a/src/markdown_utils.py b/src/markdown_utils.py index e51533f..39a1743 100644 --- a/src/markdown_utils.py +++ b/src/markdown_utils.py @@ -60,10 +60,14 @@ def convert_markdown( context: str, issues: list[ValidationIssue], renderer: str = "default", + hard_line_breaks: bool = False, ) -> str | None: if renderer == "default": try: - return md_lib.markdown(markdown_text, extensions=["extra"], output_format="html5") + extensions = ["extra"] + if hard_line_breaks: + extensions.append("nl2br") + return md_lib.markdown(markdown_text, extensions=extensions, output_format="html5") except Exception as exc: # pragma: no cover - depends on markdown internals issues.append(ValidationIssue(f"Markdown conversion failed: {exc}", context=context)) return None @@ -78,14 +82,17 @@ def convert_markdown( issues.append(ValidationIssue("py-gfm extension not found: GithubFlavoredMarkdownExtension", context=context)) return None try: - return md_lib.markdown(markdown_text, extensions=[extension_class()], output_format="html5") + extensions = [extension_class()] + if hard_line_breaks: + extensions.append("nl2br") + return md_lib.markdown(markdown_text, extensions=extensions, output_format="html5") except Exception as exc: # pragma: no cover - depends on markdown internals issues.append(ValidationIssue(f"Markdown conversion failed: {exc}", context=context)) return None if renderer == "pandoc": try: result = subprocess.run( - ["pandoc", "--from=markdown", "--to=html5"], + ["pandoc", f"--from={'markdown+hard_line_breaks' if hard_line_breaks else 'markdown'}", "--to=html5"], input=markdown_text, text=True, capture_output=True, diff --git a/src/models.py b/src/models.py index 23be2f6..d39b873 100644 --- a/src/models.py +++ b/src/models.py @@ -22,6 +22,7 @@ class FileSpec: created_on: Optional[datetime] last_modified: Optional[datetime] renderer: Optional[str] + hard_line_breaks: Optional[bool] @dataclass(frozen=True) @@ -31,6 +32,7 @@ class Manifest: tags: InheritList author: InheritList renderer: Optional[str] + hard_line_breaks: Optional[bool] subdirectories: InheritList files: Dict[str, FileSpec] diff --git a/src/scaffold.py b/src/scaffold.py index 77514df..bea43ac 100644 --- a/src/scaffold.py +++ b/src/scaffold.py @@ -15,6 +15,7 @@ def create_config(path: Path) -> None: "wordpress_root": "/path/to/wordpress", "repo_storage_dir": "/path/to/repo-storage", "renderer": "default", + "hard_line_breaks": False, "git_repositories": [ { "name": "example-repo", @@ -47,6 +48,7 @@ def create_manifest(directory: Path) -> Path: "tags": {"content": [], "inherit": True}, "author": {"content": [], "inherit": True}, "renderer": "default", + "hard_line_breaks": False, "subdirectories": {"content": [], "inherit": True}, "files": {}, } -- 2.43.0 From 4ad90fee4c7c3a659fd394dc851641c33cfa3cb2 Mon Sep 17 00:00:00 2001 From: Peisong Xiao Date: Sun, 8 Feb 2026 18:23:02 -0500 Subject: [PATCH 09/12] added block html option --- configurations.md | 24 ++++++++++++++++++------ examples.md | 5 +++++ src/config.py | 13 ++++++++++++- src/evaluation.py | 14 ++++++++++++++ src/manifest.py | 20 +++++++++++++++++++- src/markdown_utils.py | 12 +++++++++--- src/models.py | 2 ++ src/scaffold.py | 2 ++ 8 files changed, 81 insertions(+), 11 deletions(-) diff --git a/configurations.md b/configurations.md index 660565d..f302a68 100644 --- a/configurations.md +++ b/configurations.md @@ -14,9 +14,12 @@ Top-level fields: Markdown renderer to use. Allowed values: `default`, `py-gfm`, `pandoc`. 4. `hard_line_breaks` (boolean, optional) If `true`, treat single newlines as hard line breaks. -5. `git_repositories` (array, optional) +5. `block_html` (boolean, optional) + If `true`, wrap HTML in a single Gutenberg HTML block to preserve formatting + in the visual editor. +6. `git_repositories` (array, optional) List of git repositories to manage. Default is an empty list. -6. `directories` (array, optional) +7. `directories` (array, optional) List of non-git directories to manage. Default is an empty list. `git_repositories` entries: @@ -60,9 +63,12 @@ Top-level fields: 5. `hard_line_breaks` (boolean, optional) If `true`, treat single newlines as hard line breaks. If omitted, it inherits from the parent scope. -6. `subdirectories` (object, optional) +6. `block_html` (boolean, optional) + If `true`, wrap HTML in a single Gutenberg HTML block to preserve formatting. + If omitted, it inherits from the parent scope. +7. `subdirectories` (object, optional) Explicit list of subdirectories to traverse. -7. `files` (object, optional) +8. `files` (object, optional) Mapping of Markdown file names to file-level configuration. `categories`, `tags`, `author`, and `subdirectories` objects: @@ -87,6 +93,9 @@ from the parent scope; if specified, it overrides the parent without an explicit The `hard_line_breaks` field inherits implicitly: if omitted, the value is inherited from the parent scope; if specified, it overrides the parent without an explicit `inherit` flag. +The `block_html` field inherits implicitly: if omitted, the value is inherited +from the parent scope; if specified, it overrides the parent without an explicit +`inherit` flag. Renderer dependencies: 1. `default` uses the Python `Markdown` library. @@ -113,10 +122,13 @@ Each value is an object with the following fields: 6. `hard_line_breaks` (boolean, optional) If `true`, treat single newlines as hard line breaks. If omitted, it inherits from the parent scope. -7. `categories` (object, optional) +7. `block_html` (boolean, optional) + If `true`, wrap HTML in a single Gutenberg HTML block to preserve formatting. + If omitted, it inherits from the parent scope. +8. `categories` (object, optional) Overrides categories for this file. Uses the same `content` and `inherit` fields as the top-level `categories` object. -8. `tags` (object, optional) +9. `tags` (object, optional) Overrides tags for this file. Uses the same `content` and `inherit` fields as the top-level `tags` object. diff --git a/examples.md b/examples.md index a3d2580..9b7bbab 100644 --- a/examples.md +++ b/examples.md @@ -13,6 +13,7 @@ Root directory manifest (`.wp-materialize.json`): "author": { "content": ["editorial"], "inherit": true }, "renderer": "pandoc", "hard_line_breaks": true, + "block_html": true, "subdirectories": { "content": ["design", "notes"], "inherit": true }, "files": { "post.md": { @@ -24,6 +25,7 @@ Root directory manifest (`.wp-materialize.json`): "use_heading_as_title": { "level": 1, "strict": true }, "renderer": "py-gfm", "hard_line_breaks": false, + "block_html": false, "created_on": "2025-01-10 09:30", "last_modified": "2025-02-14 16:45" } @@ -54,6 +56,7 @@ Subdirectory manifest (`design/.wp-materialize.json`): "repo_storage_dir": "/home/user/wp-materialize-repos", "renderer": "default", "hard_line_breaks": false, + "block_html": false, "git_repositories": [], "directories": [ { @@ -73,6 +76,7 @@ Subdirectory manifest (`design/.wp-materialize.json`): "repo_storage_dir": "/home/user/wp-materialize-repos", "renderer": "default", "hard_line_breaks": false, + "block_html": false, "git_repositories": [ { "name": "content-repo", @@ -105,6 +109,7 @@ Subdirectory manifest (`design/.wp-materialize.json`): "repo_storage_dir": "/home/user/wp-materialize-repos", "renderer": "default", "hard_line_breaks": false, + "block_html": false, "git_repositories": [ { "name": "content-repo", diff --git a/src/config.py b/src/config.py index 7675f70..0a164ce 100644 --- a/src/config.py +++ b/src/config.py @@ -31,6 +31,7 @@ class Config: directories: List[DirectorySpec] renderer: Optional[str] hard_line_breaks: bool + block_html: bool def _expect_keys(obj: dict, allowed: set[str], context: str) -> None: @@ -52,7 +53,15 @@ def load_config(path: Path) -> Config: _expect_keys( data, - {"wordpress_root", "repo_storage_dir", "git_repositories", "directories", "renderer", "hard_line_breaks"}, + { + "wordpress_root", + "repo_storage_dir", + "git_repositories", + "directories", + "renderer", + "hard_line_breaks", + "block_html", + }, "config", ) @@ -60,6 +69,7 @@ def load_config(path: Path) -> Config: repo_storage_dir = _require_path(data, "repo_storage_dir", required=True) renderer = _require_renderer(data.get("renderer"), context="config.renderer") hard_line_breaks = _require_bool_optional(data.get("hard_line_breaks"), context="config.hard_line_breaks") + block_html = _require_bool_optional(data.get("block_html"), context="config.block_html") git_repositories = [] for idx, repo in enumerate(data.get("git_repositories", []) or []): @@ -95,6 +105,7 @@ def load_config(path: Path) -> Config: directories=directories, renderer=renderer, hard_line_breaks=False if hard_line_breaks is None else hard_line_breaks, + block_html=False if block_html is None else block_html, ) diff --git a/src/evaluation.py b/src/evaluation.py index d011181..c71ad35 100644 --- a/src/evaluation.py +++ b/src/evaluation.py @@ -23,6 +23,7 @@ class _Context: author: InheritList renderer: Optional[str] hard_line_breaks: bool + block_html: bool subdirectories: InheritList manifest_chain: List[Path] @@ -49,6 +50,7 @@ def evaluate( author=InheritList(), renderer=config.renderer, hard_line_breaks=config.hard_line_breaks, + block_html=config.block_html, subdirectories=InheritList(), manifest_chain=[], ), @@ -151,6 +153,11 @@ def _evaluate_directory( if manifest.hard_line_breaks is not None else context.hard_line_breaks ) + effective_block_html = ( + manifest.block_html + if manifest.block_html is not None + else context.block_html + ) effective_subdirs = _merge_inherit(context.subdirectories, manifest.subdirectories) manifest_chain = context.manifest_chain + [manifest.path] @@ -197,12 +204,18 @@ def _evaluate_directory( if spec.hard_line_breaks is not None else effective_hard_line_breaks ) + resolved_block_html = ( + spec.block_html + if spec.block_html is not None + else effective_block_html + ) html = convert_markdown( markdown_body, context=str(file_path), issues=issues, renderer=resolved_renderer or "default", hard_line_breaks=resolved_hard_line_breaks, + block_html=resolved_block_html, ) if html is None: continue @@ -272,6 +285,7 @@ def _evaluate_directory( author=effective_author, renderer=effective_renderer, hard_line_breaks=effective_hard_line_breaks, + block_html=effective_block_html, subdirectories=effective_subdirs, manifest_chain=manifest_chain, ), diff --git a/src/manifest.py b/src/manifest.py index e6a53e4..4fd1be7 100644 --- a/src/manifest.py +++ b/src/manifest.py @@ -24,7 +24,16 @@ def load_manifest(path: Path, issues: list[ValidationIssue]) -> Manifest | None: issues.append(ValidationIssue("Manifest must be a JSON object", context=str(path))) return None - allowed = {"categories", "tags", "author", "renderer", "hard_line_breaks", "subdirectories", "files"} + allowed = { + "categories", + "tags", + "author", + "renderer", + "hard_line_breaks", + "block_html", + "subdirectories", + "files", + } extra = set(data.keys()) - allowed if extra: issues.append(ValidationIssue(f"Unexpected keys: {sorted(extra)}", context=str(path))) @@ -35,6 +44,7 @@ def load_manifest(path: Path, issues: list[ValidationIssue]) -> Manifest | None: author = _parse_inherit_list(data.get("author"), issues, f"{path}:author") renderer = _parse_renderer_field(data.get("renderer"), issues, f"{path}:renderer") hard_line_breaks = _parse_bool_field(data.get("hard_line_breaks"), issues, f"{path}:hard_line_breaks") + block_html = _parse_bool_field(data.get("block_html"), issues, f"{path}:block_html") subdirectories = _parse_inherit_list(data.get("subdirectories"), issues, f"{path}:subdirectories") files: Dict[str, FileSpec] = {} @@ -59,6 +69,7 @@ def load_manifest(path: Path, issues: list[ValidationIssue]) -> Manifest | None: "last_modified", "renderer", "hard_line_breaks", + "block_html", } if extra_file: issues.append( @@ -110,6 +121,11 @@ def load_manifest(path: Path, issues: list[ValidationIssue]) -> Manifest | None: issues, f"{path}:{file_name}:hard_line_breaks", ) + block_html_override = _parse_bool_field( + file_cfg.get("block_html"), + issues, + f"{path}:{file_name}:block_html", + ) if created_on and last_modified and last_modified < created_on: issues.append( ValidationIssue("last_modified cannot be earlier than created_on", context=str(path)) @@ -125,6 +141,7 @@ def load_manifest(path: Path, issues: list[ValidationIssue]) -> Manifest | None: last_modified=last_modified, renderer=renderer_override, hard_line_breaks=hard_line_breaks_override, + block_html=block_html_override, ) return Manifest( @@ -134,6 +151,7 @@ def load_manifest(path: Path, issues: list[ValidationIssue]) -> Manifest | None: author=author, renderer=renderer, hard_line_breaks=hard_line_breaks, + block_html=block_html, subdirectories=subdirectories, files=files, ) diff --git a/src/markdown_utils.py b/src/markdown_utils.py index 39a1743..8ae44c2 100644 --- a/src/markdown_utils.py +++ b/src/markdown_utils.py @@ -61,13 +61,19 @@ def convert_markdown( issues: list[ValidationIssue], renderer: str = "default", hard_line_breaks: bool = False, + block_html: bool = False, ) -> str | None: + def wrap_blocks(html: str) -> str: + if not block_html: + return html + return f\"\\n{html}\\n\" + if renderer == "default": try: extensions = ["extra"] if hard_line_breaks: extensions.append("nl2br") - return md_lib.markdown(markdown_text, extensions=extensions, output_format="html5") + return wrap_blocks(md_lib.markdown(markdown_text, extensions=extensions, output_format="html5")) except Exception as exc: # pragma: no cover - depends on markdown internals issues.append(ValidationIssue(f"Markdown conversion failed: {exc}", context=context)) return None @@ -85,7 +91,7 @@ def convert_markdown( extensions = [extension_class()] if hard_line_breaks: extensions.append("nl2br") - return md_lib.markdown(markdown_text, extensions=extensions, output_format="html5") + return wrap_blocks(md_lib.markdown(markdown_text, extensions=extensions, output_format="html5")) except Exception as exc: # pragma: no cover - depends on markdown internals issues.append(ValidationIssue(f"Markdown conversion failed: {exc}", context=context)) return None @@ -98,7 +104,7 @@ def convert_markdown( capture_output=True, check=True, ) - return result.stdout + return wrap_blocks(result.stdout) except FileNotFoundError as exc: issues.append(ValidationIssue(f"pandoc is not available: {exc}", context=context)) return None diff --git a/src/models.py b/src/models.py index d39b873..64892dc 100644 --- a/src/models.py +++ b/src/models.py @@ -23,6 +23,7 @@ class FileSpec: last_modified: Optional[datetime] renderer: Optional[str] hard_line_breaks: Optional[bool] + block_html: Optional[bool] @dataclass(frozen=True) @@ -33,6 +34,7 @@ class Manifest: author: InheritList renderer: Optional[str] hard_line_breaks: Optional[bool] + block_html: Optional[bool] subdirectories: InheritList files: Dict[str, FileSpec] diff --git a/src/scaffold.py b/src/scaffold.py index bea43ac..b4e6b8d 100644 --- a/src/scaffold.py +++ b/src/scaffold.py @@ -16,6 +16,7 @@ def create_config(path: Path) -> None: "repo_storage_dir": "/path/to/repo-storage", "renderer": "default", "hard_line_breaks": False, + "block_html": False, "git_repositories": [ { "name": "example-repo", @@ -49,6 +50,7 @@ def create_manifest(directory: Path) -> Path: "author": {"content": [], "inherit": True}, "renderer": "default", "hard_line_breaks": False, + "block_html": False, "subdirectories": {"content": [], "inherit": True}, "files": {}, } -- 2.43.0 From b272c928d43a511d08a43ee1ff4d9a9e954d18df Mon Sep 17 00:00:00 2001 From: Peisong Xiao Date: Sun, 8 Feb 2026 18:23:49 -0500 Subject: [PATCH 10/12] fixed syntax issue --- src/markdown_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/markdown_utils.py b/src/markdown_utils.py index 8ae44c2..9769166 100644 --- a/src/markdown_utils.py +++ b/src/markdown_utils.py @@ -66,7 +66,7 @@ def convert_markdown( def wrap_blocks(html: str) -> str: if not block_html: return html - return f\"\\n{html}\\n\" + return f"\n{html}\n" if renderer == "default": try: -- 2.43.0 From 094a283ba9cc7da941720da6ac49d15507c00f35 Mon Sep 17 00:00:00 2001 From: Peisong Xiao Date: Sun, 8 Feb 2026 18:30:11 -0500 Subject: [PATCH 11/12] renamed evaluate to validate --- README.md | 6 +- src/cli.py | 20 +- src/evaluation.py | 469 ---------------------------------------------- 3 files changed, 13 insertions(+), 482 deletions(-) delete mode 100644 src/evaluation.py diff --git a/README.md b/README.md index c08c86c..8ba48c9 100644 --- a/README.md +++ b/README.md @@ -78,13 +78,13 @@ State is stored separately (created on first successful apply): ## Usage -Dry-run evaluation: +Dry-run validation: ```bash -wp-materialize evaluate +wp-materialize validate ``` -Apply (evaluate, then materialize): +Apply (validate, then materialize): ```bash wp-materialize apply diff --git a/src/cli.py b/src/cli.py index c9ea642..c26b3ab 100644 --- a/src/cli.py +++ b/src/cli.py @@ -8,7 +8,7 @@ from pathlib import Path from .apply import apply_changes from .config import load_config from .errors import ConfigurationError, MaterializeError, ValidationError -from .evaluation import evaluate +from .validation import validate from .local_export import export_local from .scaffold import add_dir_to_manifest, add_file_to_manifest, create_config, create_manifest, resolve_manifest_dir from .state import load_state @@ -47,13 +47,13 @@ def main() -> int: common.add_argument( "--json", action="store_true", - help="Output evaluation summary as JSON.", + help="Output validation summary as JSON.", ) subparsers = parser.add_subparsers(dest="command", metavar="command") subparsers.add_parser( - "evaluate", + "validate", parents=[common], help="Validate config/manifests and plan changes (no WP writes).", description="Validate config/manifests, convert Markdown, and plan changes without writing to WordPress.", @@ -61,8 +61,8 @@ def main() -> int: subparsers.add_parser( "apply", parents=[common], - help="Evaluate then create/update WordPress posts and taxonomy.", - description="Evaluate, then create categories/tags and create or update posts in WordPress.", + help="Validate then create/update WordPress posts and taxonomy.", + description="Validate, then create categories/tags and create or update posts in WordPress.", ) local_parser = subparsers.add_parser( "local", @@ -172,7 +172,7 @@ def main() -> int: try: config = load_config(args.config) state = load_state(args.state) - result = evaluate( + result = validate( config, state, sync_repos=not args.no_sync, @@ -187,9 +187,9 @@ def main() -> int: return 1 if args.json: - print(_evaluation_json(result)) + print(_validation_json(result)) else: - print(_evaluation_summary(result)) + print(_validation_summary(result)) if args.command == "local": output_dir = getattr(args, "output_dir", None) @@ -224,7 +224,7 @@ def _default_state_path() -> Path: return Path.home() / ".config" / "wp-materialize" / "state.json" -def _evaluation_summary(result) -> str: +def _validation_summary(result) -> str: total = len(result.posts) updates = sum(1 for post in result.posts if post.should_update) categories = len(result.taxonomy_to_create.missing_categories) @@ -238,7 +238,7 @@ def _evaluation_summary(result) -> str: return "\n".join(lines) -def _evaluation_json(result) -> str: +def _validation_json(result) -> str: payload = { "posts": [ { diff --git a/src/evaluation.py b/src/evaluation.py deleted file mode 100644 index c71ad35..0000000 --- a/src/evaluation.py +++ /dev/null @@ -1,469 +0,0 @@ -from __future__ import annotations - -from dataclasses import dataclass -from datetime import datetime -from pathlib import Path -import shutil -from typing import Dict, List, Optional, Set - -from .config import Config -from .errors import ValidationError, ValidationIssue -from .git_utils import ensure_repo, git_first_timestamp, git_timestamp -from .manifest import load_manifest -from .markdown_utils import convert_markdown, extract_title -from .models import EvaluationResult, InheritList, PostPlan, Source, TaxonomyPlan -from .state import State -from .wp_cli import WordPressCLI - - -@dataclass -class _Context: - categories: InheritList - tags: InheritList - author: InheritList - renderer: Optional[str] - hard_line_breaks: bool - block_html: bool - subdirectories: InheritList - manifest_chain: List[Path] - - -def evaluate( - config: Config, - state: State, - sync_repos: bool, - force_new: bool = False, - skip_wp_checks: bool = False, -) -> EvaluationResult: - issues: List[ValidationIssue] = [] - - sources = _load_sources(config, sync_repos, issues) - - posts: List[PostPlan] = [] - for source, content_root in sources: - _evaluate_directory( - source=source, - directory=content_root, - context=_Context( - categories=InheritList(), - tags=InheritList(), - author=InheritList(), - renderer=config.renderer, - hard_line_breaks=config.hard_line_breaks, - block_html=config.block_html, - subdirectories=InheritList(), - manifest_chain=[], - ), - state=state, - issues=issues, - posts=posts, - force_new=force_new, - ) - - missing_categories: List[List[str]] = [] - missing_tags: List[str] = [] - if not skip_wp_checks: - if shutil.which("wp") is None: - issues.append(ValidationIssue("wp CLI not found in PATH", context=str(config.wordpress_root))) - categories = [] - tag_names: Set[str] = set() - try: - wp = WordPressCLI(config.wordpress_root) - categories = wp.list_categories() - tags = wp.list_tags() - tag_names = {tag.name for tag in tags} - except Exception as exc: - issues.append(ValidationIssue(str(exc), context=str(config.wordpress_root))) - - missing_categories, missing_tags = _plan_taxonomy(posts, categories, tag_names) - - if issues: - raise ValidationError(issues) - - return EvaluationResult( - posts=posts, - taxonomy_to_create=TaxonomyPlan(missing_categories=missing_categories, missing_tags=missing_tags), - ) - - -def _load_sources( - config: Config, - sync_repos: bool, - issues: List[ValidationIssue], -) -> List[tuple[Source, Path]]: - sources: List[tuple[Source, Path]] = [] - - for repo in config.git_repositories: - repo_path = config.repo_storage_dir / repo.name - try: - ensure_repo(repo_path, repo.url, repo.branch, sync=sync_repos) - except Exception as exc: - issues.append(ValidationIssue(str(exc), context=str(repo_path))) - continue - content_root = repo_path / repo.root_subdir if repo.root_subdir else repo_path - if not content_root.exists(): - issues.append(ValidationIssue("Repository content root missing", context=str(content_root))) - continue - sources.append( - ( - Source(name=repo.name, root_path=content_root, identity_root=repo_path, kind="git"), - content_root, - ) - ) - - for directory in config.directories: - root_path = directory.path - if not root_path.exists(): - issues.append(ValidationIssue("Directory not found", context=str(root_path))) - continue - content_root = root_path / directory.root_subdir if directory.root_subdir else root_path - if not content_root.exists(): - issues.append(ValidationIssue("Directory content root missing", context=str(content_root))) - continue - sources.append( - ( - Source(name=directory.name, root_path=content_root, identity_root=root_path, kind="dir"), - content_root, - ) - ) - - return sources - - -def _evaluate_directory( - source: Source, - directory: Path, - context: _Context, - state: State, - issues: List[ValidationIssue], - posts: List[PostPlan], - force_new: bool, -) -> None: - manifest_path = directory / ".wp-materialize.json" - manifest = load_manifest(manifest_path, issues) - if manifest is None: - return - - effective_categories = _merge_inherit(context.categories, manifest.categories) - effective_tags = _merge_inherit(context.tags, manifest.tags) - effective_author = _merge_inherit(context.author, manifest.author) - effective_renderer = manifest.renderer if manifest.renderer is not None else context.renderer - effective_hard_line_breaks = ( - manifest.hard_line_breaks - if manifest.hard_line_breaks is not None - else context.hard_line_breaks - ) - effective_block_html = ( - manifest.block_html - if manifest.block_html is not None - else context.block_html - ) - effective_subdirs = _merge_inherit(context.subdirectories, manifest.subdirectories) - - manifest_chain = context.manifest_chain + [manifest.path] - - for file_name, spec in manifest.files.items(): - file_path = directory / file_name - if not file_path.exists(): - issues.append(ValidationIssue("File not found", context=str(file_path))) - continue - - try: - content = file_path.read_text(encoding="utf-8") - except Exception as exc: - issues.append(ValidationIssue(f"Failed to read file: {exc}", context=str(file_path))) - continue - - title = spec.title - markdown_body = content - if spec.use_heading_level is not None: - extracted = extract_title( - content, - level=spec.use_heading_level, - strict=spec.use_heading_strict, - context=str(file_path), - issues=issues, - ) - if extracted is None: - continue - title, markdown_body = extracted - elif not title: - issues.append(ValidationIssue("Missing title (title or use_heading_as_title required)", context=str(file_path))) - continue - - resolved_categories = _resolve_overrides(effective_categories, spec.categories) - resolved_tags = _resolve_overrides(effective_tags, spec.tags) - - resolved_categories = _normalize_list(resolved_categories, "category", str(file_path), issues) - resolved_tags = _normalize_list(resolved_tags, "tag", str(file_path), issues) - resolved_author = _resolve_author(effective_author.content, str(file_path), issues) - - resolved_renderer = spec.renderer if spec.renderer is not None else effective_renderer - resolved_hard_line_breaks = ( - spec.hard_line_breaks - if spec.hard_line_breaks is not None - else effective_hard_line_breaks - ) - resolved_block_html = ( - spec.block_html - if spec.block_html is not None - else effective_block_html - ) - html = convert_markdown( - markdown_body, - context=str(file_path), - issues=issues, - renderer=resolved_renderer or "default", - hard_line_breaks=resolved_hard_line_breaks, - block_html=resolved_block_html, - ) - if html is None: - continue - - relative_path = _relative_path(file_path, source.identity_root, issues) - if relative_path is None: - continue - - timestamps = [] - ts = _timestamp_for_path(source, source.identity_root, relative_path, issues) - if ts is None: - continue - timestamps.append(ts) - - for manifest_file in manifest_chain: - manifest_rel = _relative_path(manifest_file, source.identity_root, issues) - if manifest_rel is None: - continue - ts_manifest = _timestamp_for_path(source, source.identity_root, manifest_rel, issues) - if ts_manifest is None: - continue - timestamps.append(ts_manifest) - - source_timestamp = max(timestamps) - identity = f"{source.name}:{relative_path}" - cached_entry = state.posts.get(identity) - cached_ts = cached_entry.source_timestamp if cached_entry else None - should_update = True if force_new else (cached_ts is None or source_timestamp > cached_ts) - created_on, last_modified = _resolve_post_datetimes( - source=source, - identity_root=source.identity_root, - relative_path=relative_path, - spec=spec, - issues=issues, - ) - - posts.append( - PostPlan( - source=source, - identity=identity, - relative_path=relative_path, - absolute_path=file_path, - title=title, - html=html, - categories=resolved_categories, - tags=resolved_tags, - author=resolved_author, - source_timestamp=source_timestamp, - cached_timestamp=cached_ts, - should_update=should_update, - created_on=created_on, - last_modified=last_modified, - ) - ) - - for subdir in effective_subdirs.content: - subdir_path = directory / subdir - if not subdir_path.exists(): - issues.append(ValidationIssue("Missing subdirectory", context=str(subdir_path))) - continue - _evaluate_directory( - source=source, - directory=subdir_path, - context=_Context( - categories=effective_categories, - tags=effective_tags, - author=effective_author, - renderer=effective_renderer, - hard_line_breaks=effective_hard_line_breaks, - block_html=effective_block_html, - subdirectories=effective_subdirs, - manifest_chain=manifest_chain, - ), - state=state, - issues=issues, - posts=posts, - force_new=force_new, - ) - - -def _merge_inherit(parent: InheritList, child: InheritList) -> InheritList: - if child.inherit: - content = parent.content + child.content - else: - content = child.content - return InheritList(content=content, inherit=True) - - -def _resolve_overrides(parent: InheritList, override: Optional[InheritList]) -> List[str]: - if override is None: - return list(parent.content) - if override.inherit: - return parent.content + override.content - return list(override.content) - - -def _normalize_list(values: List[str], label: str, context: str, issues: List[ValidationIssue]) -> List[str]: - normalized: List[str] = [] - seen: Set[str] = set() - for value in values: - cleaned = value.strip() - if not cleaned: - issues.append(ValidationIssue(f"Empty {label} entry", context=context)) - continue - if label == "category": - parts = [part.strip() for part in cleaned.split("/")] - if any(not part for part in parts): - issues.append(ValidationIssue(f"Invalid category path: {cleaned}", context=context)) - continue - cleaned = "/".join(parts) - if cleaned not in seen: - seen.add(cleaned) - normalized.append(cleaned) - return normalized - - -def _resolve_author(values: List[str], context: str, issues: List[ValidationIssue]) -> Optional[str]: - normalized = _normalize_list(values, "author", context, issues) - if not normalized: - return None - if len(normalized) > 1: - issues.append(ValidationIssue("Multiple authors specified; only one is allowed", context=context)) - return None - return normalized[0] - - -def _relative_path(path: Path, root: Path, issues: List[ValidationIssue]) -> Optional[str]: - try: - return str(path.relative_to(root)) - except ValueError: - issues.append(ValidationIssue("Path is outside identity root", context=str(path))) - return None - - -def _timestamp_for_path( - source: Source, - identity_root: Path, - relative_path: str, - issues: List[ValidationIssue], -) -> Optional[int]: - if source.kind == "git": - try: - return git_timestamp(identity_root, relative_path) - except Exception as exc: - issues.append(ValidationIssue(str(exc), context=relative_path)) - return None - try: - return int((identity_root / relative_path).stat().st_mtime) - except Exception as exc: - issues.append(ValidationIssue(f"Timestamp lookup failed: {exc}", context=relative_path)) - return None - - -def _resolve_post_datetimes( - source: Source, - identity_root: Path, - relative_path: str, - spec, - issues: List[ValidationIssue], -) -> tuple[Optional[str], Optional[str]]: - created_dt = spec.created_on - modified_dt = spec.last_modified - - if created_dt is None or modified_dt is None: - inferred = _infer_file_timestamps(source, identity_root, relative_path, issues) - if inferred is None: - return None, None - inferred_created, inferred_modified = inferred - if created_dt is None: - created_dt = datetime.fromtimestamp(inferred_created) - if modified_dt is None: - modified_dt = datetime.fromtimestamp(inferred_modified) - - if created_dt and modified_dt and modified_dt < created_dt: - issues.append( - ValidationIssue("last_modified cannot be earlier than created_on", context=relative_path) - ) - return None, None - - created_on = _format_wp_datetime(created_dt) if created_dt else None - last_modified = _format_wp_datetime(modified_dt) if modified_dt else None - return created_on, last_modified - - -def _infer_file_timestamps( - source: Source, - identity_root: Path, - relative_path: str, - issues: List[ValidationIssue], -) -> Optional[tuple[int, int]]: - if source.kind == "git": - try: - created_ts = git_first_timestamp(identity_root, relative_path) - modified_ts = git_timestamp(identity_root, relative_path) - return created_ts, modified_ts - except Exception: - pass - try: - stat = (identity_root / relative_path).stat() - return int(stat.st_ctime), int(stat.st_mtime) - except Exception as exc: - issues.append(ValidationIssue(f"Timestamp lookup failed: {exc}", context=relative_path)) - return None - - -def _format_wp_datetime(value: datetime) -> str: - return value.strftime("%Y-%m-%d %H:%M:%S") - - -def _plan_taxonomy( - posts: List[PostPlan], - categories, # list of CategoryTerm - existing_tags: Set[str], -) -> tuple[List[List[str]], List[str]]: - category_map: Dict[tuple[int, str], int] = {} - for category in categories: - category_map[(category.parent, category.name)] = category.term_id - - missing_paths: List[List[str]] = [] - seen_missing: Set[tuple[str, ...]] = set() - missing_tags: List[str] = [] - seen_tags: Set[str] = set() - - for post in posts: - if not post.should_update: - continue - for tag in post.tags: - if tag not in existing_tags: - if tag not in seen_tags: - seen_tags.add(tag) - missing_tags.append(tag) - for path in post.categories: - segments = [segment for segment in path.split("/") if segment] - if not segments: - continue - parent = 0 - missing = False - for segment in segments: - key = (parent, segment) - if key in category_map: - parent = category_map[key] - continue - missing = True - break - if missing: - key = tuple(segments) - if key not in seen_missing: - seen_missing.add(key) - missing_paths.append(list(segments)) - - return missing_paths, missing_tags -- 2.43.0 From 977679c37f8fe0dbc8e942e542176c7997f2e188 Mon Sep 17 00:00:00 2001 From: Peisong Xiao Date: Sun, 8 Feb 2026 18:30:33 -0500 Subject: [PATCH 12/12] renamed evaluate to validate --- src/validation.py | 469 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 469 insertions(+) create mode 100644 src/validation.py diff --git a/src/validation.py b/src/validation.py new file mode 100644 index 0000000..c7fb169 --- /dev/null +++ b/src/validation.py @@ -0,0 +1,469 @@ +from __future__ import annotations + +from dataclasses import dataclass +from datetime import datetime +from pathlib import Path +import shutil +from typing import Dict, List, Optional, Set + +from .config import Config +from .errors import ValidationError, ValidationIssue +from .git_utils import ensure_repo, git_first_timestamp, git_timestamp +from .manifest import load_manifest +from .markdown_utils import convert_markdown, extract_title +from .models import EvaluationResult, InheritList, PostPlan, Source, TaxonomyPlan +from .state import State +from .wp_cli import WordPressCLI + + +@dataclass +class _Context: + categories: InheritList + tags: InheritList + author: InheritList + renderer: Optional[str] + hard_line_breaks: bool + block_html: bool + subdirectories: InheritList + manifest_chain: List[Path] + + +def validate( + config: Config, + state: State, + sync_repos: bool, + force_new: bool = False, + skip_wp_checks: bool = False, +) -> EvaluationResult: + issues: List[ValidationIssue] = [] + + sources = _load_sources(config, sync_repos, issues) + + posts: List[PostPlan] = [] + for source, content_root in sources: + _validate_directory( + source=source, + directory=content_root, + context=_Context( + categories=InheritList(), + tags=InheritList(), + author=InheritList(), + renderer=config.renderer, + hard_line_breaks=config.hard_line_breaks, + block_html=config.block_html, + subdirectories=InheritList(), + manifest_chain=[], + ), + state=state, + issues=issues, + posts=posts, + force_new=force_new, + ) + + missing_categories: List[List[str]] = [] + missing_tags: List[str] = [] + if not skip_wp_checks: + if shutil.which("wp") is None: + issues.append(ValidationIssue("wp CLI not found in PATH", context=str(config.wordpress_root))) + categories = [] + tag_names: Set[str] = set() + try: + wp = WordPressCLI(config.wordpress_root) + categories = wp.list_categories() + tags = wp.list_tags() + tag_names = {tag.name for tag in tags} + except Exception as exc: + issues.append(ValidationIssue(str(exc), context=str(config.wordpress_root))) + + missing_categories, missing_tags = _plan_taxonomy(posts, categories, tag_names) + + if issues: + raise ValidationError(issues) + + return EvaluationResult( + posts=posts, + taxonomy_to_create=TaxonomyPlan(missing_categories=missing_categories, missing_tags=missing_tags), + ) + + +def _load_sources( + config: Config, + sync_repos: bool, + issues: List[ValidationIssue], +) -> List[tuple[Source, Path]]: + sources: List[tuple[Source, Path]] = [] + + for repo in config.git_repositories: + repo_path = config.repo_storage_dir / repo.name + try: + ensure_repo(repo_path, repo.url, repo.branch, sync=sync_repos) + except Exception as exc: + issues.append(ValidationIssue(str(exc), context=str(repo_path))) + continue + content_root = repo_path / repo.root_subdir if repo.root_subdir else repo_path + if not content_root.exists(): + issues.append(ValidationIssue("Repository content root missing", context=str(content_root))) + continue + sources.append( + ( + Source(name=repo.name, root_path=content_root, identity_root=repo_path, kind="git"), + content_root, + ) + ) + + for directory in config.directories: + root_path = directory.path + if not root_path.exists(): + issues.append(ValidationIssue("Directory not found", context=str(root_path))) + continue + content_root = root_path / directory.root_subdir if directory.root_subdir else root_path + if not content_root.exists(): + issues.append(ValidationIssue("Directory content root missing", context=str(content_root))) + continue + sources.append( + ( + Source(name=directory.name, root_path=content_root, identity_root=root_path, kind="dir"), + content_root, + ) + ) + + return sources + + +def _validate_directory( + source: Source, + directory: Path, + context: _Context, + state: State, + issues: List[ValidationIssue], + posts: List[PostPlan], + force_new: bool, +) -> None: + manifest_path = directory / ".wp-materialize.json" + manifest = load_manifest(manifest_path, issues) + if manifest is None: + return + + effective_categories = _merge_inherit(context.categories, manifest.categories) + effective_tags = _merge_inherit(context.tags, manifest.tags) + effective_author = _merge_inherit(context.author, manifest.author) + effective_renderer = manifest.renderer if manifest.renderer is not None else context.renderer + effective_hard_line_breaks = ( + manifest.hard_line_breaks + if manifest.hard_line_breaks is not None + else context.hard_line_breaks + ) + effective_block_html = ( + manifest.block_html + if manifest.block_html is not None + else context.block_html + ) + effective_subdirs = _merge_inherit(context.subdirectories, manifest.subdirectories) + + manifest_chain = context.manifest_chain + [manifest.path] + + for file_name, spec in manifest.files.items(): + file_path = directory / file_name + if not file_path.exists(): + issues.append(ValidationIssue("File not found", context=str(file_path))) + continue + + try: + content = file_path.read_text(encoding="utf-8") + except Exception as exc: + issues.append(ValidationIssue(f"Failed to read file: {exc}", context=str(file_path))) + continue + + title = spec.title + markdown_body = content + if spec.use_heading_level is not None: + extracted = extract_title( + content, + level=spec.use_heading_level, + strict=spec.use_heading_strict, + context=str(file_path), + issues=issues, + ) + if extracted is None: + continue + title, markdown_body = extracted + elif not title: + issues.append(ValidationIssue("Missing title (title or use_heading_as_title required)", context=str(file_path))) + continue + + resolved_categories = _resolve_overrides(effective_categories, spec.categories) + resolved_tags = _resolve_overrides(effective_tags, spec.tags) + + resolved_categories = _normalize_list(resolved_categories, "category", str(file_path), issues) + resolved_tags = _normalize_list(resolved_tags, "tag", str(file_path), issues) + resolved_author = _resolve_author(effective_author.content, str(file_path), issues) + + resolved_renderer = spec.renderer if spec.renderer is not None else effective_renderer + resolved_hard_line_breaks = ( + spec.hard_line_breaks + if spec.hard_line_breaks is not None + else effective_hard_line_breaks + ) + resolved_block_html = ( + spec.block_html + if spec.block_html is not None + else effective_block_html + ) + html = convert_markdown( + markdown_body, + context=str(file_path), + issues=issues, + renderer=resolved_renderer or "default", + hard_line_breaks=resolved_hard_line_breaks, + block_html=resolved_block_html, + ) + if html is None: + continue + + relative_path = _relative_path(file_path, source.identity_root, issues) + if relative_path is None: + continue + + timestamps = [] + ts = _timestamp_for_path(source, source.identity_root, relative_path, issues) + if ts is None: + continue + timestamps.append(ts) + + for manifest_file in manifest_chain: + manifest_rel = _relative_path(manifest_file, source.identity_root, issues) + if manifest_rel is None: + continue + ts_manifest = _timestamp_for_path(source, source.identity_root, manifest_rel, issues) + if ts_manifest is None: + continue + timestamps.append(ts_manifest) + + source_timestamp = max(timestamps) + identity = f"{source.name}:{relative_path}" + cached_entry = state.posts.get(identity) + cached_ts = cached_entry.source_timestamp if cached_entry else None + should_update = True if force_new else (cached_ts is None or source_timestamp > cached_ts) + created_on, last_modified = _resolve_post_datetimes( + source=source, + identity_root=source.identity_root, + relative_path=relative_path, + spec=spec, + issues=issues, + ) + + posts.append( + PostPlan( + source=source, + identity=identity, + relative_path=relative_path, + absolute_path=file_path, + title=title, + html=html, + categories=resolved_categories, + tags=resolved_tags, + author=resolved_author, + source_timestamp=source_timestamp, + cached_timestamp=cached_ts, + should_update=should_update, + created_on=created_on, + last_modified=last_modified, + ) + ) + + for subdir in effective_subdirs.content: + subdir_path = directory / subdir + if not subdir_path.exists(): + issues.append(ValidationIssue("Missing subdirectory", context=str(subdir_path))) + continue + _validate_directory( + source=source, + directory=subdir_path, + context=_Context( + categories=effective_categories, + tags=effective_tags, + author=effective_author, + renderer=effective_renderer, + hard_line_breaks=effective_hard_line_breaks, + block_html=effective_block_html, + subdirectories=effective_subdirs, + manifest_chain=manifest_chain, + ), + state=state, + issues=issues, + posts=posts, + force_new=force_new, + ) + + +def _merge_inherit(parent: InheritList, child: InheritList) -> InheritList: + if child.inherit: + content = parent.content + child.content + else: + content = child.content + return InheritList(content=content, inherit=True) + + +def _resolve_overrides(parent: InheritList, override: Optional[InheritList]) -> List[str]: + if override is None: + return list(parent.content) + if override.inherit: + return parent.content + override.content + return list(override.content) + + +def _normalize_list(values: List[str], label: str, context: str, issues: List[ValidationIssue]) -> List[str]: + normalized: List[str] = [] + seen: Set[str] = set() + for value in values: + cleaned = value.strip() + if not cleaned: + issues.append(ValidationIssue(f"Empty {label} entry", context=context)) + continue + if label == "category": + parts = [part.strip() for part in cleaned.split("/")] + if any(not part for part in parts): + issues.append(ValidationIssue(f"Invalid category path: {cleaned}", context=context)) + continue + cleaned = "/".join(parts) + if cleaned not in seen: + seen.add(cleaned) + normalized.append(cleaned) + return normalized + + +def _resolve_author(values: List[str], context: str, issues: List[ValidationIssue]) -> Optional[str]: + normalized = _normalize_list(values, "author", context, issues) + if not normalized: + return None + if len(normalized) > 1: + issues.append(ValidationIssue("Multiple authors specified; only one is allowed", context=context)) + return None + return normalized[0] + + +def _relative_path(path: Path, root: Path, issues: List[ValidationIssue]) -> Optional[str]: + try: + return str(path.relative_to(root)) + except ValueError: + issues.append(ValidationIssue("Path is outside identity root", context=str(path))) + return None + + +def _timestamp_for_path( + source: Source, + identity_root: Path, + relative_path: str, + issues: List[ValidationIssue], +) -> Optional[int]: + if source.kind == "git": + try: + return git_timestamp(identity_root, relative_path) + except Exception as exc: + issues.append(ValidationIssue(str(exc), context=relative_path)) + return None + try: + return int((identity_root / relative_path).stat().st_mtime) + except Exception as exc: + issues.append(ValidationIssue(f"Timestamp lookup failed: {exc}", context=relative_path)) + return None + + +def _resolve_post_datetimes( + source: Source, + identity_root: Path, + relative_path: str, + spec, + issues: List[ValidationIssue], +) -> tuple[Optional[str], Optional[str]]: + created_dt = spec.created_on + modified_dt = spec.last_modified + + if created_dt is None or modified_dt is None: + inferred = _infer_file_timestamps(source, identity_root, relative_path, issues) + if inferred is None: + return None, None + inferred_created, inferred_modified = inferred + if created_dt is None: + created_dt = datetime.fromtimestamp(inferred_created) + if modified_dt is None: + modified_dt = datetime.fromtimestamp(inferred_modified) + + if created_dt and modified_dt and modified_dt < created_dt: + issues.append( + ValidationIssue("last_modified cannot be earlier than created_on", context=relative_path) + ) + return None, None + + created_on = _format_wp_datetime(created_dt) if created_dt else None + last_modified = _format_wp_datetime(modified_dt) if modified_dt else None + return created_on, last_modified + + +def _infer_file_timestamps( + source: Source, + identity_root: Path, + relative_path: str, + issues: List[ValidationIssue], +) -> Optional[tuple[int, int]]: + if source.kind == "git": + try: + created_ts = git_first_timestamp(identity_root, relative_path) + modified_ts = git_timestamp(identity_root, relative_path) + return created_ts, modified_ts + except Exception: + pass + try: + stat = (identity_root / relative_path).stat() + return int(stat.st_ctime), int(stat.st_mtime) + except Exception as exc: + issues.append(ValidationIssue(f"Timestamp lookup failed: {exc}", context=relative_path)) + return None + + +def _format_wp_datetime(value: datetime) -> str: + return value.strftime("%Y-%m-%d %H:%M:%S") + + +def _plan_taxonomy( + posts: List[PostPlan], + categories, # list of CategoryTerm + existing_tags: Set[str], +) -> tuple[List[List[str]], List[str]]: + category_map: Dict[tuple[int, str], int] = {} + for category in categories: + category_map[(category.parent, category.name)] = category.term_id + + missing_paths: List[List[str]] = [] + seen_missing: Set[tuple[str, ...]] = set() + missing_tags: List[str] = [] + seen_tags: Set[str] = set() + + for post in posts: + if not post.should_update: + continue + for tag in post.tags: + if tag not in existing_tags: + if tag not in seen_tags: + seen_tags.add(tag) + missing_tags.append(tag) + for path in post.categories: + segments = [segment for segment in path.split("/") if segment] + if not segments: + continue + parent = 0 + missing = False + for segment in segments: + key = (parent, segment) + if key in category_map: + parent = category_map[key] + continue + missing = True + break + if missing: + key = tuple(segments) + if key not in seen_missing: + seen_missing.add(key) + missing_paths.append(list(segments)) + + return missing_paths, missing_tags -- 2.43.0