From 68bfab9c1782399cef11c39769b6d9ea4a680cd1 Mon Sep 17 00:00:00 2001 From: Peisong Xiao Date: Wed, 4 Feb 2026 21:29:17 -0500 Subject: [PATCH] initial commit: codex draft --- .gitignore | 6 + AGENTS.md | 281 +++++++++++++++++++++++++++++++++++ README.md | 113 ++++++++++++++ configurations.md | 99 +++++++++++++ examples.md | 105 ++++++++++++++ pyproject.toml | 20 +++ requirements.txt | 1 + src/__init__.py | 3 + src/apply.py | 101 +++++++++++++ src/cli.py | 101 +++++++++++++ src/config.py | 104 +++++++++++++ src/errors.py | 34 +++++ src/evaluation.py | 331 ++++++++++++++++++++++++++++++++++++++++++ src/git_utils.py | 52 +++++++ src/manifest.py | 131 +++++++++++++++++ src/markdown_utils.py | 62 ++++++++ src/models.py | 63 ++++++++ src/state.py | 61 ++++++++ src/wp_cli.py | 170 ++++++++++++++++++++++ 19 files changed, 1838 insertions(+) create mode 100644 .gitignore create mode 100644 AGENTS.md create mode 100644 README.md create mode 100644 configurations.md create mode 100644 examples.md create mode 100644 pyproject.toml create mode 100644 requirements.txt create mode 100644 src/__init__.py create mode 100644 src/apply.py create mode 100644 src/cli.py create mode 100644 src/config.py create mode 100644 src/errors.py create mode 100644 src/evaluation.py create mode 100644 src/git_utils.py create mode 100644 src/manifest.py create mode 100644 src/markdown_utils.py create mode 100644 src/models.py create mode 100644 src/state.py create mode 100644 src/wp_cli.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..e8709ef --- /dev/null +++ b/.gitignore @@ -0,0 +1,6 @@ +__pycache__/ +*.pyc +*.pyo +*.egg-info/ +.env +.venv/ diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 0000000..0c77c69 --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,281 @@ +# wp-materialize – Agents Specification + +## Purpose + +`wp-materialize` is an **automation compiler** that materializes specified Markdown files in Git repositories (or local directories) into WordPress 
posts. Git / filesystem state is the **single source of truth**; WordPress is a **derived, materialized view**. + +The system is **declarative, atomic, incremental, and fail-fast**. It never guesses intent, never partially updates WordPress, and never mutates state unless correctness is proven ahead of time. + +This document is the **authoritative agent-facing spec** to be fed to Codex or other build agents. + +--- + +## Core Principles (Non-Negotiable) + +1. **Git / Filesystem as Source of Truth** + WordPress content must exactly reflect declared Markdown sources and manifests. + +2. **Declarative Configuration Only** + No implicit discovery, no heuristics, no inference. + +3. **Atomic Execution** + A full dry-run validation must succeed before *any* WordPress mutation occurs. + +4. **Incremental Updates** + Only content whose source timestamp is newer than its cached materialization timestamp may be updated. + +5. **Fail Fast, Fail Loud** + Any configuration, validation, or conversion error aborts the entire run. 
+ +--- + +## High-Level Architecture + +The system operates in two strictly separated phases: + +### Phase 1: Pure Evaluation (Dry Run) + +* Read global config +* Discover and load repositories / directories +* Load and validate all `.wp-materialize.json` manifests +* Resolve inheritance (categories, tags, subdirectories) +* Convert Markdown → HTML (in-memory only) +* Resolve titles, timestamps, taxonomy +* Determine incremental update set +* Detect *all* errors + +**If any error occurs, execution stops here.** + +### Phase 2: Side-Effect Application + +* Create missing WordPress categories +* Create or update WordPress posts +* Update cached timestamps *only for successfully applied posts* + +--- + +## Global Configuration + +Location: + +``` +~/.config/wp-materialize/config.json +``` + +Responsibilities: + +* WordPress root directory (where `wp` CLI is executed) +* Repository storage directory +* List of Git repositories to clone / pull +* List of non-git directories to manage + +### Runtime State (Separate from Config) + +Mutable runtime state must be stored separately, e.g.: + +``` +~/.config/wp-materialize/state.json +``` + +State includes: + +* Last successful materialization timestamp per post +* Cached per-post source timestamps + +Config must remain declarative and diffable. + +--- + +## Repository and Directory Rules + +* Each managed directory **must** contain a `.wp-materialize.json` manifest. +* Any directory listed under `subdirectories` **must** contain its own manifest. +* Missing manifests are **hard errors**. +* No implicit recursion is allowed. + +--- + +## Per-Directory Manifest: `.wp-materialize.json` + +Each manifest defines a **scope boundary**. 
+ +### Top-Level Fields + +#### `categories` + +```json +{ + "content": ["Systems", "Infrastructure"], + "inherit": true +} +``` + +* `content`: array of category paths +* `inherit: true` → append to parent effective categories +* `inherit: false` → override parent categories entirely + +#### `tags` + +```json +{ + "content": ["automation", "wordpress"], + "inherit": true +} +``` + +Semantics identical to `categories`. + +#### `subdirectories` + +```json +{ + "content": ["design", "notes"], + "inherit": true +} +``` + +* Controls traversal explicitly +* Included subdirectories **must** have their own manifest +* `inherit: false` cuts traversal + +--- + +## File-Level Configuration + +Each file listed under `files` represents a WordPress post. + +```json +"files": { + "post.md": { + "title": "Explicit Title", + "categories": { "content": ["Overrides"], "inherit": false }, + "tags": { "content": ["extra"], "inherit": true } + }, + + "essay.md": { + "use_heading_as_title": { + "level": 1, + "strict": true + } + } +} +``` + +### Title Rules + +* If `use_heading_as_title` is specified: + + * Extract the specified heading level + * Use it as the WordPress post title + * Remove that heading from the body + * Promote remaining headings by one level + * If `strict: true`, exactly one matching heading must exist + +* Otherwise, `title` **must** be provided + +--- + +## Markdown → HTML Conversion + +* Conversion occurs **only during dry run** +* No HTML is written or sent to WordPress during evaluation +* Conversion errors are fatal + +--- + +## Category Materialization + +* Categories are treated as **hierarchical paths** +* If a declared category path does not exist in WordPress: + + * It is **automatically created** during the apply phase +* Category creation: + + * Must be planned during dry run + * Must occur before post updates + +Tags are **not** auto-created. 
+ +--- + +## Timestamps and Incremental Updates + +### Timestamp Sources + +* Git repository: + + * Use Git commit timestamps +* Non-git directory: + + * Use filesystem timestamps + +The source of timestamps must be **deterministic per repository**. + +### Cached Metadata + +* Each post stores a cached source timestamp representing the last **successful** materialization +* Failed runs **must not** update cached timestamps + +### Incremental Rule + +On each run: + +* Compare current source timestamp vs cached timestamp +* Only posts where `source_timestamp > cached_timestamp` are eligible for update +* Unchanged posts are treated as no-ops + +--- + +## Post Identity + +Each WordPress post must store stable metadata: + +``` +_wp_materialize_source = <source_name>:<relative_path> +``` + +This identity is used for: + +* Idempotent updates +* Safe renames +* Incremental comparison + +--- + +## Atomicity Guarantee + +* If **any** dry-run validation fails: + + * No WordPress calls are executed + * No categories are created + * No cached timestamps are updated + +* Apply phase executes only after full validation succeeds + +--- + +## Error Handling + +All errors are fatal: + +* Missing manifests +* Invalid inheritance +* Invalid Markdown +* Missing or ambiguous titles +* Invalid category/tag resolution +* Timestamp resolution failures + +No warnings. No partial success. + +--- + +## Implementation Notes + +* Language: **Python** +* The implementation must prioritize: + + * Determinism + * Readable error messages + * Testable pure functions for evaluation phase + +This document is the **contract**. Implementation must not relax or reinterpret it. diff --git a/README.md b/README.md new file mode 100644 index 0000000..c260a37 --- /dev/null +++ b/README.md @@ -0,0 +1,113 @@ +# wp-materialize + +`wp-materialize` is an automation compiler that materializes specified Markdown files into WordPress posts. +Git/filesystem state is the single source of truth; WordPress is a derived view. 
+ +## Install + +```bash +python -m pip install -e . +``` + +## Source Layout + +The `wp_materialize` package lives directly under `src/` (single-package mapping). + +## Documentation + +- [configurations.md](configurations.md) +- [examples.md](examples.md) + +## Configuration + +Global config is required and must be a JSON object with these fields: + +1. `wordpress_root` (string, required): Path where `wp` CLI is executed. +2. `repo_storage_dir` (string, required): Directory where git repos are cloned. +3. `git_repositories` (array, optional): Git repos to manage. +4. `directories` (array, optional): Non-git directories to manage. + +`git_repositories` entries: + +1. `name` (string, required): Stable identifier for the repo. +2. `url` (string, required): Git clone URL. +3. `branch` (string, optional, default `main`): Branch to checkout. +4. `root_subdir` (string, optional): Subdirectory that contains manifests/content. + +`directories` entries: + +1. `name` (string, required): Stable identifier for the directory. +2. `path` (string, required): Filesystem path. +3. `root_subdir` (string, optional): Subdirectory that contains manifests/content. 
+ +Global config (required): + +``` +~/.config/wp-materialize/config.json +``` + +Example: + +```json +{ + "wordpress_root": "/var/www/wordpress", + "repo_storage_dir": "/home/user/wp-materialize-repos", + "git_repositories": [ + { + "name": "content-repo", + "url": "https://github.com/example/content-repo.git", + "branch": "main", + "root_subdir": "posts" + } + ], + "directories": [ + { + "name": "local-notes", + "path": "/home/user/notes", + "root_subdir": "wordpress" + } + ] +} +``` + +State is stored separately (created on first successful apply): + +``` +~/.config/wp-materialize/state.json +``` + +## Usage + +Dry-run evaluation: + +```bash +wp-materialize evaluate +``` + +Apply (evaluate, then materialize): + +```bash +wp-materialize apply +``` + +Skip git sync: + +```bash +wp-materialize apply --no-sync +``` + +## Manifests + +Each managed directory must contain a `.wp-materialize.json` manifest. See `configurations.md` for the manifest guide. + +## Python Prerequisites + +1. Python 3.10+ +2. Packages: + - `Markdown>=3.6` + +Install dependencies: + +```bash +python -m pip install -r requirements.txt +``` diff --git a/configurations.md b/configurations.md new file mode 100644 index 0000000..0e9775a --- /dev/null +++ b/configurations.md @@ -0,0 +1,99 @@ +# Configuration Reference + +This document defines every JSON field for both the global config and per-directory manifests. + +## Global Config (`~/.config/wp-materialize/config.json`) + +Top-level fields: + +1. `wordpress_root` (string, required) + Path to the WordPress root directory where the `wp` CLI is executed. +2. `repo_storage_dir` (string, required) + Directory where git repositories are cloned or updated. +3. `git_repositories` (array, optional) + List of git repositories to manage. Default is an empty list. +4. `directories` (array, optional) + List of non-git directories to manage. Default is an empty list. + +`git_repositories` entries: + +1. 
`name` (string, required) + Stable identifier for the repository. Used to build post identity. +2. `url` (string, required) + Git clone URL. +3. `branch` (string, optional, default `main`) + Branch to checkout and pull. +4. `root_subdir` (string, optional) + Subdirectory within the repo that contains `.wp-materialize.json` and content. + If omitted or `null`, the repo root is used. + +`directories` entries: + +1. `name` (string, required) + Stable identifier for the directory. Used to build post identity. +2. `path` (string, required) + Filesystem path to the directory. +3. `root_subdir` (string, optional) + Subdirectory within the directory that contains `.wp-materialize.json` and content. + If omitted or `null`, the directory root is used. + +## Per-Directory Manifest (`.wp-materialize.json`) + +Each managed directory must contain a manifest. Manifests define a scope boundary. +No implicit traversal is allowed; subdirectories must be listed explicitly. + +Top-level fields: + +1. `categories` (object, optional) + Inherited category paths for this directory and its children. +2. `tags` (object, optional) + Inherited tags for this directory and its children. +3. `subdirectories` (object, optional) + Explicit list of subdirectories to traverse. +4. `files` (object, optional) + Mapping of Markdown file names to file-level configuration. + +`categories`, `tags`, and `subdirectories` objects: + +1. `content` (array of strings, optional) + List of values for the given field. + For `categories`, each string is a hierarchical path such as `Systems/Infrastructure`. + For `subdirectories`, each string is a directory name under the current directory. +2. `inherit` (boolean, optional, default `true`) + If `true`, append to the parent effective list. + If `false`, replace the parent list entirely. + +`files` entries: + +Each key is a Markdown file name (relative to the manifest directory). +Each value is an object with the following fields: + +1. 
`title` (string, required if `use_heading_as_title` is not set) + Explicit WordPress post title. +2. `use_heading_as_title` (object, optional) + Extracts a heading from the Markdown as the title and removes that heading + from the body while promoting remaining headings by one level. +3. `categories` (object, optional) + Overrides categories for this file. Uses the same `content` and `inherit` fields + as the top-level `categories` object. +4. `tags` (object, optional) + Overrides tags for this file. Uses the same `content` and `inherit` fields + as the top-level `tags` object. + +`use_heading_as_title` object: + +1. `level` (integer, required) + Heading level to extract, from `1` to `6`. +2. `strict` (boolean, optional, default `true`) + If `true`, exactly one matching heading must exist. + +## Post Identity + +Each post is identified with: + +``` +_wp_materialize_source = <source_name>:<relative_path> +``` + +`source_name` is the `name` from the global config entry, and `relative_path` is +relative to the repo or directory root used for identity resolution. 
diff --git a/examples.md b/examples.md new file mode 100644 index 0000000..2fc4e74 --- /dev/null +++ b/examples.md @@ -0,0 +1,105 @@ +# Configuration Examples + +Reference: [configurations.md](configurations.md) + +## Per-Directory Manifests + +Root directory manifest (`.wp-materialize.json`): + +```json +{ + "categories": { "content": ["Systems", "Infrastructure"], "inherit": true }, + "tags": { "content": ["automation", "wordpress"], "inherit": true }, + "subdirectories": { "content": ["design", "notes"], "inherit": true }, + "files": { + "post.md": { + "title": "Explicit Title", + "categories": { "content": ["Overrides"], "inherit": false }, + "tags": { "content": ["extra"], "inherit": true } + }, + "essay.md": { + "use_heading_as_title": { "level": 1, "strict": true } + } + } +} +``` + +Subdirectory manifest (`design/.wp-materialize.json`): + +```json +{ + "categories": { "content": ["Design"], "inherit": true }, + "tags": { "content": ["ui"], "inherit": true }, + "subdirectories": { "content": [], "inherit": false }, + "files": { + "system.md": { + "use_heading_as_title": { "level": 1, "strict": true } + } + } +} +``` + +## Minimal (Directories Only) + +```json +{ + "wordpress_root": "/var/www/wordpress", + "repo_storage_dir": "/home/user/wp-materialize-repos", + "git_repositories": [], + "directories": [ + { + "name": "local-notes", + "path": "/home/user/notes", + "root_subdir": "wordpress" + } + ] +} +``` + +## Git Repositories + Directories + +```json +{ + "wordpress_root": "/var/www/wordpress", + "repo_storage_dir": "/home/user/wp-materialize-repos", + "git_repositories": [ + { + "name": "content-repo", + "url": "https://github.com/example/content-repo.git", + "branch": "main", + "root_subdir": "posts" + }, + { + "name": "docs-repo", + "url": "git@github.com:example/docs-repo.git", + "branch": "main", + "root_subdir": null + } + ], + "directories": [ + { + "name": "local-notes", + "path": "/home/user/notes", + "root_subdir": "wordpress" + } + ] +} +``` + 
+## Single Git Repository + +```json +{ + "wordpress_root": "/var/www/wordpress", + "repo_storage_dir": "/home/user/wp-materialize-repos", + "git_repositories": [ + { + "name": "content-repo", + "url": "https://github.com/example/content-repo.git", + "branch": "main", + "root_subdir": "posts" + } + ], + "directories": [] +} +``` diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..f8e0499 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,20 @@ +[build-system] +requires = ["setuptools>=68", "wheel"] +build-backend = "setuptools.build_meta" + +[project] +name = "wp-materialize" +version = "0.1.0" +description = "Declarative compiler that materializes Markdown into WordPress posts." +readme = "README.md" +requires-python = ">=3.10" +dependencies = [ + "Markdown>=3.6", +] + +[project.scripts] +wp-materialize = "wp_materialize.cli:main" + +[tool.setuptools] +package-dir = { "wp_materialize" = "src" } +packages = ["wp_materialize"] diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..cb286b3 --- /dev/null +++ b/requirements.txt @@ -0,0 +1 @@ +Markdown>=3.6 diff --git a/src/__init__.py b/src/__init__.py new file mode 100644 index 0000000..a05eb9a --- /dev/null +++ b/src/__init__.py @@ -0,0 +1,3 @@ +__all__ = ["__version__"] + +__version__ = "0.1.0" diff --git a/src/apply.py b/src/apply.py new file mode 100644 index 0000000..e1cc99b --- /dev/null +++ b/src/apply.py @@ -0,0 +1,101 @@ +from __future__ import annotations + +import time +from typing import Dict, List, Set + +from .errors import WordPressError +from .models import EvaluationResult, PostPlan +from .state import PostState, State, save_state +from .wp_cli import CategoryTerm, WordPressCLI + + +def apply_changes( + result: EvaluationResult, + wp: WordPressCLI, + state: State, + state_path, +) -> None: + categories = wp.list_categories() + category_map = _build_category_map(categories) + + _create_missing_categories(result, wp, category_map) + + successes: 
Set[str] = set() + try: + for post in result.posts: + if not post.should_update: + continue + _apply_post(post, wp, category_map) + state.posts[post.identity] = PostState( + source_timestamp=post.source_timestamp, + materialized_at=int(time.time()), + ) + successes.add(post.identity) + except Exception: + if successes: + save_state(state_path, state) + raise + + save_state(state_path, state) + + +def _build_category_map(categories: List[CategoryTerm]) -> Dict[tuple[int, str], int]: + return {(category.parent, category.name): category.term_id for category in categories} + + +def _create_missing_categories( + result: EvaluationResult, + wp: WordPressCLI, + category_map: Dict[tuple[int, str], int], +) -> None: + paths = result.categories_to_create.missing_paths + paths = sorted(paths, key=len) + seen: Set[tuple[str, ...]] = set() + for segments in paths: + key = tuple(segments) + if key in seen: + continue + seen.add(key) + parent = 0 + for segment in segments: + map_key = (parent, segment) + if map_key in category_map: + parent = category_map[map_key] + continue + new_id = wp.create_category(segment, parent) + category_map[(parent, segment)] = new_id + parent = new_id + + +def _apply_post(post: PostPlan, wp: WordPressCLI, category_map: Dict[tuple[int, str], int]) -> None: + category_ids: List[int] = [] + for path in post.categories: + segments = [segment for segment in path.split("/") if segment] + if not segments: + continue + parent = 0 + for segment in segments: + map_key = (parent, segment) + if map_key not in category_map: + raise WordPressError(f"Missing category during apply: {path}") + parent = category_map[map_key] + category_ids.append(parent) + + post_id = wp.find_post_id(post.identity) + if post_id is None: + wp.create_post( + title=post.title, + content=post.html, + categories=category_ids, + tags=post.tags, + source_identity=post.identity, + ) + return + + wp.update_post( + post_id=post_id, + title=post.title, + content=post.html, + 
categories=category_ids, + tags=post.tags, + ) diff --git a/src/cli.py b/src/cli.py new file mode 100644 index 0000000..d64132f --- /dev/null +++ b/src/cli.py @@ -0,0 +1,101 @@ +from __future__ import annotations + +import argparse +import json +import sys +from pathlib import Path + +from .apply import apply_changes +from .config import load_config +from .errors import ConfigurationError, MaterializeError, ValidationError +from .evaluation import evaluate +from .state import load_state +from .wp_cli import WordPressCLI + + +def main() -> int: + parser = argparse.ArgumentParser(description="wp-materialize") + parser.add_argument("command", nargs="?", choices=["evaluate", "apply"], default="evaluate") + parser.add_argument("--config", type=Path, default=_default_config_path()) + parser.add_argument("--state", type=Path, default=_default_state_path()) + parser.add_argument("--no-sync", action="store_true", help="Skip git clone/pull") + parser.add_argument("--json", action="store_true", help="Output evaluation summary as JSON") + + args = parser.parse_args() + + try: + config = load_config(args.config) + state = load_state(args.state) + result = evaluate(config, state, sync_repos=not args.no_sync) + except ValidationError as exc: + _print_validation_error(exc) + return 1 + except (ConfigurationError, MaterializeError) as exc: + print(f"Error: {exc}", file=sys.stderr) + return 1 + + if args.json: + print(_evaluation_json(result)) + else: + print(_evaluation_summary(result)) + + if args.command == "apply": + wp = WordPressCLI(config.wordpress_root) + try: + apply_changes(result, wp, state, args.state) + except MaterializeError as exc: + print(f"Error: {exc}", file=sys.stderr) + return 1 + print("Apply complete") + + return 0 + + +def _default_config_path() -> Path: + return Path.home() / ".config" / "wp-materialize" / "config.json" + + +def _default_state_path() -> Path: + return Path.home() / ".config" / "wp-materialize" / "state.json" + + +def 
_evaluation_summary(result) -> str: + total = len(result.posts) + updates = sum(1 for post in result.posts if post.should_update) + categories = len(result.categories_to_create.missing_paths) + lines = [ + f"Posts: {total}", + f"Posts to update: {updates}", + f"Categories to create: {categories}", + ] + return "\n".join(lines) + + +def _evaluation_json(result) -> str: + payload = { + "posts": [ + { + "identity": post.identity, + "relative_path": post.relative_path, + "title": post.title, + "source_timestamp": post.source_timestamp, + "cached_timestamp": post.cached_timestamp, + "should_update": post.should_update, + "categories": post.categories, + "tags": post.tags, + } + for post in result.posts + ], + "categories_to_create": result.categories_to_create.missing_paths, + } + return json.dumps(payload, indent=2) + + +def _print_validation_error(exc: ValidationError) -> None: + print("Validation failed:", file=sys.stderr) + for issue in exc.issues: + print(f"- {issue.format()}", file=sys.stderr) + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/src/config.py b/src/config.py new file mode 100644 index 0000000..ae650a0 --- /dev/null +++ b/src/config.py @@ -0,0 +1,104 @@ +from __future__ import annotations + +import json +from dataclasses import dataclass +from pathlib import Path +from typing import List, Optional + +from .errors import ConfigurationError + + +@dataclass(frozen=True) +class GitRepository: + name: str + url: str + branch: str + root_subdir: Optional[str] + + +@dataclass(frozen=True) +class DirectorySpec: + name: str + path: Path + root_subdir: Optional[str] + + +@dataclass(frozen=True) +class Config: + wordpress_root: Path + repo_storage_dir: Path + git_repositories: List[GitRepository] + directories: List[DirectorySpec] + + +def _expect_keys(obj: dict, allowed: set[str], context: str) -> None: + extra = set(obj.keys()) - allowed + if extra: + raise ConfigurationError(f"Unexpected keys in {context}: {sorted(extra)}") + + +def 
load_config(path: Path) -> Config: + if not path.exists(): + raise ConfigurationError(f"Config not found: {path}") + try: + data = json.loads(path.read_text()) + except json.JSONDecodeError as exc: + raise ConfigurationError(f"Invalid JSON in config: {exc}") from exc + + if not isinstance(data, dict): + raise ConfigurationError("Config must be a JSON object") + + _expect_keys(data, {"wordpress_root", "repo_storage_dir", "git_repositories", "directories"}, "config") + + wordpress_root = _require_path(data, "wordpress_root", required=True) + repo_storage_dir = _require_path(data, "repo_storage_dir", required=True) + + git_repositories = [] + for idx, repo in enumerate(data.get("git_repositories", []) or []): + if not isinstance(repo, dict): + raise ConfigurationError(f"git_repositories[{idx}] must be an object") + _expect_keys(repo, {"name", "url", "branch", "root_subdir"}, f"git_repositories[{idx}]") + name = _require_str(repo, "name", context=f"git_repositories[{idx}]") + url = _require_str(repo, "url", context=f"git_repositories[{idx}]") + branch = repo.get("branch", "main") + if not isinstance(branch, str): + raise ConfigurationError(f"git_repositories[{idx}].branch must be a string") + root_subdir = repo.get("root_subdir") + if root_subdir is not None and not isinstance(root_subdir, str): + raise ConfigurationError(f"git_repositories[{idx}].root_subdir must be a string") + git_repositories.append(GitRepository(name=name, url=url, branch=branch, root_subdir=root_subdir)) + + directories = [] + for idx, entry in enumerate(data.get("directories", []) or []): + if not isinstance(entry, dict): + raise ConfigurationError(f"directories[{idx}] must be an object") + _expect_keys(entry, {"name", "path", "root_subdir"}, f"directories[{idx}]") + name = _require_str(entry, "name", context=f"directories[{idx}]") + path_value = _require_str(entry, "path", context=f"directories[{idx}]") + root_subdir = entry.get("root_subdir") + if root_subdir is not None and not 
isinstance(root_subdir, str): + raise ConfigurationError(f"directories[{idx}].root_subdir must be a string") + directories.append(DirectorySpec(name=name, path=Path(path_value), root_subdir=root_subdir)) + + return Config( + wordpress_root=wordpress_root, + repo_storage_dir=repo_storage_dir, + git_repositories=git_repositories, + directories=directories, + ) + + +def _require_str(data: dict, key: str, context: str) -> str: + value = data.get(key) + if not isinstance(value, str) or not value.strip(): + raise ConfigurationError(f"{context}.{key} must be a non-empty string") + return value + + +def _require_path(data: dict, key: str, required: bool) -> Path: + value = data.get(key) + if value is None and not required: + return Path(".") + if not isinstance(value, str) or not value.strip(): + raise ConfigurationError(f"{key} must be a non-empty string") + return Path(value) diff --git a/src/errors.py b/src/errors.py new file mode 100644 index 0000000..f0d4db8 --- /dev/null +++ b/src/errors.py @@ -0,0 +1,34 @@ +from __future__ import annotations + +from dataclasses import dataclass +from typing import Iterable, List + + +class MaterializeError(Exception): + """Base error for wp-materialize.""" + + +@dataclass +class ValidationIssue: + message: str + context: str | None = None + + def format(self) -> str: + if self.context: + return f"{self.context}: {self.message}" + return self.message + + +class ValidationError(MaterializeError): + def __init__(self, issues: Iterable[ValidationIssue]): + self.issues: List[ValidationIssue] = list(issues) + message = "\n".join(issue.format() for issue in self.issues) + super().__init__(message) + + +class ConfigurationError(MaterializeError): + pass + + +class WordPressError(MaterializeError): + pass diff --git a/src/evaluation.py b/src/evaluation.py new file mode 100644 index 0000000..af98e48 --- /dev/null +++ b/src/evaluation.py @@ -0,0 +1,331 @@ +from __future__ import annotations + +from dataclasses import dataclass +from pathlib 
def evaluate(config: Config, state: State, sync_repos: bool) -> EvaluationResult:
    """Validate every configured source and build the materialization plan.

    This is the dry-run phase: nothing is written to WordPress. All problems
    found along the way are collected and reported together.

    Args:
        config: Loaded tool configuration (sources and WordPress root).
        state: Cached materialization state used to decide which posts
            need updating.
        sync_repos: Whether git sources may be cloned / fetched first.

    Returns:
        The planned posts plus the category paths that must be created.

    Raises:
        ValidationError: If any issue was recorded while loading sources,
            evaluating manifests, or inspecting WordPress.
    """
    issues: List[ValidationIssue] = []

    sources = _load_sources(config, sync_repos, issues)

    posts: List[PostPlan] = []
    for source, content_root in sources:
        _evaluate_directory(
            source=source,
            directory=content_root,
            context=_Context(
                categories=InheritList(),
                tags=InheritList(),
                subdirectories=InheritList(),
                manifest_chain=[],
            ),
            state=state,
            issues=issues,
            posts=posts,
        )

    missing_categories: List[List[str]] = []
    if shutil.which("wp") is None:
        # Report the missing binary once; invoking it anyway would only add
        # a second, redundant "file not found" issue for the same cause.
        issues.append(ValidationIssue("wp CLI not found in PATH", context=str(config.wordpress_root)))
    else:
        try:
            wp = WordPressCLI(config.wordpress_root)
            categories = wp.list_categories()
            tag_names: Set[str] = {tag.name for tag in wp.list_tags()}
        except Exception as exc:
            # Boundary handler: any failure talking to WordPress becomes a
            # validation issue so that all problems are reported together.
            issues.append(ValidationIssue(str(exc), context=str(config.wordpress_root)))
        else:
            # Only compare against WordPress terms that were actually
            # loaded; planning against empty fallbacks would flag every tag
            # as nonexistent.
            missing_categories = _plan_categories(posts, categories, issues, tag_names)

    if issues:
        raise ValidationError(issues)

    return EvaluationResult(posts=posts, categories_to_create=CategoryPlan(missing_paths=missing_categories))
def _evaluate_directory(
    source: Source,
    directory: Path,
    context: _Context,
    state: State,
    issues: List[ValidationIssue],
    posts: List[PostPlan],
) -> None:
    """Evaluate one directory's manifest, then recurse into its subdirectories.

    For every file listed in the directory's ``.wp-materialize.json`` a
    ``PostPlan`` is appended to ``posts``. Problems are appended to
    ``issues`` and the offending file (or subdirectory) is skipped; nothing
    is raised from here.

    Args:
        source: The source this directory belongs to.
        directory: Directory whose manifest is evaluated.
        context: Categories, tags, subdirectories and manifest chain
            inherited from the parent directory.
        state: Cached state used to compute ``should_update``.
        issues: Accumulator for validation issues (mutated).
        posts: Accumulator for planned posts (mutated).
    """
    manifest_path = directory / ".wp-materialize.json"
    manifest = load_manifest(manifest_path, issues)
    if manifest is None:
        # load_manifest has already recorded why the manifest is unusable.
        return

    effective_categories = _merge_inherit(context.categories, manifest.categories)
    effective_tags = _merge_inherit(context.tags, manifest.tags)
    effective_subdirs = _merge_inherit(context.subdirectories, manifest.subdirectories)

    # Every manifest from the source root down to this directory; their
    # timestamps take part in each file's source timestamp below.
    manifest_chain = context.manifest_chain + [manifest.path]

    for file_name, spec in manifest.files.items():
        file_path = directory / file_name
        if not file_path.exists():
            issues.append(ValidationIssue("File not found", context=str(file_path)))
            continue

        try:
            content = file_path.read_text(encoding="utf-8")
        except Exception as exc:
            issues.append(ValidationIssue(f"Failed to read file: {exc}", context=str(file_path)))
            continue

        title = spec.title
        markdown_body = content
        if spec.use_heading_level is not None:
            # A configured heading level takes precedence over spec.title:
            # the extracted heading replaces it and is removed from the body.
            extracted = extract_title(
                content,
                level=spec.use_heading_level,
                strict=spec.use_heading_strict,
                context=str(file_path),
                issues=issues,
            )
            if extracted is None:
                continue
            title, markdown_body = extracted
        elif not title:
            issues.append(ValidationIssue("Missing title (title or use_heading_as_title required)", context=str(file_path)))
            continue

        resolved_categories = _resolve_overrides(effective_categories, spec.categories)
        resolved_tags = _resolve_overrides(effective_tags, spec.tags)

        resolved_categories = _normalize_list(resolved_categories, "category", str(file_path), issues)
        resolved_tags = _normalize_list(resolved_tags, "tag", str(file_path), issues)

        html = convert_markdown(markdown_body, context=str(file_path), issues=issues)
        if html is None:
            continue

        relative_path = _relative_path(file_path, source.identity_root, issues)
        if relative_path is None:
            continue

        timestamps = []
        ts = _timestamp_for_path(source, source.identity_root, relative_path, issues)
        if ts is None:
            continue
        timestamps.append(ts)

        # A failed manifest lookup only skips that manifest's timestamp (the
        # issue is still recorded); the file itself is still planned.
        # NOTE(review): these lookups are repeated for every file in the
        # directory although the manifest chain does not change per file.
        for manifest_file in manifest_chain:
            manifest_rel = _relative_path(manifest_file, source.identity_root, issues)
            if manifest_rel is None:
                continue
            ts_manifest = _timestamp_for_path(source, source.identity_root, manifest_rel, issues)
            if ts_manifest is None:
                continue
            timestamps.append(ts_manifest)

        # The newest of the file and all contributing manifests decides
        # whether the cached materialization is stale.
        source_timestamp = max(timestamps)
        identity = f"{source.name}:{relative_path}"
        cached_entry = state.posts.get(identity)
        cached_ts = cached_entry.source_timestamp if cached_entry else None
        should_update = cached_ts is None or source_timestamp > cached_ts

        posts.append(
            PostPlan(
                source=source,
                identity=identity,
                relative_path=relative_path,
                absolute_path=file_path,
                title=title,
                html=html,
                categories=resolved_categories,
                tags=resolved_tags,
                source_timestamp=source_timestamp,
                cached_timestamp=cached_ts,
                should_update=should_update,
            )
        )

    # NOTE(review): effective_subdirs includes names inherited from the
    # parent and each is resolved relative to *this* directory, so a child
    # manifest that inherits "subdirectories" looks for the parent's entries
    # beneath itself - confirm this is the intended semantics.
    for subdir in effective_subdirs.content:
        subdir_path = directory / subdir
        if not subdir_path.exists():
            issues.append(ValidationIssue("Missing subdirectory", context=str(subdir_path)))
            continue
        _evaluate_directory(
            source=source,
            directory=subdir_path,
            context=_Context(
                categories=effective_categories,
                tags=effective_tags,
                subdirectories=effective_subdirs,
                manifest_chain=manifest_chain,
            ),
            state=state,
            issues=issues,
            posts=posts,
        )
def _plan_categories(
    posts: List[PostPlan],
    categories,  # list of CategoryTerm
    issues: List[ValidationIssue],
    existing_tags: Set[str],
) -> List[List[str]]:
    """Work out which category paths the pending posts still need.

    Only posts flagged ``should_update`` are inspected. Each of their tags
    that is not in ``existing_tags`` is recorded as an issue. Each category
    path is walked from the root (parent id ``0``) through the existing
    terms; a path with any unresolved segment is reported in full.

    Args:
        posts: Planned posts.
        categories: Existing category terms (``term_id``, ``name``,
            ``parent``).
        issues: Accumulator for validation issues (mutated).
        existing_tags: Names of tags already present in WordPress.

    Returns:
        The missing category paths as segment lists, de-duplicated, in
        first-seen order.
    """
    term_ids: Dict[tuple[int, str], int] = {
        (term.parent, term.name): term.term_id for term in categories
    }

    planned: List[List[str]] = []
    already_planned: Set[tuple[str, ...]] = set()

    for post in (candidate for candidate in posts if candidate.should_update):
        for tag in post.tags:
            if tag not in existing_tags:
                issues.append(ValidationIssue(f"Tag does not exist: {tag}", context=post.relative_path))

        for raw_path in post.categories:
            parts = [piece for piece in raw_path.split("/") if piece]
            if not parts:
                continue

            parent_id = 0
            for name in parts:
                try:
                    parent_id = term_ids[(parent_id, name)]
                except KeyError:
                    break
            else:
                # Every segment resolved: the whole path already exists.
                continue

            signature = tuple(parts)
            if signature in already_planned:
                continue
            already_planned.add(signature)
            planned.append(list(parts))

    return planned
def load_manifest(path: Path, issues: list[ValidationIssue]) -> Manifest | None:
    """Load and validate one ``.wp-materialize.json`` manifest.

    Problems are appended to ``issues`` rather than raised. Structural
    problems of the manifest itself make the function return ``None``; a
    problem inside a single ``files`` entry only drops that entry.

    Args:
        path: Location of the manifest file.
        issues: Accumulator for validation issues (mutated).

    Returns:
        The parsed manifest, or ``None`` when it is missing, unreadable, not
        valid JSON, not an object, or contains unexpected top-level keys.
    """
    if not path.exists():
        issues.append(ValidationIssue("Missing manifest", context=str(path)))
        return None

    try:
        # Decode explicitly as UTF-8 (as the Markdown sources are) instead
        # of depending on the platform's locale encoding.
        data = json.loads(path.read_text(encoding="utf-8"))
    except (OSError, UnicodeDecodeError) as exc:
        # An unreadable manifest is a validation issue, not a crash.
        issues.append(ValidationIssue(f"Failed to read manifest: {exc}", context=str(path)))
        return None
    except json.JSONDecodeError as exc:
        issues.append(ValidationIssue(f"Invalid JSON: {exc}", context=str(path)))
        return None

    if not isinstance(data, dict):
        issues.append(ValidationIssue("Manifest must be a JSON object", context=str(path)))
        return None

    allowed = {"categories", "tags", "subdirectories", "files"}
    extra = set(data.keys()) - allowed
    if extra:
        issues.append(ValidationIssue(f"Unexpected keys: {sorted(extra)}", context=str(path)))
        return None

    categories = _parse_inherit_list(data.get("categories"), issues, f"{path}:categories")
    tags = _parse_inherit_list(data.get("tags"), issues, f"{path}:tags")
    subdirectories = _parse_inherit_list(data.get("subdirectories"), issues, f"{path}:subdirectories")

    files: Dict[str, FileSpec] = {}
    raw_files = data.get("files", {}) or {}
    if not isinstance(raw_files, dict):
        issues.append(ValidationIssue("files must be an object", context=str(path)))
        return None

    # JSON object keys are always strings, so file_name needs no type check.
    for file_name, file_cfg in raw_files.items():
        if not isinstance(file_cfg, dict):
            issues.append(ValidationIssue(f"{file_name} must be an object", context=str(path)))
            continue
        extra_file = set(file_cfg.keys()) - {"title", "use_heading_as_title", "categories", "tags"}
        if extra_file:
            issues.append(
                ValidationIssue(f"{file_name} has unexpected keys: {sorted(extra_file)}", context=str(path))
            )
            continue

        title = file_cfg.get("title")
        if title is not None and (not isinstance(title, str) or not title.strip()):
            issues.append(ValidationIssue(f"{file_name}.title must be a non-empty string", context=str(path)))
            continue

        use_heading = file_cfg.get("use_heading_as_title")
        use_level = None
        use_strict = True
        if use_heading is not None:
            if not isinstance(use_heading, dict):
                issues.append(ValidationIssue(f"{file_name}.use_heading_as_title must be an object", context=str(path)))
                continue
            extra_heading = set(use_heading.keys()) - {"level", "strict"}
            if extra_heading:
                issues.append(
                    ValidationIssue(
                        f"{file_name}.use_heading_as_title has unexpected keys: {sorted(extra_heading)}",
                        context=str(path),
                    )
                )
                continue
            level = use_heading.get("level")
            strict = use_heading.get("strict", True)
            # bool is a subclass of int, so JSON `true` would otherwise be
            # accepted as heading level 1.
            if isinstance(level, bool) or not isinstance(level, int) or not 1 <= level <= 6:
                issues.append(ValidationIssue(f"{file_name}.use_heading_as_title.level must be 1-6", context=str(path)))
                continue
            if not isinstance(strict, bool):
                issues.append(
                    ValidationIssue(f"{file_name}.use_heading_as_title.strict must be boolean", context=str(path))
                )
                continue
            use_level = level
            use_strict = strict

        categories_override = _parse_inherit_list(file_cfg.get("categories"), issues, f"{path}:{file_name}:categories")
        tags_override = _parse_inherit_list(file_cfg.get("tags"), issues, f"{path}:{file_name}:tags")

        files[file_name] = FileSpec(
            title=title,
            use_heading_level=use_level,
            use_heading_strict=use_strict,
            categories=categories_override,
            tags=tags_override,
        )

    return Manifest(
        path=path,
        categories=categories,
        tags=tags,
        subdirectories=subdirectories,
        files=files,
    )
_HEADING_RE = re.compile(r"^(#{1,6})(\s+.*)$")
# Opening or closing marker of a fenced code block: up to three spaces of
# indentation followed by at least three backticks or tildes.
_FENCE_RE = re.compile(r"^ {0,3}(`{3,}|~{3,})")


def _fenced_code_mask(lines: list[str]) -> list[bool]:
    """Return, per line, whether it belongs to a fenced code block.

    The fence lines themselves count as part of the block. An unclosed fence
    extends to the end of the input.
    """
    mask: list[bool] = []
    open_fence = ""
    for line in lines:
        found = _FENCE_RE.match(line)
        marker = found.group(1) if found else ""
        if open_fence:
            mask.append(True)
            closes = (
                bool(marker)
                and marker[0] == open_fence[0]
                and len(marker) >= len(open_fence)
                and line.strip() == marker
            )
            if closes:
                open_fence = ""
        elif marker:
            mask.append(True)
            open_fence = marker
        else:
            mask.append(False)
    return mask


def extract_title(markdown_text: str, level: int, strict: bool, context: str, issues: list[ValidationIssue]) -> tuple[str, str] | None:
    """Pull the post title out of a Markdown heading.

    The first heading of exactly ``level`` ``#`` characters becomes the
    title. Its line is removed from the body and the remaining headings are
    promoted by one level. Lines inside fenced code blocks are never treated
    as headings, and a heading's text is taken from its own line only.

    Args:
        markdown_text: Full Markdown source.
        level: Heading level (number of ``#``) that carries the title.
        strict: When true, exactly one such heading must exist.
        context: Label attached to any recorded issue.
        issues: Accumulator for validation issues (mutated).

    Returns:
        ``(title, body)`` on success, or ``None`` after recording an issue
        (no heading, several headings in strict mode, or an empty title).
    """
    lines = markdown_text.splitlines()
    in_code = _fenced_code_mask(lines)

    # Match per line with horizontal whitespace only, so an empty "#" line
    # cannot swallow the following line as its title.
    heading = re.compile(rf"^{'#' * level}[ \t]+(.*)$")
    matches: list[tuple[int, str]] = []
    for index, line in enumerate(lines):
        if in_code[index]:
            continue
        found = heading.match(line)
        if found:
            matches.append((index, found.group(1)))

    if strict and len(matches) != 1:
        issues.append(
            ValidationIssue(
                f"Expected exactly one level-{level} heading, found {len(matches)}",
                context=context,
            )
        )
        return None
    if not matches:
        issues.append(ValidationIssue(f"Missing level-{level} heading", context=context))
        return None

    line_index, raw_title = matches[0]
    title = raw_title.strip()
    if not title:
        issues.append(ValidationIssue("Heading title cannot be empty", context=context))
        return None

    # The index comes from the same splitlines() list the line is removed
    # from, so unusual line separators cannot shift it.
    del lines[line_index]
    body = _promote_headings("\n".join(lines))
    return title, body


def _promote_headings(text: str) -> str:
    """Raise every heading by one level (``##`` -> ``#``).

    Level-1 headings stay at level 1. Lines inside fenced code blocks are
    left untouched, so ``#`` comments in code samples are not rewritten.
    """
    lines = text.splitlines()
    in_code = _fenced_code_mask(lines)
    promoted_lines = []
    for line, fenced in zip(lines, in_code):
        found = None if fenced else _HEADING_RE.match(line)
        if found is None:
            promoted_lines.append(line)
            continue
        hashes, rest = found.groups()
        depth = max(len(hashes) - 1, 1)
        promoted_lines.append("#" * depth + rest)
    return "\n".join(promoted_lines)
@dataclass(frozen=True)
class InheritList:
    """A list of strings paired with an "inherit from parent" flag.

    This class only stores the two values. The merging itself is done by
    the consumers: evaluation's ``_merge_inherit`` and ``_resolve_overrides``
    prepend the parent's ``content`` when ``inherit`` is true and use this
    list alone otherwise.

    ``frozen=True`` prevents rebinding the attributes; the list object held
    in ``content`` is still mutable.
    """

    # The entries declared at this level; defaults to a fresh empty list.
    content: List[str] = field(default_factory=list)
    # Whether the parent's entries are combined with ``content``; defaults
    # to True.
    inherit: bool = True
class WordPressCLI:
    """Thin wrapper around the ``wp`` command-line tool.

    Every command runs with the WordPress root as its working directory.
    Any failure to start or run a command is reported as ``WordPressError``.
    """

    def __init__(self, root: Path):
        """Remember ``root``, the directory every ``wp`` command runs in."""
        self.root = root

    def list_categories(self) -> List[CategoryTerm]:
        """Return all category terms; a null parent is normalised to ``0``."""
        data = self._run_json([
            "wp",
            "term",
            "list",
            "category",
            "--fields=term_id,name,parent",
            "--format=json",
        ])
        return [
            CategoryTerm(
                term_id=int(entry["term_id"]),
                name=entry["name"],
                parent=int(entry["parent"]) if entry.get("parent") is not None else 0,
            )
            for entry in data
        ]

    def list_tags(self) -> List[TagTerm]:
        """Return all ``post_tag`` terms."""
        data = self._run_json([
            "wp",
            "term",
            "list",
            "post_tag",
            "--fields=term_id,name",
            "--format=json",
        ])
        return [TagTerm(term_id=int(entry["term_id"]), name=entry["name"]) for entry in data]

    def create_category(self, name: str, parent: int) -> int:
        """Create a category under ``parent`` and return the new term id.

        Raises:
            WordPressError: If the command fails or prints a non-integer id.
        """
        result = self._run(
            [
                "wp",
                "term",
                "create",
                "category",
                name,
                f"--parent={parent}",
                "--porcelain",
            ],
            capture_output=True,
        )
        output = result.stdout.strip()
        try:
            return int(output)
        except ValueError as exc:
            raise WordPressError(f"Invalid category id from wp cli: {output}") from exc

    def find_post_id(self, source_identity: str) -> Optional[int]:
        """Look up the post whose ``_wp_materialize_source`` meta matches.

        Returns:
            The first id printed by ``wp post list``, or ``None`` when the
            command prints nothing.

        Raises:
            WordPressError: If the command fails or prints a non-integer id.
        """
        result = self._run(
            [
                "wp",
                "post",
                "list",
                "--post_type=post",
                "--meta_key=_wp_materialize_source",
                f"--meta_value={source_identity}",
                "--field=ID",
            ],
            capture_output=True,
        )
        output = result.stdout.strip()
        if not output:
            return None
        try:
            return int(output.splitlines()[0])
        except ValueError as exc:
            raise WordPressError(f"Invalid post id from wp cli: {output}") from exc

    def create_post(
        self,
        title: str,
        content: str,
        categories: List[int],
        tags: List[str],
        source_identity: str,
    ) -> int:
        """Create a published post tagged with its source identity.

        The identity is stored in the ``_wp_materialize_source`` post meta,
        which is the key ``find_post_id`` searches on.

        Returns:
            The id of the new post.

        Raises:
            WordPressError: If the command fails or prints a non-integer id.
        """
        payload = json.dumps({"_wp_materialize_source": source_identity})
        args = [
            "wp",
            "post",
            "create",
            "--post_type=post",
            "--post_status=publish",
            f"--post_title={title}",
            f"--post_content={content}",
            f"--post_category={','.join(str(cat) for cat in categories)}",
            f"--tags_input={','.join(tags)}",
            f"--meta_input={payload}",
            "--porcelain",
        ]
        result = self._run(args, capture_output=True)
        output = result.stdout.strip()
        try:
            return int(output)
        except ValueError as exc:
            raise WordPressError(f"Invalid post id from wp cli: {output}") from exc

    def update_post(
        self,
        post_id: int,
        title: str,
        content: str,
        categories: List[int],
        tags: List[str],
    ) -> None:
        """Overwrite title, content, categories and tags of an existing post.

        Raises:
            WordPressError: If the command fails.
        """
        args = [
            "wp",
            "post",
            "update",
            str(post_id),
            f"--post_title={title}",
            f"--post_content={content}",
            f"--post_category={','.join(str(cat) for cat in categories)}",
            f"--tags_input={','.join(tags)}",
        ]
        # Capture output so that a failure carries wp's stderr in the raised
        # WordPressError instead of an empty detail line.
        self._run(args, capture_output=True)

    def _run_json(self, cmd: List[str]):
        """Run ``cmd`` and return its stdout parsed as JSON.

        Raises:
            WordPressError: If the command fails or stdout is not JSON.
        """
        result = self._run(cmd, capture_output=True)
        try:
            return json.loads(result.stdout)
        except json.JSONDecodeError as exc:
            raise WordPressError(f"Invalid JSON from wp cli: {exc}\n{result.stdout}") from exc

    def _run(self, cmd: List[str], capture_output: bool = False) -> subprocess.CompletedProcess:
        """Run ``cmd`` inside the WordPress root.

        Args:
            cmd: Argument vector; executed without a shell.
            capture_output: Capture stdout/stderr as text instead of letting
                them through to the terminal.

        Raises:
            WordPressError: If the process exits non-zero, or cannot be
                started at all (executable missing, working directory
                missing, permission denied).
        """
        try:
            return subprocess.run(
                cmd,
                cwd=str(self.root),
                check=True,
                text=True,
                capture_output=capture_output,
            )
        except subprocess.CalledProcessError as exc:
            stderr = exc.stderr.strip() if exc.stderr else ""
            raise WordPressError(f"WordPress CLI failed: {' '.join(cmd)}\n{stderr}") from exc
        except OSError as exc:
            # Keep the single-exception contract when the process never
            # started, rather than leaking a raw OSError to callers.
            raise WordPressError(f"WordPress CLI could not be started: {' '.join(cmd)}\n{exc}") from exc