From 2478e2f6f43ff37bd5cb6230303d041d5981df54 Mon Sep 17 00:00:00 2001 From: Peisong Xiao Date: Sun, 8 Feb 2026 23:38:11 +0000 Subject: [PATCH] v0.1.0 - initial release (#1) Reviewed-on: https://git.peisongxiao.com/peisongxiao/wp-materialize/pulls/1 --- .gitignore | 1 + README.md | 43 +++++- configurations.md | 77 ++++++++++- examples.md | 56 +++++++- pyproject.toml | 1 + requirements.txt | 1 + src/apply.py | 63 ++++++++- src/cli.py | 199 +++++++++++++++++++++++++-- src/config.py | 42 +++++- src/git_utils.py | 15 ++ src/local_export.py | 109 +++++++++++++++ src/manifest.py | 89 +++++++++++- src/markdown_utils.py | 66 ++++++++- src/models.py | 20 ++- src/scaffold.py | 154 +++++++++++++++++++++ src/{evaluation.py => validation.py} | 190 +++++++++++++++++++++---- src/wp_cli.py | 59 ++++++++ 17 files changed, 1118 insertions(+), 67 deletions(-) create mode 100644 src/local_export.py create mode 100644 src/scaffold.py rename src/{evaluation.py => validation.py} (62%) diff --git a/.gitignore b/.gitignore index e8709ef..36a45d3 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,4 @@ __pycache__/ *.egg-info/ .env .venv/ +testing/**/* diff --git a/README.md b/README.md index c260a37..8ba48c9 100644 --- a/README.md +++ b/README.md @@ -78,13 +78,13 @@ State is stored separately (created on first successful apply): ## Usage -Dry-run evaluation: +Dry-run validation: ```bash -wp-materialize evaluate +wp-materialize validate ``` -Apply (evaluate, then materialize): +Apply (validate, then materialize): ```bash wp-materialize apply @@ -96,6 +96,36 @@ Skip git sync: wp-materialize apply --no-sync ``` +Local export (writes per-post directories with HTML, metadata, and WP command): + +```bash +wp-materialize local /path/to/output +``` + +Notes: +1. The local export assumes every post is new and generates create commands. +2. The local export does not call WordPress or resolve category IDs. 
+ +Create placeholder config or manifest: + +```bash +wp-materialize new --config +wp-materialize new --config /path/to/config.json +wp-materialize new --manifest /path/to/content +``` + +Add files or subdirectories to a manifest (no validation): + +```bash +wp-materialize add-file /path/to/content/post.md +wp-materialize add-file /path/to/content/post.md /path/to/content +wp-materialize add-file /path/to/content/post.md --current + +wp-materialize add-subdir /path/to/content/notes +wp-materialize add-subdir /path/to/content/notes /path/to/content +wp-materialize add-subdir /path/to/content/notes --current +``` + ## Manifests Each managed directory must contain a `.wp-materialize.json` manifest. See `configurations.md` for the manifest guide. @@ -105,6 +135,13 @@ Each managed directory must contain a `.wp-materialize.json` manifest. See `conf 1. Python 3.10+ 2. Packages: - `Markdown>=3.6` + - `py_gfm` (only required when using `renderer: "py-gfm"`) + +## System Prerequisites + +1. `wp` CLI must be installed and available in PATH for `apply`. +2. `local` does not require `wp`. +3. `pandoc` must be installed and available in PATH when using `renderer: "pandoc"`. Install dependencies: diff --git a/configurations.md b/configurations.md index 0e9775a..f302a68 100644 --- a/configurations.md +++ b/configurations.md @@ -10,9 +10,16 @@ Top-level fields: Path to the WordPress root directory where the `wp` CLI is executed. 2. `repo_storage_dir` (string, required) Directory where git repositories are cloned or updated. -3. `git_repositories` (array, optional) +3. `renderer` (string, optional) + Markdown renderer to use. Allowed values: `default`, `py-gfm`, `pandoc`. +4. `hard_line_breaks` (boolean, optional) + If `true`, treat single newlines as hard line breaks. +5. `block_html` (boolean, optional) + If `true`, wrap HTML in a single Gutenberg HTML block to preserve formatting + in the visual editor. +6. `git_repositories` (array, optional) List of git repositories to manage. 
Default is an empty list. -4. `directories` (array, optional) +7. `directories` (array, optional) List of non-git directories to manage. Default is an empty list. `git_repositories` entries: @@ -48,21 +55,53 @@ Top-level fields: Inherited category paths for this directory and its children. 2. `tags` (object, optional) Inherited tags for this directory and its children. -3. `subdirectories` (object, optional) +3. `author` (object, optional) + Inherited author for this directory and its children. Must resolve to a single author. +4. `renderer` (string, optional) + Markdown renderer to use for this directory. Allowed values: `default`, `py-gfm`, `pandoc`. + If omitted, it inherits from the parent scope. +5. `hard_line_breaks` (boolean, optional) + If `true`, treat single newlines as hard line breaks. If omitted, it inherits + from the parent scope. +6. `block_html` (boolean, optional) + If `true`, wrap HTML in a single Gutenberg HTML block to preserve formatting. + If omitted, it inherits from the parent scope. +7. `subdirectories` (object, optional) Explicit list of subdirectories to traverse. -4. `files` (object, optional) +8. `files` (object, optional) Mapping of Markdown file names to file-level configuration. -`categories`, `tags`, and `subdirectories` objects: +`categories`, `tags`, `author`, and `subdirectories` objects: 1. `content` (array of strings, optional) List of values for the given field. For `categories`, each string is a hierarchical path such as `Systems/Infrastructure`. For `subdirectories`, each string is a directory name under the current directory. + For `author`, exactly one string must remain after inheritance is applied and it should be + a WordPress user ID (integer as a string). 2. `inherit` (boolean, optional, default `true`) If `true`, append to the parent effective list. If `false`, replace the parent list entirely. +Note: Root directory manifests do not need to specify `inherit` for these top-level +fields (the default is `true`). 
File-level overrides inside `files` still support +inheritance via their own `inherit` fields. + +The `renderer` field inherits implicitly: if omitted, the renderer is inherited +from the parent scope; if specified, it overrides the parent without an explicit +`inherit` flag. +The `hard_line_breaks` field inherits implicitly: if omitted, the value is inherited +from the parent scope; if specified, it overrides the parent without an explicit +`inherit` flag. +The `block_html` field inherits implicitly: if omitted, the value is inherited +from the parent scope; if specified, it overrides the parent without an explicit +`inherit` flag. + +Renderer dependencies: +1. `default` uses the Python `Markdown` library. +2. `py-gfm` requires the `py_gfm` package (imported as `mdx_gfm`). +3. `pandoc` requires the `pandoc` binary to be available on PATH. + `files` entries: Each key is a Markdown file name (relative to the manifest directory). @@ -73,10 +112,23 @@ Each value is an object with the following fields: 2. `use_heading_as_title` (object, optional) Extracts a heading from the Markdown as the title and removes that heading from the body while promoting remaining headings by one level. -3. `categories` (object, optional) +3. `created_on` (string, optional) + Manual override for the post creation time in `YYYY-MM-DD hh:mm` format. +4. `last_modified` (string, optional) + Manual override for the post modified time in `YYYY-MM-DD hh:mm` format. +5. `renderer` (string, optional) + Markdown renderer to use for this file. Allowed values: `default`, `py-gfm`, `pandoc`. + If omitted, it inherits from the parent scope. +6. `hard_line_breaks` (boolean, optional) + If `true`, treat single newlines as hard line breaks. If omitted, it inherits + from the parent scope. +7. `block_html` (boolean, optional) + If `true`, wrap HTML in a single Gutenberg HTML block to preserve formatting. + If omitted, it inherits from the parent scope. +8. 
`categories` (object, optional) Overrides categories for this file. Uses the same `content` and `inherit` fields as the top-level `categories` object. -4. `tags` (object, optional) +9. `tags` (object, optional) Overrides tags for this file. Uses the same `content` and `inherit` fields as the top-level `tags` object. @@ -87,6 +139,12 @@ Each value is an object with the following fields: 2. `strict` (boolean, optional, default `true`) If `true`, exactly one matching heading must exist. +If `created_on` or `last_modified` is not provided, the system infers the value. +For `git_repositories` sources it uses git commit timestamps; for `directories` +sources it uses filesystem timestamps. The system does not auto-detect git for +entries declared under `directories`, even if the path is inside a git repo. +If `created_on` is in the future, WordPress will mark the post as scheduled. + ## Post Identity Each post is identified with: @@ -97,3 +155,8 @@ _wp_materialize_source = <source_name>:<relative_path> `source_name` is the `name` from the global config entry, and `relative_path` is relative to the repo or directory root used for identity resolution. + +## Tag and Category Creation + +Missing categories and tags are created automatically during apply, after a successful +dry-run validation and before any post updates. 
diff --git a/examples.md b/examples.md index 2fc4e74..9b7bbab 100644 --- a/examples.md +++ b/examples.md @@ -10,6 +10,10 @@ Root directory manifest (`.wp-materialize.json`): { "categories": { "content": ["Systems", "Infrastructure"], "inherit": true }, "tags": { "content": ["automation", "wordpress"], "inherit": true }, + "author": { "content": ["editorial"], "inherit": true }, + "renderer": "pandoc", + "hard_line_breaks": true, + "block_html": true, "subdirectories": { "content": ["design", "notes"], "inherit": true }, "files": { "post.md": { @@ -18,7 +22,12 @@ Root directory manifest (`.wp-materialize.json`): "tags": { "content": ["extra"], "inherit": true } }, "essay.md": { - "use_heading_as_title": { "level": 1, "strict": true } + "use_heading_as_title": { "level": 1, "strict": true }, + "renderer": "py-gfm", + "hard_line_breaks": false, + "block_html": false, + "created_on": "2025-01-10 09:30", + "last_modified": "2025-02-14 16:45" } } } @@ -45,6 +54,9 @@ Subdirectory manifest (`design/.wp-materialize.json`): { "wordpress_root": "/var/www/wordpress", "repo_storage_dir": "/home/user/wp-materialize-repos", + "renderer": "default", + "hard_line_breaks": false, + "block_html": false, "git_repositories": [], "directories": [ { @@ -62,6 +74,9 @@ Subdirectory manifest (`design/.wp-materialize.json`): { "wordpress_root": "/var/www/wordpress", "repo_storage_dir": "/home/user/wp-materialize-repos", + "renderer": "default", + "hard_line_breaks": false, + "block_html": false, "git_repositories": [ { "name": "content-repo", @@ -92,6 +107,9 @@ Subdirectory manifest (`design/.wp-materialize.json`): { "wordpress_root": "/var/www/wordpress", "repo_storage_dir": "/home/user/wp-materialize-repos", + "renderer": "default", + "hard_line_breaks": false, + "block_html": false, "git_repositories": [ { "name": "content-repo", @@ -103,3 +121,39 @@ Subdirectory manifest (`design/.wp-materialize.json`): "directories": [] } ``` + +## Timestamp Behavior Example + +- `git_repositories` 
entries use git commit timestamps for `created_on`/`last_modified` inference. +- `directories` entries use filesystem timestamps even if the path is inside a git repo. + +## Scaffold Command Examples + +Create a placeholder config: + +```bash +wp-materialize new --config +wp-materialize new --config /path/to/config.json +``` + +Create a dummy manifest: + +```bash +wp-materialize new --manifest /path/to/content +``` + +Add a file to a manifest: + +```bash +wp-materialize add-file /path/to/content/post.md +wp-materialize add-file /path/to/content/post.md /path/to/content +wp-materialize add-file /path/to/content/post.md --current +``` + +Add a directory to a manifest: + +```bash +wp-materialize add-subdir /path/to/content/notes +wp-materialize add-subdir /path/to/content/notes /path/to/content +wp-materialize add-subdir /path/to/content/notes --current +``` diff --git a/pyproject.toml b/pyproject.toml index f8e0499..b99a129 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,6 +10,7 @@ readme = "README.md" requires-python = ">=3.10" dependencies = [ "Markdown>=3.6", + "py_gfm", ] [project.scripts] diff --git a/requirements.txt b/requirements.txt index cb286b3..40d8241 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1 +1,2 @@ Markdown>=3.6 +py_gfm diff --git a/src/apply.py b/src/apply.py index e1cc99b..cf85d2a 100644 --- a/src/apply.py +++ b/src/apply.py @@ -1,7 +1,8 @@ from __future__ import annotations import time -from typing import Dict, List, Set +from datetime import datetime +from typing import Dict, List, Optional, Set from .errors import WordPressError from .models import EvaluationResult, PostPlan @@ -17,15 +18,17 @@ def apply_changes( ) -> None: categories = wp.list_categories() category_map = _build_category_map(categories) + wp_timezone = wp.get_timezone() _create_missing_categories(result, wp, category_map) + _create_missing_tags(result, wp) successes: Set[str] = set() try: for post in result.posts: if not post.should_update: continue - 
_apply_post(post, wp, category_map) + _apply_post(post, wp, category_map, wp_timezone) state.posts[post.identity] = PostState( source_timestamp=post.source_timestamp, materialized_at=int(time.time()), @@ -48,7 +51,7 @@ def _create_missing_categories( wp: WordPressCLI, category_map: Dict[tuple[int, str], int], ) -> None: - paths = result.categories_to_create.missing_paths + paths = result.taxonomy_to_create.missing_categories paths = sorted(paths, key=len) seen: Set[tuple[str, ...]] = set() for segments in paths: @@ -67,7 +70,17 @@ def _create_missing_categories( parent = new_id -def _apply_post(post: PostPlan, wp: WordPressCLI, category_map: Dict[tuple[int, str], int]) -> None: +def _create_missing_tags(result: EvaluationResult, wp: WordPressCLI) -> None: + for tag in result.taxonomy_to_create.missing_tags: + wp.create_tag(tag) + + +def _apply_post( + post: PostPlan, + wp: WordPressCLI, + category_map: Dict[tuple[int, str], int], + wp_timezone, +) -> None: category_ids: List[int] = [] for path in post.categories: segments = [segment for segment in path.split("/") if segment] @@ -81,6 +94,8 @@ def _apply_post(post: PostPlan, wp: WordPressCLI, category_map: Dict[tuple[int, parent = category_map[map_key] category_ids.append(parent) + created_on, last_modified = _normalize_post_dates(post.created_on, post.last_modified, wp_timezone) + post_id = wp.find_post_id(post.identity) if post_id is None: wp.create_post( @@ -89,6 +104,9 @@ def _apply_post(post: PostPlan, wp: WordPressCLI, category_map: Dict[tuple[int, categories=category_ids, tags=post.tags, source_identity=post.identity, + created_on=created_on, + last_modified=last_modified, + author=post.author, ) return @@ -98,4 +116,41 @@ def _apply_post(post: PostPlan, wp: WordPressCLI, category_map: Dict[tuple[int, content=post.html, categories=category_ids, tags=post.tags, + created_on=created_on, + last_modified=last_modified, + author=post.author, ) + + +def _normalize_post_dates( + created_on: Optional[str], + 
last_modified: Optional[str], + wp_timezone, +) -> tuple[Optional[str], Optional[str]]: + if not created_on and not last_modified: + return created_on, last_modified + + now = datetime.now(wp_timezone) + created_dt = _parse_post_date(created_on, wp_timezone) + modified_dt = _parse_post_date(last_modified, wp_timezone) + + if created_dt and created_dt > now: + created_dt = now + if modified_dt and modified_dt > now: + modified_dt = now + if created_dt and modified_dt and modified_dt < created_dt: + modified_dt = created_dt + + created_str = created_dt.strftime("%Y-%m-%d %H:%M:%S") if created_dt else None + modified_str = modified_dt.strftime("%Y-%m-%d %H:%M:%S") if modified_dt else None + return created_str, modified_str + + +def _parse_post_date(value: Optional[str], wp_timezone) -> Optional[datetime]: + if not value: + return None + try: + parsed = datetime.strptime(value, "%Y-%m-%d %H:%M:%S") + return parsed.replace(tzinfo=wp_timezone) + except ValueError: + return None diff --git a/src/cli.py b/src/cli.py index d64132f..c26b3ab 100644 --- a/src/cli.py +++ b/src/cli.py @@ -8,25 +8,177 @@ from pathlib import Path from .apply import apply_changes from .config import load_config from .errors import ConfigurationError, MaterializeError, ValidationError -from .evaluation import evaluate +from .validation import validate +from .local_export import export_local +from .scaffold import add_dir_to_manifest, add_file_to_manifest, create_config, create_manifest, resolve_manifest_dir from .state import load_state from .wp_cli import WordPressCLI def main() -> int: - parser = argparse.ArgumentParser(description="wp-materialize") - parser.add_argument("command", nargs="?", choices=["evaluate", "apply"], default="evaluate") - parser.add_argument("--config", type=Path, default=_default_config_path()) - parser.add_argument("--state", type=Path, default=_default_state_path()) - parser.add_argument("--no-sync", action="store_true", help="Skip git clone/pull") - 
parser.add_argument("--json", action="store_true", help="Output evaluation summary as JSON") + parser = argparse.ArgumentParser( + description="wp-materialize: compile Markdown manifests into WordPress posts", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + epilog=("Command-specific help: wp-materialize --help"), + ) + common = argparse.ArgumentParser(add_help=False) + common.add_argument( + "--config", + type=Path, + default=_default_config_path(), + help="Path to the global config JSON file.", + ) + common.add_argument( + "--state", + type=Path, + default=_default_state_path(), + help="Path to the state JSON file used for incremental tracking.", + ) + common.add_argument( + "--no-sync", + action="store_true", + help="Skip git clone/pull for git_repositories entries.", + ) + common.add_argument( + "--force-new", + action="store_true", + help="Force all posts to be treated as new (ignore incremental timestamps).", + ) + common.add_argument( + "--json", + action="store_true", + help="Output validation summary as JSON.", + ) + + subparsers = parser.add_subparsers(dest="command", metavar="command") + + subparsers.add_parser( + "validate", + parents=[common], + help="Validate config/manifests and plan changes (no WP writes).", + description="Validate config/manifests, convert Markdown, and plan changes without writing to WordPress.", + ) + subparsers.add_parser( + "apply", + parents=[common], + help="Validate then create/update WordPress posts and taxonomy.", + description="Validate, then create categories/tags and create or update posts in WordPress.", + ) + local_parser = subparsers.add_parser( + "local", + parents=[common], + help="Export per-post folders with HTML, metadata, and wp command.", + description="Export per-post folders with HTML, metadata, and the exact wp command.", + ) + local_parser.add_argument( + "output_dir", + help="Output directory for local export (required).", + ) + + new_parser = subparsers.add_parser( + "new", + help="Create 
placeholder config or manifest files.", + description="Create a placeholder config file or a dummy manifest.", + ) + new_group = new_parser.add_mutually_exclusive_group(required=True) + new_group.add_argument( + "--config", + nargs="?", + const=str(_default_config_path()), + metavar="file", + help="Create a placeholder config file at or the default config path.", + ) + new_group.add_argument( + "--manifest", + metavar="dir", + help="Create a dummy manifest in the specified directory.", + ) + + add_file_parser = subparsers.add_parser( + "add-file", + help="Add a file entry to a manifest.", + description="Add a file entry to the manifest in a given directory.", + ) + add_file_parser.add_argument("file", help="File path to add to the manifest.") + add_file_parser.add_argument( + "manifest_dir", + nargs="?", + help="Directory containing the manifest (defaults to current directory).", + ) + add_file_parser.add_argument( + "--current", + action="store_true", + help="Find the manifest in the same directory as the file (cannot be used with manifest_dir).", + ) + + add_dir_parser = subparsers.add_parser( + "add-subdir", + help="Add a subdirectory entry to a manifest.", + description="Add a subdirectory entry to the manifest in a given directory.", + ) + add_dir_parser.add_argument("dir", help="Directory path to add to the manifest.") + add_dir_parser.add_argument( + "manifest_dir", + nargs="?", + help="Directory containing the manifest (defaults to current directory).", + ) + add_dir_parser.add_argument( + "--current", + action="store_true", + help="Find the manifest in the same directory as the target (cannot be used with manifest_dir).", + ) args = parser.parse_args() + if args.command is None: + parser.print_help() + return 1 + + if args.command == "new": + try: + if args.config is not None: + create_config(Path(args.config)) + print(f"Created config: {args.config}") + else: + path = create_manifest(Path(args.manifest)) + print(f"Created manifest: {path}") + except 
MaterializeError as exc: + print(f"Error: {exc}", file=sys.stderr) + return 1 + return 0 + + if args.command == "add-file": + try: + file_path = Path(args.file) + manifest_dir = resolve_manifest_dir(file_path, Path(args.manifest_dir) if args.manifest_dir else None, args.current) + add_file_to_manifest(file_path, manifest_dir) + print(f"Added file to manifest: {file_path}") + except MaterializeError as exc: + print(f"Error: {exc}", file=sys.stderr) + return 1 + return 0 + + if args.command == "add-subdir": + try: + dir_path = Path(args.dir) + manifest_dir = resolve_manifest_dir(dir_path, Path(args.manifest_dir) if args.manifest_dir else None, args.current) + add_dir_to_manifest(dir_path, manifest_dir) + print(f"Added directory to manifest: {dir_path}") + except MaterializeError as exc: + print(f"Error: {exc}", file=sys.stderr) + return 1 + return 0 + try: config = load_config(args.config) state = load_state(args.state) - result = evaluate(config, state, sync_repos=not args.no_sync) + result = validate( + config, + state, + sync_repos=not args.no_sync, + force_new=args.force_new, + skip_wp_checks=args.command == "local", + ) except ValidationError as exc: _print_validation_error(exc) return 1 @@ -35,9 +187,22 @@ def main() -> int: return 1 if args.json: - print(_evaluation_json(result)) + print(_validation_json(result)) else: - print(_evaluation_summary(result)) + print(_validation_summary(result)) + + if args.command == "local": + output_dir = getattr(args, "output_dir", None) + if not output_dir: + print("Error: local command requires an output directory", file=sys.stderr) + return 1 + try: + export_local(result, Path(output_dir)) + except MaterializeError as exc: + print(f"Error: {exc}", file=sys.stderr) + return 1 + print("Local export complete") + return 0 if args.command == "apply": wp = WordPressCLI(config.wordpress_root) @@ -59,19 +224,21 @@ def _default_state_path() -> Path: return Path.home() / ".config" / "wp-materialize" / "state.json" -def 
_evaluation_summary(result) -> str: +def _validation_summary(result) -> str: total = len(result.posts) updates = sum(1 for post in result.posts if post.should_update) - categories = len(result.categories_to_create.missing_paths) + categories = len(result.taxonomy_to_create.missing_categories) + tags = len(result.taxonomy_to_create.missing_tags) lines = [ f"Posts: {total}", f"Posts to update: {updates}", f"Categories to create: {categories}", + f"Tags to create: {tags}", ] return "\n".join(lines) -def _evaluation_json(result) -> str: +def _validation_json(result) -> str: payload = { "posts": [ { @@ -83,10 +250,14 @@ def _evaluation_json(result) -> str: "should_update": post.should_update, "categories": post.categories, "tags": post.tags, + "created_on": post.created_on, + "last_modified": post.last_modified, + "author": post.author, } for post in result.posts ], - "categories_to_create": result.categories_to_create.missing_paths, + "categories_to_create": result.taxonomy_to_create.missing_categories, + "tags_to_create": result.taxonomy_to_create.missing_tags, } return json.dumps(payload, indent=2) diff --git a/src/config.py b/src/config.py index ae650a0..0a164ce 100644 --- a/src/config.py +++ b/src/config.py @@ -29,6 +29,9 @@ class Config: repo_storage_dir: Path git_repositories: List[GitRepository] directories: List[DirectorySpec] + renderer: Optional[str] + hard_line_breaks: bool + block_html: bool def _expect_keys(obj: dict, allowed: set[str], context: str) -> None: @@ -48,10 +51,25 @@ def load_config(path: Path) -> Config: if not isinstance(data, dict): raise ConfigurationError("Config must be a JSON object") - _expect_keys(data, {"wordpress_root", "repo_storage_dir", "git_repositories", "directories"}, "config") + _expect_keys( + data, + { + "wordpress_root", + "repo_storage_dir", + "git_repositories", + "directories", + "renderer", + "hard_line_breaks", + "block_html", + }, + "config", + ) wordpress_root = _require_path(data, "wordpress_root", required=True) 
repo_storage_dir = _require_path(data, "repo_storage_dir", required=True) + renderer = _require_renderer(data.get("renderer"), context="config.renderer") + hard_line_breaks = _require_bool_optional(data.get("hard_line_breaks"), context="config.hard_line_breaks") + block_html = _require_bool_optional(data.get("block_html"), context="config.block_html") git_repositories = [] for idx, repo in enumerate(data.get("git_repositories", []) or []): @@ -85,6 +103,9 @@ def load_config(path: Path) -> Config: repo_storage_dir=repo_storage_dir, git_repositories=git_repositories, directories=directories, + renderer=renderer, + hard_line_breaks=False if hard_line_breaks is None else hard_line_breaks, + block_html=False if block_html is None else block_html, ) @@ -102,3 +123,22 @@ def _require_path(data: dict, key: str, required: bool) -> Path: if not isinstance(value, str) or not value.strip(): raise ConfigurationError(f"{key} must be a non-empty string") return Path(value) + + +def _require_renderer(value: object, context: str) -> Optional[str]: + if value is None: + return None + if not isinstance(value, str) or not value.strip(): + raise ConfigurationError(f"{context} must be a non-empty string") + renderer = value.strip() + if renderer not in {"default", "py-gfm", "pandoc"}: + raise ConfigurationError(f"{context} must be one of: default, py-gfm, pandoc") + return renderer + + +def _require_bool_optional(value: object, context: str) -> Optional[bool]: + if value is None: + return None + if not isinstance(value, bool): + raise ConfigurationError(f"{context} must be a boolean") + return value diff --git a/src/git_utils.py b/src/git_utils.py index 9d79917..ef141f8 100644 --- a/src/git_utils.py +++ b/src/git_utils.py @@ -38,6 +38,21 @@ def git_timestamp(repo_root: Path, relative_path: str) -> int: raise ConfigurationError(f"Invalid git timestamp for {relative_path}: {output}") from exc +def git_first_timestamp(repo_root: Path, relative_path: str) -> int: + result = _run( + ["git", 
"log", "--reverse", "--format=%ct", "--", relative_path], # no -1: git applies -n before --reverse, which would select the newest commit + cwd=repo_root, + capture_output=True, + ) + output = result.stdout.strip() + if not output: + raise ConfigurationError(f"No git timestamp for {relative_path}") + try: + return int(output.splitlines()[0]) # --reverse lists the oldest commit first + except ValueError as exc: + raise ConfigurationError(f"Invalid git timestamp for {relative_path}: {output}") from exc + + def _run(cmd: list[str], cwd: Path, capture_output: bool = False) -> subprocess.CompletedProcess: try: return subprocess.run( diff --git a/src/local_export.py b/src/local_export.py new file mode 100644 index 0000000..31096ff --- /dev/null +++ b/src/local_export.py @@ -0,0 +1,109 @@ +from __future__ import annotations + +import json +import re +import shlex +import unicodedata +from pathlib import Path +from typing import List, Set + +from .errors import MaterializeError +from .models import EvaluationResult, PostPlan + + +def export_local(result: EvaluationResult, output_dir: Path) -> None: + if not output_dir.exists(): + output_dir.mkdir(parents=True, exist_ok=True) + if not output_dir.is_dir(): + raise MaterializeError(f"Output path is not a directory: {output_dir}") + + used_names: Set[str] = set() + for post in result.posts: + metadata = _build_metadata(post) + command = _build_wp_command(post) + + base_name = _normalize_name(f"{post.source.name}/{post.relative_path}") + title_name = _normalize_name(post.title) + if title_name: + dir_name = f"{base_name}-{title_name}" + else: + dir_name = base_name + dir_name = _dedupe_name(dir_name, used_names) + used_names.add(dir_name) + + target_dir = output_dir / dir_name + target_dir.mkdir(parents=True, exist_ok=True) + + (target_dir / "post.html").write_text(post.html, encoding="utf-8") + (target_dir / "metadata.json").write_text( + json.dumps(metadata, indent=2, sort_keys=True), + encoding="utf-8", + ) + (target_dir / 
"wp-command.txt").write_text(command + "\n", encoding="utf-8") + + +def _build_metadata(post: PostPlan) -> dict: + metadata = { +
"post_type": "post", + "post_status": "publish", + "post_title": post.title, + "post_content": post.html, + "post_category": post.categories, + "tags_input": post.tags, + "meta_input": {"_wp_materialize_source": post.identity}, + } + if post.created_on: + metadata["post_date"] = post.created_on + if post.last_modified: + metadata["post_modified"] = post.last_modified + if post.author: + metadata["post_author"] = post.author + return metadata + + +def _build_wp_command(post: PostPlan) -> str: + payload = json.dumps({"_wp_materialize_source": post.identity}) + args = [ + "wp", + "post", + "create", + "--post_type=post", + "--post_status=publish", + f"--post_title={post.title}", + f"--post_content={post.html}", + f"--post_category={','.join(post.categories)}", + f"--tags_input={','.join(post.tags)}", + f"--meta_input={payload}", + "--porcelain", + ] + if post.created_on: + args.append(f"--post_date={post.created_on}") + if post.last_modified: + args.append(f"--post_modified={post.last_modified}") + if post.author: + args.append(f"--post_author={post.author}") + return " ".join(shlex.quote(arg) for arg in args) + + +def _normalize_name(value: str) -> str: + text = value.strip() + text = text.replace("\\", "/") + text = text.replace("/", "-") + text = unicodedata.normalize("NFKD", text) + text = text.encode("ascii", "ignore").decode("ascii") + text = text.lower() + text = re.sub(r"[^a-z0-9._-]+", "-", text) + text = re.sub(r"-+", "-", text) + text = text.strip("-_.") + return text or "post" + + +def _dedupe_name(name: str, used: Set[str]) -> str: + if name not in used: + return name + index = 2 + while True: + candidate = f"{name}-{index}" + if candidate not in used: + return candidate + index += 1 diff --git a/src/manifest.py b/src/manifest.py index 94bdba1..4fd1be7 100644 --- a/src/manifest.py +++ b/src/manifest.py @@ -1,6 +1,7 @@ from __future__ import annotations import json +from datetime import datetime from pathlib import Path from typing import Dict @@ -23,7 
+24,16 @@ def load_manifest(path: Path, issues: list[ValidationIssue]) -> Manifest | None: issues.append(ValidationIssue("Manifest must be a JSON object", context=str(path))) return None - allowed = {"categories", "tags", "subdirectories", "files"} + allowed = { + "categories", + "tags", + "author", + "renderer", + "hard_line_breaks", + "block_html", + "subdirectories", + "files", + } extra = set(data.keys()) - allowed if extra: issues.append(ValidationIssue(f"Unexpected keys: {sorted(extra)}", context=str(path))) @@ -31,6 +41,10 @@ def load_manifest(path: Path, issues: list[ValidationIssue]) -> Manifest | None: categories = _parse_inherit_list(data.get("categories"), issues, f"{path}:categories") tags = _parse_inherit_list(data.get("tags"), issues, f"{path}:tags") + author = _parse_inherit_list(data.get("author"), issues, f"{path}:author") + renderer = _parse_renderer_field(data.get("renderer"), issues, f"{path}:renderer") + hard_line_breaks = _parse_bool_field(data.get("hard_line_breaks"), issues, f"{path}:hard_line_breaks") + block_html = _parse_bool_field(data.get("block_html"), issues, f"{path}:block_html") subdirectories = _parse_inherit_list(data.get("subdirectories"), issues, f"{path}:subdirectories") files: Dict[str, FileSpec] = {} @@ -46,7 +60,17 @@ def load_manifest(path: Path, issues: list[ValidationIssue]) -> Manifest | None: if not isinstance(file_cfg, dict): issues.append(ValidationIssue(f"{file_name} must be an object", context=str(path))) continue - extra_file = set(file_cfg.keys()) - {"title", "use_heading_as_title", "categories", "tags"} + extra_file = set(file_cfg.keys()) - { + "title", + "use_heading_as_title", + "categories", + "tags", + "created_on", + "last_modified", + "renderer", + "hard_line_breaks", + "block_html", + } if extra_file: issues.append( ValidationIssue(f"{file_name} has unexpected keys: {sorted(extra_file)}", context=str(path)) @@ -89,6 +113,23 @@ def load_manifest(path: Path, issues: list[ValidationIssue]) -> Manifest | 
None: categories_override = _parse_inherit_list(file_cfg.get("categories"), issues, f"{path}:{file_name}:categories") tags_override = _parse_inherit_list(file_cfg.get("tags"), issues, f"{path}:{file_name}:tags") + created_on = _parse_datetime_field(file_cfg.get("created_on"), issues, f"{path}:{file_name}:created_on") + last_modified = _parse_datetime_field(file_cfg.get("last_modified"), issues, f"{path}:{file_name}:last_modified") + renderer_override = _parse_renderer_field(file_cfg.get("renderer"), issues, f"{path}:{file_name}:renderer") + hard_line_breaks_override = _parse_bool_field( + file_cfg.get("hard_line_breaks"), + issues, + f"{path}:{file_name}:hard_line_breaks", + ) + block_html_override = _parse_bool_field( + file_cfg.get("block_html"), + issues, + f"{path}:{file_name}:block_html", + ) + if created_on and last_modified and last_modified < created_on: + issues.append( + ValidationIssue("last_modified cannot be earlier than created_on", context=str(path)) + ) files[file_name] = FileSpec( title=title, @@ -96,12 +137,21 @@ def load_manifest(path: Path, issues: list[ValidationIssue]) -> Manifest | None: use_heading_strict=use_strict, categories=categories_override, tags=tags_override, + created_on=created_on, + last_modified=last_modified, + renderer=renderer_override, + hard_line_breaks=hard_line_breaks_override, + block_html=block_html_override, ) return Manifest( path=path, categories=categories, tags=tags, + author=author, + renderer=renderer, + hard_line_breaks=hard_line_breaks, + block_html=block_html, subdirectories=subdirectories, files=files, ) @@ -129,3 +179,38 @@ def _parse_inherit_list(value: object, issues: list[ValidationIssue], context: s inherit = True return InheritList(content=[item for item in content if isinstance(item, str)], inherit=inherit) + + +def _parse_datetime_field(value: object, issues: list[ValidationIssue], context: str) -> datetime | None: + if value is None: + return None + if not isinstance(value, str) or not value.strip(): 
+ issues.append(ValidationIssue("Must be a non-empty string", context=context)) + return None + try: + return datetime.strptime(value.strip(), "%Y-%m-%d %H:%M") + except ValueError: + issues.append(ValidationIssue("Invalid datetime format (expected YYYY-MM-DD hh:mm)", context=context)) + return None + + +def _parse_renderer_field(value: object, issues: list[ValidationIssue], context: str) -> str | None: + if value is None: + return None + if not isinstance(value, str) or not value.strip(): + issues.append(ValidationIssue("Must be a non-empty string", context=context)) + return None + renderer = value.strip() + if renderer not in {"default", "py-gfm", "pandoc"}: + issues.append(ValidationIssue("Must be one of: default, py-gfm, pandoc", context=context)) + return None + return renderer + + +def _parse_bool_field(value: object, issues: list[ValidationIssue], context: str) -> bool | None: + if value is None: + return None + if not isinstance(value, bool): + issues.append(ValidationIssue("Must be a boolean", context=context)) + return None + return value diff --git a/src/markdown_utils.py b/src/markdown_utils.py index 2a734e2..9769166 100644 --- a/src/markdown_utils.py +++ b/src/markdown_utils.py @@ -3,6 +3,7 @@ from __future__ import annotations import re import markdown as md_lib +import subprocess from .errors import ValidationIssue @@ -54,9 +55,62 @@ def _promote_headings(text: str) -> str: return "\n".join(promoted_lines) -def convert_markdown(markdown_text: str, context: str, issues: list[ValidationIssue]) -> str | None: - try: - return md_lib.markdown(markdown_text, extensions=["extra"], output_format="html5") - except Exception as exc: # pragma: no cover - depends on markdown internals - issues.append(ValidationIssue(f"Markdown conversion failed: {exc}", context=context)) - return None +def convert_markdown( + markdown_text: str, + context: str, + issues: list[ValidationIssue], + renderer: str = "default", + hard_line_breaks: bool = False, + block_html: bool = 
False, +) -> str | None: + def wrap_blocks(html: str) -> str: + if not block_html: + return html + return f"\n{html}\n" + + if renderer == "default": + try: + extensions = ["extra"] + if hard_line_breaks: + extensions.append("nl2br") + return wrap_blocks(md_lib.markdown(markdown_text, extensions=extensions, output_format="html5")) + except Exception as exc: # pragma: no cover - depends on markdown internals + issues.append(ValidationIssue(f"Markdown conversion failed: {exc}", context=context)) + return None + if renderer == "py-gfm": + try: + import mdx_gfm + except Exception as exc: # pragma: no cover - dependency missing + issues.append(ValidationIssue(f"py-gfm is not available: {exc}", context=context)) + return None + extension_class = getattr(mdx_gfm, "GithubFlavoredMarkdownExtension", None) + if extension_class is None: + issues.append(ValidationIssue("py-gfm extension not found: GithubFlavoredMarkdownExtension", context=context)) + return None + try: + extensions = [extension_class()] + if hard_line_breaks: + extensions.append("nl2br") + return wrap_blocks(md_lib.markdown(markdown_text, extensions=extensions, output_format="html5")) + except Exception as exc: # pragma: no cover - depends on markdown internals + issues.append(ValidationIssue(f"Markdown conversion failed: {exc}", context=context)) + return None + if renderer == "pandoc": + try: + result = subprocess.run( + ["pandoc", f"--from={'markdown+hard_line_breaks' if hard_line_breaks else 'markdown'}", "--to=html5"], + input=markdown_text, + text=True, + capture_output=True, + check=True, + ) + return wrap_blocks(result.stdout) + except FileNotFoundError as exc: + issues.append(ValidationIssue(f"pandoc is not available: {exc}", context=context)) + return None + except subprocess.CalledProcessError as exc: + stderr = exc.stderr.strip() if exc.stderr else "" + issues.append(ValidationIssue(f"Pandoc conversion failed: {stderr}", context=context)) + return None + issues.append(ValidationIssue(f"Unknown 
renderer: {renderer}", context=context)) + return None diff --git a/src/models.py b/src/models.py index c0f0288..64892dc 100644 --- a/src/models.py +++ b/src/models.py @@ -1,6 +1,7 @@ from __future__ import annotations from dataclasses import dataclass, field +from datetime import datetime from pathlib import Path from typing import Dict, List, Optional @@ -18,6 +19,11 @@ class FileSpec: use_heading_strict: bool categories: Optional[InheritList] tags: Optional[InheritList] + created_on: Optional[datetime] + last_modified: Optional[datetime] + renderer: Optional[str] + hard_line_breaks: Optional[bool] + block_html: Optional[bool] @dataclass(frozen=True) @@ -25,6 +31,10 @@ class Manifest: path: Path categories: InheritList tags: InheritList + author: InheritList + renderer: Optional[str] + hard_line_breaks: Optional[bool] + block_html: Optional[bool] subdirectories: InheritList files: Dict[str, FileSpec] @@ -47,17 +57,21 @@ class PostPlan: html: str categories: List[str] tags: List[str] + author: Optional[str] source_timestamp: int cached_timestamp: Optional[int] should_update: bool + created_on: Optional[str] + last_modified: Optional[str] @dataclass -class CategoryPlan: - missing_paths: List[List[str]] +class TaxonomyPlan: + missing_categories: List[List[str]] + missing_tags: List[str] @dataclass class EvaluationResult: posts: List[PostPlan] - categories_to_create: CategoryPlan + taxonomy_to_create: TaxonomyPlan diff --git a/src/scaffold.py b/src/scaffold.py new file mode 100644 index 0000000..b4e6b8d --- /dev/null +++ b/src/scaffold.py @@ -0,0 +1,154 @@ +from __future__ import annotations + +import json +from pathlib import Path +from typing import Any, Dict, List, Optional + +from .errors import MaterializeError + + +def create_config(path: Path) -> None: + _ensure_parent_exists(path) + if path.exists(): + raise MaterializeError(f"Config already exists: {path}") + payload = { + "wordpress_root": "/path/to/wordpress", + "repo_storage_dir": "/path/to/repo-storage", 
+ "renderer": "default", + "hard_line_breaks": False, + "block_html": False, + "git_repositories": [ + { + "name": "example-repo", + "url": "https://example.com/repo.git", + "branch": "main", + "root_subdir": None, + } + ], + "directories": [ + { + "name": "example-dir", + "path": "/path/to/content", + "root_subdir": None, + } + ], + } + path.write_text(json.dumps(payload, indent=2), encoding="utf-8") + + +def create_manifest(directory: Path) -> Path: + if not directory.exists(): + raise MaterializeError(f"Directory does not exist: {directory}") + if not directory.is_dir(): + raise MaterializeError(f"Not a directory: {directory}") + manifest_path = directory / ".wp-materialize.json" + if manifest_path.exists(): + raise MaterializeError(f"Manifest already exists: {manifest_path}") + payload = { + "categories": {"content": [], "inherit": True}, + "tags": {"content": [], "inherit": True}, + "author": {"content": [], "inherit": True}, + "renderer": "default", + "hard_line_breaks": False, + "block_html": False, + "subdirectories": {"content": [], "inherit": True}, + "files": {}, + } + manifest_path.write_text(json.dumps(payload, indent=2), encoding="utf-8") + return manifest_path + + +def add_file_to_manifest(file_path: Path, manifest_dir: Path) -> None: + if not file_path.exists(): + raise MaterializeError(f"File does not exist: {file_path}") + if not file_path.is_file(): + raise MaterializeError(f"Not a file: {file_path}") + manifest_path = _manifest_path(manifest_dir) + data = _load_manifest_json(manifest_path) + + relative = _relative_to(file_path, manifest_dir) + files = data.setdefault("files", {}) + if not isinstance(files, dict): + raise MaterializeError("Manifest files must be an object") + if relative in files: + raise MaterializeError(f"File already exists in manifest: {relative}") + + files[relative] = {"title": "TODO: Title"} + _write_manifest_json(manifest_path, data) + + +def add_dir_to_manifest(dir_path: Path, manifest_dir: Path) -> None: + if not 
dir_path.exists(): + raise MaterializeError(f"Directory does not exist: {dir_path}") + if not dir_path.is_dir(): + raise MaterializeError(f"Not a directory: {dir_path}") + + manifest_path = _manifest_path(manifest_dir) + data = _load_manifest_json(manifest_path) + + relative = _relative_to(dir_path, manifest_dir) + subdirs = data.setdefault("subdirectories", {"content": [], "inherit": True}) + if not isinstance(subdirs, dict): + raise MaterializeError("Manifest subdirectories must be an object") + content = subdirs.setdefault("content", []) + if not isinstance(content, list) or any(not isinstance(item, str) for item in content): + raise MaterializeError("Manifest subdirectories.content must be a list of strings") + if relative in content: + raise MaterializeError(f"Subdirectory already exists in manifest: {relative}") + + content.append(relative) + _write_manifest_json(manifest_path, data) + + +def resolve_manifest_dir(target_path: Path, manifest_dir: Optional[Path], use_current: bool) -> Path: + if manifest_dir and use_current: + raise MaterializeError("--current cannot be used with an explicit manifest directory") + if manifest_dir: + return manifest_dir + if use_current: + return target_path.parent + return Path.cwd() + + +def _manifest_path(manifest_dir: Path) -> Path: + if not manifest_dir.exists(): + raise MaterializeError(f"Manifest directory does not exist: {manifest_dir}") + if not manifest_dir.is_dir(): + raise MaterializeError(f"Not a directory: {manifest_dir}") + manifest_path = manifest_dir / ".wp-materialize.json" + if not manifest_path.exists(): + raise MaterializeError(f"Manifest not found: {manifest_path}") + return manifest_path + + +def _load_manifest_json(path: Path) -> Dict[str, Any]: + try: + data = json.loads(path.read_text(encoding="utf-8")) + except json.JSONDecodeError as exc: + raise MaterializeError(f"Invalid JSON in manifest: {exc}") from exc + if not isinstance(data, dict): + raise MaterializeError("Manifest must be a JSON object") + 
return data + + +def _write_manifest_json(path: Path, data: Dict[str, Any]) -> None: + path.write_text(json.dumps(data, indent=2), encoding="utf-8") + + +def _relative_to(path: Path, base: Path) -> str: + try: + relative = path.relative_to(base) + except ValueError as exc: + raise MaterializeError(f"Path is outside manifest directory: {path}") from exc + relative_str = relative.as_posix() + if relative_str in {".", ""}: + raise MaterializeError(f"Path must be inside manifest directory: {path}") + return relative_str + + +def _ensure_parent_exists(path: Path) -> None: + parent = path.parent + if not parent.exists(): + raise MaterializeError(f"Directory does not exist: {parent}") + if not parent.is_dir(): + raise MaterializeError(f"Not a directory: {parent}") diff --git a/src/evaluation.py b/src/validation.py similarity index 62% rename from src/evaluation.py rename to src/validation.py index af98e48..c7fb169 100644 --- a/src/evaluation.py +++ b/src/validation.py @@ -1,16 +1,17 @@ from __future__ import annotations from dataclasses import dataclass +from datetime import datetime from pathlib import Path import shutil from typing import Dict, List, Optional, Set from .config import Config from .errors import ValidationError, ValidationIssue -from .git_utils import ensure_repo, git_timestamp +from .git_utils import ensure_repo, git_first_timestamp, git_timestamp from .manifest import load_manifest from .markdown_utils import convert_markdown, extract_title -from .models import CategoryPlan, EvaluationResult, InheritList, Manifest, PostPlan, Source +from .models import EvaluationResult, InheritList, PostPlan, Source, TaxonomyPlan from .state import State from .wp_cli import WordPressCLI @@ -19,49 +20,70 @@ from .wp_cli import WordPressCLI class _Context: categories: InheritList tags: InheritList + author: InheritList + renderer: Optional[str] + hard_line_breaks: bool + block_html: bool subdirectories: InheritList manifest_chain: List[Path] -def evaluate(config: Config, 
state: State, sync_repos: bool) -> EvaluationResult: +def validate( + config: Config, + state: State, + sync_repos: bool, + force_new: bool = False, + skip_wp_checks: bool = False, +) -> EvaluationResult: issues: List[ValidationIssue] = [] sources = _load_sources(config, sync_repos, issues) posts: List[PostPlan] = [] for source, content_root in sources: - _evaluate_directory( + _validate_directory( source=source, directory=content_root, context=_Context( categories=InheritList(), tags=InheritList(), + author=InheritList(), + renderer=config.renderer, + hard_line_breaks=config.hard_line_breaks, + block_html=config.block_html, subdirectories=InheritList(), manifest_chain=[], ), state=state, issues=issues, posts=posts, + force_new=force_new, ) - if shutil.which("wp") is None: - issues.append(ValidationIssue("wp CLI not found in PATH", context=str(config.wordpress_root))) - categories = [] - tag_names: Set[str] = set() - try: - wp = WordPressCLI(config.wordpress_root) - categories = wp.list_categories() - tags = wp.list_tags() - tag_names = {tag.name for tag in tags} - except Exception as exc: - issues.append(ValidationIssue(str(exc), context=str(config.wordpress_root))) + missing_categories: List[List[str]] = [] + missing_tags: List[str] = [] + if not skip_wp_checks: + if shutil.which("wp") is None: + issues.append(ValidationIssue("wp CLI not found in PATH", context=str(config.wordpress_root))) + categories = [] + tag_names: Set[str] = set() + try: + wp = WordPressCLI(config.wordpress_root) + categories = wp.list_categories() + tags = wp.list_tags() + tag_names = {tag.name for tag in tags} + except Exception as exc: + issues.append(ValidationIssue(str(exc), context=str(config.wordpress_root))) - missing_categories = _plan_categories(posts, categories, issues, tag_names) + missing_categories, missing_tags = _plan_taxonomy(posts, categories, tag_names) if issues: raise ValidationError(issues) - return EvaluationResult(posts=posts, 
categories_to_create=CategoryPlan(missing_paths=missing_categories)) + return EvaluationResult( + posts=posts, + taxonomy_to_create=TaxonomyPlan(missing_categories=missing_categories, missing_tags=missing_tags), + ) def _load_sources( @@ -108,13 +130,14 @@ def _load_sources( return sources -def _evaluate_directory( +def _validate_directory( source: Source, directory: Path, context: _Context, state: State, issues: List[ValidationIssue], posts: List[PostPlan], + force_new: bool, ) -> None: manifest_path = directory / ".wp-materialize.json" manifest = load_manifest(manifest_path, issues) @@ -123,6 +146,18 @@ def _evaluate_directory( effective_categories = _merge_inherit(context.categories, manifest.categories) effective_tags = _merge_inherit(context.tags, manifest.tags) + effective_author = _merge_inherit(context.author, manifest.author) + effective_renderer = manifest.renderer if manifest.renderer is not None else context.renderer + effective_hard_line_breaks = ( + manifest.hard_line_breaks + if manifest.hard_line_breaks is not None + else context.hard_line_breaks + ) + effective_block_html = ( + manifest.block_html + if manifest.block_html is not None + else context.block_html + ) effective_subdirs = _merge_inherit(context.subdirectories, manifest.subdirectories) manifest_chain = context.manifest_chain + [manifest.path] @@ -161,8 +196,27 @@ def _evaluate_directory( resolved_categories = _normalize_list(resolved_categories, "category", str(file_path), issues) resolved_tags = _normalize_list(resolved_tags, "tag", str(file_path), issues) + resolved_author = _resolve_author(effective_author.content, str(file_path), issues) - html = convert_markdown(markdown_body, context=str(file_path), issues=issues) + resolved_renderer = spec.renderer if spec.renderer is not None else effective_renderer + resolved_hard_line_breaks = ( + spec.hard_line_breaks + if spec.hard_line_breaks is not None + else effective_hard_line_breaks + ) + resolved_block_html = ( + spec.block_html + if 
spec.block_html is not None + else effective_block_html + ) + html = convert_markdown( + markdown_body, + context=str(file_path), + issues=issues, + renderer=resolved_renderer or "default", + hard_line_breaks=resolved_hard_line_breaks, + block_html=resolved_block_html, + ) if html is None: continue @@ -189,7 +243,14 @@ def _evaluate_directory( identity = f"{source.name}:{relative_path}" cached_entry = state.posts.get(identity) cached_ts = cached_entry.source_timestamp if cached_entry else None - should_update = cached_ts is None or source_timestamp > cached_ts + should_update = True if force_new else (cached_ts is None or source_timestamp > cached_ts) + created_on, last_modified = _resolve_post_datetimes( + source=source, + identity_root=source.identity_root, + relative_path=relative_path, + spec=spec, + issues=issues, + ) posts.append( PostPlan( @@ -201,9 +262,12 @@ def _evaluate_directory( html=html, categories=resolved_categories, tags=resolved_tags, + author=resolved_author, source_timestamp=source_timestamp, cached_timestamp=cached_ts, should_update=should_update, + created_on=created_on, + last_modified=last_modified, ) ) @@ -212,18 +276,23 @@ def _evaluate_directory( if not subdir_path.exists(): issues.append(ValidationIssue("Missing subdirectory", context=str(subdir_path))) continue - _evaluate_directory( + _validate_directory( source=source, directory=subdir_path, context=_Context( categories=effective_categories, tags=effective_tags, + author=effective_author, + renderer=effective_renderer, + hard_line_breaks=effective_hard_line_breaks, + block_html=effective_block_html, subdirectories=effective_subdirs, manifest_chain=manifest_chain, ), state=state, issues=issues, posts=posts, + force_new=force_new, ) @@ -263,6 +332,16 @@ def _normalize_list(values: List[str], label: str, context: str, issues: List[Va return normalized +def _resolve_author(values: List[str], context: str, issues: List[ValidationIssue]) -> Optional[str]: + normalized = 
_normalize_list(values, "author", context, issues) + if not normalized: + return None + if len(normalized) > 1: + issues.append(ValidationIssue("Multiple authors specified; only one is allowed", context=context)) + return None + return normalized[0] + + def _relative_path(path: Path, root: Path, issues: List[ValidationIssue]) -> Optional[str]: try: return str(path.relative_to(root)) @@ -290,25 +369,84 @@ def _timestamp_for_path( return None -def _plan_categories( +def _resolve_post_datetimes( + source: Source, + identity_root: Path, + relative_path: str, + spec, + issues: List[ValidationIssue], +) -> tuple[Optional[str], Optional[str]]: + created_dt = spec.created_on + modified_dt = spec.last_modified + + if created_dt is None or modified_dt is None: + inferred = _infer_file_timestamps(source, identity_root, relative_path, issues) + if inferred is None: + return None, None + inferred_created, inferred_modified = inferred + if created_dt is None: + created_dt = datetime.fromtimestamp(inferred_created) + if modified_dt is None: + modified_dt = datetime.fromtimestamp(inferred_modified) + + if created_dt and modified_dt and modified_dt < created_dt: + issues.append( + ValidationIssue("last_modified cannot be earlier than created_on", context=relative_path) + ) + return None, None + + created_on = _format_wp_datetime(created_dt) if created_dt else None + last_modified = _format_wp_datetime(modified_dt) if modified_dt else None + return created_on, last_modified + + +def _infer_file_timestamps( + source: Source, + identity_root: Path, + relative_path: str, + issues: List[ValidationIssue], +) -> Optional[tuple[int, int]]: + if source.kind == "git": + try: + created_ts = git_first_timestamp(identity_root, relative_path) + modified_ts = git_timestamp(identity_root, relative_path) + return created_ts, modified_ts + except Exception: + pass + try: + stat = (identity_root / relative_path).stat() + return int(stat.st_ctime), int(stat.st_mtime) + except Exception as exc: + 
issues.append(ValidationIssue(f"Timestamp lookup failed: {exc}", context=relative_path)) + return None + + +def _format_wp_datetime(value: datetime) -> str: + return value.strftime("%Y-%m-%d %H:%M:%S") + + +def _plan_taxonomy( posts: List[PostPlan], categories, # list of CategoryTerm - issues: List[ValidationIssue], existing_tags: Set[str], -) -> List[List[str]]: +) -> tuple[List[List[str]], List[str]]: category_map: Dict[tuple[int, str], int] = {} for category in categories: category_map[(category.parent, category.name)] = category.term_id missing_paths: List[List[str]] = [] seen_missing: Set[tuple[str, ...]] = set() + missing_tags: List[str] = [] + seen_tags: Set[str] = set() for post in posts: if not post.should_update: continue for tag in post.tags: if tag not in existing_tags: - issues.append(ValidationIssue(f"Tag does not exist: {tag}", context=post.relative_path)) + if tag not in seen_tags: + seen_tags.add(tag) + missing_tags.append(tag) for path in post.categories: segments = [segment for segment in path.split("/") if segment] if not segments: @@ -328,4 +466,4 @@ def _plan_categories( seen_missing.add(key) missing_paths.append(list(segments)) - return missing_paths + return missing_paths, missing_tags diff --git a/src/wp_cli.py b/src/wp_cli.py index 6e029d7..9e9f448 100644 --- a/src/wp_cli.py +++ b/src/wp_cli.py @@ -3,8 +3,10 @@ from __future__ import annotations import json import subprocess from dataclasses import dataclass +from datetime import timedelta, timezone from pathlib import Path from typing import Dict, List, Optional +from zoneinfo import ZoneInfo from .errors import WordPressError @@ -60,6 +62,44 @@ class WordPressCLI: tags.append(TagTerm(term_id=int(entry["term_id"]), name=entry["name"])) return tags + def create_tag(self, name: str) -> int: + result = self._run( + [ + "wp", + "term", + "create", + "post_tag", + name, + "--porcelain", + ], + capture_output=True, + ) + output = result.stdout.strip() + try: + return int(output) + except 
ValueError as exc: + raise WordPressError(f"Invalid tag id from wp cli: {output}") from exc + + def get_timezone(self): + tz_name = self._run( + ["wp", "option", "get", "timezone_string"], + capture_output=True, + ).stdout.strip() + if tz_name and tz_name.upper() != "UTC": + try: + return ZoneInfo(tz_name) + except Exception: + pass + offset_value = self._run( + ["wp", "option", "get", "gmt_offset"], + capture_output=True, + ).stdout.strip() + try: + offset = float(offset_value) + except ValueError: + offset = 0.0 + return timezone(timedelta(hours=offset)) + def create_category(self, name: str, parent: int) -> int: result = self._run( [ @@ -107,6 +147,9 @@ class WordPressCLI: categories: List[int], tags: List[str], source_identity: str, + created_on: Optional[str] = None, + last_modified: Optional[str] = None, + author: Optional[str] = None, ) -> int: payload = json.dumps({"_wp_materialize_source": source_identity}) args = [ @@ -122,6 +165,12 @@ class WordPressCLI: f"--meta_input={payload}", "--porcelain", ] + if created_on: + args.append(f"--post_date={created_on}") + if last_modified: + args.append(f"--post_modified={last_modified}") + if author: + args.append(f"--post_author={author}") result = self._run(args, capture_output=True) output = result.stdout.strip() try: @@ -136,17 +185,27 @@ class WordPressCLI: content: str, categories: List[int], tags: List[str], + created_on: Optional[str] = None, + last_modified: Optional[str] = None, + author: Optional[str] = None, ) -> None: args = [ "wp", "post", "update", str(post_id), + "--post_status=publish", f"--post_title={title}", f"--post_content={content}", f"--post_category={','.join(str(cat) for cat in categories)}", f"--tags_input={','.join(tags)}", ] + if created_on: + args.append(f"--post_date={created_on}") + if last_modified: + args.append(f"--post_modified={last_modified}") + if author: + args.append(f"--post_author={author}") self._run(args) def _run_json(self, cmd: List[str]):