From 6565a8546fe71e9e6561073563b3bebab7202bc9 Mon Sep 17 00:00:00 2001 From: Peisong Xiao Date: Sun, 8 Feb 2026 05:30:55 -0500 Subject: [PATCH] added local export support and refined program logic --- README.md | 10 +++ configurations.md | 31 +++++++-- examples.md | 10 ++- src/apply.py | 14 +++- src/cli.py | 95 ++++++++++++++++++++++++--- src/evaluation.py | 110 +++++++++++++++++++++++++++---- src/git_utils.py | 15 +++++ src/local_export.py | 157 ++++++++++++++++++++++++++++++++++++++++++++ src/manifest.py | 28 +++++++- src/models.py | 14 +++- src/wp_cli.py | 36 ++++++++++ 11 files changed, 488 insertions(+), 32 deletions(-) create mode 100644 src/local_export.py diff --git a/README.md b/README.md index c260a37..c821142 100644 --- a/README.md +++ b/README.md @@ -96,6 +96,16 @@ Skip git sync: wp-materialize apply --no-sync ``` +Local export (writes per-post directories with HTML, metadata, and WP command): + +```bash +wp-materialize local /path/to/output +``` + +Notes: +1. The local export assumes every post is new and generates create commands. +2. Categories must already exist in WordPress for exact commands. + ## Manifests Each managed directory must contain a `.wp-materialize.json` manifest. See `configurations.md` for the manifest guide. diff --git a/configurations.md b/configurations.md index 0e9775a..1645406 100644 --- a/configurations.md +++ b/configurations.md @@ -48,21 +48,28 @@ Top-level fields: Inherited category paths for this directory and its children. 2. `tags` (object, optional) Inherited tags for this directory and its children. -3. `subdirectories` (object, optional) +3. `author` (object, optional) + Inherited author for this directory and its children. Must resolve to a single author. +4. `subdirectories` (object, optional) Explicit list of subdirectories to traverse. -4. `files` (object, optional) +5. `files` (object, optional) Mapping of Markdown file names to file-level configuration. 
-`categories`, `tags`, and `subdirectories` objects: +`categories`, `tags`, `author`, and `subdirectories` objects: 1. `content` (array of strings, optional) List of values for the given field. For `categories`, each string is a hierarchical path such as `Systems/Infrastructure`. For `subdirectories`, each string is a directory name under the current directory. + For `author`, exactly one string must remain after inheritance is applied. 2. `inherit` (boolean, optional, default `true`) If `true`, append to the parent effective list. If `false`, replace the parent list entirely. +Note: Root directory manifests do not need to specify `inherit` for these top-level +fields (the default is `true`). File-level overrides inside `files` still support +inheritance via their own `inherit` fields. + `files` entries: Each key is a Markdown file name (relative to the manifest directory). @@ -73,10 +80,14 @@ Each value is an object with the following fields: 2. `use_heading_as_title` (object, optional) Extracts a heading from the Markdown as the title and removes that heading from the body while promoting remaining headings by one level. -3. `categories` (object, optional) +3. `created_on` (string, optional) + Manual override for the post creation time in `YYYY-MM-DD hh:mm` format. +4. `last_modified` (string, optional) + Manual override for the post modified time in `YYYY-MM-DD hh:mm` format. +5. `categories` (object, optional) Overrides categories for this file. Uses the same `content` and `inherit` fields as the top-level `categories` object. -4. `tags` (object, optional) +6. `tags` (object, optional) Overrides tags for this file. Uses the same `content` and `inherit` fields as the top-level `tags` object. @@ -87,6 +98,11 @@ Each value is an object with the following fields: 2. `strict` (boolean, optional, default `true`) If `true`, exactly one matching heading must exist. +If `created_on` or `last_modified` is not provided, the system infers the value. 
+For `git_repositories` sources it uses git commit timestamps; for `directories` +sources it uses filesystem timestamps. The system does not auto-detect git for +entries declared under `directories`, even if the path is inside a git repo. + ## Post Identity Each post is identified with: @@ -97,3 +113,8 @@ _wp_materialize_source = <source_name>:<relative_path> `source_name` is the `name` from the global config entry, and `relative_path` is relative to the repo or directory root used for identity resolution. + +## Tag and Category Creation + +Missing categories and tags are created automatically during apply, after a successful +dry-run evaluation and before any post updates. diff --git a/examples.md b/examples.md index 2fc4e74..63d37d9 100644 --- a/examples.md +++ b/examples.md @@ -10,6 +10,7 @@ Root directory manifest (`.wp-materialize.json`): { "categories": { "content": ["Systems", "Infrastructure"], "inherit": true }, "tags": { "content": ["automation", "wordpress"], "inherit": true }, + "author": { "content": ["editorial"], "inherit": true }, "subdirectories": { "content": ["design", "notes"], "inherit": true }, "files": { "post.md": { @@ -18,7 +19,9 @@ Root directory manifest (`.wp-materialize.json`): "tags": { "content": ["extra"], "inherit": true } }, "essay.md": { - "use_heading_as_title": { "level": 1, "strict": true } + "use_heading_as_title": { "level": 1, "strict": true }, + "created_on": "2025-01-10 09:30", + "last_modified": "2025-02-14 16:45" } } } @@ -103,3 +106,8 @@ Subdirectory manifest (`design/.wp-materialize.json`): "directories": [] } ``` + +## Timestamp Behavior Example + +- `git_repositories` entries use git commit timestamps for `created_on`/`last_modified` inference. +- `directories` entries use filesystem timestamps even if the path is inside a git repo. 
diff --git a/src/apply.py b/src/apply.py index e1cc99b..cacd5e4 100644 --- a/src/apply.py +++ b/src/apply.py @@ -19,6 +19,7 @@ def apply_changes( category_map = _build_category_map(categories) _create_missing_categories(result, wp, category_map) + _create_missing_tags(result, wp) successes: Set[str] = set() try: @@ -48,7 +49,7 @@ def _create_missing_categories( wp: WordPressCLI, category_map: Dict[tuple[int, str], int], ) -> None: - paths = result.categories_to_create.missing_paths + paths = result.taxonomy_to_create.missing_categories paths = sorted(paths, key=len) seen: Set[tuple[str, ...]] = set() for segments in paths: @@ -67,6 +68,11 @@ def _create_missing_categories( parent = new_id +def _create_missing_tags(result: EvaluationResult, wp: WordPressCLI) -> None: + for tag in result.taxonomy_to_create.missing_tags: + wp.create_tag(tag) + + def _apply_post(post: PostPlan, wp: WordPressCLI, category_map: Dict[tuple[int, str], int]) -> None: category_ids: List[int] = [] for path in post.categories: @@ -89,6 +95,9 @@ def _apply_post(post: PostPlan, wp: WordPressCLI, category_map: Dict[tuple[int, categories=category_ids, tags=post.tags, source_identity=post.identity, + created_on=post.created_on, + last_modified=post.last_modified, + author=post.author, ) return @@ -98,4 +107,7 @@ def _apply_post(post: PostPlan, wp: WordPressCLI, category_map: Dict[tuple[int, content=post.html, categories=category_ids, tags=post.tags, + created_on=post.created_on, + last_modified=post.last_modified, + author=post.author, ) diff --git a/src/cli.py b/src/cli.py index d64132f..2bed1a9 100644 --- a/src/cli.py +++ b/src/cli.py @@ -9,24 +9,81 @@ from .apply import apply_changes from .config import load_config from .errors import ConfigurationError, MaterializeError, ValidationError from .evaluation import evaluate +from .local_export import export_local from .state import load_state from .wp_cli import WordPressCLI def main() -> int: - parser = 
argparse.ArgumentParser(description="wp-materialize") - parser.add_argument("command", nargs="?", choices=["evaluate", "apply"], default="evaluate") - parser.add_argument("--config", type=Path, default=_default_config_path()) - parser.add_argument("--state", type=Path, default=_default_state_path()) - parser.add_argument("--no-sync", action="store_true", help="Skip git clone/pull") - parser.add_argument("--json", action="store_true", help="Output evaluation summary as JSON") + parser = argparse.ArgumentParser( + description="wp-materialize: compile Markdown manifests into WordPress posts", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + epilog=("Command-specific help: wp-materialize --help"), + ) + common = argparse.ArgumentParser(add_help=False) + common.add_argument( + "--config", + type=Path, + default=_default_config_path(), + help="Path to the global config JSON file.", + ) + common.add_argument( + "--state", + type=Path, + default=_default_state_path(), + help="Path to the state JSON file used for incremental tracking.", + ) + common.add_argument( + "--no-sync", + action="store_true", + help="Skip git clone/pull for git_repositories entries.", + ) + common.add_argument( + "--force-new", + action="store_true", + help="Force all posts to be treated as new (ignore incremental timestamps).", + ) + common.add_argument( + "--json", + action="store_true", + help="Output evaluation summary as JSON.", + ) + + subparsers = parser.add_subparsers(dest="command", metavar="command") + + subparsers.add_parser( + "evaluate", + parents=[common], + help="Validate config/manifests and plan changes (no WP writes).", + description="Validate config/manifests, convert Markdown, and plan changes without writing to WordPress.", + ) + subparsers.add_parser( + "apply", + parents=[common], + help="Evaluate then create/update WordPress posts and taxonomy.", + description="Evaluate, then create categories/tags and create or update posts in WordPress.", + ) + local_parser = 
subparsers.add_parser( + "local", + parents=[common], + help="Export per-post folders with HTML, metadata, and wp command.", + description="Export per-post folders with HTML, metadata, and the exact wp command.", + ) + local_parser.add_argument( + "output_dir", + help="Output directory for local export (required).", + ) args = parser.parse_args() + if args.command is None: + parser.print_help() + return 1 + try: config = load_config(args.config) state = load_state(args.state) - result = evaluate(config, state, sync_repos=not args.no_sync) + result = evaluate(config, state, sync_repos=not args.no_sync, force_new=args.force_new) except ValidationError as exc: _print_validation_error(exc) return 1 @@ -39,6 +96,20 @@ def main() -> int: else: print(_evaluation_summary(result)) + if args.command == "local": + output_dir = getattr(args, "output_dir", None) + if not output_dir: + print("Error: local command requires an output directory", file=sys.stderr) + return 1 + wp = WordPressCLI(config.wordpress_root) + try: + export_local(result, Path(output_dir), wp) + except MaterializeError as exc: + print(f"Error: {exc}", file=sys.stderr) + return 1 + print("Local export complete") + return 0 + if args.command == "apply": wp = WordPressCLI(config.wordpress_root) try: @@ -62,11 +133,13 @@ def _default_state_path() -> Path: def _evaluation_summary(result) -> str: total = len(result.posts) updates = sum(1 for post in result.posts if post.should_update) - categories = len(result.categories_to_create.missing_paths) + categories = len(result.taxonomy_to_create.missing_categories) + tags = len(result.taxonomy_to_create.missing_tags) lines = [ f"Posts: {total}", f"Posts to update: {updates}", f"Categories to create: {categories}", + f"Tags to create: {tags}", ] return "\n".join(lines) @@ -83,10 +156,14 @@ def _evaluation_json(result) -> str: "should_update": post.should_update, "categories": post.categories, "tags": post.tags, + "created_on": post.created_on, + "last_modified": 
post.last_modified, + "author": post.author, } for post in result.posts ], - "categories_to_create": result.categories_to_create.missing_paths, + "categories_to_create": result.taxonomy_to_create.missing_categories, + "tags_to_create": result.taxonomy_to_create.missing_tags, } return json.dumps(payload, indent=2) diff --git a/src/evaluation.py b/src/evaluation.py index af98e48..899ced2 100644 --- a/src/evaluation.py +++ b/src/evaluation.py @@ -1,16 +1,17 @@ from __future__ import annotations from dataclasses import dataclass +from datetime import datetime from pathlib import Path import shutil from typing import Dict, List, Optional, Set from .config import Config from .errors import ValidationError, ValidationIssue -from .git_utils import ensure_repo, git_timestamp +from .git_utils import ensure_repo, git_first_timestamp, git_timestamp from .manifest import load_manifest from .markdown_utils import convert_markdown, extract_title -from .models import CategoryPlan, EvaluationResult, InheritList, Manifest, PostPlan, Source +from .models import EvaluationResult, InheritList, PostPlan, Source, TaxonomyPlan from .state import State from .wp_cli import WordPressCLI @@ -19,11 +20,12 @@ from .wp_cli import WordPressCLI class _Context: categories: InheritList tags: InheritList + author: InheritList subdirectories: InheritList manifest_chain: List[Path] -def evaluate(config: Config, state: State, sync_repos: bool) -> EvaluationResult: +def evaluate(config: Config, state: State, sync_repos: bool, force_new: bool = False) -> EvaluationResult: issues: List[ValidationIssue] = [] sources = _load_sources(config, sync_repos, issues) @@ -36,6 +38,7 @@ def evaluate(config: Config, state: State, sync_repos: bool) -> EvaluationResult context=_Context( categories=InheritList(), tags=InheritList(), + author=InheritList(), subdirectories=InheritList(), manifest_chain=[], ), @@ -56,12 +59,15 @@ def evaluate(config: Config, state: State, sync_repos: bool) -> EvaluationResult except 
Exception as exc: issues.append(ValidationIssue(str(exc), context=str(config.wordpress_root))) - missing_categories = _plan_categories(posts, categories, issues, tag_names) + missing_categories, missing_tags = _plan_taxonomy(posts, categories, tag_names) if issues: raise ValidationError(issues) - return EvaluationResult(posts=posts, categories_to_create=CategoryPlan(missing_paths=missing_categories)) + return EvaluationResult( + posts=posts, + taxonomy_to_create=TaxonomyPlan(missing_categories=missing_categories, missing_tags=missing_tags), + ) def _load_sources( @@ -123,6 +129,7 @@ def _evaluate_directory( effective_categories = _merge_inherit(context.categories, manifest.categories) effective_tags = _merge_inherit(context.tags, manifest.tags) + effective_author = _merge_inherit(context.author, manifest.author) effective_subdirs = _merge_inherit(context.subdirectories, manifest.subdirectories) manifest_chain = context.manifest_chain + [manifest.path] @@ -161,6 +168,7 @@ def _evaluate_directory( resolved_categories = _normalize_list(resolved_categories, "category", str(file_path), issues) resolved_tags = _normalize_list(resolved_tags, "tag", str(file_path), issues) + resolved_author = _resolve_author(effective_author.content, str(file_path), issues) html = convert_markdown(markdown_body, context=str(file_path), issues=issues) if html is None: @@ -189,7 +197,14 @@ def _evaluate_directory( identity = f"{source.name}:{relative_path}" cached_entry = state.posts.get(identity) cached_ts = cached_entry.source_timestamp if cached_entry else None - should_update = cached_ts is None or source_timestamp > cached_ts + should_update = True if force_new else (cached_ts is None or source_timestamp > cached_ts) + created_on, last_modified = _resolve_post_datetimes( + source=source, + identity_root=source.identity_root, + relative_path=relative_path, + spec=spec, + issues=issues, + ) posts.append( PostPlan( @@ -201,9 +216,12 @@ def _evaluate_directory( html=html, 
categories=resolved_categories, tags=resolved_tags, + author=resolved_author, source_timestamp=source_timestamp, cached_timestamp=cached_ts, should_update=should_update, + created_on=created_on, + last_modified=last_modified, ) ) @@ -218,6 +236,7 @@ def _evaluate_directory( context=_Context( categories=effective_categories, tags=effective_tags, + author=effective_author, subdirectories=effective_subdirs, manifest_chain=manifest_chain, ), @@ -263,6 +282,16 @@ def _normalize_list(values: List[str], label: str, context: str, issues: List[Va return normalized +def _resolve_author(values: List[str], context: str, issues: List[ValidationIssue]) -> Optional[str]: + normalized = _normalize_list(values, "author", context, issues) + if not normalized: + return None + if len(normalized) > 1: + issues.append(ValidationIssue("Multiple authors specified; only one is allowed", context=context)) + return None + return normalized[0] + + def _relative_path(path: Path, root: Path, issues: List[ValidationIssue]) -> Optional[str]: try: return str(path.relative_to(root)) @@ -290,25 +319,84 @@ def _timestamp_for_path( return None -def _plan_categories( +def _resolve_post_datetimes( + source: Source, + identity_root: Path, + relative_path: str, + spec, + issues: List[ValidationIssue], +) -> tuple[Optional[str], Optional[str]]: + created_dt = spec.created_on + modified_dt = spec.last_modified + + if created_dt is None or modified_dt is None: + inferred = _infer_file_timestamps(source, identity_root, relative_path, issues) + if inferred is None: + return None, None + inferred_created, inferred_modified = inferred + if created_dt is None: + created_dt = datetime.fromtimestamp(inferred_created) + if modified_dt is None: + modified_dt = datetime.fromtimestamp(inferred_modified) + + if created_dt and modified_dt and modified_dt < created_dt: + issues.append( + ValidationIssue("last_modified cannot be earlier than created_on", context=relative_path) + ) + return None, None + + created_on = 
_format_wp_datetime(created_dt) if created_dt else None + last_modified = _format_wp_datetime(modified_dt) if modified_dt else None + return created_on, last_modified + + +def _infer_file_timestamps( + source: Source, + identity_root: Path, + relative_path: str, + issues: List[ValidationIssue], +) -> Optional[tuple[int, int]]: + if source.kind == "git": + try: + created_ts = git_first_timestamp(identity_root, relative_path) + modified_ts = git_timestamp(identity_root, relative_path) + return created_ts, modified_ts + except Exception: + pass + try: + stat = (identity_root / relative_path).stat() + return int(stat.st_ctime), int(stat.st_mtime) + except Exception as exc: + issues.append(ValidationIssue(f"Timestamp lookup failed: {exc}", context=relative_path)) + return None + + +def _format_wp_datetime(value: datetime) -> str: + return value.strftime("%Y-%m-%d %H:%M:%S") + + +def _plan_taxonomy( posts: List[PostPlan], categories, # list of CategoryTerm - issues: List[ValidationIssue], existing_tags: Set[str], -) -> List[List[str]]: +) -> tuple[List[List[str]], List[str]]: category_map: Dict[tuple[int, str], int] = {} for category in categories: category_map[(category.parent, category.name)] = category.term_id missing_paths: List[List[str]] = [] seen_missing: Set[tuple[str, ...]] = set() + missing_tags: List[str] = [] + seen_tags: Set[str] = set() for post in posts: if not post.should_update: continue for tag in post.tags: if tag not in existing_tags: - issues.append(ValidationIssue(f"Tag does not exist: {tag}", context=post.relative_path)) + if tag not in seen_tags: + seen_tags.add(tag) + missing_tags.append(tag) for path in post.categories: segments = [segment for segment in path.split("/") if segment] if not segments: @@ -328,4 +416,4 @@ def _plan_categories( seen_missing.add(key) missing_paths.append(list(segments)) - return missing_paths + return missing_paths, missing_tags diff --git a/src/git_utils.py b/src/git_utils.py index 9d79917..ef141f8 100644 --- 
a/src/git_utils.py +++ b/src/git_utils.py @@ -38,6 +38,21 @@ def git_timestamp(repo_root: Path, relative_path: str) -> int: raise ConfigurationError(f"Invalid git timestamp for {relative_path}: {output}") from exc +def git_first_timestamp(repo_root: Path, relative_path: str) -> int: + result = _run( + ["git", "log", "--reverse", "-1", "--format=%ct", "--", relative_path], + cwd=repo_root, + capture_output=True, + ) + output = result.stdout.strip() + if not output: + raise ConfigurationError(f"No git timestamp for {relative_path}") + try: + return int(output) + except ValueError as exc: + raise ConfigurationError(f"Invalid git timestamp for {relative_path}: {output}") from exc + + def _run(cmd: list[str], cwd: Path, capture_output: bool = False) -> subprocess.CompletedProcess: try: return subprocess.run( diff --git a/src/local_export.py b/src/local_export.py new file mode 100644 index 0000000..868b0a1 --- /dev/null +++ b/src/local_export.py @@ -0,0 +1,157 @@ +from __future__ import annotations + +import json +import re +import shlex +import unicodedata +from pathlib import Path +from typing import Dict, List, Set + +from .errors import MaterializeError, WordPressError +from .models import EvaluationResult, PostPlan +from .wp_cli import CategoryTerm, WordPressCLI + + +def export_local(result: EvaluationResult, output_dir: Path, wp: WordPressCLI) -> None: + if not output_dir.exists(): + output_dir.mkdir(parents=True, exist_ok=True) + if not output_dir.is_dir(): + raise MaterializeError(f"Output path is not a directory: {output_dir}") + + categories = wp.list_categories() + category_map = _build_category_map(categories) + missing_categories = _find_missing_categories(result.posts, category_map) + if missing_categories: + raise MaterializeError( + "Cannot build exact wp commands with missing categories. " + "Run apply to create categories first." 
+ ) + + used_names: Set[str] = set() + for post in result.posts: + category_ids = _resolve_category_ids(post, category_map) + metadata = _build_metadata(post, category_ids) + command = _build_wp_command(post, category_ids) + + base_name = _normalize_name(f"{post.source.name}/{post.relative_path}") + title_name = _normalize_name(post.title) + if title_name: + dir_name = f"{base_name}-{title_name}" + else: + dir_name = base_name + dir_name = _dedupe_name(dir_name, used_names) + used_names.add(dir_name) + + target_dir = output_dir / dir_name + target_dir.mkdir(parents=True, exist_ok=True) + + (target_dir / "post.html").write_text(post.html, encoding="utf-8") + (target_dir / "metadata.json").write_text( + json.dumps(metadata, indent=2, sort_keys=True), + encoding="utf-8", + ) + (target_dir / "wp-command.txt").write_text(command + "\n", encoding="utf-8") + + +def _build_category_map(categories: List[CategoryTerm]) -> Dict[tuple[int, str], int]: + return {(category.parent, category.name): category.term_id for category in categories} + + +def _resolve_category_ids(post: PostPlan, category_map: Dict[tuple[int, str], int]) -> List[int]: + category_ids: List[int] = [] + for path in post.categories: + segments = [segment for segment in path.split("/") if segment] + if not segments: + continue + parent = 0 + for segment in segments: + map_key = (parent, segment) + if map_key not in category_map: + raise WordPressError(f"Missing category during local export: {path}") + parent = category_map[map_key] + category_ids.append(parent) + return category_ids + + +def _find_missing_categories(posts: List[PostPlan], category_map: Dict[tuple[int, str], int]) -> List[str]: + missing: Set[str] = set() + for post in posts: + for path in post.categories: + segments = [segment for segment in path.split("/") if segment] + if not segments: + continue + parent = 0 + for segment in segments: + map_key = (parent, segment) + if map_key not in category_map: + missing.add(path) + break + parent = 
category_map[map_key] + return sorted(missing) + + +def _build_metadata(post: PostPlan, category_ids: List[int]) -> dict: + metadata = { + "post_type": "post", + "post_status": "publish", + "post_title": post.title, + "post_content": post.html, + "post_category": category_ids, + "tags_input": post.tags, + "meta_input": {"_wp_materialize_source": post.identity}, + } + if post.created_on: + metadata["post_date"] = post.created_on + if post.last_modified: + metadata["post_modified"] = post.last_modified + if post.author: + metadata["post_author"] = post.author + return metadata + + +def _build_wp_command(post: PostPlan, category_ids: List[int]) -> str: + payload = json.dumps({"_wp_materialize_source": post.identity}) + args = [ + "wp", + "post", + "create", + "--post_type=post", + "--post_status=publish", + f"--post_title={post.title}", + f"--post_content={post.html}", + f"--post_category={','.join(str(cat) for cat in category_ids)}", + f"--tags_input={','.join(post.tags)}", + f"--meta_input={payload}", + "--porcelain", + ] + if post.created_on: + args.append(f"--post_date={post.created_on}") + if post.last_modified: + args.append(f"--post_modified={post.last_modified}") + if post.author: + args.append(f"--post_author={post.author}") + return " ".join(shlex.quote(arg) for arg in args) + + +def _normalize_name(value: str) -> str: + text = value.strip() + text = text.replace("\\", "/") + text = text.replace("/", "-") + text = unicodedata.normalize("NFKD", text) + text = text.encode("ascii", "ignore").decode("ascii") + text = text.lower() + text = re.sub(r"[^a-z0-9._-]+", "-", text) + text = re.sub(r"-+", "-", text) + text = text.strip("-_.") + return text or "post" + + +def _dedupe_name(name: str, used: Set[str]) -> str: + if name not in used: + return name + index = 2 + while True: + candidate = f"{name}-{index}" + if candidate not in used: + return candidate + index += 1 diff --git a/src/manifest.py b/src/manifest.py index 94bdba1..97ea5ca 100644 --- a/src/manifest.py 
+++ b/src/manifest.py @@ -1,6 +1,7 @@ from __future__ import annotations import json +from datetime import datetime from pathlib import Path from typing import Dict @@ -23,7 +24,7 @@ def load_manifest(path: Path, issues: list[ValidationIssue]) -> Manifest | None: issues.append(ValidationIssue("Manifest must be a JSON object", context=str(path))) return None - allowed = {"categories", "tags", "subdirectories", "files"} + allowed = {"categories", "tags", "author", "subdirectories", "files"} extra = set(data.keys()) - allowed if extra: issues.append(ValidationIssue(f"Unexpected keys: {sorted(extra)}", context=str(path))) @@ -31,6 +32,7 @@ def load_manifest(path: Path, issues: list[ValidationIssue]) -> Manifest | None: categories = _parse_inherit_list(data.get("categories"), issues, f"{path}:categories") tags = _parse_inherit_list(data.get("tags"), issues, f"{path}:tags") + author = _parse_inherit_list(data.get("author"), issues, f"{path}:author") subdirectories = _parse_inherit_list(data.get("subdirectories"), issues, f"{path}:subdirectories") files: Dict[str, FileSpec] = {} @@ -46,7 +48,7 @@ def load_manifest(path: Path, issues: list[ValidationIssue]) -> Manifest | None: if not isinstance(file_cfg, dict): issues.append(ValidationIssue(f"{file_name} must be an object", context=str(path))) continue - extra_file = set(file_cfg.keys()) - {"title", "use_heading_as_title", "categories", "tags"} + extra_file = set(file_cfg.keys()) - {"title", "use_heading_as_title", "categories", "tags", "created_on", "last_modified"} if extra_file: issues.append( ValidationIssue(f"{file_name} has unexpected keys: {sorted(extra_file)}", context=str(path)) @@ -89,6 +91,12 @@ def load_manifest(path: Path, issues: list[ValidationIssue]) -> Manifest | None: categories_override = _parse_inherit_list(file_cfg.get("categories"), issues, f"{path}:{file_name}:categories") tags_override = _parse_inherit_list(file_cfg.get("tags"), issues, f"{path}:{file_name}:tags") + created_on = 
_parse_datetime_field(file_cfg.get("created_on"), issues, f"{path}:{file_name}:created_on") + last_modified = _parse_datetime_field(file_cfg.get("last_modified"), issues, f"{path}:{file_name}:last_modified") + if created_on and last_modified and last_modified < created_on: + issues.append( + ValidationIssue("last_modified cannot be earlier than created_on", context=str(path)) + ) files[file_name] = FileSpec( title=title, @@ -96,12 +104,15 @@ def load_manifest(path: Path, issues: list[ValidationIssue]) -> Manifest | None: use_heading_strict=use_strict, categories=categories_override, tags=tags_override, + created_on=created_on, + last_modified=last_modified, ) return Manifest( path=path, categories=categories, tags=tags, + author=author, subdirectories=subdirectories, files=files, ) @@ -129,3 +140,16 @@ def _parse_inherit_list(value: object, issues: list[ValidationIssue], context: s inherit = True return InheritList(content=[item for item in content if isinstance(item, str)], inherit=inherit) + + +def _parse_datetime_field(value: object, issues: list[ValidationIssue], context: str) -> datetime | None: + if value is None: + return None + if not isinstance(value, str) or not value.strip(): + issues.append(ValidationIssue("Must be a non-empty string", context=context)) + return None + try: + return datetime.strptime(value.strip(), "%Y-%m-%d %H:%M") + except ValueError: + issues.append(ValidationIssue("Invalid datetime format (expected YYYY-MM-DD hh:mm)", context=context)) + return None diff --git a/src/models.py b/src/models.py index c0f0288..c010b27 100644 --- a/src/models.py +++ b/src/models.py @@ -1,6 +1,7 @@ from __future__ import annotations from dataclasses import dataclass, field +from datetime import datetime from pathlib import Path from typing import Dict, List, Optional @@ -18,6 +19,8 @@ class FileSpec: use_heading_strict: bool categories: Optional[InheritList] tags: Optional[InheritList] + created_on: Optional[datetime] + last_modified: Optional[datetime] 
@dataclass(frozen=True) @@ -25,6 +28,7 @@ class Manifest: path: Path categories: InheritList tags: InheritList + author: InheritList subdirectories: InheritList files: Dict[str, FileSpec] @@ -47,17 +51,21 @@ class PostPlan: html: str categories: List[str] tags: List[str] + author: Optional[str] source_timestamp: int cached_timestamp: Optional[int] should_update: bool + created_on: Optional[str] + last_modified: Optional[str] @dataclass -class CategoryPlan: - missing_paths: List[List[str]] +class TaxonomyPlan: + missing_categories: List[List[str]] + missing_tags: List[str] @dataclass class EvaluationResult: posts: List[PostPlan] - categories_to_create: CategoryPlan + taxonomy_to_create: TaxonomyPlan diff --git a/src/wp_cli.py b/src/wp_cli.py index 6e029d7..4264776 100644 --- a/src/wp_cli.py +++ b/src/wp_cli.py @@ -60,6 +60,24 @@ class WordPressCLI: tags.append(TagTerm(term_id=int(entry["term_id"]), name=entry["name"])) return tags + def create_tag(self, name: str) -> int: + result = self._run( + [ + "wp", + "term", + "create", + "post_tag", + name, + "--porcelain", + ], + capture_output=True, + ) + output = result.stdout.strip() + try: + return int(output) + except ValueError as exc: + raise WordPressError(f"Invalid tag id from wp cli: {output}") from exc + def create_category(self, name: str, parent: int) -> int: result = self._run( [ @@ -107,6 +125,9 @@ class WordPressCLI: categories: List[int], tags: List[str], source_identity: str, + created_on: Optional[str] = None, + last_modified: Optional[str] = None, + author: Optional[str] = None, ) -> int: payload = json.dumps({"_wp_materialize_source": source_identity}) args = [ @@ -122,6 +143,12 @@ class WordPressCLI: f"--meta_input={payload}", "--porcelain", ] + if created_on: + args.append(f"--post_date={created_on}") + if last_modified: + args.append(f"--post_modified={last_modified}") + if author: + args.append(f"--post_author={author}") result = self._run(args, capture_output=True) output = result.stdout.strip() 
try: @@ -136,6 +163,9 @@ class WordPressCLI: content: str, categories: List[int], tags: List[str], + created_on: Optional[str] = None, + last_modified: Optional[str] = None, + author: Optional[str] = None, ) -> None: args = [ "wp", @@ -147,6 +177,12 @@ class WordPressCLI: f"--post_category={','.join(str(cat) for cat in categories)}", f"--tags_input={','.join(tags)}", ] + if created_on: + args.append(f"--post_date={created_on}") + if last_modified: + args.append(f"--post_modified={last_modified}") + if author: + args.append(f"--post_author={author}") self._run(args) def _run_json(self, cmd: List[str]):