diff --git a/.deepwork/rules/json_validation/examples/invalid-example.json b/.deepwork/rules/json_validation/examples/invalid-example.json new file mode 100644 index 00000000..386f9d5a --- /dev/null +++ b/.deepwork/rules/json_validation/examples/invalid-example.json @@ -0,0 +1,6 @@ +{ + "$schema": "./test-schema.json", + "name": "", + "version": "not-a-version", + "extra_field": "not allowed" +} diff --git a/.deepwork/rules/json_validation/examples/invalid-example.yaml b/.deepwork/rules/json_validation/examples/invalid-example.yaml new file mode 100644 index 00000000..b76d9967 --- /dev/null +++ b/.deepwork/rules/json_validation/examples/invalid-example.yaml @@ -0,0 +1,3 @@ +$schema: ./test-schema.json +name: my-project +version: 123 diff --git a/.deepwork/rules/json_validation/examples/test-schema.json b/.deepwork/rules/json_validation/examples/test-schema.json new file mode 100644 index 00000000..aee0164f --- /dev/null +++ b/.deepwork/rules/json_validation/examples/test-schema.json @@ -0,0 +1,22 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "required": ["name", "version"], + "properties": { + "$schema": { + "type": "string" + }, + "name": { + "type": "string", + "minLength": 1 + }, + "version": { + "type": "string", + "pattern": "^[0-9]+\\.[0-9]+\\.[0-9]+$" + }, + "description": { + "type": "string" + } + }, + "additionalProperties": false +} diff --git a/.deepwork/rules/json_validation/examples/valid-example.json b/.deepwork/rules/json_validation/examples/valid-example.json new file mode 100644 index 00000000..c98b41b6 --- /dev/null +++ b/.deepwork/rules/json_validation/examples/valid-example.json @@ -0,0 +1,6 @@ +{ + "$schema": "./test-schema.json", + "name": "my-project", + "version": "2.0.0", + "description": "A sample JSON project" +} diff --git a/.deepwork/rules/json_validation/examples/valid-example.yaml b/.deepwork/rules/json_validation/examples/valid-example.yaml new file mode 100644 index 00000000..65491749 --- /dev/null +++ b/.deepwork/rules/json_validation/examples/valid-example.yaml @@ -0,0 +1,4 @@ +$schema: ./test-schema.json +name: my-project +version: "1.0.0" +description: A sample project diff --git a/.deepwork/rules/json_validation/schema-validation.md b/.deepwork/rules/json_validation/schema-validation.md new file mode 100644 index 00000000..3e3c5919 --- /dev/null +++ b/.deepwork/rules/json_validation/schema-validation.md @@ -0,0 +1,79 @@ +--- +name: Schema Validation +trigger: + - "**/*.yml" + - "**/*.yaml" + - "**/*.json" +action: + command: python3 .deepwork/rules/json_validation/scripts/validate_schema.py {file} + run_for: each_match +compare_to: prompt +--- +Validates YAML and JSON files against their declared JSON Schema. + +This rule triggers on any `.yml`, `.yaml`, or `.json` file that is modified. It +performs a quick text scan for a `$schema` declaration before doing any parsing. +Only files with a schema reference are fully parsed and validated. + +## Schema Declaration + +**YAML files:** +```yaml +$schema: https://json-schema.org/draft-07/schema +# or +$schema: ./schemas/my-schema.json +``` + +**JSON files:** +```json +{ + "$schema": "https://json-schema.org/draft-07/schema", + "name": "example" +} +``` + +## Behavior + +1. **Quick scan**: Searches first 4KB for `$schema` pattern (no parsing) +2. **Skip if none**: Files without schema declaration pass immediately +3. **Full parse**: Only files with schema are fully parsed +4. **Validate**: Content validated against the declared schema + +### Exit Codes + +- **0 (pass)**: File validates against schema, or no schema declared +- **1 (fail)**: Validation failed - returns blocking JSON with error details +- **2 (error)**: Could not load schema or parse file + +## Example Output + +On validation failure: +```json +{ + "status": "fail", + "file": "config.json", + "schema": "https://example.com/schemas/config.json", + "error_count": 2, + "errors": [ + { + "message": "'name' is a required property", + "path": "/", + "schema_path": "/required" + }, + { + "message": "42 is not of type 'string'", + "path": "/version", + "schema_path": "/properties/version/type", + "value": 42 + } + ] +} +``` + +## Requirements + +- Python 3.10+ +- `jsonschema` package (required) +- `pyyaml` package (required for YAML files) + +Install with: `pip install jsonschema pyyaml` diff --git a/.deepwork/rules/json_validation/scripts/validate_schema.py b/.deepwork/rules/json_validation/scripts/validate_schema.py new file mode 100755 index 00000000..ed944832 --- /dev/null +++ b/.deepwork/rules/json_validation/scripts/validate_schema.py @@ -0,0 +1,271 @@ +#!/usr/bin/env python3 +""" +Validate YAML and JSON files against their declared JSON Schema. + +This script first performs a quick text search for $schema in the file. +Only if a schema reference is found does it fully parse the file and validate. + +Supported file types: .yml, .yaml, .json + +Exit codes: + 0 - Validation passed (or no schema declared) + 1 - Validation failed (outputs JSON with failure details) + 2 - Error fetching/loading schema +""" + +import json +import re +import sys +import urllib.request +import urllib.error +from pathlib import Path + +try: + import yaml +except ImportError: + yaml = None + +try: + import jsonschema + from jsonschema import Draft7Validator +except ImportError: + print(json.dumps({ + "status": "error", + "message": "jsonschema is not installed. Run: pip install jsonschema" + })) + sys.exit(2) + + +# Pattern to quickly detect $schema in file content without full parsing +# Matches both JSON ("$schema": "...") and YAML ($schema: ...) +SCHEMA_PATTERN = re.compile( + r'''["']?\$schema["']?\s*[:=]\s*["']?([^"'\s,}\]]+)''', + re.IGNORECASE +) + + +def quick_detect_schema(file_path: str) -> str | None: + """ + Quickly scan file for $schema declaration without full parsing. + Returns the schema reference if found, None otherwise. + """ + try: + with open(file_path, "r", encoding="utf-8") as f: + # Read first 4KB - schema should be near the top + content = f.read(4096) + + match = SCHEMA_PATTERN.search(content) + if match: + return match.group(1).rstrip("'\"") + return None + + except Exception: + return None + + +def parse_file(file_path: str) -> tuple[dict | list | None, str | None]: + """Parse a YAML or JSON file and return its contents.""" + path = Path(file_path) + suffix = path.suffix.lower() + + try: + with open(file_path, "r", encoding="utf-8") as f: + content = f.read() + + if suffix == ".json": + return json.loads(content), None + elif suffix in (".yml", ".yaml"): + if yaml is None: + return None, "PyYAML is not installed. Run: pip install pyyaml" + return yaml.safe_load(content), None + else: + # Try JSON first, then YAML + try: + return json.loads(content), None + except json.JSONDecodeError: + if yaml: + return yaml.safe_load(content), None + return None, f"Unsupported file type: {suffix}" + + except json.JSONDecodeError as e: + return None, f"Invalid JSON syntax: {e}" + except yaml.YAMLError as e: + return None, f"Invalid YAML syntax: {e}" + except FileNotFoundError: + return None, f"File not found: {file_path}" + except Exception as e: + return None, f"Error reading file: {e}" + + +def extract_schema_reference(content: dict) -> str | None: + """Extract the $schema reference from parsed content.""" + if not isinstance(content, dict): + return None + return content.get("$schema") + + +def fetch_schema_from_url(url: str) -> tuple[dict | None, str | None]: + """Fetch a JSON Schema from a URL.""" + try: + req = urllib.request.Request( + url, + headers={"User-Agent": "schema-validator/1.0"} + ) + with urllib.request.urlopen(req, timeout=30) as response: + schema_content = response.read().decode("utf-8") + + # Try to parse as JSON first, then YAML + try: + return json.loads(schema_content), None + except json.JSONDecodeError: + if yaml: + try: + return yaml.safe_load(schema_content), None + except yaml.YAMLError as e: + return None, f"Invalid schema format at URL: {e}" + return None, "Invalid JSON schema format at URL" + + except urllib.error.URLError as e: + return None, f"Failed to fetch schema from URL: {e}" + except Exception as e: + return None, f"Error fetching schema: {e}" + + +def load_schema_from_path(schema_path: str, source_file_path: str) -> tuple[dict | None, str | None]: + """Load a JSON Schema from a local file path.""" + # Resolve relative paths relative to the source file's directory + path = Path(schema_path) + if not path.is_absolute(): + source_dir = Path(source_file_path).parent + path = source_dir / path + + path = path.resolve() + + if not path.exists(): + return None, f"Schema file not found: {path}" + + try: + with open(path, "r", encoding="utf-8") as f: + content = f.read() + + # Try to parse as JSON first, then YAML + try: + return json.loads(content), None + except json.JSONDecodeError: + if yaml: + try: + return yaml.safe_load(content), None + except yaml.YAMLError as e: + return None, f"Invalid schema format: {e}" + return None, "Invalid JSON schema format" + + except Exception as e: + return None, f"Error reading schema file: {e}" + + +def load_schema(schema_ref: str, source_file_path: str) -> tuple[dict | None, str | None]: + """Load a schema from either a URL or local path.""" + if schema_ref.startswith(("http://", "https://")): + return fetch_schema_from_url(schema_ref) + else: + return load_schema_from_path(schema_ref, source_file_path) + + +def validate_against_schema(content: dict, schema: dict) -> list[dict]: + """Validate content against a JSON Schema and return list of errors.""" + validator = Draft7Validator(schema) + errors = [] + + for error in sorted(validator.iter_errors(content), key=lambda e: e.path): + error_info = { + "message": error.message, + "path": "/" + "/".join(str(p) for p in error.absolute_path) if error.absolute_path else "/", + "schema_path": "/" + "/".join(str(p) for p in error.absolute_schema_path) if error.absolute_schema_path else "/", + } + + # Add the failing value if it's simple enough to display + if error.instance is not None and not isinstance(error.instance, (dict, list)): + error_info["value"] = error.instance + + errors.append(error_info) + + return errors + + +def main(): + if len(sys.argv) < 2: + print(json.dumps({ + "status": "error", + "message": "Usage: validate_schema.py " + })) + sys.exit(2) + + file_path = sys.argv[1] + + # Step 1: Quick detection - scan for $schema without parsing + quick_schema = quick_detect_schema(file_path) + if not quick_schema: + # No schema found in quick scan - pass without full parsing + print(json.dumps({ + "status": "pass", + "file": file_path, + "message": "No $schema declared, skipping validation" + })) + sys.exit(0) + + # Step 2: Schema detected - now do full parsing + content, error = parse_file(file_path) + if error: + print(json.dumps({ + "status": "error", + "file": file_path, + "message": error + })) + sys.exit(2) + + # Get the actual schema reference from parsed content + schema_ref = extract_schema_reference(content) + if not schema_ref: + # Quick scan found something but it wasn't actually a $schema field + print(json.dumps({ + "status": "pass", + "file": file_path, + "message": "No $schema declared, skipping validation" + })) + sys.exit(0) + + # Step 3: Load the schema + schema, error = load_schema(schema_ref, file_path) + if error: + print(json.dumps({ + "status": "error", + "file": file_path, + "schema": schema_ref, + "message": error + })) + sys.exit(2) + + # Step 4: Validate + errors = validate_against_schema(content, schema) + + if not errors: + print(json.dumps({ + "status": "pass", + "file": file_path, + "schema": schema_ref, + "message": "Validation passed" + })) + sys.exit(0) + else: + print(json.dumps({ + "status": "fail", + "file": file_path, + "schema": schema_ref, + "error_count": len(errors), + "errors": errors + }, indent=2)) + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/AGENTS.md b/AGENTS.md index 3b1dfeec..4b1addbf 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -23,18 +23,16 @@ When creating or modifying jobs in this repository, you MUST understand which ty ### 2. Library Jobs (`library/jobs/`) -**What they are**: Example or reusable jobs that any repository is welcome to use, but are NOT auto-installed. Users must explicitly copy or import these into their projects. +**What they are**: Example or reusable jobs that any repository is welcome to use, but are NOT auto-installed. Users must explicitly copy or import these into their projects. Some library jobs may be symlinks to bespoke jobs that serve as good examples. **Location**: `library/jobs/[job_name]/` -**Examples** (potential): -- Competitive research workflows -- Code review processes -- Documentation generation -- Release management +**Examples**: +- `commit` - Lint, test, and commit workflow (symlink to `.deepwork/jobs/commit`) **Editing rules**: -- Edit directly in `library/jobs/[job_name]/` +- If the job is a symlink, edit the source in `.deepwork/jobs/[job_name]/` +- If the job is a standalone directory, edit directly in `library/jobs/[job_name]/` - These are templates/examples for users to adopt - Should be well-documented and self-contained @@ -77,12 +75,20 @@ deepwork/ ├── src/deepwork/standard_jobs/ # Standard jobs (source of truth) │ ├── deepwork_jobs/ │ └── deepwork_rules/ -├── library/jobs/ # Library/example jobs -│ └── [example_job]/ -└── .deepwork/jobs/ # Installed standard jobs + bespoke jobs - ├── deepwork_jobs/ # ← Installed copy, NOT source of truth - ├── deepwork_rules/ # ← Installed copy, NOT source of truth - └── [bespoke_job]/ # ← Source of truth for bespoke only +├── library/ # Library of examples +│ ├── jobs/ # Library jobs (may be symlinks) +│ │ ├── commit -> ../../.deepwork/jobs/commit +│ │ └── README.md +│ └── rules/ # Library rules (may be symlinks) +│ └── json_validation -> ../../.deepwork/rules/json_validation +└── .deepwork/ # Repo-specific configuration + ├── jobs/ # Installed standard jobs + bespoke jobs + │ ├── deepwork_jobs/ # ← Installed copy, NOT source of truth + │ ├── deepwork_rules/ # ← Installed copy, NOT source of truth + │ └── commit/ # ← Bespoke job (also exposed in library/) + └── rules/ # Repo-specific rules + └── json_validation/ # ← Bespoke rule (also exposed in library/) +``` ## Debugging Issues @@ -113,5 +119,3 @@ The environment includes: - Python 3.11 - uv (package manager) - All dev dependencies (pytest, ruff, mypy, etc.) - -``` diff --git a/claude.md b/claude.md index 07d4b325..d6e36fe9 100644 --- a/claude.md +++ b/claude.md @@ -47,7 +47,9 @@ deepwork/ │ │ └── deepwork_rules/ │ ├── schemas/ # Job definition schemas │ └── utils/ # Utilities (fs, git, yaml, validation) -├── library/jobs/ # Reusable example jobs (not auto-installed) +├── library/ # Reusable examples (not auto-installed) +│ ├── jobs/ # Example jobs (some may be symlinks) +│ └── rules/ # Example rules (some may be symlinks) ├── tests/ # Test suite ├── doc/ # Documentation └── doc/architecture.md # Detailed architecture document diff --git a/library/jobs/README.md b/library/jobs/README.md index 4c49bb02..3e871155 100644 --- a/library/jobs/README.md +++ b/library/jobs/README.md @@ -16,7 +16,7 @@ Each job in this library follows the same structure as the `.deepwork/jobs` subf ``` library/jobs/ -├── [job-name]/ +├── [job-name]/ # May be actual folder or symlink to .deepwork/jobs/ │ ├── job.yml # Job definition (name, steps, dependencies) │ └── steps/ │ ├── step_one.md # Instructions for step one @@ -29,6 +29,8 @@ library/jobs/ └── README.md ``` +**Note**: Some jobs in this library may be symlinks to `.deepwork/jobs/` where the actual job definitions live. This allows the library to expose jobs that are actively used in this repository. + ### job.yml The job definition file contains: diff --git a/library/jobs/commit b/library/jobs/commit new file mode 120000 index 00000000..ad92446f --- /dev/null +++ b/library/jobs/commit @@ -0,0 +1 @@ +../../.deepwork/jobs/commit \ No newline at end of file diff --git a/library/rules/json_validation b/library/rules/json_validation new file mode 120000 index 00000000..2a47ead3 --- /dev/null +++ b/library/rules/json_validation @@ -0,0 +1 @@ +../../.deepwork/rules/json_validation \ No newline at end of file