diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md
index 6ffd9009..f81f6ecc 100644
--- a/.github/CONTRIBUTING.md
+++ b/.github/CONTRIBUTING.md
@@ -175,6 +175,8 @@ Pipelines may include optional platform-specific configuration directories for e
 
 Platform configurations are entirely optional and should not be required to run the pipeline with standard WDL executors (Cromwell, miniWDL, Sprocket).
 
+**Cirro Configuration Validation**: Pipelines with `.cirro/` directories are automatically validated in CI. The validation checks that all required files are present (`preprocess.py`, `process-form.json`, `process-input.json`, `process-output.json`, `process-compute.config`), JSON files are valid, and `preprocess.py` has no syntax errors. You can run this locally with `make lint_cirro`.
+
 ## Testing Requirements
 
 ### Local Tests
@@ -256,6 +258,7 @@ All contributions must pass our automated testing pipeline which executes on a P
 - **Container verification**: All Docker images must be accessible and functional
 - **Syntax validation**: WDL syntax and structure validation
 - **Integration testing**: Cross-module compatibility testing
+- **Cirro validation**: Validates `.cirro/` configurations for pipelines that include them
 
 ## Documentation Website
 
diff --git a/.github/scripts/validate_cirro.py b/.github/scripts/validate_cirro.py
new file mode 100644
index 00000000..e99765af
--- /dev/null
+++ b/.github/scripts/validate_cirro.py
@@ -0,0 +1,182 @@
+#!/usr/bin/env python3
+"""
+Validate .cirro configurations in WILDS pipelines.
+
+Checks that .cirro directories contain the required files,
+JSON files are valid, and preprocess.py has no syntax errors.
+"""
+
+import json
+import sys
+from pathlib import Path
+
+REQUIRED_FILES = [
+    "preprocess.py",
+    "process-form.json",
+    "process-input.json",
+    "process-output.json",
+    "process-compute.config",
+]
+
+
+def validate_json_file(filepath):
+    """Parse a JSON file and report problems as error strings.
+
+    Returns an ``(errors, data)`` tuple: ``errors`` is a list of error
+    strings (empty on success) and ``data`` is the parsed document, or
+    None when the file could not be read or parsed. A file holding the
+    JSON literal ``null`` also parses to None, so callers must check
+    ``errors`` (not ``data``) to detect failure.
+    """
+    try:
+        with open(filepath, encoding="utf-8") as f:
+            return [], json.load(f)
+    except json.JSONDecodeError as e:
+        return [f" Invalid JSON in {filepath.name}: {e}"], None
+    except (OSError, UnicodeDecodeError) as e:
+        # Report unreadable files as findings instead of crashing the run.
+        return [f" Could not read {filepath.name}: {e}"], None
+
+
+def validate_form(filepath):
+    """Validate process-form.json has expected structure."""
+    errors, data = validate_json_file(filepath)
+    if errors:
+        return errors
+
+    if not isinstance(data, dict):
+        errors.append(f" {filepath.name}: expected a JSON object at top level")
+        return errors
+
+    if "form" not in data:
+        errors.append(f" {filepath.name}: missing top-level 'form' key")
+        return errors
+
+    form = data["form"]
+    if not isinstance(form, dict):
+        errors.append(f" {filepath.name}: 'form' should be an object")
+        return errors
+
+    if "properties" not in form:
+        errors.append(f" {filepath.name}: 'form' missing 'properties' key")
+
+    if "required" in form and not isinstance(form["required"], list):
+        errors.append(f" {filepath.name}: 'required' should be a list")
+
+    return errors
+
+
+def validate_input(filepath):
+    """Validate process-input.json has JSON path mappings."""
+    errors, data = validate_json_file(filepath)
+    if errors:
+        return errors
+
+    if not isinstance(data, dict):
+        errors.append(f" {filepath.name}: expected a JSON object")
+        return errors
+
+    for key, value in data.items():
+        if not isinstance(value, str):
+            errors.append(f" {filepath.name}: value for '{key}' should be a string, got {type(value).__name__}")
+        elif not value.startswith("$."):
+            errors.append(f" {filepath.name}: value for '{key}' should be a JSON path (start with '$.')")
+
+    return errors
+
+
+def validate_output(filepath):
+    """Validate process-output.json is valid JSON."""
+    errors, _ = validate_json_file(filepath)
+    return errors
+
+
+def validate_preprocess(filepath):
+    """Validate preprocess.py has no syntax errors.
+
+    The file is compiled (never executed). It is read as bytes so that
+    compile() applies Python's own source-encoding rules.
+    """
+    try:
+        compile(filepath.read_bytes(), str(filepath), "exec")
+    except OSError as e:
+        return [f" {filepath.name}: could not read file: {e}"]
+    except (SyntaxError, ValueError) as e:
+        # Some Python versions raise ValueError for source with null bytes.
+        return [f" {filepath.name}: Python syntax error: {e}"]
+    return []
+
+
+def validate_cirro_dir(cirro_dir):
+    """Validate a single .cirro directory. Returns list of error strings."""
+    errors = []
+
+    # Check required files; a directory with a required name does not count.
+    for filename in REQUIRED_FILES:
+        if not (cirro_dir / filename).is_file():
+            errors.append(f" Missing required file: {filename}")
+
+    # Validate individual files
+    validators = (
+        ("process-form.json", validate_form),
+        ("process-input.json", validate_input),
+        ("process-output.json", validate_output),
+        ("preprocess.py", validate_preprocess),
+    )
+    for filename, validator in validators:
+        path = cirro_dir / filename
+        if path.is_file():
+            errors.extend(validator(path))
+
+    return errors
+
+
+def main():
+    """Validate every pipelines/*/.cirro directory; return the exit code."""
+    pipelines_dir = Path("pipelines")
+    if not pipelines_dir.is_dir():
+        print("No pipelines directory found")
+        return 0
+
+    found_any = False
+    all_errors = {}
+
+    for pipeline_dir in sorted(pipelines_dir.iterdir()):
+        if not pipeline_dir.is_dir():
+            continue
+
+        cirro_dir = pipeline_dir / ".cirro"
+        if not cirro_dir.is_dir():
+            print(f"Skipping {pipeline_dir.name} (no .cirro directory)")
+            continue
+
+        found_any = True
+        print(f"Validating {pipeline_dir.name}/.cirro/ ...")
+        errors = validate_cirro_dir(cirro_dir)
+
+        if errors:
+            all_errors[pipeline_dir.name] = errors
+            print(f" FAIL ({len(errors)} issue(s))")
+        else:
+            print(" OK")
+
+    if not found_any:
+        print("No .cirro directories found in any pipeline")
+        return 0
+
+    if all_errors:
+        print(f"\n{'='*50}")
+        print(f"Cirro validation failed for {len(all_errors)} pipeline(s):\n")
+        for pipeline, errors in all_errors.items():
+            print(f"{pipeline}:")
+            for error in errors:
+                print(error)
+            print()
+        return 1
+
+    print("\nAll Cirro configurations valid!")
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml
index 48ab77ab..2d1feae5 100644
--- a/.github/workflows/linting.yml
+++ b/.github/workflows/linting.yml
@@ -8,6 +8,9 @@ on:
   pull_request:
     types: [opened, reopened, synchronize]
 
+permissions:
+  contents: read
+
 jobs:
   miniwdl_check:
     runs-on: ubuntu-latest
@@ -80,3 +83,18 @@ jobs:
         uses: stjude-rust-labs/sprocket-action@v0.10.0
         with:
           action: lint
+
+  cirro_validation:
+    runs-on: ubuntu-latest
+    steps:
+      -
+        name: Checkout
+        uses: actions/checkout@v4
+      -
+        name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.13"
+      -
+        name: Validate Cirro Configurations
+        run: python .github/scripts/validate_cirro.py
diff --git a/Makefile b/Makefile
index 5d664d4f..7636c484 100644
--- a/Makefile
+++ b/Makefile
@@ -146,7 +146,11 @@ lint_womtool: check_java check_womtool check_name ## Run WOMtool validate on mod
 	done
 
 
-lint: lint_sprocket lint_miniwdl lint_womtool ## Run all linting checks
+lint_cirro: ## Validate .cirro configurations in pipelines
+	@echo "Validating Cirro configurations..."
+	@python3 .github/scripts/validate_cirro.py
+
+lint: lint_sprocket lint_miniwdl lint_womtool lint_cirro ## Run all linting checks
 
 
 ##@ Run