-
Notifications
You must be signed in to change notification settings - Fork 7
Expand file tree
/
Copy pathvalidate_cirro.py
More file actions
173 lines (132 loc) · 4.86 KB
/
validate_cirro.py
File metadata and controls
173 lines (132 loc) · 4.86 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
#!/usr/bin/env python3
"""
Validate .cirro configurations in WILDS pipelines.
Checks that .cirro directories contain the required files,
JSON files are valid, and preprocess.py has no syntax errors.
"""
import json
import sys
from pathlib import Path
# File names that must exist in every pipeline's .cirro directory.
# validate_cirro_dir() reports one "Missing required file" error for each
# entry that is absent.
REQUIRED_FILES = [
    "preprocess.py",
    "process-form.json",
    "process-input.json",
    "process-output.json",
    "process-compute.config",
]
def validate_json_file(filepath):
    """Parse ``filepath`` as JSON and report any problem as an error string.

    Args:
        filepath: ``pathlib.Path`` of the file to parse.

    Returns:
        A ``(errors, data)`` tuple. ``errors`` is a list of human-readable
        error strings (empty on success). ``data`` is the parsed JSON value,
        or ``None`` when the file could not be read or parsed. ``data`` is
        also ``None`` for a file holding the JSON literal ``null``, so check
        ``errors`` (not ``data``) to detect a failure.
    """
    errors = []
    try:
        # JSON interchange is UTF-8 (RFC 8259); do not depend on the locale.
        with open(filepath, encoding="utf-8") as f:
            data = json.load(f)
    except json.JSONDecodeError as e:
        errors.append(f" Invalid JSON in {filepath.name}: {e}")
        return errors, None
    except (OSError, UnicodeDecodeError) as e:
        # Unreadable or non-UTF-8 files are validation failures, not crashes:
        # report them so the remaining pipelines still get checked.
        errors.append(f" Could not read {filepath.name}: {e}")
        return errors, None
    return errors, data
def validate_form(filepath):
    """Validate that process-form.json has the expected structure.

    Checks that the file parses as JSON, that the top level is an object with
    a ``form`` key, that ``form`` is itself an object containing a
    ``properties`` key, and that ``form["required"]``, when present, is a
    list.

    Args:
        filepath: ``pathlib.Path`` of the process-form.json file.

    Returns:
        A list of error strings; empty when no problem was found.
    """
    errors, data = validate_json_file(filepath)
    # Stop on a parse failure by looking at ``errors`` rather than
    # ``data is None``: a file containing the JSON literal ``null`` also
    # parses to ``None`` and must reach the type check below instead of
    # being accepted silently.
    if errors:
        return errors
    if not isinstance(data, dict):
        errors.append(f" {filepath.name}: expected a JSON object at top level")
        return errors
    if "form" not in data:
        errors.append(f" {filepath.name}: missing top-level 'form' key")
        return errors
    form = data["form"]
    if not isinstance(form, dict):
        errors.append(f" {filepath.name}: 'form' should be an object")
        return errors
    if "properties" not in form:
        errors.append(f" {filepath.name}: 'form' missing 'properties' key")
    if "required" in form and not isinstance(form["required"], list):
        errors.append(f" {filepath.name}: 'required' should be a list")
    return errors
def validate_input(filepath):
    """Validate that process-input.json maps keys to JSON path strings.

    The file must parse as a JSON object, and every value in it must be a
    string beginning with ``$.``.

    Args:
        filepath: ``pathlib.Path`` of the process-input.json file.

    Returns:
        A list of error strings; empty when no problem was found.
    """
    errors, data = validate_json_file(filepath)
    # Stop on a parse failure by looking at ``errors`` rather than
    # ``data is None``: a file containing the JSON literal ``null`` also
    # parses to ``None`` and must be rejected by the type check below.
    if errors:
        return errors
    if not isinstance(data, dict):
        errors.append(f" {filepath.name}: expected a JSON object")
        return errors
    for key, value in data.items():
        if not isinstance(value, str):
            errors.append(f" {filepath.name}: value for '{key}' should be a string, got {type(value).__name__}")
        elif not value.startswith("$."):
            errors.append(f" {filepath.name}: value for '{key}' should be a JSON path (start with '$.')")
    return errors
def validate_output(filepath):
    """Check that process-output.json parses as JSON.

    Returns the list of error strings produced by validate_json_file(); the
    parsed value itself is not inspected.
    """
    return validate_json_file(filepath)[0]
def validate_preprocess(filepath):
    """Validate that preprocess.py compiles without errors.

    The file is only compiled, never executed.

    Args:
        filepath: ``pathlib.Path`` of the preprocess.py file.

    Returns:
        A list of error strings; empty when the source compiles cleanly.
    """
    errors = []
    try:
        # Compile from raw bytes so the interpreter applies its own source
        # decoding rules (UTF-8 default, BOM, PEP 263 coding cookie) instead
        # of the locale encoding that read_text() would use.
        source = filepath.read_bytes()
        # dont_inherit=True keeps this module's own compiler flags out of the
        # checked file's compilation.
        compile(source, str(filepath), "exec", dont_inherit=True)
    except SyntaxError as e:
        errors.append(f" {filepath.name}: Python syntax error: {e}")
    except ValueError as e:
        # Some interpreter versions reject NUL bytes or undecodable source
        # with ValueError rather than SyntaxError.
        errors.append(f" {filepath.name}: Python source could not be compiled: {e}")
    except OSError as e:
        errors.append(f" {filepath.name}: could not be read: {e}")
    return errors
def validate_cirro_dir(cirro_dir):
    """Run every check on one .cirro directory and collect the error strings.

    Missing-file errors come first (in REQUIRED_FILES order), followed by the
    content errors for process-form.json, process-input.json,
    process-output.json and preprocess.py, in that order. Content checks are
    skipped for files that do not exist.
    """
    # Presence check for every required file.
    problems = [
        f" Missing required file: {required}"
        for required in REQUIRED_FILES
        if not (cirro_dir / required).exists()
    ]

    # Content checks, applied only to the files that are actually present.
    content_checks = (
        ("process-form.json", validate_form),
        ("process-input.json", validate_input),
        ("process-output.json", validate_output),
        ("preprocess.py", validate_preprocess),
    )
    for filename, check in content_checks:
        target = cirro_dir / filename
        if target.exists():
            problems.extend(check(target))

    return problems
def main():
    """Validate the .cirro directory of every pipeline under ./pipelines.

    Progress and a final summary are printed to stdout.

    Returns:
        0 when there is nothing to validate or every configuration is valid,
        1 when at least one pipeline has validation errors.
    """
    root = Path("pipelines")
    if not root.exists():
        print("No pipelines directory found")
        return 0

    checked = 0
    failures = {}
    for entry in sorted(root.iterdir()):
        if not entry.is_dir():
            continue

        cirro_dir = entry / ".cirro"
        if not cirro_dir.is_dir():
            print(f"Skipping {entry.name} (no .cirro directory)")
            continue

        checked += 1
        print(f"Validating {entry.name}/.cirro/ ...")
        issues = validate_cirro_dir(cirro_dir)
        if not issues:
            print(" OK")
            continue
        failures[entry.name] = issues
        print(f" FAIL ({len(issues)} issue(s))")

    if checked == 0:
        print("No .cirro directories found in any pipeline")
        return 0

    if not failures:
        print("\nAll Cirro configurations valid!")
        return 0

    # Summary of every failing pipeline, in the order they were visited.
    print("\n" + "=" * 50)
    print(f"Cirro validation failed for {len(failures)} pipeline(s):\n")
    for name, issues in failures.items():
        print(f"{name}:")
        for issue in issues:
            print(issue)
        print()
    return 1
# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())