diff --git a/tools/security-tracker-stats-dashboard/fetch_bodies.py b/tools/security-tracker-stats-dashboard/fetch_bodies.py index dccfd6b9..6e346afd 100644 --- a/tools/security-tracker-stats-dashboard/fetch_bodies.py +++ b/tools/security-tracker-stats-dashboard/fetch_bodies.py @@ -1,4 +1,22 @@ #!/usr/bin/env python3 + +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + """Fetch issue body + closedByPullRequestsReferences for every tracker issue and cache to /tmp/claude/dashboard/issue_extra.json.""" diff --git a/tools/security-tracker-stats-dashboard/fetch_events.py b/tools/security-tracker-stats-dashboard/fetch_events.py index 6488572a..3d4bdc9b 100644 --- a/tools/security-tracker-stats-dashboard/fetch_events.py +++ b/tools/security-tracker-stats-dashboard/fetch_events.py @@ -1,4 +1,22 @@ #!/usr/bin/env python3 + +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + """Fetch per-issue label-history events. Resumes from cache.""" import json diff --git a/tools/security-tracker-stats-dashboard/fetch_issues.py b/tools/security-tracker-stats-dashboard/fetch_issues.py index e1587ce7..f71de349 100644 --- a/tools/security-tracker-stats-dashboard/fetch_issues.py +++ b/tools/security-tracker-stats-dashboard/fetch_issues.py @@ -1,4 +1,22 @@ #!/usr/bin/env python3 + +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ """Dump all tracker issues (state=all, no PRs) to /issues.json.""" import json diff --git a/tools/security-tracker-stats-dashboard/fetch_prs.py b/tools/security-tracker-stats-dashboard/fetch_prs.py index 8ed7ca9a..4b545397 100644 --- a/tools/security-tracker-stats-dashboard/fetch_prs.py +++ b/tools/security-tracker-stats-dashboard/fetch_prs.py @@ -1,4 +1,22 @@ #!/usr/bin/env python3 + +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + """Fetch createdAt + mergedAt + state for every upstream-repo PR referenced by any tracker (via closedByPullRequestsReferences or body parse). Cache to `/prs.json`. diff --git a/tools/security-tracker-stats-dashboard/fetch_roster.py b/tools/security-tracker-stats-dashboard/fetch_roster.py index 2ef0f669..9d3aef25 100644 --- a/tools/security-tracker-stats-dashboard/fetch_roster.py +++ b/tools/security-tracker-stats-dashboard/fetch_roster.py @@ -1,4 +1,22 @@ #!/usr/bin/env python3 + +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + """Dump the security-team roster (tracker repo's collaborators) to /roster.txt.""" import os diff --git a/tools/security-tracker-stats-dashboard/render.py b/tools/security-tracker-stats-dashboard/render.py index 42c3cff4..26c21680 100644 --- a/tools/security-tracker-stats-dashboard/render.py +++ b/tools/security-tracker-stats-dashboard/render.py @@ -1,4 +1,22 @@ #!/usr/bin/env python3 + +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + """ Regenerate a tracker-stats dashboard. 
Reads cached issues+events+PR data from `$TRACKER_STATS_CACHE` (default `/tmp/tracker-stats-cache`) and writes diff --git a/tools/skill-and-tool-validator/src/skill_and_tool_validator/__init__.py b/tools/skill-and-tool-validator/src/skill_and_tool_validator/__init__.py index 929e28be..3a40743b 100644 --- a/tools/skill-and-tool-validator/src/skill_and_tool_validator/__init__.py +++ b/tools/skill-and-tool-validator/src/skill_and_tool_validator/__init__.py @@ -17,7 +17,7 @@ """Validate framework skill definitions. -This module validates seven aspects of every skill under +This module validates eight aspects of every skill under skills/: 1. YAML frontmatter — every SKILL.md must have a valid frontmatter @@ -44,6 +44,11 @@ 7. Trigger-phrase preservation (SOFT) — quoted phrases inside when_to_use must not be dropped vs the base ref (default origin/main), preventing routing-recall regressions. +8. License-header presence (HARD) — every non-trivial Python source + file under ``tools/`` must carry the SPDX one-liner or the full + Apache Software Foundation license preamble. Skill ``.md`` files + declare their license via the required ``license:`` frontmatter key + (checked by aspect 1), so they need no separate header. SOFT categories surface as advisory warnings (stderr) without failing the run unless ``--strict`` is passed. @@ -242,6 +247,9 @@ def _read_mode_table() -> dict[str, str]: # Every framework skill is installed under a `magpie-` namespace prefix, so its # SKILL.md `name:` must be `magpie-` (see skills/setup/SKILL.md). NAME_CONVENTION_CATEGORY = "name_convention" +# License-header check: every non-trivial tool Python file under tools/ must +# carry the Apache-2.0 SPDX identifier or the full ASF preamble. +LICENSE_HEADER_CATEGORY = "license_header" # The `magpie-` namespace prefix every installed framework skill carries. 
SKILL_NAME_PREFIX = "magpie-" @@ -263,6 +271,7 @@ def _read_mode_table() -> dict[str, str]: CAPABILITY_SYNC_CATEGORY, INJECTION_GUARD_CATEGORY, NAME_CONVENTION_CATEGORY, + LICENSE_HEADER_CATEGORY, } ) ALL_CATEGORIES = HARD_CATEGORIES | SOFT_CATEGORIES @@ -1584,6 +1593,78 @@ def validate_lowercase_f_field(path: Path, text: str) -> Iterable[Violation]: ) +# --------------------------------------------------------------------------- +# License-header check +# --------------------------------------------------------------------------- + +# Acceptable license markers for Python source files: either the SPDX +# one-liner or the full Apache Software Foundation license preamble URL. +_LICENSE_PY_MARKERS: tuple[str, ...] = ( + "SPDX-License-Identifier: Apache-2.0", + "apache.org/licenses/LICENSE-2.0", +) + +# Files whose on-disk size is below this threshold (in bytes) are treated as +# empty placeholder stubs and exempted from the license-header check. +_MIN_LICENSE_FILE_SIZE = 50 + +# Path components that mark generated or vendored subtrees that must not +# be checked (venv, installed packages, etc.). +_LICENSE_SKIP_PATH_PARTS: frozenset[str] = frozenset( + {".venv", "site-packages", "node_modules", "__pycache__"} +) + + +def collect_tool_python_files(root: Path | None = None) -> list[Path]: + """Return non-trivial Python source files owned by this framework under tools/. + + Excludes generated / vendored subtrees (``.venv``, ``site-packages``, + ``node_modules``, ``__pycache__``) and empty placeholder files whose + size on disk is below ``_MIN_LICENSE_FILE_SIZE`` bytes. 
+ """ + base = (root or find_repo_root()) / TOOLS_DIR + if not base.exists(): + return [] + result: list[Path] = [] + for path in base.rglob("*.py"): + if any(part in _LICENSE_SKIP_PATH_PARTS for part in path.parts): + continue + try: + if path.stat().st_size < _MIN_LICENSE_FILE_SIZE: + continue + except OSError: + continue + result.append(path) + return sorted(result) + + +def validate_license_header(path: Path, text: str) -> Iterable[Violation]: + """Check that a tool ``.py`` file carries a license header. + + **Python files** (``tools/**/*.py``, non-trivial): must contain either the + SPDX one-liner (``# SPDX-License-Identifier: Apache-2.0``) or the full + Apache Software Foundation license preamble URL + (``apache.org/licenses/LICENSE-2.0``). + + Skill ``.md`` files are exempt — they declare their license via the + required ``license:`` frontmatter key (validated by the frontmatter + check), so a separate SPDX comment would be redundant. + + A missing header is a HARD failure — caught at validation time rather + than in code review. 
+ """ + if path.suffix.lower() == ".py" and not any(marker in text for marker in _LICENSE_PY_MARKERS): + yield Violation( + path, + 1, + "missing license header — Python source files must carry either " + "'# SPDX-License-Identifier: Apache-2.0' or the Apache Software " + "Foundation license preamble (URL: apache.org/licenses/LICENSE-2.0); " + "see AGENTS.md § Commit and PR conventions", + category=LICENSE_HEADER_CATEGORY, + ) + + def collect_skill_dirs(root: Path | None = None) -> set[Path]: """Return the set of skill directories (immediate children of skills).""" base = (root or find_repo_root()) / SKILLS_DIR @@ -1666,13 +1747,22 @@ def run_validation(root: Path | None = None) -> list[Violation]: violations.extend(validate_privacy_patterns(path, text)) violations.extend(validate_trigger_preservation(path, text, repo_root=repo_root)) - # All skill files get link + placeholder + security-pattern validation + # All skill files get link + placeholder + security-pattern checks violations.extend(validate_links(path, text, skill_dirs, doc_files)) violations.extend(validate_placeholders(path, text)) violations.extend(validate_security_patterns(path, text)) violations.extend(validate_gh_list_limit(path, text)) violations.extend(validate_lowercase_f_field(path, text)) + # License-header check for tool Python source files. + for py_path in collect_tool_python_files(repo_root): + try: + py_text = py_path.read_text(encoding="utf-8") + except OSError as exc: + violations.append(Violation(py_path, None, f"cannot read file: {exc}")) + continue + violations.extend(validate_license_header(py_path, py_text)) + # Tool-level checks: every tools// has a README that declares its capability. 
violations.extend(validate_tools(repo_root)) diff --git a/tools/skill-and-tool-validator/tests/test_validator.py b/tools/skill-and-tool-validator/tests/test_validator.py index 3a631bcd..1630ae0b 100644 --- a/tools/skill-and-tool-validator/tests/test_validator.py +++ b/tools/skill-and-tool-validator/tests/test_validator.py @@ -37,6 +37,7 @@ INJECTION_GUARD_CATEGORY, INJECTION_GUARD_TODO_CATEGORY, INJECTION_GUARD_TODO_SENTINEL, + LICENSE_HEADER_CATEGORY, LOWERCASE_F_FIELD_CATEGORY, MAX_METADATA_CHARS, PRINCIPLE_CATEGORY, @@ -48,6 +49,7 @@ collect_doc_files, collect_files_to_check, collect_skill_dirs, + collect_tool_python_files, extract_headings, find_repo_root, is_path_allowlisted, @@ -62,6 +64,7 @@ validate_frontmatter, validate_gh_list_limit, validate_injection_guard, + validate_license_header, validate_links, validate_lowercase_f_field, validate_name_convention, @@ -631,7 +634,9 @@ def _make_skill_dir(self, root: Path, skill_name: str = "setup-foo") -> Path: skill_dir = root / "skills" / skill_name skill_dir.mkdir(parents=True) (skill_dir / "SKILL.md").write_text( - f"---\nname: magpie-{skill_name}\ndescription: bar\ncapability: capability:setup\nlicense: Apache-2.0\n---\n# body\n", + f"---\nname: magpie-{skill_name}\ndescription: bar\ncapability: capability:setup\nlicense: Apache-2.0\n---\n" + "\n" + "# body\n", encoding="utf-8", ) docs = root / "docs" @@ -699,6 +704,7 @@ def test_setup_skill_with_multiple_sub_docs_passes_cleanly(self, tmp_path: Path) skill_dir = self._make_skill_dir(tmp_path, skill_name="setup") for name in ("adopt.md", "agents.md", "overrides.md", "upgrade.md", "verify.md"): (skill_dir / name).write_text( + "\n" f"# {name.removesuffix('.md')}\n\nContent for {name}.\n", encoding="utf-8", ) @@ -1437,6 +1443,135 @@ def test_lowercase_f_field_in_soft_categories(self) -> None: assert LOWERCASE_F_FIELD_CATEGORY in SOFT_CATEGORIES +# --------------------------------------------------------------------------- +# License-header check +# 
--------------------------------------------------------------------------- + +# Full Apache License preamble as used in Python tool files. +_ASF_HEADER = ( + "# Licensed to the Apache Software Foundation (ASF) under one\n" + "# or more contributor license agreements. See the NOTICE file\n" + "# distributed with this work for additional information\n" + "# regarding copyright ownership. The ASF licenses this file\n" + "# to you under the Apache License, Version 2.0 (the\n" + '# "License"); you may not use this file except in compliance\n' + "# with the License. You may obtain a copy of the License at\n" + "#\n" + "# http://www.apache.org/licenses/LICENSE-2.0\n" + "#\n" + "# Unless required by applicable law or agreed to in writing,\n" + "# software distributed under the License is distributed on an\n" + '# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n' + "# KIND, either express or implied. See the License for the\n" + "# specific language governing permissions and limitations\n" + "# under the License.\n" +) +_SPDX_PY_HEADER = "# SPDX-License-Identifier: Apache-2.0\n" + + +class TestValidateLicenseHeader: + # ------------------------------------------------------------------ # + # Python (.py) checks # + # ------------------------------------------------------------------ # + + def test_license_header_violation_is_hard_category(self) -> None: + assert LICENSE_HEADER_CATEGORY in HARD_CATEGORIES + assert LICENSE_HEADER_CATEGORY not in SOFT_CATEGORIES + + def test_md_file_is_exempt(self, tmp_path: Path) -> None: + """Skill .md files declare license via frontmatter, so they need no header.""" + path = tmp_path / "SKILL.md" + text = "---\nname: foo\ndescription: bar\ncapability: capability:setup\nlicense: Apache-2.0\n---\n# Body\n" + violations = list(validate_license_header(path, text)) + assert violations == [] + + def test_py_with_asf_header_passes(self, tmp_path: Path) -> None: + """A Python file with the full ASF license preamble → no violation.""" + 
path = tmp_path / "tool.py" + text = _ASF_HEADER + '\n"""Module docstring."""\n' + violations = list(validate_license_header(path, text)) + assert violations == [] + + def test_py_with_spdx_one_liner_passes(self, tmp_path: Path) -> None: + """A Python file with only the SPDX one-liner → no violation.""" + path = tmp_path / "tool.py" + text = _SPDX_PY_HEADER + '\n"""Module docstring."""\n' + violations = list(validate_license_header(path, text)) + assert violations == [] + + def test_py_without_any_header_fails(self, tmp_path: Path) -> None: + """A Python file with no license marker → HARD violation.""" + path = tmp_path / "tool.py" + text = '"""Module with no license header."""\n\ndef foo() -> None:\n pass\n' + violations = list(validate_license_header(path, text)) + assert len(violations) == 1 + assert violations[0].category == LICENSE_HEADER_CATEGORY + assert "license header" in violations[0].message + + def test_py_shebang_plus_asf_passes(self, tmp_path: Path) -> None: + """A script with shebang + ASF header → no violation.""" + path = tmp_path / "script.py" + text = "#!/usr/bin/env python3\n" + _ASF_HEADER + '"""Script."""\n' + violations = list(validate_license_header(path, text)) + assert violations == [] + + def test_non_py_non_md_file_ignored(self, tmp_path: Path) -> None: + """Files with other extensions are not checked.""" + path = tmp_path / "config.toml" + text = "[tool]\nno_license = true\n" + violations = list(validate_license_header(path, text)) + assert violations == [] + + # ------------------------------------------------------------------ # + # collect_tool_python_files scoping # + # ------------------------------------------------------------------ # + + def test_collect_tool_python_files_includes_src_files(self, tmp_path: Path) -> None: + """Non-trivial Python files under tools/*/src/ are included.""" + (tmp_path / "tools" / "my-tool" / "src" / "my_tool").mkdir(parents=True) + target = tmp_path / "tools" / "my-tool" / "src" / "my_tool" / 
"__init__.py" + target.write_text(_ASF_HEADER + '"""Package."""\n') + files = collect_tool_python_files(tmp_path) + assert target in files + + def test_collect_tool_python_files_excludes_venv(self, tmp_path: Path) -> None: + """Files under .venv/ are excluded even if otherwise eligible.""" + venv_py = tmp_path / "tools" / "my-tool" / ".venv" / "lib" / "python3.12" / "site-packages" / "pkg.py" + venv_py.parent.mkdir(parents=True) + venv_py.write_text(_ASF_HEADER + '"""Third-party."""\n') + files = collect_tool_python_files(tmp_path) + assert venv_py not in files + + def test_collect_tool_python_files_excludes_empty_stubs(self, tmp_path: Path) -> None: + """Truly empty __init__.py stubs are excluded (below the size threshold).""" + (tmp_path / "tools" / "my-tool" / "tests").mkdir(parents=True) + stub = tmp_path / "tools" / "my-tool" / "tests" / "__init__.py" + stub.write_text("") # empty + files = collect_tool_python_files(tmp_path) + assert stub not in files + + def test_collect_tool_python_files_returns_empty_when_no_tools_dir(self, tmp_path: Path) -> None: + assert collect_tool_python_files(tmp_path) == [] + + # ------------------------------------------------------------------ # + # Integration: real repo passes # + # ------------------------------------------------------------------ # + + def test_real_repo_tool_python_files_all_have_headers(self) -> None: + """Every non-trivial tool Python file in the real repo carries a license header.""" + from skill_and_tool_validator import _LICENSE_PY_MARKERS + + repo_root = find_repo_root() + missing = [ + p + for p in collect_tool_python_files(repo_root) + if not any(marker in p.read_text(encoding="utf-8") for marker in _LICENSE_PY_MARKERS) + ] + assert missing == [], f"{len(missing)} tool Python file(s) missing any license header:\n" + "\n".join( + f" {p.relative_to(repo_root)}" for p in missing + ) + + # --------------------------------------------------------------------------- # SOFT category exposure # 
--------------------------------------------------------------------------- @@ -1920,7 +2055,9 @@ def _make_valid_skill(root: Path, name: str) -> Path: skill_dir = root / "skills" / name skill_dir.mkdir(parents=True, exist_ok=True) (skill_dir / "SKILL.md").write_text( - f"---\nname: magpie-{name}\ndescription: A test skill.\ncapability: capability:setup\nlicense: Apache-2.0\n---\n# Body\nSome content.\n" + f"---\nname: magpie-{name}\ndescription: A test skill.\ncapability: capability:setup\nlicense: Apache-2.0\n---\n" + "\n" + "# Body\nSome content.\n" ) # Inject a row into the skill table of the seeded doc. doc = root / "docs" / "labels-and-capabilities.md" @@ -1970,7 +2107,10 @@ def test_skip_categories_suppresses_violations( root = _skill_root(tmp_path) skill_dir = root / "skills" / "bad-skill" skill_dir.mkdir(parents=True) - (skill_dir / "SKILL.md").write_text("# No frontmatter\n") + (skill_dir / "SKILL.md").write_text( + "\n" + "# No frontmatter\n" + ) monkeypatch.chdir(root) # Frontmatter violations use the "general" default category. @@ -1990,6 +2130,7 @@ def test_strict_promotes_soft_violations_to_hard( "description: A test skill.\n" "capability: capability:setup\nlicense: Apache-2.0\n" "---\n" + "\n" "```bash\n" 'gh pr comment 1 --body "attacker content"\n' "```\n"