diff --git a/.github/schemas/signals.schema.json b/.github/schemas/signals.schema.json index 1130cf22..ded4367e 100644 --- a/.github/schemas/signals.schema.json +++ b/.github/schemas/signals.schema.json @@ -1,14 +1,13 @@ { "$schema": "https://json-schema.org/draft/2020-12/schema", "$id": "https://github.com/petry-projects/.github/blob/main/.github/schemas/signals.schema.json", - "$comment": "version: 1.1.0 — must match SCHEMA_VERSION in collect-signals.sh; enforced by bats", + "$comment": "version: 1.0.0 — must match SCHEMA_VERSION in collect-signals.sh; enforced by bats", "title": "Feature Ideation Signals", "description": "Canonical contract between collect-signals.sh and the BMAD Analyst (Mary) prompt. Any change to this schema is a breaking change to the workflow.", "type": "object", "required": [ "schema_version", "scan_date", - "last_successful_run", "repo", "open_issues", "closed_issues_30d", @@ -29,11 +28,6 @@ "type": "string", "format": "date-time" }, - "last_successful_run": { - "description": "ISO-8601 timestamp of the previous successful workflow run; used as a feed checkpoint by the analyst to skip already-reviewed content.", - "type": "string", - "format": "date-time" - }, "repo": { "type": "string", "pattern": "^[^/]+/[^/]+$" diff --git a/.github/scripts/feature-ideation/README.md b/.github/scripts/feature-ideation/README.md index 372125c3..f213172e 100644 --- a/.github/scripts/feature-ideation/README.md +++ b/.github/scripts/feature-ideation/README.md @@ -51,7 +51,7 @@ bats test/workflows/feature-ideation/gh-safe.bats shellcheck -x collect-signals.sh lint-prompt.sh match-discussions.sh \ discussion-mutations.sh lib/*.sh) -# Lint the workflow's direct_prompt block +# Lint workflow prompt blocks (direct_prompt: and prompt:) bash .github/scripts/feature-ideation/lint-prompt.sh ``` @@ -76,7 +76,7 @@ signals.json is a breaking change and must: 1. Bump `SCHEMA_VERSION` in `collect-signals.sh`. 2. Update fixtures under `test/workflows/feature-ideation/fixtures/expected/`. -3. Update Mary's prompt in `feature-ideation.yml` if any field references move. +3. Update Mary's prompt in `.github/workflows/feature-ideation-reusable.yml` if any field references move. CI validates every fixture against the schema, and the workflow validates the runtime output before handing it to Mary. diff --git a/.github/scripts/feature-ideation/collect-signals.sh b/.github/scripts/feature-ideation/collect-signals.sh index 0a11f77f..b2ae23cc 100755 --- a/.github/scripts/feature-ideation/collect-signals.sh +++ b/.github/scripts/feature-ideation/collect-signals.sh @@ -31,7 +31,7 @@ set -euo pipefail # if the constants drift, AND the bats `signals-schema: SCHEMA_VERSION # constant matches schema file` test enforces this in CI. # Caught by CodeRabbit review on PR petry-projects/.github#85. -SCHEMA_VERSION="1.1.0" +SCHEMA_VERSION="1.0.0" SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)" # shellcheck source=lib/gh-safe.sh @@ -58,56 +58,39 @@ main() { local discussion_limit="${DISCUSSION_LIMIT:-100}" local output_path="${SIGNALS_OUTPUT:-./signals.json}" - local owner repo_name - owner="${REPO%%/*}" - repo_name="${REPO##*/}" - if [ "$owner" = "$REPO" ] || [ -z "$repo_name" ]; then + # Validate that limit overrides are positive integers before forwarding to + # GraphQL — a value like `ISSUE_LIMIT=foo` would cause an opaque downstream + # failure instead of a clean usage error. Caught by CodeRabbit review on + # PR petry-projects/.github#85. + local _lim_name _lim_val + for _lim_name in ISSUE_LIMIT PR_LIMIT DISCUSSION_LIMIT; do + case "$_lim_name" in + ISSUE_LIMIT) _lim_val="$issue_limit" ;; + PR_LIMIT) _lim_val="$pr_limit" ;; + DISCUSSION_LIMIT) _lim_val="$discussion_limit" ;; + esac + if [[ ! $_lim_val =~ ^[1-9][0-9]*$ ]]; then + printf '[collect-signals] %s must be a positive integer, got: %s\n' "$_lim_name" "$_lim_val" >&2 + return 64 + fi + done + + # Strict owner/name format — reject leading/trailing slashes, empty segments, + # and extra path parts (e.g. "org//repo", "/repo", "org/repo/extra"). + # Caught by CodeRabbit review on PR petry-projects/.github#85. + if [[ ! $REPO =~ ^[^/]+/[^/]+$ ]]; then printf '[collect-signals] REPO must be in owner/name format, got: %s\n' "$REPO" >&2 return 64 fi + local owner repo_name + owner="${REPO%%/*}" + repo_name="${REPO##*/}" local thirty_days_ago thirty_days_ago=$(date_days_ago 30) local scan_date scan_date=$(date_now_iso) - # --- Feed checkpoint: last successful run ---------------------------------- - # Used by the analyst to skip feed entries already reviewed. The current run - # is still in-progress, so --status=success --limit=1 reliably returns the - # previous successful run. Falls back to 30 days ago on first-ever run or - # after a long gap so the initial scan is bounded. - # WORKFLOW_FILE — caller-supplied env var for repos that name their stub - # something other than the conventional "feature-ideation.yml". Defaults to - # the conventional name; no change needed for repos that follow the standard. - printf '[collect-signals] resolving feed checkpoint (last successful run)\n' >&2 - local last_successful_run _run_stderr _run_err - _run_stderr=$(mktemp) - last_successful_run=$(gh run list \ - --repo "$REPO" \ - --workflow="${WORKFLOW_FILE:-feature-ideation.yml}" \ - --status=success \ - --limit=1 \ - --json createdAt \ - --jq '.[0].createdAt // empty' \ - 2>"$_run_stderr" || true) - _run_err=$(cat "$_run_stderr") - rm -f "$_run_stderr" - # Validate that the result looks like an ISO-8601 datetime. The real `gh` - # CLI applies the --jq filter and emits a bare timestamp; in test environments - # the gh stub returns raw fixture JSON (without applying --jq), so we guard - # against that here rather than requiring every test to stub this extra call. - if [ -z "$last_successful_run" ] || [ "$last_successful_run" = "null" ] || \ - ! printf '%s' "$last_successful_run" | grep -qE '^[0-9]{4}-[0-9]{2}-[0-9]{2}T'; then - if [ -n "$_run_err" ]; then - printf '[collect-signals] gh run list warning: %s\n' "$_run_err" >&2 - fi - last_successful_run="$(date_days_ago 30)T00:00:00Z" - printf '[collect-signals] no prior successful run found; using 30-day fallback: %s\n' \ - "$last_successful_run" >&2 - else - printf '[collect-signals] feed checkpoint: %s\n' "$last_successful_run" >&2 - fi - local truncation_warnings='[]' # --- Open issues ----------------------------------------------------------- @@ -238,7 +221,6 @@ GRAPHQL "$bug_reports" \ "$REPO" \ "$scan_date" \ - "$last_successful_run" \ "$SCHEMA_VERSION" \ "$truncation_warnings") @@ -255,7 +237,6 @@ GRAPHQL printf -- '- **Bug reports:** %s\n' "$(jq '.bug_reports.count' "$output_path")" printf -- '- **Merged PRs (30d):** %s\n' "$(jq '.merged_prs_30d.count' "$output_path")" printf -- '- **Existing Ideas discussions:** %s\n' "$(jq '.ideas_discussions.count' "$output_path")" - printf -- '- **Feed checkpoint (last successful run):** %s\n' "$(jq -r '.last_successful_run' "$output_path")" local warn_count warn_count=$(jq '.truncation_warnings | length' "$output_path") if [ "$warn_count" -gt 0 ]; then diff --git a/.github/scripts/feature-ideation/lib/compose-signals.sh b/.github/scripts/feature-ideation/lib/compose-signals.sh index 1afd8e7b..1c699349 100755 --- a/.github/scripts/feature-ideation/lib/compose-signals.sh +++ b/.github/scripts/feature-ideation/lib/compose-signals.sh @@ -18,17 +18,16 @@ # $7 bug_reports # $8 repo (string, e.g. "petry-projects/talkterm") # $9 scan_date (ISO-8601 string) -# $10 last_successful_run (ISO-8601 string; feed checkpoint) -# $11 schema_version (string) -# $12 truncation_warnings (JSON array, may be []) +# $10 schema_version (string) +# $11 truncation_warnings (JSON array, may be []) # # Output: signals.json document on stdout. set -euo pipefail compose_signals() { - if [ "$#" -ne 12 ]; then - printf '[compose-signals] expected 12 args, got %d\n' "$#" >&2 + if [ "$#" -ne 11 ]; then + printf '[compose-signals] expected 11 args, got %d\n' "$#" >&2 return 64 # EX_USAGE fi @@ -41,9 +40,8 @@ compose_signals() { local bug_reports="$7" local repo="$8" local scan_date="$9" - local last_successful_run="${10}" - local schema_version="${11}" - local truncation_warnings="${12}" + local schema_version="${10}" + local truncation_warnings="${11}" # Validate every JSON input before composition. Better to fail loudly here # than to let `jq --argjson` produce a cryptic parse error. @@ -51,15 +49,17 @@ compose_signals() { for input in "$open_issues" "$closed_issues" "$ideas_discussions" "$releases" \ "$merged_prs" "$feature_requests" "$bug_reports" "$truncation_warnings"; do idx=$((idx + 1)) - if ! printf '%s' "$input" | jq -e . >/dev/null 2>&1; then - printf '[compose-signals] arg #%d is not valid JSON: %s\n' "$idx" "${input:0:120}" >&2 + # Require a JSON array, not just valid JSON. Objects/strings/nulls accepted + # by `jq -e .` would silently produce wrong counts (key count, char count). + # Caught by CodeRabbit review on PR petry-projects/.github#85. + if ! printf '%s' "$input" | jq -e 'type == "array"' >/dev/null 2>&1; then + printf '[compose-signals] arg #%d must be a JSON array: %s\n' "$idx" "${input:0:120}" >&2 return 65 # EX_DATAERR fi done jq -n \ --arg scan_date "$scan_date" \ - --arg last_successful_run "$last_successful_run" \ --arg repo "$repo" \ --arg schema_version "$schema_version" \ --argjson open_issues "$open_issues" \ @@ -73,7 +73,6 @@ compose_signals() { '{ schema_version: $schema_version, scan_date: $scan_date, - last_successful_run: $last_successful_run, repo: $repo, open_issues: { count: ($open_issues | length), items: $open_issues }, closed_issues_30d: { count: ($closed_issues | length), items: $closed_issues }, diff --git a/.github/scripts/feature-ideation/lib/date-utils.sh b/.github/scripts/feature-ideation/lib/date-utils.sh index 8c4b5a12..ae2722bd 100755 --- a/.github/scripts/feature-ideation/lib/date-utils.sh +++ b/.github/scripts/feature-ideation/lib/date-utils.sh @@ -10,6 +10,13 @@ set -euo pipefail # Print an ISO date (YYYY-MM-DD) for N days ago in UTC. date_days_ago() { + # Guard arg count before reading $1: under set -u a zero-arg call would abort + # the shell with "unbound variable" instead of reaching the validation path. + # Caught by CodeRabbit review on PR petry-projects/.github#85. + if [ "$#" -ne 1 ]; then + printf '[date-utils] expected 1 arg (days), got: %d\n' "$#" >&2 + return 64 + fi local days="$1" if [ -z "$days" ] || ! printf '%s' "$days" | grep -Eq '^[0-9]+$'; then printf '[date-utils] days must be a non-negative integer, got: %s\n' "$days" >&2 diff --git a/.github/scripts/feature-ideation/lib/filter-bots.sh b/.github/scripts/feature-ideation/lib/filter-bots.sh index f2fbfdec..f994e3e2 100755 --- a/.github/scripts/feature-ideation/lib/filter-bots.sh +++ b/.github/scripts/feature-ideation/lib/filter-bots.sh @@ -35,9 +35,11 @@ DEFAULT_BOT_AUTHORS=( filter_bots_build_list() { local list=("${DEFAULT_BOT_AUTHORS[@]}") if [ -n "${FEATURE_IDEATION_BOT_AUTHORS:-}" ]; then - local IFS=',' - # shellcheck disable=SC2206 - local extras=($FEATURE_IDEATION_BOT_AUTHORS) + # Use `IFS=',' read` (not unquoted expansion) to avoid pathname-globbing + # against the filesystem if any entry contains wildcard characters. + # Caught by CodeRabbit review on PR petry-projects/.github#85. + local extras=() + IFS=',' read -r -a extras <<<"${FEATURE_IDEATION_BOT_AUTHORS}" # Trim leading/trailing whitespace from each comma-separated entry so # `"bot1, bot2"` resolves to `bot1` and `bot2`, not `bot1` and ` bot2`. # Caught by CodeRabbit review on PR petry-projects/.github#85. diff --git a/.github/scripts/feature-ideation/lib/gh-safe.sh b/.github/scripts/feature-ideation/lib/gh-safe.sh index 343a5bbc..8060c5c4 100755 --- a/.github/scripts/feature-ideation/lib/gh-safe.sh +++ b/.github/scripts/feature-ideation/lib/gh-safe.sh @@ -93,6 +93,13 @@ gh_safe_graphql() { local i=0 while [ "$i" -lt "${#args[@]}" ]; do if [ "${args[$i]}" = "--jq" ]; then + # Guard bounds before dereferencing args[i+1]: under set -u an out-of- + # bounds access aborts the shell. Caught by CodeRabbit review on PR + # petry-projects/.github#85. + if [ $((i + 1)) -ge "${#args[@]}" ]; then + _gh_safe_err "graphql-bad-args" "--jq requires a jq filter argument" + return 64 + fi has_jq=1 jq_filter="${args[$((i + 1))]}" break @@ -132,13 +139,11 @@ gh_safe_graphql() { fi # Reject error envelopes — GraphQL returns 200 OK even on partial errors. - if printf '%s' "$raw" | jq -e '.errors // empty | if . then true else false end' >/dev/null 2>&1; then - if printf '%s' "$raw" | jq -e '(.errors | type) == "array" and (.errors | length > 0)' >/dev/null 2>&1; then - local errs - errs=$(printf '%s' "$raw" | jq -c '.errors') - _gh_safe_err "graphql-errors" "$errs" - return 66 # EX_NOINPUT — repurposed: "GraphQL refused our request" - fi + if printf '%s' "$raw" | jq -e '(.errors // empty | type) == "array" and (.errors | length > 0)' >/dev/null 2>&1; then + local errs + errs=$(printf '%s' "$raw" | jq -c '.errors') + _gh_safe_err "graphql-errors" "$errs" + return 66 # EX_NOINPUT — repurposed: "GraphQL refused our request" fi # Reject `data: null` — usually means the path didn't resolve (permissions, @@ -187,6 +192,13 @@ gh_safe_graphql() { # Same defensive contract as `gh_safe_graphql`: any auth/network/schema # failure exits non-zero with a structured stderr message. gh_safe_graphql_input() { + # Guard arg count before reading $1: under set -u a zero-arg call aborts the + # shell instead of reaching the JSON validation. Caught by CodeRabbit review + # on PR petry-projects/.github#85. + if [ "$#" -ne 1 ]; then + _gh_safe_err "graphql-bad-input" "expected 1 arg: JSON request body, got $#" + return 64 + fi local body="$1" if ! gh_safe_is_json "$body"; then _gh_safe_err "graphql-bad-input" "request body is not valid JSON" diff --git a/.github/scripts/feature-ideation/lint-prompt.sh b/.github/scripts/feature-ideation/lint-prompt.sh index ad8226de..a1d18b63 100755 --- a/.github/scripts/feature-ideation/lint-prompt.sh +++ b/.github/scripts/feature-ideation/lint-prompt.sh @@ -31,6 +31,34 @@ scan_file() { import re import sys + +def _strip_github_expressions(s: str) -> str: + """Remove ${{ ... }} GitHub Actions expressions from s. + + Uses a stateful scanner instead of `[^}]*` regex so that expressions + containing `}` inside string literals (e.g. format() calls) are fully + consumed rather than prematurely terminated. This prevents false-positive + shell-expansion matches on content that is actually inside a GH expression. + Caught by CodeRabbit review on PR petry-projects/.github#85. + """ + result: list[str] = [] + i = 0 + while i < len(s): + if s[i : i + 3] == "${{": + # Consume until we find the matching "}}" + j = i + 3 + while j < len(s): + if s[j : j + 2] == "}}": + j += 2 + break + j += 1 + i = j # skip the whole ${{ ... }} expression + else: + result.append(s[i]) + i += 1 + return "".join(result) + + path = sys.argv[1] try: with open(path, "r", encoding="utf-8") as f: @@ -55,8 +83,10 @@ findings = [] shell_expansion = re.compile(r'(?` block scalar indicators. -prompt_marker = re.compile(r'(?:direct_prompt|prompt):\s*[|>]?\s*$') +# optional `|` or `>` block scalar indicators plus YAML chomping modifiers +# (`-` or `+`) so `prompt: |-`, `prompt: |+`, `prompt: >-`, `prompt: >+` +# are all recognised. Caught by CodeRabbit review on PR petry-projects/.github#85. +prompt_marker = re.compile(r'(?:direct_prompt|prompt):\s*[|>]?[-+]?\s*$') for lineno, raw in enumerate(lines, start=1): stripped = raw.lstrip(" ") @@ -78,8 +108,13 @@ for lineno, raw in enumerate(lines, start=1): continue # We're inside the prompt body. Scan for shell expansions. - # First, strip out any GitHub Actions expressions so they don't trip us. - no_gh = re.sub(r'\$\{\{[^}]*\}\}', '', raw) + # First, strip out GitHub Actions ${{ ... }} expressions. + # The naive `[^}]*` regex stops at the first `}`, so expressions that + # contain `}` internally (e.g. format() calls or string literals) are + # not fully removed and leave false-positive shell expansion matches. + # Use a small stateful scanner instead. + # Caught by CodeRabbit review on PR petry-projects/.github#85. + no_gh = _strip_github_expressions(raw) for match in shell_expansion.finditer(no_gh): findings.append((lineno, match.group(0), raw.rstrip())) @@ -107,15 +142,27 @@ main() { fi local exit=0 + local file_rc=0 for file in "$@"; do if [ ! -f "$file" ]; then printf '[lint-prompt] not found: %s\n' "$file" >&2 exit=2 continue fi - if ! scan_file "$file"; then - exit=1 + # Capture the actual exit code so we preserve exit-2 (file error) over + # exit-1 (lint finding). A later lint failure must not overwrite an earlier + # file error. Caught by CodeRabbit review on PR petry-projects/.github#85. + if scan_file "$file"; then + file_rc=0 + else + file_rc=$? fi + case "$file_rc" in + 0) ;; + 1) if [ "$exit" -eq 0 ]; then exit=1; fi ;; + 2) exit=2 ;; + *) return "$file_rc" ;; + esac done return "$exit" } diff --git a/.github/scripts/feature-ideation/match-discussions.sh b/.github/scripts/feature-ideation/match-discussions.sh index d8bf7668..c4af5466 100755 --- a/.github/scripts/feature-ideation/match-discussions.sh +++ b/.github/scripts/feature-ideation/match-discussions.sh @@ -97,49 +97,91 @@ def jaccard(a: set[str], b: set[str]) -> float: return len(a & b) / len(a | b) -with open(signals_path) as f: - signals = json.load(f) -with open(proposals_path) as f: - proposals = json.load(f) +def _load_json(path: str, label: str): + """Load JSON from path, exiting with code 2 on any read or parse error.""" + try: + with open(path, encoding="utf-8") as f: + return json.load(f) + except OSError as exc: + sys.stderr.write(f"[match-discussions] cannot read {label}: {exc}\n") + sys.exit(2) + except json.JSONDecodeError as exc: + sys.stderr.write(f"[match-discussions] invalid JSON in {label}: {exc}\n") + sys.exit(2) + + +signals = _load_json(signals_path, "signals") +proposals = _load_json(proposals_path, "proposals") if not isinstance(proposals, list): sys.stderr.write("[match-discussions] proposals must be a JSON array\n") sys.exit(65) discussions = signals.get("ideas_discussions", {}).get("items", []) or [] -disc_norm = [(d, normalize(d.get("title", ""))) for d in discussions] - -matched = [] -new_candidates = [] -seen_disc_ids = set() - -for proposal in proposals: +# Skip discussions without an id to avoid all id-less entries collapsing into +# a single `None` key in seen_disc_ids. Caught by CodeRabbit on PR #85. +disc_norm = [ + (d, normalize(d.get("title", ""))) + for d in discussions + if d.get("id") is not None +] + +# --- Optimal (similarity-sorted) matching ------------------------------------ +# The original greedy per-proposal loop consumed discussions in proposal order, +# so an early lower-value match could block a later higher-value match. +# Instead we enumerate all (proposal, discussion) pairs, sort by similarity +# descending (ties broken by original proposal index for stability), then +# assign greedily. This guarantees globally higher-value matches are honoured +# first. Caught by CodeRabbit review on PR petry-projects/.github#85. + +# Collect valid proposals with their original index (for tie-breaking + new_candidates). +proposals_indexed: list[tuple[int, dict]] = [] +for p_idx, proposal in enumerate(proposals): if not isinstance(proposal, dict) or "title" not in proposal: sys.stderr.write(f"[match-discussions] skipping malformed proposal: {proposal!r}\n") continue - p_norm = normalize(proposal["title"]) + proposals_indexed.append((p_idx, proposal)) - best = None - best_sim = 0.0 +# Build all (similarity, proposal_idx, disc_id, proposal, disc) tuples. +all_pairs: list[tuple[float, int, str, dict, dict]] = [] +for p_idx, proposal in proposals_indexed: + p_norm = normalize(proposal["title"]) for disc, d_norm in disc_norm: - if disc.get("id") in seen_disc_ids: - continue sim = jaccard(p_norm, d_norm) - if sim > best_sim: - best_sim = sim - best = disc + all_pairs.append((sim, p_idx, disc["id"], proposal, disc)) - if best is not None and best_sim >= threshold: +# Sort descending by similarity; stable tie-break by proposal index ascending. +all_pairs.sort(key=lambda x: (-x[0], x[1])) + +matched = [] +seen_disc_ids: set[str] = set() +seen_proposal_idxs: set[int] = set() + +for sim, p_idx, disc_id, proposal, disc in all_pairs: + if p_idx in seen_proposal_idxs or disc_id in seen_disc_ids: + continue + if sim >= threshold: matched.append( { "proposal": proposal, - "discussion": best, - "similarity": round(best_sim, 4), + "discussion": disc, + "similarity": round(sim, 4), } ) - seen_disc_ids.add(best.get("id")) - else: - new_candidates.append({"proposal": proposal, "best_similarity": round(best_sim, 4)}) + seen_disc_ids.add(disc_id) + seen_proposal_idxs.add(p_idx) + +# Unmatched proposals become new candidates. +new_candidates = [] +for p_idx, proposal in proposals_indexed: + if p_idx in seen_proposal_idxs: + continue + p_norm = normalize(proposal["title"]) + best_sim = max( + (jaccard(p_norm, d_norm) for _, d_norm in disc_norm), + default=0.0, + ) + new_candidates.append({"proposal": proposal, "best_similarity": round(best_sim, 4)}) result = { "matched": matched, diff --git a/.github/scripts/feature-ideation/validate-signals.py b/.github/scripts/feature-ideation/validate-signals.py index eb82ff7c..e163cf62 100755 --- a/.github/scripts/feature-ideation/validate-signals.py +++ b/.github/scripts/feature-ideation/validate-signals.py @@ -53,7 +53,12 @@ def main(argv: list[str]) -> int: return 2 try: - signals = json.loads(signals_path.read_text()) + signals = json.loads(signals_path.read_text(encoding="utf-8")) + except OSError as exc: + # File read errors (permissions, I/O) must also exit 2. Caught by + # CodeRabbit review on PR petry-projects/.github#85. + sys.stderr.write(f"[validate-signals] cannot read {signals_path}: {exc}\n") + return 2 except json.JSONDecodeError as exc: # Per the docstring contract, exit 2 means usage / file error and # exit 1 means schema validation error. A malformed signals file @@ -63,7 +68,10 @@ def main(argv: list[str]) -> int: return 2 try: - schema = json.loads(schema_path.read_text()) + schema = json.loads(schema_path.read_text(encoding="utf-8")) + except OSError as exc: + sys.stderr.write(f"[validate-signals] cannot read schema {schema_path}: {exc}\n") + return 2 except json.JSONDecodeError as exc: sys.stderr.write(f"[validate-signals] invalid schema JSON: {exc}\n") return 2 @@ -79,7 +87,7 @@ def main(argv: list[str]) -> int: format_checker = FormatChecker() @format_checker.checks("date-time", raises=(ValueError, TypeError)) - def _check_date_time(instance): # noqa: ANN001 — jsonschema callback signature + def _check_date_time(instance) -> bool: # noqa: ANN001 — jsonschema callback signature if not isinstance(instance, str): return True # non-strings handled by `type` keyword, not format # Must look like a date-time, not just any string. Require at least diff --git a/.github/workflows/feature-ideation-tests.yml b/.github/workflows/feature-ideation-tests.yml index 589c9349..91c1a628 100644 --- a/.github/workflows/feature-ideation-tests.yml +++ b/.github/workflows/feature-ideation-tests.yml @@ -103,7 +103,7 @@ jobs: - name: Upload bats output on failure if: failure() - uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 + uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2 with: name: bats-output path: | diff --git a/test/workflows/feature-ideation/collect-signals.bats b/test/workflows/feature-ideation/collect-signals.bats index cf306ff2..94f9a12b 100644 --- a/test/workflows/feature-ideation/collect-signals.bats +++ b/test/workflows/feature-ideation/collect-signals.bats @@ -21,17 +21,15 @@ teardown() { # Build a multi-call gh script for the standard happy path. # Order MUST match collect-signals.sh: -# 1. gh run list (feed checkpoint — last successful run) -# 2. gh issue list --state open -# 3. gh issue list --state closed -# 4. gh api graphql (categories) -# 5. gh api graphql (discussions) -# 6. gh release list -# 7. gh pr list --state merged +# 1. gh issue list --state open +# 2. gh issue list --state closed +# 3. gh api graphql (categories) +# 4. gh api graphql (discussions) +# 5. gh release list +# 6. gh pr list --state merged build_happy_script() { local script="${TT_TMP}/gh-script.tsv" : >"$script" - printf '0\t%s\t-\n' "${TT_FIXTURES_DIR}/gh-responses/run-list-last-success.txt" >>"$script" printf '0\t%s\t-\n' "${TT_FIXTURES_DIR}/gh-responses/issue-list-open.json" >>"$script" printf '0\t%s\t-\n' "${TT_FIXTURES_DIR}/gh-responses/issue-list-closed.json" >>"$script" printf '0\t%s\t-\n' "${TT_FIXTURES_DIR}/gh-responses/graphql-categories.json" >>"$script" @@ -117,9 +115,7 @@ build_happy_script() { script="${TT_TMP}/gh-script.tsv" err_file="${TT_TMP}/auth-err.txt" printf 'HTTP 401: Bad credentials\n' >"$err_file" - # run list (feed checkpoint — silenced with || true, so failure falls back) - printf '0\t%s\t-\n' "${TT_FIXTURES_DIR}/gh-responses/run-list-last-success.txt" >"$script" - printf '4\t-\t%s\n' "$err_file" >>"$script" + printf '4\t-\t%s\n' "$err_file" >"$script" export GH_STUB_SCRIPT="$script" rm -f "${TT_TMP}/.gh-stub-counter" @@ -131,7 +127,6 @@ build_happy_script() { @test "collect-signals: FAILS LOUD on GraphQL errors envelope (categories)" { script="${TT_TMP}/gh-script.tsv" : >"$script" - printf '0\t%s\t-\n' "${TT_FIXTURES_DIR}/gh-responses/run-list-last-success.txt" >>"$script" printf '0\t%s\t-\n' "${TT_FIXTURES_DIR}/gh-responses/issue-list-open.json" >>"$script" printf '0\t%s\t-\n' "${TT_FIXTURES_DIR}/gh-responses/issue-list-closed.json" >>"$script" printf '0\t%s\t-\n' "${TT_FIXTURES_DIR}/gh-responses/graphql-errors-envelope.json" >>"$script" @@ -188,7 +183,6 @@ JSON script="${TT_TMP}/gh-script.tsv" : >"$script" - printf '0\t%s\t-\n' "${TT_FIXTURES_DIR}/gh-responses/run-list-last-success.txt" >>"$script" # feed checkpoint printf '0\t%s\t-\n' "$bot_file" >>"$script" # open issues — all bots printf '0\t%s\t-\n' "$empty_file" >>"$script" # closed issues printf '0\t%s\t-\n' "${TT_FIXTURES_DIR}/gh-responses/graphql-no-ideas-category.json" >>"$script" @@ -208,7 +202,6 @@ JSON @test "collect-signals: emits truncation warning when discussions hasNextPage=true" { script="${TT_TMP}/gh-script.tsv" : >"$script" - printf '0\t%s\t-\n' "${TT_FIXTURES_DIR}/gh-responses/run-list-last-success.txt" >>"$script" printf '0\t%s\t-\n' "${TT_FIXTURES_DIR}/gh-responses/issue-list-open.json" >>"$script" printf '0\t%s\t-\n' "${TT_FIXTURES_DIR}/gh-responses/issue-list-closed.json" >>"$script" printf '0\t%s\t-\n' "${TT_FIXTURES_DIR}/gh-responses/graphql-categories.json" >>"$script" @@ -232,7 +225,6 @@ JSON @test "collect-signals: skips discussions when Ideas category absent" { script="${TT_TMP}/gh-script.tsv" : >"$script" - printf '0\t%s\t-\n' "${TT_FIXTURES_DIR}/gh-responses/run-list-last-success.txt" >>"$script" printf '0\t%s\t-\n' "${TT_FIXTURES_DIR}/gh-responses/issue-list-open.json" >>"$script" printf '0\t%s\t-\n' "${TT_FIXTURES_DIR}/gh-responses/issue-list-closed.json" >>"$script" printf '0\t%s\t-\n' "${TT_FIXTURES_DIR}/gh-responses/graphql-no-ideas-category.json" >>"$script" diff --git a/test/workflows/feature-ideation/compose-signals.bats b/test/workflows/feature-ideation/compose-signals.bats index 35359e90..4dced2c8 100644 --- a/test/workflows/feature-ideation/compose-signals.bats +++ b/test/workflows/feature-ideation/compose-signals.bats @@ -18,7 +18,6 @@ compose_empty() { '[]' '[]' '[]' '[]' '[]' '[]' '[]' \ 'foo/bar' \ '2026-04-07T00:00:00Z' \ - '2026-03-07T00:00:00Z' \ '1.0.0' \ '[]' } @@ -35,14 +34,14 @@ compose_empty() { @test "compose: rejects empty string for any JSON arg" { run compose_signals \ '' '[]' '[]' '[]' '[]' '[]' '[]' \ - 'foo/bar' '2026-04-07T00:00:00Z' '2026-03-07T00:00:00Z' '1.0.0' '[]' + 'foo/bar' '2026-04-07T00:00:00Z' '1.0.0' '[]' [ "$status" -ne 0 ] } @test "compose: rejects non-JSON for any JSON arg" { run compose_signals \ 'not json' '[]' '[]' '[]' '[]' '[]' '[]' \ - 'foo/bar' '2026-04-07T00:00:00Z' '2026-03-07T00:00:00Z' '1.0.0' '[]' + 'foo/bar' '2026-04-07T00:00:00Z' '1.0.0' '[]' [ "$status" -ne 0 ] } @@ -53,9 +52,9 @@ compose_empty() { @test "compose: produces all required top-level fields with empty inputs" { run compose_empty [ "$status" -eq 0 ] - for field in schema_version scan_date last_successful_run repo open_issues \ - closed_issues_30d ideas_discussions releases merged_prs_30d \ - feature_requests bug_reports truncation_warnings; do + for field in schema_version scan_date repo open_issues closed_issues_30d \ + ideas_discussions releases merged_prs_30d feature_requests \ + bug_reports truncation_warnings; do printf '%s' "$output" | jq -e "has(\"$field\")" >/dev/null done } @@ -64,7 +63,7 @@ compose_empty() { open='[{"number":1,"title":"a","labels":[]},{"number":2,"title":"b","labels":[]}]' run compose_signals \ "$open" '[]' '[]' '[]' '[]' '[]' '[]' \ - 'foo/bar' '2026-04-07T00:00:00Z' '2026-03-07T00:00:00Z' '1.0.0' '[]' + 'foo/bar' '2026-04-07T00:00:00Z' '1.0.0' '[]' [ "$status" -eq 0 ] count=$(printf '%s' "$output" | jq '.open_issues.count') items_len=$(printf '%s' "$output" | jq '.open_issues.items | length') @@ -75,7 +74,7 @@ compose_empty() { @test "compose: schema_version is preserved verbatim" { run compose_signals \ '[]' '[]' '[]' '[]' '[]' '[]' '[]' \ - 'foo/bar' '2026-04-07T00:00:00Z' '2026-03-07T00:00:00Z' '2.5.1' '[]' + 'foo/bar' '2026-04-07T00:00:00Z' '2.5.1' '[]' [ "$status" -eq 0 ] v=$(printf '%s' "$output" | jq -r '.schema_version') [ "$v" = "2.5.1" ] @@ -85,7 +84,7 @@ compose_empty() { warnings='[{"source":"open_issues","limit":50,"message":"truncated"}]' run compose_signals \ '[]' '[]' '[]' '[]' '[]' '[]' '[]' \ - 'foo/bar' '2026-04-07T00:00:00Z' '2026-03-07T00:00:00Z' '1.0.0' "$warnings" + 'foo/bar' '2026-04-07T00:00:00Z' '1.0.0' "$warnings" [ "$status" -eq 0 ] src=$(printf '%s' "$output" | jq -r '.truncation_warnings[0].source') [ "$src" = "open_issues" ] @@ -94,7 +93,7 @@ compose_empty() { @test "compose: scan_date and repo round-trip exactly" { run compose_signals \ '[]' '[]' '[]' '[]' '[]' '[]' '[]' \ - 'octocat/hello-world' '2030-01-15T12:34:56Z' '2029-12-15T12:34:56Z' '1.0.0' '[]' + 'octocat/hello-world' '2030-01-15T12:34:56Z' '1.0.0' '[]' [ "$status" -eq 0 ] d=$(printf '%s' "$output" | jq -r '.scan_date') r=$(printf '%s' "$output" | jq -r '.repo') diff --git a/test/workflows/feature-ideation/date-utils.bats b/test/workflows/feature-ideation/date-utils.bats index dfb132b2..a26749ff 100644 --- a/test/workflows/feature-ideation/date-utils.bats +++ b/test/workflows/feature-ideation/date-utils.bats @@ -15,7 +15,10 @@ setup() { } @test "date_days_ago: 0 returns today" { - today=$(date -u +%Y-%m-%d) + # Use the helper function rather than the raw system `date` call so the test + # validates behaviour consistently on both GNU and BSD systems. + # Caught by CodeRabbit review on PR petry-projects/.github#85. + today=$(date_today) run date_days_ago 0 [ "$status" -eq 0 ] [ "$output" = "$today" ] diff --git a/test/workflows/feature-ideation/fixtures/expected/empty-repo.signals.json b/test/workflows/feature-ideation/fixtures/expected/empty-repo.signals.json index e6438b55..4101afb6 100644 --- a/test/workflows/feature-ideation/fixtures/expected/empty-repo.signals.json +++ b/test/workflows/feature-ideation/fixtures/expected/empty-repo.signals.json @@ -1,7 +1,6 @@ { - "schema_version": "1.1.0", + "schema_version": "1.0.0", "scan_date": "2026-04-07T07:00:00Z", - "last_successful_run": "2026-03-31T07:00:00Z", "repo": "petry-projects/talkterm", "open_issues": { "count": 0, "items": [] }, "closed_issues_30d": { "count": 0, "items": [] }, diff --git a/test/workflows/feature-ideation/fixtures/expected/populated.signals.json b/test/workflows/feature-ideation/fixtures/expected/populated.signals.json index 5618e36d..4c929219 100644 --- a/test/workflows/feature-ideation/fixtures/expected/populated.signals.json +++ b/test/workflows/feature-ideation/fixtures/expected/populated.signals.json @@ -1,7 +1,6 @@ { - "schema_version": "1.1.0", + "schema_version": "1.0.0", "scan_date": "2026-04-07T07:00:00Z", - "last_successful_run": "2026-03-31T07:00:00Z", "repo": "petry-projects/talkterm", "open_issues": { "count": 2, diff --git a/test/workflows/feature-ideation/fixtures/expected/truncated.signals.json b/test/workflows/feature-ideation/fixtures/expected/truncated.signals.json index 2c884ea6..845db66f 100644 --- a/test/workflows/feature-ideation/fixtures/expected/truncated.signals.json +++ b/test/workflows/feature-ideation/fixtures/expected/truncated.signals.json @@ -1,7 +1,6 @@ { - "schema_version": "1.1.0", + "schema_version": "1.0.0", "scan_date": "2026-04-07T07:00:00Z", - "last_successful_run": "2026-03-31T07:00:00Z", "repo": "petry-projects/talkterm", "open_issues": { "count": 0, "items": [] }, "closed_issues_30d": { "count": 0, "items": [] }, diff --git a/test/workflows/feature-ideation/gh-safe.bats b/test/workflows/feature-ideation/gh-safe.bats index b06bb5c7..046446d7 100644 --- a/test/workflows/feature-ideation/gh-safe.bats +++ b/test/workflows/feature-ideation/gh-safe.bats @@ -222,3 +222,14 @@ teardown() { run gh_safe_graphql_input '{"query":"q","variables":{}}' [ "$status" -ne 0 ] } + +# --------------------------------------------------------------------------- +# gh stub — self-tests for stub defensive behaviour +# --------------------------------------------------------------------------- + +@test "gh stub: fails fast when GH_STUB_SCRIPT is set but file is missing" { + GH_STUB_SCRIPT="/nonexistent/path/script.sh" \ + run --separate-stderr "${TT_TMP}/bin/gh" issue list + [ "$status" -ne 0 ] + [[ "$stderr" == *"not found"* ]] +} diff --git a/test/workflows/feature-ideation/lint-prompt.bats b/test/workflows/feature-ideation/lint-prompt.bats index a0cd32ea..1ba14d1a 100644 --- a/test/workflows/feature-ideation/lint-prompt.bats +++ b/test/workflows/feature-ideation/lint-prompt.bats @@ -158,3 +158,57 @@ YML run bash "$LINTER" "${TT_TMP}/v1-clean.yml" [ "$status" -eq 0 ] } + +# --------------------------------------------------------------------------- +# Exit-code precedence: file errors (2) must not be downgraded by later lint +# findings (1). Caught by CodeRabbit review on PR petry-projects/.github#85. +# --------------------------------------------------------------------------- + +@test "lint-prompt: missing file before lint-failing file still exits 2" { + write_yml "${TT_TMP}/bad.yml" <<'YML' +jobs: + analyze: + steps: + - uses: anthropics/claude-code-action@v1 + with: + prompt: | + Date: $(date -u +%Y-%m-%d) +YML + # Pass a non-existent file FIRST, then a file with a lint finding. + # The aggregate exit code must be 2 (file error) not 1 (lint finding). + run bash "$LINTER" "${TT_TMP}/missing.yml" "${TT_TMP}/bad.yml" + [ "$status" -eq 2 ] +} + +@test "lint-prompt: YAML chomping modifiers are recognised (prompt: |-)" { + # YAML block-scalar headers like `|-`, `|+`, `>-`, `>+` must be detected + # as prompt markers so their bodies are linted. Caught by CodeRabbit on PR #85. + write_yml "${TT_TMP}/chomping.yml" <<'YML' +jobs: + analyze: + steps: + - uses: anthropics/claude-code-action@v1 + with: + prompt: |- + Date: $(date -u +%Y-%m-%d) +YML + run bash "$LINTER" "${TT_TMP}/chomping.yml" + [ "$status" -eq 1 ] +} + +@test "lint-prompt: GitHub Actions expressions with nested braces do not false-positive" { + # ${{ format('${FOO}', github.ref) }} should be stripped entirely so ${FOO} + # inside the expression is not flagged as a bare shell expansion. + # Caught by CodeRabbit review on PR petry-projects/.github#85. + write_yml "${TT_TMP}/nested-expr.yml" <<'YML' +jobs: + analyze: + steps: + - uses: anthropics/claude-code-action@v1 + with: + prompt: | + Ref: ${{ format('{0}', github.ref) }} +YML + run bash "$LINTER" "${TT_TMP}/nested-expr.yml" + [ "$status" -eq 0 ] +} diff --git a/test/workflows/feature-ideation/match-discussions.bats b/test/workflows/feature-ideation/match-discussions.bats index 9b63ea9a..b92bce07 100644 --- a/test/workflows/feature-ideation/match-discussions.bats +++ b/test/workflows/feature-ideation/match-discussions.bats @@ -192,6 +192,22 @@ build_proposals() { [ "$status" -eq 0 ] } +@test "match: malformed JSON in signals file exits non-zero" { + # Caught by CodeRabbit review on PR petry-projects/.github#85. + printf 'not valid json' >"${TT_TMP}/signals.json" + build_proposals '[{"title":"x"}]' + run bash "$MATCH" "${TT_TMP}/signals.json" "${TT_TMP}/proposals.json" + [ "$status" -ne 0 ] +} + +@test "match: malformed JSON in proposals file exits non-zero" { + # Caught by CodeRabbit review on PR petry-projects/.github#85. + build_signals '[]' + printf 'not valid json' >"${TT_TMP}/proposals.json" + run bash "$MATCH" "${TT_TMP}/signals.json" "${TT_TMP}/proposals.json" + [ "$status" -ne 0 ] +} + @test "match: idempotent re-run of same proposals against existing matches yields all-matched" { # Simulates the R6 idempotency case: run 1 created Discussions, run 2 # finds them and should NOT propose duplicates. diff --git a/test/workflows/feature-ideation/signals-schema.bats b/test/workflows/feature-ideation/signals-schema.bats index 0281d589..4ad05181 100644 --- a/test/workflows/feature-ideation/signals-schema.bats +++ b/test/workflows/feature-ideation/signals-schema.bats @@ -69,6 +69,15 @@ FIX="${TT_FIXTURES_DIR}/expected" [ "$status" -eq 0 ] } +@test "schema: malformed JSON signals file FAILS with exit code 2" { + # Validates that the OSError/JSONDecodeError path returns 2 (file/data error) + # not 1 (schema validation error). Caught by CodeRabbit review on PR #85. + bad_file="${BATS_TEST_TMPDIR}/malformed.json" + printf '{"schema_version":"1.0.0",' >"$bad_file" + run python3 "$VALIDATOR" "$bad_file" "$SCHEMA" + [ "$status" -eq 2 ] +} + @test "schema: SCHEMA_VERSION constant matches schema file version comment" { # CodeRabbit on PR petry-projects/.github#85: enforce that the # SCHEMA_VERSION constant in collect-signals.sh stays in lockstep with diff --git a/test/workflows/feature-ideation/stubs/gh b/test/workflows/feature-ideation/stubs/gh index 2e8cd24d..27584df2 100755 --- a/test/workflows/feature-ideation/stubs/gh +++ b/test/workflows/feature-ideation/stubs/gh @@ -31,8 +31,15 @@ if [ -n "${GH_STUB_LOG:-}" ]; then fi # Multi-call mode. +# NOTE: TT_TMP (or BATS_TEST_TMPDIR as fallback) must be set so the counter +# file is isolated per test. Without it the fallback /tmp path can collide +# across parallel test suites. Caught by CodeRabbit review on PR #85. +if [ -n "${GH_STUB_SCRIPT:-}" ] && [ ! -f "${GH_STUB_SCRIPT}" ]; then + printf '[gh-stub] GH_STUB_SCRIPT is set but file not found: %s\n' "$GH_STUB_SCRIPT" >&2 + exit 1 +fi if [ -n "${GH_STUB_SCRIPT:-}" ] && [ -f "${GH_STUB_SCRIPT}" ]; then - counter_file="${TT_TMP:-/tmp}/.gh-stub-counter" + counter_file="${TT_TMP:-${BATS_TEST_TMPDIR:-/tmp}}/.gh-stub-counter" count=0 if [ -f "$counter_file" ]; then count=$(cat "$counter_file")