# Source: jaegertracing/jaeger, .github/workflows/ci-summary-report.yml at commit 5f6cfc180955593c826f81104e81120975ac088f
# Copyright (c) 2026 The Jaeger Authors.
# SPDX-License-Identifier: Apache-2.0

# CI Summary Report: reusable workflow (workflow_call) invoked by CI Orchestrator.
# Computes metrics comparison and coverage gating, then uploads a ci-summary
# artifact with the results. ci-summary-report-publish.yml (triggered by
# workflow_run) reads that artifact to post PR comments and check runs, because
# pull_request workflows from forks cannot write to the upstream repository.
#
# Design: docs/adr/004-migrating-coverage-gating-to-github-actions.md

# Display name of this workflow.
name: CI Summary Report
# Reusable workflow: it only runs when another workflow invokes it via `uses:`.
on:
  workflow_call:
# Token scopes requested for the jobs in this workflow.
permissions:
  contents: read
  actions: write  # required for actions/cache save and gh run download

jobs:
  summary-report:
    name: Summary Report
    runs-on: ubuntu-latest
    steps:
      # Check out the repository at github.sha; later steps run scripts
      # (./scripts/e2e/...) and a local action (./.github/actions/setup-go)
      # from this working tree.
      - name: Checkout code
        uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6
        with:
          ref: ${{ github.sha }}

      # Fetch every artifact uploaded by the calling (CI Orchestrator) run into
      # .artifacts/ -- that covers the coverage-* and metrics_snapshot_*
      # artifacts from all CI jobs.
      - name: Download all artifacts
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: >-
          gh run download "${{ github.run_id }}"
          --repo "${{ github.repository }}"
          --dir .artifacts

      # Installs the prometheus-client Python package.
      # NOTE(review): presumably required by the metrics comparison tooling run
      # in the next step -- confirm against scripts/e2e.
      - name: Install dependencies
        run: python3 -m pip install prometheus-client

      # Runs the metrics comparison script. Its CONCLUSION, TOTAL_CHANGES and
      # INFRA_ERRORS step outputs are consumed by later steps in this job
      # (the summary serialization and the final gate).
      - name: Compare metrics and generate summary
        id: compare-metrics
        shell: bash
        run: bash ./scripts/e2e/metrics_summary.sh

      # Installs Go through the repository-local setup-go action; the Go
      # toolchain is needed by `go tool cover` in the coverage calculation step.
      - name: Set up Go for coverage tools
        uses: ./.github/actions/setup-go
        with:
          go-version: 1.26.x

      # NOTE(review): expected to provide ./.tools/gocovmerge, which the merge
      # step below invokes -- confirm against the Makefile target.
      - name: Install coverage tools
        run: make install-coverage-tools

      # Collects every *.out profile found under a coverage-* artifact directory
      # and merges them into .artifacts/merged-coverage.out.
      # Output: skipped = "true" when no profile exists, otherwise "false".
      - name: Merge coverage profiles
        id: merge-coverage
        run: |
          mapfile -t profiles < <(find .artifacts -type f -path "*/coverage-*/*.out")
          profile_count=${#profiles[@]}

          # Nothing to merge: report the gate as skipped and stop here.
          if (( profile_count == 0 )); then
            echo "No coverage files found; skipping coverage gate."
            echo "skipped=true" >> "$GITHUB_OUTPUT"
            exit 0
          fi

          echo "Merging ${profile_count} coverage profiles"
          ./.tools/gocovmerge "${profiles[@]}" > .artifacts/merged-coverage.out
          echo "skipped=false" >> "$GITHUB_OUTPUT"

      # Strips excluded paths from the merged profile before it is measured.
      - name: Filter excluded paths from merged coverage
        if: success() && steps.merge-coverage.outputs.skipped == 'false'
        run: |
          merged_profile=.artifacts/merged-coverage.out
          # filter_coverage.py rewrites the profile in place, applying the same
          # exclusions as .codecov.yml (single source of truth).
          python3 scripts/e2e/filter_coverage.py "${merged_profile}"
          printf 'Coverage lines after filtering: %s\n' "$(wc -l < "${merged_profile}")"

      # Extracts the total coverage percentage from the merged profile.
      # Output: percentage (no "%" suffix); the same value is also written to
      # .artifacts/current-coverage.txt.
      - name: Calculate current coverage percentage
        if: success() && steps.merge-coverage.outputs.skipped == 'false'
        id: coverage
        run: |
          # Third column of the "total:" row, with the percent sign removed.
          pct=$(go tool cover -func=.artifacts/merged-coverage.out \
            | awk '/^total:/ { gsub(/%/, "", $3); print $3 }')
          echo "${pct}" > .artifacts/current-coverage.txt
          echo "percentage=${pct}" >> "$GITHUB_OUTPUT"
          echo "Current coverage: ${pct}%"

      # Restores .artifacts/baseline-coverage.txt from the Actions cache when an
      # entry exists. The coverage gate below only performs its regression check
      # if that file is present.
      - name: Restore baseline coverage from main
        if: success() && steps.merge-coverage.outputs.skipped == 'false'
        id: restore-baseline
        uses: actions/cache/restore@1bd1e32a3bdc45362d1e726936510720a7c30a57
        with:
          path: .artifacts/baseline-coverage.txt
          # Exact match intentionally never hits (run IDs differ between runs).
          # The restore-keys prefix coverage-baseline_ always falls back to the
          # most recently created cache entry (GitHub returns the newest match
          # for prefix lookups), which is the latest passing main-branch run.
          # The trailing underscore avoids matching a plain "coverage-baseline"
          # key if one were ever created.
          # Storage is negligible: each entry is a single number (~10 B) and
          # GitHub automatically evicts entries unused for 7 days.
          key: coverage-baseline_${{ github.run_id }}
          restore-keys: |
            coverage-baseline_

      # Coverage gate. Records a failure when the current coverage is missing or
      # not a number, is below the absolute minimum, or is lower than the
      # baseline restored from main (when a baseline file exists).
      # Outputs:
      #   conclusion - "success" or "failure"
      #   summary    - one-line explanation of the result
      # A "failure" conclusion does not fail this step; the last step of the job
      # fails the job after the summary artifact has been uploaded.
      - name: Gate on coverage regression
        if: success() && steps.merge-coverage.outputs.skipped == 'false'
        id: coverage-gate
        env:
          # Passed through the environment instead of being interpolated into
          # the script, so the value is never parsed as shell syntax.
          CURRENT: ${{ steps.coverage.outputs.percentage }}
        run: |
          BASELINE_MSG="(no baseline yet)"
          failure_reasons=()
          # Plain non-negative decimal such as "87" or "87.4". Anything else is
          # rejected before it reaches bc, $GITHUB_OUTPUT or a workflow command.
          number_re='^[0-9]+([.][0-9]+)?$'

          if [ -z "$CURRENT" ]; then
            failure_reasons+=("coverage percentage is empty; go tool cover may have failed")
          elif [[ ! "$CURRENT" =~ $number_re ]]; then
            failure_reasons+=("coverage percentage is not a number; go tool cover output may be malformed")
          else
            # Gate 1: absolute minimum threshold
            MINIMUM=95.0
            if (( $(echo "$CURRENT < $MINIMUM" | bc -l) )); then
              failure_reasons+=("coverage ${CURRENT}% is below minimum ${MINIMUM}%")
            fi

            # Gate 2: no regression vs main baseline
            if [ -f .artifacts/baseline-coverage.txt ]; then
              BASELINE=$(cat .artifacts/baseline-coverage.txt)
              if [ -z "$BASELINE" ]; then
                failure_reasons+=("baseline coverage file is empty; cannot perform regression check")
              elif [[ ! "$BASELINE" =~ $number_re ]]; then
                failure_reasons+=("baseline coverage file is not a number; cannot perform regression check")
              else
                BASELINE_MSG="(baseline ${BASELINE}%)"
                if (( $(echo "$CURRENT < $BASELINE" | bc -l) )); then
                  failure_reasons+=("coverage dropped from ${BASELINE}% to ${CURRENT}%")
                fi
              fi
            fi
          fi

          if [ ${#failure_reasons[@]} -gt 0 ]; then
            # "${array[*]}" joins on only the FIRST character of IFS, so a
            # two-character "; " separator has to be built explicitly.
            msg=$(printf '%s; ' "${failure_reasons[@]}")
            msg=${msg%; }
            echo "conclusion=failure" >> "$GITHUB_OUTPUT"
            echo "summary=${msg}" >> "$GITHUB_OUTPUT"
            echo "::error::${msg}"
          else
            echo "conclusion=success" >> "$GITHUB_OUTPUT"
            echo "summary=Coverage ${CURRENT}% ${BASELINE_MSG}" >> "$GITHUB_OUTPUT"
            echo "Coverage ${CURRENT}% ${BASELINE_MSG}: OK"
          fi

      # Serialize only strongly-typed values to JSON so ci-summary-report-publish.yml
      # never handles free-form text from test output (which could contain injections).
      # All display text is constructed from this structured data by trusted publish-
      # workflow code running in the base repository context.
      #
      # metrics_snapshots is an array of per-snapshot change data (metric names and
      # counts) produced by metrics_summary.sh.  The publish workflow validates every
      # field before rendering (see sanitizeSnapshots in ci-summary-report-publish.js).
      #
      # This step runs with `if: always()` and must not crash on malformed step
      # outputs: a value that cannot be parsed is written as null so that
      # .artifacts/ci-summary.json is still produced.
      - name: Save conclusions for publish workflow
        if: always()
        env:
          PR_NUMBER: ${{ github.event.pull_request.number }}
          METRICS_CONCLUSION: ${{ steps.compare-metrics.outputs.CONCLUSION }}
          METRICS_TOTAL: ${{ steps.compare-metrics.outputs.TOTAL_CHANGES }}
          METRICS_INFRA_ERRORS: ${{ steps.compare-metrics.outputs.INFRA_ERRORS }}
          COVERAGE_MERGE_OUTCOME: ${{ steps.merge-coverage.outcome }}
          COVERAGE_SKIPPED: ${{ steps.merge-coverage.outputs.skipped }}
          COVERAGE_CONCLUSION: ${{ steps.coverage-gate.outputs.conclusion }}
          COVERAGE_PCT: ${{ steps.coverage.outputs.percentage }}
        run: |
          mkdir -p .artifacts
          python3 - <<'PYEOF'
          import json, math, os


          def env_int(name):
              """Return env var `name` as an int; None when unset, empty or malformed."""
              raw = os.environ.get(name)
              if raw in (None, ''):
                  return None
              try:
                  return int(raw)
              except ValueError:
                  # Report only the variable name, never the raw value.
                  print(f'::warning::ignoring non-integer value of {name}')
                  return None


          def finite_float(text):
              """Parse `text` as a finite float; None for garbage, NaN or infinity.

              NaN/inf are rejected because json.dump would emit them as bare
              NaN/Infinity tokens, which are not valid JSON.
              """
              try:
                  value = float(text)
              except (TypeError, ValueError):
                  return None
              return value if math.isfinite(value) else None


          metrics_conclusion = os.environ.get('METRICS_CONCLUSION') or 'failure'
          metrics_total      = env_int('METRICS_TOTAL')
          has_infra_errors   = os.environ.get('METRICS_INFRA_ERRORS') == 'true'

          coverage_merge_outcome = os.environ.get('COVERAGE_MERGE_OUTCOME', '')
          coverage_skipped       = os.environ.get('COVERAGE_SKIPPED') == 'true'
          if coverage_merge_outcome in ('failure', 'cancelled'):
              # merge-coverage failed before writing its skipped output; treat as failure
              # so a pipeline error is not silently reported as coverage skipped/success.
              coverage_conclusion = 'failure'
          elif coverage_skipped:
              coverage_conclusion = 'skipped'
          else:
              coverage_conclusion = os.environ.get('COVERAGE_CONCLUSION') or 'skipped'

          coverage_pct      = None
          coverage_baseline = None
          if not coverage_skipped:
              coverage_pct = finite_float(os.environ.get('COVERAGE_PCT'))
              try:
                  with open('.artifacts/baseline-coverage.txt') as f:
                      coverage_baseline = finite_float(f.read().strip())
              except (FileNotFoundError, ValueError):
                  pass

          # Load per-snapshot metric change data (produced by metrics_summary.sh).
          # If the file is missing or malformed, set to None — the publish
          # workflow treats null/missing as "no detail available".
          metrics_snapshots = None
          try:
              with open('.artifacts/metrics_snapshots.json') as f:
                  metrics_snapshots = json.load(f)
          except (FileNotFoundError, json.JSONDecodeError, ValueError):
              pass

          data = {
              'pr_number':               env_int('PR_NUMBER'),
              'metrics_conclusion':      metrics_conclusion,
              'metrics_total_changes':   metrics_total,
              'metrics_has_infra_errors': has_infra_errors,
              'metrics_snapshots':       metrics_snapshots,
              'coverage_conclusion':     coverage_conclusion,
              'coverage_percentage':     coverage_pct,
              'coverage_baseline':       coverage_baseline,
              'coverage_skipped':        coverage_skipped,
          }
          with open('.artifacts/ci-summary.json', 'w') as f:
              json.dump(data, f, indent=2)
          print(json.dumps(data, indent=2))
          PYEOF

      # Uploads ci-summary.json as the "ci-summary" artifact even when earlier
      # steps failed (if: always()); a missing file only produces a warning.
      - name: Upload CI summary artifact
        if: always()
        uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7
        with:
          name: ci-summary
          path: .artifacts/ci-summary.json
          retention-days: 7
          if-no-files-found: warn

      # Save baseline coverage on main-branch runs so PRs can compare against it.
      # Only save when the coverage gate passes so a failing/partial run never
      # overwrites a valid baseline with a bad value.
      # The condition names no status function, so the implicit success() check
      # also applies: the step is skipped once any earlier step has failed.
      - name: Save coverage baseline on main branch
        if: >-
          github.ref == 'refs/heads/main' &&
          steps.merge-coverage.outputs.skipped == 'false' &&
          steps.coverage-gate.outputs.conclusion == 'success'
        # Copy this run's percentage to the path that the cache step saves.
        run: cp .artifacts/current-coverage.txt .artifacts/baseline-coverage.txt

      # Stores the baseline file under a per-run cache key. The restore step
      # earlier in this job finds it through the coverage-baseline_ prefix in
      # its restore-keys.
      - name: Cache coverage baseline
        if: >-
          github.ref == 'refs/heads/main' &&
          steps.merge-coverage.outputs.skipped == 'false' &&
          steps.coverage-gate.outputs.conclusion == 'success'
        uses: actions/cache/save@1bd1e32a3bdc45362d1e726936510720a7c30a57
        with:
          path: .artifacts/baseline-coverage.txt
          key: coverage-baseline_${{ github.run_id }}

      # Fail the job (and the calling CI Orchestrator run) so the coverage/metrics
      # regression is immediately visible in the PR Checks table.
      # The ci-summary artifact is already uploaded above (if: always()), so
      # ci-summary-report-publish.yml can still post the PR comment and check runs.
      - name: Fail if coverage or metrics gate failed
        if: >-
          steps.compare-metrics.outputs.CONCLUSION == 'failure' ||
          steps.coverage-gate.outputs.conclusion == 'failure'
        env:
          # Step outputs reach the script through the environment rather than
          # by ${{ }} interpolation, so they are never parsed as shell syntax.
          METRICS_CONCLUSION: ${{ steps.compare-metrics.outputs.CONCLUSION }}
          COVERAGE_CONCLUSION: ${{ steps.coverage-gate.outputs.conclusion }}
        run: |
          echo "Metrics: ${METRICS_CONCLUSION}"
          echo "Coverage: ${COVERAGE_CONCLUSION}"
          exit 1