htekdev · htekdev · Jun 12, 2026 · Jun 12, 2026 · Jun 12, 2026
diff --git a/errors/caching-artifacts/cache-corrupt-from-cancelled-workflow-always-save.yml b/errors/caching-artifacts/cache-corrupt-from-cancelled-workflow-always-save.yml
@@ -0,0 +1,105 @@
+id: caching-artifacts-138
+title: 'Workflow Cancellation During cache/restore Leaves Corrupt Cache — always() Save Poisons Future Runs'
+category: caching-artifacts
+severity: error
+tags:
+  - actions-cache
+  - cache
+  - corrupt-cache
+  - workflow-cancel
+  - windows
+  - always
+  - cache-restore
+  - cache-save
+patterns:
+  - regex: 'Error: The operation was canceled\.'
+    flags: 'i'
+  - regex: 'cache\.tzst.*force-local.*\nError: The operation was canceled'
+    flags: 'im'
+error_messages:
+  - "Error: The operation was canceled."
+  - "\"C:\\Program Files\\Git\\usr\\bin\\tar.exe\" -xf D:/a/_temp/.../cache.tzst ... --use-compress-program \"zstd -d\"\nError: The operation was canceled."
+root_cause: |
+  When a workflow is cancelled mid-run while actions/cache/restore is actively
+  extracting the cache archive, the extraction is interrupted at an arbitrary point.
+  The target directory on disk is left in a partially-extracted (corrupt) state —
+  some files are present, some are missing, and some may be truncated.
+
+  If the workflow uses actions/cache/save with `if: always()` (a popular pattern to
+  ensure the cache is populated even when a job fails), the save step runs after the
+  cancellation because `always()` evaluates to true regardless of job status including
+  cancellation. The save step re-archives the now-partial directory and uploads it
+  under the primary cache key; entries are immutable, so it persists until deleted.
+
+  Subsequent workflow runs perform a cache hit on this key, restore the corrupted
+  archive, and experience seemingly unrelated build failures: missing source files,
+  truncated binaries, incomplete package store manifests, etc. The restore step reports
+  SUCCESS because the download/extract of the corrupt archive itself succeeds — the
+  corruption only manifests when the extracted files are actually used.
+
+  This issue is most commonly observed on Windows runners where cache extraction via
+  tar/zstd is slower and workflow cancellations are more likely to land during active
+  extraction. Reported in actions/cache#1729 with a reproducible example from the
+  Agda project on Cabal/Haskell builds.
+fix: |
+  Guard the cache/save step with a condition that prevents saving when the preceding
+  restore was interrupted or the job was cancelled. Use one of:
+
+  1. Prefer the combined `actions/cache@v5` action over split save/restore — its
+     save runs as a post step declared with `post-if: success()`, so it is skipped
+     when the job fails or is cancelled (an unset `post-if` would default to always()).
+
+  2. If using split save/restore, add an explicit `if:` condition on the save step
+     that checks both step outcome and job cancellation status.
+
+  3. If the cache key is already corrupted, delete it manually via GitHub UI
+     (Actions > Caches) or API before the next run.
+fix_code:
+  - language: yaml
+    label: 'Preferred fix: Use the combined actions/cache action — its post step only saves on success'
+    code: |
+      steps:
+        - name: Cache cabal store
+          uses: actions/cache@v5    # combined restore+save — post-step save runs only on success (post-if: success())
+          with:
+            path: ~/.cabal
+            key: cabal-${{ runner.os }}-${{ hashFiles('cabal.project') }}
+            restore-keys: |
+              cabal-${{ runner.os }}-
+
+        - name: Build
+          run: cabal build all
+  - language: yaml
+    label: 'Split save/restore: Guard cache/save against cancelled restore'
+    code: |
+      steps:
+        - name: Restore cache
+          id: cache-restore
+          uses: actions/cache/restore@v5
+          with:
+            path: ~/.cabal
+            key: cabal-${{ runner.os }}-${{ hashFiles('cabal.project') }}
+
+        - name: Build
+          run: cabal build all
+
+        - name: Save cache
+          uses: actions/cache/save@v5
+          # Only save when restore completed AND build succeeded
+          # Do NOT use if: always() — this runs even on cancellation and saves corrupt state
+          if: steps.cache-restore.outcome == 'success' && job.status == 'success'
+          with:
+            path: ~/.cabal
+            key: ${{ steps.cache-restore.outputs.cache-primary-key }}
+prevention:
+  - 'Do not use `if: always()` on actions/cache/save steps — always() evaluates true even on job cancellation, allowing a corrupt partial-restore state to be saved.'
+  - 'Prefer the combined `actions/cache@v5` over separate save/restore when you do not need fine-grained control — its post step only saves when the job succeeds, so a cancelled run cannot upload a partial cache.'
+  - 'If a job is cancelled during restore and you suspect cache corruption, manually delete the affected cache key via GitHub UI (repo → Actions → Caches) before the next run.'
+  - 'Guard save with `job.status == ''success''` or inspect `steps.<restore-id>.outcome` to ensure a complete restore before re-saving.'
+docs:
+  - url: 'https://github.com/actions/cache/issues/1729'
+    label: 'actions/cache #1729 — Workflow cancellation during restore writes corrupt cache'
+  - url: 'https://docs.github.com/en/actions/writing-workflows/choosing-what-your-workflow-does/caching-dependencies-to-speed-up-workflows'
+    label: 'GitHub Docs — Caching dependencies to speed up workflows'
+  - url: 'https://docs.github.com/en/actions/reference/context-reference#job-context'
+    label: 'GitHub Docs — job context (job.status)'
diff --git a/errors/caching-artifacts/cache-ipv6-server-enotfound-parsed-as-hostname.yml b/errors/caching-artifacts/cache-ipv6-server-enotfound-parsed-as-hostname.yml
@@ -0,0 +1,84 @@
+id: caching-artifacts-139
+title: 'actions/cache Fails With ENOTFOUND on IPv6 Self-Hosted Cache Server — IPv6 Address Literal Parsed as Hostname'
+category: caching-artifacts
+severity: warning
+tags:
+  - actions-cache
+  - ipv6
+  - self-hosted-runner
+  - ENOTFOUND
+  - ACTIONS_CACHE_URL
+  - getaddrinfo
+  - cache-server
+patterns:
+  - regex: 'getCacheEntry failed: getaddrinfo ENOTFOUND \[[\da-f:]+\]'
+    flags: 'i'
+  - regex: 'reserveCache failed: getaddrinfo ENOTFOUND \[[\da-f:]+\]'
+    flags: 'i'
+  - regex: 'Failed to (restore|save): (getCacheEntry|reserveCache) failed: getaddrinfo ENOTFOUND \['
+    flags: 'i'
+error_messages:
+  - "::warning::Failed to restore: getCacheEntry failed: getaddrinfo ENOTFOUND [2001:bc8:1d90:1fc1:dc00:ff:fe2b:3f97]"
+  - "::warning::Failed to save: reserveCache failed: getaddrinfo ENOTFOUND [2001:bc8:1d90:1fc1:dc00:ff:fe2b:3f97]"
+root_cause: |
+  When a self-hosted runner is configured with an IPv6 cache server URL via the
+  ACTIONS_CACHE_URL environment variable (e.g. http://[2001:bc8:...]:8080/), the
+  Node.js HTTP client inside the @actions/cache toolkit package fails to strip the
+  surrounding square brackets from the IPv6 address literal when constructing the
+  hostname for the connection.
+
+  The still-bracketed string (e.g. [2001:bc8:1d90:1fc1:dc00:ff:fe2b:3f97], exactly
+  as echoed in the error message) is passed to getaddrinfo as if it were a DNS
+  hostname. Since it is neither a resolvable name nor a bare IP literal, getaddrinfo
+  returns ENOTFOUND, causing every cache restore and save to fail.
+
+  The failures surface as warnings (not errors) so the workflow continues to run —
+  but the cache is never used and never saved, silently eliminating any performance
+  benefit and causing repeated cold build times on self-hosted setups with IPv6 cache
+  infrastructure.
+
+  Root fix merged in actions/toolkit PR #2298 (HTTP client IPv6 bracket handling).
+  The fix may not be deployed to all currently pinned versions of actions/cache.
+  Reported in actions/cache#1718.
+fix: |
+  Configure the self-hosted cache server to listen on an IPv4 address or register a
+  DNS hostname that resolves to the cache server, and use that in ACTIONS_CACHE_URL
+  instead of a bare IPv6 literal.
+
+  If the fix from actions/toolkit PR #2298 has been shipped in a new actions/cache
+  release, upgrading to that release will also resolve the issue.
+fix_code:
+  - language: yaml
+    label: 'Use IPv4 or DNS hostname for ACTIONS_CACHE_URL instead of IPv6 literal'
+    code: |
+      # Set in runner environment, Docker Compose, or workflow env: block.
+      # WRONG — IPv6 literal causes getaddrinfo ENOTFOUND:
+      # ACTIONS_CACHE_URL: http://[2001:bc8:1d90:1fc1:dc00:ff:fe2b:3f97]:8080/
+
+      # CORRECT — Use IPv4 address:
+      env:
+        ACTIONS_CACHE_URL: http://192.168.1.10:8080/
+
+      # OR use a DNS hostname that resolves to the cache server:
+      # ACTIONS_CACHE_URL: http://cache.internal.example.com:8080/
+  - language: yaml
+    label: 'Test ACTIONS_CACHE_URL connectivity from the runner before deploying'
+    code: |
+      # Add a diagnostic step to verify cache server is reachable:
+      steps:
+        - name: Test cache server connectivity
+          run: |
+            echo "ACTIONS_CACHE_URL: $ACTIONS_CACHE_URL"
+            curl -s -o /dev/null -w "%{http_code}" "$ACTIONS_CACHE_URL" || echo "Cache server unreachable"
+prevention:
+  - 'Configure self-hosted cache servers (e.g. actions-cache-server, Minio, Gitea cache) to listen on an IPv4 interface or a DNS hostname rather than a bare IPv6 literal.'
+  - 'Test ACTIONS_CACHE_URL reachability with curl from the runner before relying on cache steps in CI workflows.'
+  - 'Treat ::warning:: lines from actions/cache in workflow logs as actionable — cache failures silently degrade performance without failing the job.'
+  - 'Watch actions/cache and actions/toolkit releases for the IPv6 address bracket fix (actions/toolkit PR #2298) to ship in a stable release.'
+docs:
+  - url: 'https://github.com/actions/cache/issues/1718'
+    label: 'actions/cache #1718 — Caching fails on IPv6 cache server'
+  - url: 'https://github.com/actions/toolkit/pull/2298'
+    label: 'actions/toolkit PR #2298 — Fix IPv6 address parsing in HTTP client'
+  - url: 'https://docs.github.com/en/actions/hosting-your-own-runners/managing-self-hosted-runners/about-self-hosted-runners#communication-requirements'
+    label: 'GitHub Docs — Self-hosted runner communication requirements'
diff --git a/errors/runner-environment/setup-python-version-not-found-ubuntu-26-container-self-hosted.yml b/errors/runner-environment/setup-python-version-not-found-ubuntu-26-container-self-hosted.yml
@@ -0,0 +1,86 @@
+id: runner-environment-479
+title: 'setup-python Fails to Find Python Version in Ubuntu 26.04 Container on Self-Hosted Runner'
+category: runner-environment
+severity: error
+tags:
+  - setup-python
+  - ubuntu-26
+  - container
+  - self-hosted-runner
+  - versions-manifest
+  - python-version-not-found
+patterns:
+  - regex: 'The version .+ with architecture .x64. was not found for this operating system'
+    flags: 'i'
+  - regex: 'setup-python.*version.*not found.*container.*ubuntu'
+    flags: 'i'
+error_messages:
+  - "Error: The version '3.13' with architecture 'x64' was not found for this operating system."
+root_cause: |
+  When actions/setup-python runs inside a job container (e.g. `container: ubuntu:latest`
+  or `container: ubuntu:26.04`) on a SELF-HOSTED runner, it queries the versions manifest
+  to find pre-built Python binaries. On Ubuntu 26.04 containers, the action
+  cannot resolve a matching manifest entry and returns "version not found."
+
+  The root cause is that the versions-manifest.json for actions/setup-python maps Python
+  distributions to specific Linux identifiers. Ubuntu 26.04 (or `ubuntu:latest` when it
+  resolves to 26.04) presents a different OS identifier than Ubuntu 24.04 or 22.04. The
+  manifest does not yet include a matching pre-compiled binary distribution for the new
+  Ubuntu 26.04 glibc/OS combination, so no download URL is found and the action errors out.
+
+  This only affects SELF-HOSTED runners. GitHub-hosted `ubuntu-latest` runners work because
+  they bypass the manifest lookup for containers differently — the host runner provides
+  toolcache entries that containers can access, while self-hosted runners lack those entries.
+
+  The runner's host OS (e.g. Amazon Linux) is irrelevant — it is the container OS that
+  matters for the Python binary lookup.
+
+  Reported in setup-python#1309 (June 2026, 8 reactions). Downgrading to
+  `container: ubuntu:24.04` is the confirmed workaround.
+fix: |
+  1. Pin the container image to Ubuntu 24.04 (or 22.04) instead of `ubuntu:latest` or
+     `ubuntu:26.04` until the setup-python manifest includes Ubuntu 26.04 distributions.
+  2. Install Python with the container's package manager (apt) in a `run:` step —
+     setup-python has no such fallback; `python-version-file:` only supplies the version.
+  3. Pre-install Python in the container image and call it directly without setup-python;
+     `update-environment: false` only disables env-var updates, not the version lookup.
+fix_code:
+  - language: yaml
+    label: 'Pin container to ubuntu:24.04 until setup-python manifest supports 26.04'
+    code: |
+      jobs:
+        build:
+          runs-on: [self-hosted]
+          container: ubuntu:24.04    # Pin to 24.04 — ubuntu:latest may resolve to 26.04
+          steps:
+            - uses: actions/setup-python@v6
+              with:
+                python-version: '3.13'
+  - language: yaml
+    label: 'Pre-install Python in a custom container image and skip setup-python'
+    code: |
+      # Dockerfile (separate file, shown as comments so this block stays valid YAML):
+      #   FROM ubuntu:26.04
+      #   RUN apt-get update && apt-get install -y python3.13 python3.13-venv python3-pip
+
+      # Workflow — no setup-python step, so no manifest lookup happens
+      jobs:
+        build:
+          runs-on: [self-hosted]
+          container: myorg/ubuntu-26-python:latest    # custom image with Python pre-installed
+          steps:
+            - name: Show interpreter version
+              run: python3.13 --version
+            - name: Create virtual environment
+              run: python3.13 -m venv .venv
+prevention:
+  - 'Do not use `container: ubuntu:latest` on self-hosted runners when relying on setup-python — `ubuntu:latest` may resolve to a new major version (e.g. 26.04) for which pre-built Python binaries are not yet available in the versions manifest.'
+  - 'Pin container image tags to a specific version (ubuntu:24.04) rather than floating tags in production self-hosted workflows.'
+  - 'Watch setup-python releases and versions-manifest.json for Ubuntu 26.04 support to land before migrating self-hosted container workflows.'
+docs:
+  - url: 'https://github.com/actions/setup-python/issues/1309'
+    label: 'setup-python #1309 — Failing to fetch version from manifest when using Ubuntu 26.04 container on self-hosted runner'
+  - url: 'https://github.com/actions/setup-python/blob/main/docs/advanced-usage.md'
+    label: 'setup-python Advanced Usage — containers and self-hosted runners'
+  - url: 'https://docs.github.com/en/actions/writing-workflows/choosing-where-your-workflow-runs/running-jobs-in-a-container'
+    label: 'GitHub Docs — Running jobs in a container'
diff --git a/...failures/reusable-workflow-environment-secrets-silently-empty-without-secrets-inherit.yml b/...failures/reusable-workflow-environment-secrets-silently-empty-without-secrets-inherit.yml
@@ -0,0 +1,106 @@
+id: sf-215
+title: 'Environment-Scoped Secrets Silently Resolve to Empty String in Reusable Workflow Without secrets: inherit'
+category: silent-failures
+severity: silent-failure
+tags:
+  - reusable-workflow
+  - secrets
+  - environment
+  - secrets-inherit
+  - workflow-call
+  - silent-failure
+patterns:
+  - regex: 'MY_SECRET length: 0'
+    flags: 'i'
+  - regex: 'EMPTY.*secret.*resolved|secret.*resolv.*empty.*reusable'  # bare | is alternation; \| would match a literal pipe
+    flags: 'i'
+error_messages:
+  - "MY_SECRET length: 0"
+  - "EMPTY (secret resolved to empty string in reusable workflow)"
+root_cause: |
+  The GitHub Actions documentation states that environment-scoped secrets can be
+  accessed inside a reusable (called) workflow simply by declaring the environment
+  on the called job: "If a called workflow needs to access environment secrets,
+  the environment must be defined in the called workflow."
+
+  In practice this documentation is incomplete: environment-scoped secrets resolve
+  to an empty string ("") in the called workflow UNLESS the CALLER explicitly adds
+  `secrets: inherit` to the calling job (`uses:`). Without `secrets: inherit`, only secrets
+  that are explicitly forwarded through a `secrets:` mapping block are passed to the
+  called workflow.
+
+  Even though the called job declares `environment: my-env`, the environment's
+  secrets are NOT automatically injected from the caller's scope at runtime. The
+  called workflow's secrets context contains only what the caller explicitly passes.
+  No error is thrown — the step succeeds but the secret variable is silently empty,
+  causing dependent logic (auth, API calls, signing operations) to fail downstream
+  with unrelated-looking errors.
+
+  Reported in detail with a minimal repro in actions/runner#4453. The GitHub docs
+  page for reusable workflows was cited but does not reflect the actual runtime behavior.
+fix: |
+  Add `secrets: inherit` to the caller workflow's calling job (the job with `uses:`). This passes all
+  secrets that the caller can access (including environment-scoped secrets) down to
+  the called workflow.
+
+  Alternatively, use explicit `secrets:` mapping to forward specific secrets from
+  the caller's scope to named inputs in the called workflow's `on.workflow_call.secrets`
+  block.
+fix_code:
+  - language: yaml
+    label: 'Caller: Add secrets: inherit to forward environment-scoped secrets'
+    code: |
+      # .github/workflows/caller.yml
+      on: workflow_dispatch
+      jobs:
+        call-it:
+          uses: ./.github/workflows/reusable.yml
+          with:
+            target_environment: my-env
+          secrets: inherit    # ← required for env-scoped secrets to resolve in called workflow
+  - language: yaml
+    label: 'Caller: Alternatively, explicitly forward individual secrets'
+    code: |
+      # .github/workflows/caller.yml
+      on: workflow_dispatch
+      jobs:
+        call-it:
+          uses: ./.github/workflows/reusable.yml
+          with:
+            target_environment: my-env
+          secrets:
+            MY_SECRET: ${{ secrets.MY_SECRET }}   # explicit forwarding from caller scope
+  - language: yaml
+    label: 'Called workflow: Declare expected secrets in on.workflow_call.secrets'
+    code: |
+      # .github/workflows/reusable.yml
+      on:
+        workflow_call:
+          inputs:
+            target_environment:
+              required: true
+              type: string
+          secrets:
+            MY_SECRET:
+              required: true    # declared — caller must forward via secrets: mapping
+      jobs:
+        worker:
+          runs-on: ubuntu-latest
+          environment: ${{ inputs.target_environment }}
+          steps:
+            - name: Use secret
+              env:
+                MY_SECRET: ${{ secrets.MY_SECRET }}
+              run: echo "Secret is populated — length ${#MY_SECRET}"
+prevention:
+  - 'Always add `secrets: inherit` to caller workflows that invoke reusable workflows needing environment-scoped secrets.'
+  - 'If using explicit `secrets:` mapping, include all environment-scoped secrets the called workflow references, not just repository-level secrets.'
+  - 'Add a guard step in the called workflow to detect silently empty secrets early: `if [ -z "$MY_SECRET" ]; then echo "::error::MY_SECRET is empty — check caller secrets: inherit"; exit 1; fi`'
+  - 'Test reusable workflows from a fresh caller that does NOT already inherit secrets to verify the `secrets: inherit` path works end-to-end.'
+docs:
+  - url: 'https://github.com/actions/runner/issues/4453'
+    label: 'actions/runner #4453 — Environment-scoped secrets unreachable from reusable workflow without secrets: inherit'
+  - url: 'https://docs.github.com/en/actions/sharing-automations/reusing-workflows#using-inputs-and-secrets-in-a-reusable-workflow'
+    label: 'GitHub Docs — Using inputs and secrets in a reusable workflow'
+  - url: 'https://docs.github.com/en/actions/reference/workflows-and-actions/workflow-syntax#jobsjob_idsecretsinherit'
+    label: 'GitHub Docs — jobs.<job_id>.secrets: inherit'