From a4069cbdc44ef81226586d06055464beb8483b20 Mon Sep 17 00:00:00 2001
From: Jammy2211
Date: Fri, 15 May 2026 10:02:28 +0100
Subject: [PATCH] ci: add live URL audit (weekly cron) + grandfather current broken URLs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add .url_check_allowlist.txt at repo root. The audit currently flags 0
broken URLs in PyAutoArray, so the file starts out with explanatory
comments only; URLs grandfathered there later (typically external
paywalled / dead links, or internal docs renames that need editorial
fixes) are ignored by the audit. The weekly cron job only reports a
problem when a NEW broken URL appears that isn't in this file.

Update .github/workflows/url_check.yml:
- keep the existing offline regex guard (runs on every PR + push, now
  with ~15 additional bad patterns thanks to the matching PyAutoBuild
  extension)
- add a url_check_live job that runs on schedule (Mon 04:00 UTC) and
  workflow_dispatch. On non-zero exit it opens or comments on a
  "[url-check] New broken URLs detected" tracking issue.
- on a clean run, auto-closes any prior open [url-check] issue.

Tool: PyAutoLabs/PyAutoBuild#87 (paired PR — must merge first for the
extended regex patterns and the new live tool to be available).

Co-Authored-By: Claude Opus 4.7 (1M context)
---
 .github/workflows/url_check.yml | 107 +++++++++++++++++++++++++++++++++++++++++
 .url_check_allowlist.txt        |   3 ++
 2 files changed, 110 insertions(+)
 create mode 100644 .github/workflows/url_check.yml
 create mode 100644 .url_check_allowlist.txt

diff --git a/.github/workflows/url_check.yml b/.github/workflows/url_check.yml
new file mode 100644
index 00000000..cdd5fe46
--- /dev/null
+++ b/.github/workflows/url_check.yml
@@ -0,0 +1,107 @@
+# Offline URL-pattern guard on every push/PR, plus a weekly live HTTP audit
+# that opens, updates or closes a "[url-check]" tracking issue.
+name: URL Check
+
+on:
+  push:
+    branches: [main]
+  pull_request:
+  schedule:
+    - cron: '0 4 * * 1'  # Mondays 04:00 UTC
+  workflow_dispatch:
+
+permissions:
+  contents: read
+
+jobs:
+  url_check_patterns:
+    name: Offline regex guard
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout repo
+        uses: actions/checkout@v4
+        with:
+          path: repo
+      # The checker scripts are run from a PyAutoBuild checkout.
+      - name: Checkout PyAutoBuild
+        uses: actions/checkout@v4
+        with:
+          repository: PyAutoLabs/PyAutoBuild
+          ref: main
+          path: PyAutoBuild
+      - name: Run url_check.sh
+        run: bash PyAutoBuild/autobuild/url_check.sh repo
+
+  url_check_live:
+    name: Live HTTP audit (weekly)
+    # Runs only for the cron schedule or a manual dispatch, not push/PR.
+    if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      issues: write
+    steps:
+      - name: Checkout repo
+        uses: actions/checkout@v4
+        with:
+          path: repo
+      - name: Checkout PyAutoBuild
+        uses: actions/checkout@v4
+        with:
+          repository: PyAutoLabs/PyAutoBuild
+          ref: main
+          path: PyAutoBuild
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.12'
+      - name: Install requests
+        run: pip install --quiet requests
+      - name: Run live URL audit
+        id: audit
+        run: |
+          # Never fail this step: the exit code is exported as an output
+          # so the two issue steps below can branch on it.
+          set +e
+          body=$(bash PyAutoBuild/autobuild/url_check_live.sh repo)
+          rc=$?
+          printf '%s\n' "$body" > /tmp/url_audit_body.md
+          echo "rc=$rc" >> "$GITHUB_OUTPUT"
+          cat /tmp/url_audit_body.md
+          exit 0
+      - name: Open or update [url-check] tracking issue
+        if: steps.audit.outputs.rc != '0'
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          cd repo
+          # Search matching is fuzzy, so keep only issues whose title
+          # really starts with the tracking prefix.
+          existing=$(gh issue list --search '"[url-check]" in:title' --state open --json number,title --jq '[.[] | select(.title | startswith("[url-check]")) | .number][0] // empty')
+          if [ -n "$existing" ]; then
+            echo "Updating existing tracking issue #$existing"
+            gh issue comment "$existing" --body-file /tmp/url_audit_body.md
+          else
+            echo "Opening new tracking issue"
+            # Retry without the label in case the first attempt fails
+            # (for example when the url-check label is missing).
+            gh issue create \
+              --title "[url-check] New broken URLs detected" \
+              --body-file /tmp/url_audit_body.md \
+              --label url-check 2>/dev/null \
+              || gh issue create \
+                --title "[url-check] New broken URLs detected" \
+                --body-file /tmp/url_audit_body.md
+          fi
+      - name: Close stale tracking issue if audit is clean
+        if: steps.audit.outputs.rc == '0'
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          cd repo
+          # Close only issues whose title starts with the tracking prefix,
+          # so unrelated issues that merely mention url-check stay open.
+          for n in $(gh issue list --search '"[url-check]" in:title' --state open --json number,title --jq '.[] | select(.title | startswith("[url-check]")) | .number'); do
+            echo "Closing now-clean tracking issue #$n"
+            gh issue close "$n" --comment "Weekly URL audit is clean — closing automatically."
+          done
diff --git a/.url_check_allowlist.txt b/.url_check_allowlist.txt
new file mode 100644
index 00000000..17b0f3dc
--- /dev/null
+++ b/.url_check_allowlist.txt
@@ -0,0 +1,3 @@
+# Known broken URLs grandfathered for url_check_live.sh.
+# Add a URL on its own line to ignore it. Comments start with '#'.
+# Currently no broken URLs in this repo's docs.