From 2ee676d70c3fdc8e33564a97fe23fc799bceefca Mon Sep 17 00:00:00 2001
From: Jammy2211
Date: Fri, 15 May 2026 10:02:36 +0100
Subject: [PATCH] ci: add live URL audit (weekly cron) + grandfather current broken URLs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add .url_check_allowlist.txt at repo root containing the 11 broken URLs
the audit currently flags in HowToFit — mostly external paywalled / dead
links and a few internal docs renames that need editorial fixes. The
weekly cron job will only report (via a [url-check] tracking issue) when
a NEW broken URL appears that isn't in this file.

Update .github/workflows/url_check.yml:
- keep the existing offline regex guard (runs on every PR + push, now
  with ~15 additional bad patterns thanks to the matching PyAutoBuild
  extension)
- add a url_check_live job that runs on schedule (Mon 04:00 UTC) and
  workflow_dispatch. On non-zero exit it opens or comments on a
  [url-check] New broken URLs detected tracking issue.
- on a clean run, auto-closes any prior open [url-check] issue.

Tool: PyAutoLabs/PyAutoBuild#87 (paired PR — must merge first for the
extended regex patterns and the new live tool to be available).

Co-Authored-By: Claude Opus 4.7 (1M context)
---
 .github/workflows/url_check.yml | 79 ++++++++++++++++++++++++++++++++-
 .url_check_allowlist.txt        | 24 +++++++++++
 2 files changed, 102 insertions(+), 1 deletion(-)
 create mode 100644 .url_check_allowlist.txt

diff --git a/.github/workflows/url_check.yml b/.github/workflows/url_check.yml
index 41ee620..cdd5fe4 100644
--- a/.github/workflows/url_check.yml
+++ b/.github/workflows/url_check.yml
@@ -4,9 +4,16 @@ on:
   push:
     branches: [main]
   pull_request:
+  schedule:
+    - cron: '0 4 * * 1'  # Mondays 04:00 UTC
+  workflow_dispatch:
+
+permissions:
+  contents: read
 
 jobs:
-  url_check:
+  url_check_patterns:
+    name: Offline regex guard
     runs-on: ubuntu-latest
     steps:
       - name: Checkout repo
@@ -21,3 +28,73 @@ jobs:
           path: PyAutoBuild
       - name: Run url_check.sh
         run: bash PyAutoBuild/autobuild/url_check.sh repo
+
+  url_check_live:
+    name: Live HTTP audit (weekly)
+    # The live audit runs only for scheduled or manually dispatched runs, not for PRs/pushes.
+    if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      issues: write
+    steps:
+      - name: Checkout repo
+        uses: actions/checkout@v4
+        with:
+          path: repo
+      - name: Checkout PyAutoBuild
+        uses: actions/checkout@v4
+        with:
+          repository: PyAutoLabs/PyAutoBuild
+          ref: main
+          path: PyAutoBuild
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.12'
+      - name: Install requests
+        run: pip install --quiet requests
+      - name: Run live URL audit
+        id: audit
+        run: |
+          # Do not abort on a non-zero audit exit: rc is exported and later steps branch on it.
+          set +e
+          body=$(bash PyAutoBuild/autobuild/url_check_live.sh repo)
+          rc=$?
+          printf '%s\n' "$body" > /tmp/url_audit_body.md
+          echo "rc=$rc" >> "$GITHUB_OUTPUT"
+          cat /tmp/url_audit_body.md
+          exit 0
+      - name: Open or update [url-check] tracking issue
+        if: steps.audit.outputs.rc != '0'
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          cd repo
+          # in:title limits the match to issues tagged in their title, not ones that merely mention the tag.
+          existing=$(gh issue list --search '"[url-check]" in:title' --state open --json number --jq '.[0].number // empty')
+          if [ -n "$existing" ]; then
+            echo "Updating existing tracking issue #$existing"
+            gh issue comment "$existing" --body-file /tmp/url_audit_body.md
+          else
+            echo "Opening new tracking issue"
+            # Retry without --label if the labelled create fails (presumably when the label is missing).
+            gh issue create \
+              --title "[url-check] New broken URLs detected" \
+              --body-file /tmp/url_audit_body.md \
+              --label url-check 2>/dev/null \
+              || gh issue create \
+                --title "[url-check] New broken URLs detected" \
+                --body-file /tmp/url_audit_body.md
+          fi
+      - name: Close stale tracking issue if audit is clean
+        if: steps.audit.outputs.rc == '0'
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          cd repo
+          # in:title avoids auto-closing issues that only mention the tag in their body or comments.
+          for n in $(gh issue list --search '"[url-check]" in:title' --state open --json number --jq '.[].number'); do
+            echo "Closing now-clean tracking issue #$n"
+            gh issue close "$n" --comment "Weekly URL audit is clean — closing automatically."
+          done
diff --git a/.url_check_allowlist.txt b/.url_check_allowlist.txt
new file mode 100644
index 0000000..7601887
--- /dev/null
+++ b/.url_check_allowlist.txt
@@ -0,0 +1,24 @@
+# Known broken URLs grandfathered for url_check_live.sh.
+# Format: one URL per line. Lines starting with '#' and blank lines are ignored.
+# Sample location follows each URL as a trailing comment.
+#
+# This file is referenced by .github/workflows/url_check.yml (weekly cron).
+# url_check_live.sh fails CI when a URL is broken AND not in this file.
+# To accept new breakage: append the URL here. To force a fix: leave it out.
+
+# Code of Conduct boilerplate
+http://geekfeminism.wikia.com/wiki/Conference_anti-harassment/Policy  # CODE_OF_CONDUCT.md:305
+
+# Colab refs to notebooks no longer in HowTo/workspace
+https://colab.research.google.com/github/PyAutoLabs/HowToFit/blob/2026.5.14.2/notebooks/chapter_1_introduction/tutorial_4_complex_models.ipynb  # scripts/chapter_1_introduction/README.md:16
+https://colab.research.google.com/github/PyAutoLabs/HowToFit/blob/2026.5.14.2/notebooks/chapter_3_graphical_models/tutorial_4_hierarchical_models.ipynb  # scripts/chapter_3_graphical_models/README.md:17
+
+# GitHub refs (workspaces / removed tutorials)
+https://github.com/Jammy2211/PyAutoLogo/blob/main/gifs/pyautofit.gif?raw=true  # README.md:7
+https://github.com/PyAutoLabs/HowToFit/blob/main/scripts/chapter_1_introduction/images/bad_fit.png?raw=true  # scripts/chapter_1_introduction/tutorial_4_why_modeling_is_hard.py:452
+https://github.com/PyAutoLabs/HowToFit/blob/main/scripts/chapter_1_introduction/images/bad_normalized_residual_map.png?raw=true  # scripts/chapter_1_introduction/tutorial_4_why_modeling_is_hard.py:461
+https://github.com/PyAutoLabs/HowToFit/blob/main/scripts/chapter_1_introduction/images/good_fit.png?raw=true  # scripts/chapter_1_introduction/tutorial_4_why_modeling_is_hard.py:454
+https://github.com/PyAutoLabs/HowToFit/blob/main/scripts/chapter_1_introduction/images/good_normalized_residual_map.png?raw=true  # scripts/chapter_1_introduction/tutorial_4_why_modeling_is_hard.py:463
+https://github.com/PyAutoLabs/HowToFit/blob/main/scripts/chapter_1_introduction/images/okay_fit.png?raw=true  # scripts/chapter_1_introduction/tutorial_4_why_modeling_is_hard.py:453
+https://github.com/PyAutoLabs/HowToFit/blob/main/scripts/chapter_1_introduction/images/okay_normalized_residual_map.png?raw=true  # scripts/chapter_1_introduction/tutorial_4_why_modeling_is_hard.py:462
+https://github.com/PyAutoLabs/PyAutoFit/blob/main/files/citations.tex  # CITATIONS.md:5