From d389815711cb27eb026c5e776ca9de3df1b3e80b Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 25 Jun 2026 12:41:32 +0000 Subject: [PATCH 1/3] seo: notify IndexNow of changed pages on production deploy MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add IndexNow (Bing/Yandex/Seznam/Naver/DuckDuckGo) integration so newly deployed/updated pages get recrawled promptly. Not used by Google, so this complements rather than replaces sitemap.xml. - web/public/<key>.txt: IndexNow key file, served as a static asset. - web/scripts/indexnow-submit.mjs: derives the URLs changed in a deploy from the commit-range diff, intersects them with the live sitemap (so we never submit 404s, dynamic, or unpublished routes), and POSTs only that delta — per IndexNow guidance, never the whole site. - deploy.yml: post-deploy step (production only, continue-on-error) with fetch-depth: 0 so the diff range is available. Co-Authored-By: Claude Opus 4.8 Claude-Session: https://claude.ai/code/session_01Kj1T1KijwDqFgXjRjiHuCN --- .github/workflows/deploy.yml | 18 +++ .../d15f21b935684761ad607fb06b70b3d5.txt | 1 + web/scripts/indexnow-submit.mjs | 146 ++++++++++++++++++ 3 files changed, 165 insertions(+) create mode 100644 web/public/d15f21b935684761ad607fb06b70b3d5.txt create mode 100644 web/scripts/indexnow-submit.mjs diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index f40b2a4..b5bf6c9 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -20,6 +20,9 @@ jobs: steps: - name: Checkout code uses: actions/checkout@v4 + with: + # Full history so the IndexNow step can diff this deploy's commit range. 
+ fetch-depth: 0 - name: Setup Node.js uses: actions/setup-node@v4 @@ -52,3 +55,18 @@ jobs: CLOUDFLARE_API_TOKEN: ${{ secrets.CLOUDFLARE_API_TOKEN }} CLOUDFLARE_ACCOUNT_ID: ${{ vars.CLOUDFLARE_ACCOUNT_ID }} run: npx wrangler deploy + + # Ping IndexNow (Bing/Yandex/Seznam/Naver/DuckDuckGo — not Google) with the + # URLs changed in this deploy. Submits only the delta intersected with the + # live sitemap, never the whole site. Production only; never runs on PR + # previews. The key is public (served at /.txt), so it's fine inline; + # set the INDEXNOW_KEY repo var to override. + - name: Notify IndexNow of changed pages + # Best-effort SEO ping after a successful deploy — never fail the deploy + # over a search-engine notification. + continue-on-error: true + working-directory: web + env: + INDEXNOW_KEY: ${{ vars.INDEXNOW_KEY || 'd15f21b935684761ad607fb06b70b3d5' }} + SITE_URL: https://agentrelay.com + run: node scripts/indexnow-submit.mjs "${{ github.event.before }}" "${{ github.sha }}" diff --git a/web/public/d15f21b935684761ad607fb06b70b3d5.txt b/web/public/d15f21b935684761ad607fb06b70b3d5.txt new file mode 100644 index 0000000..b4e1664 --- /dev/null +++ b/web/public/d15f21b935684761ad607fb06b70b3d5.txt @@ -0,0 +1 @@ +d15f21b935684761ad607fb06b70b3d5 \ No newline at end of file diff --git a/web/scripts/indexnow-submit.mjs b/web/scripts/indexnow-submit.mjs new file mode 100644 index 0000000..99327e0 --- /dev/null +++ b/web/scripts/indexnow-submit.mjs @@ -0,0 +1,146 @@ +#!/usr/bin/env node +// Submit changed URLs to IndexNow (Bing, Yandex, Seznam, Naver, DuckDuckGo). +// +// IndexNow is a "this changed, please recrawl" ping — NOT a sitemap replacement +// and NOT used by Google. We therefore submit only the delta for a deploy, never +// the whole site. Candidate URLs are derived from the files changed between two +// commits, then intersected with the live sitemap.xml so we never submit a 404 +// or an unpublished/dynamic route. 
+// +// Usage: +// node scripts/indexnow-submit.mjs +// +// Env: +// INDEXNOW_KEY (required) the key, also hosted at /.txt +// SITE_URL (optional) defaults to https://agentrelay.com +// DRY_RUN (optional) if "1"/"true", log the payload but don't POST + +import { execFileSync } from 'node:child_process'; + +const SITE_URL = (process.env.SITE_URL || 'https://agentrelay.com').replace(/\/$/, ''); +const HOST = new URL(SITE_URL).host; +const KEY = process.env.INDEXNOW_KEY; +const DRY_RUN = /^(1|true)$/i.test(process.env.DRY_RUN || ''); +const ENDPOINT = 'https://api.indexnow.org/indexnow'; + +function fail(msg) { + console.error(`indexnow: ${msg}`); + process.exit(1); +} + +if (!KEY) fail('INDEXNOW_KEY is not set — skipping. (set it as a repo variable)'); + +const [, , beforeArg, afterArg] = process.argv; +const after = afterArg || 'HEAD'; +// On the first push to a branch GitHub passes an all-zero "before" SHA; fall +// back to the single-commit diff so we still submit that deploy's changes. +const ZERO = '0000000000000000000000000000000000000000'; +const before = !beforeArg || beforeArg === ZERO ? `${after}~1` : beforeArg; + +function changedFiles() { + try { + const out = execFileSync('git', ['diff', '--name-only', `${before}..${after}`], { + encoding: 'utf8', + }); + return out.split('\n').map((l) => l.trim()).filter(Boolean); + } catch (err) { + fail(`git diff ${before}..${after} failed: ${err.message}`); + } +} + +// Map a repo-relative changed file to the public URL path(s) it affects. +// Paths here are relative to the repo root (the workflow runs from there). +function pathsForFile(file) { + // Blog + docs content mirror the route tree: content/.mdx -> / + let m = file.match(/^web\/content\/(blog\/.+|docs\/.+)\.mdx$/); + if (m) return [`/${m[1]}`]; + + // Static app routes: app//page.(tsx|mdx) -> /. Skip dynamic + // segments ([slug]) — those are covered by their content files above. 
+ m = file.match(/^web\/app\/(.+)\/page\.(tsx|mdx)$/); + if (m && !m[1].includes('[')) return [`/${m[1]}`]; + + // Root page. + if (/^web\/app\/page\.tsx$/.test(file)) return ['/']; + + // The /agents catalog is data-driven (lib/agents.ts), so a change there can + // touch every agent page. Signal "all /agents URLs" and let the sitemap + // intersection narrow it to what's actually published. + if (file === 'web/lib/agents.ts') return ['__AGENTS__']; + + return []; +} + +async function fetchSitemapUrls() { + const res = await fetch(`${SITE_URL}/sitemap.xml`, { + headers: { 'user-agent': 'agentrelay-indexnow/1.0' }, + }); + if (!res.ok) fail(`could not fetch sitemap.xml (${res.status})`); + const xml = await res.text(); + const urls = new Set(); + for (const match of xml.matchAll(/\s*([^<\s]+)\s*<\/loc>/g)) { + urls.add(match[1].trim()); + } + return urls; +} + +const files = changedFiles(); +if (files.length === 0) { + console.log('indexnow: no changed files in range — nothing to submit.'); + process.exit(0); +} + +const wantAllAgents = files.some((f) => pathsForFile(f).includes('__AGENTS__')); +const candidatePaths = new Set( + files.flatMap(pathsForFile).filter((p) => p && p !== '__AGENTS__'), +); + +const sitemapUrls = await fetchSitemapUrls(); +const submit = new Set(); + +for (const path of candidatePaths) { + const url = `${SITE_URL}${path}`; + if (sitemapUrls.has(url)) submit.add(url); + else console.log(`indexnow: skip (not in sitemap): ${url}`); +} + +if (wantAllAgents) { + for (const url of sitemapUrls) { + if (url.startsWith(`${SITE_URL}/agents/`)) submit.add(url); + } +} + +const urlList = [...submit]; +if (urlList.length === 0) { + console.log('indexnow: no publishable changed URLs — nothing to submit.'); + process.exit(0); +} + +const payload = { + host: HOST, + key: KEY, + keyLocation: `${SITE_URL}/${KEY}.txt`, + urlList, +}; + +console.log(`indexnow: submitting ${urlList.length} URL(s):`); +for (const u of urlList) console.log(` ${u}`); + +if 
(DRY_RUN) { + console.log('indexnow: DRY_RUN set — not posting.'); + process.exit(0); +} + +const res = await fetch(ENDPOINT, { + method: 'POST', + headers: { 'content-type': 'application/json; charset=utf-8' }, + body: JSON.stringify(payload), +}); + +// 200 = accepted, 202 = accepted/validation pending. Both are success. +if (res.status === 200 || res.status === 202) { + console.log(`indexnow: ok (${res.status}).`); +} else { + const body = await res.text().catch(() => ''); + fail(`endpoint returned ${res.status}: ${body}`); +} From c2363e6a6382dbdd671f0173767773cf874d9bc7 Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 25 Jun 2026 12:45:56 +0000 Subject: [PATCH 2/3] seo: persist sitemap snapshot for certain IndexNow deltas Replace the live-sitemap-only heuristic with a committed snapshot of the last deploy's published URL set (web/indexnow-state.json). Each deploy now diffs the freshly deployed sitemap against that snapshot for new pages (certain) and the git range for content edits, submits only that union, then commits the refreshed snapshot back to main with [skip ci] so the next deploy has a durable baseline. - Bootstraps once (empty snapshot -> announce all current URLs), deltas after. - Logs URLs dropped from the sitemap rather than auto-submitting deletions. - Caps submissions at IndexNow's 10k per-request limit. - deploy.yml: contents: write + commit-back step. 
Co-Authored-By: Claude Opus 4.8 Claude-Session: https://claude.ai/code/session_01Kj1T1KijwDqFgXjRjiHuCN --- .github/workflows/deploy.yml | 28 +++++-- web/indexnow-state.json | 5 ++ web/scripts/indexnow-submit.mjs | 129 +++++++++++++++++++++----------- 3 files changed, 114 insertions(+), 48 deletions(-) create mode 100644 web/indexnow-state.json diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index b5bf6c9..377e3ac 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -11,7 +11,8 @@ concurrency: cancel-in-progress: false permissions: - contents: read + # write so the IndexNow step can commit the updated sitemap snapshot back. + contents: write jobs: deploy-production: @@ -57,10 +58,11 @@ jobs: run: npx wrangler deploy # Ping IndexNow (Bing/Yandex/Seznam/Naver/DuckDuckGo — not Google) with the - # URLs changed in this deploy. Submits only the delta intersected with the - # live sitemap, never the whole site. Production only; never runs on PR - # previews. The key is public (served at /.txt), so it's fine inline; - # set the INDEXNOW_KEY repo var to override. + # pages new or changed in this deploy. Diffs the freshly deployed sitemap + # against the committed snapshot (web/indexnow-state.json) for new pages + # and the git range for content edits — never the whole site. Production + # only; never runs on PR previews. The key is public (served at /.txt), + # so it's fine inline; set the INDEXNOW_KEY repo var to override. - name: Notify IndexNow of changed pages # Best-effort SEO ping after a successful deploy — never fail the deploy # over a search-engine notification. @@ -70,3 +72,19 @@ jobs: INDEXNOW_KEY: ${{ vars.INDEXNOW_KEY || 'd15f21b935684761ad607fb06b70b3d5' }} SITE_URL: https://agentrelay.com run: node scripts/indexnow-submit.mjs "${{ github.event.before }}" "${{ github.sha }}" + + # Persist the updated sitemap snapshot so the next deploy diffs against it. 
+ # [skip ci] keeps this commit from re-triggering the deploy workflow. + - name: Commit updated IndexNow snapshot + continue-on-error: true + working-directory: web + run: | + if git diff --quiet -- indexnow-state.json; then + echo "indexnow: snapshot unchanged — nothing to commit." + exit 0 + fi + git config user.name "github-actions[bot]" + git config user.email "github-actions[bot]@users.noreply.github.com" + git add indexnow-state.json + git commit -m "chore(seo): update IndexNow sitemap snapshot [skip ci]" + git push origin HEAD:main diff --git a/web/indexnow-state.json b/web/indexnow-state.json new file mode 100644 index 0000000..f87a444 --- /dev/null +++ b/web/indexnow-state.json @@ -0,0 +1,5 @@ +{ + "host": "agentrelay.com", + "count": 0, + "urls": [] +} diff --git a/web/scripts/indexnow-submit.mjs b/web/scripts/indexnow-submit.mjs index 99327e0..4086783 100644 --- a/web/scripts/indexnow-submit.mjs +++ b/web/scripts/indexnow-submit.mjs @@ -2,10 +2,18 @@ // Submit changed URLs to IndexNow (Bing, Yandex, Seznam, Naver, DuckDuckGo). // // IndexNow is a "this changed, please recrawl" ping — NOT a sitemap replacement -// and NOT used by Google. We therefore submit only the delta for a deploy, never -// the whole site. Candidate URLs are derived from the files changed between two -// commits, then intersected with the live sitemap.xml so we never submit a 404 -// or an unpublished/dynamic route. +// and NOT used by Google. We therefore submit only the delta for a deploy. +// +// Certainty comes from a committed snapshot of the last deploy's published URL +// set (indexnow-state.json). Each run: +// 1. current = the URL set from the freshly deployed sitemap.xml (authoritative) +// 2. added = current − snapshot (new pages — certain) +// 3. changed = URLs from this deploy's git diff that already existed +// (content edits don't change the URL set, so the snapshot diff +// alone can't see them) +// 4. 
submit added ∪ changed, then rewrite the snapshot for the workflow to +// commit back. Anything submitted is always intersected with `current`, so +// we never ping a 404 / unpublished / dynamic route. // // Usage: // node scripts/indexnow-submit.mjs @@ -13,15 +21,20 @@ // Env: // INDEXNOW_KEY (required) the key, also hosted at /.txt // SITE_URL (optional) defaults to https://agentrelay.com -// DRY_RUN (optional) if "1"/"true", log the payload but don't POST +// STATE_FILE (optional) defaults to ./indexnow-state.json (cwd = web/) +// DRY_RUN (optional) "1"/"true" → log the payload, don't POST or write import { execFileSync } from 'node:child_process'; +import { readFileSync, writeFileSync, existsSync } from 'node:fs'; const SITE_URL = (process.env.SITE_URL || 'https://agentrelay.com').replace(/\/$/, ''); const HOST = new URL(SITE_URL).host; const KEY = process.env.INDEXNOW_KEY; +const STATE_FILE = process.env.STATE_FILE || 'indexnow-state.json'; const DRY_RUN = /^(1|true)$/i.test(process.env.DRY_RUN || ''); const ENDPOINT = 'https://api.indexnow.org/indexnow'; +// IndexNow caps a single submission at 10k URLs. +const MAX_URLS = 10000; function fail(msg) { console.error(`indexnow: ${msg}`); @@ -33,23 +46,28 @@ if (!KEY) fail('INDEXNOW_KEY is not set — skipping. (set it as a repo variable const [, , beforeArg, afterArg] = process.argv; const after = afterArg || 'HEAD'; // On the first push to a branch GitHub passes an all-zero "before" SHA; fall -// back to the single-commit diff so we still submit that deploy's changes. +// back to the single-commit diff so we still catch that deploy's content edits. const ZERO = '0000000000000000000000000000000000000000'; const before = !beforeArg || beforeArg === ZERO ? 
`${after}~1` : beforeArg; function changedFiles() { try { - const out = execFileSync('git', ['diff', '--name-only', `${before}..${after}`], { + return execFileSync('git', ['diff', '--name-only', `${before}..${after}`], { encoding: 'utf8', - }); - return out.split('\n').map((l) => l.trim()).filter(Boolean); + }) + .split('\n') + .map((l) => l.trim()) + .filter(Boolean); } catch (err) { - fail(`git diff ${before}..${after} failed: ${err.message}`); + // A missing range (shallow clone, rewritten history) shouldn't break the + // run — the snapshot diff still catches new pages. Just skip "changed". + console.warn(`indexnow: git diff ${before}..${after} failed (${err.message}); skipping changed-page detection.`); + return []; } } -// Map a repo-relative changed file to the public URL path(s) it affects. -// Paths here are relative to the repo root (the workflow runs from there). +// Map a repo-relative changed file to the public path(s) it affects. Paths are +// relative to the repo root (git diff emits root-relative paths from any cwd). function pathsForFile(file) { // Blog + docs content mirror the route tree: content/.mdx -> / let m = file.match(/^web\/content\/(blog\/.+|docs\/.+)\.mdx$/); @@ -60,12 +78,11 @@ function pathsForFile(file) { m = file.match(/^web\/app\/(.+)\/page\.(tsx|mdx)$/); if (m && !m[1].includes('[')) return [`/${m[1]}`]; - // Root page. if (/^web\/app\/page\.tsx$/.test(file)) return ['/']; - // The /agents catalog is data-driven (lib/agents.ts), so a change there can - // touch every agent page. Signal "all /agents URLs" and let the sitemap - // intersection narrow it to what's actually published. + // The /agents catalog is data-driven (lib/agents.ts): a change there can + // touch every agent page. Signal "all /agents URLs"; the intersection with + // the live sitemap below narrows it to what's actually published. 
if (file === 'web/lib/agents.ts') return ['__AGENTS__']; return []; @@ -81,65 +98,91 @@ async function fetchSitemapUrls() { for (const match of xml.matchAll(/\s*([^<\s]+)\s*<\/loc>/g)) { urls.add(match[1].trim()); } + if (urls.size === 0) fail('sitemap.xml contained no entries'); return urls; } -const files = changedFiles(); -if (files.length === 0) { - console.log('indexnow: no changed files in range — nothing to submit.'); - process.exit(0); +function readSnapshot() { + if (!existsSync(STATE_FILE)) return new Set(); + try { + const data = JSON.parse(readFileSync(STATE_FILE, 'utf8')); + return new Set(Array.isArray(data.urls) ? data.urls : []); + } catch (err) { + fail(`could not parse ${STATE_FILE}: ${err.message}`); + } +} + +function writeSnapshot(urls) { + const data = { + host: HOST, + count: urls.length, + // Sorted for stable diffs / clean review of the committed file. + urls: [...urls].sort(), + }; + writeFileSync(STATE_FILE, `${JSON.stringify(data, null, 2)}\n`); } +// ── 1. authoritative current set ──────────────────────────────────────────── +const current = await fetchSitemapUrls(); + +// ── 2. new pages: certain, from the committed snapshot ─────────────────────── +const snapshot = readSnapshot(); +const bootstrap = snapshot.size === 0; +const added = [...current].filter((u) => !snapshot.has(u)); + +// ── 3. edited existing pages: from this deploy's git diff ───────────────────── +const files = changedFiles(); const wantAllAgents = files.some((f) => pathsForFile(f).includes('__AGENTS__')); -const candidatePaths = new Set( - files.flatMap(pathsForFile).filter((p) => p && p !== '__AGENTS__'), +const changedCandidates = new Set( + files.flatMap(pathsForFile).filter((p) => p && p !== '__AGENTS__').map((p) => `${SITE_URL}${p}`), ); +if (wantAllAgents) { + for (const u of current) if (u.startsWith(`${SITE_URL}/agents/`)) changedCandidates.add(u); +} +// Only existing, still-published URLs (added ones are already covered above). 
+const changed = [...changedCandidates].filter((u) => current.has(u) && snapshot.has(u)); -const sitemapUrls = await fetchSitemapUrls(); -const submit = new Set(); +// ── 4. submit the union, then persist the new snapshot ─────────────────────── +const removed = [...snapshot].filter((u) => !current.has(u)); +if (removed.length) console.log(`indexnow: ${removed.length} URL(s) no longer in sitemap (not auto-submitted): ${removed.join(', ')}`); -for (const path of candidatePaths) { - const url = `${SITE_URL}${path}`; - if (sitemapUrls.has(url)) submit.add(url); - else console.log(`indexnow: skip (not in sitemap): ${url}`); +let urlList = [...new Set([...added, ...changed])]; + +if (bootstrap) { + console.log(`indexnow: no prior snapshot — bootstrapping. Announcing all ${urlList.length} published URL(s) once; future deploys submit only the delta.`); } -if (wantAllAgents) { - for (const url of sitemapUrls) { - if (url.startsWith(`${SITE_URL}/agents/`)) submit.add(url); - } +if (urlList.length > MAX_URLS) { + console.log(`indexnow: capping submission at ${MAX_URLS} of ${urlList.length} URLs (IndexNow per-request limit).`); + urlList = urlList.slice(0, MAX_URLS); } -const urlList = [...submit]; if (urlList.length === 0) { - console.log('indexnow: no publishable changed URLs — nothing to submit.'); + console.log('indexnow: no new or changed URLs to submit.'); + // Snapshot already matches current (no added/removed) — nothing to persist. 
+ if (removed.length && !DRY_RUN) writeSnapshot([...current]); process.exit(0); } -const payload = { - host: HOST, - key: KEY, - keyLocation: `${SITE_URL}/${KEY}.txt`, - urlList, -}; - console.log(`indexnow: submitting ${urlList.length} URL(s):`); for (const u of urlList) console.log(` ${u}`); if (DRY_RUN) { - console.log('indexnow: DRY_RUN set — not posting.'); + console.log('indexnow: DRY_RUN set — not posting or writing snapshot.'); process.exit(0); } const res = await fetch(ENDPOINT, { method: 'POST', headers: { 'content-type': 'application/json; charset=utf-8' }, - body: JSON.stringify(payload), + body: JSON.stringify({ host: HOST, key: KEY, keyLocation: `${SITE_URL}/${KEY}.txt`, urlList }), }); // 200 = accepted, 202 = accepted/validation pending. Both are success. if (res.status === 200 || res.status === 202) { console.log(`indexnow: ok (${res.status}).`); + // Persist the authoritative current set so the next deploy diffs against it. + writeSnapshot([...current]); } else { const body = await res.text().catch(() => ''); fail(`endpoint returned ${res.status}: ${body}`); From de26c2d01598cf267bd3efb22d359c11268a3ba9 Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 25 Jun 2026 14:31:43 +0000 Subject: [PATCH 3/3] seo: address IndexNow PR review feedback - exit 0 (not 1) when INDEXNOW_KEY is unset, so the best-effort step stays green - wrap sitemap fetch and IndexNow POST in try/catch for clean failure messages - build changed-URL candidates via new URL() so they match the sitemap exactly (incl. 
the homepage trailing slash) - batch submissions at the 10k/request limit instead of truncating, and persist the snapshot only after every batch is accepted (so no URL is recorded as submitted unless it actually was) - hard-code the IndexNow key inline so it always matches the committed public <key>.txt (overriding it would point keyLocation at an unpublished file and fail validation) - harden deploy.yml: persist-credentials: false, scoped push token, and guard the snapshot commit-back to the main ref only Co-Authored-By: Claude Opus 4.8 Claude-Session: https://claude.ai/code/session_01Kj1T1KijwDqFgXjRjiHuCN --- .github/workflows/deploy.yml | 20 ++++++--- web/scripts/indexnow-submit.mjs | 73 ++++++++++++++++++++++----------- 2 files changed, 63 insertions(+), 30 deletions(-) diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index 377e3ac..0794a43 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -24,6 +24,9 @@ jobs: with: # Full history so the IndexNow step can diff this deploy's commit range. fetch-depth: 0 + # Don't leave a write-capable token in git config for the build / + # deploy steps. The snapshot push below supplies its own scoped token. + persist-credentials: false - name: Setup Node.js uses: actions/setup-node@v4 @@ -61,23 +64,30 @@ jobs: # pages new or changed in this deploy. Diffs the freshly deployed sitemap # against the committed snapshot (web/indexnow-state.json) for new pages # and the git range for content edits — never the whole site. Production - # only; never runs on PR previews. The key is public (served at /.txt), - # so it's fine inline; set the INDEXNOW_KEY repo var to override. + # only; never runs on PR previews. The key is intentionally public (it is + # served at /.txt for IndexNow to verify), so it lives inline. It is + # fixed to match the committed web/public/.txt — overriding it would + # point keyLocation at a .txt this repo doesn't publish, failing validation. 
- name: Notify IndexNow of changed pages # Best-effort SEO ping after a successful deploy — never fail the deploy # over a search-engine notification. continue-on-error: true working-directory: web env: - INDEXNOW_KEY: ${{ vars.INDEXNOW_KEY || 'd15f21b935684761ad607fb06b70b3d5' }} + INDEXNOW_KEY: d15f21b935684761ad607fb06b70b3d5 SITE_URL: https://agentrelay.com run: node scripts/indexnow-submit.mjs "${{ github.event.before }}" "${{ github.sha }}" # Persist the updated sitemap snapshot so the next deploy diffs against it. - # [skip ci] keeps this commit from re-triggering the deploy workflow. + # [skip ci] keeps this commit from re-triggering the deploy workflow. Guard + # to main so a workflow_dispatch from another ref can't fast-forward main + # with that ref's commits. Auth is a scoped token here, not persisted creds. - name: Commit updated IndexNow snapshot + if: github.ref == 'refs/heads/main' continue-on-error: true working-directory: web + env: + GH_TOKEN: ${{ github.token }} run: | if git diff --quiet -- indexnow-state.json; then echo "indexnow: snapshot unchanged — nothing to commit." @@ -87,4 +97,4 @@ jobs: git config user.email "github-actions[bot]@users.noreply.github.com" git add indexnow-state.json git commit -m "chore(seo): update IndexNow sitemap snapshot [skip ci]" - git push origin HEAD:main + git push "https://x-access-token:${GH_TOKEN}@github.com/${{ github.repository }}.git" HEAD:main diff --git a/web/scripts/indexnow-submit.mjs b/web/scripts/indexnow-submit.mjs index 4086783..36ec367 100644 --- a/web/scripts/indexnow-submit.mjs +++ b/web/scripts/indexnow-submit.mjs @@ -41,7 +41,12 @@ function fail(msg) { process.exit(1); } -if (!KEY) fail('INDEXNOW_KEY is not set — skipping. (set it as a repo variable)'); +if (!KEY) { + // Expected on forks / non-production runs where the var isn't exposed. Exit 0 + // so the (continue-on-error) step stays green rather than showing a red X. 
+ console.log('indexnow: INDEXNOW_KEY is not set — skipping.'); + process.exit(0); +} const [, , beforeArg, afterArg] = process.argv; const after = afterArg || 'HEAD'; @@ -89,9 +94,14 @@ function pathsForFile(file) { } async function fetchSitemapUrls() { - const res = await fetch(`${SITE_URL}/sitemap.xml`, { - headers: { 'user-agent': 'agentrelay-indexnow/1.0' }, - }); + let res; + try { + res = await fetch(`${SITE_URL}/sitemap.xml`, { + headers: { 'user-agent': 'agentrelay-indexnow/1.0' }, + }); + } catch (err) { + fail(`could not reach ${SITE_URL}/sitemap.xml: ${err.message}`); + } if (!res.ok) fail(`could not fetch sitemap.xml (${res.status})`); const xml = await res.text(); const urls = new Set(); @@ -134,7 +144,12 @@ const added = [...current].filter((u) => !snapshot.has(u)); const files = changedFiles(); const wantAllAgents = files.some((f) => pathsForFile(f).includes('__AGENTS__')); const changedCandidates = new Set( - files.flatMap(pathsForFile).filter((p) => p && p !== '__AGENTS__').map((p) => `${SITE_URL}${p}`), + files + .flatMap(pathsForFile) + .filter((p) => p && p !== '__AGENTS__') + // Build URLs the same way sitemap.ts does (new URL against the origin) so + // they match the sitemap exactly — including the homepage's trailing slash. + .map((p) => new URL(p, SITE_URL).toString()), ); if (wantAllAgents) { for (const u of current) if (u.startsWith(`${SITE_URL}/agents/`)) changedCandidates.add(u); @@ -146,17 +161,12 @@ const changed = [...changedCandidates].filter((u) => current.has(u) && snapshot. const removed = [...snapshot].filter((u) => !current.has(u)); if (removed.length) console.log(`indexnow: ${removed.length} URL(s) no longer in sitemap (not auto-submitted): ${removed.join(', ')}`); -let urlList = [...new Set([...added, ...changed])]; +const urlList = [...new Set([...added, ...changed])]; if (bootstrap) { console.log(`indexnow: no prior snapshot — bootstrapping. 
Announcing all ${urlList.length} published URL(s) once; future deploys submit only the delta.`); } -if (urlList.length > MAX_URLS) { - console.log(`indexnow: capping submission at ${MAX_URLS} of ${urlList.length} URLs (IndexNow per-request limit).`); - urlList = urlList.slice(0, MAX_URLS); -} - if (urlList.length === 0) { console.log('indexnow: no new or changed URLs to submit.'); // Snapshot already matches current (no added/removed) — nothing to persist. @@ -172,18 +182,31 @@ if (DRY_RUN) { process.exit(0); } -const res = await fetch(ENDPOINT, { - method: 'POST', - headers: { 'content-type': 'application/json; charset=utf-8' }, - body: JSON.stringify({ host: HOST, key: KEY, keyLocation: `${SITE_URL}/${KEY}.txt`, urlList }), -}); - -// 200 = accepted, 202 = accepted/validation pending. Both are success. -if (res.status === 200 || res.status === 202) { - console.log(`indexnow: ok (${res.status}).`); - // Persist the authoritative current set so the next deploy diffs against it. - writeSnapshot([...current]); -} else { - const body = await res.text().catch(() => ''); - fail(`endpoint returned ${res.status}: ${body}`); +// Split into IndexNow's per-request limit so nothing is silently dropped (and +// never recorded as submitted when it wasn't). +const batches = []; +for (let i = 0; i < urlList.length; i += MAX_URLS) batches.push(urlList.slice(i, i + MAX_URLS)); + +for (let i = 0; i < batches.length; i++) { + const batch = batches[i]; + let res; + try { + res = await fetch(ENDPOINT, { + method: 'POST', + headers: { 'content-type': 'application/json; charset=utf-8' }, + body: JSON.stringify({ host: HOST, key: KEY, keyLocation: `${SITE_URL}/${KEY}.txt`, urlList: batch }), + }); + } catch (err) { + fail(`could not reach IndexNow endpoint: ${err.message}`); + } + // 200 = accepted, 202 = accepted/validation pending. Both are success. 
+ if (res.status !== 200 && res.status !== 202) { + const body = await res.text().catch(() => ''); + fail(`endpoint returned ${res.status}: ${body}`); + } + console.log(`indexnow: batch ${i + 1}/${batches.length} ok (${res.status}, ${batch.length} URL(s)).`); } + +// Persist the authoritative current set only after every batch was accepted, so +// nothing is marked submitted unless it actually was. +writeSnapshot([...current]);