From 3b212cea0a1a8ffbc8ee6c31c5493e0aa22ff80a Mon Sep 17 00:00:00 2001
From: critesjosh
Date: Thu, 7 May 2026 12:14:12 -0400
Subject: [PATCH] fix(docs): apply api-nr selectors to nargo-doc pages

The follow-up to #23042: that PR fixed the indexing rate-limit problem
but every aztec-nr-api page still emitted 0 records.

Root cause: the docsearch-scraper resolves a URL's selectors_key by
walking start_urls in order and matching with `re.search` (substring),
breaking on first match. With the homepage URL listed first, every
aztec-nr-api URL matched it (since "https://docs.aztec.network/" is a
substring of every aztec-nr-api URL) and was assigned the default
selectors. The default selectors target Docusaurus-only markup
(`header h1`, `article p`, `menu__list ... active` XPath), none of
which exist on rustdoc-style nargo-doc pages, so the scraper found no
nodes and emitted no records.

Fix: list the more-specific aztec-nr-api start_url first so it wins the
selectors_key match for those URLs. The homepage start_url then serves
as the catch-all for everything else.

Reference: scraper/src/strategies/abstract_strategy.py
get_selectors_set_key() iterates start_urls in declaration order and
breaks on the first re.search hit.
---
 docs/typesense.config.json | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/docs/typesense.config.json b/docs/typesense.config.json
index 0111ebdbd7f7..33d7feb7c5fd 100644
--- a/docs/typesense.config.json
+++ b/docs/typesense.config.json
@@ -1,14 +1,14 @@
 {
   "index_name": "aztec-docs",
   "start_urls": [
-    {
-      "url": "https://docs.aztec.network/",
-      "page_rank": 10
-    },
     {
       "url": "https://docs.aztec.network/aztec-nr-api/mainnet/",
       "selectors_key": "api-nr",
       "page_rank": 2
+    },
+    {
+      "url": "https://docs.aztec.network/",
+      "page_rank": 10
     }
   ],
   "stop_urls": [