From 9afcb1c5166aedfb26c4bc405a108543ab2bb0e5 Mon Sep 17 00:00:00 2001 From: Justin Clark Date: Mon, 29 Jun 2026 08:27:30 -0700 Subject: [PATCH] feat(arborist): extend replace-registry-host with URL prefix matching (#6110) (#9672) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Resolves an issue where registries with a path component (e.g. https://myregistry.example.com/npm/b) caused resolved URLs in the lockfile to have their path duplicated when reify rewrote them. Rewrote `#registryResolved` to unify all match-and-rewrite logic into a single pass. A single `new URL()` parse of `replaceRegistryHost` now drives all three match modes: - **Keyword / bare hostname** (`never`, `always`, `npmjs`, `registry.npmjs.org`): behaviour unchanged. `new URL()` throws for keywords, leaving `matchURL` null and falling through to the existing host-comparison or keyword short-circuit. - **Host-only URL** (e.g. `https://old.example.com/`): trailing slash normalises to an empty `matchPath`; treated identically to a bare hostname — only the host is replaced, and the registry path is prepended if not already present. - **Full URL with path** (e.g. `https://old.example.com/npm/b`): when the resolved URL's host and path both match the prefix, the entire matched prefix is replaced with the configured registry's full URL (host + path). The remainder of the resolved path is appended without duplication. If the resolved URL's path does not start with the configured prefix, `#registryResolved` returns the URL unchanged. Updated the `hint` and `description` for `replace-registry-host` to document the new full-URL-with-path mode and the host-only-URL mode. Added three integration tests using `tnock` (mismatched URL → nock throws, making URL assertions implicit and precise): 1. 
Full URL prefix match: `replaceRegistryHost=https://old.example.com/npm/b`, registry `https://new.example.com/npm/a` — verifies the old `/npm/b` prefix is replaced with `/npm/a`, not doubled to `/npm/a/npm/b/…`. 2. Host-only URL: `replaceRegistryHost=https://old.example.com/` — verifies only the host is swapped and the resolved path is preserved. 3. Path mismatch: `replaceRegistryHost=https://old.example.com/npm/c` but tarball under `/npm/b` — verifies the URL is left unchanged when the path prefix does not match. ## References - Fixes #6110 (cherry picked from commit b51d156fd9858f418775df9c70859377eba1fa0c) --- tap-snapshots/test/lib/docs.js.test.cjs | 9 +- workspaces/arborist/lib/arborist/reify.js | 52 +++--- workspaces/arborist/test/arborist/reify.js | 148 ++++++++++++++++++ .../config/lib/definitions/definitions.js | 11 +- 4 files changed, 194 insertions(+), 26 deletions(-) diff --git a/tap-snapshots/test/lib/docs.js.test.cjs b/tap-snapshots/test/lib/docs.js.test.cjs index 686a10f794521..072d6619a95d1 100644 --- a/tap-snapshots/test/lib/docs.js.test.cjs +++ b/tap-snapshots/test/lib/docs.js.test.cjs @@ -1694,7 +1694,14 @@ registry (https://registry.npmjs.org) to the configured registry. If set to "never", then use the registry value. If set to "always", then replace the registry host with the configured host every time. -You may also specify a bare hostname (e.g., "registry.npmjs.org"). +You may also specify a bare hostname (e.g., "registry.npmjs.org") to only +replace URLs coming from that host. + +You may also specify a full URL including a path (e.g., +"https://old-registry.example.com/npm/path"). In that case, resolved URLs +whose host and path begin with that prefix will have the entire prefix +replaced with the configured registry URL (host and path), without +duplicating path segments. 
diff --git a/workspaces/arborist/lib/arborist/reify.js b/workspaces/arborist/lib/arborist/reify.js index 6b5ab00269a84..db5171ff7367b 100644 --- a/workspaces/arborist/lib/arborist/reify.js +++ b/workspaces/arborist/lib/arborist/reify.js @@ -997,36 +997,42 @@ module.exports = cls => class Reifier extends cls { // the default reg as the magical animal that it has been. try { const resolvedURL = hgi.parseUrl(resolved) + const registryURL = new URL(this.registry) + const registryPath = registryURL.pathname.replace(/\/$/, '') - if ((this.options.replaceRegistryHost === resolvedURL.hostname) || - this.options.replaceRegistryHost === 'always') { - const registryURL = new URL(this.registry) + let matchURL = null + try { + matchURL = new URL(this.options.replaceRegistryHost) + } catch { + // keep matchURL null + } - // Replace the host with the registry host while keeping the path intact - resolvedURL.hostname = registryURL.hostname - resolvedURL.port = registryURL.port - resolvedURL.protocol = registryURL.protocol + const matchHost = matchURL?.hostname ?? this.options.replaceRegistryHost + const matchPath = matchURL?.pathname.replace(/\/$/, '') ?? 
null + const hasPathPrefix = (pathname, prefix) => + pathname === prefix || pathname.startsWith(`${prefix}/`) - // Make sure we don't double-include the path if it's already there - const registryPath = registryURL.pathname.replace(/\/$/, '') + const hostMatches = this.options.replaceRegistryHost === 'always' || matchHost === resolvedURL.hostname + const pathMatches = !matchPath || hasPathPrefix(resolvedURL.pathname, matchPath) - if (registryPath && registryPath !== '/') { - // Check if the resolved pathname already starts with the registry path - // We need to ensure it's a proper path prefix, not just a string prefix - // e.g., registry path '/npm' should not match '/npm-run-path' - const hasRegistryPath = resolvedURL.pathname === registryPath || - resolvedURL.pathname.startsWith(registryPath + '/') + if (!hostMatches || !pathMatches) { + return resolved + } - if (!hasRegistryPath) { - // Since hostname is changed, we need to ensure the registry path is included - resolvedURL.pathname = registryPath + resolvedURL.pathname - } - } + resolvedURL.protocol = registryURL.protocol + resolvedURL.hostname = registryURL.hostname + resolvedURL.port = registryURL.port - return resolvedURL.toString() + if (matchPath) { + // full-URL prefix: swap old path prefix for the registry path + resolvedURL.pathname = registryPath + resolvedURL.pathname.slice(matchPath.length) + } else if (registryPath && !hasPathPrefix(resolvedURL.pathname, registryPath)) { + // host-only: prepend registry path if not already present + resolvedURL.pathname = registryPath + resolvedURL.pathname } - return resolved - } catch (e) { + + return resolvedURL.toString() + } catch { // if we could not parse the url at all then returning nothing // here means it will get removed from the tree in the next step return undefined diff --git a/workspaces/arborist/test/arborist/reify.js b/workspaces/arborist/test/arborist/reify.js index bfc8fd5937a3e..64859a4b71630 100644 --- 
a/workspaces/arborist/test/arborist/reify.js +++ b/workspaces/arborist/test/arborist/reify.js @@ -3905,6 +3905,154 @@ t.test('should preserve exact ranges, missing actual tree', async (t) => { await t.resolves(arb.reify(), 'reify should complete successfully') }) + // Validates both URL-prefix matching modes for replace-registry-host: + // A) full URL with path → entire prefix (host + old path) is replaced with registry URL + // B) host-only URL → only the host is swapped, resolved path is left unchanged + t.test('replace-registry-host as full URL with path replaces entire prefix', async t => { + const packument = JSON.stringify({ + _id: 'abbrev', + _rev: 'lkjadflkjasdf', + name: 'abbrev', + 'dist-tags': { latest: '1.1.1' }, + versions: { + '1.1.1': { + name: 'abbrev', + version: '1.1.1', + dist: { + // tarball lives under /npm/b on the old host + tarball: 'https://old.example.com/npm/b/abbrev/-/abbrev-1.1.1.tgz', + }, + }, + }, + }) + + const testdir = t.testdir({ + project: { + 'package.json': JSON.stringify({ + name: 'myproject', + version: '1.0.0', + dependencies: { abbrev: '1.1.1' }, + }), + }, + }) + + // packument lookup goes through new host + new path prefix + tnock(t, 'https://new.example.com') + .get('/npm/a/abbrev') + .reply(200, packument) + + // tarball: /npm/b prefix replaced with /npm/a — NOT /npm/a/npm/b/… + tnock(t, 'https://new.example.com') + .get('/npm/a/abbrev/-/abbrev-1.1.1.tgz') + .reply(200, abbrevTGZ) + + const arb = new Arborist({ + path: resolve(testdir, 'project'), + registry: 'https://new.example.com/npm/a', + cache: resolve(testdir, 'cache'), + replaceRegistryHost: 'https://old.example.com/npm/b', + }) + + await t.resolves(arb.reify(), 'prefix is replaced without duplication') + }) + + t.test('replace-registry-host as host-only URL leaves resolved path unchanged', async t => { + const packument = JSON.stringify({ + _id: 'abbrev', + _rev: 'lkjadflkjasdf', + name: 'abbrev', + 'dist-tags': { latest: '1.1.1' }, + versions: { + '1.1.1': { 
+ name: 'abbrev', + version: '1.1.1', + dist: { + // tarball has its own path on the old host + tarball: 'https://old.example.com/abbrev/-/abbrev-1.1.1.tgz', + }, + }, + }, + }) + + const testdir = t.testdir({ + project: { + 'package.json': JSON.stringify({ + name: 'myproject', + version: '1.0.0', + dependencies: { abbrev: '1.1.1' }, + }), + }, + }) + + // packument lookup: host swapped, path unchanged + tnock(t, 'https://new.example.com') + .get('/abbrev') + .reply(200, packument) + + // tarball: host swapped only — /abbrev/-/… path is preserved as-is + tnock(t, 'https://new.example.com') + .get('/abbrev/-/abbrev-1.1.1.tgz') + .reply(200, abbrevTGZ) + + const arb = new Arborist({ + path: resolve(testdir, 'project'), + registry: 'https://new.example.com/', + cache: resolve(testdir, 'cache'), + // trailing slash only → host-only replacement, path left unchanged + replaceRegistryHost: 'https://old.example.com/', + }) + + await t.resolves(arb.reify(), 'only host is replaced; resolved path is unchanged') + }) + + t.test('replace-registry-host as full URL with path does not replace non-matching path', async t => { + const packument = JSON.stringify({ + _id: 'abbrev', + _rev: 'lkjadflkjasdf', + name: 'abbrev', + 'dist-tags': { latest: '1.1.1' }, + versions: { + '1.1.1': { + name: 'abbrev', + version: '1.1.1', + dist: { + // tarball is under /npm/b, but replaceRegistryHost specifies /npm/c + tarball: 'https://old.example.com/npm/b/abbrev/-/abbrev-1.1.1.tgz', + }, + }, + }, + }) + + const testdir = t.testdir({ + project: { + 'package.json': JSON.stringify({ + name: 'myproject', + version: '1.0.0', + dependencies: { abbrev: '1.1.1' }, + }), + }, + }) + + // packument comes from configured registry + tnock(t, 'https://new.example.com') + .get('/npm/a/abbrev') + .reply(200, packument) + + // tarball is NOT replaced because /npm/b does not start with /npm/c + tnock(t, 'https://old.example.com') + .get('/npm/b/abbrev/-/abbrev-1.1.1.tgz') + .reply(200, abbrevTGZ) + + const arb = 
new Arborist({ + path: resolve(testdir, 'project'), + registry: 'https://new.example.com/npm/a', + cache: resolve(testdir, 'cache'), + replaceRegistryHost: 'https://old.example.com/npm/c', + }) + + await t.resolves(arb.reify(), 'non-matching path prefix leaves resolved URL unchanged') + }) + t.test('allowRemote=none allows registry tarball under registry path without trailing slash', async t => { const abbrevPackument5 = JSON.stringify({ _id: 'abbrev', diff --git a/workspaces/config/lib/definitions/definitions.js b/workspaces/config/lib/definitions/definitions.js index acf2d3e93a48e..2bb1713458af9 100644 --- a/workspaces/config/lib/definitions/definitions.js +++ b/workspaces/config/lib/definitions/definitions.js @@ -1930,7 +1930,7 @@ const definitions = { }), 'replace-registry-host': new Definition('replace-registry-host', { default: 'npmjs', - hint: ' | hostname', + hint: ' | hostname | url', type: ['npmjs', 'never', 'always', String], description: ` Defines behavior for replacing the registry host in a lockfile with the @@ -1941,7 +1941,14 @@ const definitions = { "never", then use the registry value. If set to "always", then replace the registry host with the configured host every time. - You may also specify a bare hostname (e.g., "registry.npmjs.org"). + You may also specify a bare hostname (e.g., "registry.npmjs.org") to only + replace URLs coming from that host. + + You may also specify a full URL including a path (e.g., + "https://old-registry.example.com/npm/path"). In that case, resolved URLs + whose host and path begin with that prefix will have the entire prefix + replaced with the configured registry URL (host and path), without + duplicating path segments. `, flatten, }),