diff --git a/tap-snapshots/test/lib/docs.js.test.cjs b/tap-snapshots/test/lib/docs.js.test.cjs index 686a10f794521..072d6619a95d1 100644 --- a/tap-snapshots/test/lib/docs.js.test.cjs +++ b/tap-snapshots/test/lib/docs.js.test.cjs @@ -1694,7 +1694,14 @@ registry (https://registry.npmjs.org) to the configured registry. If set to "never", then use the registry value. If set to "always", then replace the registry host with the configured host every time. -You may also specify a bare hostname (e.g., "registry.npmjs.org"). +You may also specify a bare hostname (e.g., "registry.npmjs.org") to only +replace URLs coming from that host. + +You may also specify a full URL including a path (e.g., +"https://old-registry.example.com/npm/path"). In that case, resolved URLs +whose host and path begin with that prefix will have the entire prefix +replaced with the configured registry URL (host and path), without +duplicating path segments. diff --git a/workspaces/arborist/lib/arborist/reify.js b/workspaces/arborist/lib/arborist/reify.js index 6b5ab00269a84..db5171ff7367b 100644 --- a/workspaces/arborist/lib/arborist/reify.js +++ b/workspaces/arborist/lib/arborist/reify.js @@ -997,36 +997,42 @@ module.exports = cls => class Reifier extends cls { // the default reg as the magical animal that it has been. try { const resolvedURL = hgi.parseUrl(resolved) + const registryURL = new URL(this.registry) + const registryPath = registryURL.pathname.replace(/\/$/, '') - if ((this.options.replaceRegistryHost === resolvedURL.hostname) || - this.options.replaceRegistryHost === 'always') { - const registryURL = new URL(this.registry) + let matchURL = null + try { + matchURL = new URL(this.options.replaceRegistryHost) + } catch { + // keep matchURL null + } - // Replace the host with the registry host while keeping the path intact - resolvedURL.hostname = registryURL.hostname - resolvedURL.port = registryURL.port - resolvedURL.protocol = registryURL.protocol + const matchHost = matchURL?.hostname ?? this.options.replaceRegistryHost + const matchPath = matchURL?.pathname.replace(/\/$/, '') ?? null + const hasPathPrefix = (pathname, prefix) => + pathname === prefix || pathname.startsWith(`${prefix}/`) - // Make sure we don't double-include the path if it's already there - const registryPath = registryURL.pathname.replace(/\/$/, '') + const hostMatches = this.options.replaceRegistryHost === 'always' || matchHost === resolvedURL.hostname + const pathMatches = !matchPath || hasPathPrefix(resolvedURL.pathname, matchPath) - if (registryPath && registryPath !== '/') { - // Check if the resolved pathname already starts with the registry path - // We need to ensure it's a proper path prefix, not just a string prefix - // e.g., registry path '/npm' should not match '/npm-run-path' - const hasRegistryPath = resolvedURL.pathname === registryPath || - resolvedURL.pathname.startsWith(registryPath + '/') + if (!hostMatches || !pathMatches) { + return resolved + } - if (!hasRegistryPath) { - // Since hostname is changed, we need to ensure the registry path is included - resolvedURL.pathname = registryPath + resolvedURL.pathname - } - } + resolvedURL.protocol = registryURL.protocol + resolvedURL.hostname = registryURL.hostname + resolvedURL.port = registryURL.port - return resolvedURL.toString() + if (matchPath) { + // full-URL prefix: swap old path prefix for the registry path + resolvedURL.pathname = registryPath + resolvedURL.pathname.slice(matchPath.length) + } else if (registryPath && !hasPathPrefix(resolvedURL.pathname, registryPath)) { + // host-only: prepend registry path if not already present + resolvedURL.pathname = registryPath + resolvedURL.pathname } - return resolved - } catch (e) { + + return resolvedURL.toString() + } catch { // if we could not parse the url at all then returning nothing // here means it will get removed from the tree in the next step return undefined diff --git a/workspaces/arborist/test/arborist/reify.js b/workspaces/arborist/test/arborist/reify.js index bfc8fd5937a3e..64859a4b71630 100644 --- a/workspaces/arborist/test/arborist/reify.js +++ b/workspaces/arborist/test/arborist/reify.js @@ -3905,6 +3905,154 @@ t.test('should preserve exact ranges, missing actual tree', async (t) => { await t.resolves(arb.reify(), 'reify should complete successfully') }) + // Validates both URL-prefix matching modes for replace-registry-host: + // A) full URL with path → entire prefix (host + old path) is replaced with registry URL + // B) host-only URL → only the host is swapped, resolved path is left unchanged + t.test('replace-registry-host as full URL with path replaces entire prefix', async t => { + const packument = JSON.stringify({ + _id: 'abbrev', + _rev: 'lkjadflkjasdf', + name: 'abbrev', + 'dist-tags': { latest: '1.1.1' }, + versions: { + '1.1.1': { + name: 'abbrev', + version: '1.1.1', + dist: { + // tarball lives under /npm/b on the old host + tarball: 'https://old.example.com/npm/b/abbrev/-/abbrev-1.1.1.tgz', + }, + }, + }, + }) + + const testdir = t.testdir({ + project: { + 'package.json': JSON.stringify({ + name: 'myproject', + version: '1.0.0', + dependencies: { abbrev: '1.1.1' }, + }), + }, + }) + + // packument lookup goes through new host + new path prefix + tnock(t, 'https://new.example.com') + .get('/npm/a/abbrev') + .reply(200, packument) + + // tarball: /npm/b prefix replaced with /npm/a — NOT /npm/a/npm/b/… + tnock(t, 'https://new.example.com') + .get('/npm/a/abbrev/-/abbrev-1.1.1.tgz') + .reply(200, abbrevTGZ) + + const arb = new Arborist({ + path: resolve(testdir, 'project'), + registry: 'https://new.example.com/npm/a', + cache: resolve(testdir, 'cache'), + replaceRegistryHost: 'https://old.example.com/npm/b', + }) + + await t.resolves(arb.reify(), 'prefix is replaced without duplication') + }) + + t.test('replace-registry-host as host-only URL leaves resolved path unchanged', async t => { + const packument = JSON.stringify({ + _id: 'abbrev', + _rev: 'lkjadflkjasdf', + name: 'abbrev', + 'dist-tags': { latest: '1.1.1' }, + versions: { + '1.1.1': { + name: 'abbrev', + version: '1.1.1', + dist: { + // tarball has its own path on the old host + tarball: 'https://old.example.com/abbrev/-/abbrev-1.1.1.tgz', + }, + }, + }, + }) + + const testdir = t.testdir({ + project: { + 'package.json': JSON.stringify({ + name: 'myproject', + version: '1.0.0', + dependencies: { abbrev: '1.1.1' }, + }), + }, + }) + + // packument lookup: host swapped, path unchanged + tnock(t, 'https://new.example.com') + .get('/abbrev') + .reply(200, packument) + + // tarball: host swapped only — /abbrev/-/… path is preserved as-is + tnock(t, 'https://new.example.com') + .get('/abbrev/-/abbrev-1.1.1.tgz') + .reply(200, abbrevTGZ) + + const arb = new Arborist({ + path: resolve(testdir, 'project'), + registry: 'https://new.example.com/', + cache: resolve(testdir, 'cache'), + // trailing slash only → host-only replacement, path left unchanged + replaceRegistryHost: 'https://old.example.com/', + }) + + await t.resolves(arb.reify(), 'only host is replaced; resolved path is unchanged') + }) + + t.test('replace-registry-host as full URL with path does not replace non-matching path', async t => { + const packument = JSON.stringify({ + _id: 'abbrev', + _rev: 'lkjadflkjasdf', + name: 'abbrev', + 'dist-tags': { latest: '1.1.1' }, + versions: { + '1.1.1': { + name: 'abbrev', + version: '1.1.1', + dist: { + // tarball is under /npm/b, but replaceRegistryHost specifies /npm/c + tarball: 'https://old.example.com/npm/b/abbrev/-/abbrev-1.1.1.tgz', + }, + }, + }, + }) + + const testdir = t.testdir({ + project: { + 'package.json': JSON.stringify({ + name: 'myproject', + version: '1.0.0', + dependencies: { abbrev: '1.1.1' }, + }), + }, + }) + + // packument comes from configured registry + tnock(t, 'https://new.example.com') + .get('/npm/a/abbrev') + .reply(200, packument) + + // tarball is NOT replaced because /npm/b does not start with /npm/c + tnock(t, 'https://old.example.com') + .get('/npm/b/abbrev/-/abbrev-1.1.1.tgz') + .reply(200, abbrevTGZ) + + const arb = new Arborist({ + path: resolve(testdir, 'project'), + registry: 'https://new.example.com/npm/a', + cache: resolve(testdir, 'cache'), + replaceRegistryHost: 'https://old.example.com/npm/c', + }) + + await t.resolves(arb.reify(), 'non-matching path prefix leaves resolved URL unchanged') + }) + t.test('allowRemote=none allows registry tarball under registry path without trailing slash', async t => { const abbrevPackument5 = JSON.stringify({ _id: 'abbrev', diff --git a/workspaces/config/lib/definitions/definitions.js b/workspaces/config/lib/definitions/definitions.js index acf2d3e93a48e..2bb1713458af9 100644 --- a/workspaces/config/lib/definitions/definitions.js +++ b/workspaces/config/lib/definitions/definitions.js @@ -1930,7 +1930,7 @@ const definitions = { }), 'replace-registry-host': new Definition('replace-registry-host', { default: 'npmjs', - hint: ' | hostname', + hint: ' | hostname | url', type: ['npmjs', 'never', 'always', String], description: ` Defines behavior for replacing the registry host in a lockfile with the @@ -1941,7 +1941,14 @@ const definitions = { "never", then use the registry value. If set to "always", then replace the registry host with the configured host every time. - You may also specify a bare hostname (e.g., "registry.npmjs.org"). + You may also specify a bare hostname (e.g., "registry.npmjs.org") to only + replace URLs coming from that host. + + You may also specify a full URL including a path (e.g., + "https://old-registry.example.com/npm/path"). In that case, resolved URLs + whose host and path begin with that prefix will have the entire prefix + replaced with the configured registry URL (host and path), without + duplicating path segments. `, flatten, }),