diff --git a/CHANGELOG.md b/CHANGELOG.md index f7e5ae7b21..58a37a01bb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,12 +14,27 @@ The following changes are pending, and will be applied on the next major release ## [Unreleased] -### [Patch] +### New features + +- `validateContainedResourcesAll`: In addition to the change to `getContainedResourcesAll` + described in the Bugfix section, a new function is added to the API to help detecting + incorrect containment claims. + +### Patch The following changes have been implemented but not released yet: - `getProfileAll` now also follows `rdfs:seeAlso` when discovering extended profiles. +### Bugfix + +- When listing contained resources with `getContainedResourcesAll`, resources that + are not direct child resources of the target container from a URL path semantics + perspective are no longer returned. This means `https://pod.example.org/foo/bar/moo` + cannot be considered a child resource of `https://pod.example.org/foo/`, regardless + of the `ldp:contains` statements in the container. Resources from a different + origin are also be excluded by this change. + ## [1.29.0] - 2023-05-18 ### New feature diff --git a/src/index.test.ts b/src/index.test.ts index ac2fd858e5..c1e893864b 100644 --- a/src/index.test.ts +++ b/src/index.test.ts @@ -46,6 +46,7 @@ import { createContainerInContainer, deleteContainer, getContainedResourceUrlAll, + validateContainedResourceAll, saveAclFor, deleteAclFor, getThing, @@ -234,6 +235,7 @@ it("exports the public API from the entry file", () => { expect(createContainerInContainer).toBeDefined(); expect(deleteContainer).toBeDefined(); expect(getContainedResourceUrlAll).toBeDefined(); + expect(validateContainedResourceAll).toBeDefined(); expect(saveAclFor).toBeDefined(); expect(deleteAclFor).toBeDefined(); expect(getThing).toBeDefined(); diff --git a/src/index.ts b/src/index.ts index 151b1c5b42..0b5e183304 100644 --- a/src/index.ts +++ b/src/index.ts @@ -51,6 +51,7 @@ export { createContainerInContainer, deleteContainer, getContainedResourceUrlAll, + validateContainedResourceAll, solidDatasetAsMarkdown, changeLogAsMarkdown, Parser, diff --git a/src/resource/solidDataset.test.ts b/src/resource/solidDataset.test.ts index bd0888b562..727b5a93a4 100644 --- a/src/resource/solidDataset.test.ts +++ b/src/resource/solidDataset.test.ts @@ -39,6 +39,7 @@ import { getContainedResourceUrlAll, responseToSolidDataset, getWellKnownSolid, + validateContainedResourceAll, } from "./solidDataset"; import type { WithChangeLog, @@ -2894,14 +2895,14 @@ describe("deleteContainer", () => { describe("getContainedResourceUrlAll", () => { const mockContainer = ( containerUrl: string, - containedResourceNames: UrlString[] + containedResourceUrls: UrlString[] ) => { let childrenIndex = createThing({ url: containerUrl }); let mockedContainer = mockContainerFrom(containerUrl); - containedResourceNames.forEach((resourceName) => { + containedResourceUrls.forEach((resourceUrl) => { let childListing = createThing({ - url: `${containerUrl + resourceName}.ttl`, + url: resourceUrl, }); childListing = addUrl(childListing, rdf.type, ldp.Resource); @@ -2917,15 +2918,88 @@ describe("getContainedResourceUrlAll", () => { it("gets all URLs for contained Resources from a Container", () => { const containerUrl = "https://arbitrary.pod/container/"; - const containedThings = ["resource1", "resource2", "resource3"]; + const containedThings = [ + "https://arbitrary.pod/container/resource1", + "https://arbitrary.pod/container/resource2/", + ]; const container = mockContainer(containerUrl, containedThings); - const expectedReturnUrls = containedThings.map( - (thingName) => `${containerUrl}${thingName}.ttl` - ); expect(getContainedResourceUrlAll(container)).toStrictEqual( - expectedReturnUrls + containedThings ); + expect(validateContainedResourceAll(container)).toStrictEqual({ + isValid: true, + invalidContainedResources: [], + }); + }); + + it("does not include non-direct children of target Container", () => { + const containerUrl = "https://arbitrary.pod/container/"; + const indirectChildren = [ + "https://arbitrary.pod/container/container/resource1/", + "https://arbitrary.pod/container//c/resource2", + "https://arbitrary.pod/resource3", + "https://other.pod/container/resource4", + ]; + expect( + getContainedResourceUrlAll(mockContainer(containerUrl, indirectChildren)) + ).toHaveLength(0); + expect( + validateContainedResourceAll( + mockContainer(containerUrl, indirectChildren) + ) + ).toStrictEqual({ + isValid: false, + invalidContainedResources: [...indirectChildren], + }); + }); + + it("does not include children having a similar URL path as the parent", () => { + expect( + getContainedResourceUrlAll( + mockContainer("http://example.org/a/", ["http://example.org/a/"]) + ) + ).toHaveLength(0); + expect( + validateContainedResourceAll( + mockContainer("http://example.org/a/", ["http://example.org/a/"]) + ) + ).toStrictEqual({ + isValid: false, + invalidContainedResources: ["http://example.org/a/"], + }); + + expect( + getContainedResourceUrlAll( + mockContainer("http://example.org/a/?q1=a/", [ + "http://example.org/a/?q1=a/a", + ]) + ) + ).toHaveLength(0); + expect( + validateContainedResourceAll( + mockContainer("http://example.org/a/?q1=a/", [ + "http://example.org/a/?q1=a/a", + ]) + ) + ).toStrictEqual({ + isValid: false, + invalidContainedResources: ["http://example.org/a/?q1=a/a"], + }); + + expect( + getContainedResourceUrlAll( + mockContainer("http://example.org/a/", ["http://example.org/a//"]) + ) + ).toHaveLength(0); + expect( + validateContainedResourceAll( + mockContainer("http://example.org/a/", ["http://example.org/a//"]) + ) + ).toStrictEqual({ + isValid: false, + invalidContainedResources: ["http://example.org/a//"], + }); }); it("returns an empty array if the Container contains no Resources", () => { @@ -2941,6 +3015,10 @@ describe("getContainedResourceUrlAll", () => { it("returns an empty array if the Container contains no index of contained Resources", () => { const dataset = mockSolidDatasetFrom("https://arbitrary.pod/dataset"); expect(getContainedResourceUrlAll(dataset)).toStrictEqual([]); + expect(validateContainedResourceAll(dataset)).toStrictEqual({ + isValid: true, + invalidContainedResources: [], + }); }); }); diff --git a/src/resource/solidDataset.ts b/src/resource/solidDataset.ts index 28885977fc..c93907bb88 100644 --- a/src/resource/solidDataset.ts +++ b/src/resource/solidDataset.ts @@ -827,10 +827,25 @@ export async function deleteContainer( } } +function isChildResource(a: string, b: string): boolean { + const parent = new URL(b); + const child = new URL(a); + // Explicitly test on the whole URL to enforce similar origins. + const isAncestor = child.href.startsWith(parent.href); + const relativePath = child.pathname + .substring(parent.pathname.length, child.pathname.length) + .replace(/(^\/)|(\/$)/g, ""); + // The child path component that isn't present in the parent should only + // potentially include slashes at the end (if it is a container). + return isAncestor && relativePath.length >= 1 && !relativePath.includes("/"); +} + /** * Given a [[SolidDataset]] representing a Container (see [[isContainer]]), fetch the URLs of all * contained resources. * If the solidDataset given is not a container, or is missing resourceInfo, throw an error. + * If the containment of some resources is invalid (see {@link validateContainedResourceAll}), + * they are not included in the result. * * @param solidDataset The container from which to fetch all contained Resource URLs. * @returns A list of URLs, each of which points to a contained Resource of the given SolidDataset. @@ -840,11 +855,54 @@ export async function deleteContainer( export function getContainedResourceUrlAll( solidDataset: SolidDataset & WithResourceInfo ): UrlString[] { - const container = getThing(solidDataset, getSourceUrl(solidDataset)); + const containerUrl = getSourceUrl(solidDataset); + const container = getThing(solidDataset, containerUrl); + if (container === null) { + return []; + } // See https://www.w3.org/TR/2015/REC-ldp-20150226/#h-ldpc-http_post: // > a containment triple MUST be added to the state of the LDPC whose subject is the LDPC URI, // > whose predicate is ldp:contains and whose object is the URI for the newly created document - return container !== null ? getIriAll(container, ldp.contains) : []; + return ( + getIriAll(container, ldp.contains) + // See https://solidproject.org/TR/protocol#resource-containment + .filter((childUrl) => isChildResource(childUrl, containerUrl)) + ); +} + +/** + * Given a {@link SolidDataset} representing a Container (see {@link isContainer}), verify that + * all its containĂ¹ent claims are valid. Containment of a resource is invalid if it doesn't + * respect slash semantics, see https://solidproject.org/TR/protocol#resource-containment for + * more details. + * + * Resources for which containment is invalid are not included in the result set returned by + * {@link getContainedResourceUrlAll}. + * + * @param solidDataset The container from which containment claims are validated. + * @returns A validation report, including the offending contained resources URL if any. + * @since unreleased + */ +export function validateContainedResourceAll( + solidDataset: SolidDataset & WithResourceInfo +): { isValid: boolean; invalidContainedResources: string[] } { + const containerUrl = getSourceUrl(solidDataset); + const container = getThing(solidDataset, containerUrl); + if (container === null) { + return { isValid: true, invalidContainedResources: [] }; + } + + // See https://www.w3.org/TR/2015/REC-ldp-20150226/#h-ldpc-http_post: + // > a containment triple MUST be added to the state of the LDPC whose subject is the LDPC URI, + // > whose predicate is ldp:contains and whose object is the URI for the newly created document + const invalidChildren = getIriAll(container, ldp.contains) + // See https://solidproject.org/TR/protocol#resource-containment + .filter((childUrl) => !isChildResource(childUrl, containerUrl)); + + if (invalidChildren.length > 0) { + return { isValid: false, invalidContainedResources: invalidChildren }; + } + return { isValid: true, invalidContainedResources: [] }; } /**