diff --git a/CHANGELOG.md b/CHANGELOG.md index 8cbef4f7..1fe9af60 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,10 @@ ## [Unreleased] - ReleaseDate +### Added + +- It is now possible to retrieve part of a file using `OcflObjectVersionFile.getRange()`. https://github.com/OCFL/ocfl-java/issues/118 + ## [2.2.0] - 2024-07-15 ### Added diff --git a/ocfl-java-api/src/main/java/io/ocfl/api/OcflFileRetriever.java b/ocfl-java-api/src/main/java/io/ocfl/api/OcflFileRetriever.java index 33de48b6..5c7ed2e5 100644 --- a/ocfl-java-api/src/main/java/io/ocfl/api/OcflFileRetriever.java +++ b/ocfl-java-api/src/main/java/io/ocfl/api/OcflFileRetriever.java @@ -25,6 +25,7 @@ package io.ocfl.api; import io.ocfl.api.io.FixityCheckInputStream; +import java.io.InputStream; /** * This class is used to lazy-load object files. A new instance should be created for each file that's intended to be load. @@ -41,4 +42,17 @@ public interface OcflFileRetriever { * @return FixityCheckInputStream of the file's content */ FixityCheckInputStream retrieveFile(); + + /** + * Returns an input stream of the file's content between the specified byte range. startPosition and endPosition + * may be null, depending on the underlying implementation, and the meaning of a null value is also implementation + * dependent. + * + *
The caller is responsible for closing the stream. The input stream is buffered. + * + * @param startPosition the byte offset in the file to start reading, inclusive + * @param endPosition the byte offset in the file to stop reading, inclusive + * @return a buffered input stream containing the specified file data + */ + InputStream retrieveRange(Long startPosition, Long endPosition); } diff --git a/ocfl-java-api/src/main/java/io/ocfl/api/model/OcflObjectVersionFile.java b/ocfl-java-api/src/main/java/io/ocfl/api/model/OcflObjectVersionFile.java index 163c4d0c..7eb45c38 100644 --- a/ocfl-java-api/src/main/java/io/ocfl/api/model/OcflObjectVersionFile.java +++ b/ocfl-java-api/src/main/java/io/ocfl/api/model/OcflObjectVersionFile.java @@ -27,6 +27,7 @@ import io.ocfl.api.OcflFileRetriever; import io.ocfl.api.io.FixityCheckInputStream; import io.ocfl.api.util.Enforce; +import java.io.InputStream; import java.util.Map; /** @@ -81,6 +82,21 @@ public FixityCheckInputStream getStream() { return fileRetriever.retrieveFile(); } + /** + * Returns an input stream of the file's content between the specified byte range. startPosition and endPosition + * may be null, depending on the underlying implementation, and the meaning of a null value is also implementation + * dependent. + * + *
The caller is responsible for closing the stream. The input stream is buffered. + * + * @param startPosition the byte offset in the file to start reading, inclusive + * @param endPosition the byte offset in the file to stop reading, inclusive + * @return a buffered input stream containing the specified file data + */ + public InputStream getRange(Long startPosition, Long endPosition) { + return fileRetriever.retrieveRange(startPosition, endPosition); + } + @Override public String toString() { return "OcflObjectVersionFile{" + "fileDetails='" + fileDetails + '\'' + '}'; diff --git a/ocfl-java-aws/src/main/java/io/ocfl/aws/OcflS3Client.java b/ocfl-java-aws/src/main/java/io/ocfl/aws/OcflS3Client.java index 10cafaaf..979623b8 100644 --- a/ocfl-java-aws/src/main/java/io/ocfl/aws/OcflS3Client.java +++ b/ocfl-java-aws/src/main/java/io/ocfl/aws/OcflS3Client.java @@ -323,6 +323,32 @@ public InputStream downloadStream(String srcPath) { } } + /** + * {@inheritDoc} + */ + @Override + public InputStream downloadStreamRange(String srcPath, String range) { + var srcKey = keyBuilder.buildFromPath(srcPath); + LOG.debug("Streaming from bucket {} key {} range {}", bucket, srcKey, range); + + try { + return s3Client.getObject( + GetObjectRequest.builder() + .bucket(bucket) + .key(srcKey.getKey()) + .range(range) + .build(), + AsyncResponseTransformer.toBlockingInputStream()) + .join(); + } catch (RuntimeException e) { + var cause = OcflS3Util.unwrapCompletionEx(e); + if (wasNotFound(cause)) { + throw new KeyNotFoundException("Key " + srcKey + " not found in bucket " + bucket, cause); + } + throw new OcflS3Exception("Failed to download " + srcKey, cause); + } + } + /** * {@inheritDoc} */ diff --git a/ocfl-java-core/src/main/java/io/ocfl/core/storage/cloud/CloudClient.java b/ocfl-java-core/src/main/java/io/ocfl/core/storage/cloud/CloudClient.java index bfda2a32..33e50b8e 100644 --- a/ocfl-java-core/src/main/java/io/ocfl/core/storage/cloud/CloudClient.java +++ 
b/ocfl-java-core/src/main/java/io/ocfl/core/storage/cloud/CloudClient.java @@ -130,6 +130,16 @@ public interface CloudClient { */ InputStream downloadStream(String srcPath); + /** + * Downloads the specified range of an object. The range string is as defined in RFC 9110. + * + * @param srcPath object key + * @param range the range to download + * @return stream of object content + * @throws KeyNotFoundException when srcPath not found + */ + InputStream downloadStreamRange(String srcPath, String range); + /** * Downloads an object to a string. This assumes that the object is UTF-8 encoded. * diff --git a/ocfl-java-core/src/main/java/io/ocfl/core/storage/cloud/CloudOcflFileRetriever.java b/ocfl-java-core/src/main/java/io/ocfl/core/storage/cloud/CloudOcflFileRetriever.java index 9017814a..62c4393a 100644 --- a/ocfl-java-core/src/main/java/io/ocfl/core/storage/cloud/CloudOcflFileRetriever.java +++ b/ocfl-java-core/src/main/java/io/ocfl/core/storage/cloud/CloudOcflFileRetriever.java @@ -29,6 +29,7 @@ import io.ocfl.api.model.DigestAlgorithm; import io.ocfl.api.util.Enforce; import java.io.BufferedInputStream; +import java.io.InputStream; /** * OcflFileRetriever implementation for lazy-loading files from cloud storage. @@ -71,8 +72,26 @@ public CloudOcflFileRetriever( */ @Override public FixityCheckInputStream retrieveFile() { - // TODO caching? return new FixityCheckInputStream( new BufferedInputStream(cloudClient.downloadStream(key)), digestAlgorithm, digestValue); } + + /** + * Returns an input stream of the file's content between the specified byte range. startPosition and endPosition + * may be null. When they are null, they are translated into an empty string. startPosition and endPosition are + * used to construct byte range as specified in RFC 9110. + * + *
The caller is responsible for closing the stream. The input stream is buffered. + * + * @param startPosition the byte offset in the file to start reading, inclusive + * @param endPosition the byte offset in the file to stop reading, inclusive + * @return a buffered input stream containing the specified file data + */ + @Override + public InputStream retrieveRange(Long startPosition, Long endPosition) { + var start = startPosition == null ? "" : startPosition; + var end = endPosition == null ? "" : endPosition; + var range = "bytes=" + start + "-" + end; + return new BufferedInputStream(cloudClient.downloadStreamRange(key, range)); + } } diff --git a/ocfl-java-core/src/main/java/io/ocfl/core/storage/filesystem/FileSystemOcflFileRetriever.java b/ocfl-java-core/src/main/java/io/ocfl/core/storage/filesystem/FileSystemOcflFileRetriever.java index ddd1fd44..ec5912ea 100644 --- a/ocfl-java-core/src/main/java/io/ocfl/core/storage/filesystem/FileSystemOcflFileRetriever.java +++ b/ocfl-java-core/src/main/java/io/ocfl/core/storage/filesystem/FileSystemOcflFileRetriever.java @@ -31,6 +31,8 @@ import io.ocfl.api.util.Enforce; import java.io.BufferedInputStream; import java.io.IOException; +import java.io.InputStream; +import java.io.RandomAccessFile; import java.nio.file.Files; import java.nio.file.Path; @@ -61,4 +63,100 @@ public FixityCheckInputStream retrieveFile() {
             throw OcflIOException.from(e);
         }
     }
+
+    /**
+     * Returns an input stream of the file's content between the specified byte range. startPosition and endPosition
+     * may not be null.
+     *
+     * <p>If endPosition is before startPosition, or startPosition is at or past the end of the file, the returned
+     * stream is empty. If endPosition is past the end of the file, the stream stops at the end of the file.
+     *
+     * <p>The caller is responsible for closing the stream. The input stream is buffered.
+     *
+     * @param startPosition the byte offset in the file to start reading, inclusive; must not be negative
+     * @param endPosition the byte offset in the file to stop reading, inclusive
+     * @return a buffered input stream containing the specified file data
+     * @throws NullPointerException if startPosition or endPosition is null
+     * @throws IllegalArgumentException if startPosition is negative
+     * @throws OcflIOException if the file cannot be opened or positioned
+     */
+    @Override
+    public InputStream retrieveRange(Long startPosition, Long endPosition) {
+        if (startPosition == null || endPosition == null) {
+            throw new NullPointerException("startPosition and endPosition cannot be null");
+        }
+        final long start = startPosition;
+        final long end = endPosition;
+        if (start < 0) {
+            throw new IllegalArgumentException("startPosition cannot be negative: " + start);
+        }
+
+        // Inclusive range size, saturated so a huge endPosition cannot overflow into a negative length
+        final long rangeLength;
+        if (end < start) {
+            rangeLength = 0;
+        } else if (end - start == Long.MAX_VALUE) {
+            rangeLength = Long.MAX_VALUE;
+        } else {
+            rangeLength = end - start + 1;
+        }
+
+        try {
+            final var file = new RandomAccessFile(filePath.toFile(), "r");
+            try {
+                if (start > 0) {
+                    file.seek(start);
+                }
+            } catch (IOException e) {
+                // Do not leak the open file handle when positioning fails
+                try {
+                    file.close();
+                } catch (IOException closeFailure) {
+                    e.addSuppressed(closeFailure);
+                }
+                throw e;
+            }
+
+            // Buffered as documented; this also keeps single-byte reads from hitting the file each time
+            return new BufferedInputStream(new InputStream() {
+                private long remaining = rangeLength;
+
+                @Override
+                public int read() throws IOException {
+                    if (remaining <= 0) {
+                        return -1;
+                    }
+                    final int value = file.read();
+                    if (value >= 0) {
+                        remaining--;
+                    }
+                    return value;
+                }
+
+                @Override
+                public int read(byte[] b, int off, int len) throws IOException {
+                    if (len == 0) {
+                        return 0;
+                    }
+                    if (remaining <= 0) {
+                        return -1;
+                    }
+                    final int toRead = (int) Math.min(len, remaining);
+                    final int count = file.read(b, off, toRead);
+                    if (count > 0) {
+                        // Count only bytes actually delivered; a read may return fewer than requested
+                        remaining -= count;
+                    }
+                    return count;
+                }
+
+                @Override
+                public void close() throws IOException {
+                    file.close();
+                }
+            });
+        } catch (IOException e) {
+            throw OcflIOException.from(e);
+        }
+    }
 }
diff --git a/ocfl-java-itest/src/test/java/io/ocfl/itest/OcflITest.java b/ocfl-java-itest/src/test/java/io/ocfl/itest/OcflITest.java index 05a1e0de..2eb71bad 100644 --- a/ocfl-java-itest/src/test/java/io/ocfl/itest/OcflITest.java +++ b/ocfl-java-itest/src/test/java/io/ocfl/itest/OcflITest.java @@ -2832,6 +2832,30 @@ public void sizeBasedFixityShouldFailValidationWhenInvalid() { }); } + @Test + public void fileRangeRequests() throws IOException { + var repoName = "range"; + var repo = defaultRepo(repoName); + + var objectId = "obj1"; + + repo.updateObject(ObjectVersionId.head(objectId), null, updater -> { + updater.writeFile(ITestHelper.streamString("asdf".repeat(99_999)), "file1"); + }); + + var file1 = repo.getObject(ObjectVersionId.head(objectId)).getFile("file1"); + + try
(var is = file1.getRange(0L, 3L)) { + var value = IOUtils.toString(is, StandardCharsets.UTF_8); + assertEquals("asdf", value); + } + + try (var is = file1.getRange(10_002L, 18_004L)) { + var value = IOUtils.toString(is, StandardCharsets.UTF_8); + assertEquals("df" + "asdf".repeat(2_000) + "a", value); + } + } + private Path writeFile(String content) { try { return Files.writeString(