Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@

## [Unreleased] - ReleaseDate

### Added

- It is now possible to retrieve part of a file using `OcflObjectVersionFile.getRange()`. https://github.com/OCFL/ocfl-java/issues/118

## [2.2.0] - 2024-07-15

### Added
Expand Down
14 changes: 14 additions & 0 deletions ocfl-java-api/src/main/java/io/ocfl/api/OcflFileRetriever.java
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
package io.ocfl.api;

import io.ocfl.api.io.FixityCheckInputStream;
import java.io.InputStream;

/**
* This interface is used to lazy-load object files. A new instance should be created for each file that's intended to be loaded.
Expand All @@ -41,4 +42,17 @@ public interface OcflFileRetriever {
* @return FixityCheckInputStream of the file's content
*/
FixityCheckInputStream retrieveFile();

/**
 * Returns an input stream over the part of the file's content that lies within the specified byte range.
 * Whether {@code startPosition} and {@code endPosition} may be null, and what a null value means, is decided
 * by the underlying implementation.
 *
 * <p>The positions are boxed {@code Long} values rather than primitives because null is a valid argument for
 * some implementations.
 *
 * <p>The caller is responsible for closing the stream. The input stream is buffered.
 *
 * @param startPosition the byte offset in the file to start reading, inclusive; null handling is
 *                      implementation dependent
 * @param endPosition the byte offset in the file to stop reading, inclusive; null handling is
 *                    implementation dependent
 * @return a buffered input stream containing the specified file data
 */
InputStream retrieveRange(Long startPosition, Long endPosition);
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@pwinckles I would use the primitive datatype long instead of a Long object as parameter

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can't because nulls are valid

}
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
import io.ocfl.api.OcflFileRetriever;
import io.ocfl.api.io.FixityCheckInputStream;
import io.ocfl.api.util.Enforce;
import java.io.InputStream;
import java.util.Map;

/**
Expand Down Expand Up @@ -81,6 +82,21 @@ public FixityCheckInputStream getStream() {
return fileRetriever.retrieveFile();
}

/**
 * Opens a stream over a slice of this file's content, delimited by the given byte offsets. The request is
 * handed unchanged to the file retriever backing this object, so whether null offsets are accepted, and how
 * they are interpreted, depends on that retriever.
 *
 * <p>The returned stream is buffered and must be closed by the caller.
 *
 * @param startPosition the byte offset in the file to start reading, inclusive
 * @param endPosition the byte offset in the file to stop reading, inclusive
 * @return a buffered input stream containing the specified file data
 */
public InputStream getRange(Long startPosition, Long endPosition) {
    final InputStream rangeStream = fileRetriever.retrieveRange(startPosition, endPosition);
    return rangeStream;
}

@Override
public String toString() {
return "OcflObjectVersionFile{" + "fileDetails='" + fileDetails + '\'' + '}';
Expand Down
26 changes: 26 additions & 0 deletions ocfl-java-aws/src/main/java/io/ocfl/aws/OcflS3Client.java
Original file line number Diff line number Diff line change
Expand Up @@ -323,6 +323,32 @@ public InputStream downloadStream(String srcPath) {
}
}

/**
 * {@inheritDoc}
 */
@Override
public InputStream downloadStreamRange(String srcPath, String range) {
    var srcKey = keyBuilder.buildFromPath(srcPath);
    LOG.debug("Streaming from bucket {} key {} range {}", bucket, srcKey, range);

    try {
        // The request is built inside the try so that any failure while building it is wrapped as well.
        var rangeRequest = GetObjectRequest.builder()
                .bucket(bucket)
                .key(srcKey.getKey())
                .range(range)
                .build();
        var pendingStream = s3Client.getObject(rangeRequest, AsyncResponseTransformer.toBlockingInputStream());
        return pendingStream.join();
    } catch (RuntimeException e) {
        var cause = OcflS3Util.unwrapCompletionEx(e);
        if (!wasNotFound(cause)) {
            throw new OcflS3Exception("Failed to download " + srcKey, cause);
        }
        throw new KeyNotFoundException("Key " + srcKey + " not found in bucket " + bucket, cause);
    }
}

/**
* {@inheritDoc}
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,16 @@ public interface CloudClient {
*/
InputStream downloadStream(String srcPath);

/**
 * Downloads the specified range of an object. The range string is as defined in
 * <a href="https://www.rfc-editor.org/rfc/rfc9110.html#name-byte-ranges">RFC 9110</a>, for example
 * {@code bytes=0-99}.
 *
 * @param srcPath object key
 * @param range the byte range to download, formatted as an RFC 9110 range specifier
 * @return stream of the object content within the requested range
 * @throws KeyNotFoundException when srcPath not found
 */
InputStream downloadStreamRange(String srcPath, String range);

/**
* Downloads an object to a string. This assumes that the object is UTF-8 encoded.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
import io.ocfl.api.model.DigestAlgorithm;
import io.ocfl.api.util.Enforce;
import java.io.BufferedInputStream;
import java.io.InputStream;

/**
* OcflFileRetriever implementation for lazy-loading files from cloud storage.
Expand Down Expand Up @@ -71,8 +72,26 @@ public CloudOcflFileRetriever(
*/
@Override
public FixityCheckInputStream retrieveFile() {
    // TODO caching?
    var bufferedDownload = new BufferedInputStream(cloudClient.downloadStream(key));
    return new FixityCheckInputStream(bufferedDownload, digestAlgorithm, digestValue);
}

/**
 * Returns an input stream of the file's content between the specified byte range. startPosition and endPosition
 * are used to construct a byte range as specified in
 * <a href="https://www.rfc-editor.org/rfc/rfc9110.html#name-byte-ranges">RFC 9110</a>, and either may be null:
 *
 * <ul>
 *   <li>both set: {@code bytes=start-end}, the bytes from start to end, inclusive</li>
 *   <li>only startPosition set: {@code bytes=start-}, everything from start to the end of the file</li>
 *   <li>only endPosition set: {@code bytes=-end}, which RFC 9110 defines as a suffix range, i.e. the
 *       <em>last</em> {@code end} bytes of the file</li>
 *   <li>both null: the whole file is returned, because {@code bytes=-} is not a valid range specifier</li>
 * </ul>
 *
 * <p>The caller is responsible for closing the stream. The input stream is buffered.
 *
 * @param startPosition the byte offset in the file to start reading, inclusive, or null
 * @param endPosition the byte offset in the file to stop reading, inclusive, or null; when startPosition is null
 *                    this is the number of trailing bytes to read
 * @return a buffered input stream containing the specified file data
 * @throws IllegalArgumentException if a position is negative or endPosition is less than startPosition
 */
@Override
public InputStream retrieveRange(Long startPosition, Long endPosition) {
    if (startPosition != null && startPosition < 0) {
        throw new IllegalArgumentException("startPosition must not be negative: " + startPosition);
    }
    if (endPosition != null && endPosition < 0) {
        throw new IllegalArgumentException("endPosition must not be negative: " + endPosition);
    }
    if (startPosition != null && endPosition != null && endPosition < startPosition) {
        // RFC 9110 treats a last-pos smaller than first-pos as an invalid range; fail fast instead of
        // relying on however the remote store happens to react to it.
        throw new IllegalArgumentException(
                "endPosition " + endPosition + " must not be less than startPosition " + startPosition);
    }

    if (startPosition == null && endPosition == null) {
        // No bounds at all: "bytes=-" would be malformed, so fetch the complete object instead.
        return new BufferedInputStream(cloudClient.downloadStream(key));
    }

    var start = startPosition == null ? "" : startPosition.toString();
    var end = endPosition == null ? "" : endPosition.toString();
    var range = "bytes=" + start + "-" + end;
    return new BufferedInputStream(cloudClient.downloadStreamRange(key, range));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@
import io.ocfl.api.util.Enforce;
import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.RandomAccessFile;
import java.nio.file.Files;
import java.nio.file.Path;

Expand Down Expand Up @@ -61,4 +63,55 @@ public FixityCheckInputStream retrieveFile() {
throw OcflIOException.from(e);
}
}

/**
 * Returns an input stream of the file's content between the specified byte range. startPosition and endPosition
 * may not be null.
 *
 * <p>If the file ends before endPosition, the stream simply ends at the end of the file.
 *
 * <p>The caller is responsible for closing the stream; closing it releases the underlying file handle.
 * The input stream is buffered.
 *
 * @param startPosition the byte offset in the file to start reading, inclusive
 * @param endPosition the byte offset in the file to stop reading, inclusive
 * @return a buffered input stream containing the specified file data
 * @throws NullPointerException if startPosition or endPosition is null
 * @throws IllegalArgumentException if startPosition is negative or endPosition is less than startPosition
 * @throws OcflIOException if the file cannot be opened or positioned
 */
@Override
public InputStream retrieveRange(Long startPosition, Long endPosition) {
    final long start = java.util.Objects.requireNonNull(startPosition, "startPosition cannot be null");
    final long end = java.util.Objects.requireNonNull(endPosition, "endPosition cannot be null");
    if (start < 0) {
        throw new IllegalArgumentException("startPosition must not be negative: " + start);
    }
    if (end < start) {
        throw new IllegalArgumentException(
                "endPosition " + end + " must not be less than startPosition " + start);
    }

    // end - start + 1 overflows only for the range [0, Long.MAX_VALUE]; clamp rather than wrap negative.
    final long span = end - start;
    final long length = span == Long.MAX_VALUE ? Long.MAX_VALUE : span + 1;

    final RandomAccessFile file;
    try {
        file = new RandomAccessFile(filePath.toFile(), "r");
    } catch (IOException e) {
        throw OcflIOException.from(e);
    }

    try {
        if (start > 0) {
            file.seek(start);
        }
    } catch (IOException e) {
        // The handle was opened successfully, so it must be released before the failure is reported.
        try {
            file.close();
        } catch (IOException closeFailure) {
            e.addSuppressed(closeFailure);
        }
        throw OcflIOException.from(e);
    }

    // Wrapped in a BufferedInputStream so the stream is buffered, as the method contract promises.
    return new BufferedInputStream(new InputStream() {
        // Bytes of the requested range that have not been handed to the caller yet.
        private long remaining = length;

        @Override
        public int read() throws IOException {
            if (remaining <= 0) {
                return -1;
            }
            var value = file.read();
            // Only count a byte that was actually read; -1 means the file ended early.
            if (value >= 0) {
                remaining--;
            }
            return value;
        }

        @Override
        public int read(byte[] b, int off, int len) throws IOException {
            if (len == 0) {
                return 0;
            }
            if (remaining <= 0) {
                return -1;
            }
            var toRead = (int) Math.min(len, remaining);
            var count = file.read(b, off, toRead);
            // Advance by what was really read: the file may return fewer bytes than requested.
            if (count > 0) {
                remaining -= count;
            }
            return count;
        }

        @Override
        public void close() throws IOException {
            file.close();
        }
    });
}
}
24 changes: 24 additions & 0 deletions ocfl-java-itest/src/test/java/io/ocfl/itest/OcflITest.java
Original file line number Diff line number Diff line change
Expand Up @@ -2832,6 +2832,30 @@ public void sizeBasedFixityShouldFailValidationWhenInvalid() {
});
}

// Writes a file of repeated "asdf" and checks that byte ranges at the start and in the middle of the file
// come back with exactly the expected content.
@Test
public void fileRangeRequests() throws IOException {
    final var objectId = "obj1";
    final var repoName = "range";
    final var repo = defaultRepo(repoName);

    repo.updateObject(
            ObjectVersionId.head(objectId),
            null,
            updater -> updater.writeFile(ITestHelper.streamString("asdf".repeat(99_999)), "file1"));

    final var file1 = repo.getObject(ObjectVersionId.head(objectId)).getFile("file1");

    // First four bytes of the file.
    try (var stream = file1.getRange(0L, 3L)) {
        final var firstChunk = IOUtils.toString(stream, StandardCharsets.UTF_8);
        assertEquals("asdf", firstChunk);
    }

    // A range that starts and ends in the middle of an "asdf" repetition.
    try (var stream = file1.getRange(10_002L, 18_004L)) {
        final var middleChunk = IOUtils.toString(stream, StandardCharsets.UTF_8);
        assertEquals("df" + "asdf".repeat(2_000) + "a", middleChunk);
    }
}

private Path writeFile(String content) {
try {
return Files.writeString(
Expand Down