From 2ff34c0a936c5141add45497935fe8792f60e8f9 Mon Sep 17 00:00:00 2001 From: lind Date: Thu, 31 Oct 2024 16:51:56 -0700 Subject: [PATCH 1/3] add file descriptor support to file_data_loader Summary: Test Plan: Reviewers: Subscribers: Tasks: Tags: --- .../executor_runner/executor_runner.cpp | 14 ++- .../executor_runner/open_file_by_fd.sh | 6 ++ extension/data_loader/file_data_loader.cpp | 70 +++++++++++++ extension/data_loader/file_data_loader.h | 18 ++++ .../test/file_data_loader_test.cpp | 97 +++++++++++++++++++ 5 files changed, 204 insertions(+), 1 deletion(-) create mode 100644 examples/portable/executor_runner/open_file_by_fd.sh diff --git a/examples/portable/executor_runner/executor_runner.cpp b/examples/portable/executor_runner/executor_runner.cpp index 93c150c0b90..f1a2d3b8f2f 100644 --- a/examples/portable/executor_runner/executor_runner.cpp +++ b/examples/portable/executor_runner/executor_runner.cpp @@ -22,6 +22,9 @@ #include +#include +#include + #include #include #include @@ -36,6 +39,10 @@ DEFINE_string( model_path, "model.pte", "Model serialized in flatbuffer format."); +DEFINE_bool( + is_fd_uri, + false, + "True if the model_path passed is a file descriptor with the prefix \"fd:///\"."); using executorch::extension::FileDataLoader; using executorch::runtime::Error; @@ -66,7 +73,12 @@ int main(int argc, char** argv) { // DataLoaders that use mmap() or point to data that's already in memory, and // users can create their own DataLoaders to load from arbitrary sources. const char* model_path = FLAGS_model_path.c_str(); - Result loader = FileDataLoader::from(model_path); + const bool is_fd_uri = FLAGS_is_fd_uri; + + Result loader = is_fd_uri + ? FileDataLoader::fromFileDescriptorUri(model_path) + : FileDataLoader::from(model_path); + ET_CHECK_MSG( loader.ok(), "FileDataLoader::from() failed: 0x%" PRIx32, diff --git a/examples/portable/executor_runner/open_file_by_fd.sh b/examples/portable/executor_runner/open_file_by_fd.sh new file mode 100644 index 00000000000..b6940934c3e --- /dev/null +++ b/examples/portable/executor_runner/open_file_by_fd.sh @@ -0,0 +1,6 @@ +#!/bin/bash +FILENAME="$1" +exec {FD}<${FILENAME} # open file for read, assign descriptor +echo "Opened ${FILENAME} for read using descriptor ${FD}" +out/host/linux-x86/bin/executor_runner --model_path fd:///${FD} --is_fd_uri=true +exec {FD}<&- # close file diff --git a/extension/data_loader/file_data_loader.cpp b/extension/data_loader/file_data_loader.cpp index 1d097cfd989..30c8f3ffc41 100644 --- a/extension/data_loader/file_data_loader.cpp +++ b/extension/data_loader/file_data_loader.cpp @@ -43,6 +43,8 @@ namespace extension { namespace { +static constexpr char kFdFilesystemPrefix[] = "fd:///"; + /** * Returns true if the value is an integer power of 2. */ @@ -74,6 +76,74 @@ FileDataLoader::~FileDataLoader() { ::close(fd_); } +Result getFDFromUri(const char* file_descriptor_uri) { + // check if the uri starts with the prefix "fd://" + ET_CHECK_OR_RETURN_ERROR( + strncmp( + file_descriptor_uri, + kFdFilesystemPrefix, + strlen(kFdFilesystemPrefix)) == 0, + InvalidArgument, + "File descriptor uri (%s) does not start with %s", + file_descriptor_uri, + kFdFilesystemPrefix); + + // strip "fd:///" from the uri + int fd_len = strlen(file_descriptor_uri) - strlen(kFdFilesystemPrefix); + char fd_without_prefix[fd_len + 1]; + memcpy( + fd_without_prefix, + &file_descriptor_uri[strlen(kFdFilesystemPrefix)], + fd_len); + fd_without_prefix[fd_len] = '\0'; + + // check if remaining fd string is a valid integer + int fd = ::atoi(fd_without_prefix); + return fd; +} + +Result FileDataLoader::fromFileDescriptorUri( + const char* file_descriptor_uri, + size_t alignment) { + ET_CHECK_OR_RETURN_ERROR( + is_power_of_2(alignment), + InvalidArgument, + "Alignment %zu is not a power of 2", + alignment); + + auto parsed_fd = getFDFromUri(file_descriptor_uri); + if (!parsed_fd.ok()) { + return parsed_fd.error(); + } + + int fd = parsed_fd.get(); + + // Cache the file size. + struct stat st; + int err = ::fstat(fd, &st); + if (err < 0) { + ET_LOG( + Error, + "Could not get length of %s: %s (%d)", + file_descriptor_uri, + ::strerror(errno), + errno); + ::close(fd); + return Error::AccessFailed; + } + size_t file_size = st.st_size; + + // Copy the filename so we can print better debug messages if reads fail. + const char* file_name_copy = ::strdup(file_descriptor_uri); + if (file_name_copy == nullptr) { + ET_LOG(Error, "strdup(%s) failed", file_descriptor_uri); + ::close(fd); + return Error::MemoryAllocationFailed; + } + + return FileDataLoader(fd, file_size, alignment, file_name_copy); +} + Result FileDataLoader::from( const char* file_name, size_t alignment) { diff --git a/extension/data_loader/file_data_loader.h b/extension/data_loader/file_data_loader.h index 7cf2a92c4ad..15f5f29ff3c 100644 --- a/extension/data_loader/file_data_loader.h +++ b/extension/data_loader/file_data_loader.h @@ -26,6 +26,24 @@ namespace extension { */ class FileDataLoader final : public executorch::runtime::DataLoader { public: + /** + * Creates a new FileDataLoader that wraps the named file descriptor. + * + * @param[in] file_descriptor_uri File descriptor with the prefix "fd:///", + * followed by the file descriptor number. + * @param[in] alignment Alignment in bytes of pointers returned by this + * instance. Must be a power of two. + * + * @returns A new FileDataLoader on success. + * @retval Error::InvalidArgument `alignment` is not a power of two. + * @retval Error::AccessFailed `file_name` could not be opened, or its size + * could not be found. + * @retval Error::MemoryAllocationFailed Internal memory allocation failure. + */ + static executorch::runtime::Result fromFileDescriptorUri( + const char* file_descriptor_uri, + size_t alignment = alignof(std::max_align_t)); + /** * Creates a new FileDataLoader that wraps the named file. * diff --git a/extension/data_loader/test/file_data_loader_test.cpp b/extension/data_loader/test/file_data_loader_test.cpp index 1d4f4c16196..b8921aebb54 100644 --- a/extension/data_loader/test/file_data_loader_test.cpp +++ b/extension/data_loader/test/file_data_loader_test.cpp @@ -40,6 +40,103 @@ class FileDataLoaderTest : public ::testing::TestWithParam { } }; +TEST_P(FileDataLoaderTest, InBoundsFileDescriptorLoadsSucceed) { + // Write some heterogeneous data to a file. + uint8_t data[256]; + for (int i = 0; i < sizeof(data); ++i) { + data[i] = i; + } + TempFile tf(data, sizeof(data)); + + int fd = ::open(tf.path().c_str(), O_RDONLY); + + // Wrap it in a loader. + Result fdl = FileDataLoader::fromFileDescriptorUri( + ("fd:///" + std::to_string(fd)).c_str(), alignment()); + ASSERT_EQ(fdl.error(), Error::Ok); + + // size() should succeed and reflect the total size. + Result size = fdl->size(); + ASSERT_EQ(size.error(), Error::Ok); + EXPECT_EQ(*size, sizeof(data)); + + // Load the first bytes of the data. + { + Result fb = fdl->load( + /*offset=*/0, + /*size=*/8, + DataLoader::SegmentInfo(DataLoader::SegmentInfo::Type::Program)); + ASSERT_EQ(fb.error(), Error::Ok); + EXPECT_ALIGNED(fb->data(), alignment()); + EXPECT_EQ(fb->size(), 8); + EXPECT_EQ( + 0, + std::memcmp( + fb->data(), + "\x00\x01\x02\x03" + "\x04\x05\x06\x07", + fb->size())); + + // Freeing should release the buffer and clear out the segment. + fb->Free(); + EXPECT_EQ(fb->size(), 0); + EXPECT_EQ(fb->data(), nullptr); + + // Safe to call multiple times. + fb->Free(); + } + + // Load the last few bytes of the data, a different size than the first time. + { + Result fb = fdl->load( + /*offset=*/sizeof(data) - 3, + /*size=*/3, + DataLoader::SegmentInfo(DataLoader::SegmentInfo::Type::Program)); + ASSERT_EQ(fb.error(), Error::Ok); + EXPECT_ALIGNED(fb->data(), alignment()); + EXPECT_EQ(fb->size(), 3); + EXPECT_EQ(0, std::memcmp(fb->data(), "\xfd\xfe\xff", fb->size())); + } + + // Loading all of the data succeeds. + { + Result fb = fdl->load( + /*offset=*/0, + /*size=*/sizeof(data), + DataLoader::SegmentInfo(DataLoader::SegmentInfo::Type::Program)); + ASSERT_EQ(fb.error(), Error::Ok); + EXPECT_ALIGNED(fb->data(), alignment()); + EXPECT_EQ(fb->size(), sizeof(data)); + EXPECT_EQ(0, std::memcmp(fb->data(), data, fb->size())); + } + + // Loading zero-sized data succeeds, even at the end of the data. + { + Result fb = fdl->load( + /*offset=*/sizeof(data), + /*size=*/0, + DataLoader::SegmentInfo(DataLoader::SegmentInfo::Type::Program)); + ASSERT_EQ(fb.error(), Error::Ok); + EXPECT_EQ(fb->size(), 0); + } +} + +TEST_P(FileDataLoaderTest, FileDescriptorLoadPrefixFail) { + // Write some heterogeneous data to a file. + uint8_t data[256]; + for (int i = 0; i < sizeof(data); ++i) { + data[i] = i; + } + TempFile tf(data, sizeof(data)); + + int fd = ::open(tf.path().c_str(), O_RDONLY); + + // Wrap it in a loader. + Result fdl = FileDataLoader::fromFileDescriptorUri( + std::to_string(fd).c_str(), alignment()); + ASSERT_EQ(fdl.error(), Error::InvalidArgument); +} + TEST_P(FileDataLoaderTest, InBoundsLoadsSucceed) { // Write some heterogeneous data to a file. uint8_t data[256]; From a67fd4f83a0100e826a64905508a1dc35f9df822 Mon Sep 17 00:00:00 2001 From: lind Date: Mon, 4 Nov 2024 08:54:50 -0800 Subject: [PATCH 2/3] address comments --- examples/portable/executor_runner/open_file_by_fd.sh | 6 ------ extension/data_loader/file_data_loader.h | 5 ++++- 2 files changed, 4 insertions(+), 7 deletions(-) delete mode 100644 examples/portable/executor_runner/open_file_by_fd.sh diff --git a/examples/portable/executor_runner/open_file_by_fd.sh b/examples/portable/executor_runner/open_file_by_fd.sh deleted file mode 100644 index b6940934c3e..00000000000 --- a/examples/portable/executor_runner/open_file_by_fd.sh +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/bash -FILENAME="$1" -exec {FD}<${FILENAME} # open file for read, assign descriptor -echo "Opened ${FILENAME} for read using descriptor ${FD}" -out/host/linux-x86/bin/executor_runner --model_path fd:///${FD} --is_fd_uri=true -exec {FD}<&- # close file diff --git a/extension/data_loader/file_data_loader.h b/extension/data_loader/file_data_loader.h index 15f5f29ff3c..4006d435d34 100644 --- a/extension/data_loader/file_data_loader.h +++ b/extension/data_loader/file_data_loader.h @@ -27,7 +27,10 @@ namespace extension { class FileDataLoader final : public executorch::runtime::DataLoader { public: /** - * Creates a new FileDataLoader that wraps the named file descriptor. + * Creates a new FileDataLoader that wraps the named file descriptor, and the + * ownership of the file descriptor is passed. This helper is used when ET is + * running in a process that does not have access to the filesystem, and the + * caller is able to open the file and pass the file descriptor. * * @param[in] file_descriptor_uri File descriptor with the prefix "fd:///", * followed by the file descriptor number. From 83f3ff907e6861661d1d2997333b4a4482a3b7c8 Mon Sep 17 00:00:00 2001 From: lind Date: Mon, 4 Nov 2024 14:46:50 -0800 Subject: [PATCH 3/3] centralize logic --- extension/data_loader/file_data_loader.cpp | 68 +++++++++------------- extension/data_loader/file_data_loader.h | 5 ++ 2 files changed, 31 insertions(+), 42 deletions(-) diff --git a/extension/data_loader/file_data_loader.cpp b/extension/data_loader/file_data_loader.cpp index 30c8f3ffc41..0324751bfa4 100644 --- a/extension/data_loader/file_data_loader.cpp +++ b/extension/data_loader/file_data_loader.cpp @@ -102,22 +102,10 @@ Result getFDFromUri(const char* file_descriptor_uri) { return fd; } -Result FileDataLoader::fromFileDescriptorUri( - const char* file_descriptor_uri, +Result FileDataLoader::fromFileDescriptor( + const char* file_name, + const int fd, size_t alignment) { - ET_CHECK_OR_RETURN_ERROR( - is_power_of_2(alignment), - InvalidArgument, - "Alignment %zu is not a power of 2", - alignment); - - auto parsed_fd = getFDFromUri(file_descriptor_uri); - if (!parsed_fd.ok()) { - return parsed_fd.error(); - } - - int fd = parsed_fd.get(); - // Cache the file size. struct stat st; int err = ::fstat(fd, &st); @@ -125,7 +113,7 @@ Result FileDataLoader::fromFileDescriptorUri( ET_LOG( Error, "Could not get length of %s: %s (%d)", - file_descriptor_uri, + file_name, ::strerror(errno), errno); ::close(fd); @@ -134,9 +122,9 @@ Result FileDataLoader::fromFileDescriptorUri( size_t file_size = st.st_size; // Copy the filename so we can print better debug messages if reads fail. - const char* file_name_copy = ::strdup(file_descriptor_uri); + const char* file_name_copy = ::strdup(file_name); if (file_name_copy == nullptr) { - ET_LOG(Error, "strdup(%s) failed", file_descriptor_uri); + ET_LOG(Error, "strdup(%s) failed", file_name); ::close(fd); return Error::MemoryAllocationFailed; } @@ -144,6 +132,25 @@ Result FileDataLoader::fromFileDescriptorUri( return FileDataLoader(fd, file_size, alignment, file_name_copy); } +Result FileDataLoader::fromFileDescriptorUri( + const char* file_descriptor_uri, + size_t alignment) { + ET_CHECK_OR_RETURN_ERROR( + is_power_of_2(alignment), + InvalidArgument, + "Alignment %zu is not a power of 2", + alignment); + + auto parsed_fd = getFDFromUri(file_descriptor_uri); + if (!parsed_fd.ok()) { + return parsed_fd.error(); + } + + int fd = parsed_fd.get(); + + return fromFileDescriptor(file_descriptor_uri, fd, alignment); +} + Result FileDataLoader::from( const char* file_name, size_t alignment) { @@ -163,30 +170,7 @@ Result FileDataLoader::from( return Error::AccessFailed; } - // Cache the file size. - struct stat st; - int err = ::fstat(fd, &st); - if (err < 0) { - ET_LOG( - Error, - "Could not get length of %s: %s (%d)", - file_name, - ::strerror(errno), - errno); - ::close(fd); - return Error::AccessFailed; - } - size_t file_size = st.st_size; - - // Copy the filename so we can print better debug messages if reads fail. - const char* file_name_copy = ::strdup(file_name); - if (file_name_copy == nullptr) { - ET_LOG(Error, "strdup(%s) failed", file_name); - ::close(fd); - return Error::MemoryAllocationFailed; - } - - return FileDataLoader(fd, file_size, alignment, file_name_copy); + return fromFileDescriptor(file_name, fd, alignment); } namespace { diff --git a/extension/data_loader/file_data_loader.h b/extension/data_loader/file_data_loader.h index 4006d435d34..959684137b8 100644 --- a/extension/data_loader/file_data_loader.h +++ b/extension/data_loader/file_data_loader.h @@ -100,6 +100,11 @@ class FileDataLoader final : public executorch::runtime::DataLoader { void* buffer) const override; private: + static executorch::runtime::Result fromFileDescriptor( + const char* file_name, + const int fd, + size_t alignment = alignof(std::max_align_t)); + FileDataLoader( int fd, size_t file_size,