Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 34 additions & 12 deletions c++/src/Reader.cc
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
Expand Down Expand Up @@ -884,14 +884,21 @@
void ReaderImpl::readMetadata() const {
uint64_t metadataSize = contents_->postscript->metadata_length();
uint64_t footerLength = contents_->postscript->footer_length();
if (fileLength_ < metadataSize + footerLength + postscriptLength_ + 1) {

// Check for overflow in length calculations
uint64_t totalTail;
if (__builtin_add_overflow(footerLength, metadataSize, &totalTail) ||
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This does not work for Windows unfortunately

__builtin_add_overflow(totalTail, postscriptLength_, &totalTail) ||
__builtin_add_overflow(totalTail, 1ULL, &totalTail) ||
totalTail > fileLength_) {
std::stringstream msg;
msg << "Invalid Metadata length: fileLength=" << fileLength_
<< ", metadataLength=" << metadataSize << ", footerLength=" << footerLength
<< ", postscriptLength=" << postscriptLength_;
throw ParseError(msg.str());
}
uint64_t metadataStart = fileLength_ - metadataSize - footerLength - postscriptLength_ - 1;

uint64_t metadataStart = fileLength_ - totalTail;
if (metadataSize != 0) {
std::unique_ptr<SeekableInputStream> pbStream = createDecompressor(
contents_->compression,
Expand Down Expand Up @@ -1226,16 +1233,25 @@
do {
currentStripeInfo_ = footer_->stripes(static_cast<int>(currentStripe_));
uint64_t fileLength = contents_->stream->getLength();
if (currentStripeInfo_.offset() + currentStripeInfo_.index_length() +
currentStripeInfo_.data_length() + currentStripeInfo_.footer_length() >=
fileLength) {

uint64_t stripeOffset = currentStripeInfo_.offset();
uint64_t indexLength = currentStripeInfo_.index_length();
uint64_t dataLength = currentStripeInfo_.data_length();
uint64_t footerLength = currentStripeInfo_.footer_length();

// Check for overflow and bounds validity
uint64_t stripeTotalLength;
if (__builtin_add_overflow(indexLength, dataLength, &stripeTotalLength) ||
__builtin_add_overflow(stripeTotalLength, footerLength, &stripeTotalLength) ||
__builtin_add_overflow(stripeOffset, stripeTotalLength, &stripeTotalLength) ||
stripeTotalLength >= fileLength) {
std::stringstream msg;
msg << "Malformed StripeInformation at stripe index " << currentStripe_
<< ": fileLength=" << fileLength
<< ", StripeInfo=(offset=" << currentStripeInfo_.offset()
<< ", indexLength=" << currentStripeInfo_.index_length()
<< ", dataLength=" << currentStripeInfo_.data_length()
<< ", footerLength=" << currentStripeInfo_.footer_length() << ")";
<< ", StripeInfo=(offset=" << stripeOffset
<< ", indexLength=" << indexLength
<< ", dataLength=" << dataLength
<< ", footerLength=" << footerLength << ")";
throw ParseError(msg.str());
}
rowsInCurrentStripe_ = currentStripeInfo_.number_of_rows();
Expand Down Expand Up @@ -1635,10 +1651,16 @@
postscriptLength = buffer->data()[readSize - 1] & 0xff;
contents->postscript = readPostscript(stream.get(), buffer.get(), postscriptLength);
uint64_t footerSize = contents->postscript->footer_length();
uint64_t tailSize = 1 + postscriptLength + footerSize;
if (tailSize >= fileLength) {

// Check for overflow before calculating tailSize
uint64_t tailSize;
if (__builtin_add_overflow(1ULL, postscriptLength, &tailSize) ||
__builtin_add_overflow(tailSize, footerSize, &tailSize) ||
tailSize >= fileLength) {
std::stringstream msg;
msg << "Invalid ORC tailSize=" << tailSize << ", fileLength=" << fileLength;
msg << "Invalid tail size: footerSize=" << footerSize
<< ", postscriptLength=" << postscriptLength
<< ", fileLength=" << fileLength;
throw ParseError(msg.str());
}
uint64_t footerOffset;
Expand Down
113 changes: 113 additions & 0 deletions c++/test/TestReader.cc
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
Expand Down Expand Up @@ -1240,4 +1240,117 @@
EXPECT_LT(largeLimitIOCount, smallLimitIOCount);
}

/**
* Test that reading a malformed ORC file with extremely large footer_length
* throws ParseError instead of causing integer overflow or crash.
* This tests the fix for ORC-2167.
*/
TEST(TestReader, testMalformedFooterLengthOverflow) {
// Construct a minimal malformed ORC file with footer_length = UINT64_MAX
// ORC file tail structure:
// - Footer (not included here, will be invalid anyway)
// - PostScript (protobuf encoded)
// - 1 byte: postscript length
//
// PostScript protobuf fields:
// - field 1: footer_length (varint) - we set this to UINT64_MAX
// - field 8000: magic "ORC"
//
// Protobuf encoding for footer_length = UINT64_MAX:
// - tag = (field_number << 3 | wire_type) = (1 << 3 | 0) = 0x08
// - value = varint(UINT64_MAX) = 0xFF,FF,FF,FF,FF,FF,FF,FF,FF,01 (10 bytes)
//
// Protobuf encoding for magic "ORC":
// - field 8000 is a special field, but simpler to include "ORC" at end

// Create a minimal byte sequence simulating malicious file
// We need at least: PostScript + postscriptLength byte
// PostScript: footer_length(UINT64_MAX) encoded as protobuf varint

std::vector<char> maliciousData;

// Add some dummy footer bytes (not valid, but we're testing bounds check)
for (int i = 0; i < 20; i++) {
maliciousData.push_back(0x00);
}

// PostScript with footer_length = UINT64_MAX
// tag (field 1, varint): 0x08
maliciousData.push_back(0x08);
// value: UINT64_MAX as varint (10 bytes)
for (int i = 0; i < 9; i++) {
maliciousData.push_back(0xFF);
}
maliciousData.push_back(0x01);

// Add compression field (field 2) - NONE = 0
maliciousData.push_back(0x10); // tag: field 2, wire type 0
maliciousData.push_back(0x00); // value: compression NONE

// Add magic "ORC" at the end (this is a simplified approach)
maliciousData.push_back('O');
maliciousData.push_back('R');
maliciousData.push_back('C');

// PostScript length byte - points to all bytes before it (excluding magic)
// This is a simplified structure; real ORC has magic inside PostScript
uint8_t postscriptLen = 14; // footer_length encoding + compression encoding
maliciousData.push_back(static_cast<char>(postscriptLen));

// Create reader and expect ParseError
auto stream = std::make_unique<MemoryInputStream>(
maliciousData.data(), maliciousData.size());

ReaderOptions options;
EXPECT_THROW(createReader(std::move(stream), options), ParseError);
}

/**
* Test that reading a malformed ORC file with extremely large metadata_length
* throws ParseError instead of causing integer overflow.
*/
TEST(TestReader, testMalformedMetadataLengthOverflow) {
// Similar to testMalformedFooterLengthOverflow but with metadata_length overflow
// PostScript field 3 is metadata_length (actually it's field 4 in proto)
// Let's create a file with both footer_length and metadata_length set to large values

std::vector<char> maliciousData;

// Add some dummy bytes
for (int i = 0; i < 20; i++) {
maliciousData.push_back(0x00);
}

// PostScript:
// footer_length = 100 (reasonable small value)
maliciousData.push_back(0x08); // tag: field 1
maliciousData.push_back(0x64); // value: 100

// metadata_length = UINT64_MAX (field 4)
maliciousData.push_back(0x20); // tag: field 4, wire type 0 (4 << 3 | 0 = 32 = 0x20)
for (int i = 0; i < 9; i++) {
maliciousData.push_back(0xFF);
}
maliciousData.push_back(0x01);

// compression = NONE
maliciousData.push_back(0x10);
maliciousData.push_back(0x00);

// magic "ORC"
maliciousData.push_back('O');
maliciousData.push_back('R');
maliciousData.push_back('C');

// postscript length
uint8_t postscriptLen = 16;
maliciousData.push_back(static_cast<char>(postscriptLen));

auto stream = std::make_unique<MemoryInputStream>(
maliciousData.data(), maliciousData.size());

ReaderOptions options;
EXPECT_THROW(createReader(std::move(stream), options), ParseError);
}

} // namespace orc
Loading