Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 19 additions & 15 deletions tree/ntuple/inc/ROOT/RPageStorage.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -567,6 +567,16 @@ The page source also gives access to the ntuple's metadata.
*/
// clang-format on
class RPageSource : public RPageStorage {
protected:
/// Summarizes the meta-data that is necessary to load a certain page. Used by LoadPageImpl().
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
/// Summarizes meta-data is necessary to load a certain page. Used by LoadPageImpl().
/// Summarizes meta-data that is necessary to load a certain page. Used by LoadPageImpl().

? or just meta-data necessary?

struct RPageSummary {
/// ID of the cluster in whose page range the page was looked up
ROOT::DescriptorId_t fClusterId = 0;
/// The first element number of the page's column in the given cluster
std::uint64_t fColumnOffset = 0;
/// Page information as found in the cluster's page range for the column; provides the page's locator
/// (i.e., its location on disk), its number of elements, and its first element index
ROOT::RClusterDescriptor::RPageInfoExtended fPageInfo;
};

public:
/// Used in SetEntryRange / GetEntryRange
struct REntryRange {
Expand Down Expand Up @@ -640,6 +650,9 @@ private:
/// Must not be called when the descriptor guard is taken.
void UpdateLastUsedCluster(ROOT::DescriptorId_t clusterId);

// Common treatment of zero pages that would otherwise need to be handled in LoadPageImpl()
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// Common treatment of zero pages that would otherwise needed to be handled in LoadPageImpl()
// Common treatment of zero pages that would otherwise need to be handled in LoadPageImpl()

ROOT::Internal::RPageRef LoadZeroPage(ColumnHandle_t columnHandle, const RPageSummary &pageSummary);

protected:
/// Default I/O performance counters that get registered in `fMetrics`
struct RCounters {
Expand Down Expand Up @@ -693,16 +706,6 @@ protected:
}
};

/// Summarizes cluster-level information that are necessary to load a certain page.
/// Used by LoadPageImpl().
struct RClusterInfo {
ROOT::DescriptorId_t fClusterId = 0;
/// Location of the page on disk
ROOT::RClusterDescriptor::RPageInfoExtended fPageInfo;
/// The first element number of the page's column in the given cluster
std::uint64_t fColumnOffset = 0;
};

std::unique_ptr<RCounters> fCounters;

ROOT::RNTupleReadOptions fOptions;
Expand All @@ -726,9 +729,11 @@ protected:
virtual std::unique_ptr<RPageSource> CloneImpl() const = 0;
// Only called if a task scheduler is set. No-op by default.
virtual void UnzipClusterImpl(ROOT::Internal::RCluster *cluster);
// Returns a page from storage if not found in the page pool. Should be able to handle zero page locators.
virtual ROOT::Internal::RPageRef
LoadPageImpl(ColumnHandle_t columnHandle, const RClusterInfo &clusterInfo, ROOT::NTupleSize_t idxInCluster) = 0;
// Returns a page from storage if not found in the page pool. Will never receive requests for zero pages.
virtual ROOT::Internal::RPage LoadPageImpl(ColumnHandle_t columnHandle, const RPageSummary &pageSummary) = 0;
// Returns a sealed page from storage without adding it to the page pool. The sealed page's buffer and buffer size
// are already initialized.
virtual void LoadSealedPageImpl(const RNTupleLocator &locator, RSealedPage &sealedPage) = 0;

/// Prepare a page range read for the column set in `clusterKey`. Specifically, pages referencing the
/// `kTypePageZero` locator are filled in `pageZeroMap`; otherwise, `perPageFunc` is called for each page. This is
Expand Down Expand Up @@ -816,8 +821,7 @@ public:
/// The `fSize` and `fNElements` member of the sealedPage parameters are always set. If `sealedPage.fBuffer` is
/// `nullptr`, no data will be copied but the returned size information can be used by the caller to allocate a large
/// enough buffer and call `LoadSealedPage` again.
virtual void
LoadSealedPage(ROOT::DescriptorId_t physicalColumnId, RNTupleLocalIndex localIndex, RSealedPage &sealedPage) = 0;
void LoadSealedPage(ROOT::DescriptorId_t physicalColumnId, RNTupleLocalIndex localIndex, RSealedPage &sealedPage);

/// Populates all the pages of the given cluster ids and columns; it is possible that some columns do not
/// contain any pages. The page source may load more columns than the minimal necessary set from `columns`.
Expand Down
7 changes: 2 additions & 5 deletions tree/ntuple/inc/ROOT/RPageStorageDaos.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -159,8 +159,8 @@ private:

ROOT::Internal::RNTupleDescriptorBuilder fDescriptorBuilder;

ROOT::Internal::RPageRef
LoadPageImpl(ColumnHandle_t columnHandle, const RClusterInfo &clusterInfo, ROOT::NTupleSize_t idxInCluster) final;
ROOT::Internal::RPage LoadPageImpl(ColumnHandle_t columnHandle, const RPageSummary &pageSummary) final;
void LoadSealedPageImpl(const RNTupleLocator &locator, RSealedPage &sealedPage) final;

protected:
void LoadStructureImpl() final {}
Expand All @@ -172,9 +172,6 @@ public:
RPageSourceDaos(std::string_view ntupleName, std::string_view uri, const ROOT::RNTupleReadOptions &options);
~RPageSourceDaos() override;

void
LoadSealedPage(ROOT::DescriptorId_t physicalColumnId, RNTupleLocalIndex localIndex, RSealedPage &sealedPage) final;

std::vector<std::unique_ptr<ROOT::Internal::RCluster>>
LoadClusters(std::span<ROOT::Internal::RCluster::RKey> clusterKeys) final;

Expand Down
7 changes: 2 additions & 5 deletions tree/ntuple/inc/ROOT/RPageStorageFile.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -180,8 +180,8 @@ protected:
/// The cloned page source creates a new raw file and reader and opens its own file descriptor to the data.
std::unique_ptr<RPageSource> CloneImpl() const final;

RPageRef
LoadPageImpl(ColumnHandle_t columnHandle, const RClusterInfo &clusterInfo, ROOT::NTupleSize_t idxInCluster) final;
RPage LoadPageImpl(ColumnHandle_t columnHandle, const RPageSummary &pageSummary) final;
void LoadSealedPageImpl(const RNTupleLocator &locator, RSealedPage &sealedPage) final;

public:
RPageSourceFile(std::string_view ntupleName, std::string_view path, const ROOT::RNTupleReadOptions &options);
Expand All @@ -201,9 +201,6 @@ public:
std::unique_ptr<RPageSource> OpenWithDifferentAnchor(const ROOT::Internal::RNTupleLink &anchorLink,
const ROOT::RNTupleReadOptions &options = {}) final;

void
LoadSealedPage(ROOT::DescriptorId_t physicalColumnId, RNTupleLocalIndex localIndex, RSealedPage &sealedPage) final;

std::vector<std::unique_ptr<ROOT::Internal::RCluster>>
LoadClusters(std::span<ROOT::Internal::RCluster::RKey> clusterKeys) final;

Expand Down
103 changes: 77 additions & 26 deletions tree/ntuple/src/RPageStorage.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -392,54 +392,101 @@ void ROOT::Internal::RPageSource::UpdateLastUsedCluster(ROOT::DescriptorId_t clu
fLastUsedCluster = clusterId;
}

/// Fills `sealedPage` with the on-storage representation of the page of column `physicalColumnId` that contains
/// `localIndex`. Buffer size, number of elements and the checksum flag are always set; the page content is only
/// copied if `sealedPage` already carries a buffer.
void ROOT::Internal::RPageSource::LoadSealedPage(ROOT::DescriptorId_t physicalColumnId, RNTupleLocalIndex localIndex,
                                                 RSealedPage &sealedPage)
{
   // Take a copy of the page info so that the descriptor guard is released before the page content is read.
   ROOT::RClusterDescriptor::RPageInfo pageInfo;
   {
      auto guard = GetSharedDescriptorGuard();
      pageInfo = guard->GetClusterDescriptor(localIndex.GetClusterId())
                    .GetPageRange(physicalColumnId)
                    .Find(localIndex.GetIndexInCluster());
   }

   const auto &locator = pageInfo.GetLocator();
   const bool hasChecksum = pageInfo.HasChecksum();

   // The checksum, if present, is stored right after the page payload and counts towards the buffer size.
   sealedPage.SetBufferSize(locator.GetNBytesOnStorage() + hasChecksum * kNBytesPageChecksum);
   sealedPage.SetNElements(pageInfo.GetNElements());
   sealedPage.SetHasChecksum(hasChecksum);

   // Without a destination buffer, only the size information set above is returned.
   if (sealedPage.GetBuffer() == nullptr)
      return;

   if (locator.GetType() != RNTupleLocator::kTypePageZero) {
      // Regular page: let the storage backend fill the buffer, then validate the content.
      LoadSealedPageImpl(locator, sealedPage);
      sealedPage.VerifyChecksumIfEnabled().ThrowOnError();
      return;
   }

   // Zero page: filled from the shared zero buffer; no checksum verification takes place on this path.
   assert(!hasChecksum);
   memcpy(const_cast<void *>(sealedPage.GetBuffer()), ROOT::Internal::RPage::GetPageZeroBuffer(),
          sealedPage.GetBufferSize());
}

/// Builds the in-memory page for a page whose locator is of type `kTypePageZero`: a page of the right size is
/// allocated, filled with zeros, positioned in its column and cluster, and registered in the page pool.
/// Returns the page reference handed out by the page pool.
ROOT::Internal::RPageRef
ROOT::Internal::RPageSource::LoadZeroPage(ColumnHandle_t columnHandle, const RPageSummary &pageSummary)
{
   assert(pageSummary.fPageInfo.GetLocator().GetType() == RNTupleLocator::kTypePageZero);

   const auto columnElement = columnHandle.fColumn->GetElement();
   const auto nElements = pageSummary.fPageInfo.GetNElements();

   // Allocate the page at its full size and blank out the entire buffer.
   auto zeroPage = fPageAllocator->NewPage(columnElement->GetSize(), nElements);
   zeroPage.GrowUnchecked(nElements);
   std::memset(zeroPage.GetBuffer(), 0, zeroPage.GetNBytes());

   // The page's first element is given relative to the column's offset in the cluster.
   const auto firstElementIndex = pageSummary.fColumnOffset + pageSummary.fPageInfo.GetFirstElementIndex();
   zeroPage.SetWindow(firstElementIndex, RPage::RClusterInfo(pageSummary.fClusterId, pageSummary.fColumnOffset));

   return fPagePool.RegisterPage(
      std::move(zeroPage), RPagePool::RKey{columnHandle.fPhysicalId, columnElement->GetIdentifier().fInMemoryType});
}

ROOT::Internal::RPageRef
ROOT::Internal::RPageSource::LoadPage(ColumnHandle_t columnHandle, ROOT::NTupleSize_t globalIndex)
{
const auto columnId = columnHandle.fPhysicalId;
const auto columnElementId = columnHandle.fColumn->GetElement()->GetIdentifier();
auto cachedPageRef =
fPagePool.GetPage(ROOT::Internal::RPagePool::RKey{columnId, columnElementId.fInMemoryType}, globalIndex);
const auto elementInMemoryType = columnHandle.fColumn->GetElement()->GetIdentifier().fInMemoryType;
auto cachedPageRef = fPagePool.GetPage(ROOT::Internal::RPagePool::RKey{columnId, elementInMemoryType}, globalIndex);
if (!cachedPageRef.Get().IsNull()) {
UpdateLastUsedCluster(cachedPageRef.Get().GetClusterInfo().GetId());
return cachedPageRef;
}

std::uint64_t idxInCluster;
RClusterInfo clusterInfo;
RPageSummary pageSummary;
{
auto descriptorGuard = GetSharedDescriptorGuard();
clusterInfo.fClusterId = descriptorGuard->FindClusterId(columnId, globalIndex);
pageSummary.fClusterId = descriptorGuard->FindClusterId(columnId, globalIndex);

if (clusterInfo.fClusterId == ROOT::kInvalidDescriptorId)
if (pageSummary.fClusterId == ROOT::kInvalidDescriptorId)
throw RException(R__FAIL("entry with index " + std::to_string(globalIndex) + " out of bounds"));

const auto &clusterDescriptor = descriptorGuard->GetClusterDescriptor(clusterInfo.fClusterId);
const auto &clusterDescriptor = descriptorGuard->GetClusterDescriptor(pageSummary.fClusterId);
const auto &columnRange = clusterDescriptor.GetColumnRange(columnId);
if (columnRange.IsSuppressed())
return ROOT::Internal::RPageRef();

clusterInfo.fColumnOffset = columnRange.GetFirstElementIndex();
R__ASSERT(clusterInfo.fColumnOffset <= globalIndex);
idxInCluster = globalIndex - clusterInfo.fColumnOffset;
clusterInfo.fPageInfo = clusterDescriptor.GetPageRange(columnId).Find(idxInCluster);
pageSummary.fColumnOffset = columnRange.GetFirstElementIndex();
R__ASSERT(pageSummary.fColumnOffset <= globalIndex);
pageSummary.fPageInfo = clusterDescriptor.GetPageRange(columnId).Find(globalIndex - pageSummary.fColumnOffset);
}

if (clusterInfo.fPageInfo.GetLocator().GetType() == RNTupleLocator::kTypeUnknown)
if (pageSummary.fPageInfo.GetLocator().GetType() == RNTupleLocator::kTypeUnknown) {
throw RException(R__FAIL("tried to read a page with an unknown locator"));
} else if (pageSummary.fPageInfo.GetLocator().GetType() == RNTupleLocator::kTypePageZero) {
return LoadZeroPage(columnHandle, pageSummary);
}

UpdateLastUsedCluster(clusterInfo.fClusterId);
return LoadPageImpl(columnHandle, clusterInfo, idxInCluster);
UpdateLastUsedCluster(pageSummary.fClusterId);
return fPagePool.RegisterPage(LoadPageImpl(columnHandle, pageSummary),
RPagePool::RKey{columnId, elementInMemoryType});
}

ROOT::Internal::RPageRef
ROOT::Internal::RPageSource::LoadPage(ColumnHandle_t columnHandle, RNTupleLocalIndex localIndex)
{
const auto clusterId = localIndex.GetClusterId();
const auto idxInCluster = localIndex.GetIndexInCluster();
const auto columnId = columnHandle.fPhysicalId;
const auto columnElementId = columnHandle.fColumn->GetElement()->GetIdentifier();
auto cachedPageRef =
fPagePool.GetPage(ROOT::Internal::RPagePool::RKey{columnId, columnElementId.fInMemoryType}, localIndex);
const auto elementInMemoryType = columnHandle.fColumn->GetElement()->GetIdentifier().fInMemoryType;
auto cachedPageRef = fPagePool.GetPage(ROOT::Internal::RPagePool::RKey{columnId, elementInMemoryType}, localIndex);
if (!cachedPageRef.Get().IsNull()) {
UpdateLastUsedCluster(clusterId);
return cachedPageRef;
Expand All @@ -448,24 +495,28 @@ ROOT::Internal::RPageSource::LoadPage(ColumnHandle_t columnHandle, RNTupleLocalI
if (clusterId == kInvalidDescriptorId)
throw RException(R__FAIL("entry out of bounds"));

RClusterInfo clusterInfo;
RPageSummary pageSummary;
{
auto descriptorGuard = GetSharedDescriptorGuard();
const auto &clusterDescriptor = descriptorGuard->GetClusterDescriptor(clusterId);
const auto &columnRange = clusterDescriptor.GetColumnRange(columnId);
if (columnRange.IsSuppressed())
return ROOT::Internal::RPageRef();

clusterInfo.fClusterId = clusterId;
clusterInfo.fColumnOffset = columnRange.GetFirstElementIndex();
clusterInfo.fPageInfo = clusterDescriptor.GetPageRange(columnId).Find(idxInCluster);
pageSummary.fClusterId = clusterId;
pageSummary.fColumnOffset = columnRange.GetFirstElementIndex();
pageSummary.fPageInfo = clusterDescriptor.GetPageRange(columnId).Find(localIndex.GetIndexInCluster());
}

if (clusterInfo.fPageInfo.GetLocator().GetType() == RNTupleLocator::kTypeUnknown)
if (pageSummary.fPageInfo.GetLocator().GetType() == RNTupleLocator::kTypeUnknown) {
throw RException(R__FAIL("tried to read a page with an unknown locator"));
} else if (pageSummary.fPageInfo.GetLocator().GetType() == RNTupleLocator::kTypePageZero) {
return LoadZeroPage(columnHandle, pageSummary);
}

UpdateLastUsedCluster(clusterInfo.fClusterId);
return LoadPageImpl(columnHandle, clusterInfo, idxInCluster);
UpdateLastUsedCluster(clusterId);
return fPagePool.RegisterPage(LoadPageImpl(columnHandle, pageSummary),
RPagePool::RKey{columnId, elementInMemoryType});
}

void ROOT::Internal::RPageSource::EnableDefaultMetrics(const std::string &prefix)
Expand Down
Loading
Loading