Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 14 additions & 1 deletion docs/src/format/table/layout.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@ A Lance dataset in its basic form stores all files within the dataset root direc
data/
*.lance -- Data files containing column data
_versions/
*.manifest -- Manifest files (one per version)
*.manifest -- Manifest files (one per version)
latest_version_hint.json -- Optional hint of the latest version (see "Version Hint" below)
_transactions/
*.txn -- Transaction files for commit coordination
_deletions/
Expand Down Expand Up @@ -201,3 +202,15 @@ Manifest files are stored in the `_versions/` directory with naming schemes that

See [Manifest Naming Schemes](transaction.md#manifest-naming-schemes) for details on the V1 and V2 patterns and their implications for version discovery.

### Version Hint

The optional file `_versions/latest_version_hint.json` records the latest committed version as JSON:

```json
{"version": 42}
```

It exists to accelerate latest-version discovery on stores where listing `_versions/` is expensive. Instead of listing the whole directory, a reader can read the hint and then probe for higher versions with HEAD requests; if the hint is missing or stale, the reader falls back to a full listing.

The hint is purely an optimization. It is always safe to delete, never affects correctness, and can be ignored by readers that don't understand it. Writers may choose not to write it.

7 changes: 6 additions & 1 deletion java/src/test/java/org/lance/DatasetTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -528,7 +528,12 @@ void testOpenSerializedManifest(@TempDir Path tempDir) throws IOException {
assertEquals(1, dataset1.version());
Path manifestPath = datasetPath.resolve("_versions");
try (Stream<Path> fileStream = Files.list(manifestPath)) {
assertEquals(1, fileStream.count());
// Ignore the version hint file, which is not a manifest.
assertEquals(
1,
fileStream
.filter(p -> !p.getFileName().toString().startsWith("latest_version_hint"))
.count());
ByteBuffer manifestBuffer = readManifest(manifestPath.resolve("1.manifest"));
try (Dataset dataset2 = testDataset.write(1, 5)) {
assertEquals(2, dataset2.version());
Expand Down
17 changes: 13 additions & 4 deletions python/python/tests/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -436,18 +436,27 @@ def test_has_stable_row_ids_property(tmp_path: Path):
assert lance.dataset(non_stable_path).has_stable_row_ids is False


def _list_manifests(versions_dir):
# Ignore the version hint file, which is not a manifest.
return [
name
for name in os.listdir(versions_dir)
if not name.startswith("latest_version_hint")
]


def test_v2_manifest_paths(tmp_path: Path):
lance.write_dataset(
pa.table({"a": range(100)}), tmp_path, enable_v2_manifest_paths=True
)
manifest_path = os.listdir(tmp_path / "_versions")
manifest_path = _list_manifests(tmp_path / "_versions")
assert len(manifest_path) == 1
assert re.match(r"\d{20}\.manifest", manifest_path[0])


def test_default_v2_manifest_paths(tmp_path: Path):
lance.write_dataset(pa.table({"a": range(100)}), tmp_path)
manifest_path = os.listdir(tmp_path / "_versions")
manifest_path = _list_manifests(tmp_path / "_versions")
assert len(manifest_path) == 1
assert re.match(r"\d{20}\.manifest", manifest_path[0])

Expand All @@ -457,12 +466,12 @@ def test_v2_manifest_paths_migration(tmp_path: Path):
lance.write_dataset(
pa.table({"a": range(100)}), tmp_path, enable_v2_manifest_paths=False
)
manifest_path = os.listdir(tmp_path / "_versions")
manifest_path = _list_manifests(tmp_path / "_versions")
assert manifest_path == ["1.manifest"]

# Migrate to v2 manifest paths
lance.dataset(tmp_path).migrate_manifest_paths_v2()
manifest_path = os.listdir(tmp_path / "_versions")
manifest_path = _list_manifests(tmp_path / "_versions")
assert len(manifest_path) == 1
assert re.match(r"\d{20}\.manifest", manifest_path[0])

Expand Down
17 changes: 9 additions & 8 deletions rust/lance-namespace-impls/src/dir.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9546,8 +9546,9 @@ mod tests {
.await
.unwrap();

// table_exists first checks __manifest (one list on __manifest/_versions),
// then falls back to the table directory (one list_with_delimiter on test_table.lance).
// table_exists first checks __manifest (which on local FS uses the
// version hint and does no list call), then falls back to the table
// directory (one list_with_delimiter on test_table.lance).
listing_count.store(0, Ordering::SeqCst);

let mut exists_req = TableExistsRequest::new();
Expand All @@ -9556,9 +9557,9 @@ mod tests {

let count = listing_count.load(Ordering::SeqCst);
assert_eq!(
count, 2,
"Expected exactly 2 listing calls for table_exists with migration mode \
(manifest reload + table directory fallback), but got {}",
count, 1,
"Expected exactly 1 listing call for table_exists with migration mode \
(table directory fallback; manifest reload uses the version hint), but got {}",
count
);

Expand All @@ -9571,9 +9572,9 @@ mod tests {

let count = listing_count.load(Ordering::SeqCst);
assert_eq!(
count, 2,
"Expected exactly 2 listing calls for describe_table with migration mode \
(manifest reload + table directory fallback), but got {}",
count, 1,
"Expected exactly 1 listing call for describe_table with migration mode \
(table directory fallback; manifest reload uses the version hint), but got {}",
count
);
}
Expand Down
Loading
Loading