1- package ldb
1+ package leveldb
22
33// https://github.com/google/leveldb/blob/main/doc/table_format.md
44// https://github.com/google/leveldb/blob/main/doc/impl.md
55// https://github.com/google/leveldb/blob/main/doc/index.md
66
77import (
88 "bytes"
9- "encoding/binary"
10- "fmt"
9+ "embed"
1110 "hash/crc32"
1211
1312 "github.com/golang/snappy"
@@ -18,6 +17,9 @@ import (
1817 "github.com/wader/fq/pkg/scalar"
1918)
2019
20+ //go:embed leveldb_ldb.md
21+ var leveldbFS embed.FS
22+
2123func init () {
2224 interp .RegisterFormat (
2325 format .LDB ,
@@ -26,19 +28,18 @@ func init() {
2628 Groups : []* decode.Group {format .Probe },
2729 DecodeFn : ldbDecode ,
2830 })
31+ interp .RegisterFS (leveldbFS )
2932}
3033
// Footer layout constants. All lengths are expressed in bits (byte counts
// multiplied by 8) because they are used directly as bit offsets and bit
// lengths when seeking to and decoding the footer.
const (
	// Four varints (each max 10 bytes) + magic number (8 bytes), in bits.
	// https://github.com/google/leveldb/blob/main/table/format.h#L53
	footerEncodedLength = (4*10 + 8) * 8
	// Length of the trailing magic number (8 bytes), in bits.
	magicNumberLength = 8 * 8
	// Leading 64 bits of
	// echo http://code.google.com/p/leveldb/ | sha1sum
	// https://github.com/google/leveldb/blob/main/table/format.h#L76
	tableMagicNumber = 0xdb4775248b80fb57
)
4344
4445// https://github.com/google/leveldb/blob/main/include/leveldb/options.h#L25
@@ -50,8 +51,8 @@ const (
5051
5152var compressionTypes = scalar.UintMapSymStr {
5253 compressionTypeNone : "none" ,
53- compressionTypeSnappy : "Snappy " ,
54- compressionTypeZstandard : "Zstandard " ,
54+ compressionTypeSnappy : "snappy " ,
55+ compressionTypeZstandard : "zstd " ,
5556}
5657
5758// https://github.com/google/leveldb/blob/main/db/dbformat.h#L54
@@ -70,35 +71,37 @@ func ldbDecode(d *decode.D) any {
7071
7172 // footer
7273
73- d .SeekAbs (d .Len () - footerEncodedLength * 8 )
7474 var indexOffset int64
7575 var indexSize int64
7676 var metaIndexOffset int64
7777 var metaIndexSize int64
7878
79+ d .SeekAbs (d .Len () - footerEncodedLength )
7980 d .FieldStruct ("footer" , func (d * decode.D ) {
80- d .FieldStruct ("metaindex_handle" , func (d * decode.D ) {
81- metaIndexOffset = int64 (d .FieldUintFn ("offset" , decodeVarInt ))
82- metaIndexSize = int64 (d .FieldUintFn ("size" , decodeVarInt ))
83- })
84- d .FieldStruct ("index_handle" , func (d * decode.D ) {
85- indexOffset = int64 (d .FieldUintFn ("offset" , decodeVarInt ))
86- indexSize = int64 (d .FieldUintFn ("size" , decodeVarInt ))
81+ handleLength := d .LimitedFn (footerEncodedLength , func (d * decode.D ) {
82+ d .FieldStruct ("metaindex_handle" , func (d * decode.D ) {
83+ metaIndexOffset = int64 (d .FieldULEB128 ("offset" ))
84+ metaIndexSize = int64 (d .FieldULEB128 ("size" ))
85+ })
86+ d .FieldStruct ("index_handle" , func (d * decode.D ) {
87+ indexOffset = int64 (d .FieldULEB128 ("offset" ))
88+ indexSize = int64 (d .FieldULEB128 ("size" ))
89+ })
8790 })
88- d .FieldRawLen ("padding" , d . Len () - d . Pos () - 8 * 8 )
91+ d .FieldRawLen ("padding" , footerEncodedLength - handleLength - magicNumberLength )
8992 d .FieldU64 ("magic_number" , d .UintAssert (tableMagicNumber ), scalar .UintHex )
9093 })
9194
9295 // metaindex
9396
9497 d .SeekAbs (metaIndexOffset * 8 )
9598 var metaHandles []BlockHandle
96- fieldStructBlock ("metaindex" , metaIndexSize , readKeyValueContent , func (d * decode.D ) {
99+ readBlock ("metaindex" , metaIndexSize , readKeyValueContent , func (d * decode.D ) {
97100 // BlockHandle
98101 // https://github.com/google/leveldb/blob/main/table/format.cc#L24
99102 handle := BlockHandle {
100- Offset : d .FieldUintFn ("offset" , decodeVarInt ),
101- Size : d .FieldUintFn ("size" , decodeVarInt ),
103+ Offset : d .FieldULEB128 ("offset" ),
104+ Size : d .FieldULEB128 ("size" ),
102105 }
103106 metaHandles = append (metaHandles , handle )
104107 }, d )
@@ -107,12 +110,12 @@ func ldbDecode(d *decode.D) any {
107110
108111 d .SeekAbs (indexOffset * 8 )
109112 var dataHandles []BlockHandle
110- fieldStructBlock ("index" , indexSize , readKeyValueContent , func (d * decode.D ) {
113+ readBlock ("index" , indexSize , readKeyValueContent , func (d * decode.D ) {
111114 // BlockHandle
112115 // https://github.com/google/leveldb/blob/main/table/format.cc#L24
113116 handle := BlockHandle {
114- Offset : d .FieldUintFn ("offset" , decodeVarInt ),
115- Size : d .FieldUintFn ("size" , decodeVarInt ),
117+ Offset : d .FieldULEB128 ("offset" ),
118+ Size : d .FieldULEB128 ("size" ),
116119 }
117120 dataHandles = append (dataHandles , handle )
118121 }, d )
@@ -123,7 +126,7 @@ func ldbDecode(d *decode.D) any {
123126 d .FieldArray ("meta" , func (d * decode.D ) {
124127 for _ , handle := range metaHandles {
125128 d .SeekAbs (int64 (handle .Offset ) * 8 )
126- fieldStructBlock ("meta_block" , int64 (handle .Size ), readMetaContent , nil , d )
129+ readBlock ("meta_block" , int64 (handle .Size ), readMetaContent , nil , d )
127130 }
128131 })
129132 }
@@ -134,28 +137,29 @@ func ldbDecode(d *decode.D) any {
134137 d .FieldArray ("data" , func (d * decode.D ) {
135138 for _ , handle := range dataHandles {
136139 d .SeekAbs (int64 (handle .Offset ) * 8 )
137- fieldStructBlock ("data_block" , int64 (handle .Size ), readKeyValueContent , nil , d )
140+ readBlock ("data_block" , int64 (handle .Size ), readKeyValueContent , nil , d )
138141 }
139142 })
140143 }
141144
142145 return nil
143146}
144147
145- // Helpers
148+ // Readers
146149
147- func fieldStructBlock (name string , size int64 , readBlockContent func (size int64 , valueCallbackFn func (d * decode.D ), d * decode.D ), valueCallbackFn func (d * decode.D ), d * decode.D ) * decode. D {
150+ func readBlock (name string , size int64 , readBlockContent func (size int64 , valueCallbackFn func (d * decode.D ), d * decode.D ), valueCallbackFn func (d * decode.D ), d * decode.D ) {
148151 // ReadBlock: https://github.com/google/leveldb/blob/main/table/format.cc#L69
149- return d .FieldStruct (name , func (d * decode.D ) {
152+ d .FieldStruct (name , func (d * decode.D ) {
150153 start := d .Pos ()
151154 br := d .RawLen (size * 8 )
155+ // compression (1 byte)
152156 compressionType := d .FieldU8 ("compression" , compressionTypes , scalar .UintHex )
153- // validate crc
157+ // crc (4 bytes)
154158 data := d .ReadAllBits (br )
155159 bytesToCheck := append (data , uint8 (compressionType ))
156160 maskedCRCInt := maskedCrc32 (bytesToCheck )
157161 d .FieldU32 ("crc" , d .UintAssert (uint64 (maskedCRCInt )), scalar .UintHex )
158-
162+ // decompress if needed
159163 d .SeekAbs (start )
160164 if compressionType == compressionTypeNone {
161165 d .FieldStruct ("uncompressed" , func (d * decode.D ) {
@@ -165,7 +169,6 @@ func fieldStructBlock(name string, size int64, readBlockContent func(size int64,
165169 compressedSize := size
166170 compressed := data
167171 bb := & bytes.Buffer {}
168- _ = bb
169172 switch compressionType {
170173 case compressionTypeSnappy :
171174 decompressed , err := snappy .Decode (nil , compressed )
@@ -206,17 +209,17 @@ func readKeyValueContent(size int64, valueCallbackFn func(d *decode.D), d *decod
206209 })
207210 })
208211 // TK: how do you make an empty entries-array appear _above_ the trailer?
209- // Right now, its omited if empty.
212+ // Right now, it's omitted if empty.
210213 if restartOffset <= 0 {
211214 return
212215 }
213216 d .SeekAbs (start )
214217 d .FieldArray ("entries" , func (d * decode.D ) {
215218 for d .Pos () < start + restartOffset {
216219 d .FieldStruct ("entry" , func (d * decode.D ) {
217- d .FieldUintFn ("shared_bytes" , decodeVarInt )
218- unshared := int64 (d .FieldUintFn ("unshared_bytes" , decodeVarInt ))
219- valueLength := d .FieldUintFn ("value_length" , decodeVarInt )
220+ d .FieldULEB128 ("shared_bytes" )
221+ unshared := int64 (d .FieldULEB128 ("unshared_bytes" ))
222+ valueLength := d .FieldULEB128 ("value_length" )
220223 // InternalKey
221224 // https://github.com/google/leveldb/blob/main/db/dbformat.h#L171
222225 d .FieldStruct ("key_delta" , func (d * decode.D ) {
@@ -241,39 +244,18 @@ func readMetaContent(size int64, valueCallbackFn func(d *decode.D), d *decode.D)
241244 d .FieldRawLen ("raw" , size * 8 )
242245}
243246
244- func decodeVarInt (d * decode.D ) uint64 {
245- var value uint64 = 0
246- var shift uint64 = 0
247-
248- for {
249- b := d .U8 ()
250- value |= (b & 0b01111111 ) << shift
251- shift += 7
252- if b & 0b10000000 == 0 {
253- break
254- }
255- }
247+ // Helpers
256248
257- return value
249+ func maskedCrc32 (bytes []uint8 ) uint32 {
250+ crc32C := crc32 .New (crc32 .MakeTable (crc32 .Castagnoli ))
251+ crc32C .Write (bytes )
252+ return mask (crc32C .Sum32 ())
258253}
259254
// mask returns a masked representation of a CRC: the value rotated right by
// 15 bits plus a fixed constant, with uint32 wrap-around on overflow.
// https://github.com/google/leveldb/blob/main/util/crc32c.h#L29
func mask(crc uint32) uint32 {
	const kMaskDelta = 0xa282ead8
	// Rotate right by 15 bits and add a constant.
	return ((crc >> 15) | (crc << 17)) + kMaskDelta
}
267-
268- func maskedCrc32 (bytes []uint8 ) uint32 {
269- crc32C := crc32 .New (crc32 .MakeTable (crc32 .Castagnoli ))
270- crc32C .Write (bytes )
271- return mask (crc32C .Sum32 ())
272- }
273-
274- // Print the hexadecimal representation in little-endian format.
275- func printLE (name string , value uint32 ) {
276- buf := make ([]byte , 4 )
277- binary .LittleEndian .PutUint32 (buf , value )
278- fmt .Printf ("%s: % x\n " , name , buf )
279- }
0 commit comments