@@ -5,11 +5,14 @@ package ldb
55// https://github.com/google/leveldb/blob/main/doc/index.md
66
77import (
8+ "bytes"
89 "encoding/binary"
910 "fmt"
1011 "hash/crc32"
1112
13+ "github.com/golang/snappy"
1214 "github.com/wader/fq/format"
15+ "github.com/wader/fq/pkg/bitio"
1316 "github.com/wader/fq/pkg/decode"
1417 "github.com/wader/fq/pkg/interp"
1518 "github.com/wader/fq/pkg/scalar"
@@ -39,10 +42,16 @@ const (
3942)
4043
4144// https://github.com/google/leveldb/blob/main/include/leveldb/options.h#L25
// Compression type identifiers, compared against the one-byte "compression"
// field that is read for every block.
const (
	compressionTypeNone      = 0 // block payload is stored as-is
	compressionTypeSnappy    = 1 // block payload is Snappy-compressed
	compressionTypeZstandard = 2 // block payload is Zstandard-compressed
)
50+
4251var compressionTypes = scalar.UintMapSymStr {
43- 0x0 : "none" ,
44- 0x1 : "Snappy" ,
45- 0x2 : "Zstandard" ,
52+ compressionTypeNone : "none" ,
53+ compressionTypeSnappy : "Snappy" ,
54+ compressionTypeZstandard : "Zstandard" ,
4655}
4756
4857// https://github.com/google/leveldb/blob/main/db/dbformat.h#L54
@@ -59,15 +68,15 @@ type BlockHandle struct {
5968func ldbDecode (d * decode.D ) any {
6069 d .Endian = decode .LittleEndian
6170
62- // Read the footer (last 48 bytes)
71+ // footer
72+
6373 d .SeekAbs (d .Len () - footerEncodedLength * 8 )
6474 var indexOffset int64
6575 var indexSize int64
6676 var metaIndexOffset int64
6777 var metaIndexSize int64
6878
6979 d .FieldStruct ("footer" , func (d * decode.D ) {
70- // Extract varints for metaindex offset and size, index offset and size
7180 d .FieldStruct ("metaindex_handle" , func (d * decode.D ) {
7281 metaIndexOffset = int64 (d .FieldUintFn ("offset" , decodeVarInt ))
7382 metaIndexSize = int64 (d .FieldUintFn ("size" , decodeVarInt ))
@@ -80,12 +89,25 @@ func ldbDecode(d *decode.D) any {
8089 d .FieldU64 ("magic_number" , d .UintAssert (tableMagicNumber ), scalar .UintHex )
8190 })
8291
92+ // metaindex
93+
8394 d .SeekAbs (metaIndexOffset * 8 )
84- fieldStructBlock ("metaindex_block" , metaIndexSize , nil , d )
95+ var metaHandles []BlockHandle
96+ fieldStructBlock ("metaindex" , metaIndexSize , readKeyValueContent , func (d * decode.D ) {
97+ // BlockHandle
98+ // https://github.com/google/leveldb/blob/main/table/format.cc#L24
99+ handle := BlockHandle {
100+ Offset : d .FieldUintFn ("offset" , decodeVarInt ),
101+ Size : d .FieldUintFn ("size" , decodeVarInt ),
102+ }
103+ metaHandles = append (metaHandles , handle )
104+ }, d )
105+
106+ // index
85107
86108 d .SeekAbs (indexOffset * 8 )
87109 var dataHandles []BlockHandle
88- fieldStructBlock ("index_block " , indexSize , func (d * decode.D ) {
110+ fieldStructBlock ("index " , indexSize , readKeyValueContent , func (d * decode.D ) {
89111 // BlockHandle
90112 // https://github.com/google/leveldb/blob/main/table/format.cc#L24
91113 handle := BlockHandle {
@@ -95,77 +117,128 @@ func ldbDecode(d *decode.D) any {
95117 dataHandles = append (dataHandles , handle )
96118 }, d )
97119
98- fmt .Println ("total handles" , len (dataHandles ))
99- d .FieldArray ("data_blocks" , func (d * decode.D ) {
100- for _ , handle := range dataHandles {
101- d .SeekAbs (int64 (handle .Offset ) * 8 )
102- fieldStructBlock ("data_block" , int64 (handle .Size ), nil , d )
103- }
104- })
120+ // meta
121+
122+ if len (metaHandles ) > 0 {
123+ d .FieldArray ("meta" , func (d * decode.D ) {
124+ for _ , handle := range metaHandles {
125+ d .SeekAbs (int64 (handle .Offset ) * 8 )
126+ fieldStructBlock ("meta_block" , int64 (handle .Size ), readMetaContent , nil , d )
127+ }
128+ })
129+ }
130+
131+ // data
132+
133+ if len (dataHandles ) > 0 {
134+ d .FieldArray ("data" , func (d * decode.D ) {
135+ for _ , handle := range dataHandles {
136+ d .SeekAbs (int64 (handle .Offset ) * 8 )
137+ fieldStructBlock ("data_block" , int64 (handle .Size ), readKeyValueContent , nil , d )
138+ }
139+ })
140+ }
105141
106142 return nil
107143}
108144
109145// Helpers
110146
111- func fieldStructBlock (name string , size int64 , valueCallbackFn func (d * decode.D ), d * decode.D ) * decode.D {
147+ func fieldStructBlock (name string , size int64 , readBlockContent func ( size int64 , valueCallbackFn func ( d * decode. D ), d * decode. D ), valueCallbackFn func (d * decode.D ), d * decode.D ) * decode.D {
112148 // ReadBlock: https://github.com/google/leveldb/blob/main/table/format.cc#L69
113- uint32Size := int64 (32 )
114- uint64Size := int64 (64 )
115149 return d .FieldStruct (name , func (d * decode.D ) {
116150 start := d .Pos ()
117151 br := d .RawLen (size * 8 )
118- end := d .Pos ()
119152 compressionType := d .FieldU8 ("compression" , compressionTypes , scalar .UintHex )
120153 // validate crc
121154 data := d .ReadAllBits (br )
122155 bytesToCheck := append (data , uint8 (compressionType ))
123156 maskedCRCInt := maskedCrc32 (bytesToCheck )
124157 d .FieldU32 ("crc" , d .UintAssert (uint64 (maskedCRCInt )), scalar .UintHex )
125- d .FieldStruct ("data" , func (d * decode.D ) {
126- // https://github.com/google/leveldb/blob/main/table/block_builder.cc#L16
127- // https://github.com/google/leveldb/blob/main/table/block.cc
128- var restartOffset int64
129- d .SeekAbs (end - uint32Size )
130- d .FieldStruct ("trailer" , func (d * decode.D ) {
131- numRestarts := int64 (d .FieldU32 ("num_restarts" ))
132- restartOffset = size * 8 - (1 + numRestarts )* uint32Size
133- d .SeekAbs (start + restartOffset )
134- d .FieldArray ("restarts" , func (d * decode.D ) {
135- for i := 0 ; i < int (numRestarts ); i ++ {
136- d .FieldU32 ("restart" )
137- }
138- })
158+
159+ d .SeekAbs (start )
160+ if compressionType == compressionTypeNone {
161+ d .FieldStruct ("uncompressed" , func (d * decode.D ) {
162+ readBlockContent (size , valueCallbackFn , d )
139163 })
140- // TK: how do you make an empty entries-array appear _above_ the trailer?
141- // Right now, its omited if empty.
142- if restartOffset <= 0 {
143- return
144- }
145- d .SeekAbs (start )
146- d .FieldArray ("entries" , func (d * decode.D ) {
147- for d .Pos () < start + restartOffset {
148- d .FieldStruct ("entry" , func (d * decode.D ) {
149- d .FieldUintFn ("shared_bytes" , decodeVarInt )
150- unshared := int64 (d .FieldUintFn ("unshared_bytes" , decodeVarInt ))
151- valueLength := d .FieldUintFn ("value_length" , decodeVarInt )
152- // InternalKey
153- // https://github.com/google/leveldb/blob/main/db/dbformat.h#L171
154- d .FieldStruct ("key_delta" , func (d * decode.D ) {
155- d .FieldUTF8 ("user_key" , int (unshared - uint64Size / 8 ))
156- d .FieldU8 ("type" , valueTypes , scalar .UintHex )
157- d .FieldU56 ("sequence_number" )
158- })
159- if valueCallbackFn == nil {
160- d .FieldUTF8 ("value" , int (valueLength ))
161- } else {
162- d .FieldStruct ("value" , valueCallbackFn )
163- }
164- })
164+ } else {
165+ compressedSize := size
166+ compressed := data
167+ bb := & bytes.Buffer {}
168+ _ = bb
169+ switch compressionType {
170+ case compressionTypeSnappy :
171+ decompressed , err := snappy .Decode (nil , compressed )
172+ if err != nil {
173+ d .Fatalf ("failed decompressing data: %v" , err )
165174 }
175+ d .Copy (bb , bytes .NewReader (decompressed ))
176+ default :
177+ d .Fatalf ("Unsupported compression type: %x" , compressionType )
178+ }
179+ d .FieldStructRootBitBufFn ("uncompressed" , bitio .NewBitReader (bb .Bytes (), - 1 ), func (d * decode.D ) {
180+ readBlockContent (int64 (bb .Len ()), valueCallbackFn , d )
166181 })
182+ d .FieldRawLen ("compressed" , compressedSize * 8 )
183+ }
184+
185+ })
186+ }
187+
188+ func readKeyValueContent (size int64 , valueCallbackFn func (d * decode.D ), d * decode.D ) {
189+ // https://github.com/google/leveldb/blob/main/table/block_builder.cc#L16
190+ // https://github.com/google/leveldb/blob/main/table/block.cc
191+ uint32Size := int64 (32 )
192+ uint64Size := int64 (64 )
193+ start := d .Pos ()
194+ end := start + size * 8
195+
196+ var restartOffset int64
197+ d .SeekAbs (end - uint32Size )
198+ d .FieldStruct ("trailer" , func (d * decode.D ) {
199+ numRestarts := int64 (d .FieldU32 ("num_restarts" ))
200+ restartOffset = size * 8 - (1 + numRestarts )* uint32Size
201+ d .SeekAbs (start + restartOffset )
202+ d .FieldArray ("restarts" , func (d * decode.D ) {
203+ for i := 0 ; i < int (numRestarts ); i ++ {
204+ d .FieldU32 ("restart" )
205+ }
167206 })
168207 })
208+ // TK: how do you make an empty entries-array appear _above_ the trailer?
209+ // Right now, its omited if empty.
210+ if restartOffset <= 0 {
211+ return
212+ }
213+ d .SeekAbs (start )
214+ d .FieldArray ("entries" , func (d * decode.D ) {
215+ for d .Pos () < start + restartOffset {
216+ d .FieldStruct ("entry" , func (d * decode.D ) {
217+ d .FieldUintFn ("shared_bytes" , decodeVarInt )
218+ unshared := int64 (d .FieldUintFn ("unshared_bytes" , decodeVarInt ))
219+ valueLength := d .FieldUintFn ("value_length" , decodeVarInt )
220+ // InternalKey
221+ // https://github.com/google/leveldb/blob/main/db/dbformat.h#L171
222+ d .FieldStruct ("key_delta" , func (d * decode.D ) {
223+ d .FieldUTF8 ("user_key" , int (unshared - uint64Size / 8 ))
224+ d .FieldU8 ("type" , valueTypes , scalar .UintHex )
225+ d .FieldU56 ("sequence_number" )
226+ })
227+ if valueCallbackFn == nil {
228+ d .FieldUTF8 ("value" , int (valueLength ))
229+ } else {
230+ d .FieldStruct ("value" , valueCallbackFn )
231+ }
232+ })
233+ }
234+ })
235+ }
236+
237+ func readMetaContent (size int64 , valueCallbackFn func (d * decode.D ), d * decode.D ) {
238+ // TK(2023-12-04)
239+ // https://github.com/google/leveldb/blob/main/doc/table_format.md#filter-meta-block
240+ // https://github.com/google/leveldb/blob/main/table/filter_block.cc
241+ d .FieldRawLen ("raw" , size * 8 )
169242}
170243
171244func decodeVarInt (d * decode.D ) uint64 {
0 commit comments