Skip to content
/ fq Public
forked from wader/fq

Commit b05aa99

Browse files
committed
leveldb: address PR comments
1 parent efc59a8 commit b05aa99

27 files changed

+219
-72
lines changed

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,7 @@ ipv6_packet,
102102
jpeg,
103103
json,
104104
jsonl,
105+
[leveldb_ldb](doc/formats.md#leveldb_ldb),
105106
[luajit](doc/formats.md#luajit),
106107
[macho](doc/formats.md#macho),
107108
macho_fat,

doc/formats.md

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@
7272
|`jpeg` |Joint&nbsp;Photographic&nbsp;Experts&nbsp;Group&nbsp;file |<sub>`exif` `icc_profile`</sub>|
7373
|`json` |JavaScript&nbsp;Object&nbsp;Notation |<sub></sub>|
7474
|`jsonl` |JavaScript&nbsp;Object&nbsp;Notation&nbsp;Lines |<sub></sub>|
75+
|[`leveldb_ldb`](#leveldb_ldb) |LevelDB&nbsp;Table |<sub></sub>|
7576
|[`luajit`](#luajit) |LuaJIT&nbsp;2.0&nbsp;bytecode |<sub></sub>|
7677
|[`macho`](#macho) |Mach-O&nbsp;macOS&nbsp;executable |<sub></sub>|
7778
|`macho_fat` |Fat&nbsp;Mach-O&nbsp;macOS&nbsp;executable&nbsp;(multi-architecture) |<sub>`macho`</sub>|
@@ -131,7 +132,7 @@
131132
|`ip_packet` |Group |<sub>`icmp` `icmpv6` `tcp_segment` `udp_datagram`</sub>|
132133
|`link_frame` |Group |<sub>`bsd_loopback_frame` `ether8023_frame` `ipv4_packet` `ipv6_packet` `sll2_packet` `sll_packet`</sub>|
133134
|`mp3_frame_tags` |Group |<sub>`mp3_frame_vbri` `mp3_frame_xing`</sub>|
134-
|`probe` |Group |<sub>`adts` `aiff` `apple_bookmark` `ar` `avi` `avro_ocf` `bitcoin_blkdat` `bplist` `bzip2` `caff` `elf` `flac` `gif` `gzip` `html` `jpeg` `json` `jsonl` `luajit` `macho` `macho_fat` `matroska` `moc3` `mp3` `mp4` `mpeg_ts` `ogg` `opentimestamps` `pcap` `pcapng` `png` `tar` `tiff` `toml` `tzif` `wasm` `wav` `webp` `xml` `yaml` `zip`</sub>|
135+
|`probe` |Group |<sub>`adts` `aiff` `apple_bookmark` `ar` `avi` `avro_ocf` `bitcoin_blkdat` `bplist` `bzip2` `caff` `elf` `flac` `gif` `gzip` `html` `jpeg` `json` `jsonl` `leveldb_ldb` `luajit` `macho` `macho_fat` `matroska` `moc3` `mp3` `mp4` `mpeg_ts` `ogg` `opentimestamps` `pcap` `pcapng` `png` `tar` `tiff` `toml` `tzif` `wasm` `wav` `webp` `xml` `yaml` `zip`</sub>|
135136
|`tcp_stream` |Group |<sub>`dns_tcp` `rtmp` `tls`</sub>|
136137
|`udp_payload` |Group |<sub>`dns`</sub>|
137138

@@ -690,6 +691,23 @@ $ fq -n -d html '[inputs | {key: input_filename, value: .html.head.title?}] | fr
690691
$ fq -r -o array=true -d html '.. | select(.[0] == "a" and .[1].href)?.[1].href' file.html
691692
```
692693

694+
## leveldb_ldb
695+
696+
### Limitations
697+
698+
- No Meta Blocks (like "filter") are decoded yet.
699+
- Zstandard decompression is not implemented yet.
700+
701+
### Authors
702+
703+
- [@mikez](https://github.com/mikez), original author
704+
705+
### References
706+
707+
- https://github.com/google/leveldb/blob/main/doc/table_format.md
708+
- https://github.com/google/leveldb/blob/main/doc/impl.md
709+
- https://github.com/google/leveldb/blob/main/doc/index.md
710+
693711
## luajit
694712

695713
### Authors

format/all/all.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ import (
3030
_ "github.com/wader/fq/format/inet"
3131
_ "github.com/wader/fq/format/jpeg"
3232
_ "github.com/wader/fq/format/json"
33-
_ "github.com/wader/fq/format/ldb"
33+
_ "github.com/wader/fq/format/leveldb"
3434
_ "github.com/wader/fq/format/luajit"
3535
_ "github.com/wader/fq/format/markdown"
3636
_ "github.com/wader/fq/format/math"

format/format.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,7 @@ var (
125125
JPEG = &decode.Group{Name: "jpeg"}
126126
JSON = &decode.Group{Name: "json"}
127127
JSONL = &decode.Group{Name: "jsonl"}
128-
LDB = &decode.Group{Name: "ldb"}
128+
LDB = &decode.Group{Name: "leveldb_ldb"}
129129
LuaJIT = &decode.Group{Name: "luajit"}
130130
MachO = &decode.Group{Name: "macho"}
131131
MachO_Fat = &decode.Group{Name: "macho_fat"}
Lines changed: 45 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,12 @@
1-
package ldb
1+
package leveldb
22

33
// https://github.com/google/leveldb/blob/main/doc/table_format.md
44
// https://github.com/google/leveldb/blob/main/doc/impl.md
55
// https://github.com/google/leveldb/blob/main/doc/index.md
66

77
import (
88
"bytes"
9-
"encoding/binary"
10-
"fmt"
9+
"embed"
1110
"hash/crc32"
1211

1312
"github.com/golang/snappy"
@@ -18,6 +17,9 @@ import (
1817
"github.com/wader/fq/pkg/scalar"
1918
)
2019

20+
//go:embed leveldb_ldb.md
21+
var leveldbFS embed.FS
22+
2123
func init() {
2224
interp.RegisterFormat(
2325
format.LDB,
@@ -26,19 +28,18 @@ func init() {
2628
Groups: []*decode.Group{format.Probe},
2729
DecodeFn: ldbDecode,
2830
})
31+
interp.RegisterFS(leveldbFS)
2932
}
3033

3134
const (
3235
// four varints (each max 10 bytes) + magic number (8 bytes)
3336
// https://github.com/google/leveldb/blob/main/table/format.h#L53
34-
footerEncodedLength = 4*10 + 8
37+
footerEncodedLength = (4*10 + 8) * 8
38+
magicNumberLength = 8 * 8
3539
// leading 64 bits of
3640
// echo http://code.google.com/p/leveldb/ | sha1sum
3741
// https://github.com/google/leveldb/blob/main/table/format.h#L76
3842
tableMagicNumber = 0xdb4775248b80fb57
39-
// 1-byte compression type + 4-bytes CRC
40-
// https://github.com/google/leveldb/blob/main/table/format.h#L79
41-
blockTrailerSize = 5
4243
)
4344

4445
// https://github.com/google/leveldb/blob/main/include/leveldb/options.h#L25
@@ -50,8 +51,8 @@ const (
5051

5152
var compressionTypes = scalar.UintMapSymStr{
5253
compressionTypeNone: "none",
53-
compressionTypeSnappy: "Snappy",
54-
compressionTypeZstandard: "Zstandard",
54+
compressionTypeSnappy: "snappy",
55+
compressionTypeZstandard: "zstd",
5556
}
5657

5758
// https://github.com/google/leveldb/blob/main/db/dbformat.h#L54
@@ -70,35 +71,37 @@ func ldbDecode(d *decode.D) any {
7071

7172
// footer
7273

73-
d.SeekAbs(d.Len() - footerEncodedLength*8)
7474
var indexOffset int64
7575
var indexSize int64
7676
var metaIndexOffset int64
7777
var metaIndexSize int64
7878

79+
d.SeekAbs(d.Len() - footerEncodedLength)
7980
d.FieldStruct("footer", func(d *decode.D) {
80-
d.FieldStruct("metaindex_handle", func(d *decode.D) {
81-
metaIndexOffset = int64(d.FieldUintFn("offset", decodeVarInt))
82-
metaIndexSize = int64(d.FieldUintFn("size", decodeVarInt))
83-
})
84-
d.FieldStruct("index_handle", func(d *decode.D) {
85-
indexOffset = int64(d.FieldUintFn("offset", decodeVarInt))
86-
indexSize = int64(d.FieldUintFn("size", decodeVarInt))
81+
handleLength := d.LimitedFn(footerEncodedLength, func(d *decode.D) {
82+
d.FieldStruct("metaindex_handle", func(d *decode.D) {
83+
metaIndexOffset = int64(d.FieldULEB128("offset"))
84+
metaIndexSize = int64(d.FieldULEB128("size"))
85+
})
86+
d.FieldStruct("index_handle", func(d *decode.D) {
87+
indexOffset = int64(d.FieldULEB128("offset"))
88+
indexSize = int64(d.FieldULEB128("size"))
89+
})
8790
})
88-
d.FieldRawLen("padding", d.Len()-d.Pos()-8*8)
91+
d.FieldRawLen("padding", footerEncodedLength-handleLength-magicNumberLength)
8992
d.FieldU64("magic_number", d.UintAssert(tableMagicNumber), scalar.UintHex)
9093
})
9194

9295
// metaindex
9396

9497
d.SeekAbs(metaIndexOffset * 8)
9598
var metaHandles []BlockHandle
96-
fieldStructBlock("metaindex", metaIndexSize, readKeyValueContent, func(d *decode.D) {
99+
readBlock("metaindex", metaIndexSize, readKeyValueContent, func(d *decode.D) {
97100
// BlockHandle
98101
// https://github.com/google/leveldb/blob/main/table/format.cc#L24
99102
handle := BlockHandle{
100-
Offset: d.FieldUintFn("offset", decodeVarInt),
101-
Size: d.FieldUintFn("size", decodeVarInt),
103+
Offset: d.FieldULEB128("offset"),
104+
Size: d.FieldULEB128("size"),
102105
}
103106
metaHandles = append(metaHandles, handle)
104107
}, d)
@@ -107,12 +110,12 @@ func ldbDecode(d *decode.D) any {
107110

108111
d.SeekAbs(indexOffset * 8)
109112
var dataHandles []BlockHandle
110-
fieldStructBlock("index", indexSize, readKeyValueContent, func(d *decode.D) {
113+
readBlock("index", indexSize, readKeyValueContent, func(d *decode.D) {
111114
// BlockHandle
112115
// https://github.com/google/leveldb/blob/main/table/format.cc#L24
113116
handle := BlockHandle{
114-
Offset: d.FieldUintFn("offset", decodeVarInt),
115-
Size: d.FieldUintFn("size", decodeVarInt),
117+
Offset: d.FieldULEB128("offset"),
118+
Size: d.FieldULEB128("size"),
116119
}
117120
dataHandles = append(dataHandles, handle)
118121
}, d)
@@ -123,7 +126,7 @@ func ldbDecode(d *decode.D) any {
123126
d.FieldArray("meta", func(d *decode.D) {
124127
for _, handle := range metaHandles {
125128
d.SeekAbs(int64(handle.Offset) * 8)
126-
fieldStructBlock("meta_block", int64(handle.Size), readMetaContent, nil, d)
129+
readBlock("meta_block", int64(handle.Size), readMetaContent, nil, d)
127130
}
128131
})
129132
}
@@ -134,28 +137,29 @@ func ldbDecode(d *decode.D) any {
134137
d.FieldArray("data", func(d *decode.D) {
135138
for _, handle := range dataHandles {
136139
d.SeekAbs(int64(handle.Offset) * 8)
137-
fieldStructBlock("data_block", int64(handle.Size), readKeyValueContent, nil, d)
140+
readBlock("data_block", int64(handle.Size), readKeyValueContent, nil, d)
138141
}
139142
})
140143
}
141144

142145
return nil
143146
}
144147

145-
// Helpers
148+
// Readers
146149

147-
func fieldStructBlock(name string, size int64, readBlockContent func(size int64, valueCallbackFn func(d *decode.D), d *decode.D), valueCallbackFn func(d *decode.D), d *decode.D) *decode.D {
150+
func readBlock(name string, size int64, readBlockContent func(size int64, valueCallbackFn func(d *decode.D), d *decode.D), valueCallbackFn func(d *decode.D), d *decode.D) {
148151
// ReadBlock: https://github.com/google/leveldb/blob/main/table/format.cc#L69
149-
return d.FieldStruct(name, func(d *decode.D) {
152+
d.FieldStruct(name, func(d *decode.D) {
150153
start := d.Pos()
151154
br := d.RawLen(size * 8)
155+
// compression (1 byte)
152156
compressionType := d.FieldU8("compression", compressionTypes, scalar.UintHex)
153-
// validate crc
157+
// crc (4 bytes)
154158
data := d.ReadAllBits(br)
155159
bytesToCheck := append(data, uint8(compressionType))
156160
maskedCRCInt := maskedCrc32(bytesToCheck)
157161
d.FieldU32("crc", d.UintAssert(uint64(maskedCRCInt)), scalar.UintHex)
158-
162+
// decompress if needed
159163
d.SeekAbs(start)
160164
if compressionType == compressionTypeNone {
161165
d.FieldStruct("uncompressed", func(d *decode.D) {
@@ -165,7 +169,6 @@ func fieldStructBlock(name string, size int64, readBlockContent func(size int64,
165169
compressedSize := size
166170
compressed := data
167171
bb := &bytes.Buffer{}
168-
_ = bb
169172
switch compressionType {
170173
case compressionTypeSnappy:
171174
decompressed, err := snappy.Decode(nil, compressed)
@@ -206,17 +209,17 @@ func readKeyValueContent(size int64, valueCallbackFn func(d *decode.D), d *decod
206209
})
207210
})
208211
// TK: how do you make an empty entries-array appear _above_ the trailer?
209-
// Right now, its omited if empty.
212+
// Right now, it's omitted if empty.
210213
if restartOffset <= 0 {
211214
return
212215
}
213216
d.SeekAbs(start)
214217
d.FieldArray("entries", func(d *decode.D) {
215218
for d.Pos() < start+restartOffset {
216219
d.FieldStruct("entry", func(d *decode.D) {
217-
d.FieldUintFn("shared_bytes", decodeVarInt)
218-
unshared := int64(d.FieldUintFn("unshared_bytes", decodeVarInt))
219-
valueLength := d.FieldUintFn("value_length", decodeVarInt)
220+
d.FieldULEB128("shared_bytes")
221+
unshared := int64(d.FieldULEB128("unshared_bytes"))
222+
valueLength := d.FieldULEB128("value_length")
220223
// InternalKey
221224
// https://github.com/google/leveldb/blob/main/db/dbformat.h#L171
222225
d.FieldStruct("key_delta", func(d *decode.D) {
@@ -241,39 +244,18 @@ func readMetaContent(size int64, valueCallbackFn func(d *decode.D), d *decode.D)
241244
d.FieldRawLen("raw", size*8)
242245
}
243246

244-
func decodeVarInt(d *decode.D) uint64 {
245-
var value uint64 = 0
246-
var shift uint64 = 0
247-
248-
for {
249-
b := d.U8()
250-
value |= (b & 0b01111111) << shift
251-
shift += 7
252-
if b&0b10000000 == 0 {
253-
break
254-
}
255-
}
247+
// Helpers
256248

257-
return value
249+
func maskedCrc32(bytes []uint8) uint32 {
250+
crc32C := crc32.New(crc32.MakeTable(crc32.Castagnoli))
251+
crc32C.Write(bytes)
252+
return mask(crc32C.Sum32())
258253
}
259254

260-
// Return a masked representation of the crc.
255+
// Return a masked representation of a CRC.
261256
// https://github.com/google/leveldb/blob/main/util/crc32c.h#L29
262257
func mask(crc uint32) uint32 {
263258
const kMaskDelta = 0xa282ead8
264259
// Rotate right by 15 bits and add a constant.
265260
return ((crc >> 15) | (crc << 17)) + kMaskDelta
266261
}
267-
268-
func maskedCrc32(bytes []uint8) uint32 {
269-
crc32C := crc32.New(crc32.MakeTable(crc32.Castagnoli))
270-
crc32C.Write(bytes)
271-
return mask(crc32C.Sum32())
272-
}
273-
274-
// Print the hexadecimal representation in little-endian format.
275-
func printLE(name string, value uint32) {
276-
buf := make([]byte, 4)
277-
binary.LittleEndian.PutUint32(buf, value)
278-
fmt.Printf("%s: % x\n", name, buf)
279-
}

format/leveldb/leveldb_ldb.md

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
### Limitations
2+
3+
- No Meta Blocks (like "filter") are decoded yet.
4+
- Zstandard decompression is not implemented yet.
5+
6+
### Authors
7+
8+
- [@mikez](https://github.com/mikez), original author
9+
10+
### References
11+
12+
- https://github.com/google/leveldb/blob/main/doc/table_format.md
13+
- https://github.com/google/leveldb/blob/main/doc/impl.md
14+
- https://github.com/google/leveldb/blob/main/doc/index.md
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
$ fq -h leveldb_ldb
2+
leveldb_ldb: LevelDB Table decoder
3+
4+
Decode examples
5+
===============
6+
7+
# Decode file as leveldb_ldb
8+
$ fq -d leveldb_ldb . file
9+
# Decode value as leveldb_ldb
10+
... | leveldb_ldb
11+
12+
Limitations
13+
===========
14+
- No Meta Blocks (like "filter") are decoded yet.
15+
- Zstandard decompression is not implemented yet.
16+
17+
Authors
18+
=======
19+
- @mikez (https://github.com/mikez), original author
20+
21+
References
22+
==========
23+
- https://github.com/google/leveldb/blob/main/doc/table_format.md
24+
- https://github.com/google/leveldb/blob/main/doc/impl.md
25+
- https://github.com/google/leveldb/blob/main/doc/index.md

0 commit comments

Comments
 (0)