Skip to content

Commit 17eb596

Browse files
authored
[Issue pixelsdb#806] move submodule pixels-duckdb to pixels-duckdb/duckdb (pixelsdb#807)
Also specify protobuf and googletest versions in 'make update', and move extension cpp files into pixels-duckdb.
1 parent a30f863 commit 17eb596

15 files changed

+50
-50
lines changed

.gitmodules

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,9 @@
11
[submodule "cpp/third_party/protobuf"]
22
path = cpp/third-party/protobuf
33
url = https://github.com/protocolbuffers/protobuf
4-
[submodule "cpp/pixels-duckdb"]
5-
path = cpp/pixels-duckdb
4+
[submodule "cpp/pixels-duckdb/duckdb"]
5+
path = cpp/pixels-duckdb/duckdb
66
url = git@github.com:pixelsdb/duckdb.git
7-
[submodule "writer_cpp_test/third-party/protobuf"]
8-
path = writer_cpp_test/third-party/protobuf
9-
url = https://github.com/protocolbuffers/protobuf
107
[submodule "cpp/third-party/googletest"]
118
path = cpp/third-party/googletest
129
url = git@github.com:google/googletest.git

cpp/CMakeLists.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,8 @@ project(${TARGET_NAME})
88
include_directories(include)
99

1010
set(EXTENSION_SOURCES
11-
pixels_extension.cpp
12-
PixelsScanFunction.cpp
11+
pixels-duckdb/pixels_extension.cpp
12+
pixels-duckdb/PixelsScanFunction.cpp
1313
)
1414
add_library(${EXTENSION_NAME} STATIC ${EXTENSION_SOURCES})
1515

cpp/Makefile

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,9 @@ pull:
3535
git submodule update --recursive --init
3636

3737
update:
38-
git submodule update --remote --merge
38+
git submodule update --remote --merge pixels-duckdb/duckdb
39+
git -C third-party/googletest checkout v1.15.2
40+
git -C third-party/protobuf checkout v3.21.6
3941

4042
deps:
4143
mkdir -p "${PROTOBUF_DIR}/cmake/build" && cd "third-party/protobuf/cmake/build" && \
@@ -45,13 +47,13 @@ deps:
4547

4648
clean:
4749
rm -rf build
48-
cd pixels-duckdb && make clean
50+
cd pixels-duckdb/duckdb && make clean
4951

5052
# Main build
5153
debug: deps
52-
cmake $(GENERATOR) $(FORCE_COLOR) $(EXTENSION_FLAGS) ${CLIENT_FLAGS} -DEXTENSION_STATIC_BUILD=1 -DCMAKE_BUILD_TYPE=Debug ${BUILD_FLAGS} -S pixels-duckdb/ -B build/debug && \
54+
cmake $(GENERATOR) $(FORCE_COLOR) $(EXTENSION_FLAGS) ${CLIENT_FLAGS} -DEXTENSION_STATIC_BUILD=1 -DCMAKE_BUILD_TYPE=Debug ${BUILD_FLAGS} -S pixels-duckdb/duckdb -B build/debug && \
5355
cmake --build build/debug --config Debug
5456

5557
release: deps
56-
cmake $(GENERATOR) $(FORCE_COLOR) $(EXTENSION_FLAGS) ${CLIENT_FLAGS} -DEXTENSION_STATIC_BUILD=1 -DCMAKE_BUILD_TYPE=Release ${BUILD_FLAGS} -S pixels-duckdb/ -B build/release && \
58+
cmake $(GENERATOR) $(FORCE_COLOR) $(EXTENSION_FLAGS) ${CLIENT_FLAGS} -DEXTENSION_STATIC_BUILD=1 -DCMAKE_BUILD_TYPE=Release ${BUILD_FLAGS} -S pixels-duckdb/duckdb -B build/release && \
5759
cmake --build build/release --config Release

cpp/README.md

Lines changed: 32 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,14 @@
1-
# Pixels Reader C++ Implementation
1+
# Pixels C++ Implementation
22

33
## Usage
44

55
### Compilation
66

7-
The repository relies on [duckdb](https://github.com/yuly16/duckdb), It is refered from [pixels reader](https://github.com/yuly16/pixels-reader-cxx).
8-
It also relies on protobuf and liburing. We don't need to manually install these prerequisites, since our compilation code would
9-
automatically download them.
7+
Pixels C++ relies on protobuf and liburing. It also builds the pixels extension of duckdb by default,
8+
which relies on [duckdb](https://github.com/pixelsdb/duckdb).
9+
We don't need to manually install these prerequisites, since the Makefile would automatically download them.
1010

11-
Pixels C++ reader uses `iouring` system call. You can use the following command to check if iouring is supported in your system:
11+
Pixels C++ reader uses `io_uring` system calls. You can use the following command to check whether io_uring is supported on your system:
1212

1313
```shell
1414
grep io_uring_setup /proc/kallsyms
@@ -35,27 +35,34 @@ Then set the `PIXELS_SRC` and `PIXELS_HOME` environment variable (Ignore it if y
3535

3636
Pull the dependency code:
3737

38-
```
38+
```shell
3939
make pull
4040
```
4141

42-
Finally, compile the code:
42+
If you have already run `make pull` before, you can check out the latest submodules:
4343

44+
```shell
45+
make update
4446
```
47+
48+
Finally, compile the code:
49+
50+
```shell
4551
make -j
4652
```
4753

4854
### Example
4955

50-
Here is a pixels reader example in the directory `duckdb/examples/pixels-example`. This example validates the correctness of compilation and gives you an idea how to load the pixels data.
56+
Here is a pixels reader example in the directory `duckdb/examples/pixels-example`.
57+
This example validates the correctness of compilation and gives you an idea of how to load the Pixels data.
5158

5259
To run this binary:
5360

5461
```
5562
./build/release/examples/pixels-example/pixels-example
5663
```
5764

58-
### Run pixels reader in CLion
65+
### Run pixels C++ in CLion
5966
In order to run the code in CLion, we should set the cmake configuration in CLion.
6067

6168
In `"setting"`->`"Build,Execution,Deployment"`->`"CMake"`, set `"Generator"` as `"Let CMake decide"`, and
@@ -74,55 +81,55 @@ We run one simple query by the following command:
7481
```
7582
build/release/benchmark/benchmark_runner "benchmark/tpch/pixels/tpch_1/q01.benchmark"
7683
```
77-
#### 2. TPCH benchmark
84+
#### 2. TPC-H benchmark
7885
The benchmark script is `run_benchmark.py` in `duckdb/scripts` directory.
7986

8087
Check the usage:
8188

8289
```
83-
cd pixels-duckdb
90+
cd pixels-duckdb/duckdb
8491
python scripts/run_benchmark.py --help
8592
```
8693

8794
Run TPC-H 1 benchmarks:
8895

8996
```
90-
cd pixels-duckdb
97+
cd pixels-duckdb/duckdb
9198
python scripts/run_benchmark.py --pixels "benchmark/tpch/pixels/tpch_1/" --parquet "benchmark/tpch/parquet/tpch_1/" -v --repeat-time-disk 1
9299
```
93100

94101
or, with the encoding of pixels enabled:
95102

96103
```
97-
cd pixels-duckdb
104+
cd pixels-duckdb/duckdb
98105
python scripts/run_benchmark.py --pixels "benchmark/tpch/pixels/tpch_1_encoding/" --parquet "benchmark/tpch/parquet/tpch_1/" -v --repeat-time-disk 1
99106
```
100107

101108
Run TPC-H 300 benchmarks:
102109

103110
```
104-
cd pixels-duckdb
111+
cd pixels-duckdb/duckdb
105112
python scripts/run_benchmark.py --pixels "benchmark/tpch/pixels/tpch_300/" --parquet "benchmark/tpch/parquet/tpch_300/" -v --repeat-time-disk 1
106113
```
107114

108115
or, with the encoding of pixels enabled:
109116

110117
```
111-
cd pixels-duckdb
118+
cd pixels-duckdb/duckdb
112119
python scripts/run_benchmark.py --pixels "benchmark/tpch/pixels/tpch_300_encoding/" --parquet "benchmark/tpch/parquet/tpch_300/" -v --repeat-time-disk 1
113120
```
114121

115122
We also support running only pixels or only parquet:
116123

117124
```
118-
cd pixels-duckdb
125+
cd pixels-duckdb/duckdb
119126
python scripts/run_benchmark.py --pixels "benchmark/tpch/pixels/tpch_1/" -v --repeat-time-disk 1
120127
```
121128

122129
We also offer self-defined queries:
123130

124131
```
125-
cd pixels-duckdb
132+
cd pixels-duckdb/duckdb
126133
python scripts/run_benchmark.py --pixels "benchmark/tpch/pixels/micro-benchmark/tpch_1/" -v --repeat-time-disk 1
127134
```
128135

@@ -138,7 +145,7 @@ SSD array needs pixels data to be in multiple directories. The following instruc
138145
We offer a convenient python script to distribute the pixels data in a single directory to multiple directories. For example:
139146

140147
```
141-
cd pixels-duckdb
148+
cd pixels-duckdb/duckdb
142149
python scripts/pixels-multidir-generator.py -i /data/s1725-1/liyu/pixels_data/pixels-tpch-1-small-endian \
143150
-o /data/s1725-1/liyu/pixels_data/pixels-tpch-1-small-endian-partition1 \
144151
/data/s1725-1/liyu/pixels_data/pixels-tpch-1-small-endian-partition2 \
@@ -152,7 +159,7 @@ The python script copies files from the input directory to all output paths. The
152159
(e.g. the file index of `20230809035030_4630.pxl` is 4630) and then copied to the output paths in the round-robin fashion. Pixels C++ reader reads
153160
the pixels data sorted by the file index, so all SSDs can be utilized simultaneously.
154161

155-
#### 2. Modity the benchmark
162+
#### 2. Modify the benchmark
156163

157164
The above python script outputs the following queries:
158165

@@ -171,7 +178,7 @@ CREATE VIEW region AS SELECT * FROM pixels_scan(["/data/s1725-1/liyu/pixels_data
171178
We replace these `CREATE` queries in `pixels_tpch_template.benchmark.in`, and run the benchmark:
172179

173180
```
174-
cd pixels-duckdb
181+
cd pixels-duckdb/duckdb
175182
python scripts/run_benchmark.py --pixels "benchmark/tpch/pixels/tpch_1/" -v --repeat-time-disk 1
176183
```
177184

@@ -208,15 +215,9 @@ make update
208215

209216
### 2. The compilation fails in duckdb
210217

211-
Please make sure you don't use the official `duckdb` repository. The official `duckdb` has some name conflicts with `iouring` (which is a linux async IO library), which would lead to the compilation failure.
212-
213-
214-
### 3. I can't load the pixels data via pixels C++ reader
215-
216-
Currently, the pixels Java writer and reader uses big endian to write/read pixels data. We find that small endian is more efficient for pixels c++ reader. Therefore, in order to generate the pixels data with small endian, please use Pixels in [little-endian branch](https://github.com/pixelsdb/pixels/tree/little-endian). We will merge the small endian to pixels java reader in the future.
217-
218+
Please make sure you don't use the official `duckdb` repository. The official `duckdb` has some name conflicts with `iouring` (which is a Linux async I/O library), which lead to compilation failures.
218219

219-
### 4. I fail to run the pixels and parquet benchmark
220+
### 3. I fail to run the pixels and parquet benchmark
220221

221222
This code is tested on the diascld31 server. I hardcode the pixels data directory and parquet data directory in `parquet_tpch_template.benchmark.in`, `parquet_tpch_template_no_verification.benchmark.in`, `pixels_tpch_template.benchmark.in` and `pixels_tpch_template_no_verification.benchmark.in`. If you want to run this benchmark on another machine, make sure to modify the pixels and parquet directories to the correct locations. `TODO`: In the future I will rewrite this to a more user-friendly benchmark.
222223

@@ -244,10 +245,10 @@ def clean_page_cache():
244245
os.system(cmd)
245246
```
246247

247-
### 5. The protobuf version issue
248-
I use protobuf [v3.21.6](https://github.com/protocolbuffers/protobuf/releases/tag/v3.21.6). The latest protobuf version doesn't work for pixels c++ reader.
248+
### 4. The protobuf version issue
249+
We use protobuf [v3.21.6](https://github.com/protocolbuffers/protobuf/releases/tag/v3.21.6). It is pulled as a submodule. The latest protobuf version doesn't work for pixels c++ reader.
249250

250-
### 6. Install Boost C++ Libraries
251+
### 5. Install Boost C++ Libraries
251252
We need the Boost C++ Libraries in pixels-cli, which can be installed with the following command.
252253

253254
```
File renamed without changes.
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
#include <stdlib.h>
22
#include <jni.h>
3-
#include "memory_mapped_file.h"
3+
#include "MemoryMappedFile.h"
44

55
#ifndef _Included_utils
66
#define _Included_utils

cpp/pixels-cache/lib/io_pixelsdb_pixels_cache_NativeHashIndexReader.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
#include <stdint.h>
33
#include <string.h>
44
#include <byteswap.h>
5-
#include "../include/memory_mapped_file.h"
5+
#include "../include/MemoryMappedFile.h"
66
#include "../include/io_pixelsdb_pixels_cache_NativeHashIndexReader.h"
77

88
#define INDEX_HASH_HEAD_OFFSET 24 // 16 + 8

cpp/pixels-cache/lib/io_pixelsdb_pixels_cache_NativeRadixIndexReader.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
#include <stdint.h>
33
#include <string.h>
44
#include <byteswap.h>
5-
#include "../include/memory_mapped_file.h"
5+
#include "../include/MemoryMappedFile.h"
66
#include "../include/io_pixelsdb_pixels_cache_NativeRadixIndexReader.h"
77

88
#define INDEX_RADIX_OFFSET 16

cpp/pixels-cache/lib/io_pixelsdb_pixels_cache_utils_RadixIndexEndianRewriter.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
#include <stdio.h>
22
#include <string.h>
33
#include <byteswap.h>
4-
#include "../include/utils.h"
5-
#include "../include/memory_mapped_file.h"
4+
#include "../include/Utils.h"
5+
#include "../include/MemoryMappedFile.h"
66
#include "../include/io_pixelsdb_pixels_cache_utils_RadixIndexEndianRewriter.h"
77

88
#define INDEX_RADIX_OFFSET 16

cpp/pixels-cpp.properties

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ pixel.stride=2
1818
pixel.threads=-1
1919
# column size path. It is optional. If no column size path is designated, the
2020
# size of first pixels data is used. For example:
21-
# pixel.column.size.path=/scratch/liyu/opt/pixels/cpp/pixels-duckdb/benchmark/clickbench/clickbench-size.csv
21+
# pixel.column.size.path=/scratch/liyu/opt/pixels/cpp/pixels-duckdb/duckdb/benchmark/clickbench/clickbench-size.csv
2222
pixel.column.size.path=
2323

2424
# the work thread to run parquet. -1 means using all CPU cores

0 commit comments

Comments
 (0)