Skip to content

Commit f9c408d

Browse files
authored
Merge pull request #1100 from suketa/feature/table-function-phase3-data-chunk
feat: Add DuckDB::DataChunk and DuckDB::Vector for table function output (Phase 3)
2 parents 9be164c + 0f01acd commit f9c408d

File tree

11 files changed

+396
-0
lines changed

11 files changed

+396
-0
lines changed

CHANGELOG.md

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,15 @@ All notable changes to this project will be documented in this file.
1616
- add DuckDB::BindInfo#add_result_column for defining output schema.
1717
- add DuckDB::BindInfo#set_cardinality for performance hints.
1818
- add DuckDB::BindInfo#set_error for reporting bind errors.
19+
- add DuckDB::DataChunk class for table function output data (Phase 3).
20+
- add DuckDB::DataChunk#column_count for getting number of columns.
21+
- add DuckDB::DataChunk#size for getting number of rows.
22+
- add DuckDB::DataChunk#size= for setting number of rows.
23+
- add DuckDB::DataChunk#get_vector for accessing column vectors.
24+
- add DuckDB::Vector class for column data access (Phase 3).
25+
- add DuckDB::Vector#get_data for raw data pointer access.
26+
- add DuckDB::Vector#get_validity for validity mask access.
27+
- add DuckDB::Vector#assign_string_element for writing string values.
1928
- bump duckdb to 1.4.4 on CI.
2029
- add inline style to DuckDB::Connection#register_scalar_function (accepts keyword arguments + block).
2130
- add DuckDB::ScalarFunction.create class method for declarative API.

ext/duckdb/data_chunk.c

Lines changed: 137 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,137 @@
1+
#include "ruby-duckdb.h"
2+
3+
VALUE cDuckDBDataChunk;
4+
extern VALUE cDuckDBVector;
5+
6+
static void deallocate(void *ctx);
7+
static VALUE allocate(VALUE klass);
8+
static size_t memsize(const void *p);
9+
static VALUE rbduckdb_data_chunk_column_count(VALUE self);
10+
static VALUE rbduckdb_data_chunk_get_size(VALUE self);
11+
static VALUE rbduckdb_data_chunk_set_size(VALUE self, VALUE size);
12+
static VALUE rbduckdb_data_chunk_get_vector(VALUE self, VALUE col_idx);
13+
14+
static const rb_data_type_t data_chunk_data_type = {
15+
"DuckDB/DataChunk",
16+
{NULL, deallocate, memsize,},
17+
0, 0, RUBY_TYPED_FREE_IMMEDIATELY
18+
};
19+
20+
/*
 * TypedData free callback for DuckDB::DataChunk.
 * Releases only the wrapper struct; nothing is called on the wrapped
 * duckdb_data_chunk handle here.
 */
static void deallocate(void *ctx) {
    xfree(ctx);
}
24+
25+
/*
 * Allocator for DuckDB::DataChunk: creates a zero-filled wrapper struct and
 * wraps it in a TypedData object of class +klass+.
 */
static VALUE allocate(VALUE klass) {
    rubyDuckDBDataChunk *wrapper = xcalloc((size_t)1, sizeof(*wrapper));

    return TypedData_Wrap_Struct(klass, &data_chunk_data_type, wrapper);
}
29+
30+
static size_t memsize(const void *p) {
31+
return sizeof(rubyDuckDBDataChunk);
32+
}
33+
34+
/*
 * Returns the rubyDuckDBDataChunk wrapper stored in +obj+.
 * The lookup is type-checked against data_chunk_data_type.
 */
rubyDuckDBDataChunk *get_struct_data_chunk(VALUE obj) {
    return (rubyDuckDBDataChunk *)rb_check_typeddata(obj, &data_chunk_data_type);
}
39+
40+
/*
41+
* call-seq:
42+
* data_chunk.column_count -> Integer
43+
*
44+
* Returns the number of columns in the data chunk.
45+
*
46+
* data_chunk.column_count # => 2
47+
*/
48+
static VALUE rbduckdb_data_chunk_column_count(VALUE self) {
49+
rubyDuckDBDataChunk *ctx;
50+
idx_t count;
51+
52+
TypedData_Get_Struct(self, rubyDuckDBDataChunk, &data_chunk_data_type, ctx);
53+
54+
count = duckdb_data_chunk_get_column_count(ctx->data_chunk);
55+
56+
return ULL2NUM(count);
57+
}
58+
59+
/*
60+
* call-seq:
61+
* data_chunk.size -> Integer
62+
*
63+
* Returns the current number of tuples in the data chunk.
64+
*
65+
* data_chunk.size # => 100
66+
*/
67+
static VALUE rbduckdb_data_chunk_get_size(VALUE self) {
68+
rubyDuckDBDataChunk *ctx;
69+
idx_t size;
70+
71+
TypedData_Get_Struct(self, rubyDuckDBDataChunk, &data_chunk_data_type, ctx);
72+
73+
size = duckdb_data_chunk_get_size(ctx->data_chunk);
74+
75+
return ULL2NUM(size);
76+
}
77+
78+
/*
79+
* call-seq:
80+
* data_chunk.size = size -> size
81+
*
82+
* Sets the number of tuples in the data chunk.
83+
*
84+
* data_chunk.size = 50
85+
*/
86+
static VALUE rbduckdb_data_chunk_set_size(VALUE self, VALUE size) {
87+
rubyDuckDBDataChunk *ctx;
88+
idx_t sz;
89+
90+
TypedData_Get_Struct(self, rubyDuckDBDataChunk, &data_chunk_data_type, ctx);
91+
92+
sz = NUM2ULL(size);
93+
duckdb_data_chunk_set_size(ctx->data_chunk, sz);
94+
95+
return size;
96+
}
97+
98+
/*
99+
* call-seq:
100+
* data_chunk.get_vector(col_idx) -> DuckDB::Vector
101+
*
102+
* Gets the vector at the specified column index.
103+
*
104+
* vector = data_chunk.get_vector(0)
105+
*/
106+
static VALUE rbduckdb_data_chunk_get_vector(VALUE self, VALUE col_idx) {
107+
rubyDuckDBDataChunk *ctx;
108+
idx_t idx;
109+
duckdb_vector vector;
110+
VALUE vector_obj;
111+
rubyDuckDBVector *vector_ctx;
112+
113+
TypedData_Get_Struct(self, rubyDuckDBDataChunk, &data_chunk_data_type, ctx);
114+
115+
idx = NUM2ULL(col_idx);
116+
vector = duckdb_data_chunk_get_vector(ctx->data_chunk, idx);
117+
118+
// Create Vector wrapper
119+
vector_obj = rb_class_new_instance(0, NULL, cDuckDBVector);
120+
vector_ctx = get_struct_vector(vector_obj);
121+
vector_ctx->vector = vector;
122+
123+
return vector_obj;
124+
}
125+
126+
/*
 * Defines the DuckDB::DataChunk class under mDuckDB, installs its allocator
 * and registers its instance methods.
 */
void rbduckdb_init_duckdb_data_chunk(void) {
    /* Never compiled. NOTE(review): presumably kept so documentation tooling
     * can resolve mDuckDB -- confirm before removing. */
#if 0
    VALUE mDuckDB = rb_define_module("DuckDB");
#endif
    cDuckDBDataChunk = rb_define_class_under(mDuckDB, "DataChunk", rb_cObject);
    rb_define_alloc_func(cDuckDBDataChunk, allocate);

    /* Readers */
    rb_define_method(cDuckDBDataChunk, "column_count", rbduckdb_data_chunk_column_count, 0);
    rb_define_method(cDuckDBDataChunk, "size", rbduckdb_data_chunk_get_size, 0);

    /* Writer and column access */
    rb_define_method(cDuckDBDataChunk, "size=", rbduckdb_data_chunk_set_size, 1);
    rb_define_method(cDuckDBDataChunk, "get_vector", rbduckdb_data_chunk_get_vector, 1);
}

ext/duckdb/data_chunk.h

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
#ifndef RUBY_DUCKDB_DATA_CHUNK_H
2+
#define RUBY_DUCKDB_DATA_CHUNK_H
3+
4+
struct _rubyDuckDBDataChunk {
5+
duckdb_data_chunk data_chunk;
6+
};
7+
8+
typedef struct _rubyDuckDBDataChunk rubyDuckDBDataChunk;
9+
10+
rubyDuckDBDataChunk *get_struct_data_chunk(VALUE obj);
11+
void rbduckdb_init_duckdb_data_chunk(void);
12+
13+
#endif

ext/duckdb/duckdb.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,5 +43,7 @@ Init_duckdb_native(void) {
4343
rbduckdb_init_duckdb_value_impl();
4444
rbduckdb_init_duckdb_scalar_function();
4545
rbduckdb_init_duckdb_bind_info();
46+
rbduckdb_init_duckdb_vector();
47+
rbduckdb_init_duckdb_data_chunk();
4648
rbduckdb_init_duckdb_table_function();
4749
}

ext/duckdb/ruby-duckdb.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,8 @@
3131
#include "./value_impl.h"
3232
#include "./scalar_function.h"
3333
#include "./bind_info.h"
34+
#include "./vector.h"
35+
#include "./data_chunk.h"
3436
#include "./table_function.h"
3537

3638
extern VALUE mDuckDB;

ext/duckdb/vector.c

Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
#include "ruby-duckdb.h"
2+
3+
VALUE cDuckDBVector;
4+
5+
static void deallocate(void *ctx);
6+
static VALUE allocate(VALUE klass);
7+
static size_t memsize(const void *p);
8+
static VALUE rbduckdb_vector_get_data(VALUE self);
9+
static VALUE rbduckdb_vector_get_validity(VALUE self);
10+
static VALUE rbduckdb_vector_assign_string_element(VALUE self, VALUE index, VALUE str);
11+
12+
static const rb_data_type_t vector_data_type = {
13+
"DuckDB/Vector",
14+
{NULL, deallocate, memsize,},
15+
0, 0, RUBY_TYPED_FREE_IMMEDIATELY
16+
};
17+
18+
/*
 * TypedData free callback for DuckDB::Vector.
 * Releases only the wrapper struct; nothing is called on the wrapped
 * duckdb_vector handle here.
 */
static void deallocate(void *ctx) {
    xfree(ctx);
}
22+
23+
/*
 * Allocator for DuckDB::Vector: creates a zero-filled wrapper struct and
 * wraps it in a TypedData object of class +klass+.
 */
static VALUE allocate(VALUE klass) {
    rubyDuckDBVector *wrapper = xcalloc((size_t)1, sizeof(*wrapper));

    return TypedData_Wrap_Struct(klass, &vector_data_type, wrapper);
}
27+
28+
static size_t memsize(const void *p) {
29+
return sizeof(rubyDuckDBVector);
30+
}
31+
32+
/*
 * Returns the rubyDuckDBVector wrapper stored in +obj+.
 * The lookup is type-checked against vector_data_type.
 */
rubyDuckDBVector *get_struct_vector(VALUE obj) {
    return (rubyDuckDBVector *)rb_check_typeddata(obj, &vector_data_type);
}
37+
38+
/*
39+
* call-seq:
40+
* vector.get_data -> Integer (pointer address)
41+
*
42+
* Gets the raw data pointer for the vector.
43+
* Returns the memory address as an integer.
44+
*
45+
* ptr = vector.get_data
46+
*/
47+
static VALUE rbduckdb_vector_get_data(VALUE self) {
48+
rubyDuckDBVector *ctx;
49+
void *data;
50+
51+
TypedData_Get_Struct(self, rubyDuckDBVector, &vector_data_type, ctx);
52+
53+
data = duckdb_vector_get_data(ctx->vector);
54+
55+
return ULL2NUM((uintptr_t)data);
56+
}
57+
58+
/*
59+
* call-seq:
60+
* vector.get_validity -> Integer or nil (pointer address)
61+
*
62+
* Gets the validity mask pointer for the vector.
63+
* Returns nil if all values are valid.
64+
*
65+
* validity = vector.get_validity
66+
*/
67+
static VALUE rbduckdb_vector_get_validity(VALUE self) {
68+
rubyDuckDBVector *ctx;
69+
uint64_t *validity;
70+
71+
TypedData_Get_Struct(self, rubyDuckDBVector, &vector_data_type, ctx);
72+
73+
validity = duckdb_vector_get_validity(ctx->vector);
74+
75+
if (!validity) {
76+
return Qnil;
77+
}
78+
79+
return ULL2NUM((uintptr_t)validity);
80+
}
81+
82+
/*
83+
* call-seq:
84+
* vector.assign_string_element(index, str) -> self
85+
*
86+
* Assigns a string value at the specified index.
87+
*
88+
* vector.assign_string_element(0, 'hello')
89+
*/
90+
static VALUE rbduckdb_vector_assign_string_element(VALUE self, VALUE index, VALUE str) {
91+
rubyDuckDBVector *ctx;
92+
idx_t idx;
93+
const char *string_val;
94+
95+
TypedData_Get_Struct(self, rubyDuckDBVector, &vector_data_type, ctx);
96+
97+
idx = NUM2ULL(index);
98+
string_val = StringValueCStr(str);
99+
100+
duckdb_vector_assign_string_element(ctx->vector, idx, string_val);
101+
102+
return self;
103+
}
104+
105+
/*
 * Defines the DuckDB::Vector class under mDuckDB, installs its allocator
 * and registers its instance methods.
 */
void rbduckdb_init_duckdb_vector(void) {
    /* Never compiled. NOTE(review): presumably kept so documentation tooling
     * can resolve mDuckDB -- confirm before removing. */
#if 0
    VALUE mDuckDB = rb_define_module("DuckDB");
#endif
    cDuckDBVector = rb_define_class_under(mDuckDB, "Vector", rb_cObject);
    rb_define_alloc_func(cDuckDBVector, allocate);

    /* Raw pointer accessors */
    rb_define_method(cDuckDBVector, "get_data", rbduckdb_vector_get_data, 0);
    rb_define_method(cDuckDBVector, "get_validity", rbduckdb_vector_get_validity, 0);

    /* Writers */
    rb_define_method(cDuckDBVector, "assign_string_element", rbduckdb_vector_assign_string_element, 2);
}

ext/duckdb/vector.h

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
#ifndef RUBY_DUCKDB_VECTOR_H
2+
#define RUBY_DUCKDB_VECTOR_H
3+
4+
struct _rubyDuckDBVector {
5+
duckdb_vector vector;
6+
};
7+
8+
typedef struct _rubyDuckDBVector rubyDuckDBVector;
9+
10+
rubyDuckDBVector *get_struct_vector(VALUE obj);
11+
void rbduckdb_init_duckdb_vector(void);
12+
13+
#endif

lib/duckdb.rb

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@
1616
require 'duckdb/logical_type'
1717
require 'duckdb/scalar_function'
1818
require 'duckdb/bind_info'
19+
require 'duckdb/vector'
20+
require 'duckdb/data_chunk'
1921
require 'duckdb/table_function'
2022
require 'duckdb/infinity'
2123
require 'duckdb/instance_cache'

lib/duckdb/data_chunk.rb

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
# frozen_string_literal: true
2+
3+
module DuckDB
  #
  # DuckDB::DataChunk represents a chunk of data for table function output.
  #
  # While a table function executes, rows are returned through data chunks.
  #
  # Example:
  #
  #   table_function.execute do |func_info, output|
  #     # Set number of rows to output
  #     output.size = 10
  #
  #     # Get vector for column 0
  #     vector = output.get_vector(0)
  #
  #     # Write data to vector...
  #   end
  #
  class DataChunk # rubocop:disable Lint/EmptyClass
    # All methods are defined in the C extension (ext/duckdb/data_chunk.c).
  end
end

lib/duckdb/vector.rb

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
# frozen_string_literal: true
2+
3+
module DuckDB
  #
  # DuckDB::Vector represents a column vector in a data chunk.
  #
  # Vectors hold the actual data for table function output.
  #
  # Example:
  #
  #   vector = output.get_vector(0)
  #   vector.assign_string_element(0, 'hello')
  #   vector.assign_string_element(1, 'world')
  #
  class Vector # rubocop:disable Lint/EmptyClass
    # All methods are defined in the C extension (ext/duckdb/vector.c).
  end
end

0 commit comments

Comments
 (0)