@@ -40,79 +40,152 @@ module DuckDB
class TableFunction
  # TableFunction#initialize is defined in C extension

  # Registry of table adapters, keyed by the Ruby class whose instances they convert.
  # It lives on TableFunction itself; the singleton methods below always reach it
  # through the private +table_adapters+ reader so that subclasses share the same
  # registry instead of reading their own (unset) instance variable.
  @table_adapters = {}

  class << self
    #
    # Creates a new table function with a declarative API.
    #
    # @param name [String] The name of the table function
    # @param parameters [Array<LogicalType>, Hash<String, LogicalType>] Function parameters (optional)
    # @param columns [Hash<String, LogicalType>] Output columns (required)
    # @yield [func_info, output] The execute block that generates data
    # @yieldparam func_info [FunctionInfo] Function execution context
    # @yieldparam output [DataChunk] Output data chunk to fill
    # @yieldreturn [Integer] Number of rows generated (0 when done)
    # @return [TableFunction] The configured table function
    # @raise [ArgumentError] if +name+, +columns+ or the block is missing, or if
    #   +parameters+ is neither an Array nor a Hash
    #
    # @example Simple range function
    #   tf = TableFunction.create(
    #     name: 'my_range',
    #     parameters: [LogicalType::BIGINT],
    #     columns: { 'value' => LogicalType::BIGINT }
    #   ) do |func_info, output|
    #     # Generate data...
    #     0  # Signal done
    #   end
    #
    # @example Function that returns data
    #   tf = TableFunction.create(
    #     name: 'my_function',
    #     columns: { 'value' => LogicalType::BIGINT }
    #   ) do |func_info, output|
    #     vec = output.get_vector(0)
    #     # Fill vector...
    #     3  # Return row count
    #   end
    #
    # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength, Metrics/PerceivedComplexity
    def create(name:, columns:, parameters: nil, &block)
      raise ArgumentError, 'name is required' unless name
      raise ArgumentError, 'columns are required' unless columns
      raise ArgumentError, 'block is required' unless block

      tf = new
      tf.name = name

      # Add parameters (positional or named)
      case parameters
      when nil, false
        # No parameters declared
      when Array
        parameters.each { |type| tf.add_parameter(type) }
      when Hash
        parameters.each { |param_name, type| tf.add_named_parameter(param_name, type) }
      else
        raise ArgumentError, 'parameters must be Array or Hash'
      end

      # Set bind callback to add result columns
      tf.bind do |bind_info|
        columns.each do |col_name, col_type|
          bind_info.add_result_column(col_name, col_type)
        end
      end

      # Set init callback (required by DuckDB)
      tf.init do |_init_info|
        # No-op
      end

      # Set execute callback - the user's block returns the row count, which is
      # coerced with Integer() and stored as the output chunk size.
      tf.execute do |func_info, output|
        output.size = Integer(block.call(func_info, output))
      end

      tf
    end
    # rubocop:enable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength, Metrics/PerceivedComplexity

    # Registers a table adapter for a Ruby class.
    #
    # The adapter is used by +DuckDB::Connection#expose_as_table+ to convert
    # instances of +klass+ into a DuckDB table function. The adapter must respond
    # to +call(object, name, columns: nil)+ and return a +DuckDB::TableFunction+.
    #
    # Registering a second adapter for the same +klass+ replaces the first.
    #
    # == Implementing a Table Adapter
    #
    # An adapter is any object that responds to +call(object, name, columns: nil)+.
    # The +columns:+ keyword argument allows callers to override the column schema;
    # the adapter should fall back to its own schema detection when it is +nil+.
    #
    # The execute block passed to +DuckDB::TableFunction.create+ must:
    # - Write one batch of rows into +output+ per call
    # - Return the number of rows written as an +Integer+
    # - Return +0+ to signal that all data has been exhausted
    #
    # @example Minimal adapter for CSV objects
    #   class CSVTableAdapter
    #     def call(csv, name, columns: nil)
    #       columns ||= infer_columns(csv)
    #
    #       DuckDB::TableFunction.create(name:, columns:) do |_func_info, output|
    #         row = csv.readline
    #         if row
    #           row.each_with_index { |cell, i| output.set_value(i, 0, cell[1]) }
    #           1 # wrote one row
    #         else
    #           csv.rewind
    #           0 # signal end of data
    #         end
    #       end
    #     end
    #
    #     private
    #
    #     def infer_columns(csv)
    #       headers = csv.first.headers
    #       csv.rewind
    #       headers.each_with_object({}) { |h, hsh| hsh[h] = DuckDB::LogicalType::VARCHAR }
    #     end
    #   end
    #
    #   # Register and use:
    #   DuckDB::TableFunction.add_table_adapter(CSV, CSVTableAdapter.new)
    #   con.execute('SET threads=1')
    #   con.expose_as_table(csv, 'csv_table')
    #   con.query('SELECT * FROM csv_table()').to_a
    #
    # @param klass [Class] the Ruby class to register an adapter for (e.g. +CSV+)
    # @param adapter [#call] the adapter object
    # @return [void]
    #
    def add_table_adapter(klass, adapter)
      table_adapters[klass] = adapter
    end

    # Returns the table adapter registered for the given class, or +nil+ if none.
    #
    # An adapter registered for +klass+ itself always wins. Otherwise the
    # ancestors of +klass+ are searched in order and the adapter of the nearest
    # registered ancestor is returned, so instances of a subclass of a registered
    # class are served by the parent's adapter.
    #
    # @param klass [Class] the Ruby class to look up
    # @return [#call, nil] the registered adapter, or +nil+ if not found
    #
    # @example
    #   adapter = DuckDB::TableFunction.table_adapter_for(CSV)
    #
    def table_adapter_for(klass)
      registry = table_adapters
      # Exact registrations take precedence (also covers non-Module keys).
      return registry[klass] if registry.key?(klass) || !klass.respond_to?(:ancestors)

      nearest = klass.ancestors.find { |ancestor| registry.key?(ancestor) }
      nearest && registry[nearest]
    end

    private

    # The shared adapter registry. Always read from TableFunction so that calls
    # made on a subclass do not hit the subclass's own unset instance variable.
    def table_adapters
      TableFunction.instance_variable_get(:@table_adapters)
    end
  end
end
118191end
0 commit comments