Fix incorrect results with multiple COUNT(DISTINCT..) aggregates on dictionaries
#9679
@@ -47,7 +47,7 @@ use crate::binary_map::OutputType;
 use crate::expressions::format_state_name;
 use crate::{AggregateExpr, PhysicalExpr};

-/// Expression for a COUNT(DISTINCT) aggregation.
+/// Expression for a `COUNT(DISTINCT)` aggregation.
 #[derive(Debug)]
 pub struct DistinctCount {
     /// Column name
@@ -100,6 +100,7 @@ impl AggregateExpr for DistinctCount {
         use TimeUnit::*;

         Ok(match &self.state_data_type {
+            // try and use a specialized accumulator if possible, otherwise fall back to generic accumulator
             Int8 => Box::new(PrimitiveDistinctCountAccumulator::<Int8Type>::new()),
             Int16 => Box::new(PrimitiveDistinctCountAccumulator::<Int16Type>::new()),
             Int32 => Box::new(PrimitiveDistinctCountAccumulator::<Int32Type>::new()),
@@ -157,6 +158,7 @@ impl AggregateExpr for DistinctCount {
                 OutputType::Binary,
             )),

+            // Use the generic accumulator based on `ScalarValue` for all other types
            _ => Box::new(DistinctCountAccumulator {
                 values: HashSet::default(),
                 state_data_type: self.state_data_type.clone(),
@@ -183,7 +185,11 @@ impl PartialEq<dyn Any> for DistinctCount {
 }

 /// General purpose distinct accumulator that works for any DataType by using
-/// [`ScalarValue`]. Some types have specialized accumulators that are (much)
+/// [`ScalarValue`].
+///
+/// It stores intermediate results as a `ListArray`
+///
+/// Note that many types have specialized accumulators that are (much)
 /// more efficient such as [`PrimitiveDistinctCountAccumulator`] and
 /// [`BytesDistinctCountAccumulator`]
 #[derive(Debug)]
@@ -193,8 +199,9 @@ struct DistinctCountAccumulator {
 }

 impl DistinctCountAccumulator {
-    // calculating the size for fixed length values, taking first batch size * number of batches
-    // This method is faster than .full_size(), however it is not suitable for variable length values like strings or complex types
+    // calculating the size for fixed length values, taking first batch size *
+    // number of batches This method is faster than .full_size(), however it is
+    // not suitable for variable length values like strings or complex types
     fn fixed_size(&self) -> usize {
         std::mem::size_of_val(self)
             + (std::mem::size_of::<ScalarValue>() * self.values.capacity())
@@ -207,7 +214,8 @@ impl DistinctCountAccumulator {
             + std::mem::size_of::<DataType>()
     }

-    // calculates the size as accurate as possible, call to this method is expensive
+    // calculates the size as accurately as possible. Note that calling this
+    // method is expensive
     fn full_size(&self) -> usize {
         std::mem::size_of_val(self)
             + (std::mem::size_of::<ScalarValue>() * self.values.capacity())
@@ -221,6 +229,7 @@ impl DistinctCountAccumulator {
 }

 impl Accumulator for DistinctCountAccumulator {
+    /// Returns the distinct values seen so far as (one element) ListArray.
     fn state(&mut self) -> Result<Vec<ScalarValue>> {
         let scalars = self.values.iter().cloned().collect::<Vec<_>>();
         let arr = ScalarValue::new_list(scalars.as_slice(), &self.state_data_type);
@@ -246,15 +255,24 @@ impl Accumulator for DistinctCountAccumulator {
         })
     }

+    /// Merges multiple sets of distinct values into the current set.
+    ///
+    /// The input to this function is a `ListArray` with **multiple** rows,
+    /// where each row contains the values from a partial aggregate's phase (e.g.
+    /// the result of calling `Self::state` on multiple accumulators).
     fn merge_batch(&mut self, states: &[ArrayRef]) -> Result<()> {
         if states.is_empty() {
             return Ok(());
         }
         assert_eq!(states.len(), 1, "array_agg states must be singleton!");
         let array = &states[0];
         let list_array = array.as_list::<i32>();
-        let inner_array = list_array.value(0);
-        self.update_batch(&[inner_array])
+        for inner_array in list_array.iter() {
+            let inner_array = inner_array
+                .expect("counts are always non null, so are intermediate results");
Contributor
I noticed this when updating my local repo... is `expect` something that should be used here? My understanding is that it panics on `None`. Given the method returns `Result`, I would expect an `Err` to be returned instead - am I missing something in my understanding of Rust here?

Contributor
It panics, but it is fine if the value is ensured to be non-null. I looked into how the array was built in the above comment but failed. 😢

Contributor (Author)
This is a good point and I think it would be a better UX to avoid panicking even if something "impossible" happens. I made the change in #9712
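For illustration only - a minimal, standalone Rust sketch (not code from this PR or from #9712) of the pattern discussed above: mapping an unexpected `None` to an `Err` that propagates with `?` instead of panicking via `expect`. The function name and error message are made up for the example.

```rust
// Standalone sketch: convert an unexpected `None` into an `Err` instead of
// panicking, so the caller can decide how to handle it.
fn sum_non_null(values: &[Option<i64>]) -> Result<i64, String> {
    let mut total = 0;
    for v in values.iter().copied() {
        // Instead of `v.expect("...")`, which panics on `None`, map the
        // missing value to an error and propagate it with `?`.
        let v = v.ok_or_else(|| "unexpected null in intermediate state".to_string())?;
        total += v;
    }
    Ok(total)
}

fn main() {
    assert_eq!(sum_non_null(&[Some(1), Some(2)]), Ok(3));
    assert!(sum_non_null(&[Some(1), None]).is_err());
}
```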
+            self.update_batch(&[inner_array])?;
+        }
+        Ok(())
     }

     fn evaluate(&mut self) -> Result<ScalarValue> {
@@ -280,3 +280,70 @@ ORDER BY
 2023-12-20T01:20:00 1000 f2 foo
 2023-12-20T01:30:00 1000 f1 32.0
 2023-12-20T01:30:00 1000 f2 foo
+
+# Cleanup
+statement ok
+drop view m1;
+
+statement ok
+drop view m2;
+
+######
+# Create a table using UNION ALL to get 2 partitions (very important)
+######
+statement ok
+create table m3_source as
+select * from (values('foo', 'bar', 1))
+UNION ALL
+select * from (values('foo', 'baz', 1));
+
+######
+# Now, create a table with the same data, but column2 has type `Dictionary(Int32)` to trigger the fallback code
|
Contributor
why does the cast to the dictionary trigger the fallback code? Does it refer to

Contributor
specifically, why is the key of the dict the sub group index after casting? 🤔 group 1: "a", "b", we get

Contributor (Author)
The reason the dictionary triggers the fallback is that Dictionary encoded columns run this path

What is happening is that we are doing a two phase groupby (illustrated here). And so there are two different group 1 (partial): "a", "b". The merge is called to combine the results together with a two element array. But I may be misunderstanding your question

Contributor (Author)
I also filed #9695 to add some more coverage of array operations

Contributor
I'm curious about how and where the DictionaryArray has been built. It is quite hard to trace the previous caller of

Contributor (Author)
I think it comes from emitting

Contributor
It seems converted at this point already
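For illustration only (not code from this PR): a minimal standalone sketch, assuming the `arrow` crate, of how a two phase group by can hand the final accumulator a `ListArray` with one state row per partial accumulator - so `merge_batch` must handle more than one row.

```rust
use arrow::array::{Array, ListArray};
use arrow::compute::concat;
use arrow::datatypes::Int32Type;

fn main() {
    // Each partial accumulator's `state()` is a single-row list of its
    // distinct values (integers here instead of dictionary values).
    let partial_1 = ListArray::from_iter_primitive::<Int32Type, _, _>(vec![Some(vec![
        Some(1),
        Some(2),
    ])]);
    let partial_2 =
        ListArray::from_iter_primitive::<Int32Type, _, _>(vec![Some(vec![Some(3)])]);

    // When the partial results for the same group are combined, the final
    // accumulator's merge_batch sees a ListArray with one row per partial
    // accumulator - two rows here, not one.
    let states = concat(&[&partial_1, &partial_2]).unwrap();
    assert_eq!(states.len(), 2);
}
```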
+######
+statement ok
+create table m3 as
+select
+  column1,
+  arrow_cast(column2, 'Dictionary(Int32, Utf8)') as "column2",
+  column3
+from m3_source;
+
+# there are two values in column2
+query T?I rowsort
+SELECT *
+FROM m3;
+----
+foo bar 1
+foo baz 1
+
+# There is 1 distinct value in column1
+query I
+SELECT count(distinct column1)
+FROM m3
+GROUP BY column3;
+----
+1
+
+# There are 2 distinct values in column2
+query I
+SELECT count(distinct column2)
+FROM m3
+GROUP BY column3;
+----
+2
+
+# Should still get the same results when querying in the same query
+query II
+SELECT count(distinct column1), count(distinct column2)
+FROM m3
+GROUP BY column3;
+----
+1 2
Contributor (Author)
this query returns
+
+# Cleanup
+statement ok
+drop table m3;
+
+statement ok
+drop table m3_source;
This is the actual bug fix -- to use all rows, not just the first. The rest of this PR is tests / comment improvements.
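As an illustration of why reading only the first state row under-counts (a standalone sketch assuming the `arrow` crate, not DataFusion code):

```rust
use arrow::array::{Array, AsArray, ListArray};
use arrow::datatypes::Int32Type;
use std::collections::HashSet;

fn main() {
    // Two state rows for the same group, one per partial aggregate.
    let states = ListArray::from_iter_primitive::<Int32Type, _, _>(vec![
        Some(vec![Some(10)]), // partition 1 saw only the value 10
        Some(vec![Some(20)]), // partition 2 saw only the value 20
    ]);

    // Reading only the first row (the old behavior) under-counts.
    let mut first_only: HashSet<i32> = HashSet::new();
    first_only.extend(states.value(0).as_primitive::<Int32Type>().iter().flatten());
    assert_eq!(first_only.len(), 1); // wrong: reports 1 distinct value

    // Iterating every row (the fix) sees all the distinct values.
    let mut all_rows: HashSet<i32> = HashSet::new();
    for row in states.iter().flatten() {
        all_rows.extend(row.as_primitive::<Int32Type>().iter().flatten());
    }
    assert_eq!(all_rows.len(), 2); // correct: 2 distinct values
}
```

Dropping the later rows is how a `COUNT(DISTINCT ..)` that falls back to the generic accumulator could come out wrong, which is what the test above guards against.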