From 17bcde91442e5fc8dc6215f34971f64a73e40c4b Mon Sep 17 00:00:00 2001 From: Michael Edwards Date: Wed, 28 Apr 2021 19:06:10 +0200 Subject: [PATCH 1/3] sort_primitive result is capped to the min of limit or values.len fixes #235 --- arrow/src/compute/kernels/sort.rs | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/arrow/src/compute/kernels/sort.rs b/arrow/src/compute/kernels/sort.rs index 30341b6f63a6..99dcfc98bf39 100644 --- a/arrow/src/compute/kernels/sort.rs +++ b/arrow/src/compute/kernels/sort.rs @@ -495,13 +495,12 @@ where } // collect results directly into a buffer instead of a vec to avoid another aligned allocation - let mut result = MutableBuffer::new(values.len() * std::mem::size_of::()); + let result_capacity = len * std::mem::size_of::(); + let mut result = MutableBuffer::new(result_capacity); // sets len to capacity so we can access the whole buffer as a typed slice - result.resize(values.len() * std::mem::size_of::(), 0); + result.resize(result_capacity, 0); let result_slice: &mut [u32] = result.typed_data_mut(); - debug_assert_eq!(result_slice.len(), nulls_len + valids_len); - if options.nulls_first { let size = nulls_len.min(len); result_slice[0..nulls_len.min(len)].copy_from_slice(&nulls); @@ -1556,6 +1555,17 @@ mod tests { Some(3), vec![Some(1.0), Some(2.0), Some(3.0)], ); + + // limit that includes some extra nulls + test_sort_primitive_arrays::( + vec![Some(2.0), None, None, Some(1.0)], + Some(SortOptions { + descending: false, + nulls_first: false, + }), + Some(3), + vec![Some(1.0), Some(2.0), None], + ); } #[test] From 7bc352391e2ec78cffea43e86447876d10cdc275 Mon Sep 17 00:00:00 2001 From: Michael Edwards Date: Thu, 29 Apr 2021 15:47:28 +0200 Subject: [PATCH 2/3] Fixed length calculation of nulls to include --- arrow/src/compute/kernels/sort.rs | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/arrow/src/compute/kernels/sort.rs b/arrow/src/compute/kernels/sort.rs index 99dcfc98bf39..ba5c2dbd1d54 100644 --- a/arrow/src/compute/kernels/sort.rs +++ b/arrow/src/compute/kernels/sort.rs @@ -487,9 +487,13 @@ where len = limit.min(len); } if !descending { - sort_by(&mut valids, len - nulls_len, |a, b| cmp(a.1, b.1)); + sort_by(&mut valids, len.saturating_sub(nulls_len), |a, b| { + cmp(a.1, b.1) + }); } else { - sort_by(&mut valids, len - nulls_len, |a, b| cmp(a.1, b.1).reverse()); + sort_by(&mut valids, len.saturating_sub(nulls_len), |a, b| { + cmp(a.1, b.1).reverse() + }); // reverse to keep a stable ordering nulls.reverse(); } @@ -503,7 +507,7 @@ where if options.nulls_first { let size = nulls_len.min(len); - result_slice[0..nulls_len.min(len)].copy_from_slice(&nulls); + result_slice[0..size].copy_from_slice(&nulls[0..size]); if nulls_len < len { insert_valid_values(result_slice, nulls_len, &valids[0..len - size]); } @@ -1566,6 +1570,17 @@ mod tests { Some(3), vec![Some(1.0), Some(2.0), None], ); + + // too many nulls + test_sort_primitive_arrays::( + vec![Some(2.0), None, None, None], + Some(SortOptions { + descending: false, + nulls_first: true, + }), + Some(2), + vec![None, None], + ); } #[test] From e5840d7c887071e457372dc8ec74ea4fbfe8f06e Mon Sep 17 00:00:00 2001 From: Michael Edwards Date: Fri, 30 Apr 2021 15:17:04 +0200 Subject: [PATCH 3/3] Add more sort_primitive tests for sorts /w limit --- arrow/src/compute/kernels/sort.rs | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/arrow/src/compute/kernels/sort.rs b/arrow/src/compute/kernels/sort.rs index ba5c2dbd1d54..9287425bf126 100644 --- a/arrow/src/compute/kernels/sort.rs +++ b/arrow/src/compute/kernels/sort.rs @@ -1560,7 +1560,7 @@ mod tests { vec![Some(1.0), Some(2.0), Some(3.0)], ); - // limit that includes some extra nulls + // valid values less than limit with extra nulls test_sort_primitive_arrays::( vec![Some(2.0), None, None, Some(1.0)], Some(SortOptions { @@ -1571,7 +1571,17 @@ mod tests { vec![Some(1.0), Some(2.0), None], ); - // too many nulls + test_sort_primitive_arrays::( + vec![Some(2.0), None, None, Some(1.0)], + Some(SortOptions { + descending: false, + nulls_first: true, + }), + Some(3), + vec![None, None, Some(1.0)], + ); + + // more nulls than limit test_sort_primitive_arrays::( vec![Some(2.0), None, None, None], Some(SortOptions { @@ -1581,6 +1591,16 @@ mod tests { Some(2), vec![None, None], ); + + test_sort_primitive_arrays::( + vec![Some(2.0), None, None, None], + Some(SortOptions { + descending: false, + nulls_first: false, + }), + Some(2), + vec![Some(2.0), None], + ); } #[test]