Skip to content

Commit f993b08

Browse files
committed
Copy edit docs obsessively
1 parent 5c8c1ba commit f993b08

File tree

1 file changed

+23
-16
lines changed

1 file changed

+23
-16
lines changed

parquet/src/arrow/arrow_reader/statistics.rs

Lines changed: 23 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1046,39 +1046,41 @@ where
10461046

10471047
/// Extracts Parquet statistics as Arrow arrays
10481048
///
1049-
/// This is used to convert Parquet statistics to Arrow arrays, with proper type
1050-
/// conversions. This information can be used for pruning parquet files or row
1051-
/// groups based on the statistics embedded in parquet files
1049+
/// This is used to convert Parquet statistics to Arrow [`ArrayRef`], with
1050+
/// proper type conversions. This information can be used for pruning Parquet
1051+
/// files, row groups, and data pages based on the statistics embedded in
1052+
/// Parquet metadata.
10521053
///
10531054
/// # Schemas
10541055
///
1055-
/// The schema of the parquet file and the arrow schema are used to convert the
1056-
/// underlying statistics value (stored as a parquet value) into the
1057-
/// corresponding Arrow value. For example, Decimals are stored as binary in
1058-
/// parquet files.
1056+
/// The converter uses the schema of the Parquet file and the Arrow schema to
1057+
/// convert the underlying statistics value (stored as a parquet value) into the
1058+
/// corresponding Arrow value. For example, Decimals are stored as binary in
1059+
/// parquet files and this structure handles mapping them to the `i128`
1060+
/// representation used in Arrow.
10591061
///
1060-
/// The parquet_schema and arrow_schema do not have to be identical (for
1062+
/// Note: The Parquet schema and Arrow schema do not have to be identical (for
10611063
/// example, the columns may be in different orders and one or the other schemas
10621064
/// may have additional columns). The function [`parquet_column`] is used to
1063-
/// match the column in the parquet file to the column in the arrow schema.
1065+
/// match the column in the Parquet schema to the column in the Arrow schema.
10641066
#[derive(Debug)]
10651067
pub struct StatisticsConverter<'a> {
1066-
/// the index of the matched column in the parquet schema
1068+
/// the index of the matched column in the Parquet schema
10671069
parquet_column_index: Option<usize>,
1068-
/// The field (with data type) of the column in the arrow schema
1070+
/// The field (with data type) of the column in the Arrow schema
10691071
arrow_field: &'a Field,
10701072
}
10711073

10721074
impl<'a> StatisticsConverter<'a> {
1073-
/// Return the index of the column in the parquet schema, if any
1075+
/// Return the index of the column in the Parquet schema, if any
10741076
///
10751077
/// Returns `None` if the column was present in the Arrow schema, but not
10761078
/// present in the parquet file
10771079
pub fn parquet_column_index(&self) -> Option<usize> {
10781080
self.parquet_column_index
10791081
}
10801082

1081-
/// Return the arrow schema's [`Field`] of the column in the arrow schema
1083+
/// Return the arrow schema's [`Field`] of the column in the Arrow schema
10821084
pub fn arrow_field(&self) -> &'a Field {
10831085
self.arrow_field
10841086
}
@@ -1093,7 +1095,7 @@ impl<'a> StatisticsConverter<'a> {
10931095
/// # Example
10941096
/// ```no_run
10951097
/// # use arrow::datatypes::Schema;
1096-
/// # use arrow_array::ArrayRef;
1098+
/// # use arrow_array::{ArrayRef, UInt64Array};
10971099
/// # use parquet::arrow::arrow_reader::statistics::StatisticsConverter;
10981100
/// # use parquet::file::metadata::ParquetMetaData;
10991101
/// # fn get_parquet_metadata() -> ParquetMetaData { unimplemented!() }
@@ -1109,7 +1111,9 @@ impl<'a> StatisticsConverter<'a> {
11091111
/// let row_counts = converter.row_group_row_counts(metadata
11101112
/// .row_groups()
11111113
/// .iter()
1112-
/// );
1114+
/// ).unwrap();
1115+
/// // file had 2 row groups, with 1024 and 23 rows respectively
1116+
/// assert_eq!(row_counts, Some(UInt64Array::from(vec![1024, 23])));
11131117
/// ```
11141118
pub fn row_group_row_counts<I>(&self, metadatas: I) -> Result<Option<UInt64Array>>
11151119
where
@@ -1199,8 +1203,9 @@ impl<'a> StatisticsConverter<'a> {
11991203
///
12001204
/// # Example
12011205
/// ```no_run
1206+
/// # use std::sync::Arc;
12021207
/// # use arrow::datatypes::Schema;
1203-
/// # use arrow_array::ArrayRef;
1208+
/// # use arrow_array::{ArrayRef, Float64Array};
12041209
/// # use parquet::arrow::arrow_reader::statistics::StatisticsConverter;
12051210
/// # use parquet::file::metadata::ParquetMetaData;
12061211
/// # fn get_parquet_metadata() -> ParquetMetaData { unimplemented!() }
@@ -1216,6 +1221,8 @@ impl<'a> StatisticsConverter<'a> {
12161221
/// let min_values: ArrayRef = converter
12171222
/// .row_group_mins(metadata.row_groups().iter())
12181223
/// .unwrap();
1224+
/// // if "foo" is a Float64 value, the returned array will contain Float64 values
1225+
/// assert_eq!(min_values, Arc::new(Float64Array::from(vec![Some(1.0), Some(2.0)])) as _);
12191226
/// ```
12201227
pub fn row_group_mins<I>(&self, metadatas: I) -> Result<ArrayRef>
12211228
where

0 commit comments

Comments
 (0)