From f6bc3cc6a83eefbee0d958778d72c81f13d8e779 Mon Sep 17 00:00:00 2001
From: Bryce Mecum <petridish@gmail.com>
Date: Fri, 26 Aug 2022 16:55:56 -0800
Subject: [PATCH 01/23] Add and fix numpydoc check PR03 in pyarrow

---
 python/pyarrow/_csv.pyx     | 16 ++++++++--------
 python/pyarrow/_dataset.pyx | 26 +++++++++++++++++++++++---
 2 files changed, 31 insertions(+), 11 deletions(-)

diff --git a/python/pyarrow/_csv.pyx b/python/pyarrow/_csv.pyx
index 0ac32f1bbf26..a3da98f25286 100644
--- a/python/pyarrow/_csv.pyx
+++ b/python/pyarrow/_csv.pyx
@@ -104,14 +104,6 @@ cdef class ReadOptions(_Weakrefable):
     skip_rows : int, optional (default 0)
         The number of rows to skip before the column names (if any)
         and the CSV data.
-    skip_rows_after_names : int, optional (default 0)
-        The number of rows to skip after the column names.
-        This number can be larger than the number of rows in one
-        block, and empty rows are counted.
-        The order of application is as follows:
-        - `skip_rows` is applied (if non-zero);
-        - column names aread (unless `column_names` is set);
-        - `skip_rows_after_names` is applied (if non-zero).
     column_names : list, optional
         The column names of the target table.  If empty, fall back on
         `autogenerate_column_names`.
@@ -123,6 +115,14 @@ cdef class ReadOptions(_Weakrefable):
     encoding : str, optional (default 'utf8')
         The character encoding of the CSV data.  Columns that cannot
         decode using this encoding can still be read as Binary.
+    skip_rows_after_names : int, optional (default 0)
+        The number of rows to skip after the column names.
+        This number can be larger than the number of rows in one
+        block, and empty rows are counted.
+        The order of application is as follows:
+        - `skip_rows` is applied (if non-zero);
+        - column names aread (unless `column_names` is set);
+        - `skip_rows_after_names` is applied (if non-zero).
 
     Examples
     --------
diff --git a/python/pyarrow/_dataset.pyx b/python/pyarrow/_dataset.pyx
index 42781ff2aae1..33b1d39edf22 100644
--- a/python/pyarrow/_dataset.pyx
+++ b/python/pyarrow/_dataset.pyx
@@ -2513,6 +2513,16 @@ cdef class Scanner(_Weakrefable):
         ----------
         dataset : Dataset
             Dataset to scan.
+        use_threads : bool, default True
+            If enabled, then maximum parallelism will be used determined by
+            the number of available CPU cores.
+        use_async : bool, default True
+            This flag is deprecated and is being kept for this release for
+            backwards compatibility.  It will be removed in the next
+            release.
+        memory_pool : MemoryPool, default None
+            For memory allocations, if required. If not specified, uses the
+            default pool.
         columns : list of str, default None
             The columns to project. This can be a list of column names to
             include (order and duplicates will be preserved), or a dictionary
@@ -2601,6 +2611,16 @@ cdef class Scanner(_Weakrefable):
             fragment to scan.
         schema : Schema, optional
             The schema of the fragment.
+        use_threads : bool, default True
+            If enabled, then maximum parallelism will be used determined by
+            the number of available CPU cores.
+        use_async : bool, default True
+            This flag is deprecated and is being kept for this release for
+            backwards compatibility.  It will be removed in the next
+            release.
+        memory_pool : MemoryPool, default None
+            For memory allocations, if required. If not specified, uses the
+            default pool.
         columns : list of str, default None
             The columns to project. This can be a list of column names to
             include (order and duplicates will be preserved), or a dictionary
@@ -2634,9 +2654,6 @@ cdef class Scanner(_Weakrefable):
             The number of batches to read ahead in a file. This might not work
             for all file formats. Increasing this number will increase
             RAM usage but could also improve IO utilization.
-        fragment_scan_options : FragmentScanOptions, default None
-            Options specific to a particular scan and fragment type, which
-            can change between different scans of the same dataset.
         use_threads : bool, default True
             If enabled, then maximum parallelism will be used determined by
             the number of available CPU cores.
@@ -2647,6 +2664,9 @@ cdef class Scanner(_Weakrefable):
         memory_pool : MemoryPool, default None
             For memory allocations, if required. If not specified, uses the
             default pool.
+        fragment_scan_options : FragmentScanOptions, default None
+            Options specific to a particular scan and fragment type, which
+            can change between different scans of the same dataset.
         """
         cdef:
             shared_ptr[CScanOptions] options = make_shared[CScanOptions]()

From 8f55098adf5d712b3fee22517318b3950e918084 Mon Sep 17 00:00:00 2001
From: Bryce Mecum <petridish@gmail.com>
Date: Mon, 29 Aug 2022 14:07:30 -0800
Subject: [PATCH 02/23] Re-arrange order of skip_rows and skip_rows_after_names

---
 python/pyarrow/_csv.pyx | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/python/pyarrow/_csv.pyx b/python/pyarrow/_csv.pyx
index a3da98f25286..0ac32f1bbf26 100644
--- a/python/pyarrow/_csv.pyx
+++ b/python/pyarrow/_csv.pyx
@@ -104,6 +104,14 @@ cdef class ReadOptions(_Weakrefable):
     skip_rows : int, optional (default 0)
         The number of rows to skip before the column names (if any)
         and the CSV data.
+    skip_rows_after_names : int, optional (default 0)
+        The number of rows to skip after the column names.
+        This number can be larger than the number of rows in one
+        block, and empty rows are counted.
+        The order of application is as follows:
+        - `skip_rows` is applied (if non-zero);
+        - column names aread (unless `column_names` is set);
+        - `skip_rows_after_names` is applied (if non-zero).
     column_names : list, optional
         The column names of the target table.  If empty, fall back on
         `autogenerate_column_names`.
@@ -115,14 +123,6 @@ cdef class ReadOptions(_Weakrefable):
     encoding : str, optional (default 'utf8')
         The character encoding of the CSV data.  Columns that cannot
         decode using this encoding can still be read as Binary.
-    skip_rows_after_names : int, optional (default 0)
-        The number of rows to skip after the column names.
-        This number can be larger than the number of rows in one
-        block, and empty rows are counted.
-        The order of application is as follows:
-        - `skip_rows` is applied (if non-zero);
-        - column names aread (unless `column_names` is set);
-        - `skip_rows_after_names` is applied (if non-zero).
 
     Examples
     --------

From 4a73ce8bcce8aeaf96bf577f0f685b93e9875bc6 Mon Sep 17 00:00:00 2001
From: Bryce Mecum <petridish@gmail.com>
Date: Wed, 14 Sep 2022 10:07:00 -0800
Subject: [PATCH 03/23] Clean up arg order in pyarrow Dataset methods

---
 python/pyarrow/_dataset.pyx | 30 ++++++++++--------------------
 1 file changed, 10 insertions(+), 20 deletions(-)

diff --git a/python/pyarrow/_dataset.pyx b/python/pyarrow/_dataset.pyx
index 33b1d39edf22..406833b125cd 100644
--- a/python/pyarrow/_dataset.pyx
+++ b/python/pyarrow/_dataset.pyx
@@ -2513,16 +2513,6 @@ cdef class Scanner(_Weakrefable):
         ----------
         dataset : Dataset
             Dataset to scan.
-        use_threads : bool, default True
-            If enabled, then maximum parallelism will be used determined by
-            the number of available CPU cores.
-        use_async : bool, default True
-            This flag is deprecated and is being kept for this release for
-            backwards compatibility.  It will be removed in the next
-            release.
-        memory_pool : MemoryPool, default None
-            For memory allocations, if required. If not specified, uses the
-            default pool.
         columns : list of str, default None
             The columns to project. This can be a list of column names to
             include (order and duplicates will be preserved), or a dictionary
@@ -2611,16 +2601,6 @@ cdef class Scanner(_Weakrefable):
             fragment to scan.
         schema : Schema, optional
             The schema of the fragment.
-        use_threads : bool, default True
-            If enabled, then maximum parallelism will be used determined by
-            the number of available CPU cores.
-        use_async : bool, default True
-            This flag is deprecated and is being kept for this release for
-            backwards compatibility.  It will be removed in the next
-            release.
-        memory_pool : MemoryPool, default None
-            For memory allocations, if required. If not specified, uses the
-            default pool.
         columns : list of str, default None
             The columns to project. This can be a list of column names to
             include (order and duplicates will be preserved), or a dictionary
@@ -2667,6 +2647,16 @@ cdef class Scanner(_Weakrefable):
         fragment_scan_options : FragmentScanOptions, default None
             Options specific to a particular scan and fragment type, which
             can change between different scans of the same dataset.
+        use_threads : bool, default True
+            If enabled, then maximum parallelism will be used determined by
+            the number of available CPU cores.
+        use_async : bool, default True
+            This flag is deprecated and is being kept for this release for
+            backwards compatibility.  It will be removed in the next
+            release.
+        memory_pool : MemoryPool, default None
+            For memory allocations, if required. If not specified, uses the
+            default pool.
         """
         cdef:
             shared_ptr[CScanOptions] options = make_shared[CScanOptions]()

From 61fcbf1def81f8c5e0de6222deb26757ea4305ab Mon Sep 17 00:00:00 2001
From: Bryce Mecum <petridish@gmail.com>
Date: Wed, 14 Sep 2022 13:29:57 -0800
Subject: [PATCH 04/23] Fixes for YD01

---
 python/pyarrow/parquet/core.py | 4 ++--
 python/pyarrow/plasma.py       | 9 +++++----
 2 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/python/pyarrow/parquet/core.py b/python/pyarrow/parquet/core.py
index e6148be0f896..e738236bf53a 100644
--- a/python/pyarrow/parquet/core.py
+++ b/python/pyarrow/parquet/core.py
@@ -541,9 +541,9 @@ def iter_batches(self, batch_size=65536, row_groups=None, columns=None,
             If True and file has custom pandas schema metadata, ensure that
             index columns are also loaded.
 
-        Returns
+        Yields
         -------
-        iterator of pyarrow.RecordBatch
+        pyarrow.RecordBatch
             Contents of each batch as a record batch
 
         Examples
diff --git a/python/pyarrow/plasma.py b/python/pyarrow/plasma.py
index 5c2c6543418d..00342765557d 100644
--- a/python/pyarrow/plasma.py
+++ b/python/pyarrow/plasma.py
@@ -108,11 +108,12 @@ def start_plasma_store(plasma_store_memory,
     external_store : str
         External store to use for evicted objects.
 
-    Returns
+    Yields
     -------
-    result : (str, subprocess.Popen)
-        A tuple of the name of the plasma store socket and the process ID of
-        the plasma store process.
+    plasma_store_name : str
+        Name of the plasma store socket
+    proc : subprocess.Popen
+        Process ID of the plasma store process
     """
     warnings.warn(
         "Plasma is deprecated since Arrow 10.0.0. It will be removed in "

From 5007824cdf829cc3235679f0150b0c2499922e9a Mon Sep 17 00:00:00 2001
From: Bryce Mecum <petridish@gmail.com>
Date: Wed, 14 Sep 2022 13:30:15 -0800
Subject: [PATCH 05/23] Fixes for RT03

---
 python/pyarrow/compute.py      | 9 ++++++++-
 python/pyarrow/dataset.py      | 2 ++
 python/pyarrow/feather.py      | 2 ++
 python/pyarrow/ipc.py          | 3 +++
 python/pyarrow/parquet/core.py | 7 ++++++-
 5 files changed, 21 insertions(+), 2 deletions(-)

diff --git a/python/pyarrow/compute.py b/python/pyarrow/compute.py
index 265d75f6f6b0..1ee6c40f4232 100644
--- a/python/pyarrow/compute.py
+++ b/python/pyarrow/compute.py
@@ -374,6 +374,7 @@ def cast(arr, target_type=None, safe=None, options=None):
     Returns
     -------
     casted : Array
+        The cast result as a new Array
     """
     safe_vars_passed = (safe is not None) or (target_type is not None)
 
@@ -452,6 +453,7 @@ def take(data, indices, *, boundscheck=True, memory_pool=None):
     Returns
     -------
     result : depends on inputs
+        Selected values for the given indices
 
     Examples
     --------
@@ -490,6 +492,7 @@ def fill_null(values, fill_value):
     Returns
     -------
     result : depends on inputs
+        Values with all null elements replaced
 
     Examples
     --------
@@ -534,7 +537,8 @@ def top_k_unstable(values, k, sort_keys=None, *, memory_pool=None):
 
     Returns
     -------
-    result : Array of indices
+    result : Array
+        Indices of the top-k ordered elements
 
     Examples
     --------
@@ -581,6 +585,7 @@ def bottom_k_unstable(values, k, sort_keys=None, *, memory_pool=None):
     Returns
     -------
     result : Array of indices
+        Indices of the bottom-k ordered elements
 
     Examples
     --------
@@ -650,6 +655,7 @@ def field(*name_or_index):
     Returns
     -------
     field_expr : Expression
+        Reference to the given field
 
     Examples
     --------
@@ -691,5 +697,6 @@ def scalar(value):
     Returns
     -------
     scalar_expr : Expression
+        An Expression representing the scalar value
     """
     return Expression._scalar(value)
diff --git a/python/pyarrow/dataset.py b/python/pyarrow/dataset.py
index adbf064a736c..de9469de445b 100644
--- a/python/pyarrow/dataset.py
+++ b/python/pyarrow/dataset.py
@@ -151,6 +151,7 @@ def partitioning(schema=None, field_names=None, flavor=None,
     Returns
     -------
     Partitioning or PartitioningFactory
+        The partitioning scheme
 
     Examples
     --------
@@ -524,6 +525,7 @@ def parquet_dataset(metadata_path, schema=None, filesystem=None, format=None,
     Returns
     -------
     FileSystemDataset
+        The dataset corresponding to the given metadata
     """
     from pyarrow.fs import LocalFileSystem, _ensure_filesystem
 
diff --git a/python/pyarrow/feather.py b/python/pyarrow/feather.py
index 54a16a2f89d3..fbd060259700 100644
--- a/python/pyarrow/feather.py
+++ b/python/pyarrow/feather.py
@@ -221,6 +221,7 @@ def read_feather(source, columns=None, use_threads=True,
     Returns
     -------
     df : pandas.DataFrame
+        The contents of the Feather file as a pandas.DataFrame
     """
     return (read_table(
         source, columns=columns, memory_map=memory_map,
@@ -246,6 +247,7 @@ def read_table(source, columns=None, memory_map=False, use_threads=True):
     Returns
     -------
     table : pyarrow.Table
+        The contents of the Feather file as a pyarrow.Table
     """
     reader = _feather.FeatherReader(
         source, use_memory_map=memory_map, use_threads=use_threads)
diff --git a/python/pyarrow/ipc.py b/python/pyarrow/ipc.py
index fc724109d94c..9006637dc477 100644
--- a/python/pyarrow/ipc.py
+++ b/python/pyarrow/ipc.py
@@ -164,6 +164,7 @@ def new_stream(sink, schema, *, use_legacy_format=None, options=None):
 Returns
 -------
 writer : RecordBatchStreamWriter
+    A writer for the given sink
 """.format(_ipc_writer_class_doc)
 
 
@@ -202,6 +203,7 @@ def new_file(sink, schema, *, use_legacy_format=None, options=None):
 Returns
 -------
 writer : RecordBatchFileWriter
+    A writer for the given sink
 """.format(_ipc_writer_class_doc)
 
 
@@ -271,6 +273,7 @@ def deserialize_pandas(buf, *, use_threads=True):
     Returns
     -------
     df : pandas.DataFrame
+        The buffer deserialized as pandas DataFrame
     """
     buffer_reader = pa.BufferReader(buf)
     with pa.RecordBatchStreamReader(buffer_reader) as reader:
diff --git a/python/pyarrow/parquet/core.py b/python/pyarrow/parquet/core.py
index e738236bf53a..837e35fae8da 100644
--- a/python/pyarrow/parquet/core.py
+++ b/python/pyarrow/parquet/core.py
@@ -645,7 +645,8 @@ def scan_contents(self, columns=None, batch_size=65536):
 
         Returns
         -------
-        num_rows : number of rows in file
+        num_rows : int
+            Number of rows in file
 
         Examples
         --------
@@ -1186,6 +1187,7 @@ def get_metadata(self):
         Returns
         -------
         metadata : FileMetaData
+            The file's metadata
         """
         with self.open() as parquet:
             return parquet.metadata
@@ -1222,6 +1224,7 @@ def read(self, columns=None, use_threads=True, partitions=None,
         Returns
         -------
         table : pyarrow.Table
+            The piece as a pyarrow.Table
         """
         if self.open_file_func is not None:
             reader = self.open()
@@ -3560,6 +3563,7 @@ def read_metadata(where, memory_map=False, decryption_properties=None,
     Returns
     -------
     metadata : FileMetaData
+        The metadata of the Parquet file
 
     Examples
     --------
@@ -3609,6 +3613,7 @@ def read_schema(where, memory_map=False, decryption_properties=None,
     Returns
     -------
     schema : pyarrow.Schema
+        The schema of the Parquet file
 
     Examples
     --------

From 88a078dbb48aeb29b3796360b2d00d95f31baa6b Mon Sep 17 00:00:00 2001
From: Bryce Mecum <petridish@gmail.com>
Date: Thu, 15 Sep 2022 13:16:17 -0800
Subject: [PATCH 06/23] Fix PR05

---
 python/pyarrow/_dataset.pyx         |  6 +++---
 python/pyarrow/_dataset_parquet.pyx | 18 +++++++++---------
 python/pyarrow/fs.py                |  2 +-
 python/pyarrow/io.pxi               |  4 ++--
 python/pyarrow/ipc.pxi              | 26 +++++++++++++-------------
 python/pyarrow/parquet/core.py      |  8 ++++----
 6 files changed, 32 insertions(+), 32 deletions(-)

diff --git a/python/pyarrow/_dataset.pyx b/python/pyarrow/_dataset.pyx
index 406833b125cd..2d39f9e37b1d 100644
--- a/python/pyarrow/_dataset.pyx
+++ b/python/pyarrow/_dataset.pyx
@@ -532,12 +532,12 @@ cdef class InMemoryDataset(Dataset):
 
     Parameters
     ----------
-    source : The data for this dataset.
+    source : The data for this dataset
         Can be a RecordBatch, Table, list of
-        RecordBatch/Table, iterable of RecordBatch, or a RecordBatchReader.
+        RecordBatch/Table, iterable of RecordBatch, or a RecordBatchReader
         If an iterable is provided, the schema must also be provided.
     schema : Schema, optional
-        Only required if passing an iterable as the source.
+        Only required if passing an iterable as the source
     """
 
     cdef:
diff --git a/python/pyarrow/_dataset_parquet.pyx b/python/pyarrow/_dataset_parquet.pyx
index 744bfac6bfb5..eefeb49afcf7 100644
--- a/python/pyarrow/_dataset_parquet.pyx
+++ b/python/pyarrow/_dataset_parquet.pyx
@@ -67,11 +67,11 @@ cdef class ParquetFileFormat(FileFormat):
     Parameters
     ----------
     read_options : ParquetReadOptions
-        Read options for the file.
+        Read options for the file
     default_fragment_scan_options : ParquetFragmentScanOptions
-        Scan Options for the file.
+        Scan Options for the file
     **kwargs : dict
-        Additional options for read option or scan option.
+        Additional options for read option or scan option
     """
 
     cdef:
@@ -236,9 +236,9 @@ class RowGroupInfo:
 
     Parameters
     ----------
-    id : the group id.
-    metadata : the rowgroup metadata.
-    schema : schema of the rows.
+    id : the group id
+    metadata : the rowgroup metadata
+    schema : schema of the rows
     """
 
     def __init__(self, id, metadata, schema):
@@ -449,12 +449,12 @@ cdef class ParquetReadOptions(_Weakrefable):
     ----------
     dictionary_columns : list of string, default None
         Names of columns which should be dictionary encoded as
-        they are read.
-    coerce_int96_timestamp_unit : str, default None.
+        they are read
+    coerce_int96_timestamp_unit : str, default None
         Cast timestamps that are stored in INT96 format to a particular
         resolution (e.g. 'ms'). Setting to None is equivalent to 'ns'
         and therefore INT96 timestamps will be inferred as timestamps
-        in nanoseconds.
+        in nanoseconds
     """
 
     cdef public:
diff --git a/python/pyarrow/fs.py b/python/pyarrow/fs.py
index ab151bc5d8f6..21db243528c7 100644
--- a/python/pyarrow/fs.py
+++ b/python/pyarrow/fs.py
@@ -281,7 +281,7 @@ class FSSpecHandler(FileSystemHandler):
 
     Parameters
     ----------
-    fs : FSSpec-compliant filesystem instance.
+    fs : FSSpec-compliant filesystem instance
 
     Examples
     --------
diff --git a/python/pyarrow/io.pxi b/python/pyarrow/io.pxi
index f1a1b315f681..21c17b4d36dc 100644
--- a/python/pyarrow/io.pxi
+++ b/python/pyarrow/io.pxi
@@ -2207,7 +2207,7 @@ def input_stream(source, compression='detect', buffer_size=None):
 
     Parameters
     ----------
-    source : str, Path, buffer, file-like object, ...
+    source : str, Path, buffer, or file-like object
         The source to open for reading.
     compression : str optional, default 'detect'
         The compression algorithm to use for on-the-fly decompression.
@@ -2259,7 +2259,7 @@ def output_stream(source, compression='detect', buffer_size=None):
 
     Parameters
     ----------
-    source : str, Path, buffer, file-like object, ...
+    source : str, Path, buffer, or file-like object
         The source to open for writing.
     compression : str optional, default 'detect'
         The compression algorithm to use for on-the-fly compression.
diff --git a/python/pyarrow/ipc.pxi b/python/pyarrow/ipc.pxi
index 35a07d8737b5..2a40a51e9441 100644
--- a/python/pyarrow/ipc.pxi
+++ b/python/pyarrow/ipc.pxi
@@ -57,11 +57,11 @@ class WriteStats(_WriteStats):
 
     Parameters
     ----------
-    num_messages : number of messages.
-    num_record_batches : number of record batches.
-    num_dictionary_batches : number of dictionary batches.
-    num_dictionary_deltas : delta of dictionaries.
-    num_replaced_dictionaries : number of replaced dictionaries.
+    num_messages : number of messages
+    num_record_batches : number of record batches
+    num_dictionary_batches : number of dictionary batches
+    num_dictionary_deltas : delta of dictionaries
+    num_replaced_dictionaries : number of replaced dictionaries
     """
     __slots__ = ()
 
@@ -84,11 +84,11 @@ class ReadStats(_ReadStats):
 
     Parameters
     ----------
-    num_messages : number of messages.
-    num_record_batches : number of record batches.
-    num_dictionary_batches : number of dictionary batches.
-    num_dictionary_deltas : delta of dictionaries.
-    num_replaced_dictionaries : number of replaced dictionaries.
+    num_messages : number of messages
+    num_record_batches : number of record batches
+    num_dictionary_batches : number of dictionary batches
+    num_dictionary_deltas : delta of dictionaries
+    num_replaced_dictionaries : number of replaced dictionaries
     """
     __slots__ = ()
 
@@ -108,14 +108,14 @@ cdef class IpcReadOptions(_Weakrefable):
     ----------
     ensure_native_endian : bool
         Whether to convert incoming data to platform-native endianness.
-        Default is true.
+        Default is true
     use_threads : bool
         Whether to use the global CPU thread pool to parallelize any
-        computational tasks like decompression.
+        computational tasks like decompression
     included_fields : list
         If empty (the default), return all deserialized fields.
         If non-empty, the values are the indices of fields to read on
-        the top-level schema.
+        the top-level schema
     """
     __slots__ = ()
 
diff --git a/python/pyarrow/parquet/core.py b/python/pyarrow/parquet/core.py
index 837e35fae8da..dd9a426d2cb1 100644
--- a/python/pyarrow/parquet/core.py
+++ b/python/pyarrow/parquet/core.py
@@ -242,7 +242,7 @@ class ParquetFile:
         Coalesce and issue file reads in parallel to improve performance on
         high-latency filesystems (e.g. S3). If True, Arrow will use a
         background I/O thread pool.
-    coerce_int96_timestamp_unit : str, default None.
+    coerce_int96_timestamp_unit : str, default None
         Cast timestamps that are stored in INT96 format to a particular
         resolution (e.g. 'ms'). Setting to None is equivalent to 'ns'
         and therefore INT96 timestamps will be inferred as timestamps
@@ -1312,7 +1312,7 @@ def get_index(self, key):
 
         Parameters
         ----------
-        key : The value for which we want to known the index.
+        key : The value for which we want to know the index
         """
         if key in self.key_indices:
             return self.key_indices[key]
@@ -1716,7 +1716,7 @@ class ParquetDataset:
     use_legacy_dataset=False. If using a filesystem layer that itself
     performs readahead (e.g. fsspec's S3FS), disable readahead for best
     results.
-coerce_int96_timestamp_unit : str, default None.
+coerce_int96_timestamp_unit : str, default None
     Cast timestamps that are stored in INT96 format to a particular resolution
     (e.g. 'ms'). Setting to None is equivalent to 'ns' and therefore INT96
     timestamps will be inferred as timestamps in nanoseconds.
@@ -2786,7 +2786,7 @@ def partitioning(self):
     use_legacy_dataset=False. If using a filesystem layer that itself
     performs readahead (e.g. fsspec's S3FS), disable readahead for best
     results.
-coerce_int96_timestamp_unit : str, default None.
+coerce_int96_timestamp_unit : str, default None
     Cast timestamps that are stored in INT96 format to a particular
     resolution (e.g. 'ms'). Setting to None is equivalent to 'ns'
     and therefore INT96 timestamps will be inferred as timestamps

From 70387316e386b510773ef1248ed89c89af5e4992 Mon Sep 17 00:00:00 2001
From: Bryce Mecum <petridish@gmail.com>
Date: Thu, 15 Sep 2022 13:16:28 -0800
Subject: [PATCH 07/23] Fix docstrings in pyarrow ipc.py

---
 python/pyarrow/ipc.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/python/pyarrow/ipc.py b/python/pyarrow/ipc.py
index 9006637dc477..7885636dbc7c 100644
--- a/python/pyarrow/ipc.py
+++ b/python/pyarrow/ipc.py
@@ -181,6 +181,7 @@ def open_stream(source, *, options=None, memory_pool=None):
         If None, default values will be used.
     memory_pool : MemoryPool, default None
         If None, default memory pool is used.
+
     Returns
     -------
     reader : RecordBatchStreamReader
@@ -223,6 +224,7 @@ def open_file(source, footer_offset=None, *, options=None, memory_pool=None):
         If None, default values will be used.
     memory_pool : MemoryPool, default None
         If None, default memory pool is used.
+
     Returns
     -------
     reader : RecordBatchFileReader

From 1d1d2d4ad053f61ff76d7abc098c7faaa0ac1dce Mon Sep 17 00:00:00 2001
From: Bryce Mecum <petridish@gmail.com>
Date: Thu, 15 Sep 2022 13:41:44 -0800
Subject: [PATCH 08/23] Fix PR04

---
 python/pyarrow/array.pxi |  6 +++---
 python/pyarrow/ipc.pxi   |  2 +-
 python/pyarrow/table.pxi | 10 +++++-----
 3 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi
index 5772592ead1f..b2dff6567737 100644
--- a/python/pyarrow/array.pxi
+++ b/python/pyarrow/array.pxi
@@ -984,7 +984,7 @@ cdef class Array(_PandasConvertible):
 
         Parameters
         ----------
-        null_encoding
+        null_encoding : str, default "mask"
             How to handle null entries.
 
         Returns
@@ -1265,7 +1265,7 @@ cdef class Array(_PandasConvertible):
 
         Parameters
         ----------
-        fill_value
+        fill_value : any
             The replacement value for null entries.
 
         Returns
@@ -1363,7 +1363,7 @@ cdef class Array(_PandasConvertible):
         ----------
         mask : Array or array-like
             The boolean mask to filter the array with.
-        null_selection_behavior
+        null_selection_behavior : str, default "drop"
             How nulls in the mask should be handled.
 
         Returns
diff --git a/python/pyarrow/ipc.pxi b/python/pyarrow/ipc.pxi
index 2a40a51e9441..8ad1f2425d88 100644
--- a/python/pyarrow/ipc.pxi
+++ b/python/pyarrow/ipc.pxi
@@ -411,7 +411,7 @@ cdef class MessageReader(_Weakrefable):
 
         Parameters
         ----------
-        source
+        source : bytes/buffer-like, pyarrow.NativeFile, or file-like Python object
             A readable source, like an InputStream
         """
         cdef:
diff --git a/python/pyarrow/table.pxi b/python/pyarrow/table.pxi
index 53e841228240..bcc428a4cb29 100644
--- a/python/pyarrow/table.pxi
+++ b/python/pyarrow/table.pxi
@@ -366,7 +366,7 @@ cdef class ChunkedArray(_PandasConvertible):
 
         Parameters
         ----------
-        fill_value
+        fill_value : any
             The replacement value for null entries.
 
         Returns
@@ -530,7 +530,7 @@ cdef class ChunkedArray(_PandasConvertible):
 
         Parameters
         ----------
-        null_encoding
+        null_encoding : str, default "mask"
             How to handle null entries.
 
         Returns
@@ -853,7 +853,7 @@ cdef class ChunkedArray(_PandasConvertible):
         ----------
         mask : Array or array-like
             The boolean mask to filter the chunked array with.
-        null_selection_behavior
+        null_selection_behavior : str, default "drop"
             How nulls in the mask should be handled.
 
         Returns
@@ -2103,7 +2103,7 @@ cdef class RecordBatch(_PandasConvertible):
         ----------
         mask : Array or array-like
             The boolean mask to filter the record batch with.
-        null_selection_behavior
+        null_selection_behavior : str, default "drop"
             How nulls in the mask should be handled.
 
         Returns
@@ -2938,7 +2938,7 @@ cdef class Table(_PandasConvertible):
         ----------
         mask : Array or array-like or .Expression
             The boolean mask or the :class:`.Expression` to filter the table with.
-        null_selection_behavior
+        null_selection_behavior : str, default "drop"
             How nulls in the mask should be handled, does nothing if
             an :class:`.Expression` is used.
 

From 7c9fc851633631c51c3893d8da13f9661590c546 Mon Sep 17 00:00:00 2001
From: Bryce Mecum <petridish@gmail.com>
Date: Mon, 19 Sep 2022 13:02:23 -0800
Subject: [PATCH 09/23] Fixes for RT03

---
 python/pyarrow/ipc.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/python/pyarrow/ipc.py b/python/pyarrow/ipc.py
index 7885636dbc7c..523196e1e338 100644
--- a/python/pyarrow/ipc.py
+++ b/python/pyarrow/ipc.py
@@ -185,6 +185,7 @@ def open_stream(source, *, options=None, memory_pool=None):
     Returns
     -------
     reader : RecordBatchStreamReader
+        A reader for the given source
     """
     return RecordBatchStreamReader(source, options=options,
                                    memory_pool=memory_pool)
@@ -228,6 +229,7 @@ def open_file(source, footer_offset=None, *, options=None, memory_pool=None):
     Returns
     -------
     reader : RecordBatchFileReader
+        A reader for the given source.
     """
     return RecordBatchFileReader(
         source, footer_offset=footer_offset,

From efaaa3f9e73ee2e049c40e461a6ca659f3309c91 Mon Sep 17 00:00:00 2001
From: Bryce Mecum <petridish@gmail.com>
Date: Mon, 19 Sep 2022 13:04:53 -0800
Subject: [PATCH 10/23] enable numpydoc checks GL10, PR04, PR05, RT03, and YD01

---
 dev/archery/archery/cli.py | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/dev/archery/archery/cli.py b/dev/archery/archery/cli.py
index 105a64c0603d..d489372fbd9c 100644
--- a/dev/archery/archery/cli.py
+++ b/dev/archery/archery/cli.py
@@ -331,7 +331,17 @@ def numpydoc(src, symbols, allow_rule, disallow_rule):
     archery numpydoc pyarrow.csv pyarrow.json pyarrow.parquet
     archery numpydoc pyarrow.array
     """
-    disallow_rule = disallow_rule or {'GL01', 'SA01', 'EX01', 'ES01'}
+    allow_rule = allow_rule or {
+        'GL10',
+        'PR01',
+        'PR03',
+        'PR04',
+        'PR05',
+        'PR10',
+        'RT03',
+        'YD01'
+    }
+
     try:
         results = python_numpydoc(
             symbols, allow_rules=_flatten_numpydoc_rules(allow_rule),

From 1166e01626ebece9f2c6b6545bf7f98dbafdedc5 Mon Sep 17 00:00:00 2001
From: Bryce Mecum <petridish@gmail.com>
Date: Mon, 19 Sep 2022 13:08:53 -0800
Subject: [PATCH 11/23] Make archery numpydoc use default behavior

---
 docker-compose.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docker-compose.yml b/docker-compose.yml
index 23583d6b6542..93cf9563eac7 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -1107,7 +1107,7 @@ services:
       ["/arrow/ci/scripts/cpp_build.sh /arrow /build &&
         /arrow/ci/scripts/python_build.sh /arrow /build &&
         pip install -e /arrow/dev/archery[numpydoc] &&
-        archery numpydoc --allow-rule PR01,PR03,PR10 &&
+        archery numpydoc &&
         /arrow/ci/scripts/python_test.sh /arrow"]
 
   conda-python-dask:

From 6fafe445b294ab0c562c21cf82fd03d7f468fbc3 Mon Sep 17 00:00:00 2001
From: Bryce Mecum <petridish@gmail.com>
Date: Wed, 4 Jan 2023 12:51:00 -0900
Subject: [PATCH 12/23] Fix regression in PR03

---
 python/pyarrow/_dataset.pyx | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/python/pyarrow/_dataset.pyx b/python/pyarrow/_dataset.pyx
index 2d39f9e37b1d..aad7f7d6dcc1 100644
--- a/python/pyarrow/_dataset.pyx
+++ b/python/pyarrow/_dataset.pyx
@@ -2634,6 +2634,9 @@ cdef class Scanner(_Weakrefable):
             The number of batches to read ahead in a file. This might not work
             for all file formats. Increasing this number will increase
             RAM usage but could also improve IO utilization.
+        fragment_scan_options : FragmentScanOptions, default None
+            Options specific to a particular scan and fragment type, which
+            can change between different scans of the same dataset.
         use_threads : bool, default True
             If enabled, then maximum parallelism will be used determined by
             the number of available CPU cores.
@@ -2644,9 +2647,6 @@ cdef class Scanner(_Weakrefable):
         memory_pool : MemoryPool, default None
             For memory allocations, if required. If not specified, uses the
             default pool.
-        fragment_scan_options : FragmentScanOptions, default None
-            Options specific to a particular scan and fragment type, which
-            can change between different scans of the same dataset.
         use_threads : bool, default True
             If enabled, then maximum parallelism will be used determined by
             the number of available CPU cores.

From 31b5c67b3bd9aeed9772884330da7aeba218b0f5 Mon Sep 17 00:00:00 2001
From: Bryce Mecum <petridish@gmail.com>
Date: Wed, 4 Jan 2023 12:58:06 -0900
Subject: [PATCH 13/23] Fix regression in RT03

---
 python/pyarrow/parquet/core.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/python/pyarrow/parquet/core.py b/python/pyarrow/parquet/core.py
index dd9a426d2cb1..305f6b7648dd 100644
--- a/python/pyarrow/parquet/core.py
+++ b/python/pyarrow/parquet/core.py
@@ -162,6 +162,7 @@ def filters_to_expression(filters):
     Returns
     -------
     pyarrow.compute.Expression
+        An Expression representing the filters
     """
     import pyarrow.dataset as ds
 

From b38c4f973c40fb1e6d618d1a984ebd7cb48db182 Mon Sep 17 00:00:00 2001
From: Bryce Mecum <petridish@gmail.com>
Date: Wed, 4 Jan 2023 13:07:57 -0900
Subject: [PATCH 14/23] Revert unrelated changes to archery/docker-compose

---
 dev/archery/archery/cli.py | 11 +----------
 docker-compose.yml         |  2 +-
 2 files changed, 2 insertions(+), 11 deletions(-)

diff --git a/dev/archery/archery/cli.py b/dev/archery/archery/cli.py
index d489372fbd9c..77c4362cdb49 100644
--- a/dev/archery/archery/cli.py
+++ b/dev/archery/archery/cli.py
@@ -331,16 +331,7 @@ def numpydoc(src, symbols, allow_rule, disallow_rule):
     archery numpydoc pyarrow.csv pyarrow.json pyarrow.parquet
     archery numpydoc pyarrow.array
     """
-    allow_rule = allow_rule or {
-        'GL10',
-        'PR01',
-        'PR03',
-        'PR04',
-        'PR05',
-        'PR10',
-        'RT03',
-        'YD01'
-    }
+    disallow_rule = disallow_rule or {'GL01', 'SA01', 'EX01', 'ES01'}
 
     try:
         results = python_numpydoc(
diff --git a/docker-compose.yml b/docker-compose.yml
index 93cf9563eac7..9ef8f6dbcebc 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -1107,7 +1107,7 @@ services:
       ["/arrow/ci/scripts/cpp_build.sh /arrow /build &&
         /arrow/ci/scripts/python_build.sh /arrow /build &&
         pip install -e /arrow/dev/archery[numpydoc] &&
-        archery numpydoc &&
+        archery numpydoc --allow-rule GL10,PR01,PR03,PR05,PR10,RT03,YD01 &&
         /arrow/ci/scripts/python_test.sh /arrow"]
 
   conda-python-dask:

From 733b29975896dbfe5a3a5cf0d68af4df6fe88c07 Mon Sep 17 00:00:00 2001
From: Bryce Mecum <petridish@gmail.com>
Date: Wed, 4 Jan 2023 13:08:39 -0900
Subject: [PATCH 15/23] Whitespace

---
 dev/archery/archery/cli.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/dev/archery/archery/cli.py b/dev/archery/archery/cli.py
index 77c4362cdb49..105a64c0603d 100644
--- a/dev/archery/archery/cli.py
+++ b/dev/archery/archery/cli.py
@@ -332,7 +332,6 @@ def numpydoc(src, symbols, allow_rule, disallow_rule):
     archery numpydoc pyarrow.array
     """
     disallow_rule = disallow_rule or {'GL01', 'SA01', 'EX01', 'ES01'}
-
     try:
         results = python_numpydoc(
             symbols, allow_rules=_flatten_numpydoc_rules(allow_rule),

From 8f42248444fbf1aece52dc1b0e4d841eee766210 Mon Sep 17 00:00:00 2001
From: Bryce Mecum <petridish@gmail.com>
Date: Thu, 5 Jan 2023 15:27:42 -0900
Subject: [PATCH 16/23] Add PR04 to archery job

---
 docker-compose.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docker-compose.yml b/docker-compose.yml
index 9ef8f6dbcebc..ca42291dcddb 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -1107,7 +1107,7 @@ services:
       ["/arrow/ci/scripts/cpp_build.sh /arrow /build &&
         /arrow/ci/scripts/python_build.sh /arrow /build &&
         pip install -e /arrow/dev/archery[numpydoc] &&
-        archery numpydoc --allow-rule GL10,PR01,PR03,PR05,PR10,RT03,YD01 &&
+        archery numpydoc --allow-rule GL10,PR01,PR03,PR04,PR05,PR10,RT03,YD01 &&
         /arrow/ci/scripts/python_test.sh /arrow"]
 
   conda-python-dask:

From ff0c382c926b4517838103f64e1455457fd901dc Mon Sep 17 00:00:00 2001
From: Bryce Mecum <petridish@gmail.com>
Date: Fri, 6 Jan 2023 12:32:13 -0900
Subject: [PATCH 17/23] Address review comments

From https://github.com/apache/arrow/pull/15214
---
 python/pyarrow/_dataset.pyx         |  4 ++--
 python/pyarrow/_dataset_parquet.pyx | 13 ++++++++-----
 2 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/python/pyarrow/_dataset.pyx b/python/pyarrow/_dataset.pyx
index aad7f7d6dcc1..5f1610c384fb 100644
--- a/python/pyarrow/_dataset.pyx
+++ b/python/pyarrow/_dataset.pyx
@@ -532,8 +532,8 @@ cdef class InMemoryDataset(Dataset):
 
     Parameters
     ----------
-    source : The data for this dataset
-        Can be a RecordBatch, Table, list of
+    source : RecordBatch, Table, list, tuple
+        The data for this dataset. Can be a RecordBatch, Table, list of
         RecordBatch/Table, iterable of RecordBatch, or a RecordBatchReader
         If an iterable is provided, the schema must also be provided.
     schema : Schema, optional
diff --git a/python/pyarrow/_dataset_parquet.pyx b/python/pyarrow/_dataset_parquet.pyx
index eefeb49afcf7..01a3b30da5ca 100644
--- a/python/pyarrow/_dataset_parquet.pyx
+++ b/python/pyarrow/_dataset_parquet.pyx
@@ -67,9 +67,9 @@ cdef class ParquetFileFormat(FileFormat):
     Parameters
     ----------
     read_options : ParquetReadOptions
-        Read options for the file
+        Read options for the file.
     default_fragment_scan_options : ParquetFragmentScanOptions
-        Scan Options for the file
+        Scan options for the file.
     **kwargs : dict
         Additional options for read option or scan option
     """
@@ -236,9 +236,12 @@ class RowGroupInfo:
 
     Parameters
     ----------
-    id : the group id
-    metadata : the rowgroup metadata
-    schema : schema of the rows
+    id : int
+        The group ID.
+    metadata : FileMetaData
+        The rowgroup metadata.
+    schema : Schema
+        Schema of the rows.
     """
 
     def __init__(self, id, metadata, schema):

From f1f8a8e04d607e56c3ab1a734b13570d7af4208c Mon Sep 17 00:00:00 2001
From: Bryce Mecum <petridish@gmail.com>
Date: Fri, 6 Jan 2023 12:58:05 -0900
Subject: [PATCH 18/23] Update python/pyarrow/ipc.pxi

Co-authored-by: Will Jones <willjones127@gmail.com>
---
 python/pyarrow/ipc.pxi | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/python/pyarrow/ipc.pxi b/python/pyarrow/ipc.pxi
index 8ad1f2425d88..318ebc05b1ff 100644
--- a/python/pyarrow/ipc.pxi
+++ b/python/pyarrow/ipc.pxi
@@ -57,11 +57,16 @@ class WriteStats(_WriteStats):
 
     Parameters
     ----------
-    num_messages : number of messages
-    num_record_batches : number of record batches
-    num_dictionary_batches : number of dictionary batches
-    num_dictionary_deltas : delta of dictionaries
-    num_replaced_dictionaries : number of replaced dictionaries
+    num_messages : int
+        Number of messages.
+    num_record_batches : int
+        Number of record batches.
+    num_dictionary_batches : int
+        Number of dictionary batches.
+    num_dictionary_deltas : int
+        Number of dictionary deltas.
+    num_replaced_dictionaries : int
+        Number of replaced dictionaries.
     """
     __slots__ = ()
 

From 3fd1c364beca8bc9f9c4387c6c65d2e6b13eeebc Mon Sep 17 00:00:00 2001
From: Bryce Mecum <petridish@gmail.com>
Date: Fri, 6 Jan 2023 12:58:13 -0900
Subject: [PATCH 19/23] Update python/pyarrow/ipc.pxi

Co-authored-by: Will Jones <willjones127@gmail.com>
---
 python/pyarrow/ipc.pxi | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/python/pyarrow/ipc.pxi b/python/pyarrow/ipc.pxi
index 318ebc05b1ff..9aa2573d80b9 100644
--- a/python/pyarrow/ipc.pxi
+++ b/python/pyarrow/ipc.pxi
@@ -89,11 +89,16 @@ class ReadStats(_ReadStats):
 
     Parameters
     ----------
-    num_messages : number of messages
-    num_record_batches : number of record batches
-    num_dictionary_batches : number of dictionary batches
-    num_dictionary_deltas : delta of dictionaries
-    num_replaced_dictionaries : number of replaced dictionaries
+    num_messages : int
+        Number of messages.
+    num_record_batches : int
+        Number of record batches.
+    num_dictionary_batches : int
+        Number of dictionary batches.
+    num_dictionary_deltas : int
+        Number of dictionary deltas.
+    num_replaced_dictionaries : int
+        Number of replaced dictionaries.
     """
     __slots__ = ()
 

From 3325f2424df280175478e7cfab6150f2d580986f Mon Sep 17 00:00:00 2001
From: Bryce Mecum <petridish@gmail.com>
Date: Fri, 6 Jan 2023 12:58:18 -0900
Subject: [PATCH 20/23] Update python/pyarrow/ipc.pxi

Co-authored-by: Will Jones <willjones127@gmail.com>
---
 python/pyarrow/ipc.pxi | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/pyarrow/ipc.pxi b/python/pyarrow/ipc.pxi
index 9aa2573d80b9..d5bb839781c1 100644
--- a/python/pyarrow/ipc.pxi
+++ b/python/pyarrow/ipc.pxi
@@ -116,7 +116,7 @@ cdef class IpcReadOptions(_Weakrefable):
 
     Parameters
     ----------
-    ensure_native_endian : bool
+    ensure_native_endian : bool, default True
         Whether to convert incoming data to platform-native endianness.
         Default is true
     use_threads : bool

From 058c19c2f2406b5bcbf85fedfbbf59f7046b9302 Mon Sep 17 00:00:00 2001
From: Bryce Mecum <petridish@gmail.com>
Date: Fri, 6 Jan 2023 12:58:35 -0900
Subject: [PATCH 21/23] Update python/pyarrow/ipc.pxi

Co-authored-by: Will Jones <willjones127@gmail.com>
---
 python/pyarrow/ipc.pxi | 1 -
 1 file changed, 1 deletion(-)

diff --git a/python/pyarrow/ipc.pxi b/python/pyarrow/ipc.pxi
index d5bb839781c1..6e60b8b9a058 100644
--- a/python/pyarrow/ipc.pxi
+++ b/python/pyarrow/ipc.pxi
@@ -118,7 +118,6 @@ cdef class IpcReadOptions(_Weakrefable):
     ----------
     ensure_native_endian : bool, default True
         Whether to convert incoming data to platform-native endianness.
-        Default is true
     use_threads : bool
         Whether to use the global CPU thread pool to parallelize any
         computational tasks like decompression

From 91fda30c7a7e87fb8771e1a4fde796f8f7d91f75 Mon Sep 17 00:00:00 2001
From: Bryce Mecum <petridish@gmail.com>
Date: Fri, 6 Jan 2023 12:58:43 -0900
Subject: [PATCH 22/23] Update python/pyarrow/parquet/core.py

Co-authored-by: Will Jones <willjones127@gmail.com>
---
 python/pyarrow/parquet/core.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/python/pyarrow/parquet/core.py b/python/pyarrow/parquet/core.py
index 305f6b7648dd..2ddc6eb67b5d 100644
--- a/python/pyarrow/parquet/core.py
+++ b/python/pyarrow/parquet/core.py
@@ -1313,7 +1313,8 @@ def get_index(self, key):
 
         Parameters
         ----------
-        key : The value for which we want to known the index
+        key : str or int
+            The value for which we want to know the index.
         """
         if key in self.key_indices:
             return self.key_indices[key]

From 4e3afcb1b2d32f8e955dd10bd89e45ab6977a6db Mon Sep 17 00:00:00 2001
From: Bryce Mecum <petridish@gmail.com>
Date: Fri, 6 Jan 2023 13:00:18 -0900
Subject: [PATCH 23/23] Update python/pyarrow/parquet/core.py

Co-authored-by: Will Jones <willjones127@gmail.com>
---
 python/pyarrow/parquet/core.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/pyarrow/parquet/core.py b/python/pyarrow/parquet/core.py
index 2ddc6eb67b5d..88e3cf2a677c 100644
--- a/python/pyarrow/parquet/core.py
+++ b/python/pyarrow/parquet/core.py
@@ -1225,7 +1225,7 @@ def read(self, columns=None, use_threads=True, partitions=None,
         Returns
         -------
         table : pyarrow.Table
-            The pierce as a pyarrow.Table
+            The piece as a pyarrow.Table.
         """
         if self.open_file_func is not None:
             reader = self.open()