Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -1107,7 +1107,7 @@ services:
["/arrow/ci/scripts/cpp_build.sh /arrow /build &&
/arrow/ci/scripts/python_build.sh /arrow /build &&
pip install -e /arrow/dev/archery[numpydoc] &&
archery numpydoc --allow-rule PR01,PR03,PR10 &&
archery numpydoc --allow-rule GL10,PR01,PR03,PR04,PR05,PR10,RT03,YD01 &&
/arrow/ci/scripts/python_test.sh /arrow"]

conda-python-dask:
Expand Down
18 changes: 14 additions & 4 deletions python/pyarrow/_dataset.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -532,12 +532,12 @@ cdef class InMemoryDataset(Dataset):

Parameters
----------
source : The data for this dataset.
Can be a RecordBatch, Table, list of
RecordBatch/Table, iterable of RecordBatch, or a RecordBatchReader.
source : RecordBatch, Table, list, tuple
The data for this dataset. Can be a RecordBatch, Table, list of
RecordBatch/Table, iterable of RecordBatch, or a RecordBatchReader.
If an iterable is provided, the schema must also be provided.
schema : Schema, optional
Only required if passing an iterable as the source.
Only required if passing an iterable as the source
"""

cdef:
Expand Down Expand Up @@ -2647,6 +2647,16 @@ cdef class Scanner(_Weakrefable):
memory_pool : MemoryPool, default None
For memory allocations, if required. If not specified, uses the
default pool.
use_threads : bool, default True
If enabled, maximum parallelism will be used, as determined by
the number of available CPU cores.
use_async : bool, default True
This flag is deprecated and is being kept for this release for
backwards compatibility. It will be removed in the next
release.
memory_pool : MemoryPool, default None
For memory allocations, if required. If not specified, uses the
default pool.
"""
cdef:
shared_ptr[CScanOptions] options = make_shared[CScanOptions]()
Expand Down
17 changes: 10 additions & 7 deletions python/pyarrow/_dataset_parquet.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ cdef class ParquetFileFormat(FileFormat):
default_fragment_scan_options : ParquetFragmentScanOptions
Scan Options for the file.
**kwargs : dict
Additional options for read option or scan option.
Additional options for read option or scan option
"""

cdef:
Expand Down Expand Up @@ -236,9 +236,12 @@ class RowGroupInfo:

Parameters
----------
id : the group id.
metadata : the rowgroup metadata.
schema : schema of the rows.
id : int
The group ID.
metadata : FileMetaData
The rowgroup metadata.
schema : Schema
Schema of the rows.
"""

def __init__(self, id, metadata, schema):
Expand Down Expand Up @@ -449,12 +452,12 @@ cdef class ParquetReadOptions(_Weakrefable):
----------
dictionary_columns : list of string, default None
Names of columns which should be dictionary encoded as
they are read.
coerce_int96_timestamp_unit : str, default None.
they are read
coerce_int96_timestamp_unit : str, default None
Cast timestamps that are stored in INT96 format to a particular
resolution (e.g. 'ms'). Setting to None is equivalent to 'ns'
and therefore INT96 timestamps will be inferred as timestamps
in nanoseconds.
in nanoseconds
"""

cdef public:
Expand Down
6 changes: 3 additions & 3 deletions python/pyarrow/array.pxi
Original file line number Diff line number Diff line change
Expand Up @@ -984,7 +984,7 @@ cdef class Array(_PandasConvertible):

Parameters
----------
null_encoding
null_encoding : str, default "mask"
How to handle null entries.

Returns
Expand Down Expand Up @@ -1265,7 +1265,7 @@ cdef class Array(_PandasConvertible):

Parameters
----------
fill_value
fill_value : any
The replacement value for null entries.

Returns
Expand Down Expand Up @@ -1363,7 +1363,7 @@ cdef class Array(_PandasConvertible):
----------
mask : Array or array-like
The boolean mask to filter the array with.
null_selection_behavior
null_selection_behavior : str, default "drop"
How nulls in the mask should be handled.

Returns
Expand Down
9 changes: 8 additions & 1 deletion python/pyarrow/compute.py
Original file line number Diff line number Diff line change
Expand Up @@ -374,6 +374,7 @@ def cast(arr, target_type=None, safe=None, options=None):
Returns
-------
casted : Array
The cast result as a new Array
"""
safe_vars_passed = (safe is not None) or (target_type is not None)

Expand Down Expand Up @@ -452,6 +453,7 @@ def take(data, indices, *, boundscheck=True, memory_pool=None):
Returns
-------
result : depends on inputs
Selected values for the given indices

Examples
--------
Expand Down Expand Up @@ -490,6 +492,7 @@ def fill_null(values, fill_value):
Returns
-------
result : depends on inputs
Values with all null elements replaced

Examples
--------
Expand Down Expand Up @@ -534,7 +537,8 @@ def top_k_unstable(values, k, sort_keys=None, *, memory_pool=None):

Returns
-------
result : Array of indices
result : Array
Indices of the top-k ordered elements

Examples
--------
Expand Down Expand Up @@ -581,6 +585,7 @@ def bottom_k_unstable(values, k, sort_keys=None, *, memory_pool=None):
Returns
-------
result : Array
Indices of the bottom-k ordered elements

Examples
--------
Expand Down Expand Up @@ -650,6 +655,7 @@ def field(*name_or_index):
Returns
-------
field_expr : Expression
Reference to the given field

Examples
--------
Expand Down Expand Up @@ -691,5 +697,6 @@ def scalar(value):
Returns
-------
scalar_expr : Expression
An Expression representing the scalar value
"""
return Expression._scalar(value)
2 changes: 2 additions & 0 deletions python/pyarrow/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,7 @@ def partitioning(schema=None, field_names=None, flavor=None,
Returns
-------
Partitioning or PartitioningFactory
The partitioning scheme

Examples
--------
Expand Down Expand Up @@ -524,6 +525,7 @@ def parquet_dataset(metadata_path, schema=None, filesystem=None, format=None,
Returns
-------
FileSystemDataset
The dataset corresponding to the given metadata
"""
from pyarrow.fs import LocalFileSystem, _ensure_filesystem

Expand Down
2 changes: 2 additions & 0 deletions python/pyarrow/feather.py
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,7 @@ def read_feather(source, columns=None, use_threads=True,
Returns
-------
df : pandas.DataFrame
The contents of the Feather file as a pandas.DataFrame
"""
return (read_table(
source, columns=columns, memory_map=memory_map,
Expand All @@ -246,6 +247,7 @@ def read_table(source, columns=None, memory_map=False, use_threads=True):
Returns
-------
table : pyarrow.Table
The contents of the Feather file as a pyarrow.Table
"""
reader = _feather.FeatherReader(
source, use_memory_map=memory_map, use_threads=use_threads)
Expand Down
2 changes: 1 addition & 1 deletion python/pyarrow/fs.py
Original file line number Diff line number Diff line change
Expand Up @@ -281,7 +281,7 @@ class FSSpecHandler(FileSystemHandler):

Parameters
----------
fs : FSSpec-compliant filesystem instance.
fs : FSSpec-compliant filesystem instance

Examples
--------
Expand Down
4 changes: 2 additions & 2 deletions python/pyarrow/io.pxi
Original file line number Diff line number Diff line change
Expand Up @@ -2207,7 +2207,7 @@ def input_stream(source, compression='detect', buffer_size=None):

Parameters
----------
source : str, Path, buffer, file-like object, ...
source : str, Path, buffer, or file-like object
The source to open for reading.
compression : str, optional, default 'detect'
The compression algorithm to use for on-the-fly decompression.
Expand Down Expand Up @@ -2259,7 +2259,7 @@ def output_stream(source, compression='detect', buffer_size=None):

Parameters
----------
source : str, Path, buffer, file-like object, ...
source : str, Path, buffer, or file-like object
The source to open for writing.
compression : str, optional, default 'detect'
The compression algorithm to use for on-the-fly compression.
Expand Down
39 changes: 24 additions & 15 deletions python/pyarrow/ipc.pxi
Original file line number Diff line number Diff line change
Expand Up @@ -57,11 +57,16 @@ class WriteStats(_WriteStats):

Parameters
----------
num_messages : number of messages.
num_record_batches : number of record batches.
num_dictionary_batches : number of dictionary batches.
num_dictionary_deltas : delta of dictionaries.
num_replaced_dictionaries : number of replaced dictionaries.
num_messages : int
Number of messages.
num_record_batches : int
Number of record batches.
num_dictionary_batches : int
Number of dictionary batches.
num_dictionary_deltas : int
Delta of dictionaries.
num_replaced_dictionaries : int
Number of replaced dictionaries.
"""
__slots__ = ()

Expand All @@ -84,11 +89,16 @@ class ReadStats(_ReadStats):

Parameters
----------
num_messages : number of messages.
num_record_batches : number of record batches.
num_dictionary_batches : number of dictionary batches.
num_dictionary_deltas : delta of dictionaries.
num_replaced_dictionaries : number of replaced dictionaries.
num_messages : int
Number of messages.
num_record_batches : int
Number of record batches.
num_dictionary_batches : int
Number of dictionary batches.
num_dictionary_deltas : int
Delta of dictionaries.
num_replaced_dictionaries : int
Number of replaced dictionaries.
"""
__slots__ = ()

Expand All @@ -106,16 +116,15 @@ cdef class IpcReadOptions(_Weakrefable):

Parameters
----------
ensure_native_endian : bool
ensure_native_endian : bool, default True
Whether to convert incoming data to platform-native endianness.
Default is true.
use_threads : bool
Whether to use the global CPU thread pool to parallelize any
computational tasks like decompression.
computational tasks like decompression
included_fields : list
If empty (the default), return all deserialized fields.
If non-empty, the values are the indices of fields to read on
the top-level schema.
the top-level schema
"""
__slots__ = ()

Expand Down Expand Up @@ -411,7 +420,7 @@ cdef class MessageReader(_Weakrefable):

Parameters
----------
source
source : bytes/buffer-like, pyarrow.NativeFile, or file-like Python object
A readable source, like an InputStream
"""
cdef:
Expand Down
7 changes: 7 additions & 0 deletions python/pyarrow/ipc.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,7 @@ def new_stream(sink, schema, *, use_legacy_format=None, options=None):
Returns
-------
writer : RecordBatchStreamWriter
A writer for the given sink
""".format(_ipc_writer_class_doc)


Expand All @@ -180,9 +181,11 @@ def open_stream(source, *, options=None, memory_pool=None):
If None, default values will be used.
memory_pool : MemoryPool, default None
If None, default memory pool is used.

Returns
-------
reader : RecordBatchStreamReader
A reader for the given source
"""
return RecordBatchStreamReader(source, options=options,
memory_pool=memory_pool)
Expand All @@ -202,6 +205,7 @@ def new_file(sink, schema, *, use_legacy_format=None, options=None):
Returns
-------
writer : RecordBatchFileWriter
A writer for the given sink
""".format(_ipc_writer_class_doc)


Expand All @@ -221,9 +225,11 @@ def open_file(source, footer_offset=None, *, options=None, memory_pool=None):
If None, default values will be used.
memory_pool : MemoryPool, default None
If None, default memory pool is used.

Returns
-------
reader : RecordBatchFileReader
A reader for the given source
"""
return RecordBatchFileReader(
source, footer_offset=footer_offset,
Expand Down Expand Up @@ -271,6 +277,7 @@ def deserialize_pandas(buf, *, use_threads=True):
Returns
-------
df : pandas.DataFrame
The buffer deserialized as a pandas DataFrame
"""
buffer_reader = pa.BufferReader(buf)
with pa.RecordBatchStreamReader(buffer_reader) as reader:
Expand Down
Loading