diff --git a/LICENSE.txt b/LICENSE.txt index d285caa4ff2c..13842c1cb0fc 100644 --- a/LICENSE.txt +++ b/LICENSE.txt @@ -2178,6 +2178,14 @@ which is made available under the Apache License 2.0. -------------------------------------------------------------------------------- +The file python/pyarrow/vendored/docscrape.py contains code from + +https://github.com/numpy/numpydoc/ + +which is made available under the BSD 2-clause license. + +-------------------------------------------------------------------------------- + The file python/pyarrow/vendored/version.py contains code from https://github.com/pypa/packaging/ diff --git a/cpp/src/arrow/compute/api_scalar.h b/cpp/src/arrow/compute/api_scalar.h index 1893d7546fcd..45dd6b79fc2c 100644 --- a/cpp/src/arrow/compute/api_scalar.h +++ b/cpp/src/arrow/compute/api_scalar.h @@ -305,7 +305,7 @@ class ARROW_EXPORT TrimOptions : public FunctionOptions { TrimOptions(); constexpr static char const kTypeName[] = "TrimOptions"; - /// The individual characters that can be trimmed from the string. + /// The individual characters to be trimmed from the string. std::string characters; }; diff --git a/dev/archery/archery/utils/lint.py b/dev/archery/archery/utils/lint.py index a6507b5a394a..489f741b6d02 100644 --- a/dev/archery/archery/utils/lint.py +++ b/dev/archery/archery/utils/lint.py @@ -222,7 +222,7 @@ def python_linter(src, fix=False): f"{_archery_install_msg}") return - flake8_exclude = ['.venv*'] + flake8_exclude = ['.venv*', 'vendored'] yield LintResult.from_cmd( flake8("--extend-exclude=" + ','.join(flake8_exclude), diff --git a/python/pyarrow/_compute.pyx b/python/pyarrow/_compute.pyx index a41fda206431..9f1fb8e51082 100644 --- a/python/pyarrow/_compute.pyx +++ b/python/pyarrow/_compute.pyx @@ -690,6 +690,26 @@ cdef class _CastOptions(FunctionOptions): class CastOptions(_CastOptions): + """ + Options for the `cast` function. + + Parameters + ---------- + target_type : DataType, optional + The PyArrow type to cast to. 
+ allow_int_overflow : bool, default False + Whether integer overflow is allowed when casting. + allow_time_truncate : bool, default False + Whether time precision truncation is allowed when casting. + allow_time_overflow : bool, default False + Whether date/time range overflow is allowed when casting. + allow_decimal_truncate : bool, default False + Whether decimal precision truncation is allowed when casting. + allow_float_truncate : bool, default False + Whether floating-point precision truncation is allowed when casting. + allow_invalid_utf8 : bool, default False + Whether producing invalid utf8 data is allowed when casting. + """ def __init__(self, target_type=None, *, allow_int_overflow=None, allow_time_truncate=None, allow_time_overflow=None, @@ -730,12 +750,36 @@ class CastOptions(_CastOptions): return self +def _skip_nulls_doc(): + # (note the weird indent because of how the string is inserted + # by callers) + return """skip_nulls : bool, default True + Whether to skip (ignore) nulls in the input. + If False, any null in the input forces the output to null. +""" + + +def _min_count_doc(*, default): + return f"""min_count : int, default {default} + Minimum number of non-null values in the input. If the number + of non-null values is below `min_count`, the output is null. +""" + + cdef class _ElementWiseAggregateOptions(FunctionOptions): def _set_options(self, skip_nulls): self.wrapped.reset(new CElementWiseAggregateOptions(skip_nulls)) class ElementWiseAggregateOptions(_ElementWiseAggregateOptions): + __doc__ = f""" + Options for element-wise aggregate functions. + + Parameters + ---------- + {_skip_nulls_doc()} + """ + def __init__(self, *, skip_nulls=True): self._set_options(skip_nulls) @@ -772,11 +816,25 @@ cdef class _RoundOptions(FunctionOptions): class RoundOptions(_RoundOptions): + """ + Options for rounding numbers. + + Parameters + ---------- + ndigits : int, default 0 + Number of fractional digits to round to. 
+ round_mode : str, default "half_to_even" + Rounding and tie-breaking mode. + Accepted values are "down", "up", "towards_zero", "towards_infinity", + "half_down", "half_up", "half_towards_zero", "half_towards_infinity", + "half_to_even", "half_to_odd". + """ + def __init__(self, ndigits=0, round_mode="half_to_even"): self._set_options(ndigits, round_mode) -cdef CCalendarUnit unwrap_round_unit(unit) except *: +cdef CCalendarUnit unwrap_round_temporal_unit(unit) except *: if unit == "nanosecond": return CCalendarUnit_NANOSECOND elif unit == "microsecond": @@ -803,18 +861,29 @@ cdef CCalendarUnit unwrap_round_unit(unit) except *: cdef class _RoundTemporalOptions(FunctionOptions): - - def _set_options( - self, multiple, unit): + def _set_options(self, multiple, unit): self.wrapped.reset( new CRoundTemporalOptions( - multiple, unwrap_round_unit(unit)) + multiple, unwrap_round_temporal_unit(unit)) ) class RoundTemporalOptions(_RoundTemporalOptions): - def __init__( - self, multiple=1, unit="second", *): + """ + Options for rounding temporal values. + + Parameters + ---------- + multiple : int, default 1 + Number of units to round to. + unit : str, default "second" + The unit in which `multiple` is expressed. + Accepted values are "year", "quarter", "month", "week", "day", + "hour", "minute", "second", "millisecond", "microsecond", + "nanosecond". + """ + + def __init__(self, multiple=1, unit="second"): self._set_options(multiple, unit) @@ -827,6 +896,21 @@ cdef class _RoundToMultipleOptions(FunctionOptions): class RoundToMultipleOptions(_RoundToMultipleOptions): + """ + Options for rounding numbers to a multiple. + + Parameters + ---------- + multiple : numeric scalar, default 1.0 + Multiple to round to. Should be a scalar of a type compatible + with the argument to be rounded. + round_mode : str, default "half_to_even" + Rounding and tie-breaking mode. 
+ Accepted values are "down", "up", "towards_zero", "towards_infinity", + "half_down", "half_up", "half_towards_zero", "half_towards_infinity", + "half_to_even", "half_to_odd". + """ + def __init__(self, multiple=1.0, round_mode="half_to_even"): self._set_options(multiple, round_mode) @@ -849,6 +933,19 @@ cdef class _JoinOptions(FunctionOptions): class JoinOptions(_JoinOptions): + """ + Options for the `binary_join_element_wise` function. + + Parameters + ---------- + null_handling : str, default "emit_null" + How to handle null values in the inputs. + Accepted values are "emit_null", "skip", "replace". + null_replacement : str, default "" + Replacement string to emit for null inputs if `null_handling` + is "replace". + """ + def __init__(self, null_handling="emit_null", null_replacement=""): self._set_options(null_handling, null_replacement) @@ -861,6 +958,17 @@ cdef class _MatchSubstringOptions(FunctionOptions): class MatchSubstringOptions(_MatchSubstringOptions): + """ + Options for looking for a substring. + + Parameters + ---------- + pattern : str + Substring pattern to look for inside input values. + ignore_case : bool, default False + Whether to perform a case-insensitive match. + """ + def __init__(self, pattern, *, ignore_case=False): self._set_options(pattern, ignore_case) @@ -871,6 +979,17 @@ cdef class _PadOptions(FunctionOptions): class PadOptions(_PadOptions): + """ + Options for padding strings. + + Parameters + ---------- + width : int + Desired string length. + padding : str, default " " + What to pad the string with. Should be one byte or codepoint. 
+ """ + def __init__(self, width, padding=' '): self._set_options(width, padding) @@ -881,20 +1000,17 @@ cdef class _TrimOptions(FunctionOptions): class TrimOptions(_TrimOptions): - def __init__(self, characters): - self._set_options(tobytes(characters)) - - -cdef class _ReplaceSliceOptions(FunctionOptions): - def _set_options(self, start, stop, replacement): - self.wrapped.reset( - new CReplaceSliceOptions(start, stop, tobytes(replacement)) - ) + """ + Options for trimming characters from strings. + Parameters + ---------- + characters : str + Individual characters to be trimmed from the string. + """ -class ReplaceSliceOptions(_ReplaceSliceOptions): - def __init__(self, start, stop, replacement): - self._set_options(start, stop, replacement) + def __init__(self, characters): + self._set_options(tobytes(characters)) cdef class _ReplaceSubstringOptions(FunctionOptions): @@ -907,7 +1023,23 @@ cdef class _ReplaceSubstringOptions(FunctionOptions): class ReplaceSubstringOptions(_ReplaceSubstringOptions): - def __init__(self, pattern, replacement, *, max_replacements=-1): + """ + Options for replacing matched substrings. + + Parameters + ---------- + pattern : str + Substring pattern to look for inside input values. + replacement : str + What to replace the pattern with. + max_replacements : int or None, default None + The maximum number of strings to replace in each + input value (unlimited if None). + """ + + def __init__(self, pattern, replacement, *, max_replacements=None): + if max_replacements is None: + max_replacements = -1 self._set_options(pattern, replacement, max_replacements) @@ -917,6 +1049,15 @@ cdef class _ExtractRegexOptions(FunctionOptions): class ExtractRegexOptions(_ExtractRegexOptions): + """ + Options for the `extract_regex` function. + + Parameters + ---------- + pattern : str + Regular expression with named capture fields. 
+ """ + def __init__(self, pattern): self._set_options(pattern) @@ -927,10 +1068,51 @@ cdef class _SliceOptions(FunctionOptions): class SliceOptions(_SliceOptions): - def __init__(self, start, stop=sys.maxsize, step=1): + """ + Options for slicing. + + Parameters + ---------- + start : int + Index to start slicing at (inclusive). + stop : int or None, default None + If given, index to stop slicing at (exclusive). + If not given, slicing will stop at the end. + step : int, default 1 + Slice step. + """ + + def __init__(self, start, stop=None, step=1): + if stop is None: + stop = sys.maxsize self._set_options(start, stop, step) +cdef class _ReplaceSliceOptions(FunctionOptions): + def _set_options(self, start, stop, replacement): + self.wrapped.reset( + new CReplaceSliceOptions(start, stop, tobytes(replacement)) + ) + + +class ReplaceSliceOptions(_ReplaceSliceOptions): + """ + Options for replacing slices. + + Parameters + ---------- + start : int + Index to start slicing at (inclusive). + stop : int + Index to stop slicing at (exclusive). + replacement : str + What to replace the slice with. + """ + + def __init__(self, start, stop, replacement): + self._set_options(start, stop, replacement) + + cdef class _FilterOptions(FunctionOptions): _null_selection_map = { "drop": CFilterNullSelectionBehavior_DROP, @@ -950,6 +1132,16 @@ cdef class _FilterOptions(FunctionOptions): class FilterOptions(_FilterOptions): + """ + Options for selecting with a boolean filter. + + Parameters + ---------- + null_selection_behavior : str, default "drop" + How to handle nulls in the selection filter. + Accepted values are "drop", "emit_null". + """ + def __init__(self, null_selection_behavior="drop"): self._set_options(null_selection_behavior) @@ -972,6 +1164,18 @@ cdef class _DictionaryEncodeOptions(FunctionOptions): class DictionaryEncodeOptions(_DictionaryEncodeOptions): + """ + Options for dictionary encoding. 
+ + Parameters + ---------- + null_encoding : str, default "mask" + How to encode nulls in the input. + Accepted values are "mask" (null inputs emit a null in the indices + array), "encode" (null inputs emit a non-null index pointing to + a null value in the dictionary array). + """ + def __init__(self, null_encoding="mask"): self._set_options(null_encoding) @@ -982,6 +1186,17 @@ cdef class _TakeOptions(FunctionOptions): class TakeOptions(_TakeOptions): + """ + Options for the `take` and `array_take` functions. + + Parameters + ---------- + boundscheck : boolean, default True + Whether to check indices are within bounds. If False and an + index is out of bounds, behavior is undefined (the process + may crash). + """ + def __init__(self, *, boundscheck=True): self._set_options(boundscheck) @@ -1002,6 +1217,20 @@ cdef class _MakeStructOptions(FunctionOptions): class MakeStructOptions(_MakeStructOptions): + """ + Options for the `make_struct` function. + + Parameters + ---------- + field_names : sequence of str + Names of the struct fields to create. + field_nullability : sequence of bool, optional + Nullability information for each struct field. + If omitted, all fields are nullable. + field_metadata : sequence of KeyValueMetadata, optional + Metadata for each struct field. + """ + def __init__(self, field_names=(), *, field_nullability=None, field_metadata=None): if field_nullability is None: @@ -1017,6 +1246,16 @@ cdef class _StructFieldOptions(FunctionOptions): class StructFieldOptions(_StructFieldOptions): + """ + Options for the `struct_field` function. + + Parameters + ---------- + indices : sequence of int + List of indices for chained field lookup, for example `[4, 1]` + will look up the second nested field in the fifth outer field. 
+ """ + def __init__(self, indices): self._set_options(indices) @@ -1027,6 +1266,15 @@ cdef class _ScalarAggregateOptions(FunctionOptions): class ScalarAggregateOptions(_ScalarAggregateOptions): + __doc__ = f""" + Options for scalar aggregations. + + Parameters + ---------- + {_skip_nulls_doc()} + {_min_count_doc(default=1)} + """ + def __init__(self, *, skip_nulls=True, min_count=1): self._set_options(skip_nulls, min_count) @@ -1046,6 +1294,16 @@ cdef class _CountOptions(FunctionOptions): class CountOptions(_CountOptions): + """ + Options for the `count` function. + + Parameters + ---------- + mode : str, default "only_valid" + Which values to count in the input. + Accepted values are "only_valid", "only_null", "all". + """ + def __init__(self, mode="only_valid"): self._set_options(mode) @@ -1057,7 +1315,7 @@ cdef class _IndexOptions(FunctionOptions): class IndexOptions(_IndexOptions): """ - Options for the index kernel. + Options for the `index` function. Parameters ---------- @@ -1075,6 +1333,17 @@ cdef class _ModeOptions(FunctionOptions): class ModeOptions(_ModeOptions): + __doc__ = f""" + Options for the `mode` function. + + Parameters + ---------- + n : int, default 1 + Number of distinct most-common values to return. 
+ {_skip_nulls_doc()} + {_min_count_doc(default=0)} + """ + def __init__(self, n=1, *, skip_nulls=True, min_count=0): self._set_options(n, skip_nulls, min_count) @@ -1083,13 +1352,13 @@ cdef class _SetLookupOptions(FunctionOptions): def _set_options(self, value_set, c_bool skip_nulls): cdef unique_ptr[CDatum] valset if isinstance(value_set, Array): - valset.reset(new CDatum(( value_set).sp_array)) + valset.reset(new CDatum((< Array > value_set).sp_array)) elif isinstance(value_set, ChunkedArray): valset.reset( - new CDatum(( value_set).sp_chunked_array) + new CDatum((< ChunkedArray > value_set).sp_chunked_array) ) elif isinstance(value_set, Scalar): - valset.reset(new CDatum(( value_set).unwrap())) + valset.reset(new CDatum((< Scalar > value_set).unwrap())) else: _raise_invalid_function_option(value_set, "value set", exception_class=TypeError) @@ -1098,6 +1367,19 @@ cdef class _SetLookupOptions(FunctionOptions): class SetLookupOptions(_SetLookupOptions): + """ + Options for the `is_in` and `index_in` functions. + + Parameters + ---------- + value_set : Array + Set of values to look for in the input. + skip_nulls : bool, default False + If False, nulls in the input are matched in the value_set just + like regular values. + If True, nulls in the input always fail matching. + """ + def __init__(self, value_set, *, skip_nulls=False): self._set_options(value_set, skip_nulls) @@ -1120,6 +1402,18 @@ cdef class _StrptimeOptions(FunctionOptions): class StrptimeOptions(_StrptimeOptions): + """ + Options for the `strptime` function. + + Parameters + ---------- + format : str + Pattern for parsing input strings as timestamps, such as "%Y/%m/%d". + unit : str + Timestamp unit of the output. + Accepted values are "s", "ms", "us", "ns". + """ + def __init__(self, format, unit): self._set_options(format, unit) @@ -1132,6 +1426,17 @@ cdef class _StrftimeOptions(FunctionOptions): class StrftimeOptions(_StrftimeOptions): + """ + Options for the `strftime` function. 
+ + Parameters + ---------- + format : str, default "%Y-%m-%dT%H:%M:%S" + Pattern for formatting input values. + locale : str, default "C" + Locale to use for locale-specific format specifiers. + """ + def __init__(self, format="%Y-%m-%dT%H:%M:%S", locale="C"): self._set_options(format, locale) @@ -1144,6 +1449,18 @@ cdef class _DayOfWeekOptions(FunctionOptions): class DayOfWeekOptions(_DayOfWeekOptions): + """ + Options for the `day_of_week` function. + + Parameters + ---------- + count_from_zero : bool, default True + If True, number days from 0, otherwise from 1. + week_start : int, default 1 + Which day does the week start with (Monday=1, Sunday=7). + How this value is numbered is unaffected by `count_from_zero`. + """ + def __init__(self, *, count_from_zero=True, week_start=1): self._set_options(count_from_zero, week_start) @@ -1158,6 +1475,24 @@ cdef class _WeekOptions(FunctionOptions): class WeekOptions(_WeekOptions): + """ + Options for the `week` function. + + Parameters + ---------- + week_starts_monday : bool, default True + If True, weeks start on Monday; if False, on Sunday. + count_from_zero : bool, default False + If True, dates at the start of a year that fall into the last week + of the previous year emit 0. + If False, they emit 52 or 53 (the week number of the last week + of the previous year). + first_week_is_fully_in_year : bool, default False + If True, week number 0 is fully in January. + If False, a week that begins on December 29, 30 or 31 is considered + to be week number 0 of the following year. + """ + def __init__(self, *, week_starts_monday=True, count_from_zero=False, first_week_is_fully_in_year=False): self._set_options(week_starts_monday, @@ -1191,6 +1526,21 @@ cdef class _AssumeTimezoneOptions(FunctionOptions): class AssumeTimezoneOptions(_AssumeTimezoneOptions): + """ + Options for the `assume_timezone` function. + + Parameters + ---------- + timezone : str + Timezone to assume for the input. 
+ ambiguous : str, default "raise" + How to handle timestamps that are ambiguous in the assumed timezone. + Accepted values are "raise", "earliest", "latest". + nonexistent : str, default "raise" + How to handle timestamps that don't exist in the assumed timezone. + Accepted values are "raise", "earliest", "latest". + """ + def __init__(self, timezone, *, ambiguous="raise", nonexistent="raise"): self._set_options(timezone, ambiguous, nonexistent) @@ -1201,6 +1551,15 @@ cdef class _NullOptions(FunctionOptions): class NullOptions(_NullOptions): + """ + Options for the `is_null` function. + + Parameters + ---------- + nan_is_null : bool, default False + Whether floating-point NaN values are considered null. + """ + def __init__(self, *, nan_is_null=False): self._set_options(nan_is_null) @@ -1211,6 +1570,17 @@ cdef class _VarianceOptions(FunctionOptions): class VarianceOptions(_VarianceOptions): + __doc__ = f""" + Options for the `variance` and `stddev` functions. + + Parameters + ---------- + ddof : int, default 0 + Number of degrees of freedom. + {_skip_nulls_doc()} + {_min_count_doc(default=0)} + """ + def __init__(self, *, ddof=0, skip_nulls=True, min_count=0): self._set_options(ddof, skip_nulls, min_count) @@ -1221,7 +1591,21 @@ cdef class _SplitOptions(FunctionOptions): class SplitOptions(_SplitOptions): - def __init__(self, *, max_splits=-1, reverse=False): + """ + Options for splitting on whitespace. + + Parameters + ---------- + max_splits : int or None, default None + Maximum number of splits for each input value (unlimited if None). + reverse : bool, default False + Whether to start splitting from the end of each input value. + This only has an effect if `max_splits` is not None. 
+ """ + + def __init__(self, *, max_splits=None, reverse=False): + if max_splits is None: + max_splits = -1 self._set_options(max_splits, reverse) @@ -1233,7 +1617,23 @@ cdef class _SplitPatternOptions(FunctionOptions): class SplitPatternOptions(_SplitPatternOptions): - def __init__(self, pattern, *, max_splits=-1, reverse=False): + """ + Options for splitting on a string pattern. + + Parameters + ---------- + pattern : str + String pattern to split on. + max_splits : int or None, default None + Maximum number of splits for each input value (unlimited if None). + reverse : bool, default False + Whether to start splitting from the end of each input value. + This only has an effect if `max_splits` is not None. + """ + + def __init__(self, pattern, *, max_splits=None, reverse=False): + if max_splits is None: + max_splits = -1 self._set_options(pattern, max_splits, reverse) @@ -1260,6 +1660,18 @@ cdef class _PartitionNthOptions(FunctionOptions): class PartitionNthOptions(_PartitionNthOptions): + """ + Options for the `partition_nth_indices` function. + + Parameters + ---------- + pivot : int + Index into the equivalent sorted array of the pivot element. + null_placement : str, default "at_end" + Where nulls in the input should be partitioned. + Accepted values are "at_start", "at_end". + """ + def __init__(self, pivot, *, null_placement="at_end"): self._set_options(pivot, null_placement) @@ -1271,6 +1683,19 @@ cdef class _ArraySortOptions(FunctionOptions): class ArraySortOptions(_ArraySortOptions): + """ + Options for the `array_sort_indices` function. + + Parameters + ---------- + order : str, default "ascending" + Which order to sort values in. + Accepted values are "ascending", "descending". + null_placement : str, default "at_end" + Where nulls in the input should be sorted. + Accepted values are "at_start", "at_end". 
+ """ + def __init__(self, order="ascending", *, null_placement="at_end"): self._set_options(order, null_placement) @@ -1287,6 +1712,21 @@ cdef class _SortOptions(FunctionOptions): class SortOptions(_SortOptions): + """ + Options for the `sort_indices` function. + + Parameters + ---------- + sort_keys : sequence of (name, order) tuples + Names of field/column keys to sort the input on, + along with the order each field/column is sorted in. + Accepted values for `order` are "ascending", "descending". + null_placement : str, default "at_end" + Where nulls in input should be sorted, only applying to + columns/fields mentioned in `sort_keys`. + Accepted values are "at_start", "at_end". + """ + def __init__(self, sort_keys=(), *, null_placement="at_end"): self._set_options(sort_keys, null_placement) @@ -1302,6 +1742,21 @@ cdef class _SelectKOptions(FunctionOptions): class SelectKOptions(_SelectKOptions): + """ + Options for top/bottom k-selection. + + Parameters + ---------- + k : int + Number of leading values to select in sorted order + (i.e. the largest values if sort order is "descending", + the smallest otherwise). + sort_keys : sequence of (name, order) tuples + Names of field/column keys to sort the input on, + along with the order each field/column is sorted in. + Accepted values for `order` are "ascending", "descending". + """ + def __init__(self, k, sort_keys): self._set_options(k, sort_keys) @@ -1326,6 +1781,26 @@ cdef class _QuantileOptions(FunctionOptions): class QuantileOptions(_QuantileOptions): + __doc__ = f""" + Options for the `quantile` function. + + Parameters + ---------- + q : double or sequence of double, default 0.5 + Quantiles to compute. All values must be in [0, 1]. + interpolation : str, default "linear" + How to break ties between competing data points for a given quantile. 
+ Accepted values are: + + - "linear": compute an interpolation + - "lower": always use the smallest of the two data points + - "higher": always use the largest of the two data points + - "nearest": select the data point that is closest to the quantile + - "midpoint": compute the (unweighted) mean of the two data points + {_skip_nulls_doc()} + {_min_count_doc(default=0)} + """ + def __init__(self, q=0.5, *, interpolation="linear", skip_nulls=True, min_count=0): if not isinstance(q, (list, tuple, np.ndarray)): @@ -1343,6 +1818,21 @@ cdef class _TDigestOptions(FunctionOptions): class TDigestOptions(_TDigestOptions): + __doc__ = f""" + Options for the `tdigest` function. + + Parameters + ---------- + q : double or sequence of double, default 0.5 + Quantiles to approximate. All values must be in [0, 1]. + delta : int, default 100 + Compression parameter for the T-digest algorithm. + buffer_size : int, default 500 + Buffer size for the T-digest algorithm. + {_skip_nulls_doc()} + {_min_count_doc(default=0)} + """ + def __init__(self, q=0.5, *, delta=100, buffer_size=500, skip_nulls=True, min_count=0): if not isinstance(q, (list, tuple, np.ndarray)): @@ -1369,6 +1859,16 @@ cdef class _Utf8NormalizeOptions(FunctionOptions): class Utf8NormalizeOptions(_Utf8NormalizeOptions): + """ + Options for the `utf8_normalize` function. + + Parameters + ---------- + form : str + Unicode normalization form. + Accepted values are "NFC", "NFKC", "NFD", "NFKD". + """ + def __init__(self, form): self._set_options(form) @@ -1395,6 +1895,21 @@ cdef class _RandomOptions(FunctionOptions): class RandomOptions(_RandomOptions): + """ + Options for random generation. + + Parameters + ---------- + length : int + Number of random values to generate. + initializer : int or str + How to initialize the underlying random generator. + If an integer is given, it is used as a seed. + If "system" is given, the random generator is initialized with + a system-specific source of (hopefully true) randomness. 
+ Other values are invalid. + """ + def __init__(self, length, *, initializer='system'): self._set_options(length, initializer) @@ -1413,7 +1928,7 @@ def _group_by(args, keys, aggregations): for aggr_func_name, aggr_opts in aggregations: c_aggr.function = tobytes(aggr_func_name) if aggr_opts is not None: - c_aggr.options = (aggr_opts).get_options() + c_aggr.options = ( aggr_opts).get_options() else: c_aggr.options = NULL c_aggregations.push_back(c_aggr) diff --git a/python/pyarrow/compute.py b/python/pyarrow/compute.py index b4e7ac53ba13..79646f761f31 100644 --- a/python/pyarrow/compute.py +++ b/python/pyarrow/compute.py @@ -77,18 +77,30 @@ _group_by ) +from collections import namedtuple import inspect from textwrap import dedent import warnings import pyarrow as pa from pyarrow import _compute_docstrings +from pyarrow.vendored import docscrape def _get_arg_names(func): return func._doc.arg_names +_OptionsClassDoc = namedtuple('_OptionsClassDoc', ('params',)) + + +def _scrape_options_class_doc(options_class): + if not options_class.__doc__: + return None + doc = docscrape.NumpyDocString(options_class.__doc__) + return _OptionsClassDoc(doc['Parameters']) + + def _decorate_compute_function(wrapper, exposed_name, func, options_class): # Decorate the given compute function wrapper with useful metadata # and documentation. @@ -105,66 +117,70 @@ def _decorate_compute_function(wrapper, exposed_name, func, options_class): doc_pieces = [] # 1. One-line summary - cpp_doc = func._doc summary = cpp_doc.summary if not summary: arg_str = "arguments" if func.arity > 1 else "argument" summary = ("Call compute function {!r} with the given {}" .format(func.name, arg_str)) - description = cpp_doc.description - arg_names = _get_arg_names(func) - - doc_pieces.append("""\ - {}. - - """.format(summary)) + doc_pieces.append(f"{summary}.\n\n") # 2. 
Multi-line description + description = cpp_doc.description if description: - doc_pieces.append("{}\n\n".format(description)) + doc_pieces.append(f"{description}\n\n") doc_addition = _compute_docstrings.function_doc_additions.get(func.name) # 3. Parameter description - doc_pieces.append("""\ + doc_pieces.append(dedent("""\ Parameters ---------- - """) + """)) + # 3a. Compute function parameters + arg_names = _get_arg_names(func) for arg_name in arg_names: if func.kind in ('vector', 'scalar_aggregate'): arg_type = 'Array-like' else: arg_type = 'Array-like or scalar-like' - doc_pieces.append("""\ - {} : {} - Argument to compute function - """.format(arg_name, arg_type)) + doc_pieces.append(f"{arg_name} : {arg_type}\n") + doc_pieces.append(" Argument to compute function.\n") + # 3b. Compute function option values if options_class is not None: - options_sig = inspect.signature(options_class) - for p in options_sig.parameters.values(): - doc_pieces.append("""\ - {0} : optional - Parameter for {1} constructor. Either `options` - or `{0}` can be passed, but not both at the same time. - """.format(p.name, options_class.__name__)) - doc_pieces.append("""\ - options : pyarrow.compute.{0}, optional - Parameters altering compute function semantics. - """.format(options_class.__name__)) - - doc_pieces.append("""\ + options_class_doc = _scrape_options_class_doc(options_class) + if options_class_doc: + for p in options_class_doc.params: + doc_pieces.append(f"{p.name} : {p.type}\n") + for s in p.desc: + doc_pieces.append(f" {s}\n") + else: + warnings.warn(f"Options class {options_class.__name__} " + f"does not have a docstring", RuntimeWarning) + options_sig = inspect.signature(options_class) + for p in options_sig.parameters.values(): + doc_pieces.append(dedent("""\ + {0} : optional + Parameter for {1} constructor. Either `options` + or `{0}` can be passed, but not both at the same time. 
+ """.format(p.name, options_class.__name__))) + doc_pieces.append(dedent(f"""\ + options : pyarrow.compute.{options_class.__name__}, optional + Alternative way of passing options. + """)) + + doc_pieces.append(dedent("""\ memory_pool : pyarrow.MemoryPool, optional If not passed, will allocate memory from the default memory pool. - """) + """)) # 4. Custom addition (e.g. examples) if doc_addition is not None: - doc_pieces.append("\n{}\n".format(doc_addition.strip("\n"))) + doc_pieces.append("\n{}\n".format(dedent(doc_addition).strip("\n"))) - wrapper.__doc__ = "".join(dedent(s) for s in doc_pieces) + wrapper.__doc__ = "".join(doc_pieces) return wrapper @@ -360,7 +376,8 @@ def index(data, value, start=None, end=None, *, memory_pool=None): Parameters ---------- data : Array or ChunkedArray - value : Scalar-like object + value : Scalar + The value to search for. start : int, optional end : int, optional memory_pool : MemoryPool, optional diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py index 2d6d763466ac..23050f4e403f 100644 --- a/python/pyarrow/tests/test_compute.py +++ b/python/pyarrow/tests/test_compute.py @@ -669,6 +669,7 @@ def test_is_valid(): def test_generated_docstrings(): + # With options assert pc.min_max.__doc__ == textwrap.dedent("""\ Compute the minimum and maximum values of a numeric array. @@ -678,18 +679,19 @@ def test_generated_docstrings(): Parameters ---------- array : Array-like - Argument to compute function - skip_nulls : optional - Parameter for ScalarAggregateOptions constructor. Either `options` - or `skip_nulls` can be passed, but not both at the same time. - min_count : optional - Parameter for ScalarAggregateOptions constructor. Either `options` - or `min_count` can be passed, but not both at the same time. + Argument to compute function. + skip_nulls : bool, default True + Whether to skip (ignore) nulls in the input. + If False, any null in the input forces the output to null. 
+ min_count : int, default 1 + Minimum number of non-null values in the input. If the number + of non-null values is below `min_count`, the output is null. options : pyarrow.compute.ScalarAggregateOptions, optional - Parameters altering compute function semantics. + Alternative way of passing options. memory_pool : pyarrow.MemoryPool, optional If not passed, will allocate memory from the default memory pool. """) + # Without options assert pc.add.__doc__ == textwrap.dedent("""\ Add the arguments element-wise. @@ -700,30 +702,126 @@ def test_generated_docstrings(): Parameters ---------- x : Array-like or scalar-like - Argument to compute function + Argument to compute function. y : Array-like or scalar-like - Argument to compute function + Argument to compute function. memory_pool : pyarrow.MemoryPool, optional If not passed, will allocate memory from the default memory pool. """) + # Varargs with options + assert pc.min_element_wise.__doc__ == textwrap.dedent("""\ + Find the element-wise minimum value. + + Nulls are ignored (by default) or propagated. + NaN is preferred over null, but not over any valid value. + + Parameters + ---------- + *args : Array-like or scalar-like + Argument to compute function. + skip_nulls : bool, default True + Whether to skip (ignore) nulls in the input. + If False, any null in the input forces the output to null. + options : pyarrow.compute.ElementWiseAggregateOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """) + # Nullary with options + assert pc.random.__doc__ == textwrap.dedent("""\ + Generate numbers in the range [0, 1). + + Generated values are uniformly-distributed, double-precision """ + + """in range [0, 1). + Length of generated data, algorithm and seed can be changed """ + + """via RandomOptions. + + Parameters + ---------- + length : int + Number of random values to generate. 
+ initializer : int or str + How to initialize the underlying random generator. + If an integer is given, it is used as a seed. + If "system" is given, the random generator is initialized with + a system-specific source of (hopefully true) randomness. + Other values are invalid. + options : pyarrow.compute.RandomOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + """) + # With custom examples + assert pc.filter.__doc__ == textwrap.dedent("""\ + Filter with a boolean selection filter. + + The output is populated with values from the input at positions + where the selection filter is non-zero. Nulls in the selection filter + are handled based on FilterOptions. + + Parameters + ---------- + input : Array-like or scalar-like + Argument to compute function. + selection_filter : Array-like or scalar-like + Argument to compute function. + null_selection_behavior : str, default "drop" + How to handle nulls in the selection filter. + Accepted values are "drop", "emit_null". + options : pyarrow.compute.FilterOptions, optional + Alternative way of passing options. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the default memory pool. + + Examples + -------- + >>> import pyarrow as pa + >>> arr = pa.array(["a", "b", "c", None, "e"]) + >>> mask = pa.array([True, False, None, False, True]) + >>> arr.filter(mask) + + [ + "a", + "e" + ] + >>> arr.filter(mask, null_selection_behavior='emit_null') + + [ + "a", + null, + "e" + ] + """) def test_generated_signatures(): # The self-documentation provided by signatures should show acceptable # options and their default values. 
+ + # Without options sig = inspect.signature(pc.add) assert str(sig) == "(x, y, /, *, memory_pool=None)" + # With options sig = inspect.signature(pc.min_max) assert str(sig) == ("(array, /, *, skip_nulls=True, min_count=1, " "options=None, memory_pool=None)") + # With positional options sig = inspect.signature(pc.quantile) assert str(sig) == ("(array, /, q=0.5, *, interpolation='linear', " "skip_nulls=True, min_count=0, " "options=None, memory_pool=None)") + # Varargs with options sig = inspect.signature(pc.binary_join_element_wise) assert str(sig) == ("(*strings, null_handling='emit_null', " "null_replacement='', options=None, " "memory_pool=None)") + # Varargs without options + sig = inspect.signature(pc.choose) + assert str(sig) == "(indices, /, *values, memory_pool=None)" + # Nullary with options + sig = inspect.signature(pc.random) + assert str(sig) == ("(length, *, initializer='system', " + "options=None, memory_pool=None)") # We use isprintable to find about codepoints that Python doesn't know, but diff --git a/python/pyarrow/vendored/docscrape.py b/python/pyarrow/vendored/docscrape.py new file mode 100644 index 000000000000..6c4d6e01400b --- /dev/null +++ b/python/pyarrow/vendored/docscrape.py @@ -0,0 +1,716 @@ +# Vendored from https://github.com/numpy/numpydoc/, +# changeset 4ae1e00e72e522c126403c1814f0b99dc5978622 + +# This file is licensed under the BSD License. See the LICENSE.txt file +# in the root of the `numpydoc` repository for complete details. + +"""Extract reference documentation from the NumPy source tree. 
+ +""" +import inspect +import textwrap +import re +import pydoc +from warnings import warn +from collections import namedtuple +from collections.abc import Callable, Mapping +import copy +import sys + + +def strip_blank_lines(l): + "Remove leading and trailing blank lines from a list of lines" + while l and not l[0].strip(): + del l[0] + while l and not l[-1].strip(): + del l[-1] + return l + + +class Reader: + """A line-based string reader. + + """ + + def __init__(self, data): + """ + Parameters + ---------- + data : str + String with lines separated by '\\n'. + + """ + if isinstance(data, list): + self._str = data + else: + self._str = data.split('\n') # store string as list of lines + + self.reset() + + def __getitem__(self, n): + return self._str[n] + + def reset(self): + self._l = 0 # current line nr + + def read(self): + if not self.eof(): + out = self[self._l] + self._l += 1 + return out + else: + return '' + + def seek_next_non_empty_line(self): + for l in self[self._l:]: + if l.strip(): + break + else: + self._l += 1 + + def eof(self): + return self._l >= len(self._str) + + def read_to_condition(self, condition_func): + start = self._l + for line in self[start:]: + if condition_func(line): + return self[start:self._l] + self._l += 1 + if self.eof(): + return self[start:self._l+1] + return [] + + def read_to_next_empty_line(self): + self.seek_next_non_empty_line() + + def is_empty(line): + return not line.strip() + + return self.read_to_condition(is_empty) + + def read_to_next_unindented_line(self): + def is_unindented(line): + return (line.strip() and (len(line.lstrip()) == len(line))) + return self.read_to_condition(is_unindented) + + def peek(self, n=0): + if self._l + n < len(self._str): + return self[self._l + n] + else: + return '' + + def is_empty(self): + return not ''.join(self._str).strip() + + +class ParseError(Exception): + def __str__(self): + message = self.args[0] + if hasattr(self, 'docstring'): + message = "%s in %r" % (message, 
self.docstring) + return message + + +Parameter = namedtuple('Parameter', ['name', 'type', 'desc']) + + +class NumpyDocString(Mapping): + """Parses a numpydoc string to an abstract representation + + Instances define a mapping from section title to structured data. + + """ + + sections = { + 'Signature': '', + 'Summary': [''], + 'Extended Summary': [], + 'Parameters': [], + 'Returns': [], + 'Yields': [], + 'Receives': [], + 'Raises': [], + 'Warns': [], + 'Other Parameters': [], + 'Attributes': [], + 'Methods': [], + 'See Also': [], + 'Notes': [], + 'Warnings': [], + 'References': '', + 'Examples': '', + 'index': {} + } + + def __init__(self, docstring, config=None): + orig_docstring = docstring + docstring = textwrap.dedent(docstring).split('\n') + + self._doc = Reader(docstring) + self._parsed_data = copy.deepcopy(self.sections) + + try: + self._parse() + except ParseError as e: + e.docstring = orig_docstring + raise + + def __getitem__(self, key): + return self._parsed_data[key] + + def __setitem__(self, key, val): + if key not in self._parsed_data: + self._error_location("Unknown section %s" % key, error=False) + else: + self._parsed_data[key] = val + + def __iter__(self): + return iter(self._parsed_data) + + def __len__(self): + return len(self._parsed_data) + + def _is_at_section(self): + self._doc.seek_next_non_empty_line() + + if self._doc.eof(): + return False + + l1 = self._doc.peek().strip() # e.g. Parameters + + if l1.startswith('.. index::'): + return True + + l2 = self._doc.peek(1).strip() # ---------- or ========== + if len(l2) >= 3 and (set(l2) in ({'-'}, {'='})) and len(l2) != len(l1): + snip = '\n'.join(self._doc._str[:2])+'...' + self._error_location("potentially wrong underline length... 
\n%s \n%s in \n%s" + % (l1, l2, snip), error=False) + return l2.startswith('-'*len(l1)) or l2.startswith('='*len(l1)) + + def _strip(self, doc): + i = 0 + j = 0 + for i, line in enumerate(doc): + if line.strip(): + break + + for j, line in enumerate(doc[::-1]): + if line.strip(): + break + + return doc[i:len(doc)-j] + + def _read_to_next_section(self): + section = self._doc.read_to_next_empty_line() + + while not self._is_at_section() and not self._doc.eof(): + if not self._doc.peek(-1).strip(): # previous line was empty + section += [''] + + section += self._doc.read_to_next_empty_line() + + return section + + def _read_sections(self): + while not self._doc.eof(): + data = self._read_to_next_section() + name = data[0].strip() + + if name.startswith('..'): # index section + yield name, data[1:] + elif len(data) < 2: + yield StopIteration + else: + yield name, self._strip(data[2:]) + + def _parse_param_list(self, content, single_element_is_type=False): + content = dedent_lines(content) + r = Reader(content) + params = [] + while not r.eof(): + header = r.read().strip() + if ' :' in header: + arg_name, arg_type = header.split(' :', maxsplit=1) + arg_name, arg_type = arg_name.strip(), arg_type.strip() + else: + if single_element_is_type: + arg_name, arg_type = '', header + else: + arg_name, arg_type = header, '' + + desc = r.read_to_next_unindented_line() + desc = dedent_lines(desc) + desc = strip_blank_lines(desc) + + params.append(Parameter(arg_name, arg_type, desc)) + + return params + + # See also supports the following formats. + # + # + # SPACE* COLON SPACE+ SPACE* + # ( COMMA SPACE+ )+ (COMMA | PERIOD)? SPACE* + # ( COMMA SPACE+ )* SPACE* COLON SPACE+ SPACE* + + # is one of + # + # COLON COLON BACKTICK BACKTICK + # where + # is a legal function name, and + # is any nonempty sequence of word characters. + # Examples: func_f1 :meth:`func_h1` :obj:`~baz.obj_r` :class:`class_j` + # is a string describing the function. 
+ + _role = r":(?P(py:)?\w+):" + _funcbacktick = r"`(?P(?:~\w+\.)?[a-zA-Z0-9_\.-]+)`" + _funcplain = r"(?P[a-zA-Z0-9_\.-]+)" + _funcname = r"(" + _role + _funcbacktick + r"|" + _funcplain + r")" + _funcnamenext = _funcname.replace('role', 'rolenext') + _funcnamenext = _funcnamenext.replace('name', 'namenext') + _description = r"(?P\s*:(\s+(?P\S+.*))?)?\s*$" + _func_rgx = re.compile(r"^\s*" + _funcname + r"\s*") + _line_rgx = re.compile( + r"^\s*" + + r"(?P" + # group for all function names + _funcname + + r"(?P([,]\s+" + _funcnamenext + r")*)" + + r")" + # end of "allfuncs" + # Some function lists have a trailing comma (or period) '\s*' + r"(?P[,\.])?" + + _description) + + # Empty elements are replaced with '..' + empty_description = '..' + + def _parse_see_also(self, content): + """ + func_name : Descriptive text + continued text + another_func_name : Descriptive text + func_name1, func_name2, :meth:`func_name`, func_name3 + + """ + + content = dedent_lines(content) + + items = [] + + def parse_item_name(text): + """Match ':role:`name`' or 'name'.""" + m = self._func_rgx.match(text) + if not m: + self._error_location(f"Error parsing See Also entry {line!r}") + role = m.group('role') + name = m.group('name') if role else m.group('name2') + return name, role, m.end() + + rest = [] + for line in content: + if not line.strip(): + continue + + line_match = self._line_rgx.match(line) + description = None + if line_match: + description = line_match.group('desc') + if line_match.group('trailing') and description: + self._error_location( + 'Unexpected comma or period after function list at index %d of ' + 'line "%s"' % (line_match.end('trailing'), line), + error=False) + if not description and line.startswith(' '): + rest.append(line.strip()) + elif line_match: + funcs = [] + text = line_match.group('allfuncs') + while True: + if not text.strip(): + break + name, role, match_end = parse_item_name(text) + funcs.append((name, role)) + text = text[match_end:].strip() + if 
text and text[0] == ',': + text = text[1:].strip() + rest = list(filter(None, [description])) + items.append((funcs, rest)) + else: + self._error_location(f"Error parsing See Also entry {line!r}") + return items + + def _parse_index(self, section, content): + """ + .. index: default + :refguide: something, else, and more + + """ + def strip_each_in(lst): + return [s.strip() for s in lst] + + out = {} + section = section.split('::') + if len(section) > 1: + out['default'] = strip_each_in(section[1].split(','))[0] + for line in content: + line = line.split(':') + if len(line) > 2: + out[line[1]] = strip_each_in(line[2].split(',')) + return out + + def _parse_summary(self): + """Grab signature (if given) and summary""" + if self._is_at_section(): + return + + # If several signatures present, take the last one + while True: + summary = self._doc.read_to_next_empty_line() + summary_str = " ".join([s.strip() for s in summary]).strip() + compiled = re.compile(r'^([\w., ]+=)?\s*[\w\.]+\(.*\)$') + if compiled.match(summary_str): + self['Signature'] = summary_str + if not self._is_at_section(): + continue + break + + if summary is not None: + self['Summary'] = summary + + if not self._is_at_section(): + self['Extended Summary'] = self._read_to_next_section() + + def _parse(self): + self._doc.reset() + self._parse_summary() + + sections = list(self._read_sections()) + section_names = set([section for section, content in sections]) + + has_returns = 'Returns' in section_names + has_yields = 'Yields' in section_names + # We could do more tests, but we are not. Arbitrarily. + if has_returns and has_yields: + msg = 'Docstring contains both a Returns and Yields section.' + raise ValueError(msg) + if not has_yields and 'Receives' in section_names: + msg = 'Docstring contains a Receives section but not Yields.' 
+ raise ValueError(msg) + + for (section, content) in sections: + if not section.startswith('..'): + section = (s.capitalize() for s in section.split(' ')) + section = ' '.join(section) + if self.get(section): + self._error_location("The section %s appears twice in %s" + % (section, '\n'.join(self._doc._str))) + + if section in ('Parameters', 'Other Parameters', 'Attributes', + 'Methods'): + self[section] = self._parse_param_list(content) + elif section in ('Returns', 'Yields', 'Raises', 'Warns', 'Receives'): + self[section] = self._parse_param_list( + content, single_element_is_type=True) + elif section.startswith('.. index::'): + self['index'] = self._parse_index(section, content) + elif section == 'See Also': + self['See Also'] = self._parse_see_also(content) + else: + self[section] = content + + @property + def _obj(self): + if hasattr(self, '_cls'): + return self._cls + elif hasattr(self, '_f'): + return self._f + return None + + def _error_location(self, msg, error=True): + if self._obj is not None: + # we know where the docs came from: + try: + filename = inspect.getsourcefile(self._obj) + except TypeError: + filename = None + msg += f" in the docstring of {self._obj.__name__}" + msg += f" in {filename}." 
if filename else "" + if error: + raise ValueError(msg) + else: + warn(msg) + + # string conversion routines + + def _str_header(self, name, symbol='-'): + return [name, len(name)*symbol] + + def _str_indent(self, doc, indent=4): + return [' '*indent + line for line in doc] + + def _str_signature(self): + if self['Signature']: + return [self['Signature'].replace('*', r'\*')] + [''] + return [''] + + def _str_summary(self): + if self['Summary']: + return self['Summary'] + [''] + return [] + + def _str_extended_summary(self): + if self['Extended Summary']: + return self['Extended Summary'] + [''] + return [] + + def _str_param_list(self, name): + out = [] + if self[name]: + out += self._str_header(name) + for param in self[name]: + parts = [] + if param.name: + parts.append(param.name) + if param.type: + parts.append(param.type) + out += [' : '.join(parts)] + if param.desc and ''.join(param.desc).strip(): + out += self._str_indent(param.desc) + out += [''] + return out + + def _str_section(self, name): + out = [] + if self[name]: + out += self._str_header(name) + out += self[name] + out += [''] + return out + + def _str_see_also(self, func_role): + if not self['See Also']: + return [] + out = [] + out += self._str_header("See Also") + out += [''] + last_had_desc = True + for funcs, desc in self['See Also']: + assert isinstance(funcs, list) + links = [] + for func, role in funcs: + if role: + link = ':%s:`%s`' % (role, func) + elif func_role: + link = ':%s:`%s`' % (func_role, func) + else: + link = "`%s`_" % func + links.append(link) + link = ', '.join(links) + out += [link] + if desc: + out += self._str_indent([' '.join(desc)]) + last_had_desc = True + else: + last_had_desc = False + out += self._str_indent([self.empty_description]) + + if last_had_desc: + out += [''] + out += [''] + return out + + def _str_index(self): + idx = self['index'] + out = [] + output_index = False + default_index = idx.get('default', '') + if default_index: + output_index = True + out += 
['.. index:: %s' % default_index] + for section, references in idx.items(): + if section == 'default': + continue + output_index = True + out += [' :%s: %s' % (section, ', '.join(references))] + if output_index: + return out + return '' + + def __str__(self, func_role=''): + out = [] + out += self._str_signature() + out += self._str_summary() + out += self._str_extended_summary() + for param_list in ('Parameters', 'Returns', 'Yields', 'Receives', + 'Other Parameters', 'Raises', 'Warns'): + out += self._str_param_list(param_list) + out += self._str_section('Warnings') + out += self._str_see_also(func_role) + for s in ('Notes', 'References', 'Examples'): + out += self._str_section(s) + for param_list in ('Attributes', 'Methods'): + out += self._str_param_list(param_list) + out += self._str_index() + return '\n'.join(out) + + +def dedent_lines(lines): + """Deindent a list of lines maximally""" + return textwrap.dedent("\n".join(lines)).split("\n") + + +class FunctionDoc(NumpyDocString): + def __init__(self, func, role='func', doc=None, config=None): + self._f = func + self._role = role # e.g. "func" or "meth" + + if doc is None: + if func is None: + raise ValueError("No function or docstring given") + doc = inspect.getdoc(func) or '' + if config is None: + config = {} + NumpyDocString.__init__(self, doc, config) + + def get_func(self): + func_name = getattr(self._f, '__name__', self.__class__.__name__) + if inspect.isclass(self._f): + func = getattr(self._f, '__call__', self._f.__init__) + else: + func = self._f + return func, func_name + + def __str__(self): + out = '' + + func, func_name = self.get_func() + + roles = {'func': 'function', + 'meth': 'method'} + + if self._role: + if self._role not in roles: + print("Warning: invalid role %s" % self._role) + out += '.. 
%s:: %s\n \n\n' % (roles.get(self._role, ''), + func_name) + + out += super().__str__(func_role=self._role) + return out + + +class ObjDoc(NumpyDocString): + def __init__(self, obj, doc=None, config=None): + self._f = obj + if config is None: + config = {} + NumpyDocString.__init__(self, doc, config=config) + + +class ClassDoc(NumpyDocString): + + extra_public_methods = ['__call__'] + + def __init__(self, cls, doc=None, modulename='', func_doc=FunctionDoc, + config=None): + if not inspect.isclass(cls) and cls is not None: + raise ValueError("Expected a class or None, but got %r" % cls) + self._cls = cls + + if 'sphinx' in sys.modules: + from sphinx.ext.autodoc import ALL + else: + ALL = object() + + if config is None: + config = {} + self.show_inherited_members = config.get( + 'show_inherited_class_members', True) + + if modulename and not modulename.endswith('.'): + modulename += '.' + self._mod = modulename + + if doc is None: + if cls is None: + raise ValueError("No class or documentation string given") + doc = pydoc.getdoc(cls) + + NumpyDocString.__init__(self, doc) + + _members = config.get('members', []) + if _members is ALL: + _members = None + _exclude = config.get('exclude-members', []) + + if config.get('show_class_members', True) and _exclude is not ALL: + def splitlines_x(s): + if not s: + return [] + else: + return s.splitlines() + for field, items in [('Methods', self.methods), + ('Attributes', self.properties)]: + if not self[field]: + doc_list = [] + for name in sorted(items): + if (name in _exclude or + (_members and name not in _members)): + continue + try: + doc_item = pydoc.getdoc(getattr(self._cls, name)) + doc_list.append( + Parameter(name, '', splitlines_x(doc_item))) + except AttributeError: + pass # method doesn't exist + self[field] = doc_list + + @property + def methods(self): + if self._cls is None: + return [] + return [name for name, func in inspect.getmembers(self._cls) + if ((not name.startswith('_') or + name in 
self.extra_public_methods) and + isinstance(func, Callable) and + self._is_show_member(name))] + + @property + def properties(self): + if self._cls is None: + return [] + return [name for name, func in inspect.getmembers(self._cls) + if (not name.startswith('_') and + (func is None or isinstance(func, property) or + inspect.isdatadescriptor(func)) and + self._is_show_member(name))] + + def _is_show_member(self, name): + if self.show_inherited_members: + return True # show all class members + if name not in self._cls.__dict__: + return False # class member is inherited, we do not show it + return True + + +def get_doc_object(obj, what=None, doc=None, config=None): + if what is None: + if inspect.isclass(obj): + what = 'class' + elif inspect.ismodule(obj): + what = 'module' + elif isinstance(obj, Callable): + what = 'function' + else: + what = 'object' + if config is None: + config = {} + + if what == 'class': + return ClassDoc(obj, func_doc=FunctionDoc, doc=doc, config=config) + elif what in ('function', 'method'): + return FunctionDoc(obj, doc=doc, config=config) + else: + if doc is None: + doc = pydoc.getdoc(obj) + return ObjDoc(obj, doc, config=config)