diff --git a/cpp/src/arrow/pretty_print.cc b/cpp/src/arrow/pretty_print.cc index 37135ed3ee84..01d7f2129087 100644 --- a/cpp/src/arrow/pretty_print.cc +++ b/cpp/src/arrow/pretty_print.cc @@ -66,9 +66,13 @@ class PrettyPrinter { void CloseArray(const Array& array); void Flush() { (*sink_) << std::flush; } - PrettyPrintOptions ChildOptions() const { + PrettyPrintOptions ChildOptions(bool increment_indent = false) const { PrettyPrintOptions child_options = options_; - child_options.indent = indent_; + if (increment_indent) { + child_options.indent = indent_ + child_options.indent_size; + } else { + child_options.indent = indent_; + } return child_options; } @@ -134,18 +138,19 @@ class ArrayPrinter : public PrettyPrinter { private: template Status WriteValues(const Array& array, FormatFunction&& func, - bool indent_non_null_values = true) { + bool indent_non_null_values = true, bool is_container = false) { // `indent_non_null_values` should be false if `FormatFunction` applies // indentation itself. + int window = is_container ? 
options_.container_window : options_.window; for (int64_t i = 0; i < array.length(); ++i) { const bool is_last = (i == array.length() - 1); - if ((i >= options_.window) && (i < (array.length() - options_.window))) { + if ((i >= window) && (i < (array.length() - window))) { IndentAfterNewline(); (*sink_) << "..."; if (!is_last && options_.skip_new_lines) { (*sink_) << ","; } - i = array.length() - options_.window - 1; + i = array.length() - window - 1; } else if (array.IsNull(i)) { IndentAfterNewline(); (*sink_) << options_.null_rep; @@ -187,7 +192,7 @@ class ArrayPrinter : public PrettyPrinter { Status PrintChildren(const std::vector>& fields, int64_t offset, int64_t length) { for (size_t i = 0; i < fields.size(); ++i) { - Newline(); + Write("\n"); // Always want newline before child array description Indent(); std::stringstream ss; ss << "-- child " << i << " type: " << fields[i]->type()->ToString() << "\n"; @@ -197,7 +202,8 @@ class ArrayPrinter : public PrettyPrinter { if (offset != 0) { field = field->Slice(offset, length); } - RETURN_NOT_OK(PrettyPrint(*field, indent_ + options_.indent_size, sink_)); + // Indent(); + RETURN_NOT_OK(PrettyPrint(*field, ChildOptions(true), sink_)); } return Status::OK(); } @@ -256,7 +262,8 @@ class ArrayPrinter : public PrettyPrinter { return values_printer.Print( *values->Slice(array.value_offset(i), array.value_length(i))); }, - /*indent_non_null_values=*/false); + /*indent_non_null_values=*/false, + /*is_container=*/true); } Status WriteDataValues(const MapArray& array) { @@ -268,7 +275,7 @@ class ArrayPrinter : public PrettyPrinter { return WriteValues( array, [&](int64_t i) { - Indent(); + IndentAfterNewline(); (*sink_) << "keys:"; Newline(); RETURN_NOT_OK(values_printer.Print( @@ -334,7 +341,7 @@ class ArrayPrinter : public PrettyPrinter { Indent(); Write("-- type_ids: "); UInt8Array type_codes(array.length(), array.type_codes(), nullptr, 0, array.offset()); - RETURN_NOT_OK(PrettyPrint(type_codes, indent_ + 
options_.indent_size, sink_)); + RETURN_NOT_OK(PrettyPrint(type_codes, ChildOptions(true), sink_)); if (array.mode() == UnionMode::DENSE) { Newline(); @@ -343,7 +350,7 @@ class ArrayPrinter : public PrettyPrinter { Int32Array value_offsets( array.length(), checked_cast(array).value_offsets(), nullptr, 0, array.offset()); - RETURN_NOT_OK(PrettyPrint(value_offsets, indent_ + options_.indent_size, sink_)); + RETURN_NOT_OK(PrettyPrint(value_offsets, ChildOptions(true), sink_)); } // Print the children without any offset, because the type ids are absolute @@ -359,13 +366,12 @@ class ArrayPrinter : public PrettyPrinter { Newline(); Indent(); Write("-- dictionary:\n"); - RETURN_NOT_OK( - PrettyPrint(*array.dictionary(), indent_ + options_.indent_size, sink_)); + RETURN_NOT_OK(PrettyPrint(*array.dictionary(), ChildOptions(true), sink_)); Newline(); Indent(); Write("-- indices:\n"); - return PrettyPrint(*array.indices(), indent_ + options_.indent_size, sink_); + return PrettyPrint(*array.indices(), ChildOptions(true), sink_); } Status Print(const Array& array) { @@ -384,7 +390,7 @@ Status ArrayPrinter::WriteValidityBitmap(const Array& array) { Indent(); BooleanArray is_valid(array.length(), array.null_bitmap(), nullptr, 0, array.offset()); - return PrettyPrint(is_valid, indent_ + options_.indent_size, sink_); + return PrettyPrint(is_valid, ChildOptions(true), sink_); } else { Write(" all not null"); return Status::OK(); @@ -418,13 +424,16 @@ Status PrettyPrint(const ChunkedArray& chunked_arr, const PrettyPrintOptions& op std::ostream* sink) { int num_chunks = chunked_arr.num_chunks(); int indent = options.indent; - int window = options.window; + int window = options.container_window; + // Struct fields are always on new line + bool skip_new_lines = + options.skip_new_lines && (chunked_arr.type()->id() != Type::STRUCT); for (int i = 0; i < indent; ++i) { (*sink) << " "; } (*sink) << "["; - if (!options.skip_new_lines) { + if (!skip_new_lines) { *sink << "\n"; } bool 
skip_comma = true; @@ -433,7 +442,7 @@ Status PrettyPrint(const ChunkedArray& chunked_arr, const PrettyPrintOptions& op skip_comma = false; } else { (*sink) << ","; - if (!options.skip_new_lines) { + if (!skip_new_lines) { *sink << "\n"; } } @@ -441,8 +450,8 @@ Status PrettyPrint(const ChunkedArray& chunked_arr, const PrettyPrintOptions& op for (int i = 0; i < indent; ++i) { (*sink) << " "; } - (*sink) << "..."; - if (!options.skip_new_lines) { + (*sink) << "...,"; + if (!skip_new_lines) { *sink << "\n"; } i = num_chunks - window - 1; diff --git a/cpp/src/arrow/pretty_print.h b/cpp/src/arrow/pretty_print.h index 1bc086a68893..5d22fd5c51ab 100644 --- a/cpp/src/arrow/pretty_print.h +++ b/cpp/src/arrow/pretty_print.h @@ -35,16 +35,17 @@ class Table; struct PrettyPrintOptions { PrettyPrintOptions() = default; - PrettyPrintOptions(int indent_arg, // NOLINT runtime/explicit - int window_arg = 10, int indent_size_arg = 2, - std::string null_rep_arg = "null", bool skip_new_lines_arg = false, - bool truncate_metadata_arg = true) - : indent(indent_arg), - indent_size(indent_size_arg), - window(window_arg), - null_rep(std::move(null_rep_arg)), - skip_new_lines(skip_new_lines_arg), - truncate_metadata(truncate_metadata_arg) {} + PrettyPrintOptions(int indent, // NOLINT runtime/explicit + int window = 10, int indent_size = 2, std::string null_rep = "null", + bool skip_new_lines = false, bool truncate_metadata = true, + int container_window = 2) + : indent(indent), + indent_size(indent_size), + window(window), + container_window(container_window), + null_rep(std::move(null_rep)), + skip_new_lines(skip_new_lines), + truncate_metadata(truncate_metadata) {} static PrettyPrintOptions Defaults() { return PrettyPrintOptions(); } @@ -57,6 +58,10 @@ struct PrettyPrintOptions { /// Maximum number of elements to show at the beginning and at the end. 
int window = 10; + /// Maximum number of elements to show at the beginning and at the end, for elements + /// that are containers (that is, list in ListArray and chunks in ChunkedArray) + int container_window = 2; + /// String to use for representing a null value, defaults to "null" std::string null_rep = "null"; diff --git a/cpp/src/arrow/pretty_print_test.cc b/cpp/src/arrow/pretty_print_test.cc index 7b47a05630ca..bebbc6e82e39 100644 --- a/cpp/src/arrow/pretty_print_test.cc +++ b/cpp/src/arrow/pretty_print_test.cc @@ -642,6 +642,24 @@ TEST_F(TestPrettyPrint, StructTypeAdvanced) { CheckStream(*array, {0, 10}, ex); } +TEST_F(TestPrettyPrint, StructTypeNoNewLines) { + // Struct types will at least have new lines for arrays + auto simple_1 = field("one", int32()); + auto simple_2 = field("two", int32()); + auto simple_struct = struct_({simple_1, simple_2}); + + auto array = ArrayFromJSON(simple_struct, "[[11, 22], null, [null, 33]]"); + auto options = PrettyPrintOptions(); + options.skip_new_lines = true; + + static const char* ex = R"expected(-- is_valid:[true,false,true] +-- child 0 type: int32 +[11,0,null] +-- child 1 type: int32 +[22,0,33])expected"; + CheckStream(*array, options, ex); +} + TEST_F(TestPrettyPrint, BinaryType) { std::vector is_valid = {true, true, false, true, true, true}; std::vector values = {"foo", "bar", "", "baz", "", "\xff"}; @@ -715,17 +733,46 @@ TEST_F(TestPrettyPrint, ListType) { 3 ] ])expected"; + static const char* ex_4 = R"expected([ + [ + null + ], + [], + ... 
+ [ + 4, + 6, + 7 + ], + [ + 2, + 3 + ] +])expected"; auto array = ArrayFromJSON(list_type, "[[null], [], null, [4, 6, 7], [2, 3]]"); - CheckArray(*array, {0, 10}, ex); - CheckArray(*array, {2, 10}, ex_2); - CheckStream(*array, {0, 1}, ex_3); + auto make_options = [](int indent, int window, int container_window) { + auto options = PrettyPrintOptions(indent, window); + options.container_window = container_window; + return options; + }; + CheckStream(*array, make_options(/*indent=*/0, /*window=*/10, /*container_window=*/5), + ex); + CheckStream(*array, make_options(/*indent=*/2, /*window=*/10, /*container_window=*/5), + ex_2); + CheckStream(*array, make_options(/*indent=*/0, /*window=*/10, /*container_window=*/1), + ex_3); + CheckArray(*array, {0, 10}, ex_4); list_type = large_list(int64()); array = ArrayFromJSON(list_type, "[[null], [], null, [4, 6, 7], [2, 3]]"); - CheckArray(*array, {0, 10}, ex); - CheckArray(*array, {2, 10}, ex_2); - CheckStream(*array, {0, 1}, ex_3); + CheckStream(*array, make_options(/*indent=*/0, /*window=*/10, /*container_window=*/5), + ex); + CheckStream(*array, make_options(/*indent=*/2, /*window=*/10, /*container_window=*/5), + ex_2); + CheckStream(*array, make_options(/*indent=*/0, /*window=*/10, /*container_window=*/1), + ex_3); + CheckArray(*array, {0, 10}, ex_4); } TEST_F(TestPrettyPrint, ListTypeNoNewlines) { @@ -736,10 +783,12 @@ TEST_F(TestPrettyPrint, ListTypeNoNewlines) { PrettyPrintOptions options{}; options.skip_new_lines = true; options.null_rep = "NA"; + options.container_window = 10; CheckArray(*empty_array, options, "[]", false); CheckArray(*array, options, "[[NA],[],NA,[4,5,6,7,8],[2,3]]", false); options.window = 2; + options.container_window = 2; CheckArray(*empty_array, options, "[]", false); CheckArray(*array, options, "[[NA],[],...,[4,5,...,7,8],[2,3]]", false); } @@ -779,6 +828,14 @@ TEST_F(TestPrettyPrint, MapType) { [] ])expected"; CheckArray(*array, {0, 10}, ex); + + PrettyPrintOptions options{}; + 
options.skip_new_lines = true; + + static const char* ex_flat = + R"expected([keys:["joe","mark"]values:[0,null],null,)expected" + R"expected(keys:["cap"]values:[8],keys:[]values:[]])expected"; + CheckArray(*array, options, ex_flat, false); } TEST_F(TestPrettyPrint, FixedSizeListType) { @@ -797,7 +854,7 @@ TEST_F(TestPrettyPrint, FixedSizeListType) { 3, null ], - null, + ... [ 4, 6, @@ -809,7 +866,39 @@ TEST_F(TestPrettyPrint, FixedSizeListType) { 5 ] ])expected"); - CheckStream(*array, {0, 1}, R"expected([ + + auto make_options = [](int indent, int window, int container_window) { + auto options = PrettyPrintOptions(indent, window); + options.container_window = container_window; + return options; + }; + CheckStream(*array, make_options(/*indent=*/0, /*window=*/1, /*container_window=*/3), + R"expected([ + [ + null, + ... + 1 + ], + [ + 2, + ... + null + ], + null, + [ + 4, + ... + 7 + ], + [ + 8, + ... + 5 + ] +])expected"); + + CheckStream(*array, make_options(/*indent=*/0, /*window=*/1, /*container_window=*/1), + R"expected([ [ null, ... diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi index 10ead0e6a951..2a40e658b4e6 100644 --- a/python/pyarrow/array.pxi +++ b/python/pyarrow/array.pxi @@ -1007,7 +1007,7 @@ cdef class Array(_PandasConvertible): return '{0}\n{1}'.format(type_format, str(self)) def to_string(self, *, int indent=2, int top_level_indent=0, int window=10, - c_bool skip_new_lines=False): + int container_window=2, c_bool skip_new_lines=False): """ Render a "pretty-printed" string representation of the Array. @@ -1020,9 +1020,13 @@ cdef class Array(_PandasConvertible): How much to indent right the entire content of the array, by default ``0``. window : int - How many items to preview at the begin and end - of the array when the arrays is bigger than the window. - The other elements will be ellipsed. + How many primitive items to preview at the begin and end + of the array when the array is bigger than the window. 
+ The other items will be ellipsed. + container_window : int + How many container items (such as a list in a list array) + to preview at the begin and end of the array when the array + is bigger than the window. skip_new_lines : bool If the array should be rendered as a single line of text or if each element should be on its own line. diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd index 91f05868f2ff..a42623c4a2c2 100644 --- a/python/pyarrow/includes/libarrow.pxd +++ b/python/pyarrow/includes/libarrow.pxd @@ -492,6 +492,7 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil: int indent int indent_size int window + int container_window c_string null_rep c_bool skip_new_lines c_bool truncate_metadata diff --git a/python/pyarrow/table.pxi b/python/pyarrow/table.pxi index 37ea5ace22d1..de2bba790af6 100644 --- a/python/pyarrow/table.pxi +++ b/python/pyarrow/table.pxi @@ -63,7 +63,7 @@ cdef class ChunkedArray(_PandasConvertible): type_format = object.__repr__(self) return '{0}\n{1}'.format(type_format, str(self)) - def to_string(self, *, int indent=0, int window=10, + def to_string(self, *, int indent=0, int window=5, int container_window=2, c_bool skip_new_lines=False): """ Render a "pretty-printed" string representation of the ChunkedArray @@ -74,9 +74,14 @@ cdef class ChunkedArray(_PandasConvertible): How much to indent right the content of the array, by default ``0``. window : int - How many items to preview at the begin and end - of the array when the arrays is bigger than the window. + How many items to preview within each chunk at the beginning and end + of the chunk when the chunk is bigger than the window. The other elements will be ellipsed. + container_window : int + How many chunks to preview at the beginning and end + of the array when the array is bigger than the window. + The other elements will be ellipsed. + This setting also applies to list columns.
skip_new_lines : bool If the array should be rendered as a single line of text or if each element should be on its own line. @@ -88,6 +93,7 @@ cdef class ChunkedArray(_PandasConvertible): with nogil: options = PrettyPrintOptions(indent, window) options.skip_new_lines = skip_new_lines + options.container_window = container_window check_status( PrettyPrint( deref(self.chunked_array), diff --git a/python/pyarrow/tests/test_table.py b/python/pyarrow/tests/test_table.py index 8caff25bf524..11ecb101777f 100644 --- a/python/pyarrow/tests/test_table.py +++ b/python/pyarrow/tests/test_table.py @@ -1742,8 +1742,8 @@ def test_table_repr_to_string_ellipsis(): c0: int16 c1: int32 ---- -c0: [[1,2,3,4,1,2,3,4,1,2,...,3,4,1,2,3,4,1,2,3,4]] -c1: [[10,20,30,40,10,20,30,40,10,20,...,30,40,10,20,30,40,10,20,30,40]]""" +c0: [[1,2,3,4,1,...,4,1,2,3,4]] +c1: [[10,20,30,40,10,...,40,10,20,30,40]]""" def test_table_function_unicode_schema():