Skip to content
49 changes: 29 additions & 20 deletions cpp/src/arrow/pretty_print.cc
Original file line number Diff line number Diff line change
Expand Up @@ -66,9 +66,13 @@ class PrettyPrinter {
void CloseArray(const Array& array);
void Flush() { (*sink_) << std::flush; }

PrettyPrintOptions ChildOptions() const {
/// Build the options to hand to a nested PrettyPrint call.
///
/// Starts from a copy of this printer's `options_` and overrides only `indent`:
/// - `increment_indent == false` (default): the child starts at the printer's
///   current `indent_`.
/// - `increment_indent == true`: the child starts one `indent_size` step deeper
///   than `indent_`.
///
/// All other option fields are carried over unchanged from `options_`.
PrettyPrintOptions ChildOptions(bool increment_indent = false) const {
  PrettyPrintOptions child_options = options_;
  // Single assignment: the indent is fully determined by the flag, so there is
  // no need to store `indent_` first and then overwrite it.
  child_options.indent =
      increment_indent ? indent_ + child_options.indent_size : indent_;
  return child_options;
}

Expand Down Expand Up @@ -134,18 +138,19 @@ class ArrayPrinter : public PrettyPrinter {
private:
template <typename FormatFunction>
Status WriteValues(const Array& array, FormatFunction&& func,
bool indent_non_null_values = true) {
bool indent_non_null_values = true, bool is_container = false) {
// `indent_non_null_values` should be false if `FormatFunction` applies
// indentation itself.
int window = is_container ? options_.container_window : options_.window;
for (int64_t i = 0; i < array.length(); ++i) {
const bool is_last = (i == array.length() - 1);
if ((i >= options_.window) && (i < (array.length() - options_.window))) {
if ((i >= window) && (i < (array.length() - window))) {
IndentAfterNewline();
(*sink_) << "...";
if (!is_last && options_.skip_new_lines) {
(*sink_) << ",";
}
i = array.length() - options_.window - 1;
i = array.length() - window - 1;
} else if (array.IsNull(i)) {
IndentAfterNewline();
(*sink_) << options_.null_rep;
Expand Down Expand Up @@ -187,7 +192,7 @@ class ArrayPrinter : public PrettyPrinter {
Status PrintChildren(const std::vector<std::shared_ptr<Array>>& fields, int64_t offset,
int64_t length) {
for (size_t i = 0; i < fields.size(); ++i) {
Newline();
Write("\n"); // Always want newline before child array description
Indent();
std::stringstream ss;
ss << "-- child " << i << " type: " << fields[i]->type()->ToString() << "\n";
Expand All @@ -197,7 +202,8 @@ class ArrayPrinter : public PrettyPrinter {
if (offset != 0) {
field = field->Slice(offset, length);
}
RETURN_NOT_OK(PrettyPrint(*field, indent_ + options_.indent_size, sink_));
// Indent();
RETURN_NOT_OK(PrettyPrint(*field, ChildOptions(true), sink_));
}
return Status::OK();
}
Expand Down Expand Up @@ -256,7 +262,8 @@ class ArrayPrinter : public PrettyPrinter {
return values_printer.Print(
*values->Slice(array.value_offset(i), array.value_length(i)));
},
/*indent_non_null_values=*/false);
/*indent_non_null_values=*/false,
/*is_container=*/true);
}

Status WriteDataValues(const MapArray& array) {
Expand All @@ -268,7 +275,7 @@ class ArrayPrinter : public PrettyPrinter {
return WriteValues(
array,
[&](int64_t i) {
Indent();
IndentAfterNewline();
(*sink_) << "keys:";
Newline();
RETURN_NOT_OK(values_printer.Print(
Expand Down Expand Up @@ -334,7 +341,7 @@ class ArrayPrinter : public PrettyPrinter {
Indent();
Write("-- type_ids: ");
UInt8Array type_codes(array.length(), array.type_codes(), nullptr, 0, array.offset());
RETURN_NOT_OK(PrettyPrint(type_codes, indent_ + options_.indent_size, sink_));
RETURN_NOT_OK(PrettyPrint(type_codes, ChildOptions(true), sink_));

if (array.mode() == UnionMode::DENSE) {
Newline();
Expand All @@ -343,7 +350,7 @@ class ArrayPrinter : public PrettyPrinter {
Int32Array value_offsets(
array.length(), checked_cast<const DenseUnionArray&>(array).value_offsets(),
nullptr, 0, array.offset());
RETURN_NOT_OK(PrettyPrint(value_offsets, indent_ + options_.indent_size, sink_));
RETURN_NOT_OK(PrettyPrint(value_offsets, ChildOptions(true), sink_));
}

// Print the children without any offset, because the type ids are absolute
Expand All @@ -359,13 +366,12 @@ class ArrayPrinter : public PrettyPrinter {
Newline();
Indent();
Write("-- dictionary:\n");
RETURN_NOT_OK(
PrettyPrint(*array.dictionary(), indent_ + options_.indent_size, sink_));
RETURN_NOT_OK(PrettyPrint(*array.dictionary(), ChildOptions(true), sink_));

Newline();
Indent();
Write("-- indices:\n");
return PrettyPrint(*array.indices(), indent_ + options_.indent_size, sink_);
return PrettyPrint(*array.indices(), ChildOptions(true), sink_);
}

Status Print(const Array& array) {
Expand All @@ -384,7 +390,7 @@ Status ArrayPrinter::WriteValidityBitmap(const Array& array) {
Indent();
BooleanArray is_valid(array.length(), array.null_bitmap(), nullptr, 0,
array.offset());
return PrettyPrint(is_valid, indent_ + options_.indent_size, sink_);
return PrettyPrint(is_valid, ChildOptions(true), sink_);
} else {
Write(" all not null");
return Status::OK();
Expand Down Expand Up @@ -418,13 +424,16 @@ Status PrettyPrint(const ChunkedArray& chunked_arr, const PrettyPrintOptions& op
std::ostream* sink) {
int num_chunks = chunked_arr.num_chunks();
int indent = options.indent;
int window = options.window;
int window = options.container_window;
// Struct fields are always on new line
Comment thread
wjones127 marked this conversation as resolved.
Outdated
bool skip_new_lines =
options.skip_new_lines && (chunked_arr.type()->id() != Type::STRUCT);

for (int i = 0; i < indent; ++i) {
(*sink) << " ";
}
(*sink) << "[";
if (!options.skip_new_lines) {
if (!skip_new_lines) {
*sink << "\n";
}
bool skip_comma = true;
Expand All @@ -433,16 +442,16 @@ Status PrettyPrint(const ChunkedArray& chunked_arr, const PrettyPrintOptions& op
skip_comma = false;
} else {
(*sink) << ",";
if (!options.skip_new_lines) {
if (!skip_new_lines) {
*sink << "\n";
}
}
if ((i >= window) && (i < (num_chunks - window))) {
for (int i = 0; i < indent; ++i) {
(*sink) << " ";
}
(*sink) << "...";
if (!options.skip_new_lines) {
(*sink) << "...,";
if (!skip_new_lines) {
*sink << "\n";
}
i = num_chunks - window - 1;
Expand Down
25 changes: 15 additions & 10 deletions cpp/src/arrow/pretty_print.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,16 +35,17 @@ class Table;
struct PrettyPrintOptions {
PrettyPrintOptions() = default;

PrettyPrintOptions(int indent_arg, // NOLINT runtime/explicit
int window_arg = 10, int indent_size_arg = 2,
std::string null_rep_arg = "null", bool skip_new_lines_arg = false,
bool truncate_metadata_arg = true)
: indent(indent_arg),
indent_size(indent_size_arg),
window(window_arg),
null_rep(std::move(null_rep_arg)),
skip_new_lines(skip_new_lines_arg),
truncate_metadata(truncate_metadata_arg) {}
/// Construct options from explicit values; every argument after `indent` has a
/// default.
///
/// The constructor is not declared `explicit` (hence the NOLINT), so a bare int
/// or a braced list such as `{0, 10}` converts implicitly to PrettyPrintOptions.
///
/// Arguments, in positional order:
/// - indent: stored in the `indent` member
/// - window: stored in `window` (maximum number of elements to show at the
///   beginning and at the end)
/// - indent_size: stored in `indent_size`
/// - null_rep: string used to represent a null value; moved into `null_rep`
/// - skip_new_lines: stored in `skip_new_lines`
/// - truncate_metadata: stored in `truncate_metadata`
/// - container_window: stored in `container_window` (like `window`, but for
///   elements that are containers)
///
/// Each parameter has the same name as the member it initializes: in
/// `indent(indent)` the outer name is the member and the inner name is the
/// parameter, and `std::move(null_rep)` moves from the parameter.
PrettyPrintOptions(int indent, // NOLINT runtime/explicit
int window = 10, int indent_size = 2, std::string null_rep = "null",
bool skip_new_lines = false, bool truncate_metadata = true,
int container_window = 2)
: indent(indent),
indent_size(indent_size),
window(window),
container_window(container_window),
null_rep(std::move(null_rep)),
skip_new_lines(skip_new_lines),
truncate_metadata(truncate_metadata) {}

static PrettyPrintOptions Defaults() { return PrettyPrintOptions(); }

Expand All @@ -57,6 +58,10 @@ struct PrettyPrintOptions {
/// Maximum number of elements to show at the beginning and at the end.
int window = 10;

/// Maximum number of elements to show at the beginning and at the end, for elements
/// that are containers (that is, lists in a ListArray and chunks in a ChunkedArray)
int container_window = 2;

/// String to use for representing a null value, defaults to "null"
std::string null_rep = "null";

Expand Down
105 changes: 97 additions & 8 deletions cpp/src/arrow/pretty_print_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -642,6 +642,24 @@ TEST_F(TestPrettyPrint, StructTypeAdvanced) {
CheckStream(*array, {0, 10}, ex);
}

// Checks how a struct array is rendered when `skip_new_lines` is enabled.
// Per the expected text below, the validity bitmap and each child's values are
// printed on one line each, while every "-- child N type:" header still begins
// on its own line.
TEST_F(TestPrettyPrint, StructTypeNoNewLines) {
// Even with skip_new_lines set, struct output keeps a line break before each
// child array.
auto simple_1 = field("one", int32());
auto simple_2 = field("two", int32());
auto simple_struct = struct_({simple_1, simple_2});

// Three struct slots: a fully valid one, a null struct, and one with a null
// first field.
auto array = ArrayFromJSON(simple_struct, "[[11, 22], null, [null, 33]]");
auto options = PrettyPrintOptions();
options.skip_new_lines = true;

// The null struct slot is reported as `false` in is_valid; in the child arrays
// that slot is shown as 0 rather than null.
static const char* ex = R"expected(-- is_valid:[true,false,true]
-- child 0 type: int32
[11,0,null]
-- child 1 type: int32
[22,0,33])expected";
CheckStream(*array, options, ex);
}

TEST_F(TestPrettyPrint, BinaryType) {
std::vector<bool> is_valid = {true, true, false, true, true, true};
std::vector<std::string> values = {"foo", "bar", "", "baz", "", "\xff"};
Expand Down Expand Up @@ -715,17 +733,46 @@ TEST_F(TestPrettyPrint, ListType) {
3
]
])expected";
static const char* ex_4 = R"expected([
[
null
],
[],
...
[
4,
6,
7
],
[
2,
3
]
])expected";

auto array = ArrayFromJSON(list_type, "[[null], [], null, [4, 6, 7], [2, 3]]");
CheckArray(*array, {0, 10}, ex);
CheckArray(*array, {2, 10}, ex_2);
CheckStream(*array, {0, 1}, ex_3);
Comment thread
wjones127 marked this conversation as resolved.
Outdated
auto make_options = [](int indent, int window, int container_window) {
auto options = PrettyPrintOptions(indent, window);
options.container_window = container_window;
return options;
};
CheckStream(*array, make_options(/*indent=*/0, /*window=*/10, /*container_window=*/5),
ex);
CheckStream(*array, make_options(/*indent=*/2, /*window=*/10, /*container_window=*/5),
ex_2);
CheckStream(*array, make_options(/*indent=*/0, /*window=*/10, /*container_window=*/1),
ex_3);
CheckArray(*array, {0, 10}, ex_4);

list_type = large_list(int64());
array = ArrayFromJSON(list_type, "[[null], [], null, [4, 6, 7], [2, 3]]");
CheckArray(*array, {0, 10}, ex);
CheckArray(*array, {2, 10}, ex_2);
CheckStream(*array, {0, 1}, ex_3);
CheckStream(*array, make_options(/*indent=*/0, /*window=*/10, /*container_window=*/5),
ex);
CheckStream(*array, make_options(/*indent=*/2, /*window=*/10, /*container_window=*/5),
ex_2);
CheckStream(*array, make_options(/*indent=*/0, /*window=*/10, /*container_window=*/1),
ex_3);
CheckArray(*array, {0, 10}, ex_4);
}

TEST_F(TestPrettyPrint, ListTypeNoNewlines) {
Expand All @@ -736,10 +783,12 @@ TEST_F(TestPrettyPrint, ListTypeNoNewlines) {
PrettyPrintOptions options{};
options.skip_new_lines = true;
options.null_rep = "NA";
options.container_window = 10;
CheckArray(*empty_array, options, "[]", false);
CheckArray(*array, options, "[[NA],[],NA,[4,5,6,7,8],[2,3]]", false);

options.window = 2;
options.container_window = 2;
CheckArray(*empty_array, options, "[]", false);
CheckArray(*array, options, "[[NA],[],...,[4,5,...,7,8],[2,3]]", false);
}
Expand Down Expand Up @@ -779,6 +828,14 @@ TEST_F(TestPrettyPrint, MapType) {
[]
])expected";
CheckArray(*array, {0, 10}, ex);

PrettyPrintOptions options{};
options.skip_new_lines = true;

static const char* ex_flat =
R"expected([keys:["joe","mark"]values:[0,null],null,)expected"
R"expected(keys:["cap"]values:[8],keys:[]values:[]])expected";
CheckArray(*array, options, ex_flat, false);
}

TEST_F(TestPrettyPrint, FixedSizeListType) {
Expand All @@ -797,7 +854,7 @@ TEST_F(TestPrettyPrint, FixedSizeListType) {
3,
null
],
null,
...
[
4,
6,
Expand All @@ -809,7 +866,39 @@ TEST_F(TestPrettyPrint, FixedSizeListType) {
5
]
])expected");
CheckStream(*array, {0, 1}, R"expected([

auto make_options = [](int indent, int window, int container_window) {
auto options = PrettyPrintOptions(indent, window);
options.container_window = container_window;
return options;
};
CheckStream(*array, make_options(/*indent=*/0, /*window=*/1, /*container_window=*/3),
R"expected([
[
null,
...
1
],
[
2,
...
null
],
null,
[
4,
...
7
],
[
8,
...
5
]
])expected");

CheckStream(*array, make_options(/*indent=*/0, /*window=*/1, /*container_window=*/1),
R"expected([
[
null,
...
Expand Down
12 changes: 8 additions & 4 deletions python/pyarrow/array.pxi
Original file line number Diff line number Diff line change
Expand Up @@ -1007,7 +1007,7 @@ cdef class Array(_PandasConvertible):
return '{0}\n{1}'.format(type_format, str(self))

def to_string(self, *, int indent=2, int top_level_indent=0, int window=10,
c_bool skip_new_lines=False):
int container_window=2, c_bool skip_new_lines=False):
"""
Render a "pretty-printed" string representation of the Array.

Expand All @@ -1020,9 +1020,13 @@ cdef class Array(_PandasConvertible):
How much to indent right the entire content of the array,
by default ``0``.
window : int
How many items to preview at the begin and end
of the array when the arrays is bigger than the window.
The other elements will be ellipsed.
How many primitive items to preview at the beginning and end
Comment thread
wjones127 marked this conversation as resolved.
Outdated
of the array when the array is bigger than the window.
The other items will be replaced by an ellipsis.
container_window : int
How many container items (such as a list in a list array)
to preview at the beginning and end of the array when the array
is bigger than the container window.
skip_new_lines : bool
If the array should be rendered as a single line of text
or if each element should be on its own line.
Expand Down
1 change: 1 addition & 0 deletions python/pyarrow/includes/libarrow.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -492,6 +492,7 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
int indent
int indent_size
int window
int container_window
c_string null_rep
c_bool skip_new_lines
c_bool truncate_metadata
Expand Down
Loading