From 05bf8579f20f589f3d516af3bf2cff985828c6a7 Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Thu, 29 Oct 2020 12:17:28 +0100 Subject: [PATCH 01/13] store metadata for each element of a list column too, not just the list itself. ARROW-10386. --- r/R/record-batch.R | 20 ++++++++++++++------ r/R/table.R | 19 ++++++++++++++++--- r/tests/testthat/test-metadata.R | 8 ++++++++ 3 files changed, 38 insertions(+), 9 deletions(-) diff --git a/r/R/record-batch.R b/r/R/record-batch.R index ef42c8de7fbc..afff6af34ced 100644 --- a/r/R/record-batch.R +++ b/r/R/record-batch.R @@ -291,6 +291,20 @@ as.data.frame.RecordBatch <- function(x, row.names = NULL, optional = FALSE, ... apply_arrow_r_metadata <- function(x, r_metadata) { tryCatch({ + columns_metadata <- r_metadata$columns + if (is.data.frame(x)) { + if (length(names(x)) && !is.null(columns_metadata)) { + for (name in intersect(names(columns_metadata), names(x))) { + x[[name]] <- apply_arrow_r_metadata(x[[name]], columns_metadata[[name]]) + } + } + } else if(is.list(x) && !inherits(x, "POSIXlt") && !is.null(columns_metadata)) { + x <- map2(x, columns_metadata, function(.x, .y) { + apply_arrow_r_metadata(.x, .y) + }) + x + } + if (!is.null(r_metadata$attributes)) { attributes(x)[names(r_metadata$attributes)] <- r_metadata$attributes if (inherits(x, "POSIXlt")) { @@ -302,12 +316,6 @@ apply_arrow_r_metadata <- function(x, r_metadata) { } } - columns_metadata <- r_metadata$columns - if (length(names(x)) && !is.null(columns_metadata)) { - for (name in intersect(names(columns_metadata), names(x))) { - x[[name]] <- apply_arrow_r_metadata(x[[name]], columns_metadata[[name]]) - } - } }, error = function(e) { warning("Invalid metadata$r", call. 
= FALSE) }) diff --git a/r/R/table.R b/r/R/table.R index 1d2190589f7f..172e7bceab70 100644 --- a/r/R/table.R +++ b/r/R/table.R @@ -210,11 +210,24 @@ arrow_attributes <- function(x, only_top_level = FALSE) { if (is.data.frame(x)) { columns <- map(x, arrow_attributes) - if (length(att) || !all(map_lgl(columns, is.null))) { + out <- if (length(att) || !all(map_lgl(columns, is.null))) { list(attributes = att, columns = columns) } - } else if (length(att)) { - list(attributes = att, columns = NULL) + return(out) + } + + columns <- NULL + if (is.list(x) && !inherits(x, "POSIXlt")) { + # for list columns, we also keep attributes of each + # element in columns + columns <- map(x, arrow_attributes) + if (all(map_lgl(columns, is.null))) { + columns <- NULL + } + } + + if (length(att) || !is.null(columns)) { + list(attributes = att, columns = columns) } else { NULL } diff --git a/r/tests/testthat/test-metadata.R b/r/tests/testthat/test-metadata.R index 53ee4279b852..f869091ddd45 100644 --- a/r/tests/testthat/test-metadata.R +++ b/r/tests/testthat/test-metadata.R @@ -137,6 +137,7 @@ test_that("metadata keeps attribute of top level data frame", { expect_identical(as.data.frame(tab), df) }) + test_that("metadata drops readr's problems attribute", { readr_like <- tibble::tibble( dbl = 1.1, @@ -156,3 +157,10 @@ test_that("metadata drops readr's problems attribute", { tab <- Table$create(readr_like) expect_null(attr(as.data.frame(tab), "problems")) }) + +test_that("metadata of list elements (ARROW-10386)", { + df <- data.frame(x = list(structure(1, foo = "bar"), structure(2, foo = "bar"))) + tab <- Table$create(df) + expect_identical(attr(as.data.frame(tab)$x[[1]], "foo"), "bar") + expect_identical(attr(as.data.frame(tab)$x[[2]], "foo"), "bar") +}) From 0c6065a2bae8f600da4b7ca686e1db4dfa3d1191 Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Thu, 29 Oct 2020 12:25:54 +0100 Subject: [PATCH 02/13] update test --- r/tests/testthat/test-metadata.R | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/r/tests/testthat/test-metadata.R b/r/tests/testthat/test-metadata.R index f869091ddd45..b01620cd4a5d 100644 --- a/r/tests/testthat/test-metadata.R +++ b/r/tests/testthat/test-metadata.R @@ -159,7 +159,7 @@ test_that("metadata drops readr's problems attribute", { }) test_that("metadata of list elements (ARROW-10386)", { - df <- data.frame(x = list(structure(1, foo = "bar"), structure(2, foo = "bar"))) + df <- data.frame(x = I(list(structure(1, foo = "bar"), structure(2, foo = "bar")))) tab <- Table$create(df) expect_identical(attr(as.data.frame(tab)$x[[1]], "foo"), "bar") expect_identical(attr(as.data.frame(tab)$x[[2]], "foo"), "bar") From 57f05e2140be27bbd35c6d63ade6a245c59a0733 Mon Sep 17 00:00:00 2001 From: Jonathan Keane Date: Mon, 11 Jan 2021 17:10:49 -0600 Subject: [PATCH 03/13] Slight clarification on test --- r/tests/testthat/test-metadata.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/r/tests/testthat/test-metadata.R b/r/tests/testthat/test-metadata.R index b01620cd4a5d..479cb4ea0580 100644 --- a/r/tests/testthat/test-metadata.R +++ b/r/tests/testthat/test-metadata.R @@ -159,8 +159,8 @@ test_that("metadata drops readr's problems attribute", { }) test_that("metadata of list elements (ARROW-10386)", { - df <- data.frame(x = I(list(structure(1, foo = "bar"), structure(2, foo = "bar")))) + df <- data.frame(x = I(list(structure(1, foo = "bar"), structure(2, baz = "qux")))) tab <- Table$create(df) expect_identical(attr(as.data.frame(tab)$x[[1]], "foo"), "bar") - expect_identical(attr(as.data.frame(tab)$x[[2]], "foo"), "bar") + expect_identical(attr(as.data.frame(tab)$x[[2]], "baz"), "qux") }) From a92ed0d98310547413668c0315214b08be62601e Mon Sep 17 00:00:00 2001 From: Jonathan Keane Date: Tue, 12 Jan 2021 18:07:05 -0600 Subject: [PATCH 04/13] Try some compression --- r/R/record-batch.R | 22 ++++++++++++++++++++-- r/tests/testthat/helper-data.R | 5 +++++ r/tests/testthat/test-metadata.R | 30 
++++++++++++++++++++++++++++++ 3 files changed, 55 insertions(+), 2 deletions(-) diff --git a/r/R/record-batch.R b/r/R/record-batch.R index afff6af34ced..71dfafdbe9c1 100644 --- a/r/R/record-batch.R +++ b/r/R/record-batch.R @@ -279,11 +279,29 @@ as.data.frame.RecordBatch <- function(x, row.names = NULL, optional = FALSE, ... # drop problems attributes (most likely from readr) x[["attributes"]][["problems"]] <- NULL - rawToChar(serialize(x, NULL, ascii = TRUE)) + out <- serialize(x, NULL, ascii = TRUE) + + # if the metadata is over 100 kB, compress + if (object.size(out) > 100000) { + out_comp <- serialize(memCompress(out, type = "gzip"), NULL, ascii = TRUE) + + # but ensure that the compression+serialization is effective. + if (object.size(out) > object.size(out_comp)) out <- out_comp + } + + rawToChar(out) } .unserialize_arrow_r_metadata <- function(x) { - tryCatch(unserialize(charToRaw(x)), error = function(e) { + tryCatch({ + out <- unserialize(charToRaw(x)) + + # if this is still raw, try decompressing + if (is.raw(out)) { + out <- unserialize(memDecompress(out, type = "gzip")) + } + out + }, error = function(e) { warning("Invalid metadata$r", call. 
= FALSE) NULL }) diff --git a/r/tests/testthat/helper-data.R b/r/tests/testthat/helper-data.R index 26b1cf0e1084..06f0b48cb8ed 100644 --- a/r/tests/testthat/helper-data.R +++ b/r/tests/testthat/helper-data.R @@ -67,3 +67,8 @@ make_big_string <- function() { # This creates a character vector that would exceed the capacity of BinaryArray rep(purrr::map_chr(2047:2050, ~paste(sample(letters, ., replace = TRUE), collapse = "")), 2^18) } + +make_string_of_size <- function(size = 1) { + purrr::map_chr(1000*size, ~paste(sample(letters, ., replace = TRUE), collapse = "")) +} + diff --git a/r/tests/testthat/test-metadata.R b/r/tests/testthat/test-metadata.R index 479cb4ea0580..f9fac3b90c65 100644 --- a/r/tests/testthat/test-metadata.R +++ b/r/tests/testthat/test-metadata.R @@ -83,6 +83,36 @@ test_that("Garbage R metadata doesn't break things", { ) }) +test_that("Metadata serialization compression", { + # attributes that (when serialized) are just under 100kb are not compressed, + # and simply serialized + strings <- rep(make_string_of_size(1), 98) + small <- .serialize_arrow_r_metadata(strings) + expect_equal( + object.size(small), + object.size(rawToChar(serialize(strings, NULL, ascii = TRUE))) + ) + + # Large strings will be compressed + large_strings <- rep(make_string_of_size(1), 100) + large <- .serialize_arrow_r_metadata(large_strings) + expect_lt( + object.size(large), + object.size(rawToChar(serialize(large_strings, NULL, ascii = TRUE))) + ) + # and this compression ends up being smaller than even the "small" strings + expect_lt(object.size(large), object.size(small)) + + # However strings where compression + serialization is not effective are no + # worse than only serialization alone + large_few_strings <- rep(make_string_of_size(50), 2) + large_few <- .serialize_arrow_r_metadata(large_few_strings) + expect_equal( + object.size(large_few), + object.size(rawToChar(serialize(large_few_strings, NULL, ascii = TRUE))) + ) +}) + test_that("RecordBatch metadata", { rb <- 
RecordBatch$create(x = 1:2, y = c("a", "b")) expect_equivalent(rb$metadata, list()) From 95aaa304030d2bb5d76262cf82d7cb99a298bf25 Mon Sep 17 00:00:00 2001 From: Jonathan Keane Date: Tue, 12 Jan 2021 18:53:04 -0600 Subject: [PATCH 05/13] Oops, attributes must be lists. --- r/tests/testthat/test-metadata.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/r/tests/testthat/test-metadata.R b/r/tests/testthat/test-metadata.R index f9fac3b90c65..80d76e5612c7 100644 --- a/r/tests/testthat/test-metadata.R +++ b/r/tests/testthat/test-metadata.R @@ -86,7 +86,7 @@ test_that("Garbage R metadata doesn't break things", { test_that("Metadata serialization compression", { # attributes that (when serialized) are just under 100kb are not compressed, # and simply serialized - strings <- rep(make_string_of_size(1), 98) + strings <- as.list(rep(make_string_of_size(1), 98)) small <- .serialize_arrow_r_metadata(strings) expect_equal( object.size(small), @@ -94,7 +94,7 @@ test_that("Metadata serialization compression", { ) # Large strings will be compressed - large_strings <- rep(make_string_of_size(1), 100) + large_strings <- as.list(rep(make_string_of_size(1), 100)) large <- .serialize_arrow_r_metadata(large_strings) expect_lt( object.size(large), @@ -105,7 +105,7 @@ test_that("Metadata serialization compression", { # However strings where compression + serialization is not effective are no # worse than only serialization alone - large_few_strings <- rep(make_string_of_size(50), 2) + large_few_strings <- as.list(rep(make_string_of_size(50), 2)) large_few <- .serialize_arrow_r_metadata(large_few_strings) expect_equal( object.size(large_few), From 6fd2d35254bb4e5bda8ea9e69701715614e6cc44 Mon Sep 17 00:00:00 2001 From: Jonathan Keane Date: Wed, 13 Jan 2021 08:27:01 -0600 Subject: [PATCH 06/13] Add option for disabling compression --- r/R/arrow-package.R | 4 ++++ r/R/record-batch.R | 2 +- r/tests/testthat/test-metadata.R | 10 ++++++++++ 3 files changed, 15 
insertions(+), 1 deletion(-) diff --git a/r/R/arrow-package.R b/r/R/arrow-package.R index 8743037f5d3a..540cbcd8645a 100644 --- a/r/R/arrow-package.R +++ b/r/R/arrow-package.R @@ -147,6 +147,10 @@ print.arrow_info <- function(x, ...) { invisible(x) } +option_compress_metadata <- function() { + !is_false(getOption("arrow.compress_metadata")) +} + #' @include enums.R ArrowObject <- R6Class("ArrowObject", public = list( diff --git a/r/R/record-batch.R b/r/R/record-batch.R index 71dfafdbe9c1..bd2dff0b76f4 100644 --- a/r/R/record-batch.R +++ b/r/R/record-batch.R @@ -282,7 +282,7 @@ as.data.frame.RecordBatch <- function(x, row.names = NULL, optional = FALSE, ... out <- serialize(x, NULL, ascii = TRUE) # if the metadata is over 100 kB, compress - if (object.size(out) > 100000) { + if (option_compress_metadata() && object.size(out) > 100000) { out_comp <- serialize(memCompress(out, type = "gzip"), NULL, ascii = TRUE) # but ensure that the compression+serialization is effective. diff --git a/r/tests/testthat/test-metadata.R b/r/tests/testthat/test-metadata.R index 80d76e5612c7..17c43bb28ca6 100644 --- a/r/tests/testthat/test-metadata.R +++ b/r/tests/testthat/test-metadata.R @@ -111,6 +111,16 @@ test_that("Metadata serialization compression", { object.size(large_few), object.size(rawToChar(serialize(large_few_strings, NULL, ascii = TRUE))) ) + + # But we can disable compression + op <- options(arrow.compress_metadata = FALSE); on.exit(options(op)) + + large_strings <- as.list(rep(make_string_of_size(1), 100)) + large <- .serialize_arrow_r_metadata(large_strings) + expect_equal( + object.size(large), + object.size(rawToChar(serialize(large_strings, NULL, ascii = TRUE))) + ) }) test_that("RecordBatch metadata", { From 5649500df3fc7fb0875c9763f91047ddb907d834 Mon Sep 17 00:00:00 2001 From: Jonathan Keane Date: Wed, 13 Jan 2021 11:36:32 -0600 Subject: [PATCH 07/13] Updated documentation --- r/NAMESPACE | 1 + r/R/feather.R | 1 + r/R/parquet.R | 1 + r/R/record-batch.R | 3 +- 
r/R/schema.R | 28 ++++++++++++++ r/R/table.R | 2 +- r/man/ParquetWriterProperties.Rd | 2 + r/man/RecordBatch.Rd | 2 +- r/man/Schema.Rd | 64 ++++++++++++++++++++++---------- r/man/Table.Rd | 2 +- r/man/write_feather.Rd | 2 + r/vignettes/arrow.Rmd | 2 +- 12 files changed, 86 insertions(+), 24 deletions(-) diff --git a/r/NAMESPACE b/r/NAMESPACE index 9ce89ca1f1cd..25434ee7fc4b 100644 --- a/r/NAMESPACE +++ b/r/NAMESPACE @@ -333,6 +333,7 @@ importFrom(utils,head) importFrom(utils,install.packages) importFrom(utils,modifyList) importFrom(utils,packageVersion) +importFrom(utils,object.size) importFrom(utils,tail) importFrom(vctrs,s3_register) importFrom(vctrs,vec_cast) diff --git a/r/R/feather.R b/r/R/feather.R index 6d29b7d0b891..5aaf340c6dbf 100644 --- a/r/R/feather.R +++ b/r/R/feather.R @@ -44,6 +44,7 @@ #' the stream will be left open. #' @export #' @seealso [RecordBatchWriter] for lower-level access to writing Arrow IPC data. +#' @seealso [Schema] for information about schemas and metadata handling. #' @examples #' \donttest{ #' tf <- tempfile() diff --git a/r/R/parquet.R b/r/R/parquet.R index ccf87c2f5117..4fe321666afd 100644 --- a/r/R/parquet.R +++ b/r/R/parquet.R @@ -275,6 +275,7 @@ make_valid_version <- function(version, valid_versions = valid_parquet_version) #' "snappy" for the `compression` argument. #' #' @seealso [write_parquet] +#' @seealso [Schema] for information about schemas and metadata handling. #' #' @export ParquetWriterProperties <- R6Class("ParquetWriterProperties", inherit = ArrowObject) diff --git a/r/R/record-batch.R b/r/R/record-batch.R index bd2dff0b76f4..6b89c01408c1 100644 --- a/r/R/record-batch.R +++ b/r/R/record-batch.R @@ -66,7 +66,7 @@ #' - `$schema` #' - `$metadata`: Returns the key-value metadata of the `Schema` as a named list. #' Modify or replace by assigning in (`batch$metadata <- new_metadata`). -#' All list elements are coerced to string. +#' All list elements are coerced to string. See `schema()` for more information. 
#' - `$columns`: Returns a list of `Array`s #' @rdname RecordBatch #' @name RecordBatch @@ -273,6 +273,7 @@ as.data.frame.RecordBatch <- function(x, row.names = NULL, optional = FALSE, ... df } +#' @importFrom utils object.size .serialize_arrow_r_metadata <- function(x) { assert_is(x, "list") diff --git a/r/R/schema.R b/r/R/schema.R index 9a0ad85acac4..57d8614baad6 100644 --- a/r/R/schema.R +++ b/r/R/schema.R @@ -50,6 +50,34 @@ #' - `$metadata`: returns the key-value metadata as a named list. #' Modify or replace by assigning in (`sch$metadata <- new_metadata`). #' All list elements are coerced to string. +#' +#' @section Metadata: +#' +#' Attributes from the `data.frame` are saved alongside tables so that the +#' object can be reconstructed faithfully in R (e.g. with `as.data.frame()`). +#' This metadata can be both at the top-level of the `data.frame` (e.g. +#' `attributes(df)`) or at the column (e.g. `attributes(df$col_a)`) or element +#' level (e.g. `attributes(df[1, "col_a"])`). For example, this allows for +#' storing `haven` columns in a table and being able to faithfully re-create +#' them when pulled back into R. This metadata is separate from the schema +#' (e.g. types of the columns) which is compatible with other Arrow clients. +#' The R metadata is only read by R and is ignored by other clients (e.g. +#' pyarrow which has its own custom metadata for things like Pandas metadata). +#' This metadata is stored (and can be accessed with) `table$metadata$r`. +#' +#' This metadata is saved by serializing R's attribute list structure to a +#' serialized string. Because of this, large amounts of metadata can quickly +#' increase the size of tables (and therefore the size of tables written to +#' parquet or feather files). If the (serialized) metadata exceeds 100Kbs in +#' size, it is first compressed before saving. To disable this compression +#' (e.g. 
for tables that are compatible with Arrow versions before 3.0.0 and +#' include large amounts of metadata) you can set the option +#' `arrow.compress_metadata` to `FALSE`. +#' +#' One exception to storing all metadata: `readr`'s `problems` attribute if it +#' exists is not saved with the metadata in order to prevent what are +#' sometimes excessively large when serialized. +#' #' @rdname Schema #' @name Schema #' @examples diff --git a/r/R/table.R b/r/R/table.R index 172e7bceab70..af79ab7809af 100644 --- a/r/R/table.R +++ b/r/R/table.R @@ -75,7 +75,7 @@ #' - `$schema` #' - `$metadata`: Returns the key-value metadata of the `Schema` as a named list. #' Modify or replace by assigning in (`tab$metadata <- new_metadata`). -#' All list elements are coerced to string. +#' All list elements are coerced to string. See `schema()` for more information. #' - `$columns`: Returns a list of `ChunkedArray`s #' @rdname Table #' @name Table diff --git a/r/man/ParquetWriterProperties.Rd b/r/man/ParquetWriterProperties.Rd index a2fab2a96ae9..7beb8a82a461 100644 --- a/r/man/ParquetWriterProperties.Rd +++ b/r/man/ParquetWriterProperties.Rd @@ -44,4 +44,6 @@ size of data pages within a column chunk (in bytes). Default 1 MiB. \seealso{ \link{write_parquet} + +\link{Schema} for information about schemas and metadata handling. } diff --git a/r/man/RecordBatch.Rd b/r/man/RecordBatch.Rd index c9cdb343ef88..4653c55814d2 100644 --- a/r/man/RecordBatch.Rd +++ b/r/man/RecordBatch.Rd @@ -68,7 +68,7 @@ There are also some active bindings \item \verb{$schema} \item \verb{$metadata}: Returns the key-value metadata of the \code{Schema} as a named list. Modify or replace by assigning in (\code{batch$metadata <- new_metadata}). -All list elements are coerced to string. +All list elements are coerced to string. See \code{schema()} for more information. 
\item \verb{$columns}: Returns a list of \code{Array}s } } diff --git a/r/man/Schema.Rd b/r/man/Schema.Rd index 1c1f75e2dd26..7471757115cb 100644 --- a/r/man/Schema.Rd +++ b/r/man/Schema.Rd @@ -12,22 +12,20 @@ schema(...) \item{...}{named list of \link[=data-type]{data types}} } \description{ -A \code{Schema} is a list of \link{Field}s, which map names to -Arrow \link[=data-type]{data types}. Create a \code{Schema} when you -want to convert an R \code{data.frame} to Arrow but don't want to rely on the -default mapping of R types to Arrow types, such as when you want to choose a -specific numeric precision, or when creating a \link{Dataset} and you want to -ensure a specific schema rather than inferring it from the various files. +A \code{Schema} is a list of \link{Field}s, which map names to Arrow \link[=data-type]{data types}. Create a \code{Schema} when you want to convert an R +\code{data.frame} to Arrow but don't want to rely on the default mapping of R +types to Arrow types, such as when you want to choose a specific numeric +precision, or when creating a \link{Dataset} and you want to ensure a specific +schema rather than inferring it from the various files. -Many Arrow objects, including \link{Table} and \link{Dataset}, have a \verb{$schema} method -(active binding) that lets you access their schema. +Many Arrow objects, including \link{Table} and \link{Dataset}, have a \verb{$schema} +method (active binding) that lets you access their schema. } \section{Methods}{ \itemize{ -\item \verb{$ToString()}: convert to a string -\item \verb{$field(i)}: returns the field at index \code{i} (0-based) -\item \verb{$GetFieldByName(x)}: returns the field with name \code{x} +\item \verb{$ToString()}: convert to a string - \verb{$field(i)}: returns the field at +index \code{i} (0-based) - \verb{$GetFieldByName(x)}: returns the field with name \code{x} \item \verb{$WithMetadata(metadata)}: returns a new \code{Schema} with the key-value \code{metadata} set. 
Note that all list elements in \code{metadata} will be coerced to \code{character}. @@ -37,17 +35,45 @@ to \code{character}. \section{Active bindings}{ \itemize{ -\item \verb{$names}: returns the field names (called in \code{names(Schema)}) -\item \verb{$num_fields}: returns the number of fields (called in \code{length(Schema)}) -\item \verb{$fields}: returns the list of \code{Field}s in the \code{Schema}, suitable for -iterating over -\item \verb{$HasMetadata}: logical: does this \code{Schema} have extra metadata? -\item \verb{$metadata}: returns the key-value metadata as a named list. -Modify or replace by assigning in (\code{sch$metadata <- new_metadata}). -All list elements are coerced to string. +\item \verb{$names}: returns the field names (called in \code{names(Schema)}) - +\verb{$num_fields}: returns the number of fields (called in \code{length(Schema)}) - +\verb{$fields}: returns the list of \code{Field}s in the \code{Schema}, suitable for +iterating over - \verb{$HasMetadata}: logical: does this \code{Schema} have extra +metadata? - \verb{$metadata}: returns the key-value metadata as a named list. +Modify or replace by assigning in (\code{sch$metadata <- new_metadata}). All +list elements are coerced to string. } } +\section{Metadata}{ + + +Attributes from the \code{data.frame} are saved alongside tables so that the +object can be reconstructed faithfully in R (e.g. with \code{as.data.frame()}). +This metadata can be both at the top-level of the \code{data.frame} (e.g. +\code{attributes(df)}) or at the column (e.g. \code{attributes(df$col_a)}) or element +level (e.g. \code{attributes(df[1, "col_a"])}). For example, this allows for +storing \code{haven} columns in a table and being able to faithfully re-create +them when pulled back into R. This metadata is separate from the schema +(e.g. types of the columns) which is compatible with other Arrow clients. +The R metadata is only read by R and is ignored by other clients (e.g. 
+pyarrow which has its own custom metadata for things like Pandas metadata). +This metadata is stored (and can be accessed with) \code{table$metadata$r}. + +This metadata is saved by serializing R's attribute list structure to a +serialized string. Because of this, large amounts of metadata can quickly +increase the size of tables (and therefore the size of tables written to +parquet or feather files). If the (serialized) metadata exceeds 100Kbs in +size, it is first compressed before saving. To disable this compression +(e.g. for tables that are compatible with Arrow versions before 3.0.0 and +include large amounts of metadata) you can set the option +\code{arrow.compress_metadata} to \code{FALSE}. + +One exception to storing all metadata: \code{readr}'s \code{problems} attribute if it +exists is not saved with the metadata in order to prevent what are +sometimes excessively large when serialized. +} + \examples{ \donttest{ df <- data.frame(col1 = 2:4, col2 = c(0.1, 0.3, 0.5)) diff --git a/r/man/Table.Rd b/r/man/Table.Rd index 18c7da123934..46e9afeaf535 100644 --- a/r/man/Table.Rd +++ b/r/man/Table.Rd @@ -68,7 +68,7 @@ There are also some active bindings: \item \verb{$schema} \item \verb{$metadata}: Returns the key-value metadata of the \code{Schema} as a named list. Modify or replace by assigning in (\code{tab$metadata <- new_metadata}). -All list elements are coerced to string. +All list elements are coerced to string. See \code{schema()} for more information. \item \verb{$columns}: Returns a list of \code{ChunkedArray}s } } diff --git a/r/man/write_feather.Rd b/r/man/write_feather.Rd index 277c81974755..691adbeef051 100644 --- a/r/man/write_feather.Rd +++ b/r/man/write_feather.Rd @@ -56,4 +56,6 @@ write_feather(mtcars, tf) } \seealso{ \link{RecordBatchWriter} for lower-level access to writing Arrow IPC data. + +\link{Schema} for information about schemas and metadata handling. 
} diff --git a/r/vignettes/arrow.Rmd b/r/vignettes/arrow.Rmd index 9ea977b7e559..a1604cb2358b 100644 --- a/r/vignettes/arrow.Rmd +++ b/r/vignettes/arrow.Rmd @@ -154,7 +154,7 @@ Arrow supports custom key-value metadata attached to Schemas. When we convert a This metadata is preserved when writing the table to Feather or Parquet, and when reading those files into R, or when calling `as.data.frame()` on a Table/RecordBatch, the column attributes are restored to the columns of the resulting `data.frame`. This means that custom data types, including `haven::labelled`, `vctrs` annotations, and others, are preserved when doing a round-trip through Arrow. -Note that the `attributes()` stored in `$metadata$r` are only understood by R. If you write a `data.frame` with `haven` columns to a Feather file and read that in Pandas, the `haven` metadata won't be recognized there. (Similarly, Pandas writes its own custom metadata, which the R package does not consume.) You are free, however, to define custom metadata conventions for your application and assign any (string) values you want to other metadata keys. +Note that the `attributes()` stored in `$metadata$r` are only understood by R. If you write a `data.frame` with `haven` columns to a Feather file and read that in Pandas, the `haven` metadata won't be recognized there. (Similarly, Pandas writes its own custom metadata, which the R package does not consume.) You are free, however, to define custom metadata conventions for your application and assign any (string) values you want to other metadata keys. For more details, see the documentation for `schema()`. 
## Class structure and package conventions From 92fa1f393091eb7e3cf99e7b6e8a7b3397ea7eb8 Mon Sep 17 00:00:00 2001 From: Jonathan Keane Date: Wed, 13 Jan 2021 11:47:31 -0600 Subject: [PATCH 08/13] CI bump From 82679fa37caee0f448a895f27e00093e768023ca Mon Sep 17 00:00:00 2001 From: Jonathan Keane Date: Wed, 13 Jan 2021 13:27:49 -0600 Subject: [PATCH 09/13] PR comments --- r/NAMESPACE | 2 +- r/R/schema.R | 45 +++++++++++++--------------- r/man/Schema.Rd | 80 ++++++++++++++++++++++++------------------------- 3 files changed, 62 insertions(+), 65 deletions(-) diff --git a/r/NAMESPACE b/r/NAMESPACE index 25434ee7fc4b..fdc84aa5189d 100644 --- a/r/NAMESPACE +++ b/r/NAMESPACE @@ -332,8 +332,8 @@ importFrom(tidyselect,vars_select) importFrom(utils,head) importFrom(utils,install.packages) importFrom(utils,modifyList) -importFrom(utils,packageVersion) importFrom(utils,object.size) +importFrom(utils,packageVersion) importFrom(utils,tail) importFrom(vctrs,s3_register) importFrom(vctrs,vec_cast) diff --git a/r/R/schema.R b/r/R/schema.R index 57d8614baad6..46eab693bec2 100644 --- a/r/R/schema.R +++ b/r/R/schema.R @@ -51,32 +51,29 @@ #' Modify or replace by assigning in (`sch$metadata <- new_metadata`). #' All list elements are coerced to string. #' -#' @section Metadata: +#' @section R Metadata: #' -#' Attributes from the `data.frame` are saved alongside tables so that the -#' object can be reconstructed faithfully in R (e.g. with `as.data.frame()`). -#' This metadata can be both at the top-level of the `data.frame` (e.g. -#' `attributes(df)`) or at the column (e.g. `attributes(df$col_a)`) or element -#' level (e.g. `attributes(df[1, "col_a"])`). For example, this allows for -#' storing `haven` columns in a table and being able to faithfully re-create -#' them when pulled back into R. This metadata is separate from the schema -#' (e.g. types of the columns) which is compatible with other Arrow clients. -#' The R metadata is only read by R and is ignored by other clients (e.g. 
-#' pyarrow which has its own custom metadata for things like Pandas metadata). -#' This metadata is stored (and can be accessed with) `table$metadata$r`. +#' When converting a data.frame to an Arrow Table or RecordBatch, attributes +#' from the `data.frame` are saved alongside tables so that the object can be +#' reconstructed faithfully in R (e.g. with `as.data.frame()`). This metadata +#' can be at the top level of the `data.frame` (e.g. `attributes(df)`), at +#' the column level (e.g. `attributes(df$col_a)`), or, for list columns only, +#' at the element level (e.g. `attributes(df[[1, "col_a"]])`). For example, this +#' allows for storing `haven` columns in a table and being able to faithfully +#' re-create them when pulled back into R. This metadata is separate from the +#' schema (column names and types) which is compatible with other Arrow +#' clients. The R metadata is only read by R and is ignored by other clients +#' (e.g. Pandas has its own custom metadata). This metadata is stored in +#' `$metadata$r`. #' -#' This metadata is saved by serializing R's attribute list structure to a -#' serialized string. Because of this, large amounts of metadata can quickly -#' increase the size of tables (and therefore the size of tables written to -#' parquet or feather files). If the (serialized) metadata exceeds 100Kbs in -#' size, it is first compressed before saving. To disable this compression -#' (e.g. for tables that are compatible with Arrow versions before 3.0.0 and -#' include large amounts of metadata) you can set the option -#' `arrow.compress_metadata` to `FALSE`. -#' -#' One exception to storing all metadata: `readr`'s `problems` attribute if it -#' exists is not saved with the metadata in order to prevent what are -#' sometimes excessively large when serialized. +#' Since Schema metadata keys and values must be strings, this metadata is +#' saved by serializing R's attribute list structure to a string. 
If the +#' serialized metadata exceeds 100 kB in size, by default it is compressed +#' starting in version 3.0.0. To disable this compression (e.g. for tables +#' that are compatible with Arrow versions before 3.0.0 and include large +#' amounts of metadata), set the option `arrow.compress_metadata` to `FALSE`. +#' Files with compressed metadata are readable by older versions of arrow, but +#' the metadata is dropped. #' #' @rdname Schema #' @name Schema diff --git a/r/man/Schema.Rd b/r/man/Schema.Rd index 7471757115cb..c2fb2fac6813 100644 --- a/r/man/Schema.Rd +++ b/r/man/Schema.Rd @@ -12,20 +12,22 @@ schema(...) \item{...}{named list of \link[=data-type]{data types}} } \description{ -A \code{Schema} is a list of \link{Field}s, which map names to Arrow \link[=data-type]{data types}. Create a \code{Schema} when you want to convert an R -\code{data.frame} to Arrow but don't want to rely on the default mapping of R -types to Arrow types, such as when you want to choose a specific numeric -precision, or when creating a \link{Dataset} and you want to ensure a specific -schema rather than inferring it from the various files. +A \code{Schema} is a list of \link{Field}s, which map names to +Arrow \link[=data-type]{data types}. Create a \code{Schema} when you +want to convert an R \code{data.frame} to Arrow but don't want to rely on the +default mapping of R types to Arrow types, such as when you want to choose a +specific numeric precision, or when creating a \link{Dataset} and you want to +ensure a specific schema rather than inferring it from the various files. -Many Arrow objects, including \link{Table} and \link{Dataset}, have a \verb{$schema} -method (active binding) that lets you access their schema. +Many Arrow objects, including \link{Table} and \link{Dataset}, have a \verb{$schema} method +(active binding) that lets you access their schema. 
} \section{Methods}{ \itemize{ -\item \verb{$ToString()}: convert to a string - \verb{$field(i)}: returns the field at -index \code{i} (0-based) - \verb{$GetFieldByName(x)}: returns the field with name \code{x} +\item \verb{$ToString()}: convert to a string +\item \verb{$field(i)}: returns the field at index \code{i} (0-based) +\item \verb{$GetFieldByName(x)}: returns the field with name \code{x} \item \verb{$WithMetadata(metadata)}: returns a new \code{Schema} with the key-value \code{metadata} set. Note that all list elements in \code{metadata} will be coerced to \code{character}. @@ -35,43 +37,41 @@ to \code{character}. \section{Active bindings}{ \itemize{ -\item \verb{$names}: returns the field names (called in \code{names(Schema)}) - -\verb{$num_fields}: returns the number of fields (called in \code{length(Schema)}) - -\verb{$fields}: returns the list of \code{Field}s in the \code{Schema}, suitable for -iterating over - \verb{$HasMetadata}: logical: does this \code{Schema} have extra -metadata? - \verb{$metadata}: returns the key-value metadata as a named list. -Modify or replace by assigning in (\code{sch$metadata <- new_metadata}). All -list elements are coerced to string. +\item \verb{$names}: returns the field names (called in \code{names(Schema)}) +\item \verb{$num_fields}: returns the number of fields (called in \code{length(Schema)}) +\item \verb{$fields}: returns the list of \code{Field}s in the \code{Schema}, suitable for +iterating over +\item \verb{$HasMetadata}: logical: does this \code{Schema} have extra metadata? +\item \verb{$metadata}: returns the key-value metadata as a named list. +Modify or replace by assigning in (\code{sch$metadata <- new_metadata}). +All list elements are coerced to string. } } -\section{Metadata}{ +\section{R Metadata}{ -Attributes from the \code{data.frame} are saved alongside tables so that the -object can be reconstructed faithfully in R (e.g. with \code{as.data.frame()}). 
-This metadata can be both at the top-level of the \code{data.frame} (e.g. -\code{attributes(df)}) or at the column (e.g. \code{attributes(df$col_a)}) or element -level (e.g. \code{attributes(df[1, "col_a"])}). For example, this allows for -storing \code{haven} columns in a table and being able to faithfully re-create -them when pulled back into R. This metadata is separate from the schema -(e.g. types of the columns) which is compatible with other Arrow clients. -The R metadata is only read by R and is ignored by other clients (e.g. -pyarrow which has its own custom metadata for things like Pandas metadata). -This metadata is stored (and can be accessed with) \code{table$metadata$r}. +When converting a data.frame to an Arrow Table or RecordBatch, attributes +from the \code{data.frame} are saved alongside tables so that the object can be +reconstructed faithfully in R (e.g. with \code{as.data.frame()}). This metadata +can be both at the top-level of the \code{data.frame} (e.g. \code{attributes(df)}) or +at the column (e.g. \code{attributes(df$col_a)}) or for list columns only: +element level (e.g. \code{attributes(df[1, "col_a"])}). For example, this allows +for storing \code{haven} columns in a table and being able to faithfully +re-create them when pulled back into R. This metadata is separate from the +schema (column names and types) which is compatible with other Arrow +clients. The R metadata is only read by R and is ignored by other clients +(e.g. Pandas has its own custom metadata). This metadata is stored in +\verb{$metadata$r}. -This metadata is saved by serializing R's attribute list structure to a -serialized string. Because of this, large amounts of metadata can quickly -increase the size of tables (and therefore the size of tables written to -parquet or feather files). If the (serialized) metadata exceeds 100Kbs in -size, it is first compressed before saving. To disable this compression -(e.g. 
for tables that are compatible with Arrow versions before 3.0.0 and -include large amounts of metadata) you can set the option -\code{arrow.compress_metadata} to \code{FALSE}. - -One exception to storing all metadata: \code{readr}'s \code{problems} attribute if it -exists is not saved with the metadata in order to prevent what are -sometimes excessively large when serialized. +Since Schema metadata keys and values must be strings, this metadata is +saved by serializing R's attribute list structure to a string. If the +serialized metadata exceeds 100Kb in size, by default it is compressed +starting in version 3.0.0. To disable this compression (e.g. for tables +that are compatible with Arrow versions before 3.0.0 and include large +amounts of metadata), set the option \code{arrow.compress_metadata} to \code{FALSE}. +Files with compressed metadata are readable by older versions of arrow, but +the metadata is dropped. } \examples{ From 920cfb1306ec8be407422f12e502e90b86436ec2 Mon Sep 17 00:00:00 2001 From: Jonathan Keane Date: Wed, 13 Jan 2021 15:01:17 -0600 Subject: [PATCH 10/13] =?UTF-8?q?=F0=9F=93=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- r/NEWS.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/r/NEWS.md b/r/NEWS.md index 40a943c39ff1..521af3eacd4d 100644 --- a/r/NEWS.md +++ b/r/NEWS.md @@ -37,6 +37,8 @@ * Option `arrow.skip_nul` (default `FALSE`, as in `base::scan()`) allows conversion of Arrow string (`utf8()`) type data containing embedded nul `\0` characters to R. If set to `TRUE`, nuls will be stripped and a warning is emitted if any are found. * `arrow_info()` for an overview of various run-time and build-time Arrow configurations, useful for debugging * Set environment variable `ARROW_DEFAULT_MEMORY_POOL` before loading the Arrow package to change memory allocators. Windows packages are built with `mimalloc`; most others have `jemalloc`. 
These are used by default if they were built, and they're generally much faster than the system malloc, but sometimes it is useful to turn them off for debugging purposes. To disable them, set `ARROW_DEFAULT_MEMORY_POOL=system`. +* List columns that have attributes on each element are now also included with the metadata that is saved when creating Arrow tables. This allows `sf` tibbles to be faithfully preserved and roundtripped ([ARROW-10386](https://issues.apache.org/jira/browse/ARROW-10386)). +* R metadata that exceeds 100Kb is now compressed before being written to a table; see `schema()` for more details. ## Bug fixes From a66818d3185398aa1c9daf8e70694ec1aa11fee6 Mon Sep 17 00:00:00 2001 From: Jonathan Keane Date: Wed, 13 Jan 2021 15:12:30 -0600 Subject: [PATCH 11/13] add extra-tests for compressed metadata --- r/extra-tests/test-read-files.R | 22 ++++++++++++++++++++++ r/extra-tests/write-files.R | 3 +++ r/tests/testthat/helper-data.R | 2 ++ 3 files changed, 27 insertions(+) diff --git a/r/extra-tests/test-read-files.R b/r/extra-tests/test-read-files.R index 90efce3d7910..5aa3a7b2fd22 100644 --- a/r/extra-tests/test-read-files.R +++ b/r/extra-tests/test-read-files.R @@ -162,4 +162,26 @@ test_that("Can see the metadata (stream)", { ) }) +test_that("Can see the extra metadata (parquet)", { + pq_file <- "files/ex_data_extra_metadata.parquet" + df <- read_parquet(pq_file) + expect_s3_class(df, "tbl") + + expect_equal( + attributes(df), + list( + names = letters[1:4], + row.names = 1L, + class = c("tbl_df", "tbl", "data.frame"), + top_level = list( + field_one = 12, + field_two = "more stuff" + ) + ) + ) + + # column-level attributes for the large column. 
+ expect_named(attributes(df$b), "lots") + expect_length(attributes(df$b)$lots, 100) +}) diff --git a/r/extra-tests/write-files.R b/r/extra-tests/write-files.R index e0927ead4eb4..75889b61407a 100644 --- a/r/extra-tests/write-files.R +++ b/r/extra-tests/write-files.R @@ -37,3 +37,6 @@ example_with_metadata_v1$c <- NULL write_feather(example_with_metadata_v1, "extra-tests/files/ex_data_v1.feather", version = 1) write_ipc_stream(example_with_metadata, "extra-tests/files/ex_data.stream") + +write_parquet(example_with_extra_metadata, "extra-tests/files/ex_data_extra_metadata.parquet") + diff --git a/r/tests/testthat/helper-data.R b/r/tests/testthat/helper-data.R index 06f0b48cb8ed..ecce77336b34 100644 --- a/r/tests/testthat/helper-data.R +++ b/r/tests/testthat/helper-data.R @@ -72,3 +72,5 @@ make_string_of_size <- function(size = 1) { purrr::map_chr(1000*size, ~paste(sample(letters, ., replace = TRUE), collapse = "")) } +example_with_extra_metadata <- example_with_metadata +attributes(example_with_extra_metadata$b) <- list(lots = rep(make_string_of_size(1), 100)) From 306751f0de58cae320ec38ad50564b2181b93f20 Mon Sep 17 00:00:00 2001 From: Jonathan Keane Date: Wed, 13 Jan 2021 15:41:30 -0600 Subject: [PATCH 12/13] expect warning for compressed metadata prior to 3.0.0 --- r/extra-tests/helpers.R | 4 ++++ r/extra-tests/test-read-files.R | 39 ++++++++++++++++++++------------- 2 files changed, 28 insertions(+), 15 deletions(-) diff --git a/r/extra-tests/helpers.R b/r/extra-tests/helpers.R index 61b7da4ec250..af57d45e5d28 100644 --- a/r/extra-tests/helpers.R +++ b/r/extra-tests/helpers.R @@ -19,6 +19,10 @@ if_version <- function(version, op = `==`) { op(packageVersion("arrow"), version) } +if_version_less_than <- function(version) { + if_version(version, op = `<`) +} + skip_if_version_less_than <- function(version, msg) { if(if_version(version, `<`)) { skip(msg) diff --git a/r/extra-tests/test-read-files.R b/r/extra-tests/test-read-files.R index 5aa3a7b2fd22..9de224aff436 
100644 --- a/r/extra-tests/test-read-files.R +++ b/r/extra-tests/test-read-files.R @@ -165,23 +165,32 @@ test_that("Can see the metadata (stream)", { test_that("Can see the extra metadata (parquet)", { pq_file <- "files/ex_data_extra_metadata.parquet" - df <- read_parquet(pq_file) - expect_s3_class(df, "tbl") + if (if_version_less_than("3.0.0")) { + expect_warning( + df <- read_parquet(pq_file), + "Invalid metadata$r" + ) + expect_s3_class(df, "tbl") + } else { + # version 3.0.0 and greater + df <- read_parquet(pq_file) + expect_s3_class(df, "tbl") - expect_equal( - attributes(df), - list( - names = letters[1:4], - row.names = 1L, - class = c("tbl_df", "tbl", "data.frame"), - top_level = list( - field_one = 12, - field_two = "more stuff" + expect_equal( + attributes(df), + list( + names = letters[1:4], + row.names = 1L, + class = c("tbl_df", "tbl", "data.frame"), + top_level = list( + field_one = 12, + field_two = "more stuff" + ) ) ) - ) - # column-level attributes for the large column. - expect_named(attributes(df$b), "lots") - expect_length(attributes(df$b)$lots, 100) + # column-level attributes for the large column. 
+ expect_named(attributes(df$b), "lots") + expect_length(attributes(df$b)$lots, 100) + } }) From fa0041b6c1a7a4e81bdfdabc70a5ccb562239f3e Mon Sep 17 00:00:00 2001 From: Jonathan Keane Date: Wed, 13 Jan 2021 16:07:05 -0600 Subject: [PATCH 13/13] backwards compatibility + fixed = TRUE --- r/extra-tests/test-read-files.R | 3 ++- .../data-arrow-extra-meta_3.0.0.parquet | Bin 0 -> 13263 bytes r/tests/testthat/test-backwards-compatibility.R | 9 +++++++++ 3 files changed, 11 insertions(+), 1 deletion(-) create mode 100644 r/tests/testthat/golden-files/data-arrow-extra-meta_3.0.0.parquet diff --git a/r/extra-tests/test-read-files.R b/r/extra-tests/test-read-files.R index 9de224aff436..10e9f9579206 100644 --- a/r/extra-tests/test-read-files.R +++ b/r/extra-tests/test-read-files.R @@ -168,7 +168,8 @@ test_that("Can see the extra metadata (parquet)", { if (if_version_less_than("3.0.0")) { expect_warning( df <- read_parquet(pq_file), - "Invalid metadata$r" + "Invalid metadata$r", + fixed = TRUE ) expect_s3_class(df, "tbl") } else { diff --git a/r/tests/testthat/golden-files/data-arrow-extra-meta_3.0.0.parquet b/r/tests/testthat/golden-files/data-arrow-extra-meta_3.0.0.parquet new file mode 100644 index 0000000000000000000000000000000000000000..bf95f23cd8655e0953d999e8f7f77692bfd0dbf1 GIT binary patch literal 13263 zcmeI3O>Z056^1Fsah%(v2!c?M0WX505wb{Xa`;^!KxoNP1ZIYIBG({y1w8A?_h2QAQMCqW{IbMHO(ocFxv;|}dR zqjy`4D~<0qUVNufZ(Mn?R(odWsamb}(TA%>t?^=G`!#Pjt~RdT=+$0*irYKBFyq;m z>%7*tw(7Or-)`M#Ja@f&rFQlD&ept9->z?;ZvSd)x3_!ktIMvxhOjG*dJW;e)qH!Q z@zUAm*KTmjpEn=hedk&=PmwZ_lK$}Bb0+L;EknUyZf*Z&Yxm3NcfWXE8eCla1SRU%f7h+; zT>Z5C@#^8cajn*P?v*+*`AY2_L#vVew5WA?EgM&>QTTRoxL#(}yn5N*EMIQMx9it$ zT>s#I3HRM^GaPq+`~Qfe-EW!ayMOz3dAR$(a%lVS&urImGVd?dAo|YMCp-WA{wTFm z*2`L*UTO{dgTWwm)4ij&ULT~alUm(gH}wZ;Sf2ZRxb^jY1K*# zhSq7`^>jmCV~fPxkyVkV@{a{?X+kqYBfy5 zMJnfMwMz4ODr0nsu|rB%5GY$-FMY@`5HZkslQ77 
zMOqJ4ez{BwKE}{GORH|`FvR46lh=zhT&Mmzt=DOZQ7b?8xL&&H8XShsX)heYDW>IQjd#)L*6y6MzwX+G#jUt#xXm3=)QEmFbruYJ@)BMe3jon&Z2++YQ^Py;6CY zBO0YQn?T2=yoLw6OQ&U-=Aa(F=nT@Fn|*O;!WpdYr)(}m&>8owfdyso6slrxxdAg< zrXD_RX->()R!|N!umwirl(|Fbz+TW8@-#6JD~9rCxDvNvGj&55OlAr6!*LAZ&@Q|} z>>xEUjhXDeN+s&8QV-p*F;MLdmBTB1#x={8_t6ve=CFe3Xd63yRO(7j_}}gs8tj73 zD+G2Eu&lV*Qi1N!@M(9|q_;@jl?uQ;E;^{eD@G9lSVt7ZXo*EZBurz7fX3>1CV`0~ z6vjHd47Nau0-78SCG<*ILBwHNW`NIO8u1%WgxJJ8a)v=9%vU!&Ma*I{sUX6Z2`Gkq zXbGNBD{H0!AsJCj2xPJQ3Zq~Sz95u_sRUQ6-T}+!4a?1pE=+(FK@hoOYeC$n+yR!+-n&;N&iqZ_0`$ zAxSnMkKtt^1z0c*X}A)^<`P?WbyJg9@*CJjicl0J{M%{NaGS1O0xyOkC||S{6^%;fq5F>eSEYqYKTfn4tpaT8%m%}6aYx%hb0k8 zSP>2+JtSiw?hc#{9IpEjjKn+jg*hz)3akd<1(=V4WHa1twhZDy-7fJC&43hE%*{M3 zHxRIqAdTbzGjR$@4__bw?+{r)O=vT}a#ojxWSG9>hZ|^1%A!P4UzmV_c)zL?u#}JN z)yq=!m2!z`L=eajq=X(Oz)8Fl^$TK4{wN}N2fpDBMlG_q zgEUBP1gl72#16@@+mfzu3E|Ly^p0XAD>#n?Y_z%!y#og-ZLtntGp&W%WCqwmxm@}Y zaA9$SCR&UW13+M=?r65EbRI7k*Yt^a8mQNV!k^4xMi@ByquTA`Z6( zos2JB8BC)YAp6ZRKXN`LPXy+c2Dl1PD8L;yMRS6#8;R5mj4cQZvYC^EMM#$a2qch+ zGJF?M<9+lT5?bapIs@(*slDDbsYG zG$d#33dM^)$L5d&Y0H+wGYBU~nlz5ylq7zs{`^MQ$I+5z!1D#wjaHdjOe0N~8|Qs0 zFP#_-1Lp-^A3NJp$Jw`P$Z5GLvNW8CUA8?`(g67UggLy@o1qDfbUZMjouHo5DfuKJ zx>kB8PRceOboiJMPYFD2MYG`{eQA@)$ptyKSkczHdp!5xj6fx&K-1DNlL_|5v>8Z? z33PFsO`H%N@;cf-tfm0~E)N~_%+!6_353U58hdPEIWPhsoeUasRIKehH+uKo$^9Qz z&op=b-o5G1&yVkoMx(p@`7<7kI-^_1qqjz*2YT=`lz+{!H@2} z`+l>tcXHU|eycdSH)d=+xu1_`_wSBp`*+9F>~K6exHq01-9^UnI6s<>C;KBl-yTmN zGUs4Ao*vzczsW&9p1R)mhquT1!+czvl;ixAcjo1%qw!Q-?jJJ$!8k{LesqkCL-slz z7wlWGX0p%vliRV!jCm91XMDcTKKI!-tLDx2{fy4&lp}YBJ`-dW?2{`C8TrPZ0-3WD z_CICKLv%*J*-;rXXAiaS!MMnfg}t+r2Vwi{0Q(v9e#|*x3|kBJ#jcsUyY?X%-JddV z$~AxcVso#$=V#=o+br-!HuH1#agG>HPaFf&ef2_~^1*PnUyciGoPgcAt|!WO%>({e z12*oNXX*-`V55!2Dc8u*?!X)0X?NJCF6chhKVk*8VY@b*ZseVU4|`3~+4bR5_YQrv zoxL5y<_UVv_OS^J16S;u9ihLxKxU48VS{$TtEqgzX9Z(Zv4C6JUZ|hc?41xF`e%Z_3h`mC7%C%T3NAS( zxW=4nPvV@tCd7w->NM3R<3k(dm$9KA!ASYO8^2RILcVZ3?2|9bK;9J0%_qevHlTy? 
zZ2aebMwWVL?+lv?_b>+;+uT#vLZ8FgoH1jdfIEe2#6`JW7up){?2A5yc?d3quhrpH zT(Cj^A}{6`cj5>>5%cC3V;&vlq5jlXb*(Vma1DH?=lPL87&bG60&Bd-~6=$RAq6Z0N+nFq}QIeWxB><#>a4{!ln zr{sYN_80O-4n@v$Ob*Hid>(ikAM6PiC*YoAhrHAWg)v~>4PEttcp%ICjm_p6b5CFc zZrW)6jyRFq;X}p)e=+7y8<;zFPx+<2$~7jMjNhPlk#5&j^zMLdCL#aH%%lja_6 zoWiHV7{_1n8NJLEGvfhl#VG2UaUj3NHgYd}po2b}AS?2E*a$x&R(#iI=#CEx<5NDO zhxdxu-)mxsy|gc|G;in!^@!ZfTznV#D13pw*yMil!91$1=Dfg~&sDw0M){3AZH!zA zMxmP=4}93u^_$o>&xBtcBTfTPY_cXs{`5Lxs=`N^6LALv{TulNUq@_Xmz=X!RXGOS zA`gf){(?I*dJ!>_PgT8kU$7E;`AwWuxx~-dVO}vF+0$GpPaW4ElQSO38`n{1f}jy^XuzhB4$i0)6yK$v$ z_FUvxdocTtSnp?ede%2Yz2{!!O5-YOVWp@1h&~ECte5s2;WIFg+^I}4Gp~>vB7WhJ zb0SyCAGjK|!<-VaPreBbux1nI@+^8ga~!rttm+Rr8u#!Uye-U0#xZ^md*GNgJ^BD^ z82T7{;uU?8^@aPvSNs?C#&`nr$cx53IZn)Sun&%T51WDu<_GHudg~*5F!Qqcriu&R z<&k@1ck~X#T<{+Zv_0^{A7?Ux2XYkuT5~7n6YzC??8SciLSC2)%zF_N`o^4PyeZq* z7I*Tdxj+n~N7R0~1<#Dfn9DqIiJT07?7Irbj2-K!7zV$q*s1C*dMTfMqn;wup2glZ z_!K#4KuuJi|ZgFPFuRJp%{0pzq~~d_R+KZI6D-9L1W@4PI%#+_9Di zSFk1Otn~!F?fs*^izRv%Vih^4(m(2h7|7Mgx#GxPaA}G!f&=PoT!L?vujP)IV+U~- zeJ46cJva8{fc}Ha+Gsu2ANn(N)IPbTKlK;>wNEmqMIXl8Gv7q-4>r+TsE>A8yOC`S zh>x{1&P?W1_Md5wF^3-NZ9c5>GV;~M+!J-iYcZDd+KMg4dvKFIiIwP`tR>n(Z*NZ= z+|i%%8Qsmp(HDgc$iY`pH_T_|u83vr0~fi=+9@_vy)Ay%E^Lo@4u0E{AUk}1IrkEW zOFk5radDX+F7qVy=CWQ~*3GZ?1aa21Ux^-y9?9N;KcAykXAt_2=+~LE>B*y~v%hDK z&suYP5PH1oc?JEPX9RjHpRL)S*-ypuQO{G+*V0Ev-_L!|BA#LFq3n_9nI^FhJ*fSZ zXY|;EeWHK$TtQ!T;d!3@Wt=_isqCTX@$Bt9>&5e0dZIZ0c?My>=$+%e>P_u8?e9Em z*%Qav5S!y17yYbu*njz~#=X&j{x6=fc@Ci`jOPGhGvloFZ0EDJc-AX^_VR)AW`FTA zFIk`BzH?X?Ytd7i!K>=?#nQf4-&A(R^J$-5Y|b-rb`K1FHi15#RoT-st!EzPc{UM` z3Qzk+#+>I_7MuL4BEHXfo3Ak9`;HM`itx2Z5npnA>dp8*V|2=wC