From 1120f2a2fd9ec4c85bad03ddc856b6710f5f58ba Mon Sep 17 00:00:00 2001 From: Ian Cook Date: Thu, 5 Nov 2020 12:18:42 -0500 Subject: [PATCH 1/7] [R] Allow rename Table columns with names() --- r/NAMESPACE | 1 + r/R/arrowExports.R | 4 ++++ r/R/table.R | 6 ++++++ r/src/arrowExports.cpp | 17 +++++++++++++++++ r/src/table.cpp | 6 ++++++ r/tests/testthat/test-Table.R | 12 ++++++++++++ 6 files changed, 46 insertions(+) diff --git a/r/NAMESPACE b/r/NAMESPACE index 0d0d0c2f9d78..25c1fcc7c4b6 100644 --- a/r/NAMESPACE +++ b/r/NAMESPACE @@ -18,6 +18,7 @@ S3method("[[",RecordBatch) S3method("[[",Schema) S3method("[[",Table) S3method("[[<-",Table) +S3method("names<-",Table) S3method(Ops,Array) S3method(Ops,ChunkedArray) S3method(Ops,Expression) diff --git a/r/R/arrowExports.R b/r/R/arrowExports.R index a2c34fb96a3c..be5e78f1d25f 100644 --- a/r/R/arrowExports.R +++ b/r/R/arrowExports.R @@ -1552,6 +1552,10 @@ Table__ColumnNames <- function(table){ .Call(`_arrow_Table__ColumnNames` , table) } +Table__RenameColumns <- function(table, names){ + .Call(`_arrow_Table__RenameColumns` , table, names) +} + Table__Slice1 <- function(table, offset){ .Call(`_arrow_Table__Slice1` , table, offset) } diff --git a/r/R/table.R b/r/R/table.R index 37d01dd3dcd4..87409183586a 100644 --- a/r/R/table.R +++ b/r/R/table.R @@ -96,6 +96,9 @@ Table <- R6Class("Table", inherit = ArrowObject, Table__column(self, i) }, ColumnNames = function() Table__ColumnNames(self), + RenameColumns = function(value) { + shared_ptr(Table, Table__RenameColumns(self, value)) + }, GetColumnByName = function(name) { assert_is(name, "character") assert_that(length(name) == 1) @@ -257,6 +260,9 @@ dim.Table <- function(x) c(x$num_rows, x$num_columns) #' @export names.Table <- function(x) x$ColumnNames() +#' @export +`names<-.Table` <- function(x, value) x$RenameColumns(value) + #' @export `[.Table` <- `[.RecordBatch` diff --git a/r/src/arrowExports.cpp b/r/src/arrowExports.cpp index 0beda04ccae9..59b6ecdffa72 100644 --- a/r/src/arrowExports.cpp +++ b/r/src/arrowExports.cpp @@ -6085,6 +6085,22 @@ extern "C" SEXP _arrow_Table__ColumnNames(SEXP table_sexp){ } #endif +// table.cpp +#if defined(ARROW_R_WITH_ARROW) +std::shared_ptr Table__RenameColumns(const std::shared_ptr& table, const std::vector& names); +extern "C" SEXP _arrow_Table__RenameColumns(SEXP table_sexp, SEXP names_sexp){ +BEGIN_CPP11 + arrow::r::Input&>::type table(table_sexp); + arrow::r::Input&>::type names(names_sexp); + return cpp11::as_sexp(Table__RenameColumns(table, names)); +END_CPP11 +} +#else +extern "C" SEXP _arrow_Table__RenameColumns(SEXP table_sexp, SEXP names_sexp){ + Rf_error("Cannot call Table__RenameColumns(). Please use arrow::install_arrow() to install required runtime libraries. "); +} +#endif + // table.cpp #if defined(ARROW_R_WITH_ARROW) std::shared_ptr Table__Slice1(const std::shared_ptr& table, R_xlen_t offset); @@ -6762,6 +6778,7 @@ static const R_CallMethodDef CallEntries[] = { { "_arrow_Table__field", (DL_FUNC) &_arrow_Table__field, 2}, { "_arrow_Table__columns", (DL_FUNC) &_arrow_Table__columns, 1}, { "_arrow_Table__ColumnNames", (DL_FUNC) &_arrow_Table__ColumnNames, 1}, + { "_arrow_Table__RenameColumns", (DL_FUNC) &_arrow_Table__RenameColumns, 2}, { "_arrow_Table__Slice1", (DL_FUNC) &_arrow_Table__Slice1, 2}, { "_arrow_Table__Slice2", (DL_FUNC) &_arrow_Table__Slice2, 3}, { "_arrow_Table__Equals", (DL_FUNC) &_arrow_Table__Equals, 3}, diff --git a/r/src/table.cpp b/r/src/table.cpp index 56b10088fdfc..14e5f4e92b5d 100644 --- a/r/src/table.cpp +++ b/r/src/table.cpp @@ -74,6 +74,12 @@ std::vector Table__ColumnNames(const std::shared_ptr& return table->ColumnNames(); } +// [[arrow::export]] +std::shared_ptr Table__RenameColumns( + const std::shared_ptr& table, const std::vector& names) { + return ValueOrStop(table->RenameColumns(names)); +} + // [[arrow::export]] std::shared_ptr Table__Slice1(const std::shared_ptr& table, R_xlen_t offset) { diff --git a/r/tests/testthat/test-Table.R b/r/tests/testthat/test-Table.R index e33f1ed43a7a..2dbbab6b92cd 100644 --- a/r/tests/testthat/test-Table.R +++ b/r/tests/testthat/test-Table.R @@ -434,3 +434,15 @@ test_that("Table$SelectColumns()", { expect_error(tab$SelectColumns(2:4)) expect_error(tab$SelectColumns("")) }) + +test_that("Table name assignment", { + tab <- Table$create(x = 1:10, y = 1:10) + expect_identical(names(tab), c("x", "y")) + names(tab) <- c("a", "b") + expect_identical(names(tab), c("a", "b")) + expect_error(names(tab) <- "f") + expect_error(names(tab) <- letters) + expect_error(names(tab) <- character(0)) + expect_error(names(tab) <- NULL) + expect_error(names(tab) <- c(TRUE, FALSE)) +}) From c29bee846eb949b3c665cf3a7cc5b113df88a8f2 Mon Sep 17 00:00:00 2001 From: Ian Cook Date: Thu, 5 Nov 2020 12:20:12 -0500 Subject: [PATCH 2/7] [R] Allow rename RecordBatch columns with names() --- r/NAMESPACE | 1 + r/R/arrowExports.R | 4 ++++ r/R/record-batch.R | 8 ++++++++ r/src/arrowExports.cpp | 17 +++++++++++++++++ r/src/recordbatch.cpp | 12 ++++++++++++ r/tests/testthat/test-RecordBatch.R | 12 ++++++++++++ 6 files changed, 54 insertions(+) diff --git a/r/NAMESPACE b/r/NAMESPACE index 25c1fcc7c4b6..316679ac8601 100644 --- a/r/NAMESPACE +++ b/r/NAMESPACE @@ -18,6 +18,7 @@ S3method("[[",RecordBatch) S3method("[[",Schema) S3method("[[",Table) S3method("[[<-",Table) +S3method("names<-",RecordBatch) S3method("names<-",Table) S3method(Ops,Array) S3method(Ops,ChunkedArray) diff --git a/r/R/arrowExports.R b/r/R/arrowExports.R index be5e78f1d25f..5e519080cee4 100644 --- a/r/R/arrowExports.R +++ b/r/R/arrowExports.R @@ -1316,6 +1316,10 @@ RecordBatch__schema <- function(x){ .Call(`_arrow_RecordBatch__schema` , x) } +RecordBatch__RenameColumns <- function(batch, names){ + .Call(`_arrow_RecordBatch__RenameColumns` , batch, names) +} + RecordBatch__ReplaceSchemaMetadata <- function(x, metadata){ .Call(`_arrow_RecordBatch__ReplaceSchemaMetadata` , x, metadata) } diff --git a/r/R/record-batch.R b/r/R/record-batch.R index c050ef806b82..28de49f8f864 100644 --- a/r/R/record-batch.R +++ b/r/R/record-batch.R @@ -75,6 +75,11 @@ RecordBatch <- R6Class("RecordBatch", inherit = ArrowObject, column = function(i) RecordBatch__column(self, i), column_name = function(i) RecordBatch__column_name(self, i), names = function() RecordBatch__names(self), + RenameColumns = function(value) { + assert_that(identical(class(value), "character"), msg = "names must be character") + assert_that(identical(length(self$schema$names), length(value)), msg = "names has inconsistent length") + shared_ptr(RecordBatch, RecordBatch__RenameColumns(self, value)) + }, Equals = function(other, check_metadata = FALSE, ...) { inherits(other, "RecordBatch") && RecordBatch__Equals(self, other, isTRUE(check_metadata)) }, @@ -204,6 +209,9 @@ record_batch <- RecordBatch$create #' @export names.RecordBatch <- function(x) x$names() +#' @export +`names<-.RecordBatch` <- function(x, value) x$RenameColumns(value) + #' @importFrom methods as #' @export `[.RecordBatch` <- function(x, i, j, ..., drop = FALSE) { diff --git a/r/src/arrowExports.cpp b/r/src/arrowExports.cpp index 59b6ecdffa72..975ff72f7f15 100644 --- a/r/src/arrowExports.cpp +++ b/r/src/arrowExports.cpp @@ -5163,6 +5163,22 @@ extern "C" SEXP _arrow_RecordBatch__schema(SEXP x_sexp){ } #endif +// recordbatch.cpp +#if defined(ARROW_R_WITH_ARROW) +std::shared_ptr RecordBatch__RenameColumns(const std::shared_ptr& batch, const std::vector& names); +extern "C" SEXP _arrow_RecordBatch__RenameColumns(SEXP batch_sexp, SEXP names_sexp){ +BEGIN_CPP11 + arrow::r::Input&>::type batch(batch_sexp); + arrow::r::Input&>::type names(names_sexp); + return cpp11::as_sexp(RecordBatch__RenameColumns(batch, names)); +END_CPP11 +} +#else +extern "C" SEXP _arrow_RecordBatch__RenameColumns(SEXP batch_sexp, SEXP names_sexp){ + Rf_error("Cannot call RecordBatch__RenameColumns(). Please use arrow::install_arrow() to install required runtime libraries. "); +} +#endif + // recordbatch.cpp #if defined(ARROW_R_WITH_ARROW) std::shared_ptr RecordBatch__ReplaceSchemaMetadata(const std::shared_ptr& x, cpp11::strings metadata); @@ -6719,6 +6735,7 @@ static const R_CallMethodDef CallEntries[] = { { "_arrow_RecordBatch__num_columns", (DL_FUNC) &_arrow_RecordBatch__num_columns, 1}, { "_arrow_RecordBatch__num_rows", (DL_FUNC) &_arrow_RecordBatch__num_rows, 1}, { "_arrow_RecordBatch__schema", (DL_FUNC) &_arrow_RecordBatch__schema, 1}, + { "_arrow_RecordBatch__RenameColumns", (DL_FUNC) &_arrow_RecordBatch__RenameColumns, 2}, { "_arrow_RecordBatch__ReplaceSchemaMetadata", (DL_FUNC) &_arrow_RecordBatch__ReplaceSchemaMetadata, 2}, { "_arrow_RecordBatch__columns", (DL_FUNC) &_arrow_RecordBatch__columns, 1}, { "_arrow_RecordBatch__column", (DL_FUNC) &_arrow_RecordBatch__column, 2}, diff --git a/r/src/recordbatch.cpp b/r/src/recordbatch.cpp index bae5b8e713a8..899d48c78dff 100644 --- a/r/src/recordbatch.cpp +++ b/r/src/recordbatch.cpp @@ -42,6 +42,18 @@ std::shared_ptr RecordBatch__schema( return x->schema(); } +// [[arrow::export]] +std::shared_ptr RecordBatch__RenameColumns( + const std::shared_ptr& batch, const std::vector& names) { + int n = batch->num_columns(); + std::vector> fields(n); + for (int i = 0; i < n; i++) { + fields[i] = batch->schema()->field(i)->WithName(names[i]); + } + auto schema = std::make_shared(std::move(fields)); + return arrow::RecordBatch::Make(schema, batch->num_rows(), batch->columns()); +} + // [[arrow::export]] std::shared_ptr RecordBatch__ReplaceSchemaMetadata( const std::shared_ptr& x, cpp11::strings metadata) { diff --git a/r/tests/testthat/test-RecordBatch.R b/r/tests/testthat/test-RecordBatch.R index 1a7f7eecc870..add97823657e 100644 --- a/r/tests/testthat/test-RecordBatch.R +++ b/r/tests/testthat/test-RecordBatch.R @@ -366,3 +366,15 @@ test_that("RecordBatch$Equals(check_metadata)", { expect_false(rb1$Equals(24)) # Not a RecordBatch }) + +test_that("RecordBatch name assignment", { + rb <- record_batch(x = 1:10, y = 1:10) + expect_identical(names(rb), c("x", "y")) + names(rb) <- c("a", "b") + expect_identical(names(rb), c("a", "b")) + expect_error(names(rb) <- "f") + expect_error(names(rb) <- letters) + expect_error(names(rb) <- character(0)) + expect_error(names(rb) <- NULL) + expect_error(names(rb) <- c(TRUE, FALSE)) +}) From 2677680e7721e914ec49d56987c66c24fc6aa98d Mon Sep 17 00:00:00 2001 From: Ian Cook Date: Mon, 9 Nov 2020 11:25:20 -0500 Subject: [PATCH 3/7] [R] Improve error handling on rename RecordBatch columns --- r/R/record-batch.R | 3 +-- r/src/recordbatch.cpp | 4 ++++ 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/r/R/record-batch.R b/r/R/record-batch.R index 28de49f8f864..098593040f06 100644 --- a/r/R/record-batch.R +++ b/r/R/record-batch.R @@ -76,8 +76,7 @@ RecordBatch <- R6Class("RecordBatch", inherit = ArrowObject, column_name = function(i) RecordBatch__column_name(self, i), names = function() RecordBatch__names(self), RenameColumns = function(value) { - assert_that(identical(class(value), "character"), msg = "names must be character") - assert_that(identical(length(self$schema$names), length(value)), msg = "names has inconsistent length") + assert_is(value, "character") shared_ptr(RecordBatch, RecordBatch__RenameColumns(self, value)) }, Equals = function(other, check_metadata = FALSE, ...) { diff --git a/r/src/recordbatch.cpp b/r/src/recordbatch.cpp index 899d48c78dff..72fc98c434c3 100644 --- a/r/src/recordbatch.cpp +++ b/r/src/recordbatch.cpp @@ -46,6 +46,10 @@ std::shared_ptr RecordBatch__schema( std::shared_ptr RecordBatch__RenameColumns( const std::shared_ptr& batch, const std::vector& names) { int n = batch->num_columns(); + if (names.size() != n) { + cpp11::stop("RecordBatch has %d columns but %d names were provided", + n, names.size()); + } std::vector> fields(n); for (int i = 0; i < n; i++) { fields[i] = batch->schema()->field(i)->WithName(names[i]); From 55b789ee8ef3cf2b0b3030e79bc72af76fa4398c Mon Sep 17 00:00:00 2001 From: Ian Cook Date: Mon, 9 Nov 2020 12:35:49 -0500 Subject: [PATCH 4/7] [R] Fix integer types error in RecordBatch__RenameColumns() --- r/src/recordbatch.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/r/src/recordbatch.cpp b/r/src/recordbatch.cpp index 72fc98c434c3..4e8066d10924 100644 --- a/r/src/recordbatch.cpp +++ b/r/src/recordbatch.cpp @@ -45,7 +45,7 @@ std::shared_ptr RecordBatch__schema( // [[arrow::export]] std::shared_ptr RecordBatch__RenameColumns( const std::shared_ptr& batch, const std::vector& names) { - int n = batch->num_columns(); + auto n = batch->num_columns(); if (names.size() != n) { cpp11::stop("RecordBatch has %d columns but %d names were provided", n, names.size()); From 72a390746bba1985830ca82a956dd2befd03bd5c Mon Sep 17 00:00:00 2001 From: Ian Cook Date: Mon, 9 Nov 2020 13:18:06 -0500 Subject: [PATCH 5/7] [R] Fix integer types error in RecordBatch__RenameColumns() --- r/src/recordbatch.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/r/src/recordbatch.cpp b/r/src/recordbatch.cpp index 4e8066d10924..f4934a303037 100644 --- a/r/src/recordbatch.cpp +++ b/r/src/recordbatch.cpp @@ -45,8 +45,8 @@ std::shared_ptr RecordBatch__schema( // [[arrow::export]] std::shared_ptr RecordBatch__RenameColumns( const std::shared_ptr& batch, const std::vector& names) { - auto n = batch->num_columns(); - if (names.size() != n) { + int n = batch->num_columns(); + if (names.size() != static_cast(n)) { cpp11::stop("RecordBatch has %d columns but %d names were provided", n, names.size()); } From 0bae482836c9afdfcd175f438b9af0de7d36bcb1 Mon Sep 17 00:00:00 2001 From: Ian Cook Date: Mon, 9 Nov 2020 13:53:35 -0500 Subject: [PATCH 6/7] [R] Lint RecordBatch__RenameColumns() --- r/src/recordbatch.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/r/src/recordbatch.cpp b/r/src/recordbatch.cpp index f4934a303037..66606a7b05dc 100644 --- a/r/src/recordbatch.cpp +++ b/r/src/recordbatch.cpp @@ -44,11 +44,11 @@ std::shared_ptr RecordBatch__schema( // [[arrow::export]] std::shared_ptr RecordBatch__RenameColumns( - const std::shared_ptr& batch, const std::vector& names) { + const std::shared_ptr& batch, + const std::vector& names) { int n = batch->num_columns(); if (names.size() != static_cast(n)) { - cpp11::stop("RecordBatch has %d columns but %d names were provided", - n, names.size()); + cpp11::stop("RecordBatch has %d columns but %d names were provided", n, names.size()); } std::vector> fields(n); for (int i = 0; i < n; i++) { From 8fc776bdde8ae85e137e0e18e826ea52c55e06dc Mon Sep 17 00:00:00 2001 From: Neal Richardson Date: Wed, 11 Nov 2020 12:56:49 -0800 Subject: [PATCH 7/7] Cleanup after rebase --- r/R/record-batch.R | 5 +---- r/R/table.R | 4 +--- 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/r/R/record-batch.R b/r/R/record-batch.R index 098593040f06..331a7a772530 100644 --- a/r/R/record-batch.R +++ b/r/R/record-batch.R @@ -75,10 +75,7 @@ RecordBatch <- R6Class("RecordBatch", inherit = ArrowObject, column = function(i) RecordBatch__column(self, i), column_name = function(i) RecordBatch__column_name(self, i), names = function() RecordBatch__names(self), - RenameColumns = function(value) { - assert_is(value, "character") - shared_ptr(RecordBatch, RecordBatch__RenameColumns(self, value)) - }, + RenameColumns = function(value) RecordBatch__RenameColumns(self, value), Equals = function(other, check_metadata = FALSE, ...) { inherits(other, "RecordBatch") && RecordBatch__Equals(self, other, isTRUE(check_metadata)) }, diff --git a/r/R/table.R b/r/R/table.R index 87409183586a..1d2190589f7f 100644 --- a/r/R/table.R +++ b/r/R/table.R @@ -96,9 +96,7 @@ Table <- R6Class("Table", inherit = ArrowObject, Table__column(self, i) }, ColumnNames = function() Table__ColumnNames(self), - RenameColumns = function(value) { - shared_ptr(Table, Table__RenameColumns(self, value)) - }, + RenameColumns = function(value) Table__RenameColumns(self, value), GetColumnByName = function(name) { assert_is(name, "character") assert_that(length(name) == 1)