From d0f6d9a15e2fb7f0885f098c86e177f5a23ab80a Mon Sep 17 00:00:00 2001 From: Sun Rui Date: Sat, 15 Aug 2015 18:11:12 +0800 Subject: [PATCH 1/2] [SPARK-8844][SPARKR] head/collect is broken in SparkR. --- R/pkg/R/deserialize.R | 16 ++++++++++------ R/pkg/R/nohup.out | 0 R/pkg/inst/tests/test_sparkSQL.R | 8 ++++++++ 3 files changed, 18 insertions(+), 6 deletions(-) create mode 100644 R/pkg/R/nohup.out diff --git a/R/pkg/R/deserialize.R b/R/pkg/R/deserialize.R index 6d364f77be7ee..33bf13ec9e784 100644 --- a/R/pkg/R/deserialize.R +++ b/R/pkg/R/deserialize.R @@ -176,10 +176,14 @@ readRow <- function(inputCon) { # Take a single column as Array[Byte] and deserialize it into an atomic vector readCol <- function(inputCon, numRows) { - # sapply can not work with POSIXlt - do.call(c, lapply(1:numRows, function(x) { - value <- readObject(inputCon) - # Replace NULL with NA so we can coerce to vectors - if (is.null(value)) NA else value - })) + if (numRows > 0) { + # sapply can not work with POSIXlt + do.call(c, lapply(1:numRows, function(x) { + value <- readObject(inputCon) + # Replace NULL with NA so we can coerce to vectors + if (is.null(value)) NA else value + })) + } else { + vector() + } } diff --git a/R/pkg/R/nohup.out b/R/pkg/R/nohup.out new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/R/pkg/inst/tests/test_sparkSQL.R b/R/pkg/inst/tests/test_sparkSQL.R index 7377fc8f1ca9c..3d5d4e3baf1cc 100644 --- a/R/pkg/inst/tests/test_sparkSQL.R +++ b/R/pkg/inst/tests/test_sparkSQL.R @@ -408,6 +408,14 @@ test_that("collect() returns a data.frame", { expect_equal(names(rdf)[1], "age") expect_equal(nrow(rdf), 3) expect_equal(ncol(rdf), 2) + + # collect() returns data correctly from a DataFrame with 0 rows + df0 <- limit(df, 0) + rdf <- collect(df0) + expect_true(is.data.frame(rdf)) + expect_equal(names(rdf)[1], "age") + expect_equal(nrow(rdf), 0) + expect_equal(ncol(rdf), 2) }) test_that("limit() returns DataFrame with the correct number of rows", { From 
cce54aa42aceb6def561acf1df4a61f80b75006b Mon Sep 17 00:00:00 2001 From: Sun Rui Date: Sun, 16 Aug 2015 10:04:20 +0800 Subject: [PATCH 2/2] Address comments. --- R/pkg/R/nohup.out | 0 R/pkg/inst/tests/test_sparkSQL.R | 12 ++++++++++++ 2 files changed, 12 insertions(+) delete mode 100644 R/pkg/R/nohup.out diff --git a/R/pkg/R/nohup.out b/R/pkg/R/nohup.out deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/R/pkg/inst/tests/test_sparkSQL.R b/R/pkg/inst/tests/test_sparkSQL.R index 3d5d4e3baf1cc..5cfe6ce2d5382 100644 --- a/R/pkg/inst/tests/test_sparkSQL.R +++ b/R/pkg/inst/tests/test_sparkSQL.R @@ -500,6 +500,18 @@ test_that("head() and first() return the correct data", { testFirst <- first(df) expect_equal(nrow(testFirst), 1) + + # head() and first() return the correct data on + # a DataFrame with 0 rows + df0 <- limit(df, 0) + + testHead <- head(df0) + expect_equal(nrow(testHead), 0) + expect_equal(ncol(testHead), 2) + + testFirst <- first(df0) + expect_equal(nrow(testFirst), 0) + expect_equal(ncol(testFirst), 2) }) test_that("distinct() and unique on DataFrames", {