diff --git a/tests/testthat/_snaps/utils.md b/tests/testthat/_snaps/utils.md new file mode 100644 index 0000000..81f617a --- /dev/null +++ b/tests/testthat/_snaps/utils.md @@ -0,0 +1,28 @@ +# as_count + + Code + as_count(1:2) + Condition + Error in `as_count()`: + ! x must be a count, i.e. an integer scalar + Code + as_count(0) + Condition + Error in `as_count()`: + ! x must be a count, i.e. an integer scalar + Code + as_count(NA_real_) + Condition + Error in `as_count()`: + ! x must be a count, i.e. an integer scalar + Code + as_count(-100) + Condition + Error in `as_count()`: + ! x must be a count, i.e. an integer scalar + Code + as_count(-100L) + Condition + Error in `as_count()`: + ! x must be a count, i.e. an integer scalar + diff --git a/tests/testthat/_snaps/write-parquet-row-groups.md b/tests/testthat/_snaps/write-parquet-row-groups.md new file mode 100644 index 0000000..ef8989d --- /dev/null +++ b/tests/testthat/_snaps/write-parquet-row-groups.md @@ -0,0 +1,72 @@ +# errors + + Code + parquet_options(num_rows_per_row_group = "foobar") + Condition + Error in `as_count()`: + ! num_rows_per_row_group must be a count, i.e. an integer scalar + +--- + + Code + write_parquet(df, tmp, row_groups = "foobar") + Condition + Error in `parse_row_groups()`: + ! Row groups must be specified as a growing positive integer vector, starting with 1. + Code + write_parquet(df, tmp, row_groups = c(100L, 1L)) + Condition + Error in `parse_row_groups()`: + ! Row groups must be specified as a growing positive integer vector, starting with 1. + Code + write_parquet(df, tmp, row_groups = c(1L, 100L)) + Condition + Error in `write_parquet()`: + ! Internal nanoparquet error, row index too large + +# grouped df + + Code + write_parquet(df, tmp) + Message + Ordering data frame according to row groups. 
+
+---
+
+ Code
+ as.data.frame(read_parquet(tmp)[, c("nam", "cyl")])
+ Output
+ nam cyl
+ 1 Datsun 710 4
+ 2 Merc 240D 4
+ 3 Merc 230 4
+ 4 Fiat 128 4
+ 5 Honda Civic 4
+ 6 Toyota Corolla 4
+ 7 Toyota Corona 4
+ 8 Fiat X1-9 4
+ 9 Porsche 914-2 4
+ 10 Lotus Europa 4
+ 11 Volvo 142E 4
+ 12 Mazda RX4 6
+ 13 Mazda RX4 Wag 6
+ 14 Hornet 4 Drive 6
+ 15 Valiant 6
+ 16 Merc 280 6
+ 17 Merc 280C 6
+ 18 Ferrari Dino 6
+ 19 Hornet Sportabout 8
+ 20 Duster 360 8
+ 21 Merc 450SE 8
+ 22 Merc 450SL 8
+ 23 Merc 450SLC 8
+ 24 Cadillac Fleetwood 8
+ 25 Lincoln Continental 8
+ 26 Chrysler Imperial 8
+ 27 Dodge Challenger 8
+ 28 AMC Javelin 8
+ 29 Camaro Z28 8
+ 30 Pontiac Firebird 8
+ 31 Ford Pantera L 8
+ 32 Maserati Bora 8
+
diff --git a/tests/testthat/test-utils.R b/tests/testthat/test-utils.R
index f9067c6..b15ab03 100644
--- a/tests/testthat/test-utils.R
+++ b/tests/testthat/test-utils.R
@@ -51,3 +51,39 @@ test_that("is_uint32", {
   expect_false(is_uint32(NA_real_))
   expect_false(is_uint32("foo"))
 })
+
+test_that("is_icount", { # TRUE only for a positive, non-NA integer scalar
+  expect_true(is_icount(1L))
+  expect_true(is_icount(100L))
+  expect_true(is_icount(2147483647L)) # 2^31 - 1
+
+  expect_false(is_icount(NA_integer_)) # missing value
+  expect_false(is_icount(1:2)) # not a scalar
+  expect_false(is_icount(1)) # double, not integer
+  expect_false(is_icount(0L)) # zero is rejected
+  expect_false(is_icount(-100L)) # negative
+})
+
+test_that("is_dcount", { # TRUE only for a positive, non-NA double scalar
+  expect_true(is_dcount(1))
+  expect_true(is_dcount(100))
+  expect_true(is_dcount(2147483647)) # 2^31 - 1, stored as a double
+
+  expect_false(is_dcount(NA_real_)) # missing value
+  expect_false(is_dcount(1:2)) # not a scalar (and integer)
+  expect_false(is_dcount(1L)) # integer, not double
+  expect_false(is_dcount(0)) # zero is rejected
+  expect_false(is_dcount(-100)) # negative
+})
+
+test_that("as_count", { # doubles 1 and 100 convert; invalid input errors
+  expect_equal(as_count(1), 1L)
+  expect_equal(as_count(100), 100L)
+  expect_snapshot(error = TRUE, {
+    as_count(1:2)
+    as_count(0)
+    as_count(NA_real_)
+    as_count(-100)
+    as_count(-100L)
+  }) # the five error messages are recorded in _snaps/utils.md
+})
diff --git a/tests/testthat/test-write-parquet-row-groups.R b/tests/testthat/test-write-parquet-row-groups.R
new file mode 100644
index 0000000..2eb28a7
--- /dev/null
+++ 
b/tests/testthat/test-write-parquet-row-groups.R
@@ -0,0 +1,65 @@
+# Invalid row-group arguments must fail with the recorded error messages.
+test_that("errors", {
+  expect_snapshot(error = TRUE, {
+    parquet_options(num_rows_per_row_group = "foobar")
+  })
+
+  df <- test_df()
+  tmp <- tempfile(fileext = ".parquet")
+  on.exit(unlink(tmp), add = TRUE)
+  expect_snapshot(error = TRUE, {
+    write_parquet(df, tmp, row_groups = "foobar")
+    write_parquet(df, tmp, row_groups = c(100L, 1L))
+    write_parquet(df, tmp, row_groups = c(1L, 100L))
+  })
+})
+
+# The same data written with different row-group layouts must read back
+# identically; only the number of row groups in the metadata differs.
+test_that("row groups", {
+  tmp1 <- tempfile(fileext = ".parquet")
+  tmp2 <- tempfile(fileext = ".parquet")
+  on.exit(unlink(c(tmp1, tmp2)), add = TRUE)
+
+  df <- test_df()
+  # Reference file: `row_groups = 1L`, against which every layout is compared.
+  write_parquet(df, tmp1, row_groups = 1L)
+  # `row_groups = c(1L, 16L)` must produce two row groups.
+  write_parquet(df, tmp2, row_groups = c(1L, 16L))
+  expect_equal(read_parquet(tmp1), read_parquet(tmp2))
+  expect_equal(nrow(read_parquet_metadata(tmp2)[["row_groups"]]), 2L)
+
+  # One row group per row.
+  unlink(tmp2)
+  write_parquet(df, tmp2, row_groups = seq_len(nrow(df)))
+  expect_equal(read_parquet(tmp1), read_parquet(tmp2))
+  expect_equal(nrow(read_parquet_metadata(tmp2)[["row_groups"]]), nrow(df))
+
+  # No `row_groups` argument: the option sets 10 rows per row group.
+  unlink(tmp2)
+  withr::local_options(nanoparquet.num_rows_per_row_group = 10L)
+  write_parquet(df, tmp2)
+  expect_equal(read_parquet(tmp1), read_parquet(tmp2))
+  expect_equal(nrow(read_parquet_metadata(tmp2)[["row_groups"]]), 4L)
+})
+
+# A data frame carrying a "groups" attribute is reordered by group (with a
+# message) and written as one row group per group.
+test_that("grouped df", {
+  df <- test_df()
+  attr(df, "groups") <- data.frame(
+    cyl = c(4L, 6L, 8L),
+    .rows = I(list(
+      c(3L, 8L, 9L, 18L, 19L, 20L, 21L, 26L, 27L, 28L, 32L),
+      c(1L, 2L, 4L, 6L, 10L, 11L, 30L),
+      c(5L, 7L, 12L, 13L, 14L, 15L, 16L, 17L, 22L, 23L, 24L, 25L, 29L, 31L)
+    ))
+  )
+
+  tmp <- tempfile(fileext = ".parquet")
+  # Clean up the temporary file when the test finishes.
+  on.exit(unlink(tmp), add = TRUE)
+  expect_snapshot(write_parquet(df, tmp))
+  expect_equal(nrow(read_parquet_metadata(tmp)[["row_groups"]]), 3L)
+  expect_snapshot(as.data.frame(read_parquet(tmp)[, c("nam", "cyl")]))
+})