forked from hannes/miniparquet
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
6cbcd8c
commit aa4cfd6
Showing
4 changed files
with
190 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
# as_count | ||
|
||
Code | ||
as_count(1:2) | ||
Condition | ||
Error in `as_count()`: | ||
! x must be a count, i.e. an integer scalar | ||
Code | ||
as_count(0) | ||
Condition | ||
Error in `as_count()`: | ||
! x must be a count, i.e. an integer scalar | ||
Code | ||
as_count(NA_real_) | ||
Condition | ||
Error in `as_count()`: | ||
! x must be a count, i.e. an integer scalar | ||
Code | ||
as_count(-100) | ||
Condition | ||
Error in `as_count()`: | ||
! x must be a count, i.e. an integer scalar | ||
Code | ||
as_count(-100L) | ||
Condition | ||
Error in `as_count()`: | ||
! x must be a count, i.e. an integer scalar | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,72 @@ | ||
# errors | ||
|
||
Code | ||
parquet_options(num_rows_per_row_group = "foobar") | ||
Condition | ||
Error in `as_count()`: | ||
! num_rows_per_row_group must be a count, i.e. an integer scalar | ||
|
||
--- | ||
|
||
Code | ||
write_parquet(df, tmp, row_groups = "foobar") | ||
Condition | ||
Error in `parse_row_groups()`: | ||
! Row groups must be specified as a growing positive integer vector, starting with 1. | ||
Code | ||
write_parquet(df, tmp, row_groups = c(100L, 1L)) | ||
Condition | ||
Error in `parse_row_groups()`: | ||
! Row groups must be specified as a growing positive integer vector, starting with 1. | ||
Code | ||
write_parquet(df, tmp, row_groups = c(1L, 100L)) | ||
Condition | ||
Error in `write_parquet()`: | ||
! Internal nanoparquet error, row index too large | ||
|
||
# grouped df | ||
|
||
Code | ||
write_parquet(df, tmp) | ||
Message | ||
Ordering data frame according to row groups. | ||
|
||
--- | ||
|
||
Code | ||
as.data.frame(read_parquet(tmp)[, c("nam", "cyl")]) | ||
Output | ||
nam cyl | ||
1 Datsun 710 4 | ||
2 Merc 240D 4 | ||
3 Merc 230 4 | ||
4 Fiat 128 4 | ||
5 Honda Civic 4 | ||
6 Toyota Corolla 4 | ||
7 Toyota Corona 4 | ||
8 Fiat X1-9 4 | ||
9 Porsche 914-2 4 | ||
10 Lotus Europa 4 | ||
11 Volvo 142E 4 | ||
12 Mazda RX4 6 | ||
13 Mazda RX4 Wag 6 | ||
14 Hornet 4 Drive 6 | ||
15 Valiant 6 | ||
16 Merc 280 6 | ||
17 Merc 280C 6 | ||
18 Ferrari Dino 6 | ||
19 Hornet Sportabout 8 | ||
20 Duster 360 8 | ||
21 Merc 450SE 8 | ||
22 Merc 450SL 8 | ||
23 Merc 450SLC 8 | ||
24 Cadillac Fleetwood 8 | ||
25 Lincoln Continental 8 | ||
26 Chrysler Imperial 8 | ||
27 Dodge Challenger 8 | ||
28 AMC Javelin 8 | ||
29 Camaro Z28 8 | ||
30 Pontiac Firebird 8 | ||
31 Ford Pantera L 8 | ||
32 Maserati Bora 8 | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
test_that("errors", {
  # A non-numeric row-group size is expected to raise an error. The code
  # inside the snapshot is recorded verbatim next to the condition, so the
  # expression itself must stay exactly as written.
  expect_snapshot(
    {
      parquet_options(num_rows_per_row_group = "foobar")
    },
    error = TRUE
  )

  # Scratch output path; removed again when the test exits.
  tmp <- tempfile(fileext = ".parquet")
  on.exit(unlink(tmp), add = TRUE)
  df <- test_df()

  # Three `row_groups` specifications that are each expected to fail:
  # a string, a vector that does not start at 1, and a vector containing
  # 100L (presumably beyond the last row of the test data -- the recorded
  # snapshot reports "row index too large").
  expect_snapshot(
    {
      write_parquet(df, tmp, row_groups = "foobar")
      write_parquet(df, tmp, row_groups = c(100L, 1L))
      write_parquet(df, tmp, row_groups = c(1L, 100L))
    },
    error = TRUE
  )
})
|
||
test_that("row groups", {
  # `single_path` holds the one-row-group reference file; `split_path` is
  # rewritten with a different row-group layout in every scenario below.
  single_path <- tempfile(fileext = ".parquet")
  split_path <- tempfile(fileext = ".parquet")
  on.exit(unlink(c(single_path, split_path)), add = TRUE)

  # Number of row groups recorded in the metadata of the file at `path`.
  count_row_groups <- function(path) {
    meta <- read_parquet_metadata(path)
    nrow(meta[["row_groups"]])
  }

  input <- test_df()
  write_parquet(input, single_path, row_groups = 1L)

  # Explicit layout with two entries: same data, two row groups.
  write_parquet(input, split_path, row_groups = c(1L, 16L))
  expect_equal(read_parquet(single_path), read_parquet(split_path))
  expect_equal(count_row_groups(split_path), 2L)

  # One entry per row: same data, as many row groups as rows.
  unlink(split_path)
  write_parquet(input, split_path, row_groups = seq_len(nrow(input)))
  expect_equal(read_parquet(single_path), read_parquet(split_path))
  expect_equal(count_row_groups(split_path), nrow(input))

  # No explicit layout; the row-group size is taken from the option, which
  # withr restores at the end of the test.
  unlink(split_path)
  withr::local_options(nanoparquet.num_rows_per_row_group = 10L)
  write_parquet(input, split_path)
  expect_equal(read_parquet(single_path), read_parquet(split_path))
  expect_equal(count_row_groups(split_path), 4L)
})
|
||
test_that("grouped df", {
  # Attach a hand-built "groups" attribute to the test data: one entry per
  # `cyl` value, with `.rows` listing the row indices belonging to that
  # group. NOTE(review): this looks like the layout dplyr uses for grouped
  # data frames -- confirm against the writer's expectations.
  df <- test_df()
  attr(df, "groups") <- data.frame(
    cyl = c(4L, 6L, 8L),
    .rows = I(list(
      c(3L, 8L, 9L, 18L, 19L, 20L, 21L, 26L, 27L, 28L, 32L),
      c(1L, 2L, 4L, 6L, 10L, 11L, 30L),
      c(5L, 7L, 12L, 13L, 14L, 15L, 16L, 17L, 22L, 23L, 24L, 25L, 29L, 31L)
    ))
  )

  tmp <- tempfile(fileext = ".parquet")
  # Remove the written file when the test exits, matching the cleanup done
  # by the other tests in this file (previously the file was left behind).
  on.exit(unlink(tmp), add = TRUE)

  # `df` and `tmp` keep their names because the snapshot records the code
  # verbatim. Writing emits a message, which the snapshot captures.
  expect_snapshot(write_parquet(df, tmp))
  # Three groups are expected to yield three row groups in the file.
  expect_equal(nrow(read_parquet_metadata(tmp)[["row_groups"]]), 3L)
  # The snapshot pins the order of the rows as read back from the file.
  expect_snapshot(as.data.frame(read_parquet(tmp)[, c("nam", "cyl")]))
})