diff --git a/DESCRIPTION b/DESCRIPTION index a74dabf2..72f391fc 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: isoreader Title: Read Stable Isotope Data Files Description: Interface to the raw data file formats commonly encountered in scientific disciplines that make use of stable isotopes. -Version: 1.4.0 +Version: 1.4.1 Authors@R: c(person( given = "Sebastian", family = "Kopf", diff --git a/R/isoread_isodat.R b/R/isoread_isodat.R index 9e51a391..1e0c9d25 100644 --- a/R/isoread_isodat.R +++ b/R/isoread_isodat.R @@ -499,6 +499,9 @@ extract_isodat_continuous_flow_vendor_data_table <- function(ds, cap_at_fun = NU if (nrow(extracted_dt$cell_values) == 0L) { stop("could not find any vendor table data", call. = FALSE) } + + # propagated newly registered problems + ds <- ds |> set_problems(combined_problems(ds, extracted_dt)) # store vendor data table data_table <- full_join(peaks, mutate(extracted_dt$cell_values, .check = TRUE), by = "Nr.") @@ -727,24 +730,40 @@ extract_isodat_main_vendor_data_table_fast <- function(ds, C_block, cap_at_fun = ds$source <- cap_at_fun(ds$source) } - columns <- extract_isodat_main_vendor_data_table_columns(ds, col_include = col_include) - - # safety check: to make sure all columns have the same format specification - if (!all(ok <- columns$n_formats == 1)) { - formats <- map_chr(columns$data[!ok], ~collapse(unique(.x$format), ", ")) - problems <- glue("column {columns$column[!ok]} has multiple formats '{formats}'") + # start output + output <- list() + + # add columns + output$columns <- extract_isodat_main_vendor_data_table_columns(ds, col_include = col_include) + + # safety check: to make sure all output$columns have the same format specification + if (!all(ok <- output$columns$n_types == 1)) { + formats <- map_chr(output$columns$data[!ok], ~collapse(unique(.x$format), ", ")) + problems <- glue("column {output$columns$column[!ok]} has multiple formats '{formats}'") iso_source_file_op_error(ds$source, glue("mismatched data column formats:\n{collapse(problems, '\n')}")) } + # safety check: warn if different precisions + if (!all(ok <- output$columns$n_precisions == 1)) { + precisions <- map_chr(output$columns$data[!ok], ~collapse(unique(.x$precision), ", ")) + problems <- glue("column {output$columns$column[!ok]} has multiple precisions '{precisions}'") + output <- register_warning( + output, + details = glue("mismatched data column formats:\n{collapse(problems, '\n')}"), + func = "extract_isodat_main_vendor_data_table" + ) + } + # safety check: to make sure all formats are resolved - if (!all(ok <- !is.na(columns$type))) { - problems <- glue("column {columns$column[!ok]} has unknown format '{columns$column_format[!ok]}'") + if (!all(ok <- !is.na(output$columns$column_type))) { + problems <- glue("column {output$columns$column[!ok]} has unknown format '{output$columns$column_format[!ok]}'") iso_source_file_op_error(ds$source, glue("unknown column formats:\n{collapse(problems, '\n')}")) } - cell_values <- extract_isodat_main_vendor_data_table_values(ds, columns) + # finish output with cell values + output$cell_values <- extract_isodat_main_vendor_data_table_values(ds, output$columns) - return(list(columns = columns, cell_values = cell_values)) + return(output) } # extract the main (recurring) portion of the vendor data table @@ -804,32 +823,45 @@ extract_isodat_main_vendor_data_table_columns <- function(ds, pos = ds$source$po dplyr::mutate(row = cumsum(.data$column == .data$column[1])) |> # remove duplicates dplyr::group_by(.data$column, .data$row) |> - dplyr::summarize( - group = .data$group[1], - continue_pos = .data$continue_pos[1], - id = .data$id[1], - format = .data$format[1], - `gas_config?` = .data$`gas_config?`[1], - units = .data$units[1], - ref_frame = .data$units[1], - .groups = "drop" - ) |> + dplyr::filter(dplyr::row_number() == 1) |> + dplyr::ungroup() |> + # dplyr::summarize( + # group = .data$group[1], + # continue_pos = .data$continue_pos[1], + # id = .data$id[1], + # format = .data$format[1], + # `gas_config?` = .data$`gas_config?`[1], + # units = .data$units[1], + # ref_frame = .data$units[1], + # .groups = "drop" + # ) |> dplyr::arrange(.data$group) |> - # nest by column and expand column details - tidyr::nest(data = c(-"column")) |> + # parse column format dplyr::mutate( - n_formats = purrr::map_int(.data$data, ~length(unique(.x$format))), - column_format = purrr::map_chr(.data$data, ~.x$format[1]), - column_units = purrr::map_chr(.data$data, ~.x$units[1]), type = dplyr::case_when( - .data$column_format == "%s" ~ "text", - .data$column_format %in% c("%u", "%d") ~ "integer", - str_detect(.data$column_format, "\\%[0-9.]*f") ~ "double", + .data$format == "%s" ~ "text", + .data$format %in% c("%u", "%d") ~ "integer", + str_detect(.data$format, "\\%[0-9.]*f") ~ "double", TRUE ~ NA_character_ - ) + ), + precision = dplyr::if_else( + type == "double", + stringr::str_extract(.data$format, "(?<=\\.)\\d*(?=f)"), + NA_character_ + ) + ) |> + # nest by column and expand column details + tidyr::nest(data = c(-"column")) |> + dplyr::mutate( + n_types = purrr::map_int(.data$data, ~length(unique(.x$type))), + n_precisions = purrr::map_int(.data$data, ~length(unique(.x$precision))), + line1 = purrr::map(.data$data, ~.x[1,c("format", "units", "type", "precision")]) ) |> + tidyr::unnest(line1) |> # naming adjustments + dplyr::rename("column_format" = "format", "column_units" = "units", + "column_type" = "type", "column_precision" = "precision") |> dplyr::mutate( # avoid issues with delta symbol on different OS column = stringr::str_replace(.data$column, fixed("\U03B4"), "d"), @@ -883,7 +915,7 @@ extract_isodat_main_vendor_data_table_values <- function(ds, columns) { # get cell values columns |> - filter(!is.na(type)) |> + filter(!is.na(.data$column_type)) |> unnest("data") |> select("column", "continue_pos", "type", "row") |> nest(data = c(-row)) |> diff --git a/tests/testthat/test-continuous-flow.R b/tests/testthat/test-continuous-flow.R index fb7d0992..23140209 100644 --- a/tests/testthat/test-continuous-flow.R +++ b/tests/testthat/test-continuous-flow.R @@ -154,11 +154,14 @@ test_that("test that additional continous flow files can be read", { iso_turn_reader_caching_off() # testing wrapper - check_continuous_flow_test_file <- function(file, file_info_cols = NULL, data_table_nrow = NULL, data_table_col_units = NULL) { + check_continuous_flow_test_file <- function(file, file_info_cols = NULL, data_table_nrow = NULL, data_table_col_units = NULL, n_problems = 0) { file_path <- get_isoreader_test_file(file, local_folder = test_folder) expect_true(file.exists(file_path)) - expect_is(file <- iso_read_continuous_flow(file_path, read_cache = FALSE), "continuous_flow") - expect_equal(nrow(problems(file)), 0) + if (n_problems > 0) + expect_warning(file <- iso_read_continuous_flow(file_path, read_cache = FALSE)) + else + expect_is(file <- iso_read_continuous_flow(file_path, read_cache = FALSE), "continuous_flow") + expect_equal(nrow(problems(file)), n_problems) expect_equal(nrow(file$file_info), 1) if (!is.null(file_info_cols)) expect_equal(names(file$file_info), file_info_cols) @@ -190,6 +193,28 @@ test_that("test that additional continous flow files can be read", { ) ) + dxf2 <- check_continuous_flow_test_file( + "dxf_example_H_02.dxf", + c("file_id", "file_root", "file_path", "file_subpath", "file_datetime", + "file_size", "Row", "Peak Center", "Check Ref. Dilution", "H3 Stability", + "H3 Factor", "Conditioning", "Seed Oxidation", "GC Method", "AS Sample", + "AS Method", "Identifier 1", "Identifier 2", "Analysis", "Preparation", "Method", + "measurement_info", "MS_integration_time.s"), + 53, + c(Nr. = NA, Start = "s", Rt = "s", End = "s", `Ampl 2` = "mV", + `Ampl 3` = "mV", `BGD 2` = "mV", `BGD 3` = "mV", `rIntensity 2` = "mVs", + `rIntensity 3` = "mVs", `rIntensity All` = "mVs", `Intensity 2` = "Vs", + `Intensity 3` = "Vs", `Intensity All` = "Vs", `Sample Dilution` = "%", + `List First Peak` = NA, `rR 3H2/2H2` = NA, `Is Ref.?` = NA, `R 3H2/2H2` = NA, + `Ref. Name` = NA, `rd 3H2/2H2` = "permil", `d 3H2/2H2` = "permil", + `R 2H/1H` = NA, `d 2H/1H` = "permil", `AT% 2H/1H` = "%", `Rps 3H2/2H2` = NA, + `Master Peak` = NA, `DeltaDelta 3H2/2H2` = "permil" + ), + n_problems = 1L + ) + expect_equal(problems(dxf2)$type, "warning") + expect_true(stringr::str_detect(problems(dxf2)$details, "has multiple precisions")) + check_continuous_flow_test_file( "dxf_example_HO_01.dxf", c("file_id", "file_root", "file_path", "file_subpath", "file_datetime", diff --git a/tests/testthat/test_data/dxf_example_H_02.dxf b/tests/testthat/test_data/dxf_example_H_02.dxf new file mode 100755 index 00000000..569428f4 Binary files /dev/null and b/tests/testthat/test_data/dxf_example_H_02.dxf differ