Merge pull request #97 from spectral-cockpit/96-allow-non-parsable-bl…

…ocks - allow non-parsable blocks
spectral-cockpit · Nov 11, 2023 · 009ed2b · 009ed2b
2 parents 328ed2e + caa3167
commit 009ed2b
Show file tree

Hide file tree

Showing 10 changed files with 70 additions and 25 deletions.
diff --git a/NAMESPACE b/NAMESPACE
@@ -1,6 +1,7 @@
 # Generated by roxygen2: do not edit by hand
 
 S3method(calc_parameter_chunk_size,data)
+S3method(calc_parameter_chunk_size,default)
 S3method(calc_parameter_chunk_size,parameter)
 S3method(calc_parameter_chunk_size,text)
 export(calc_parameter_chunk_size)

diff --git a/R/calc_parameter_chunk_size.R b/R/calc_parameter_chunk_size.R
@@ -4,6 +4,12 @@
 #' @export
 calc_parameter_chunk_size <- function(ds) UseMethod("calc_parameter_chunk_size")
 
+#' @export
+calc_parameter_chunk_size.default <- function(ds) {
+  return(ds)
+}
+
+
 #' @export
 calc_parameter_chunk_size.parameter <- function(ds) {
   ds$chunk_size <- calc_chunk_size(ds)

diff --git a/R/create_dataset.R b/R/create_dataset.R
@@ -26,11 +26,11 @@ create_dataset <- function(header_data) {
     "b0-c0-t0-a(0|64)" = c(read_class = "text", block_type_name = "text_information"),
     "b0-c0-t8-a0" = c(read_class = "parameter", block_type_name = "info_block"),
     "b0-c0-t104-a64" = c(read_class = "text", block_type_name = "history"),
+    "b0-c0-t144-a1" = c(read_class = "parameter", block_type_name = "report_unknown"),
     "b0-c0-t152-a(0|64)" = c(read_class = "text", block_type_name = "curve_fit"),
     "b0-c0-t168-a(0|64)" = c(read_class = "text", block_type_name = "signature"),
     "b0-c0-t240-a(0|64)" = c(read_class = "text", block_type_name = "integration_method"),
     # guess general text
-    "b0-c0-t\\d+-a(0|64)" = c(read_class = "text", block_type_name = "text_information"),
 
     # block code 7 -----------------------------------------------------------------------------
     # spectrum types of sample
@@ -125,28 +125,20 @@ create_dataset <- function(header_data) {
   is_match <- unlist(lapply(key_names, function(pat) grepl(pat, composite_key)))
   key_value_match <- key_value_map[is_match]
   nm_matches <- names(key_value_match)
-  # because of "b0-c0-t\\d+-a(0|64)" regex
-  is_match_guess <- grepl("\\\\d\\+", nm_matches)
-
-  if (length(key_value_match) == 1L) {
-    if (any(is_match_guess)) {
-      message(paste(
-        "Guessing header entry for block type 0 to be text information:\n",
-        "* Composite key :=", composite_key
-      ))
-    }
-  } else if (length(key_value_match) > 1L) {
-    # in block code 0, the less specific guess ("b0-c0-t\\d+-a(0|64)")
-    # has to be removed
-    key_value_match[is_match_guess] <- NULL
+
+  if (length(key_value_match) != 0L) {
+    key_value_match_vec <- key_value_match[[1]]
+    read_class <- unname(key_value_match_vec["read_class"])
+    block_type_name <- unname(key_value_match_vec["block_type_name"])
   } else if (length(key_value_match) == 0L) {
     # inform about details and what to do for improving {opusreader2}
-    stop_proactively(composite_key)
+    read_class <- NULL
+
+    block_type_name <- "unknown"
+
+    warn_proactively(composite_key)
   }
 
-  key_value_match_vec <- key_value_match[[1]]
-  read_class <- unname(key_value_match_vec["read_class"])
-  block_type_name <- unname(key_value_match_vec["block_type_name"])
 
   # create a dataset
   ds <- structure(
@@ -157,8 +149,8 @@ create_dataset <- function(header_data) {
   return(ds)
 }
 
-stop_proactively <- function(composite_key) {
-  stop(
+warn_proactively <- function(composite_key) {
+  warning(
     paste(
       "Unknown header entry.\n The following 'composite key' is not yet",
       "mapped in the {opusreader2} key-value map of the header:\n",

diff --git a/R/extract_metadata.R b/R/extract_metadata.R
@@ -17,7 +17,11 @@ get_basic_metadata <- function(ds_list) {
 get_meta_timestamp <- function(ds_list) {
   text <- ds_list$history$text
   history <- paste0(text, collapse = "")
-  save_file_time <- gsub(".*Save File\t\t(\\d.*)\t\t\t.*", "\\1", history)
+  save_file_time <- gsub(
+    ".*\t\t(\\d.*)\t\t\t.*",
+    "\\1",
+    history
+  )
 
   time_hour_tz <- strsplit(x = save_file_time, split = " ")[[1L]]
   time_hour <- paste(time_hour_tz[1L], time_hour_tz[2L])
@@ -31,7 +35,7 @@ get_meta_timestamp <- function(ds_list) {
   ))
 
   list_datetime_tz <- list(
-    datetime = as.character(time),
+    datetime = as.character(sort(time)[1]),
     timezone = tz
   )
 

diff --git a/R/get_nice_parameter_name.R b/R/get_nice_parameter_name.R
@@ -211,7 +211,8 @@ sort_list_by <- function(dataset_list) {
     "lab_and_process_param_raw",
     "lab_and_process_param_processed",
     "info_block",
-    "history"
+    "history",
+    "unknown"
   )
 
   to_sort_by <- intersect(sort_by, names(dataset_list))

diff --git a/R/parse_chunk.R b/R/parse_chunk.R
@@ -8,6 +8,18 @@
 #' @family parsing
 parse_chunk <- function(ds, con) UseMethod("parse_chunk")
 
+
+
+#' read chunk method for default
+#'
+#' @inheritParams parse_chunk
+#'
+#' @keywords internal
+parse_chunk.default <- function(ds, con) {
+  return(ds)
+}
+
+
 #' read chunk method for text
 #'
 #' @inheritParams parse_chunk
@@ -26,7 +38,7 @@ parse_chunk.text <- function(ds, con) {
 #'
 #' @keywords internal
 parse_chunk.parameter <- function(ds, con) {
-  if (ds$text_type %in% c(112, 104)) {
+  if (ds$text_type %in% c(112, 104, 144)) {
     cursor <- ds$offset + 12
   } else {
     cursor <- ds$offset

diff --git a/R/read_opus.R b/R/read_opus.R
@@ -117,6 +117,10 @@
 #' * **`lab_and_process_param_2`**:
 #' * **`info_block`**:
 #' * **`history`**:
+#' * **`unknown`**: if a block-type can not be matched, no parsing is done and
+#'  an empty list entry is returned. This gives you a hint that there is
+#'  a block that can not yet be parsed. You can take further steps by
+#'  opening an issue.
 #'
 #' @section Details:
 #' `read_opus()` is the high-level interface to read multiple OPUS files at

diff --git a/inst/WORDLIST b/inst/WORDLIST
@@ -1,3 +1,4 @@
+Agroecosystems
 ai
 aut
 automatec
@@ -11,9 +12,11 @@ Config
 Continous
 cph
 cre
+CSIRO
 decrypter
 DPF
 dsn
+ETH
 ETL
 fledge
 fnd
@@ -84,3 +87,4 @@ unlist
 utf
 vapour
 VignetteBuilder
+Zürich
diff --git a/man/parse_chunk.default.Rd b/man/parse_chunk.default.Rd
diff --git a/man/read_opus.Rd b/man/read_opus.Rd