Skip to content

Commit

Permalink
Merge pull request #97 from spectral-cockpit/96-allow-non-parsable-bl…
Browse files Browse the repository at this point in the history
…ocks

- allow non-parsable blocks
  • Loading branch information
philipp-baumann authored Nov 11, 2023
2 parents 328ed2e + caa3167 commit 009ed2b
Show file tree
Hide file tree
Showing 10 changed files with 70 additions and 25 deletions.
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# Generated by roxygen2: do not edit by hand

S3method(calc_parameter_chunk_size,data)
S3method(calc_parameter_chunk_size,default)
S3method(calc_parameter_chunk_size,parameter)
S3method(calc_parameter_chunk_size,text)
export(calc_parameter_chunk_size)
Expand Down
6 changes: 6 additions & 0 deletions R/calc_parameter_chunk_size.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,12 @@
#' @export
calc_parameter_chunk_size <- function(ds) UseMethod("calc_parameter_chunk_size")

#' @export
calc_parameter_chunk_size.default <- function(ds) {
return(ds)
}


#' @export
calc_parameter_chunk_size.parameter <- function(ds) {
ds$chunk_size <- calc_chunk_size(ds)
Expand Down
34 changes: 13 additions & 21 deletions R/create_dataset.R
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,11 @@ create_dataset <- function(header_data) {
"b0-c0-t0-a(0|64)" = c(read_class = "text", block_type_name = "text_information"),
"b0-c0-t8-a0" = c(read_class = "parameter", block_type_name = "info_block"),
"b0-c0-t104-a64" = c(read_class = "text", block_type_name = "history"),
"b0-c0-t144-a1" = c(read_class = "parameter", block_type_name = "report_unknown"),
"b0-c0-t152-a(0|64)" = c(read_class = "text", block_type_name = "curve_fit"),
"b0-c0-t168-a(0|64)" = c(read_class = "text", block_type_name = "signature"),
"b0-c0-t240-a(0|64)" = c(read_class = "text", block_type_name = "integration_method"),
# guess general text
"b0-c0-t\\d+-a(0|64)" = c(read_class = "text", block_type_name = "text_information"),

# block code 7 -----------------------------------------------------------------------------
# spectrum types of sample
Expand Down Expand Up @@ -125,28 +125,20 @@ create_dataset <- function(header_data) {
is_match <- unlist(lapply(key_names, function(pat) grepl(pat, composite_key)))
key_value_match <- key_value_map[is_match]
nm_matches <- names(key_value_match)
# because of "b0-c0-t\\d+-a(0|64)" regex
is_match_guess <- grepl("\\\\d\\+", nm_matches)

if (length(key_value_match) == 1L) {
if (any(is_match_guess)) {
message(paste(
"Guessing header entry for block type 0 to be text information:\n",
"* Composite key :=", composite_key
))
}
} else if (length(key_value_match) > 1L) {
# in block code 0, the less specific guess ("b0-c0-t\\d+-a(0|64)")
# has to be removed
key_value_match[is_match_guess] <- NULL

if (length(key_value_match) != 0L) {
key_value_match_vec <- key_value_match[[1]]
read_class <- unname(key_value_match_vec["read_class"])
block_type_name <- unname(key_value_match_vec["block_type_name"])
} else if (length(key_value_match) == 0L) {
# inform about details and what to do for improving {opusreader2}
stop_proactively(composite_key)
read_class <- NULL

block_type_name <- "unknown"

warn_proactively(composite_key)
}

key_value_match_vec <- key_value_match[[1]]
read_class <- unname(key_value_match_vec["read_class"])
block_type_name <- unname(key_value_match_vec["block_type_name"])

# create a dataset
ds <- structure(
Expand All @@ -157,8 +149,8 @@ create_dataset <- function(header_data) {
return(ds)
}

stop_proactively <- function(composite_key) {
stop(
warn_proactively <- function(composite_key) {
warning(
paste(
"Unknown header entry.\n The following 'composite key' is not yet",
"mapped in the {opusreader2} key-value map of the header:\n",
Expand Down
8 changes: 6 additions & 2 deletions R/extract_metadata.R
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,11 @@ get_basic_metadata <- function(ds_list) {
get_meta_timestamp <- function(ds_list) {
text <- ds_list$history$text
history <- paste0(text, collapse = "")
save_file_time <- gsub(".*Save File\t\t(\\d.*)\t\t\t.*", "\\1", history)
save_file_time <- gsub(
".*\t\t(\\d.*)\t\t\t.*",
"\\1",
history
)

time_hour_tz <- strsplit(x = save_file_time, split = " ")[[1L]]
time_hour <- paste(time_hour_tz[1L], time_hour_tz[2L])
Expand All @@ -31,7 +35,7 @@ get_meta_timestamp <- function(ds_list) {
))

list_datetime_tz <- list(
datetime = as.character(time),
datetime = as.character(sort(time)[1]),
timezone = tz
)

Expand Down
3 changes: 2 additions & 1 deletion R/get_nice_parameter_name.R
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,8 @@ sort_list_by <- function(dataset_list) {
"lab_and_process_param_raw",
"lab_and_process_param_processed",
"info_block",
"history"
"history",
"unknown"
)

to_sort_by <- intersect(sort_by, names(dataset_list))
Expand Down
14 changes: 13 additions & 1 deletion R/parse_chunk.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,18 @@
#' @family parsing
parse_chunk <- function(ds, con) UseMethod("parse_chunk")



#' read chunk method for default
#'
#' @inheritParams parse_chunk
#'
#' @keywords internal
parse_chunk.default <- function(ds, con) {
return(ds)
}


#' read chunk method for text
#'
#' @inheritParams parse_chunk
Expand All @@ -26,7 +38,7 @@ parse_chunk.text <- function(ds, con) {
#'
#' @keywords internal
parse_chunk.parameter <- function(ds, con) {
if (ds$text_type %in% c(112, 104)) {
if (ds$text_type %in% c(112, 104, 144)) {
cursor <- ds$offset + 12
} else {
cursor <- ds$offset
Expand Down
4 changes: 4 additions & 0 deletions R/read_opus.R
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,10 @@
#' * **`lab_and_process_param_2`**:
#' * **`info_block`**:
#' * **`history`**:
#' * **`unknown`**: if a block-type can not be matched, no parsing is done and
#' an empty list entry is returned. This gives you a hint that there is
#' a block that can not yet be parsed. You can take further steps by
#' opening an issue.
#'
#' @section Details:
#' `read_opus()` is the high-level interface to read multiple OPUS files at
Expand Down
4 changes: 4 additions & 0 deletions inst/WORDLIST
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
Agroecosystems
ai
aut
automatec
Expand All @@ -11,9 +12,11 @@ Config
Continous
cph
cre
CSIRO
decrypter
DPF
dsn
ETH
ETL
fledge
fnd
Expand Down Expand Up @@ -84,3 +87,4 @@ unlist
utf
vapour
VignetteBuilder
Zürich
17 changes: 17 additions & 0 deletions man/parse_chunk.default.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions man/read_opus.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 009ed2b

Please sign in to comment.