From d0bcc5bef7115f6f1de6bae27bab8602d000d092 Mon Sep 17 00:00:00 2001 From: Claudia Beleites Date: Wed, 1 May 2024 17:55:58 +0200 Subject: [PATCH] fix test for plotmat for NoSuggests-scenario --- R/DollarNames.R | 29 + R/NEW-functions.R | 78 ++ R/call.list.R | 16 + R/chk.hy.R | 20 + R/chondro.R | 51 + R/count_lines.R | 37 + R/cov.R | 53 + R/deprecated.R | 43 + R/factor2num.R | 10 + R/fileio.optional.R | 90 ++ R/fix_spc_colnames.R | 24 + R/getbynames.R | 17 + R/guesswavelength.R | 51 + R/makeraster.R | 135 +++ R/map.identify.R | 80 ++ R/map.sel.poly.R | 126 +++ R/mark.dendrogram.R | 73 ++ R/mark.peak.R | 24 + R/matlab.palette.R | 49 + R/mvtnorm.R | 96 ++ R/normalize01.R | 90 ++ R/options.R | 174 ++++ R/orderwl.R | 53 + R/paste.row.R | 63 ++ R/pearson.dist.R | 60 ++ R/plotc.R | 154 +++ R/plotmap.R | 116 +++ R/plotmat.R | 111 +++ R/plotspc.R | 714 ++++++++++++++ R/plotvoronoi.R | 46 + R/qplot.R | 239 +++++ R/qplotmixmap.R | 264 +++++ R/read.ENVI.HySpex.R | 34 + R/read.ENVI.Nicolet.R | 109 +++ R/read.ENVI.R | 440 +++++++++ R/read.asc.Andor.R | 46 + R/read.asc.PerkinElmer.R | 36 + R/read.ini.R | 52 + R/read.jdx.R | 386 ++++++++ R/read.mat.Cytospec.R | 83 ++ R/read.mat.Witec.R | 24 + R/read.spc.Kaiser.R | 102 ++ R/read.spc.R | 926 ++++++++++++++++++ R/read.spc.Shimadzu.R | 12 + R/read.spe.R | 390 ++++++++ R/read.txt.Horiba.R | 48 + R/read.txt.Renishaw.R | 202 ++++ R/read.txt.Shimadzu.R | 204 ++++ R/read.txt.Witec.R | 434 ++++++++ R/read.txt.long.R | 172 ++++ R/read.txt.wide.R | 84 ++ R/regexps.R | 1 + R/spc.NA.approx.R | 142 +++ R/spc.bin.R | 79 ++ R/spc.fit.poly.R | 297 ++++++ R/spc.identify.R | 276 ++++++ R/spc.loess.R | 59 ++ R/spc.rubberband.R | 145 +++ R/spc.spline.R | 53 + R/split.string.R | 33 + R/splitdots.R | 35 + R/trellis.factor.key.R | 50 + R/units.R | 3 + R/unittest.R | 57 ++ R/validate.R | 11 + R/wc.R | 11 + R/wleval.R | 75 ++ R/write.txt.long.R | 99 ++ R/write.txt.wide.R | 88 ++ R/y-pastenames.R | 17 + .../testthat/_snaps/attached/plot-voronoi.svg | 185 ++++ tests/testthat/test-plotspc.r | 10 + tests/unittests.R | 3 + 73 files changed, 8699 insertions(+) create mode 100644 R/DollarNames.R create mode 100644 R/NEW-functions.R create mode 100644 R/call.list.R create mode 100644 R/chk.hy.R create mode 100644 R/chondro.R create mode 100644 R/count_lines.R create mode 100644 R/cov.R create mode 100644 R/deprecated.R create mode 100644 R/factor2num.R create mode 100644 R/fileio.optional.R create mode 100644 R/fix_spc_colnames.R create mode 100644 R/getbynames.R create mode 100644 R/guesswavelength.R create mode 100644 R/makeraster.R create mode 100644 R/map.identify.R create mode 100644 R/map.sel.poly.R create mode 100644 R/mark.dendrogram.R create mode 100644 R/mark.peak.R create mode 100644 R/matlab.palette.R create mode 100644 R/mvtnorm.R create mode 100644 R/normalize01.R create mode 100644 R/options.R create mode 100644 R/orderwl.R create mode 100644 R/paste.row.R create mode 100644 R/pearson.dist.R create mode 100644 R/plotc.R create mode 100644 R/plotmap.R create mode 100644 R/plotmat.R create mode 100644 R/plotspc.R create mode 100644 R/plotvoronoi.R create mode 100644 R/qplot.R create mode 100644 R/qplotmixmap.R create mode 100644 R/read.ENVI.HySpex.R create mode 100644 R/read.ENVI.Nicolet.R create mode 100644 R/read.ENVI.R create mode 100644 R/read.asc.Andor.R create mode 100644 R/read.asc.PerkinElmer.R create mode 100644 R/read.ini.R create mode 100644 R/read.jdx.R create mode 100644 R/read.mat.Cytospec.R create mode 100644 R/read.mat.Witec.R create mode 100644 R/read.spc.Kaiser.R create mode 100644 R/read.spc.R create mode 100644 R/read.spc.Shimadzu.R create mode 100644 R/read.spe.R create mode 100644 R/read.txt.Horiba.R create mode 100644 R/read.txt.Renishaw.R create mode 100644 R/read.txt.Shimadzu.R create mode 100644 R/read.txt.Witec.R create mode 100644 R/read.txt.long.R create mode 100644 R/read.txt.wide.R create mode 100644 R/regexps.R create mode 100644 R/spc.NA.approx.R create mode 100644 R/spc.bin.R create mode 100644 R/spc.fit.poly.R create mode 100644 R/spc.identify.R create mode 100644 R/spc.loess.R create mode 100644 R/spc.rubberband.R create mode 100644 R/spc.spline.R create mode 100644 R/split.string.R create mode 100644 R/splitdots.R create mode 100644 R/trellis.factor.key.R create mode 100644 R/units.R create mode 100644 R/unittest.R create mode 100644 R/validate.R create mode 100644 R/wc.R create mode 100644 R/wleval.R create mode 100644 R/write.txt.long.R create mode 100644 R/write.txt.wide.R create mode 100644 R/y-pastenames.R create mode 100644 tests/testthat/_snaps/attached/plot-voronoi.svg create mode 100644 tests/testthat/test-plotspc.r create mode 100644 tests/unittests.R diff --git a/R/DollarNames.R b/R/DollarNames.R new file mode 100644 index 00000000..fe1ba1d1 --- /dev/null +++ b/R/DollarNames.R @@ -0,0 +1,29 @@ +##' command line completion for $ +##' +##' @aliases .DollarNames .DollarNames,hyperSpec-method +##' @author C. Beleites +##' @seealso \code{\link[utils]{.DollarNames}} +##' @export +##' @rdname dollarnames +##' @keywords utilities +##' @title command line completion for $ +##' @param x the hyperSpecobject +##' @param pattern pattern to look for +##' @return the name of the extra data slot +##' @importFrom utils .DollarNames +.DollarNames.hyperSpec <- function (x, pattern = "") + grep (pattern, colnames (x@data), value = TRUE) + +.test (.DollarNames.hyperSpec) <- function(){ + context (".DollarNames") + + test_that("expansion on missing pattern", { + expect_equal(.DollarNames (flu), colnames (flu)) + }) + + test_that("expansion on missing pattern", { + expect_equal(.DollarNames (flu, "f"), "filename") + expect_equal(.DollarNames (flu, "c"), c ("spc", "c")) + }) + +} diff --git a/R/NEW-functions.R b/R/NEW-functions.R new file mode 100644 index 00000000..55c0f5a7 --- /dev/null +++ b/R/NEW-functions.R @@ -0,0 +1,78 @@ + +#' Future functions +#' +#' These functions will be introduced in \pkg{hyperSpec} v1.0 and will replace +#' some current functions. Now they appear here just for compatibility with +#' other packages, which should be released on CRAN. They are not intended to +#' be used by \pkg{hyperSpec} v0.100 users directly. +#' +#' @param ... Arguments to functions. +#' @param x,from,to,ref_wl Arguments to functions. +#' +#' @name Future-functions +NULL + + +#' @rdname Future-functions +#' @include fileio.optional.R +#' @export +.spc_io_postprocess_optional <- function(...) { + .fileio.optional(...) +} + + +#' @rdname Future-functions +#' @include wl.R +#' @export +wl_convert_units <- function(x, from, to, ref_wl = NULL) { + wlconv(points = x, src = from, dst = to, laser = ref_wl) +} + +#' @rdname Future-functions +#' @include options.R +#' @export +hy_set_options <- function(...) { + hy.setOption(...) +} + +#' @rdname Future-functions +#' @include options.R +#' @export +hy_get_option <- function(...) { + hy.getOption(...) +} + +#' @rdname Future-functions +#' @include options.R +#' @export +hy_set_options <- function(...) { + hy.getOptions(...) +} + +#' @rdname Future-functions +#' @include read.txt.long.R +#' @export +read_txt_long <- function(...) { + read.txt.long(...) +} + +#' @rdname Future-functions +#' @include read.txt.wide.R +#' @export +read_txt_wide <- function(...) { + read.txt.wide(...) +} + +#' @rdname Future-functions +#' @include wl.R +#' @export +.wl_fix_unit_name <- function(...) { + .fixunitname(...) +} + +#' @rdname Future-functions +#' @include chk.hy.R +#' @export +assert_hyperSpec <- function(...) { + chk.hy(...) +} diff --git a/R/call.list.R b/R/call.list.R new file mode 100644 index 00000000..06aa7066 --- /dev/null +++ b/R/call.list.R @@ -0,0 +1,16 @@ +###----------------------------------------------------------------------------- +### +### generate a list of function arguments for the calling function +### +##'@noRd +.call.list <- function (x = NULL) { + if (is.null (x)) + x <- sys.call (-1) + + if (length (x) < 3L) + I (list ()) + else { + x <- as.list (x [- (1 : 2)]) + I (x) + } +} diff --git a/R/chk.hy.R b/R/chk.hy.R new file mode 100644 index 00000000..c9de075c --- /dev/null +++ b/R/chk.hy.R @@ -0,0 +1,20 @@ +##' Check whether an object is a hyperSpec object and validate the object +##' +##' @title Validation of hyperSpec objects +##' @aliases validObject validObject,hyperSpec-method chk.hy +##' @author C. Beleites +##' @seealso \code{\link[methods]{validObject}} +##' @param object the object to check +##' @return \code{TRUE} if the check passes, otherwise stop with an +##' error. +##' @keywords methods +##' @export +##' @examples +##' chk.hy (chondro) +##' validObject (chondro) +chk.hy <- function (object){ + if (! is (object, "hyperSpec")) + stop ("no hyperSpec object") + + TRUE +} diff --git a/R/chondro.R b/R/chondro.R new file mode 100644 index 00000000..a9d595a3 --- /dev/null +++ b/R/chondro.R @@ -0,0 +1,51 @@ +.make.chondro <- function (){ + new ("hyperSpec", + spc = (tcrossprod (.chondro.scores, .chondro.loadings) + + rep (.chondro.center, each = nrow (.chondro.scores))), + wavelength = .chondro.wl, + data = .chondro.extra, labels = .chondro.labels) +} + +##' Raman spectra of 2 Chondrocytes in Cartilage +##' A Raman-map (laterally resolved Raman spectra) of chondrocytes in +##' cartilage. +##' +##' See the vignette \code{vignette ("chondro", package = "hyperSpec")}. +##' +##' @name chondro +##' @docType data +##' @format The data set has 875 Raman spectra measured on a 25 \eqn{\times}{x} +##' 35 grid with 1 micron step size. Spatial information is in +##' \code{chondro$x} and \code{chondro$y}. Each spectrum has 300 data points +##' in the range of ca. 600 - 1800 cm\eqn{^{-1}}{^-1}. +##' @author A. Bonifacio and C. Beleites +##' @keywords datasets +##' @references The raw data is available at \url{http://hyperspec.r-forge.r-project.org/blob/chondro.zip} +##' @export chondro +##' @examples +##' +##' +##' chondro +##' +##' ## do baseline correction +##' baselines <- spc.fit.poly.below (chondro) +##' chondro <- chondro - baselines +##' +##' ## area normalization +##' chondro <- chondro / colMeans (chondro) +##' +##' ## substact common composition +##' chondro <- chondro - quantile (chondro, 0.05) +##' +##' cols <- c ("dark blue", "orange", "#C02020") +##' plotmap (chondro, clusters ~ x * y, col.regions = cols) +##' +##' cluster.means <- aggregate (chondro, chondro$clusters, mean_pm_sd) +##' plot (cluster.means, stacked = ".aggregate", fill = ".aggregate", col = cols) +##' +##' ## plot nucleic acid bands +##' plotmap (chondro[, , c( 728, 782, 1098, 1240, 1482, 1577)], +##' col.regions = colorRampPalette (c ("white", "gold", "dark green"), space = "Lab") (20)) +##' +delayedAssign ("chondro", .make.chondro ()) + diff --git a/R/count_lines.R b/R/count_lines.R new file mode 100644 index 00000000..9da86ad1 --- /dev/null +++ b/R/count_lines.R @@ -0,0 +1,37 @@ +##' count lines (of an ASCII file) +##' +##' @param file the file name or connection +##' @param chunksize `file` is read in chunks of `chunksize` lines. +##' @return number of lines in file +##' @export +##' @md +##' @author C. Beleites +count_lines <- function(file, chunksize = 1e4) { + nlines <- 0 + + con <- file(file, open = "r") + on.exit(close (con)) + + while ((n <- length (readLines(con, n = chunksize))) > 0L) + nlines <- nlines + n + + nlines +} + +.test (count_lines) <- function (){ + context ("count_lines") + + tmpfile <- tempfile() + on.exit (unlink (tmpfile)) + + writeLines("blabla\nblubb", con = tmpfile) + + test_that("file read in one chunk", + expect_equal (count_lines (tmpfile), 2) + ) + + test_that("file read in more chunks", + expect_equal (count_lines (tmpfile, chunksize = 1L), 2) + ) + +} \ No newline at end of file diff --git a/R/cov.R b/R/cov.R new file mode 100644 index 00000000..525c42a4 --- /dev/null +++ b/R/cov.R @@ -0,0 +1,53 @@ +##' Covariance matrices for hyperSpec objects +##' +##' +##' @param x hyperSpec object +##' @param y not supported +##' @param use,method handed to \code{\link[stats]{cov}} +##' @return covariance matrix of size \code{nwl (x)} x \code{nwl (x)} +##' @seealso \code{\link[stats]{cov}} +##' @author C. Beleites +##' @rdname cov +##' @export +##' @examples +##' image (cov (chondro)) +setMethod ("cov", signature = signature (x = "hyperSpec", y = "missing"), function (x, y, use, method){ + validObject (x) + + cov (x@data$spc, use = use, method = method) +}) + + +##' @param ... ignored +##' @param regularize regularization of the covariance matrix. Set \code{0} to switch off +##' +##' \code{pooled.cov} calculates pooled covariance like e.g. in LDA. +##' @param groups factor indicating the groups +##' @rdname cov +##' @export +##' @examples +##' pcov <- pooled.cov (chondro, chondro$clusters) +##' plot (pcov$means) +##' image (pcov$COV) +##' +pooled.cov <- function (x, groups, ..., regularize = 1e-5 * max (abs (COV))){ + chk.hy (x) + validObject (x) + + if (! is.factor (groups)) + stop ("groups must be a factor") + + x <- x [! is.na (groups)] + groups <- groups [! is.na (groups)] + + means <- aggregate (x, groups, "mean") # TODO: speed up? + + COV <- cov (x@data$spc - means@data$spc [as.numeric (groups),, drop = FALSE]) + + ## regularization + COV <- COV + diag (regularize, nrow (COV)) + + list (COV = COV, + means = means) +} + diff --git a/R/deprecated.R b/R/deprecated.R new file mode 100644 index 00000000..a68ab8e5 --- /dev/null +++ b/R/deprecated.R @@ -0,0 +1,43 @@ + + +##' @rdname read.asc.Andor +##' @export +##' @keywords internal +scan.asc.Andor <- function (...) {.Deprecated("read.asc.Andor"); read.asc.Andor(...)} + +##' @rdname read.txt.Renishaw +##' @export +##' @keywords internal +scan.txt.Renishaw <- function (...) {.Deprecated("read.txt.Renishaw()"); read.txt.Renishaw(...)} + +##' @rdname read.txt.Renishaw +##' @export +##' @keywords internal +scan.zip.Renishaw <- function (...) {.Deprecated("read.(zip.Renishaw)"); read.zip.Renishaw(...)} + +##' @rdname read.txt.Witec +##' @export +##' @keywords internal +scan.txt.Witec <- function (...) {.Deprecated("read.txt.Witec()"); read.txt.Witec(...)} + +##' @rdname read.txt.Witec +##' @export +##' @keywords internal +scan.dat.Witec <- function (...) {.Deprecated("read.dat.Witec())"); read.dat.Witec(...)} + +##' @rdname read.txt.Witec +##' @export +##' @keywords internal +scan.txt.Witec.Graph <- function (...) {.Deprecated("read.txt.Witec.Graph()"); read.txt.Witec.Graph(...)} + + +#### DEFUNCT ################################################################################################## + +##' @export +##' @rdname read.mat.Cytospec +read.cytomat <- function (...){ + .Defunct ("read.mat.Cytospec", + package = "hyperSpec", + msg = "read.mat.Cytospec is now defunct.\nPlease use read.mat.Cytospec instead.") +} + diff --git a/R/factor2num.R b/R/factor2num.R new file mode 100644 index 00000000..45fea789 --- /dev/null +++ b/R/factor2num.R @@ -0,0 +1,10 @@ +###----------------------------------------------------------------------------- +### +### factor2num - conversion of a factor containing numerical levels +### +### +##TODO: export + +factor2num <- function (f) + as.numeric(levels (f)) [as.numeric (f)] + diff --git a/R/fileio.optional.R b/R/fileio.optional.R new file mode 100644 index 00000000..e0af22ee --- /dev/null +++ b/R/fileio.optional.R @@ -0,0 +1,90 @@ +#' Helper function to harmonize treatment of file import results +#' +#' This function provides two ways of post-processing imported spectra: +#' +#' - optionally remove empty spectra (some spectrograph software will produce +#' empty spectra when measurements are cancelled) +#' - optionally keep the filenames in column `spc$filename` +#' +#' The desired overall behavior can be set by options via [hy.setOptions()]. All +#' file import filters should call `.fileio.optional()` to ensure the same +#' behavior. +#' +#' @param spc hyperSpec object for file import post-processing +#' @param filename filename(s) to become extra data column of `spc` +#' @param ... (ignored) +#' @param file.remove.emptyspc should empty (all `NA` or all `0`) spectra be +#' removed? +#' @param file.keep.name should file names be kept and put into `spc$filename`? +#' @param tolerance intensities in +/- `tolerance` are considered `0` for +#' `file.remove.emptyspc = TRUE` +#' @keywords internal +#' @return hyperSpec object +#' @export +.fileio.optional <- function(spc, filename, ..., + file.remove.emptyspc = hy.getOption("file.remove.emptyspc"), + file.keep.name = hy.getOption("file.keep.name"), + tolerance = hy.getOption("tolerance")) { + tolerance <- .checkpos(tolerance, "tolerance") + + if (file.remove.emptyspc) { + ## number of NAs in each spectrum + nas <- rowSums (is.na (spc)) + + ## number of zero-values in each spectrum + zeros <- rowSums (abs (spc) < tolerance, na.rm = TRUE) + + spc <- spc[nas + zeros < nwl(spc)] + } + + if (file.keep.name & nrow (spc) > 0L){ + if (is.null (spc@data$filename)){ + if (is (filename, "connection")) + filename <- summary(filename)$description + + spc@data$filename <- filename + spc@label$filename <- "filename" + } else { + warning ("$filename already exists. => Skipping file.keep.name") + } + } + + spc +} + +##' @include unittest.R +.test (.fileio.optional) <- function (){ + context (".fileio.optional") + + test_that ("removing of zero/NA spectra", { + tmp <- fluNA # spectrum 2 is all NA + tmp [[3]] <- 0 + tmp [[5]] <- runif (nwl(tmp), min = - hy.getOption("tolerance") / 2, max = hy.getOption("tolerance") / 2) + tmp [[,, 450~455]] <- NA + + expect_equivalent (.fileio.optional (tmp, file.remove.emptyspc = TRUE, file.keep.name = FALSE), + tmp [- c(2, 3, 5)]) + }) + + test_that ("filenames", { + flu$filename <- NULL + tmp <- .fileio.optional (flu, filename = "test", file.remove.emptyspc = FALSE, file.keep.name = TRUE) + expect_true (all (tmp$filename == "test")) + + expect_warning(tmp <- .fileio.optional (tmp, filename = "test2", + file.remove.emptyspc = FALSE, file.keep.name = TRUE) + ) + expect_true (all (tmp$filename == "test")) + }) + + + options.state <- .options + on.exit(do.call (hy.setOptions, options.state)) + + test_that("option treatment", { + hy.setOptions(file.remove.emptyspc = FALSE) + skip ("TODO: implement") + do.call (hy.setOptions, options.state) + }) +} + diff --git a/R/fix_spc_colnames.R b/R/fix_spc_colnames.R new file mode 100644 index 00000000..1a5ff301 --- /dev/null +++ b/R/fix_spc_colnames.R @@ -0,0 +1,24 @@ +#' Ensure that the spectra matrix has the wavelengths in column names +#' +#' @param spc hyperSpec object +#' +#' @return hyperSpec object with wavelengths in column names of `$spc` +#' @md +#' @export +.fix_spc_colnames <- function(spc){ + colnames (spc@data$spc) <- signif (spc@wavelength, digits = 6) + + spc +} + +.test (.fix_spc_colnames) <- function(){ + context(".fix_spc_colnames") + + test_that("colnames get fixed", { + tmp <- flu + colnames(tmp@data$spc) <- NULL + + tmp <- .fix_spc_colnames(tmp) + expect_equal(colnames (tmp@data$spc), as.character (wl (tmp))) + }) +} \ No newline at end of file diff --git a/R/getbynames.R b/R/getbynames.R new file mode 100644 index 00000000..a7d7ff4b --- /dev/null +++ b/R/getbynames.R @@ -0,0 +1,17 @@ +###----------------------------------------------------------------------------- +### +### getbynames - get list elements by name and if no such element exists, NA +### +### + +getbynames <- function (x, e) { + x <- x [e] + if (length (x) > 0) { + if (is.character (e)) + names (x) <- e + x [sapply (x, is.null)] <- NA + x + } else { + list () + } +} diff --git a/R/guesswavelength.R b/R/guesswavelength.R new file mode 100644 index 00000000..115fcef3 --- /dev/null +++ b/R/guesswavelength.R @@ -0,0 +1,51 @@ +#' guess wavelengths from character vector +#' +#' character vectors used for names (e.g. colnames for matrices or data.frames) +#' are often treated by \code{\link[base]{make.names}} or similar functions that +#' produce suitable names (e.g. by prepending "X" to numbers.). Such names +#' cannot be directly converted to numeric. +#' +#' \code{guess.wavlength} tries to extract numbers from X which may be +#' surrounded by such "protecting" characters. +#' +#' @param X character with numbers hidden inside +#' +#' @return numeric +#' @export +#' +#' @examples +#' tmp <- data.frame(flu [[,, 400 ~ 410]]) +#' (wl <- colnames (tmp)) +#' guess.wavelength (wl) +guess.wavelength <- function (X) { + wl <- regmatches (X, regexpr(.PATTERN.number, X)) + wl <- as.numeric (wl) + + if (is.null (wl) || length (wl) == 0L || any (is.na (wl))){ + if (hy.getOption("debuglevel") >= 1L) + message("could not guess wavelengths") + wl <- NULL + } + + wl +} + +##' @include regexps.R +##' @include options.R +.test (guess.wavelength) <- function (){ + context ("guess.wavelength") + + test_that("simple test", { + expect_equal (guess.wavelength(1:5), 1:5) + }) + + test_that("wavelengths containing characters", { + wl <- seq(600,602,length.out = 11) + expect_equal (guess.wavelength(make.names(wl)), wl) + }) + + test_that("return NULL if could not guess wavelenths", { + expect_equal (guess.wavelength(colnames(matrix(1:12,3))), NULL) + expect_equal (guess.wavelength(letters[1:4]), NULL) + }) +} \ No newline at end of file diff --git a/R/makeraster.R b/R/makeraster.R new file mode 100644 index 00000000..3fb05929 --- /dev/null +++ b/R/makeraster.R @@ -0,0 +1,135 @@ +##' find an evenly spaced grid for x +##' +##' \code{makeraster} fits the data to the specified raster. +##' +##' \code{fitraster} tries different raster parameter and returns the raster that covers most of the +##' \code{x} values: The differences between the values of \code{x} are calculated (possible step +##' sizes). For each of those step sizes, different points are tried (until all points have been +##' covered by a raster) and the parameter combination leading to the best coverage (i.e. most points +##' on the grid) ist used. +##' +##' Note that only differences between the sorted values of x are considered as step size. +##' @title makeraster +##' @param x numeric to be fitted with a raster +##' @param startx starting point ("origin") for calculation of the raster +##' @param d step size of the raster +##' @param tol tolerance for rounding to new levels: elements of x within \code{tol} of the distance between the levels of the new grid are rounded to the new grid point. +##' @param newlevels levels of the raster +##' @return list with elements +##' \item{x}{the values of \code{x}, possibly rounded to the raster values} +##' \item{levels}{the values of the raster} +##' @export +##' @author Claudia Beleites +##' @examples +##' x <- c (sample (1:20, 10), (0 : 5) + 0.5) +##' raster <- makeraster (x, x [1], 2) +##' raster +##' plot (x) +##' abline (h = raster$levels, col = "#00000040") +##' +##' ## unoccupied levels +##' missing <- setdiff (raster$levels, raster$x) +##' abline (h = missing, col = "red") +##' +##' ## points acutally on the raster +##' onraster <- raster$x %in% raster$levels +##' points (which (onraster), raster$x [onraster], col = "blue", pch = 20) +##' +##' @importFrom utils tail +makeraster <- function (x, startx, d, newlevels, tol = 0.1){ + + if (missing (newlevels)) + ## make sure to cover the whole data range + 1 point + newlevels <- c (rev (seq (startx, min (x, na.rm = TRUE) - d, by = -d) [-1]), + seq (startx, max (x, na.rm = TRUE) + d, by = d) + ) + + inew <- approx (newlevels, seq_along (newlevels), x)$y + + ## rounding + rinew <- round (inew) + wholenum <- abs (inew - rinew) < tol + + xnew <- x + xnew [wholenum] <- newlevels [rinew [wholenum]] + + + list (x = xnew, + + ## usually: drop outside levels 1 and length (newlevels) + levels = newlevels [min (rinew [wholenum]) : max (rinew [wholenum])] + ) + +} + +##' @rdname makeraster +##' @export +##' @examples +##' +##' raster <- fitraster (x) +##' raster +##' plot (x) +##' abline (h = raster$levels, col = "#00000040") +##' +##' ## unoccupied levels +##' missing <- setdiff (raster$levels, raster$x) +##' abline (h = missing, col = "red") +##' +##' ## points acutally on the raster +##' onraster <- raster$x %in% raster$levels +##' points (which (onraster), raster$x [onraster], col = "blue", pch = 20) +##' +##' x <- c (sample (1:20, 10), (0 : 5) + 0.45) +##' raster <- fitraster (x) +##' raster +##' plot (x) +##' abline (h = raster$levels, col = "#00000040") +##' +##' ## unoccupied levels +##' missing <- setdiff (raster$levels, raster$x) +##' abline (h = missing, col = "red") +##' +##' ## points acutally on the raster +##' onraster <- raster$x %in% raster$levels +##' points (which (onraster), raster$x [onraster], col = "blue", pch = 20) +##' +fitraster <- function (x, tol = 0.1){ + levels <- sort (unique (x)) + + if (length (levels) == 1L) + return (list (x = x, levels = levels)) + + dx <- sort (unique (diff (levels))) + + ## reduce by rounding? + dx <- c (dx [! diff (dx) < tol], tail (dx, 1)) + + dx <- rev (dx) + + max.covered <- 0 + + for (d in dx){ + totry <- order (x) + while (length (totry) > 0L){ + ## cat ("totry: ", totry, "\n") + startx <- x [totry [1]] + ## cat ("startx: ", startx, "\n") + + ## cat ("fit: ", c (startx, d), "\n") + raster <- makeraster (x, startx, d, tol = tol) + tmp <- sum (raster$x %in% raster$levels, na.rm = TRUE) + ## cat (" ", tmp, "\n") + if (tmp > max.covered) { + max.covered <- tmp + fit <- raster + } + + if (max.covered == length (x)) + break + + totry <- totry [! raster$x [totry] %in% raster$levels] + } + } + + fit +} diff --git a/R/map.identify.R b/R/map.identify.R new file mode 100644 index 00000000..d6fa48b7 --- /dev/null +++ b/R/map.identify.R @@ -0,0 +1,80 @@ +##' @aliases levelplot,hyperSpec,missing-method +##' @include plotmap.R +##' @rdname levelplot +##' @export +##' @seealso \code{\link[hyperSpec:options]{hyperSpec options}} \code{\link{spc.identify}} +##' \code{\link{map.sel.poly}} +##' @param tol tolerance for \code{map.identify} as fraction of the viewport +##' (i.e. in "npc" \link[grid]{unit}s) +##' @param warn should a warning be issued if no point is within the specified +##' tolerance? See also details. +##' @importFrom grid convertX convertY grid.locator grid.circle gpar +##' @importFrom lattice trellis.focus ltext +##' @importFrom utils modifyList +map.identify <- function (object, model = spc ~ x * y, voronoi = FALSE, ..., + tol = .02, warn = TRUE){ + + if (! interactive ()) + stop ("map.identify works only on interactive graphics devices.") + + chk.hy (object) + validObject (object) + + dots <- modifyList (list (object = object, model = model, ...), + list (subscripts = TRUE)) + + if (voronoi) { dots <- modifyList (list (col = "black", border = "#00000080"), dots) + + ## we need to mix the spectra, otherwise the voronoi plot does not work with + ## complete rectangular maps. mix keeps track of the reordering. + dots$mix <- FALSE + mix <- sample (nrow (object)) + dots$object <- object [mix] + lattice <- do.call (plotvoronoi, dots) + mix <- order (mix) + } else { + lattice <- do.call (plotmap, dots) + mix <- row.seq (object) + } + + print (lattice) + trellis.focus () + + tol = tol^2 + xn <- lattice$panel.args.common$x [mix] + yn <- lattice$panel.args.common$y [mix] + x = as.numeric (convertX (unit (xn, "native"), "npc")) + y = as.numeric (convertY (unit (yn, "native"), "npc")) + + debuglevel <- hy.getOption ("debuglevel") + + res <- numeric (0) + repeat { + tmp <- grid.locator (unit = "npc") + if (debuglevel == 2L) + grid.circle (tmp[1], tmp[2], sqrt (tol), gp = gpar (col = "red")) + + if (is.null (tmp)) + break + + tmp <- as.numeric (tmp) + d2 <- (x - tmp [1])^2 + (y - tmp [2])^2 + pt <- which.min (d2) + if (d2 [pt] <= tol) { + res <- c (res, pt) + if (debuglevel >= 1L) + ltext (xn [pt], yn [pt], label = pt) + } else { + if (warn) { + warning ("No point within tolerance (", tol, " = ", + convertX (unit (sqrt (tol), "npc"), "native")," x-units or", + convertY (unit (sqrt (tol), "npc"), "native")," y-units).") + if (debuglevel == 1L) + grid.circle (tmp[1], tmp[2], sqrt (tol), gp = gpar (col = "red")) + } + } + } + + res +} + diff --git a/R/map.sel.poly.R b/R/map.sel.poly.R new file mode 100644 index 00000000..70271cfa --- /dev/null +++ b/R/map.sel.poly.R @@ -0,0 +1,126 @@ +##' Interactively select a polygon (grid graphics) and highlight points +##' +##' Click the points that should be connected as polygon. Input ends with right click (see +##' \code{\link[grid]{grid.locator}}). Polygon will be drawn closed. +##' +##' \code{map.sel.poly} is a convenience wrapper for \code{\link{plotmap}}, \code{sel.poly}, +##' and \code{\link[sp]{point.in.polygon}}. For custiomized plotting, the plot can be produced by +##' \code{\link{plotmap}}, \code{\link{plotvoronoi}} or \code{\link{levelplot}}, and the result of +##' that plot command handed over to \code{map.sel.poly}, see the example below. +##' +##' If even more customized plotting is required,\code{sel.poly} should be used (see example). +##' +##' @param data hyperSpec object for plotting map or list returned by \code{\link{plotmap}} +##' @param pch symbol to display the points of the polygon for \code{\link{sel.poly}} +##' @param size size for polygon point symbol for \code{\link{sel.poly}} +##' @param ... further arguments for \code{\link[grid]{grid.points}} and +##' \code{\link[grid]{grid.lines}} +##' @return \code{map.sel.poly}: array of indices for points within the selected polygon +##' @author Claudia Beleites, Sebastian Mellor +##' @seealso \code{\link[grid]{grid.locator}}, \code{\link{map.identify}} +##' @export +##' @rdname map-sel-poly +##' @keywords iplot +##' @examples +##' if (interactive ()){ +##' ## convenience wrapper +##' map.sel.poly (chondro) +##' +##' ## customized version +##' data <- sample (chondro [,, 1004 - 2i ~ 1004 + 2i], 300) +##' +##' plotdata <- plotvoronoi (data, clusters ~ y * x, col.regions = alois.palette ()) +##' print (plotdata) +##' map.sel.poly (plotdata) +##' +##' ## even more customization: +##' plotvoronoi (data) +##' +##' ## interactively retrieve polygon +##' polygon <- sel.poly () +##' +##' ## find data points within polygon +##' require ("sp") +##' i.sel <- which (point.in.polygon (data$x, data$y, polygon [, 1], polygon [, 2]) > 0) +##' +##' ## work with selected points +##' grid.points (unit (data$x [i.sel], "native"), unit (data$y [i.sel], "native")) +##' } +map.sel.poly <- function (data, pch = 19, size = 0.3, ...){ + + if (! interactive ()) + stop ("map.sel.poly works only on interactive graphics devices.") + + ## sp is only in Suggests, not a strict Dependency. + if (! requireNamespace ("sp")) + stop ("package sp required for point.in.polygon ()") + + if (is (data, "hyperSpec")) { + ## plot hyperSpec object + print (plotmap (data)) + x <- data$x + y <- data$y + } else if (is (data, "trellis")) { + + ## data is list with plotting data of hyperSpec object + x <- data$panel.args.common$x + y <- data$panel.args.common$y + } else { + stop ("data must either be a hyperSpec object or a trellis object as returned by plotmap, plotvoronoi, or levelplot") + } + + poly <- sel.poly (pch = pch, size = size, ...) + + pts <- sp::point.in.polygon (x, y, poly [, 1], poly [, 2]) + + ind <- pts > 0 + + if (! any (ind)) + warning ("Empty selection: no point in polygon.") + + ind +} + + + +##' @return \code{sel.poly}: n x 2 matrix with the corner points of the polygon +##' @author Claudia Beleites +##' @seealso \code{\link[grid]{grid.locator}} +##' @export +##' @keywords iplot +##' @rdname map-sel-poly +##' @importFrom grid grid.lines grid.points +##' @importFrom utils tail +sel.poly <- function (pch = 19, size = 0.3, ...){ + if (! interactive ()) + stop ("sel.poly works only on interactive graphics devices.") + + trellis.focus () + + pts <- matrix (NA, nrow = 0, ncol = 2) + + repeat { + pt <- grid.locator (unit="native") + if (!is.null (pt)){ + pts <- rbind (pts, as.numeric (pt)) # comparably few executions: low performance doesn't matter + + ## display the clicked point + grid.points (unit (tail (pts [, 1], 1), "native"), + unit (tail (pts [, 2], 1), "native"), pch = pch, + size = unit (size, "char"), gp = gpar (...)) + + ## connect last 2 points by line + if (nrow (pts) > 1L) + grid.lines (unit (tail (pts [, 1L], 2L) , "native"), + unit (tail (pts [, 2L], 2L) , "native"), gp = gpar (...)) + } else { + ## visually close polygon (if at least 3 pts) + if (nrow (pts) > 2L) + grid.lines (unit (c (tail (pts [, 1L], 1L), pts [1L, 1L]), "native"), + unit (c (tail (pts [, 2L], 1L), pts [1L, 2L]), "native"), gp = gpar (...)) + break + } + } + + pts +} diff --git a/R/mark.dendrogram.R b/R/mark.dendrogram.R new file mode 100644 index 00000000..1615ec6e --- /dev/null +++ b/R/mark.dendrogram.R @@ -0,0 +1,73 @@ +##' Groups are marked by colored rectangles as well as by their levels. +##' +##' The dendrogram should be plotted separately, see the example. +##' @title Mark groups in \code{\link[stats]{hclust}} dendrograms +##' @param dendrogram the dendrogram +##' @param groups factor giving the the groups to mark +##' @param col vector with colors for each group +##' @param pos.marker top of the marker rectangle +##' @param height height of the marker rectangle +##' @param pos.text position of the text label +##' @param border see \code{\link[graphics]{text}} +##' @param text.col color (vector) giving the color for the text markers +##' @param label side label see example +##' @param label.right should the side labels be at the right side? +##' @param ... handed to \code{\link[graphics]{rect}} and \code{\link[graphics]{text}} +##' @author Claudia Beleites +##' @export +##' @rdname mark.dendrogram +##' @examples +##' +##' dend <- hclust (pearson.dist (laser[[]])) +##' par (xpd = TRUE, mar = c (5.1, 4, 4, 3)) # allows plotting into the margin +##' plot (dend, hang = -1, labels = FALSE) +##' +##' ## mark clusters +##' clusters <- as.factor (cutree (dend, k = 4)) +##' levels (clusters) <- LETTERS [1 : 4] +##' mark.dendrogram (dend, clusters, label = "cluster") +##' +##' ## mark independent factor +##' mark.dendrogram (dend, as.factor (laser [,,405.36] > 11000), +##' pos.marker = -0.02, pos.text = - 0.03) +##' +##' ## mark continuous variable: convert it to a factor and omit labels +##' mark.dendrogram (dend, cut (laser [[,, 405.36]], 100), alois.palette (100), +##' pos.marker = -.015, text.col = NA, +##' label = expression (I [lambda == 405.36~nm]), label.right = FALSE) +##' +##' @importFrom utils head tail +mark.dendrogram <- function (dendrogram, groups, col = seq_along (unique (groups)), + pos.marker = 0, + height = 0.025 * max (dendrogram$height), + pos.text = -2.5 * height, + border = NA, text.col = "black", label, label.right = TRUE, + ...){ + if (! is.factor (groups)) + groups <- as.factor (groups) + + groups.x <- groups [dendrogram$order] # clusters in order on x axis + + rle.groups <- rle (as.integer (groups.x)) # run-length encoding gives borders + + end <- cumsum (rle.groups$lengths) + 0.5 + start <- c (0.5, (head (end, -1))) + text <- (start + end) / 2 + + text.col <- rep (text.col, length.out = length (text)) + + for (g in seq_along (rle.groups$lengths)){ + rect (xleft = start [g], ybottom = pos.marker - height, + xright = end [g], ytop = pos.marker, + col = col [rle.groups$values[g]], border = border, ...) + if (! is.na (text.col [g])) + text (x = text [g], y = pos.text, + levels (groups) [rle.groups$values [g]], + col = text.col [rle.groups$values [g]], ...) + } + + if (! missing (label)) + text (x = label.right * tail (end, 1) * 1.01, y = pos.marker - height/2, + label, adj = c(1 - label.right, .5)) + +} diff --git a/R/mark.peak.R b/R/mark.peak.R new file mode 100644 index 00000000..a9937470 --- /dev/null +++ b/R/mark.peak.R @@ -0,0 +1,24 @@ +##' Mark peak +##' +##' Marks location of the \emph{first} spectrum at the data point closest to the +##' specified position on the current plot. +##' +##' @param spc the \code{hyperSpec} object +##' @param xpos position of the peak(s) in current x-axis units +##' @param col color of the markers and text +##' +##' +##' @author R. Kiselev +##' @export +##' @examples +##' plot (chondro [7]) +##' markpeak (chondro [7], 1662) +markpeak <- function(spc, xpos, col="red"){ + + chk.hy (spc) + validObject (spc) + + plot(spc[1,,xpos], add=T, lines.args=list(type="p"), col=col) + text(x=xpos, y=spc[[1,,xpos]], col=col, labels=sprintf("<- %.1f", xpos), + adj=c(-0.2,0.37), srt=90, cex=0.75) +} diff --git a/R/matlab.palette.R b/R/matlab.palette.R new file mode 100644 index 00000000..c6770491 --- /dev/null +++ b/R/matlab.palette.R @@ -0,0 +1,49 @@ +##' Matlab-like Palettes +##' Two palettes going from blue over green to red, approximately as the +##' standard palette of Matlab does. The second one has darker green values and +##' is better suited for plotting lines on white background. +##' +##' +##' @rdname palettes +##' @aliases matlab.palette +##' @param n the number of colors to be in the palette. +##' @param ... further arguments are handed to \code{\link[grDevices]{rainbow}} +##' (\code{alois.palette}: \code{\link[grDevices]{colorRampPalette}}) +##' @return A vector containing the color values in the form "#rrbbggaa". +##' @author C. Beleites and A. Bonifacio +##' @seealso \code{\link[grDevices]{rainbow}} +##' @export +##' @importFrom grDevices rainbow +##' @keywords color +##' @examples +##' +##' plotmap (chondro [,, 778], col.regions = matlab.palette ()) +##' +matlab.palette <- function (n = 100, ...) { + rev (rainbow (n, start = 0, end = 4/6, ...)) +} + +##' @rdname palettes +##' @aliases matlab.dark.palette +##' @export +##' @examples +##' +##' plot (flu, col = matlab.dark.palette (nrow (flu))) +##' @importFrom grDevices col2rgb rgb +matlab.dark.palette <- function (n = 100, ...) { + pal <- rev (rainbow (n, start = 0, end = 4/6, ...)) + pal <- col2rgb(pal) + pal ["green",] <- pal ["green",] / 2 + + rgb (t (pal)/255) +} + +##' @rdname palettes +##' @export +##' @examples +##' +##' plotmap (chondro, col = alois.palette) +##' @importFrom grDevices colorRampPalette +alois.palette <- function (n = 100, ...) { + colorRampPalette(c("black", "blue","cyan", "green", "yellow", "red"), ...) (n) +} diff --git a/R/mvtnorm.R b/R/mvtnorm.R new file mode 100644 index 00000000..2700bd8c --- /dev/null +++ b/R/mvtnorm.R @@ -0,0 +1,96 @@ +.rmmvnorm <- function (n, mean, sigma) { + + if (! requireNamespace("mvtnorm")) + stop ("package 'mvtnorm' needed to generate multivariate normal random data.") + + .group <- rep.int (seq_along (n), n) + + ## make indices so that pooled or individual covariance matrices can be used. + if (length (dim (sigma)) == 3L) + isigma <- seq_len (dim (sigma) [3]) + else { + isigma <- rep (1L, nrow (mean)) + dim (sigma) <- c (dim (sigma), 1L) + } + + tmp <- matrix (NA_real_, sum (n), ncol (mean)) + for (i in seq_along (n)) + tmp [.group == i,] <- mvtnorm::rmvnorm (n [i], mean [i,], sigma [,, isigma [i]]) + + attr (tmp, "group") <- .group + + tmp +} + +##' @export +##' @name rmmvnorm +setGeneric ("rmmvnorm", .rmmvnorm) + + +##' Multivariate normal random numbers +##' +##' Interface functions to use \code{\link[mvtnorm]{rmvnorm}} for +##' \code{\link[hyperSpec]{hyperSpec-class}} objects. +##' +##' The \code{mvtnorm} method for hyperSpec objects supports producing multivariate normal data for +##' groups with different mean but common covariance matrix, see the examples. +##' +##' @param n vector giving the numer of cases to generate for each group +##' @param mean matrix with mean cases in rows +##' @param sigma common covariance matrix or array (\code{ncol (mean)} x \code{ncol (mean)} x \code{nrow (mean)}) with individual covariance matrices for the groups. +##' @export +##' @seealso \code{\link[mvtnorm]{rmvnorm}} +##' +##' \code{\link[hyperSpec]{cov}} and \code{\link[hyperSpec]{pooled.cov}} about calculating covariance of hyperSpec objects. +##' @rdname rmmvnorm +##' @aliases rmmvnorm rmmvnorm,hyperSpec-method +##' @docType methods +##' @examples +##' ## multiple groups, common covariance matrix +##' +##' if (require ("mvtnorm")){ +##' pcov <- pooled.cov (chondro, chondro$clusters) +##' rnd <- rmmvnorm (rep (10, 3), mean = pcov$mean, sigma = pcov$COV) +##' plot (rnd, col = rnd$.group) +##' } + +setMethod ("rmmvnorm", signature (n = "numeric", mean = "hyperSpec", sigma = "matrix"), + function (n, mean, sigma){ + tmp <- .rmmvnorm (n, mean@data$spc, sigma) + + data <- mean [attr (tmp, "group"),, drop = FALSE] + if (hy.getOption ("gc")) gc () + data@data$spc <- tmp + if (hy.getOption ("gc")) gc () + data$.group <- attr (tmp, "group") + if (hy.getOption ("gc")) gc () + data + }) + +##' @rdname rmmvnorm +##' @export +setMethod ("rmmvnorm", signature (n = "numeric", mean = "hyperSpec", sigma = "array"), + function (n, mean, sigma){ + tmp <- .rmmvnorm (n, mean@data$spc, sigma) + + data <- mean [attr (tmp, "group"),, drop = FALSE] + if (hy.getOption ("gc")) gc () + data@data$spc <- tmp + if (hy.getOption ("gc")) gc () + data$.group <- attr (tmp, "group") + if (hy.getOption ("gc")) gc () + data + }) + +##' @rdname rmmvnorm +##' @export +setMethod ("rmmvnorm", signature (n = "numeric", mean = "matrix", sigma = "matrix"), + .rmmvnorm) + +##' @rdname rmmvnorm +##' @export +setMethod ("rmmvnorm", signature (n = "numeric", mean = "matrix", sigma = "array"), + .rmmvnorm) + +## produces matrices instead of hyperSpec objects. +## mapply (rmvnorm, n = 1:3, mean = pcov$mean, MoreArgs= list (sigma = pcov$COV), SIMPLIFY = FALSE)) diff --git a/R/normalize01.R b/R/normalize01.R new file mode 100644 index 00000000..f1299386 --- /dev/null +++ b/R/normalize01.R @@ -0,0 +1,90 @@ +##' Normalize numbers -> [0, 1] +##' +##' The input \code{x} is mapped to [0, 1] by subtracting the minimum and subsequently dividing by +##' the maximum. If all elements of \code{x} are equal, 1 is returned. +##' +##' @title normalization for mixed colors +##' @name normalize01 +##' @param x vector with values to transform +##' @param tolerance tolerance level for determining what is 0 and 1 +##' @param ... additional parameters such as \code{tolerance} handed down. +##' @return vector with \code{x} values mapped to the interval [0, 1] +##' @author C. Beleites +##' @seealso \code{\link[hyperSpec]{wl.eval}}, \code{\link[hyperSpec]{vanderMonde}} +##' @export +setGeneric ("normalize01", function (x, ...) standardGeneric ("normalize01")) + +##' @export +##' @rdname normalize01 +setMethod (normalize01, signature (x = "matrix"), + function (x, tolerance = hy.getOption ("tolerance")){ + m <- apply (x, 1, min) + x <- sweep (x, 1, m, `-`) + m <- apply (x, 1, max) + x <- sweep (x, 1, m, `/`) + x [m < tolerance, ] <- 1 + x +}) + +##' @export +##' @rdname normalize01 +setMethod ("normalize01", signature (x = "numeric"), function (x, tolerance = hy.getOption ("tolerance")){ + x <- x - min (x) + + m <- max (x) + if (m < tolerance) + rep (1, length (x)) + else + x / m +}) + +##' @export +##' @rdname normalize01 +setMethod (normalize01, signature (x = "hyperSpec"), function (x, ...){ + validObject (x) + + x@data$spc <- normalize01 (unclass (x@data$spc), ...) + + ## logbook + x +}) + +##' @include unittest.R +.test (normalize01) <- function (){ + context ("normalize01") + + test_that("random numbers", { + x <- runif (10, min = -1e3, max = 1e3) + tmp.x <- normalize01 (x) + + expect_equivalent (min (normalize01 (x)), 0) + expect_equivalent (max (normalize01 (x)), 1) + + expect_equivalent (normalize01 (x), (x - min (x)) / diff (range (x))) + }) + + test_that("0, 1, constant", { + expect_equivalent (normalize01 (1), 1) + expect_equivalent (normalize01 (0), 1) + expect_equivalent (normalize01 (5), 1) + expect_equivalent (normalize01 (rep (5, 3L)), rep (1, 3L)) + }) + + + test_that("matrix method", { + m <- matrix (runif (12), 3) + m [3, ] <- 7 + + tmp.m <- normalize01 (m) + + expect_equal (apply (tmp.m, 1, max), c (1, 1, 1)) + expect_equal (apply (tmp.m, 1, min), c (0, 0, 1)) + }) + + test_that("hyperSpec method", { + tmp.hy <- normalize01 (-vanderMonde (flu, 1)) + + expect_equal (apply (tmp.hy [[]], 1, min), 1 : 0) + expect_equal (apply (tmp.hy [[]], 1, max), c (1, 1)) + }) +} diff --git a/R/options.R b/R/options.R new file mode 100644 index 00000000..0121db8c --- /dev/null +++ b/R/options.R @@ -0,0 +1,174 @@ + + +.options <- list (debuglevel = 0L, + gc = FALSE, + file.remove.emptyspc = TRUE, + file.keep.name = TRUE, + tolerance = sqrt (.Machine$double.eps), + wl.tolerance = sqrt (.Machine$double.eps), + plot.spc.nmax = 25, + ggplot.spc.nmax = 10 + ) + +##' Options for package hyperSpec +##' Functions to access and set hyperSpec's options. +##' +##' Currently, the following options are defined: +##' \tabular{llll}{ +##' \bold{Name} \tab \bold{Default Value (range)} \tab \bold{Description} \tab \bold{Used by}\cr +##' debuglevel \tab 0 (1L 2L 3L) \tab amount of debugging information produced \tab \code{\link{spc.identify}} \code{\link{map.identify}}\cr +##' \tab \tab \tab various file import functions\cr +##' \tab \tab \tab \code{\link{spc.fit.poly.below}}\cr +##' gc \tab FALSE \tab triggers frequent calling of gc () \tab \code{\link{read.ENVI}}, \code{new ("hyperSpec")}\cr +##' file.remove.emptyspc \tab TRUE \tab remove empty spectra directly on file import \tab various file import functions\cr +##' file.keep.name \tab TRUE \tab always create filename column \tab various file import functions\cr +##' tolerance \tab \code{sqrt (.Machine$double.eps)} \tab tolerance for numerical comparisons \tab \code{\link{normalize01}}, file import: \code{file.remove.emptyspc}\cr +##' wl.tolerance \tab \code{sqrt (.Machine$double.eps)} \tab tolerance for comparisons of the wavelength axis \tab \code{\link{all.equal}}, \code{\link{collapse}}, \code{\link{rbind}}\cr +##' plot.spc.nmax \tab 25 \tab number of spectra to be plotted by default \tab \code{\link{plotspc}}\cr +##' ggplot.spc.nmax \tab 10 \tab \tab \code{\link{qplotspc}}\cr +##' } +##' +##' \code{hy.setOptions} will discard any values that were given without a +##' name. +##' +##' @rdname options +##' @param ... \code{hy.setOptions}: pairs of argument names and values. +##' +##' \code{hy.getOptions}: indices (or names) of the options. +##' @return +##' \tabular{ll}{ +##' \code{hy.getOptions} \tab returns a list of all options\cr +##' \code{hy.setOptions} \tab invisibly returns a list with the options \cr +##' \code{hy.getOption} \tab returns the value of the requested option \cr +##' } +##' @author C. Beleites +##' @keywords misc +##' @export +##' @examples +##' +##' hy.getOptions () +##' +hy.getOptions <- function (...){ + dots <- c (...) + if (length (dots) == 0L) + .options + else + .options [dots] +} + +##' @include unittest.R +.test (hy.getOptions) <- function (){ + context("hy.getOptions") + + test_that("proper return", { + hy.opts <- get (".options", asNamespace("hyperSpec")) + expect_equal (hy.getOptions (), hy.opts) + + expect_equal (hy.getOptions ("debuglevel"), + hy.opts["debuglevel"]) + + .options <- list () + expect_equal (hy.getOptions (), hy.opts) + }) +} + +##' @rdname options +##' @export +##' @param name the name of the option +hy.getOption <- function (name){ + .options [[name]] +} + +##' @rdname options +##' @export +##' @importFrom utils modifyList +hy.setOptions <- function (...){ + new <- list (...) + + ## if called with list in 1st argument, use that list + if (length (new) == 1 && is.list (new [[1]])) + new <- new [[1]] + + names <- nzchar (names (new)) + + if (! all (names) || length (names) != length (new)) + warning ("options without name are discarded: ", which (! names)) + + opts <- modifyList (.options, new [names]) + + opts$tolerance <- .checkpos (opts$tolerance, "tolerance") + opts$wl.tolerance <- .checkpos (opts$wl.tolerance, "wl.tolerance") + + assign(".options", opts, envir = asNamespace ("hyperSpec")) + + invisible (opts) +} + +## check particular options that should exist and be finite and strictly positive +.checkpos <- function (opt, name){ + if (length (opt) != 1L || ! is.finite (opt) || opt < .Machine$double.eps){ + warning (name, " must be a strictly positive finite number => set to .Machine$double.eps (", .Machine$double.eps, ").") + opt <- .Machine$double.eps + } + + opt +} + +.test (hy.setOptions) <- function (){ + context ("hy.setOptions") + + old <- hy.getOptions () + on.exit(hy.setOptions (old)) + + test_that("new option and proper return value", { + expect_equal(hy.setOptions (bla = 1)$bla, 1) + expect_equal (hy.getOption ("bla"), 1) + }) + + test_that("setting", { + tmp <- hy.setOptions (debuglevel = 27) + expect_equal(tmp$debuglevel, 27) + + tmp <- hy.setOptions (list (debuglevel = 20)) + expect_equal(tmp$debuglevel, 20) + + tmp <- hy.setOptions (debuglevel = 27, tolerance = 4) + expect_equal(tmp$debuglevel, 27) + expect_equal(tmp$tolerance, 4) + + tmp <- hy.setOptions (list (debuglevel = 20, tolerance = 5)) + expect_equal(tmp$debuglevel, 20) + expect_equal(tmp$tolerance, 5) + }) + + test_that ("restrictions on tolerances", { + for (o in c ("tolerance", "wl.tolerance")){ + expect_warning(hy.setOptions (structure (list (0), .Names = o))) + expect_equal(hy.getOption (o), .Machine$double.eps, label = o) + + hy.setOptions (structure (list (1), .Names = o)) + expect_equal(hy.getOption (o), 1) + expect_warning(hy.setOptions (structure (list (-1), .Names = o))) + expect_equal(hy.getOption (o), .Machine$double.eps, label = o) + + hy.setOptions (structure (list (1), .Names = o)) + expect_equal(hy.getOption (o), 1) + expect_warning(hy.setOptions (structure (list (NA), .Names = o))) + expect_equal(hy.getOption (o), .Machine$double.eps, label = o) + } + + expect_warning(hy.setOptions (tolerance = NULL)) + expect_equal(hy.getOption ("tolerance"), .Machine$double.eps) + + expect_warning(hy.setOptions (wl.tolerance = NULL)) + expect_equal(hy.getOption ("wl.tolerance"), .Machine$double.eps) + }) + + + test_that("options must be named", { + tmp.a <- hy.getOptions () + expect_warning (tmp.b <- hy.setOptions (1)) + expect_equal(tmp.a, tmp.b) + }) + +} diff --git a/R/orderwl.R b/R/orderwl.R new file mode 100644 index 00000000..4ee8ed35 --- /dev/null +++ b/R/orderwl.R @@ -0,0 +1,53 @@ + +##' Sorting the Wavelengths of a hyperSpec Object +##' Rearranges the \code{hyperSpec} object so that the wavelength vector is in increasing (or +##' decreasing) order. +##' +##' The wavelength vector is sorted and the columns of the spectra matrix are rearranged accordingly. +##' +##' @param x The \code{hyperSpec} object. +##' @param na.last,decreasing Handed to \code{\link[base]{order}}. +##' @return A \code{hyperSpec} object. +##' @author C. Beleites +##' @export +##' @seealso \code{\link[base]{order}} +##' @examples +##' +##' ## Example 1: different drawing order in plotspc +##' spc <- new ("hyperSpec", spc = matrix (rnorm (5) + 1:5, ncol = 5)) +##' spc <- cbind (spc, spc+.5) +##' +##' plot (spc, "spc") +##' text (wl (spc), spc [[]], as.character (1:10)) +##' spc <- orderwl (spc) +##' plot (spc, "spc") +##' text (wl (spc), spc [[]], as.character (1:10)) +##' +##' ## Example 2 +##' spc <- new ("hyperSpec", spc = matrix (rnorm (5)*2 + 1:5, ncol = 5)) +##' spc <- cbind (spc, spc) +##' +##' plot (seq_len(nwl(spc)), spc[[]], type = "b") +##' spc[[]] +##' +##' spc <- orderwl (spc) +##' lines (seq_len(nwl(spc)), spc[[]], type = "l", col = "red") +##' spc[[]] +##' +orderwl <- function (x, na.last = TRUE, decreasing = FALSE){ + chk.hy (x) + validObject (x) + + .orderwl (x) +} + +.orderwl <- function (x, na.last = TRUE, decreasing = FALSE){ + ord <- order (x@wavelength, na.last = na.last, decreasing = decreasing) + + if (any (ord != seq_along (x@wavelength))){ + x@data$spc <- x@data$spc [, ord, drop = FALSE] + .wl(x) <- x@wavelength [ord] + } + + x +} diff --git a/R/paste.row.R b/R/paste.row.R new file mode 100644 index 00000000..bca900c1 --- /dev/null +++ b/R/paste.row.R @@ -0,0 +1,63 @@ + +###----------------------------------------------------------------------------- +### +### .paste.row +### +### +##' @importFrom utils head tail +.paste.row <- function (x, label = "", name = "", ins = 0, i = NULL, val = FALSE, + ...){ + .print.val <- function (x, range = TRUE, digits = getOption ("digits"), + max.print = 5, shorten.to = c (2,1)){ + if (is.list (x)){ # also for data.frames + paste ("", "columns/entries", paste (names (x), collapse = ", ")) + } else { + if (length (x) == 0) + return ("") + + if (any (is.na (x))) + text <- "+ NA" + else + text <- "" + + if (range) + x <- sort (unique (as.vector (x))) + else + x <- as.vector (x) + + if (length (x) > max.print){ + from <- format (head (x, shorten.to [1]), digits = digits, trim = TRUE) + to <- format (tail (x, shorten.to [2]), digits = digits, trim = TRUE) + + text <- paste (paste (from, collapse = " "), "...", + paste (to, collapse = " "), text, collapse = " ") + } else { + text <- paste (paste (format (x, digits = digits, trim = TRUE), + collapse = " "), + text, collapse = " ") + } + + paste (if (range) " rng ", text, collapse = "") + + } + } + + label <- paste (as.character (label), "", collapse = " ") + + paste (paste (rep (" ", ins), collapse = ""), + if (!is.null (i)) paste(i, ". ", collapse = "", sep = ""), + name, + ": ", + label, + "[", + paste (class (x), collapse = ", "), + if (! is.null (dim (x))) + paste (if (is.matrix (x) & all (class (x) != "matrix")) " matrix x " else + if (is.array (x) & all (class (x) != "array") & all (class (x) != "matrix")) + " array x ", + paste (dim (x) [-1], collapse = " x ") + , sep = ""), + "]", + if (val) .print.val (x, ...), + sep ="", collapse = "") +} diff --git a/R/pearson.dist.R b/R/pearson.dist.R new file mode 100644 index 00000000..1bf488e4 --- /dev/null +++ b/R/pearson.dist.R @@ -0,0 +1,60 @@ +##' Distance based on Pearson's \eqn{R^2}{R squared} +##' +##' The calculated distance is +##' \eqn{D^2 = \frac{1 - COR (\code{x}')}{2}}{D^2 = (1 - COR (x')) / 2} +##' +##' The distance between the rows of \code{x} is calculated. The possible +##' values range from 0 (prefectly correlated) over 0.5 (uncorrelated) to 1 +##' (perfectly anti-correlated). +##' +##' @param x a matrix +##' @return distance matrix (distance object) +##' @author C. Beleites +##' @seealso \code{\link[stats]{as.dist}} +##' @references S. Theodoridis and K. Koutroumbas: Pattern Recognition, 3rd ed., p. 495 +##' @keywords cluster +##' @export +##' @examples +##' +##' pearson.dist (flu [[]]) +##' pearson.dist (flu) +pearson.dist <- function (x) { + + x <- as.matrix (x) + + ## center & scale *row*s + ## (n - 1) factor cancels out between variance scaling and calculating correlation + x <- x - rowMeans (x) + x <- x / sqrt (rowSums (x^2)) + + if (hy.getOption("gc")) gc () + x <- tcrossprod (x) + + ## keep only lower triagonal + if (hy.getOption("gc")) gc () + x <- as.dist (x) + + if (hy.getOption("gc")) gc () + 0.5 - x / 2 +} + +##' @include unittest.R +.test (pearson.dist) <- function (){ + context ("pearson.dist") + + test_that("pearson.dist against manual calculation", { + expect_equivalent ( + pearson.dist (flu), + as.dist (0.5 - cor (t (as.matrix (flu))) / 2)) + }) +} + +## benchmark +# function (){ +# m <- sample (chondro, 10000) [[]] +# microbenchmark ( +# cor = as.dist (0.5 - cor (t (as.matrix (m))) / 2), +# tcross = pearson.dist (m), +# times = 10L +# ) +# } diff --git a/R/plotc.R b/R/plotc.R new file mode 100644 index 00000000..08cb8f9d --- /dev/null +++ b/R/plotc.R @@ -0,0 +1,154 @@ +###----------------------------------------------------------------------------- +### +### plotc - plot timeseries, concentration, ... +### +### C. Beleites + + + +##' Calibration- and Timeseries Plots, Depth-Profiles and the like +##' \code{plotc} plots intensities of a \code{hyperSpec} object over another +##' dimension such as concentration, time, or a spatial coordinate. +##' +##' If \code{func} is not \code{NULL}, the summary characteristic is calculated +##' first by applying \code{func} with the respective arguments (in +##' \code{func.args}) to each of the spectra. If \code{func} returns more than +##' one value (for each spectrum), the different values end up as different +##' wavelengths. +##' +##' If the wavelength is not used in the model specification nor in +##' \code{groups}, nor for specifying \code{subsets}, and neither is +##' \code{func} given, then only the first wavelength's intensities are plotted +##' and a warning is issued. +##' +##' The special column names \code{.rownames} and \code{.wavelength} may be +##' used. +##' +##' The actual plotting is done by \code{\link[lattice]{xyplot}}. +##' +##' @param object the \code{hyperSpec} object +##' @param model the lattice model specifying the plot +##' @param func function to compute a summary value from the spectra to be +##' plotted instead of single intensities +##' @param func.args further arguments to \code{func} +##' @param groups grouping variable, e.g. \code{.wavelength} if intensities of +##' more than one wavelength should be plotted +##' @param ... further arguments to \code{\link[lattice]{xyplot}}. +##' @author C. Beleites +##' @seealso \code{\link[lattice]{xyplot}} +##' @keywords hplot +##' @export +##' @import graphics +##' @importFrom lattice xyplot +##' @examples +##' +##' +##' ## example 1: calibration of fluorescence +##' plotc (flu) ## gives a warning +##' +##' plotc (flu, func = mean) +##' plotc (flu, func = range, groups = .wavelength) +##' +##' plotc (flu[,,450], ylab = expression (I ["450 nm"] / a.u.)) +##' +##' +##' calibration <- lm (spc ~ c, data = flu[,,450]$.) +##' summary (calibration) +##' plotc (flu [,, 450], type = c("p", "r")) +##' +##' conc <- list (c = seq (from = 0.04, to = 0.31, by = 0.01)) +##' ci <- predict (calibration, newdata = conc, interval = "confidence", level = 0.999) +##' +##' panel.ci <- function (x, y, ..., +##' conc, ci.lwr, ci.upr, ci.col = "#606060") { +##' panel.xyplot (x, y, ...) +##' panel.lmline (x, y,...) +##' panel.lines (conc, ci.lwr, col = ci.col) +##' panel.lines (conc, ci.upr, col = ci.col) +##' } +##' +##' plotc (flu [,, 450], panel = panel.ci, +##' conc = conc$c, ci.lwr = ci [, 2], ci.upr = ci [, 3]) +##' +##' ## example 2: time-trace of laser emission modes +##' cols <- c ("black", "blue", "#008000", "red") +##' wl <- i2wl (laser, c(13, 17, 21, 23)) +##' +##' plotspc (laser, axis.args=list (x = list (at = seq (404.5, 405.8, .1)))) +##' for (i in seq_along (wl)) +##' abline (v = wl[i], col = cols[i], lwd = 2) +##' +##' plotc (laser [,, wl], spc ~ t, groups = .wavelength, type = "b", +##' col = cols) +##' +##' @importFrom utils modifyList +plotc <- function (object, model = spc ~ c, groups = NULL, + func = NULL, func.args = list (), ...){ + chk.hy (object) + validObject (object) + + dots <- list (...) + + if (! is.null (func)) + object <- do.call (apply, c (list (object, 1, func), func.args)) + + ## allow to plot against the row number + object$.row <- row.seq (object) + + groups <- substitute (groups) + + ## find out whether the wavelengths are needed individually, + ## if not, use only the first wavelength and issue a warning + parsed.formula <- latticeParseFormula (model, + as.long.df (object [1, , 1, wl.index = TRUE], rownames = TRUE), + groups = groups, dimension = 2) + + use.c <- parsed.formula$right.name + use.spc <- parsed.formula$left.name + + if (use.spc == "spc" && nwl (object) > 1 && is.null (func) && + !any (grepl (".wavelength", c(as.character (model), + as.character (groups), + as.character (dots$subset))))) { + object <- object [,, 1, wl.index = TRUE] + warning ("Intensity at first wavelengh only is used.") + } + + if (is.null (func)) + ylab <- object@label [[use.spc]] + else { + ylab <- substitute (func ()) + ylab [[2]] <- object@label [[use.spc]][[1]] + for (i in seq_along (func.args)){ + if (names (func.args)[[i]] == "") + ylab [[i + 2]] <- func.args [[i]] + else + ylab [[i + 2]] <- bquote (.(x) == .(y), + list (x = names (func.args) [[i]], + y = as.character (func.args [[i]]))) + + } + ylab <- as.expression (ylab) + } + + ## set defaults: axis labels, plot style + dots <- modifyList (list (xlab = object@label [[use.c]], + ylab = ylab, + pch = 19), + dots) + + ## expand the data.frame + df <- as.long.df (object, rownames = TRUE, wl.factor = TRUE) + + ## if plots should be grouped or conditioned by wavelength, + ## it is better to have a factor + if ((! is.null (parsed.formula$condition) && + parsed.formula$condition == ".wavelength") || + (! is.null (groups) && + as.character (groups) == ".wavelength")) + df$.wavelength <- as.factor (df$.wavelength) + + ## plot + do.call(xyplot, c (list (x = model, data = df, groups = groups), dots)) +} + diff --git a/R/plotmap.R b/R/plotmap.R new file mode 100644 index 00000000..c6d14d8b --- /dev/null +++ b/R/plotmap.R @@ -0,0 +1,116 @@ +################################################################################# +### +### plotmap - plot spectral maps +### +### plots intensity or extra data column over 2 extra data columns + +## TODO: check wheter func should be applied or not + + + +##' Plot a Map and Identify/Select Spectra in the Map +##' \code{\link[lattice]{levelplot}} functions for hyperSpec objects. An image or map of a summary +##' value of each spectrum is plotted. Spectra may be identified by mouse click. +##' +##' The \code{model} can contain the special column name \code{.wavelength} to specify the wavelength +##' axis. +##' +##' \code{plotmap}, \code{map.identify}, and the \code{levelplot} methods internally use the same +##' gateway function to \code{\link[lattice]{levelplot}}. Thus \code{transform.factor} can be used +##' with all of them and the panel function defaults to \code{\link[lattice]{panel.levelplot.raster}} +##' for all three. Two special column names, \code{.rownames} and \code{.wavelength} may be used. +##' +##' \code{levelplot} plots the spectra matrix. +##' +##' \code{plotvoronoi} calls \code{plotmap} with different default settings, namely the panel +##' function defaults to \code{\link[latticeExtra]{panel.voronoi}}. +##' \code{\link[latticeExtra]{panel.voronoi}} depends on either of the packages 'tripack' or 'deldir' +##' being installed. For further information, please consult the help page of +##' \code{\link[latticeExtra]{panel.voronoi}}. On the \code{\link{chondro}} data set, \code{plotmap} +##' is roughly 5 times faster than \code{plotvoronoi} using tripack, and ca. 15 times faster than +##' \code{plotvoronoi} using deldir. Package tripack, however, is free only for non-commercial +##' use. Also, it seems that tripack version hang (R running at full CPU power, but not responding +##' nor finishing the calculation) for certain data sets. In this case, \code{mix = TRUE} may help. +##' +##' \code{map.identify} calls \code{plotmap} and \code{plotvoronoi}, respectively and waits for +##' (left) mouse clicks on points. Other mouse clicks end the input. +##' +##' Unlike \code{\link[lattice]{panel.identify}}, the indices returned by \code{map.identify} are in +##' the same order as the points were clicked. Also, multiple clicks on the same point are returned +##' as multiple entries with the same index. +##' +##' \code{map.identify} uses option \code{debuglevel} similar to \code{\link{spc.identify}}: +##' \code{debuglevel == 1} will plot the tolerance window if no data point was inside (and +##' additionally labels the point) while \code{debuglevel == 2} will always plot the tolerance +##' window. +##' +##' The \code{map.sel.*} functions offer further interactive selection, see +##' \code{\link{map.sel.poly}}. +##' +##' @rdname levelplot +##' @aliases plotmap plotvoronoi levelplot,formula,hyperSpec-method +##' levelplot,hyperSpec,missing-method map.identify +##' @param object,data the \code{hyperSpec} object +##' @param model,x formula specifying the columns of object that are to be +##' displayed by \code{\link[lattice]{levelplot}} +##' @param func,func.args Before plotting, \code{plotmap} applies function +##' \code{func} with the arguments given in the list \code{func.args} to each +##' of the spectra. Thus a single summary value is displayed for each of the +##' spectra. +##' +##' This can be suppressed manually by setting \code{func} to NULL. It is automatically suppressed if +##' \code{.wavelength} appears in the formula. +##' @param voronoi Should the plot for identifying spectra by mouse click be +##' produced by \code{plotmap} (default) or \code{plotvoronoi}? +##' @param ... further arguments are passed down the call chain, and finally +##' to \code{\link[lattice]{levelplot}} +##' @return \code{map.identify} returns a vector of row indices into +##' \code{object} of the clicked points. +##' +##' The other functions return a lattice object. +##' @author C. Beleites +##' @seealso \code{vignette (plotting)}, \code{vignette (hyperspec)} +##' +##' \code{\link{plot}} +##' @export +##' @keywords hplot +##' @examples +##' +##' \dontrun{ +##' vignette (plotting) +##' vignette (hyperspec) +##' } +##' +##' levelplot (spc ~ y * x, chondro [,,1003]) # properly rotated +##' plotmap (chondro [,,1003]) +##' +##' # plot spectra matrix +##' levelplot (spc ~ .wavelength * t, laser, contour = TRUE, col = "#00000080") +##' # see also plotmat +##' +##' plotmap (chondro, clusters ~ x * y) +##' +##' # Voronoi plots +##' smpl <- sample (chondro, 300) +##' plotmap (smpl, clusters ~ x * y) +##' if (require (deldir)) +##' plotvoronoi (smpl, clusters ~ x * y, +##' use.tripack = FALSE) +##' +##' @importFrom utils modifyList +plotmap <- function (object, model = spc ~ x * y, + func = mean, func.args = list (), ...){ + chk.hy (object) + validObject (object) + + if (! is.null (func) & ! any (grepl ("[.]wavelength", model))) + object <- do.call (apply, c (list (object, 1, func), func.args)) + + dots <- modifyList (list (aspect = "iso"), + list (...)) + + dots <- c (list (x = model, data = object), dots) + + do.call(.levelplot, dots) +} + diff --git a/R/plotmat.R b/R/plotmat.R new file mode 100644 index 00000000..bee2fdab --- /dev/null +++ b/R/plotmat.R @@ -0,0 +1,111 @@ +##' Plot spectra matrix +##' +##' plots the spectra matrix. +##' +##' If package plotrix is available, a color legend is plotted to the right. The right margin is set +##' to at least 5 lines. +##' @param object hyperSpec object +##' @param y character giving the name of the extra data column to label the y axis. +##' @param ylab y axis label, defaults to \code{"row"} and the label of the extra data column used +##' for the y axis, respectively. +##' @param col see \code{\link[graphics]{image}} +##' @param ... further parameters for \code{\link[graphics]{image}} +##' @param contour should \code{\link[graphics]{contour}} be called instead of +##' \code{\link[graphics]{image}}? +##' @author Claudia Beleites +##' @seealso \code{\link[graphics]{image}}, \code{\link[graphics]{contour}}, \code{\link[hyperSpec]{levelplot}} +##' @export +##' @examples +##' plotmat (laser, col = alois.palette (100)) +##' +##' plot (laser, "mat") +##' +##' plotmat (laser) +##' plotmat (laser, contour = TRUE, add = TRUE) +##' +##' ## use different y axis labels +##' +##' plotmat (laser, "t") +##' +##' plotmat (laser, laser$t / 3600, ylab = "t / h") +##' @importFrom utils modifyList +plotmat <- function (object, y = ".row", ylab, col = alois.palette (20), ..., + contour = FALSE){ + + chk.hy (object) + validObject (object) + object <- orderwl (object) + + if (is.character (y)) { + if (missing (ylab)) + ylab <- switch (y, + .row = "row", + labels (object, y)) + + y <- switch (y, + .row = seq_len (nrow (object)), + object@data [[y]]) + } + + dots <- modifyList (list (x = wl (object), + y = y, + z = t (object [[]]), + xlab = labels (object, ".wavelength"), + ylab = ylab, + col = col + ), + list (...)) + + + if (contour) + do.call ("contour", dots) + else { + ## leave at least 4 lines right margin + mar <- par()$ mar + if (mar [4] < 5) + par (mar = c (mar [1 : 3], 5)) + + do.call ("image", dots) + par (mar = mar) + + ## color legend + if (requireNamespace ("plotrix", quietly = TRUE)){ + + usr <- par()$usr + + dx <- diff (usr [1 : 2]) + + plotrix::color.legend (usr [2] + 0.05 * dx, + usr [3], + usr [2] + 0.10 * dx, + usr [4], + pretty (range (object, na.rm = TRUE)), + col, + align="rb",gradient="y") + } else { + warning ("package 'plotrix' not available: omitting legend.") + } + + } + + +} + +##' @include unittest.R +.test (plotmat) <- function (){ + context ("plotmat") + + test_that ("non-increasing wavelength axis", { + tmp <- flu + tmp [[]] <- tmp [[,, max ~ min]] + tmp@wavelength <- rev (tmp@wavelength) + + tmp <- capture_condition (plotmat (flu)) + tmp <- tmp[! grepl("package 'plotrix' not available: omitting legend", tmp)] + expect_equal(length(tmp), 0) +}) + + ## TODO vdiffr + + +} diff --git a/R/plotspc.R b/R/plotspc.R new file mode 100644 index 00000000..8a48fc8c --- /dev/null +++ b/R/plotspc.R @@ -0,0 +1,714 @@ +###-------------------------------------------------------------------------------------------------- +### +### plotspc - Plots spectra of hyperSpec object +### +### convenient plot interface for plotting spectra +### + + + +##' Plotting Spectra +##' Plot the spectra of a \code{hyperSpec} object, i.e. intensity over +##' wavelength. Instead of the intensity values of the spectra matrix summary +##' values calculated from these may be used. +##' +##' This is \code{hyperSpec}'s main plotting function for spectra plots. +##' +##' New plots are created by \code{\link[graphics]{plot}}, but the abscissa and +##' ordinate are drawn separately by \code{\link[graphics]{axis}}. Also, +##' \code{\link[graphics]{title}} is called explicitly to set up titles and +##' axis labels. This allows fine-grained customization of the plots. +##' +##' If package plotrix is available, its function +##' \code{\link[plotrix]{axis.break}} is used to produce break marks for cut +##' wavelength axes. +##' +##' @param object the \code{hyperSpec} object +##' @param wl.range the wavelength range to be plotted. +##' +##' Either a numeric vector or a list of vectors with different wavelength +##' ranges to be plotted separately. +##' +##' The values can be either wavelengths or wavelength indices (according to +##' \code{wl.index}). +##' @param wl.index if \code{TRUE}, \code{wl.range} is considered to give +##' column indices into the spectra matrix. Defaults to specifying wavelength +##' values rather than indices. +##' @param wl.reverse if \code{TRUE}, the wavelength axis is plotted backwards. +##' @param spc.nmax maximal number of spectra to be plotted (to avoid +##' accidentally plotting of large numbers of spectra). +##' @param func a function to apply to each wavelength in order to calculate +##' summary spectra such as mean, min, max, etc. +##' @param func.args \code{list} with furter arguments for \code{func} +##' @param add if \code{TRUE}, the output is added to the existing plot +##' @param bty see \code{\link[graphics]{par}} +##' @param col see \code{\link[graphics]{par}}. \code{col} might be a vector +##' giving individual colors for the spectra. +##' @param xoffset vector with abscissa offsets for each of the +##' \code{wl.range}s. If it has one element less than there are +##' \code{wl.range}s, 0 is padded at the beginning. +##' +##' The values are interpreted as the distance along the wavelength axis that +##' the following parts of the spectra are shifted towards the origin. E.g. +##' if \code{wl.range = list (600 ~ 1800, 2800 ~ 3200)}, \code{xoffset = 750} +##' would result in a reasonable plot. See also the examples. +##' @param yoffset ordinate offset values for the spectra. May be offsets to +##' stack the spectra (\code{\link{stacked.offsets}}). Either one for all +##' spectra, one per spectrum or one per group in \code{stacked}. +##' @param nxticks hint how many tick marks the abscissa should have. +##' @param stacked if not \code{NULL}, a "stacked" plot is produced, see the +##' example. \code{stacked} may be \code{TRUE} to stack single spectra. A +##' numeric or factor is interpreted as giving the grouping, character is +##' interpreted as the name of the extra data column that holds the groups. +##' @param stacked.args a list with further arguments to +##' \code{\link{stacked.offsets}}. +##' @param fill if not \code{NULL}, the area between the specified spectra is +##' filled with color \code{col}. The grouping can be given as factor or +##' numeric, or as a character with the name of the extra data column to use. +##' If a group contains more than 2 spectra, the first and the last are used. +##' +##' If \code{TRUE} spectra n and nrow (spc) - n build a group. +##' @param fill.col character vector with fill color. Defaults to brightened +##' colors from \code{col}. +##' @param border character vector with border color. You will need to set the +##' line color \code{col} to \code{NA} in order see the effect. +##' @param plot.args \code{list} with further arguments to +##' \code{\link[graphics]{plot}} +##' @param axis.args \code{list} with further arguments for +##' \code{\link[graphics]{axis}}. \code{axis.args$x} should contain arguments +##' for plotting the abscissa, \code{axis.args$y} those for the ordinate +##' (again as \code{lists}). +##' @param title.args list with further arguments to +##' \code{\link[graphics]{title}}. +##' +##' \code{title.args} may contain two lists, \code{$x}, and \code{$y} to set +##' parameters individually for each axis. +##' @param lines.args list with further arguments to +##' \code{\link[graphics]{lines}}. +##' +##' \code{lines.args$type} defaults to "l". +##' @param break.args list with arguments for +##' \code{\link[plotrix]{axis.break}}. +##' @param polygon.args list with further arguments to +##' \code{\link[graphics]{polygon}} which draws the filled areas. +##' @param zeroline \code{NA} or a list with arguments +##' \code{\link[graphics]{abline}}, used to plot line (s) marking I = 0. +##' +##' \code{NA} suppresses plotting of the line. The line is by default turned +##' off if \code{yoffset} is not 0. +##' @param debuglevel if > 0, additional debug output is produced, +##' see \code{\link[hyperSpec]{options}} for details +##' @return \code{plotspc} invisibly returns a list with +##' +##' \item{x}{the abscissa coordinates of the plotted spectral data points} +##' +##' \item{y}{a matrix the ordinate coordinates of the plotted spectral data +##' points} +##' +##' \item{wavelengths}{the wavelengths of the plotted spectral data points} +##' +##' This can be used together with \code{\link{spc.identify}}. +##' @author C. Beleites +##' @seealso \code{\link[graphics]{plot}}, \code{\link[graphics]{axis}}, +##' \code{\link[graphics]{title}}, \code{\link[graphics]{lines}}, +##' \code{\link[graphics]{polygon}}, \code{\link[graphics]{par}} for the +##' description of the respective arguments. +##' +##' \code{\link[plotrix]{axis.break}} for cut marks +##' +##' See \code{\link{plot}} for some predefined spectra plots such as mean +##' spectrum +/- one standard deviation and the like. +##' +##' \code{\link[graphics]{identify}} and \code{\link[graphics]{locator}} about +##' interaction with plots. +##' @keywords hplot +##' @export +##' @examples +##' +##' plotspc (flu) +##' +##' ## artificial example to show wavelength axis cutting +##' plotspc (chondro [sample (nrow (chondro), 50)], +##' wl.range = list (600 ~ 650, 1000 ~ 1100, 1600 ~ 1700), +##' xoffset = c (0, 300, 450)) +##' +##' plotspc (chondro [sample (nrow (chondro), 50)], +##' wl.range = list (600 ~ 650, 1000 ~ 1100, 1600 ~ 1700), +##' xoffset = c (300, 450)) +##' +##' ## some journals publish Raman spectra backwards +##' plotspc (chondro [sample (nrow (chondro), 50)], wl.reverse = TRUE) +##' +##' plotspc (laser[(0:4)*20+1,,], stacked = TRUE) +##' +##' plotspc (laser, func = mean_pm_sd, +##' col = c(NA, "red", "black"), lines.args = list (lwd = 2), +##' fill = c (1, NA, 1), +##' fill.col = "yellow", border = "blue", +##' polygon.args = list (lty = 2, lwd = 4), +##' title.args = list (xlab = expression (lambda[emission] / nm), +##' y = list(line = 3.4), +##' col.lab = "darkgreen"), +##' axis.args = list (x = list (col = "magenta"), y = list (las = 1)) +##' ) +##' +##' mean.pm.sd <- aggregate (chondro, chondro$clusters, mean_pm_sd) +##' plot (mean.pm.sd, col = matlab.palette (3), fill = ".aggregate", stacked = ".aggregate") +##' +##' @importFrom utils modifyList relist head tail +##' @importFrom grDevices rgb col2rgb +plotspc <- function (object, + ## what wavelengths to plot + wl.range = TRUE, wl.index = FALSE, wl.reverse = FALSE, + ## what spectra to plot + spc.nmax = hy.getOption("plot.spc.nmax"), + func = NULL, func.args = list (), + stacked = NULL, stacked.args = list (), + ## plot area + add = FALSE, bty = "l", plot.args = list(), + ## lines + col = "black", lines.args = list (), + ## axes + xoffset = 0, yoffset = 0, nxticks = 10, axis.args = list (), + break.args = list (), + ## title (axis labels) + title.args = list (), + ## parameters for filled regions + fill = NULL, fill.col = NULL, border = NA, polygon.args = list (), + ## line indicating zero intensity + zeroline = list (lty = 2, col = col), + debuglevel = hy.getOption("debuglevel")){ + force (zeroline) # otherwise stacking messes up colors + + chk.hy (object) + validObject (object) + if (nrow (object) == 0) stop ("No spectra.") + + ## prepare wavelengths ............................................................................ + ## somewhat more complicated here because of plotting with cut wavelength axis +# wl.range <- lazy (wl.range) +# browser () +# if (is.null (wl.range$expr)) { +# wl.range <- seq_along (object@wavelength) +# wl.index <- TRUE +# } + +# if (!is.list (wl.range$expr)) +# wl.range <- list (wl.range) + + if (!wl.index){ + wl.range <- wl2i (object, wl.range, unlist = FALSE) + wl.range <- lapply (wl.range, function (r) r [! is.na (r)]) + } + + ## xoffset ........................................................................................ + ## may be + ## - one number for all wl.ranges + ## - a number for each wl.range + ## - one less than wl.ranges: first will be 0 + if (length (xoffset) == length (wl.range) - 1) + xoffset = c (0, xoffset) + else if (length (xoffset) == 1) + xoffset = rep (xoffset, times = length (wl.range)) + if (!is.numeric(xoffset) || (length (xoffset) != length (wl.range))) + stop ("xoffset must be a numeric vector of the same length (or one less) as the list with", + "wavenumber ranges.") + xoffset <- cumsum (xoffset) + + ## for indexing wavelength.range is needed unlisted + u.wl.range <- unlist (wl.range) + + ## wavelengths are the numbers to print at the x axis + wavelengths <- relist (object@wavelength [u.wl.range], wl.range) + + ## x are the actual x coordinates + x <- wavelengths + for (i in seq_along(x)) + x [[i]] <- x [[i]] - xoffset[i] + + ## prepare spectra ................................................................................ + ## indices into columns of spectra matrix spc + ispc <- relist (seq_along (u.wl.range), wl.range) + + rm (wl.range) + spc <- object[[,, u.wl.range, drop = FALSE, wl.index = TRUE]] + rm (u.wl.range) + + + ## summary statistics: apply function func to spc + if (!is.null (func)){ + if (!is.function (func)) + stop ("func needs to be a function."); + + apply.args <- c (list (X = spc, MARGIN = 2, FUN = func), func.args) + spc <- matrix (do.call (apply, apply.args), #apply (spc, 2, func), + ncol = ncol (spc) + ) + if (nrow (spc) == 0) + stop ("No spectra after", func, "was applied.") + } + + ## do not plot too many spectra by default: can take very long and there is most probably nothing + ## visible on the resulting picture + if (nrow (spc) > spc.nmax){ + if (debuglevel >= 1L) + message ("Number of spectra exceeds spc.nmax. Only the first", spc.nmax, "are plotted.") + + spc <- spc [seq_len (spc.nmax), , drop = FALSE] + } + + ## stacked plot + if (!is.null (stacked)){ + stacked.args <- modifyList (stacked.args, + list (x = object, stacked = stacked, .spc = spc)) + + if (! is.null (lines.args$type) && lines.args$type == "h") + stacked.args <- modifyList (stacked.args, list (min.zero = TRUE)) + + stacked <- do.call (stacked.offsets, stacked.args) + if (all (yoffset == 0)) + yoffset <- stacked$offsets [stacked$groups] + else if (length (yoffset) == length (unique (stacked$groups))) + yoffset <- yoffset [stacked$groups] + } + + ## yoffset ........................................................................................ + ## either one value for all spectra + ## or one per spectrum or one per group + if (length (yoffset) != nrow (spc)){ + if (length (yoffset) == 1) + yoffset <- rep (yoffset, nrow (spc)) + else if (length (yoffset) > nrow (spc)) + yoffset <- yoffset [seq_len (nrow (spc))] + else + stop ("yoffset must be single number or one number for each spectrum (or stacking group).") + } + + spc <- sweep (spc, 1, yoffset, "+") + + ## plot area -------------------------------------------------------------------------------------- + + ## should a new plot be set up? + if (! add){ + ## set default plot args + plot.args <- modifyList (list (xlim = range (unlist (x), na.rm = TRUE), + ylim = range (spc, na.rm = TRUE)), + plot.args) + + ## the actual spectra are plotted below, so we do not need any line parametrers here + + ## reverse x axis ? + if (wl.reverse) + plot.args$xlim <- rev(plot.args$xlim) + + ## some arguments must be overwritten if given: + plot.args <- modifyList (plot.args, + list (x = unlist (x), y = spc[1,,drop=FALSE], + type = "n", bty = "n", + xaxt = "n", yaxt = "n", # axes and title are called separately + xlab = NA, ylab = NA)) # for finer control + + do.call (plot, plot.args) + + ## reversed x axis leads to trouble with tick positions + ## + if (diff (plot.args$xlim) < 0) + plot.args$xlim <- rev(plot.args$xlim) + + ## Axes ----------------------------------------------------------------------------------------- + axis.args <- modifyList (list (x = list (), y = list ()), axis.args) + + ## x-axis labels & ticks + if (bty %in% c("o", "l", "c", "u", "]", "x") ){ + cuts <- .cut.ticks (sapply (wavelengths, min), sapply (wavelengths, max), xoffset, nxticks) + + axis.args$x <- modifyList (axis.args [! names (axis.args) %in% c ("x", "y")], + axis.args$x) + if (is.null (axis.args$x$labels) & ! is.null (axis.args$x$at)) + axis.args$x$labels <- axis.args$x$at + axis.args$x <- modifyList (list (side = 1, at = cuts$at, labels = cuts$labels), + axis.args$x) + + axis (side = 1, at = max (abs (plot.args$xlim)) * c(-1.1, 1.1)) + do.call (axis, axis.args$x) + + ## plot cut marks for x axis + break.args <- modifyList (list (style = "zigzag"), break.args) + break.args$axis <- NULL + break.args$breakpos <- NULL + + if (length (cuts$cut) > 0) { + if (! requireNamespace ("plotrix")){ + cat ("hyperSpec will use its own replacement for plotrix' axis.break\n\n") + break.fun <- .axis.break + } else { + break.fun <- plotrix::axis.break + } + for (i in cuts$cut) + do.call (break.fun, c (list (axis = 1, breakpos = i), break.args)) + } + } + + ## y-axis labels & ticks + if (bty %in% c("o", "l", "c", "u", "y")){ + axis.args$y <- modifyList (axis.args [! names (axis.args) %in% c ("x", "y", "main", "sub")], + axis.args$y) + + ## default for stacked plots is marking the groups + if (!is.null (stacked)){ + if (! is.null (stacked.args$min.zero) && stacked.args$min.zero) + group.mins <- stacked$offsets + else + group.mins <- apply (spc[!duplicated (stacked$groups),, drop = FALSE], 1, min, na.rm = TRUE) + + axis.args$y <- modifyList (list (at = stacked$offsets, + labels = stacked$levels [!duplicated (stacked$levels)]), + axis.args$y) + } + + axis.args$y <- modifyList (list (side = 2), axis.args$y) + axis (side = 2, at = max (abs (plot.args$ylim)) * c(-1.1, 1.1)) + do.call (axis, axis.args$y) + } + + ## Title: axis labels --------------------------------------------------------------------------- + + tmp <- title.args [! names (title.args) %in% c ("x","y", "ylab", "main", "sub")] + tmp <- modifyList (tmp, as.list (title.args$x)) + + tmp <- modifyList (list (xlab = object@label$.wavelength, line = 2.5), tmp) + do.call (title, tmp) + tmp$xlab <- NULL + + tmp <- title.args [! names (title.args) %in% c ("x","y", "xlab", "main", "sub")] + tmp <- modifyList (tmp, as.list (title.args$y)) + tmp <- modifyList (list (ylab = object@label$spc), tmp) + do.call (title, tmp) + tmp$ylab <- NULL + + tmp <- title.args [! names (title.args) %in% c ("x","y", "xlab", "ylab")] + tmp <- modifyList (tmp, as.list (title.args [c ("main", "sub")])) + do.call (title, tmp) + } + + ## plot the spectra ------------------------------------------------------------------------------- + + ## if necessary, recycle colors + col <- rep (col, each = ceiling (nrow (spc) / length (col)), length.out = nrow (spc)) + + + ## should the intensity zero be marked? + if (!(is.logical (zeroline) && is.na (zeroline))){ + zeroline <- modifyList (list (h = unique (yoffset)), as.list (zeroline)) + do.call (abline, zeroline) + } + + ## start loop over wavelength ranges + for (i in seq_along (x)){ + ## filling for polygons ........................................................................ + + ## groupings for upper and lower bound of the bands + if (!is.null (fill)){ + if (is.character (fill) && length (fill) == 1) + fill <- unlist (object [[, fill]]) + else if (isTRUE (fill)){ + fill <- seq_len (nrow (spc) / 2) + if (nrow (spc) %% 2 == 1) # odd number of spectra + fill <- c (fill, NA, rev (fill)) + else + fill <- c (fill, rev (fill)) + } else if (is.factor (fill)) + fill <- as.numeric (fill) + else if (!is.numeric (fill)) + stop ("fill must be either TRUE, the name of the extra data column to use for grouping,", + "a factor or a numeric.") + + groups = unique (fill) + groups = groups [!is.na (groups)] + + + polygon.args <- modifyList (list (x = NULL, y = NULL), + polygon.args) + + ## fill color + if (is.null (fill.col)){ + fill.col <- character (length (groups)) + + for (j in seq_along (groups)){ + tmp <- which (fill == groups [j]) + fill.col [j] <- rgb( t (col2rgb (col [tmp[1]]) / 255) / 3 + 2/3) + } + } else { + fill.col <- rep (fill.col, length.out = length (groups)) + } + + border <- rep (border, length.out = length (groups)) + + polygon.args$x <- c (x [[i]], rev (x [[i]])) + + for (j in seq_along (groups)){ + tmp <- which (fill == groups [j]) + polygon.args$y <- c (spc[head(tmp, 1), ispc[[i]]], rev (spc [tail (tmp, 1), ispc[[i]]])) + polygon.args$col = fill.col [groups [j]] + polygon.args$border <- border [groups [j]] + + do.call (polygon, polygon.args) + } + } + + ## lines ........................................................................................ + + lines.args <- modifyList (list (x = NULL, y = NULL, type = "l"), lines.args) + + if (lines.args$type == "h" && is.list (stacked)) { + ## specialty: lines from the stacked zero line on! + for (j in seq_len (nrow (spc))){ + keep <- ! is.na (spc [j, ispc[[i]]]) + lines.args$x <- rep (x[[i]] [keep], each = 3) + lines.args$y <- as.numeric (matrix (c (rep (yoffset [j], sum (keep)), + spc [j, ispc[[i]]] [keep], + rep (NA, sum (keep))), + byrow = TRUE, nrow = 3)) + lines.args$type = "l" + lines.args$col <- col [j] + do.call (lines, lines.args) + } + } else { + for (j in seq_len (nrow (spc))){ + keep <- ! is.na (spc [j, ispc[[i]]]) + + lines.args$x <- x[[i]][keep] + lines.args$y <- spc [j, ispc[[i]]] [keep] + lines.args$col <- col [j] + + do.call (lines, lines.args) + } + } + } + + ## return some values that are needed by spc.identify + invisible (list (x = rep (unlist (x), each = nrow (spc)) , + y = spc, + wavelengths = rep (unlist (wavelengths), each = nrow (spc)) + ) + ) +} + + + +##' y Offsets for Stacked Plots +##' Calculate approriate \code{yoffset} values for stacking in \code{\link[hyperSpec]{plotspc}}. +##' +##' Usually, the \code{stacked} argument of \code{\link[hyperSpec]{plotspc}} will do fine, but if you +##' need fine control over the stacking, you may calculate the y offsets yourself. +##' +##' Empty levels of the stacking factor are dropped (as no stacking offset can be calculated in that +##' case.) +##' +##' @param x a \code{hyperSpec} object +##' @param min.zero if \code{TRUE}, the lesser of zero and the minimum intensity of the spectrum is +##' used as minimum. +##' @param add.factor,add.sum proportion and absolute amount of space that should be added. +##' @param .spc for internal use. If given, the ranges are evaluated on \code{.spc}. However, this +##' may change in future. +##' @return a list containing \item{offsets}{numeric with the yoffset for each group in +##' \code{stacked}} \item{groups}{numeric with the group number for each spectrum} \item{levels}{if +##' \code{stacked} is a factor, the levels of the groups} +##' @author C. Beleites +##' @seealso \code{\link[hyperSpec]{plotspc}} +##' @rdname plotspc +##' @export +##' @examples +##' +##' mean.pm.sd <- aggregate (chondro, chondro$clusters, mean_pm_sd) +##' +##' offset <- stacked.offsets (mean.pm.sd, ".aggregate") +##' plot (mean.pm.sd, fill.col = matlab.palette (3), fill = ".aggregate", +##' stacked = ".aggregate") +##' +##' plot (aggregate (chondro, chondro$clusters, mean), yoffset = offset$offsets, +##' lines.args = list (lty = 2, lwd = 2), add = TRUE) +##' +##' barb <- do.call (collapse, barbiturates [1:3]) +##' plot (barb, lines.args = list (type = "h"), stacked = TRUE, +##' stacked.args = list (add.factor = .2)) +##' +##' +stacked.offsets <- function (x, stacked = TRUE, + min.zero = FALSE, add.factor = 0.05, add.sum = 0, + #tight = FALSE, TODO + .spc = NULL, debuglevel = hy.getOption("debuglevel")){ + lvl <- NULL + + if (is.null (.spc)) + .spc <- x@data$spc + + if (is.character (stacked)) + stacked <- unlist (x [[, stacked]]) + else if (isTRUE (stacked)) + stacked <- row.seq (x) + + ## cut stacked if necessary + if (length (stacked) != nrow (.spc)){ + stacked <- rep (stacked, length.out = nrow (.spc)) + if (debuglevel >= 1L) + message ("stacking variable recycled to ", nrow (.spc), " values.") + } + if (is.numeric (stacked)) + stacked <- as.factor (stacked) + else if (!is.factor (stacked)) + stop ("stacked must be either TRUE, the name of the extra data column to use for grouping, a factor or a numeric.") + + stacked <- droplevels (stacked) + lvl <- levels (stacked) + groups <- seq_along (levels (stacked)) + stacked <- as.numeric (stacked) + + offset <- matrix (nrow = 2, ncol = length (groups)) + + for (i in groups) + offset[, i] <- range (.spc [stacked == groups [i], ], na.rm = TRUE) + + ## should the minimum be at zero (or less)? + if (min.zero) + offset [1, ] <- sapply (offset [1, ], min, 0, na.rm = TRUE) + + offset [2,] <- offset[2,] - offset [1,] + + ## add some extra space + offset [2,] <- offset [2, ] * (1 + add.factor) + add.sum + + offset <- c(-offset[1,], 0) + c (0, cumsum (offset[2,])) + + list (offsets = offset [seq_along (groups)], + groups = stacked, + levels = if (is.null (lvl)) stacked else lvl + ) +} + +##' @include unittest.R +.test (stacked.offsets) <- function (){ + context ("stacked.offsets") + + test_that("ranges do not overlap", { + spc <- do.call (collapse, barbiturates [1:3]) + ofs <- stacked.offsets (spc) + spc <- spc + ofs$offsets + rngs <- apply (spc [[]], 1, range, na.rm = TRUE) + + expect_equal (as.numeric (rngs), sort (rngs)) + }) + + test_that("extra space", { + spc <- new ("hyperSpec", spc = matrix (c (0, 0, 2, 1 : 3), nrow = 3)) + + expect_equal (stacked.offsets (spc, add.factor = 0)$offsets, c (0, 1, 1)) + expect_equal (stacked.offsets (spc, add.factor = 1)$offsets, c (0, 2, 4)) + expect_equal (stacked.offsets (spc, add.factor = 0, add.sum = 1)$offsets, c (0, 2, 3)) + }) + + test_that("min.zero", { + ofs <- stacked.offsets (flu, min.zero = TRUE, add.factor = 0) + expect_equal (ofs$offsets, + c (0, cumsum (apply (flu [[- nrow (flu)]], 1, max)))) + }) + + + +} + +### .axis.break - poor man's version of axis.break +.axis.break <- function (axis = 1,breakpos = NULL, ...) + mtext("//", at = breakpos, side = axis, padj = -1, adj = 0.5) + +###.cut.ticks - pretty tick marks for cut axes +##' @importFrom utils head +.cut.ticks <- function (start.ranges, + end.ranges, + offsets, + nticks){ + stopifnot (length (start.ranges) == length (end.ranges) & + length (start.ranges) == length (offsets)) + + ## if (length (start.ranges) == 1) + + + ## what part of the plot is occupied by each range? + part <- abs (end.ranges - start.ranges) / (max (end.ranges) - min (start.ranges) - max (offsets)) + + ## nice labels + labels <- mapply (function (start, end, part) pretty (c (start, end), part * nticks + 1), + start.ranges, end.ranges, part, + SIMPLIFY = FALSE) + + ## user coordinates + at <- mapply (`-`, labels, offsets, SIMPLIFY = FALSE) + + ## cut marks + + ## convert to device x in user coordinates + start.ranges <- start.ranges - offsets + end.ranges <- end.ranges - offsets + + delta <- start.ranges [-1] - head (end.ranges, -1) + + cutmarks <- head (end.ranges, -1) + delta / 2 + + ## make sure that the ticks are not too close + for (i in seq_along (delta)) { + keep <- at [[i]] < end.ranges [i] + delta [i] / 4 + at [[i]] <- at [[i]][keep] + labels [[i]] <- labels [[i]][keep] + + keep <- at [[i + 1]] > start.ranges [i + 1] - delta [i] / 4 + at [[i + 1]] <- at [[i + 1]][keep] + labels [[i + 1]] <- labels [[i + 1]][keep] + } + + list (labels = as.numeric (unlist (labels)), + at = as.numeric (unlist (at)), + cut = cutmarks) +} + +##' @include unittest.R +.test (.cut.ticks) <- function (){ + context (".cut.ticks") + + ## bugfix: + ## plotspc (paracetamol, wl.range = c (min ~ 1800, 2800 ~ max), xoffset = 900) + ## had 2600 1/cm label printed in low wavelength range + test_that("labels not too far outside wl.range",{ + expect_equal (.cut.ticks (start.ranges = c (96.7865, 2799.86), + end.ranges = c(1799.95, 3200.07), + offsets = c (0, 900), + nticks = 10)$labels, + c (seq (0, 1800, 200), seq (2800, 3400, 200)) + ) + }) + + test_that ("correct calculations",{ + + labels = c(seq (1, 2, 0.5), + seq (3, 4, 0.5), + seq (7, 9, 0.5)) + + expect_equal( + .cut.ticks(start.ranges = c (1, 3, 7), + end.ranges = c (2, 4, 9), + nticks = 10, + offsets = c (0, 0, 1)), + list (labels = labels, + at = labels - c (0, 0, 0, + 0, 0, 0, + 1, 1, 1, 1, 1), + cut = c (mean (c (3, 2)), + mean (c (7 - 1, 4))) + ) + ) + }) + + +} + + + diff --git a/R/plotvoronoi.R b/R/plotvoronoi.R new file mode 100644 index 00000000..5a1dfc35 --- /dev/null +++ b/R/plotvoronoi.R @@ -0,0 +1,46 @@ +################################################################################# +### +### plotvoronoi - plot spectral maps with irregular point pattern +### +### plots intensity or extra data column over 2 extra data columns + +##' @param use.tripack Whether package tripack should be used for calculating +##' the voronoi polygons. If \code{FALSE}, package deldir is used instead. +##' See details. +##' @param mix For Voronoi plots using package tripack, I experienced errors if +##' the data was spatially ordered. Randomly rearrangig the rows of the +##' hyperSpec object circumvents this problem. +##' @rdname levelplot +##' @include levelplot.R +##' @export +##' @seealso \code{\link[latticeExtra]{panel.voronoi}} +##' @importFrom latticeExtra panel.voronoi +##' @importFrom lattice prepanel.default.levelplot +##' @importFrom utils modifyList +plotvoronoi <- function (object, model = spc ~ x * y, + use.tripack = FALSE, mix = FALSE, ...){ + if (!requireNamespace ("latticeExtra")) + stop ("package latticeExtra is needed for Voronoi plots.") + + # if (use.tripack){ + # if (!requireNamespace ("tripack")) + # stop ("package tripack requested but not available.") + # } else { + # if (!requireNamespace ("deldir")) + # stop ("package deldir requested but not available.") + # } + + if (use.tripack && mix) + object@data <- object@data [sample (nrow (object)),] + + dots <- modifyList (list (object = object, + model = model, + panel = panel.voronoi, + prepanel = prepanel.default.levelplot, + pch = 19, cex = .25, + col.symbol = "#00000020", + border = "#00000020", + use.tripack = use.tripack), + list (...)) + do.call (plotmap, dots) +} diff --git a/R/qplot.R b/R/qplot.R new file mode 100644 index 00000000..3ddb2d5e --- /dev/null +++ b/R/qplot.R @@ -0,0 +1,239 @@ +##' Spectra plotting with ggplot2 +##' +##' These functions are still experimental and may change substantially in future. +##' @title Spectra plotting with ggplot2 +##' @param x hyperSpec object +##' @param wl.range wavelength ranges to plot +##' @param ... handed to \code{\link[ggplot2]{geom_line}} +##' @param mapping see \code{\link[ggplot2]{geom_line}} +##' @param spc.nmax maximum number of spectra to plot +##' @param map.lineonly if \code{TRUE}, \code{mapping} will be handed to +##' \code{\link[ggplot2]{geom_line}} instead of \code{\link[ggplot2]{ggplot}}. +##' @param debuglevel if > 0, additional debug output is produced +##' @return a \code{\link[ggplot2]{ggplot}} object +##' @author Claudia Beleites +##' @export +##' @seealso \code{\link{plotspc}} +##' +##' \code{\link[ggplot2]{ggplot}}\code{\link[ggplot2]{geom_line}} +##' @examples +##' +##' qplotspc (chondro) +##' +##' qplotspc (paracetamol, c (2800 ~ max, min ~ 1800)) + scale_x_reverse (breaks = seq (0, 3200, 400)) +##' +##' qplotspc (aggregate (chondro, chondro$clusters, mean), +##' mapping = aes (x = .wavelength, y = spc, colour = clusters)) + +##' facet_grid (clusters ~ .) +##' +##' qplotspc (aggregate (chondro, chondro$clusters, mean_pm_sd), +##' mapping = aes (x = .wavelength, y = spc, colour = clusters, group = .rownames)) + +##' facet_grid (clusters ~ .) +qplotspc <- function (x, + wl.range = TRUE, ..., + mapping = aes_string (x = ".wavelength", y = "spc", group = ".rownames"), + spc.nmax = hy.getOption("ggplot.spc.nmax"), + map.lineonly = FALSE, + debuglevel = hy.getOption("debuglevel")){ + chk.hy (x) + validObject (x) + + ## cut away everything that isn't asked for _before_ transforming to data.frame + if (nrow (x) > spc.nmax) { + if (debuglevel >= 1L) + message ("Number of spectra exceeds spc.nmax. Only the first ", spc.nmax, " are plotted.") + x <- x [seq_len (spc.nmax)] + } + + wl.range <- wl2i (x, wl.range, unlist = FALSE) + + x <- x [,, unlist (wl.range), wl.index = TRUE] + + df <- as.long.df (x, rownames = TRUE, na.rm = FALSE) # with na.rm trouble with wl.range + + ## ranges go into facets + if (length (wl.range) > 1L){ + tmp <- wl.range + for (r in seq_along(tmp)) + tmp [[r]][TRUE] <- r + + df$.wl.range <- rep (unlist (tmp), each = nrow (x)) + } + + + df <- df [! is.na (df$spc),, drop = FALSE] + if (map.lineonly) + p <- ggplot (df) + geom_line (mapping = mapping, ...) + else + p <- ggplot (df, mapping = mapping) + geom_line (...) + + p <- p + xlab (labels (x, ".wavelength")) + ylab (labels (x, "spc")) + + if (! is.null (df$.wl.range)) + p <- p + facet_grid (. ~ .wl.range, + labeller = as_labeller (rep (NA, nlevels (df$.wl.range))), + scales = "free", space = "free") + + theme (strip.text.x = element_text (size = 0)) + + p +} + + +##' Spectra plotting with ggplot2 +##' +##' These functions are still experimental and may change substantially in future. +##' +##' Note that \code{qplotmap} will currently produce the wrong scales if x or y are discrete. +##' @title Spectra plotting with ggplot2 +##' @param object hyperSpec object +##' @param mapping see \code{\link[ggplot2]{geom_tile}} +##' @param ... handed to \code{\link[ggplot2]{geom_tile}} +##' @param func function to summarize the wavelengths +##' @param func.args arguments to \code{func} +##' @param map.tileonly if \code{TRUE}, \code{mapping} will be handed to +##' \code{\link[ggplot2]{geom_tile}} instead of \code{\link[ggplot2]{ggplot}}. +##' @return a \code{\link[ggplot2]{ggplot}} object +##' @export +##' @author Claudia Beleites +##' @seealso \code{\link{plotmap}} +##' +##' \code{\link[ggplot2]{ggplot}}\code{\link[ggplot2]{geom_tile}} +##' @examples +##' qplotmap (chondro) +##' qplotmap (chondro) + scale_fill_gradientn (colours = alois.palette ()) +##' @importFrom utils tail +qplotmap <- function (object, mapping = aes_string (x = "x", y = "y", fill = "spc"), ..., + func = mean, func.args = list (), + map.tileonly = FALSE){ + chk.hy (object) + validObject (object) + + if (nwl (object) > 1 & ! is.null (func)) + object <- do.call (apply, c (list (object, 1, func), func.args)) + + if (map.tileonly) + p <- ggplot (as.long.df (object)) + geom_tile (mapping = mapping) + else + p <- ggplot (as.long.df (object), mapping = mapping) + geom_tile () + + p <- p + coord_equal () + + ## set expand to c(0, 0) to suppress the gray backgroud + if (is.factor (with (p$data, eval (p$mapping$x)))) + p <- p + scale_x_discrete (expand = c(0, 0)) + else + p <- p + scale_x_continuous (expand = c(0, 0)) + + if (is.factor (with (p$data, eval (p$mapping$y)))) + p <- p + scale_y_discrete (expand = c(0, 0)) + else + p <- p + scale_y_continuous (expand = c(0, 0)) + + ## generate axis/scale labels + ## TODO: own function + x <- as_label (mapping$x) + xlabel <- labels (object)[[tail (x, 1)]] + if (is.null (xlabel)) xlabel <- x + + y <- as_label (mapping$y) + ylabel <- labels (object)[[tail (y, 1)]] + if (is.null (ylabel)) ylabel <- y + + f <- as_label (mapping$fill) + flabel <- labels (object)[[tail (f, 1)]] + if (is.null (flabel)) flabel <- f + + p + labs (x = xlabel, y = ylabel, fill = flabel) +} + + +##' Spectra plotting with ggplot2 +##' +##' These functions are still experimental and may change substantially in future. +##' @title Spectra plotting with ggplot2 +##' @param object hyperSpec object +##' @param mapping see \code{\link[ggplot2]{geom_point}} +##' @param ... handed to \code{\link[ggplot2]{geom_point}} +##' @export +##' @param func function to summarize the wavelengths, if \code{NULL}, only the first wavelength is used +##' @param func.args arguments to \code{func} +##' @param map.pointonly if \code{TRUE}, \code{mapping} will be handed to +##' \code{\link[ggplot2]{geom_point}} instead of \code{\link[ggplot2]{ggplot}}. +##' @return a \code{\link[ggplot2]{ggplot}} object +##' @author Claudia Beleites +##' @seealso \code{\link{plotc}} +##' +##' \code{\link[ggplot2]{ggplot}}\code{\link[ggplot2]{geom_point}} +##' @importFrom rlang as_label +##' @examples +##' qplotc (flu) +##' qplotc (flu) + geom_smooth (method = "lm") +qplotc <- function (object, mapping = aes_string(x = "c", y = "spc"), ..., + func = NULL, func.args = list (), + map.pointonly = FALSE){ + chk.hy (object) + validObject (object) + + dots <- list (...) + + if (! is.null (func)) + object <- do.call (apply, c (list (object, 1, func), func.args)) + + ## allow to plot against the row number + object$.row <- seq (object, index = TRUE) + + ## find out whether the wavelengths are needed individually, + ## if not, use only the first wavelength and issue a warning + + if (any (grepl ("spc", as_label (mapping))) && # use intensities + nwl (object) > 1 && # has > 1 wavelength + is.null (func) && # no stats function + ! any (grepl ("[.]wavelength", as_label (mapping)))) { + object <- object [,, 1, wl.index = TRUE] + warning ("Intensity at first wavelengh only is used.") + } + + ## produce fancy y label + ylab <- labels (object, as_label (mapping$y)) + if (! is.null (func)) + ylab <- make.fn.expr (substitute (func), c (ylab, func.args)) + ylab <- as.expression (ylab) + + ## expand the data.frame + df <- as.long.df (object, rownames = TRUE, wl.factor = TRUE) + + ## if plots should be grouped, faceted, etc. by wavelength, it is better to have a factor + if (any (grepl ("[.]wavelength", mapping [! names (mapping) %in% c("x", "y")]))) + df$.wavelength <- as.factor (df$.wavelength) + + if (map.pointonly) + p <- ggplot (df) + geom_point (mapping = mapping) + else + p <- ggplot (df, mapping = mapping) + geom_point () + + p + ylab (ylab) + + xlab (labels (object, as_label (mapping$x))) +} + +make.fn.expr <- function (fn, l = list ()){ + + if (length (fn) > 1L) + fn <- "f" + + l <- lapply (l, function (x) if (is.logical (x)) as.character (x) else x) + + if (is.null (names (l))) + names (l) <- rep ("", length (l)) + + tmp <- mapply (function (x, y) if (nzchar (x) > 0L) bquote (.(x) == .(y)) else y, + names (l), l) + + e <- expression (f (x)) + e [[1]][[1]] <- fn + if (length (tmp) > 0L) + e [[1]][seq_along (tmp) + 1] <- tmp + else + e [[1]][2] <- NULL + + e +} diff --git a/R/qplotmixmap.R b/R/qplotmixmap.R new file mode 100644 index 00000000..aaf915de --- /dev/null +++ b/R/qplotmixmap.R @@ -0,0 +1,264 @@ +##' map plot with colour overlay. +##' +##' +##' @title qplotmap with colour mixing for multivariate overlay +##' @param object hyperSpec object +##' @param ... handed over to \code{\link[hyperSpec]{qmixlegend}} and \code{\link[hyperSpec]{qmixtile}} +##' @return invisible list with ggplot2 objects map and legend +##' @seealso \code{\link[hyperSpec]{qmixtile}} +##' @author Claudia Beleites +##' @importFrom grid pushViewport viewport popViewport grid.layout unit +##' @import ggplot2 +##' @export +##' @examples +##' chondro <- chondro - spc.fit.poly.below (chondro) +##' chondro <- sweep (chondro, 1, apply (chondro, 1, mean), "/") +##' chondro <- sweep (chondro, 2, apply (chondro, 2, quantile, 0.05), "-") +##' +##' qplotmixmap (chondro [,,c (940, 1002, 1440)], +##' purecol = c (colg = "red", Phe = "green", Lipid = "blue")) +##' +##' @importFrom lazyeval f_rhs +qplotmixmap <- function (object, ...){ + + p <- qmixtile (object@data, ...) + + coord_equal () + + ## ggplot2 transition to lazyeval of mappings. Use `tmp.cnv` conversion function depending on ggplot2 behaviour + if (is.name (p$mapping$x)) # old ggplot2 + tmp.cnv <- as.character + else # new ggplot2 -> lazyeval + tmp.cnv <- f_rhs + + p <- p + + xlab (labels (object)[[tmp.cnv (p$mapping$x)]]) + + ylab (labels (object)[[tmp.cnv (p$mapping$y)]]) + + l <- qmixlegend (object@data$spc, ...) + + legendright (p, l) + + invisible (list (map = p, legend = l)) +} + +##' Plot multivariate data into colour channels +##' +##' plot graph with legend right of it +##' +##' @param p plot object +##' @param l legend object +##' @param legend.width,legend.unit size of legend part +##' @return invisible \code{NULL} +##' @author Claudia Beleites +##' @rdname qplotmix +##' @export +legendright <- function (p, l, legend.width = 8, legend.unit = "lines") { + plot.new () + pushViewport (viewport (layout = grid.layout (1, 2, + widths = unit (c (1, legend.width), c("null", legend.unit)) + ))) + print (p, viewport (layout.pos.col = 1), newpage = FALSE) + print (l, viewport (layout.pos.col = 2), newpage = FALSE) + popViewport () +} + +##' plot multivariate data into colour channels using \code{\link[ggplot2]{geom_tile}} +##' @rdname qplotmix +##' @param object matrix to be plotted with mixed colour channels +##' @param purecol pure component colours, names determine legend labels +##' @param mapping see \code{\link[ggplot2]{geom_tile}} +##' @param ... \code{qmixtile}: handed to \link[hyperSpec]{colmix.rgb} +##' +##' \code{qmixlegend} and \code{colmix.rgb} hand further arguments to the \code{normalize} function +##' @param map.tileonly if \code{TRUE}, \code{mapping} will be handed to +##' \code{\link[ggplot2]{geom_tile}} instead of \code{\link[ggplot2]{ggplot}}. +##' +qmixtile <- function (object, + purecol = stop ("pure component colors needed."), + mapping = aes_string (x = "x", y = "y", fill = "spc"), + ..., + map.tileonly = FALSE) { + + ## ggplot2 transition to lazyeval of mappings. Use `tmp.cnv` conversion function depending on ggplot2 behaviour + if (is.name (mapping$fill)) # old ggplot2 + tmp.cnv <- as.character + else # new ggplot2 -> lazyeval + tmp.cnv <- f_rhs + + + ## calculate fill colours + fill <- colmix.rgb (object [[tmp.cnv (mapping$fill)]], purecol, ...) + object [[tmp.cnv (mapping$fill)]] <- fill + + if (map.tileonly) + p <- ggplot (object) + geom_tile (mapping = mapping, data = object) + else + p <- ggplot (object, mapping = mapping) + geom_tile () + + p + scale_fill_identity () + theme (legend.position = "none") +} + +##' \code{normalize.colrange} normalizes the range of each column to [0, 1] +##' @rdname qplotmix +##' @export +##' +##' @param na.rm see \code{link[base]{min}} +##' @param legend should a legend be produced instead of normalized values? +##' @param n of colours to produce in legend +##' @return list with components ymin, max and fill to specify value and fill colour value (still +##' numeric!) for the legend, otherwise the normalized values +normalize.colrange <- function (x, na.rm = TRUE, legend = FALSE, n = 100, ...){ + ## legend + if (legend){ + y <- apply (x, 2, function (x) seq (min (x), max (x), length.out = n)) + dy2 <- abs (y [2,] - y [1,]) / 2 + + list (ymin = sweep (y, 2, dy2, `-`), + ymax = sweep (y, 2, dy2, `+`), + fill = apply (x, 2, function (x) seq ( 0, 1, length.out = n))) + } else { + ## normalized values + x <- sweep (x, 2, apply (x, 2, min, na.rm = na.rm), `-`) + sweep (x, 2, apply (x, 2, max, na.rm = na.rm), `/`) + } +} + +##' \code{normalize.range} normalizes the range of all columns to [0, 1] +##' @rdname qplotmix +##' @export +##' +normalize.range <- function (x, na.rm = TRUE, legend = FALSE, n = 100, ...){ + if (legend){ + y <- matrix (seq (min (x), max (x), length.out = n), nrow = n, ncol = ncol (x)) + dy2 <- abs (y [2,] - y [1,]) / 2 + + list (ymin = sweep (y, 2, dy2, `-`), + ymax = sweep (y, 2, dy2, `+`), + fill = apply (x, 2, function (x) seq ( 0, 1, length.out = n))) + } else { + x <- x - min (x, na.rm = na.rm) + x / max (x, na.rm = na.rm) + } +} + +##' \code{normalize.null} does not touch the values +##' @rdname qplotmix +##' @export +##' +normalize.null <- function (x, na.rm = TRUE, legend = FALSE, n = 100, ...){ + if (legend){ + y <- apply (x, 2, function (x) seq (min (x), max (x), length.out = n)) + + list (ymin = sweep (y, 2, min), + ymax = sweep (y, 2, max), + fill = apply (x, 2, function (x) seq ( 0, 1, length.out = n))) + } else { + x + } +} +##' \code{normalize.minmax} normalizes the range of each column j to [min_j, max_j] +##' @rdname qplotmix +##' @export +##' @param min numeric with value corresponding to "lowest" colour for each column +##' @param max numeric with value corresponding to "hightest" colour for each column +normalize.minmax <- function (x, min = 0, max = 1, legend = FALSE, n = 100, ...){ + if (legend){ + y <- matrix (seq (0, 1, length.out = n), nrow = n, ncol = ncol (x)) + y <- sweep (y, 2, max - min, `*`) + y <- sweep (y, 2, min, `+`) + + dy2 <- abs (y [2,] - y [1,]) / 2 + + l <- list (ymin = sweep (y, 2, dy2, `-`), + ymax = sweep (y, 2, dy2, `+`), + ymax = y + dy2, + fill = matrix (seq (0, 1, length.out = n), nrow = n, ncol = ncol (x))) + + l$ymin [1, ] <- pmin (l$ymin [1,], apply (x, 2, min, na.rm = TRUE)) + l$ymax [n, ] <- pmax (l$ymax [n,], apply (x, 2, max, na.rm = TRUE)) + + l + } else { + x <- sweep (x, 2, min, `-`) + sweep (x, 2, max, `/`) + } +} + +##' legends for mixed colour plots +##' @rdname qplotmix +##' @param dx width of label bar +##' @param ny number of colours in legend +##' @param labels component names +##' @return ggplot object with legend +##' @author Claudia Beleites +##' @export +qmixlegend <- function (x, purecol, dx = 0.33, ny = 100, labels = names (purecol), + normalize = normalize.colrange, ...) { + if (! is.matrix (x)) + x <- matrix (x, ncol = 1) + + if (is.null (labels)) + labels <- colnames (x) + if (is.null (labels)) + labels <- seq_len (ncol (x)) + + if (! is.null (normalize)) + l <- normalize (x, ..., legend = TRUE) + else + l <- x + + df <- data.frame () + for (column in seq_along (purecol)){ + tmp <- colmix.rgb (l$fill [, column, drop = FALSE], purecol [column], normalize = NULL, ...) + df <- rbind (df, data.frame (column = labels [column], + col = tmp, + ymin = l$ymin [, column], + ymax = l$ymax [, column]) + ) + } + df$column <- as.factor (df$column) + df$xmin <- as.numeric (df$column) - dx + df$xmax <- as.numeric (df$column) + dx + + l <- ggplot (df, aes (x = column), col = col) + + geom_point (aes (x=column, y = 1), col = NA) + ylab ("") + xlab ("") + l <- l + geom_rect (aes_string (xmin = "xmin", xmax = "xmax", ymin = "ymin", ymax = "ymax", + fill = "col", colour = "col")) + + l <- l + theme (plot.margin = unit(c(0.5, 0, 0 ,0), "lines"), legend.position = "none") + + scale_fill_identity () + scale_colour_identity () + + l +} + +##' @rdname qplotmix +##' @title multi channel colour mixing +##' @param x matrix with component intensities in columns +##' @param against value to mix against (for \code{sub = TRUE} only, 1 = white, 0 = black) +##' @param sub subtractive color mixing? +##' @param normalize function to normalize the values. +##' @return character with colours +##' @author Claudia Beleites +##' @export +##' @importFrom grDevices col2rgb rgb +colmix.rgb <- function (x, purecol, against = 1, sub = TRUE, + normalize = normalize.colrange, ...){ + if (! is.null (normalize)) + x <- normalize (x, ...) + + if (is.character (purecol)) + purecol <- t (col2rgb (purecol)) / 255 + + if (sub) + x <- against - x %*% (against - purecol) + else + x <- x %*% purecol + + x [x < 0] <- 0 + x [x > 1] <- 1 + + cols <- rep (NA, nrow (x)) + cols [! is.na (x [,1])] <- rgb (x [!is.na (x [, 1]),]) + + cols +} diff --git a/R/read.ENVI.HySpex.R b/R/read.ENVI.HySpex.R new file mode 100644 index 00000000..b835308d --- /dev/null +++ b/R/read.ENVI.HySpex.R @@ -0,0 +1,34 @@ +##' @describeIn read.ENVI +##' @include read.ENVI.R +##' @export +read.ENVI.HySpex <- function (file = stop ("read.ENVI.HySpex: file name needed"), + headerfile = NULL, header = list (), keys.hdr2data = NULL, ...) { + + headerfile <- .find.ENVI.header (file, headerfile) + keys <- readLines (headerfile) + keys <- .read.ENVI.split.header (keys) + keys <- keys [c ("pixelsize x", "pixelsize y", "wavelength units")] + + header <- modifyList (keys, header) + + ## most work is done by read.ENVI + spc <- read.ENVI (file = file, headerfile = headerfile, header = header, ..., pull.header.lines = FALSE) + + label <- list (x = "x / pixel", + y = "y / pixel", + spc = 'I / a.u.', + .wavelength = as.expression (bquote (lambda / .(u), list (u = keys$`wavelength units`)))) + + labels (spc) <- label + + spc +} + +.test (read.ENVI.HySpex) <- function (){ + context ("read.ENVI.HySpex") + + test_that ("Hyspex ENVI file", { + skip_if_not_fileio_available () + expect_known_hash(read.ENVI.HySpex("fileio/ENVI/HySpexNIR.hyspex"), "cf35ba92334f22513486f25c5d8ebe32") + }) +} \ No newline at end of file diff --git a/R/read.ENVI.Nicolet.R b/R/read.ENVI.Nicolet.R new file mode 100644 index 00000000..e484fbf4 --- /dev/null +++ b/R/read.ENVI.Nicolet.R @@ -0,0 +1,109 @@ +##' @details +##' Nicolet uses some more keywords in their header file. +##' They are interpreted as follows: +##' \tabular{ll}{ +##' description \tab giving the position of the first spectrum \cr +##' z plot titles \tab wavelength and intensity axis units, comma separated \cr +##' pixel size \tab interpreted as x and y step size +##' (specify \code{x = NA} and \code{y = NA}) +##' } +##' These parameters can be overwritten by giving a list with the respective +##' elements in parameter \code{header}. +##' +##' The values in header line description seem to be microns while the pixel +##' size seems to be in microns. If \code{nicolet.correction} is true, the +##' pixel size values (i.e. the step sizes) are multiplied by 1000. +##' +##' @param nicolet.correction see details +##' @describeIn read.ENVI +##' @export +##' @importFrom utils modifyList +read.ENVI.Nicolet <- function (file = stop ("read.ENVI: file name needed"), + headerfile = NULL, header = list (), ..., + x = NA, y = NA, + nicolet.correction = FALSE) { + + ## the additional keywords to interprete must be read from headerfile + headerfile <- .find.ENVI.header (file, headerfile) + keys <- readLines (headerfile) + keys <- .read.ENVI.split.header (keys) + keys <- keys [c ("description", "z plot titles", "pixel size")] + + header <- modifyList (keys, header) + + ## most work is done by read.ENVI + spc <- read.ENVI (file = file, headerfile = headerfile, header = header, ..., + x = if (is.na (x)) 0 : 1 else x, + y = if (is.na (y)) 0 : 1 else y) + + ### From here on processing the additional keywords in Nicolet's ENVI header * + + ## z plot titles ------------------------------------------------------------- + ## default labels + label <- list (x = expression (`/` (x, micro * m)), + y = expression (`/` (y, micro * m)), + spc = 'I / a.u.', + .wavelength = expression (tilde (nu) / cm^-1)) + + ## get labels from header information + if (!is.null (header$'z plot titles')){ + pattern <- "^[[:blank:]]*([[:print:]^,]+)[[:blank:]]*,.*$" + tmp <- sub (pattern, "\\1", header$'z plot titles') + + if (grepl ("Wavenumbers (cm-1)", tmp, ignore.case = TRUE)) + label$.wavelength <- expression (tilde (nu) / cm^(-1)) + else + label$.wavelength <- tmp + + pattern <- "^[[:blank:]]*[[:print:]^,]+,[[:blank:]]*([[:print:]^,]+).*$" + tmp <- sub (pattern, "\\1", header$'z plot titles') + if (grepl ("Unknown", tmp, ignore.case = TRUE)) + label$spc <- "I / a.u." + else + label$spc <- tmp + } + + ## modify the labels accordingly + spc@label <- modifyList (label, spc@label) + + ## set up spatial coordinates ------------------------------------------------ + ## look for x and y in the header only if x and y are NULL + ## they are in `description` and `pixel size` + + ## set up regular expressions to extract the values + p.description <- paste ("^Spectrum position [[:digit:]]+ of [[:digit:]]+ positions,", + "X = ([[:digit:].-]+), Y = ([[:digit:].-]+)$") + p.pixel.size <- "^[[:blank:]]*([[:digit:].-]+),[[:blank:]]*([[:digit:].-]+).*$" + + if (is.na (x) && is.na (y) && + ! is.null (header$description) && grepl (p.description, header$description ) && + ! is.null (header$'pixel size') && grepl (p.pixel.size, header$'pixel size')) { + + x [1] <- as.numeric (sub (p.description, "\\1", header$description)) + y [1] <- as.numeric (sub (p.description, "\\2", header$description)) + + x [2] <- as.numeric (sub (p.pixel.size, "\\1", header$'pixel size')) + y [2] <- as.numeric (sub (p.pixel.size, "\\2", header$'pixel size')) + + ## it seems that the step size is given in mm while the offset is in micron + if (nicolet.correction) { + x [2] <- x [2] * 1000 + y [2] <- y [2] * 1000 + } + + ## now calculate and set the x and y coordinates + x <- x [2] * spc$x + x [1] + if (! any (is.na (x))) + spc@data$x <- x + + y <- y [2] * spc$y + y [1] + if (! any (is.na (y))) + spc@data$y <- y + } + + ## consistent file import behaviour across import functions + ## .fileio.optional is called already by read.ENVI + + spc +} + diff --git a/R/read.ENVI.R b/R/read.ENVI.R new file mode 100644 index 00000000..56dcf519 --- /dev/null +++ b/R/read.ENVI.R @@ -0,0 +1,440 @@ +#################################################################################################### +### +### read.ENVI - read ENVI files, missing header files may be replaced by list in parameter header +### +### * read.ENVI.Nicolet for ENVI files written by Nicolet spectrometers +### * adapted from caTools read.ENVI +### +### Time-stamp: +### +#################################################################################################### + +### some general helper functions .................................................................. + +###----------------------------------------------------------------------------- +### +### split.line - split line into list of key-value pairs +### +### + +split.line <- function (x, separator, trim.blank = TRUE) { + tmp <- regexpr (separator, x) + + key <- substr (x, 1, tmp - 1) + value <- substr (x, tmp + 1, nchar (x)) + + if (trim.blank){ + blank.pattern <- "^[[:blank:]]*([^[:blank:]]+.*[^[:blank:]]+)[[:blank:]]*$" + key <- sub (blank.pattern, "\\1", key) + value <- sub (blank.pattern, "\\1", value) + } + + value <- as.list (value) + names (value) <- key + + value +} + + +### some ENVI-specific helper functions ............................................................. + +### guesses ENVI header file name +.find.ENVI.header <- function (file, headerfilename) { + if (is.null (headerfilename)) { + headerfilename <- paste (dirname (file), + sub ("[.][^.]+$", ".*", basename (file)), sep = "/") + + tmp <- Sys.glob (headerfilename) + + headerfilename <- tmp [! grepl (file, tmp)] + + if (length (headerfilename) > 1L) { + + headerfilename <- headerfilename [grepl ("[.][hH][dD][rR]$", headerfilename)] + + if (length (headerfilename == 1L)) + message (".find.ENVI.header: Guessing header file name ", headerfilename) + } + + if (length (headerfilename) != 1L) + stop ("Cannot guess header file name") + } + + if (!file.exists(headerfilename)) + stop("ENVI header file: ", headerfilename, " not found.") + + headerfilename +} + +# ................................................................................................... + +.read.ENVI.split.header <- function (header, pull.lines = TRUE) { + + ## check ENVI at beginning of file + if (!grepl ("ENVI", header[1])) + stop ("Not an ENVI header (ENVI keyword missing)") + else + header <- header [-1] + + ## remove curly braces and put multi-line key-value-pairs into one line + header <- gsub ("\\{([^}]*)\\}", "\\1", header) + + l <- grep ("\\{", header) + r <- grep ("\\}", header) + + if (length (l) != length(r)) + stop ("Error matching curly braces in header (differing numbers).") + + if (any (r <= l)) + stop ("Mismatch of curly braces in header.") + + header[l] <- sub ("\\{", "", header[l]) + header[r] <- sub ("\\}", "", header[r]) + + if (pull.lines) + for (i in rev (seq_along (l))) + header <- c (header [seq_len (l [i] - 1)], + paste (header [l [i] : r [i]], collapse = " "), + header [-seq_len (r [i])]) + + ## split key = value constructs into list with keys as names + header <- sapply (header, split.line, "=", USE.NAMES = FALSE) + names (header) <- tolower (names (header)) + + ## process numeric values + tmp <- names (header) %in% c("samples", "lines", "bands", "data type", "header offset") + header [tmp] <- lapply (header [tmp], as.numeric) + + header +} + +### ................................................................................................. + +.read.ENVI.bin <- function (file, header, block.lines.skip = NULL, block.lines.size = NULL) { + + DATA_TYPE_SIZES <- as.integer (c (1, 2, 4, 4, 8, NA, NA, NA, 16, NA, NA, 2)) + + if (is.null (header$interleave)) + header$interleave <- "bsq" + + if (any (is.null (header [c ("samples", "lines", "bands", "data type")]) || + is.na (header [c ("samples", "lines", "bands", "data type")]) )) + stop("Error in ENVI header (required entry missing or incorrect)\n header: ", + paste (names (header), " = ", header, collapse = ", ")) + + if (header$samples <= 0) + stop ("Error in ENVI header: incorrect data size (", header$samples, ")") + if (header$lines <= 0) + stop ("Error in ENVI header: incorrect data size (", header$lines, ")") + if (header$bands <= 0) + stop ("Error in ENVI header: incorrect data size (", header$bands, ")") + + if (!(header$`data type` %in% c (1 : 5, 9, 12))) + stop ("Error in ENVI header: data type incorrect or unsupported (", header$`data type`,")") + + if (is.null (header$`byte order`)){ + header$`byte order` <- .Platform$endian + message (".read.ENVI.bin: 'byte order' not given => Guessing '", + .Platform$endian, "'\n", sep = '') + } + + if (! header$`byte order` %in% c ("big", "little", "swap")) { + header$`byte order` <- as.numeric (header$`byte order`) + if (! header$`byte order` %in% 0 : 1) { + header$`byte order` <- .Platform$endian + warning ("byte order incorrect. Guessing '", .Platform$endian, "'") + } else if (header$`byte order` == 0) + header$`byte order` <- "little" + else + header$`byte order` <- "big" + } + + if (!file.exists (file)) + stop("Binary file not found: ", file) + + f <- file (file, "rb") + if (! is.null (header$`header offset`)) + seek (f, where = header$`header offset`, origin = "start") + + ## size of data point in bytes + size <- DATA_TYPE_SIZES [header$`data type`] + + ## read blocks of data + if (block.lines.skip > 0) { + skip <- switch (tolower (header$interleave), + bil = header$samples * header$bands * block.lines.skip, + bip = header$bands * header$samples * block.lines.skip, + bsq = stop ('skipping of band sequential (BSQ) ENVI files not yet supported. Please contact the maintainer (', + maintainer (pkg = "hyperSpec"), ")."), + stop ("Unknown interleave (", header$interleave, ") - should be one of 'BSQ', 'BIL', 'BIP'.") + ) + + skip <- skip * size + seek (f, where = skip, start = "current") + } + + if (!is.null (block.lines.size)) { + header$lines <- min (block.lines.size, header$lines - block.lines.skip) + } + + ## number of data points to read + n <- header$samples * header$lines * header$bands + + switch(header$`data type`, + spc <- readBin(f, integer (), n = n, size = size, signed = FALSE), + spc <- readBin(f, integer (), n = n, size = size, endian = header$`byte order`), + spc <- readBin(f, integer (), n = n, size = size, endian = header$`byte order`), + spc <- readBin(f, double (), n = n, size = size, endian = header$`byte order`), + spc <- readBin(f, double (), n = n, size = size, endian = header$`byte order`), + stop ("ENVI data type (", header$`data type`, ") unknown"), # 6 unused + stop ("ENVI data type (", header$`data type`, ") unknown"), # 7 unused + stop ("ENVI data type (", header$`data type`, ") unknown"), # 8 unused + spc <- readBin (f, complex(), n = n, size = size, endian = header$`byte order`), + stop ("ENVI data type (", header$`data type`, ") unknown"), # 10 unused + stop ("ENVI data type (", header$`data type`, ") unknown"), # 11 unused + spc <- readBin (f, integer(), n = n, size = size, endian = header$`byte order`, signed = FALSE) + ) + + close(f) + + switch (tolower (header$interleave), + bil = { + dim (spc) <- c (header$samples, header$bands, header$lines); + spc <- aperm (spc, c(3, 1, 2)) + }, + bip = { + dim (spc) <- c (header$bands, header$samples, header$lines); + spc <- aperm (spc, c(3, 2, 1)) + }, + bsq = { + dim (spc) <- c (header$samples, header$lines, header$bands); + spc <- aperm (spc, c(2, 1, 3)) + }, + stop ("Unknown interleave (", + header$interleave, + ", should be one of 'BSQ', 'BIL', 'BIP')") + ) + + dim (spc) <- c (header$samples * header$lines, header$bands) + + spc +} + +# .................................................................................................. + + + +##' @title Import of ENVI data as hyperSpec object +##' +##' @description +##' This function allows ENVI data import as \code{hyperSpec} object. +##' +##' \code{read.ENVI.Nicolet} should be a good starting point for writing custom +##' wrappers for \code{read.ENVI} that take into account your manufacturer's +##' special entries in the header file. +##' +##' @details +##' ENVI data usually consists of two files, an ASCII header and a binary data +##' file. The header contains all information necessary for correctly reading +##' the binary file. +##' +##' I experienced missing header files (or rather: header files without any +##' contents) produced by Bruker Opus' ENVI export. +##' +##' In this case the necessary information can be given as a list in parameter +##' \code{header} instead: +##' +##' \tabular{lll}{ +##' \code{header$} \tab values \tab meaning\cr +##' \code{samples} \tab integer \tab no of columns / spectra in x direction\cr +##' \code{lines} \tab integer \tab no of lines / spectra in y direction\cr +##' \code{bands} \tab integer \tab no of wavelengths / data points per spectrum\cr +##' \code{`data type`} \tab \tab format of the binary file\cr +##' \tab 1 \tab 1 byte unsigned integer \cr +##' \tab 2 \tab 2 byte signed integer \cr +##' \tab 3 \tab 4 byte signed integer \cr +##' \tab 4 \tab 4 byte float \cr +##' \tab 5 \tab 8 byte double \cr +##' \tab 9 \tab 16 (2 x 8) byte complex double \cr +##' \tab 12 \tab 2 byte unsigned integer \cr +##' \code{`header offset`} \tab integer \tab number of bytes to skip before binary data starts\cr +##' \code{interleave} \tab \tab directions of the data cube \cr +##' \tab "BSQ" \tab band sequential (indexing: [sample, line, band])\cr +##' \tab "BIL" \tab band interleave by line (indexing: [sample, line, band])\cr +##' \tab "BIP" \tab band interleave by pixel (indexing: [band, line, sample])\cr +##' \code{`byte order`} \tab 0 or "little" \tab little endian \cr +##' \tab 1 or "big" \tab big endian \cr +##' \tab "swap" \tab swap byte order +##' } +##' +##' Some more information that is not provided by the ENVI files may be given: +##' +##' Wavelength axis and axis labels in the respective parameters. For more +##' information, see \code{\link[hyperSpec]{initialize}}. +##' +##' The spatial information is by default a sequence from 0 to +##' \code{header$samples - 1} and \code{header$lines - 1}, respectively. +##' \code{x} and \code{y} give offset of the first spectrum and step size. +##' +##' Thus, the object's \code{$x} colum is: \code{(0 : header$samples - 1) * x +##' [2] + x [1]}. The \code{$y} colum is calculated analogously. +##' +##' @aliases read.ENVI read.ENVI.Nicolet read.ENVI.HySpex +##' @param file complete name of the binary file +##' @param headerfile name of the ASCII header file. If \code{NULL}, the name +##' of the header file is guessed by looking for a second file with the same +##' basename as \code{file} but \code{hdr} or \code{HDR} suffix. +##' @param header list with header information, see details. Overwrites information extracted from the header file. +##' @param x,y vectors of form c(offset, step size) for the position vectors, +##' see details. +##' @param wavelength,label lists that overwrite the respective information +##' from the ENVI header file. These data is then handed to +##' \code{\link[hyperSpec]{initialize}} +##' @param block.lines.skip,block.lines.size BIL and BIP ENVI files may be read in blocks of lines: +##' skip the first \code{block.lines.skip} lines, then read a block of \code{block.lines.size} +##' lines. If \code{block.lines.NULL}, the whole file is read. +##' Blocks are silently truncated at the end of the file (more precisely: to \code{header$lines}). +##' @param keys.hdr2data determines which fields of the header file should be +##' put into the extra data. Defaults to none. +##' +##' To specify certain entries, give character vectors containing the lowercase +##' names of the header file entries. +##' @param ... currently unused by \code{read.ENVI}, +##' \code{read.ENVI.Nicolet} hands those arguements over to \code{read.ENVI} +##' @param pull.header.lines (internal) flag whether multi-line header entries grouped by curly +##' braces should be pulled into one line each. +##' @return a \code{hyperSpec} object +##' @author C. Beleites, testing for the Nicolet files C. Dicko +##' @seealso \code{caTools::read.ENVI()} +##' +##' \code{\link[hyperSpec]{textio}} +##' @references This function was adapted from +##' \code{caTools::read.ENVI()}: +##' +##' Jarek Tuszynski (2008). caTools: Tools: moving window statistics, GIF, +##' Base64, ROC AUC, etc.. R package version 1.9. +##' @export +##' @keywords IO file +##' @importFrom utils modifyList +read.ENVI <- function (file = stop ("read.ENVI: file name needed"), headerfile = NULL, + header = list (), + keys.hdr2data = FALSE, + x = 0 : 1, y = x, + wavelength = NULL, label = list (), + block.lines.skip = 0, block.lines.size = NULL, ..., + pull.header.lines = TRUE) { + force (y) + + if (! file.exists (file)) + stop ("File not found:", file) + + if (! is.list (header)) # catch a common pitfall + if (is.character (header)) + stop ("header must be a list of parameters. Did you mean headerfile instead?") + else + stop ("header must be a list of parameters.") + + if (is.null (headerfile)) + headerfile <- .find.ENVI.header (file, headerfile) + + tmp <- readLines (headerfile) + tmp <- .read.ENVI.split.header (tmp, pull.lines = pull.header.lines) + header <- modifyList (tmp, header) + + ## read the binary file + spc <- .read.ENVI.bin (file, header, block.lines.skip = block.lines.skip, block.lines.size = block.lines.size) + + ## wavelength should contain the mean wavelength of the respective band + if (! is.null (header$wavelength)) { + header$wavelength <- as.numeric (unlist (strsplit (header$wavelength, "[,;[:blank:]]+"))) + + if (! any (is.na (header$wavelength)) && is.null (wavelength)) + wavelength <- header$wavelength + } + + ## set up spatial coordinates + x <- seq (0, header$samples - 1) * x [2] + x [1] + y <- seq (0, header$lines - 1) * y [2] + y [1] + + block.lines.size <- min (block.lines.size, nrow (spc) / header$samples) + x <- rep (x, each = block.lines.size) + + y <- y [block.lines.skip + seq_len (block.lines.size)] + y <- rep (y, header$samples) + + ## header lines => extra data columns + extra.data <- header [keys.hdr2data] + + if (.options$gc) gc () + + if (length (extra.data) > 0) { + extra.data <- lapply (extra.data, rep, length.out = length (x)) + data <- data.frame (x = x, y = y, extra.data) + } else { + data <- data.frame (x = x, y = y) + } + + if (.options$gc) gc () + + ## finally put together the hyperSpec object + spc <- new ("hyperSpec", data = data, spc = spc, wavelength = wavelength, labels = label) + + ## consistent file import behaviour across import functions + .fileio.optional (spc, file) +} + +.test (read.ENVI) <- function (){ + context ("read.ENVI") + + test_that ("full spectrum BIL", { + skip_if_not_fileio_available () + tmp <- read.ENVI ("fileio/ENVI/toy.bil") + expect_equal(tmp$filename [1], "fileio/ENVI/toy.bil") + expect_equal(nrow (tmp), 21913) + expect_equal(ncol (tmp), 4) + expect_equal(nwl (tmp), 4) + expect_equal(range (tmp$x), c (0, 149)) + expect_equal(range (tmp$y), c (0, 166)) + }) + + test_that ("block reading BIL", { + skip_if_not_fileio_available () + tmp <- read.ENVI ("fileio/ENVI/toy.bil", block.lines.skip = 50, block.lines.size = 40) + expect_equal(nrow (tmp), 40*150) + expect_equal(ncol (tmp), 4) + expect_equal(nwl (tmp), 4) + expect_equal(range (tmp$x), c (0, 149)) + expect_equal(range (tmp$y), c (50, 89)) + }) + + test_that ("block reading BIL: block longer than file", { + skip_if_not_fileio_available () + tmp <- read.ENVI ("fileio/ENVI/toy.bil", block.lines.skip = 150, block.lines.size = 50) + expect_equal(tmp$filename [1], "fileio/ENVI/toy.bil") + expect_equal(nrow (tmp), 870) # ! not simple lines x samples multiplication as empty spectra are removed ! + expect_equal(ncol (tmp), 4) + expect_equal(nwl (tmp), 4) + expect_equal(range (tmp$x), c (86, 149)) + expect_equal(range (tmp$y), c (150, 166)) + }) + + test_that ("Guessing messages", { + skip_if_not_fileio_available () + expect_message(read.ENVI ("fileio/ENVI/example2.img"), ".read.ENVI.bin: 'byte order' not given => Guessing 'little'") + }) + + test_that ("empty spectra", { + skip_if_not_fileio_available () + old <- hy.getOption("file.remove.emptyspc") + on.exit(hy.setOptions(file.remove.emptyspc = old)) + + hy.setOptions(file.remove.emptyspc = TRUE) + expect_known_hash(read.ENVI ("fileio/ENVI/example2.img"), "e987ac694ac1d6b81cd070f2f1680887") + + hy.setOptions(file.remove.emptyspc = FALSE) + expect_known_hash(read.ENVI ("fileio/ENVI/example2.img"), "9911a87b8c29c6d23af41a8de5a2508a") + + hy.setOptions(file.remove.emptyspc = old) + }) + +} diff --git a/R/read.asc.Andor.R b/R/read.asc.Andor.R new file mode 100644 index 00000000..f9a167ac --- /dev/null +++ b/R/read.asc.Andor.R @@ -0,0 +1,46 @@ +##' Import Raman Spectra/Maps from Andor Cameras/Solis ASCII files +##' +##' \code{read.asc.Andor} reads Andor Solis ASCII (\code{.asc}) files where the first column gives the wavelength +##' axes and the other columns the spectra. +##' +##' @title File Import Andor Solis +##' @param file filename or connection to ASCII file +##' @param ...,quiet,dec,sep handed to \code{\link[base]{scan}} +##' @return a hyperSpec object +##' @author Claudia Beleites +##' @seealso \code{vignette ("fileio")} for more information on file import and +##' +##' \code{\link{options}} for details on options. +##' @include read.txt.Witec.R +##' @include fileio.optional.R +##' @export +read.asc.Andor <- function (file = stop ("filename or connection needed"), + ..., quiet = TRUE, dec = ".", sep = ","){ + + ## check for valid data connection + .check.con (file = file) + + ## read spectra + tmp <- readLines (file) + nwl <- length (tmp) + txt <- scan (text = tmp, dec = dec, sep = sep, quiet = quiet, ...) + + dim (txt) <- c (length (txt) / nwl, nwl) + + ## fix: Andor Solis may have final comma without values + if (all (is.na (txt [nrow (txt), ]))) + txt <- txt [- nrow (txt), ] + + spc <- new ("hyperSpec", wavelength = txt [1, ], spc = txt [-1, ]) + + ## consistent file import behaviour across import functions + .fileio.optional (spc, file) +} + +.test (read.asc.Andor) <- function (){ + context ("read.asc.Andor") + test_that("Andor Solis .asc text files", { + skip_if_not_fileio_available() + expect_known_hash (read.asc.Andor("fileio/asc.Andor/ASCII-Andor-Solis.asc"), "9ead937f51") + }) +} diff --git a/R/read.asc.PerkinElmer.R b/R/read.asc.PerkinElmer.R new file mode 100644 index 00000000..bfe9e4a2 --- /dev/null +++ b/R/read.asc.PerkinElmer.R @@ -0,0 +1,36 @@ +#' File import filter PerkinElmer ASCII spectra +#' +#' Imports a single spectrum in PerkinElmer's ASCII format. This function is experimental. +#' +#' @param file filename (or connection) +#' @param ... further parameters are handed to \code{\link[hyperSpec]{read.txt.long}} +#' +#' @return hyperSpec object +#' @importFrom utils packageDescription +#' @export +#' +read.asc.PerkinElmer <- function (file = stop ("filename or connection needed"), ...){ + content <- readLines(con = file) + + message ("read.asc.PerkinElmer is experimental, hyperSpec so far has no test data for PE .asc files.", + " Please consider submitting your spectrum in an enhancement request to ", packageDescription("hyperSpec")$BugReports, + " in order to help the development of hyperSpec.") + + ## find beginning of DATA section + startDATA <- grep ("DATA", content) + + if (length (startDATA) != 1L) + stop ("read.asc.PerkinElmer so far can deal with single spectra files only.", + " Please file an enhancement request at", packageDescription("hyperSpec")$BugReports, + " with your file as an example or contact the maintainer (", + maintainer ("hyperSpec"), ").") + + ## Spectra values are stored + content <- content [- seq_len(startDATA)] + + spc <- read.txt.long (textConnection(content), header = FALSE, sep = "\t", ...) + spc$filename <- NULL # not meaningful due to textConnection use + + ## consistent file import behaviour across import functions + .fileio.optional (spc, file) +} diff --git a/R/read.ini.R b/R/read.ini.R new file mode 100644 index 00000000..a7b311b2 --- /dev/null +++ b/R/read.ini.R @@ -0,0 +1,52 @@ +##' Read INI files +##' +##' \code{read.ini} reads ini files of the form +##' +##' [section] +##' key = value +##' +##' into a list. +##' +##' \code{read.ini} sanitizes the element names and tries to convert scalars and comma separated +##' numeric vectors to numeric. +##' @export +##' @rdname read-ini +##' @param con connection or file name +##' @param skip number of lines to skip before first \code{[section]} starts +##' @param encoding see \code{\link[base]{readLines}} +##' @author C. Beleites +##' @return a list with one element per section in the .ini file, each containing a list with elements +##' for the key-value-pairs. +##' @keywords IO file + +read.ini <- function (con = stop ("Connection con needed."), skip = NULL, encoding = "unknown"){ + Lines <- readLines (con, encoding = encoding) + ## remove leading lines, if they are not a section + if (!is.null (skip)) + Lines <- Lines [-seq_len (skip)] + + sections <- grep ("[[].*[]]", Lines) + + content <- Lines [- sections] + ini <- as.list (gsub ("^.*=[[:blank:]]*", "", content)) # removes blanks behind equal sign + names (ini) <- .sanitize.name (gsub ("[[:blank:]]*=.*$", "", content)) # see above: removes in front of equal sign + + # try converting to numeric + tmp <- lapply (ini, function (x) strsplit (x, ",") [[1]]) + tmp <- suppressWarnings (lapply (tmp, as.numeric)) + numbers <- ! sapply (tmp, function (x) any (is.na (x))) + ini [numbers] <- tmp [numbers] + + tmp <- rep.int (seq_along (sections), diff (c (sections, length (Lines) + 1)) - 1) + ini <- split (ini, tmp) + + sections <- Lines [sections] + sections <- .sanitize.name (gsub ("^.(.*).$", "\\1", sections)) + names (ini) <- sections + + ini +} + +.sanitize.name <- function (name){ + gsub ("[^a-zA-Z0-9._]", ".", name) +} diff --git a/R/read.jdx.R b/R/read.jdx.R new file mode 100644 index 00000000..ade61dc6 --- /dev/null +++ b/R/read.jdx.R @@ -0,0 +1,386 @@ +##' JCAMP-DX Import for Shimadzu Library Spectra +##' +##' this is a first rough import function for JCAMP-DX spectra. +##' +##' So far, AFFN and PAC formats are supported for simple XYDATA, DATA TABLEs and PEAK TABLEs. +##' +##' NTUPLES / PAGES are not (yet) supported. +##' +##' DIF, DUF, DIFDUP and SQZ data formats are not (yet) supported. +##' +##' @note JCAMP-DX support is incomplete and the functions may change without notice. See +##' `vignette ("fileio")` and the details section. +##' @param filename file name and path of the .jdx file +##' @param encoding encoding of the JCAMP-DX file (used by [base::readLines()]) +##' @param header list with manually set header values +##' @param keys.hdr2data index vector indicating which header entries should be tranfered into the +##' extra data. Usually a character vector of labels (lowercase, without and dashes, blanks, +##' underscores). If `TRUE`, all header entries are read. +##' @param ... further parameters handed to the data import function, e.g. +##' +##' | parameter | meaning | default | +##' | --------- | ----------------------------------------------------------------------------------- | ------- | +##' | `xtol` | tolerance for checking calculated x values against checkpoints at beginning of line | XFACTOR | +##' | `ytol` | tolerance for checking Y values against MINY and MAXY | YFACTOR | +##' +##' @param NA.symbols character vector of text values that should be converted to `NA` +##' @param collapse.multi should hyperSpec objects from multispectra files be collapsed into one +##' hyperSpec object (if `FALSE`, a list of hyperSpec objects is returned). +##' @param wl.tolerance,collapse.equal see [collapse] +##' @return hyperSpec object +##' @author C. Beleites with contributions by Bryan Hanson +##' @md +##' @export +##' @importFrom utils head modifyList maintainer +read.jdx <- function(filename = stop ("filename is needed"), encoding = "", + header = list (), keys.hdr2data = FALSE, ..., + NA.symbols = c ("NA", "N/A", "N.A."), + collapse.multi = TRUE, + wl.tolerance = hy.getOption("wl.tolerance"), collapse.equal = TRUE){ + + ## see readLines help: this way, encoding is translated to standard encoding on current system. + file <- file (filename, "r", encoding = encoding, blocking = FALSE) + jdx <- readLines (file) + close (file) + + ## start & end of spectra header and data + hdrstart <- grep ("^[[:blank:]]*##TITLE=", jdx) + if (length (hdrstart) == 0L) stop ("No spectra found.") + + datastart <- grep (sprintf ("^[[:blank:]]*##(%s)=", paste (.DATA.START, collapse = "|")), jdx) + 1 + # V 4.24 uses ##XYDATA= + # V 5.00 uses ##DATA TABLE= ..., XYDATA + # V 5.01 MPI Golm files use ##PEAK TABLE= + + if (length (datastart) == 0L) stop ("No data found: unsupported data type.") + + dataend <- grep ("^[[:blank:]]*##", jdx) + dataend <- sapply (datastart, function (s) dataend [which (dataend > s)[1]]) - 1 + + spcend <- grep ("^[[:blank:]]*##END=[[:blank:]]*$", jdx) - 1 + + ## some checks + stopifnot (length (datastart) >= length (hdrstart)) + stopifnot (length (datastart) == length (dataend)) + stopifnot (all (hdrstart < spcend)) + stopifnot (all (datastart < dataend)) + + spc <- vector ("list", length (datastart)) + + for (s in seq_along (datastart)){ + ## look for header data + hdr <- modifyList (header, .jdx.readhdr (jdx [hdrstart [s] : (datastart [s] - 1)])) + + if (! is.null (hdr$page) || ! is.null (hdr$ntuples)) + stop ("NTUPLES / PAGEs are not yet supported.") + + if (s == 1L) { ## file header may contain overall settings + hdr <- modifyList (list (file = as.character (filename)), hdr) + header <- hdr [! names (hdr) %in% .key2names (.DATA.START)] + } + + ## evaluate data block + + if (grepl ("[A-DF-Za-df-z%@]", jdx[datastart [s]])) + stop ("SQZ, DIF, and DIFDUP forms are not yet supported.") + + spc [[s]] <- switch (hdr$.format, + `(X++(Y..Y))`= .jdx.TABULAR.PAC (hdr, jdx [datastart [s] : spcend [s]], ...), + `(XY..XY)` = .jdx.TABULAR.AFFN (hdr, jdx [datastart [s] : spcend [s]], ...), + + stop ("unknown JCAMP-DX data format: ", hdr$xydata) + ) + + ## process according to header entries + spc [[s]] <- .jdx.processhdr (spc [[s]], hdr, keys.hdr2data, ..., NA.symbols = NA.symbols) + } + + if (length (spc) == 1L) + spc <- spc [[1]] + else if (collapse.multi) + spc <- collapse (spc, wl.tolerance = wl.tolerance, collapse.equal = collapse.equal) + + ## consistent file import behaviour across import functions + .fileio.optional (spc, filename) +} + +### HEADER ------------------------------------------------------------------------------------------ + +.jdx.readhdr <- function (hdr){ + + ## get rid of comments. JCAMP-DX comments start with $$ and go to the end of the line. + hdr <- hdr [! grepl ("^[[:blank:]]*[$][$]", hdr)] + hdr <- gsub ("([[:blank:]][$][$].*)$", "", hdr) + + ## now join lines that are not starting with ##KEY= with the KEYed line before + nokey <- grep ("^[[:blank:]]*##.*=", hdr, invert = TRUE) + if (length (nokey) > 0) { + for (l in rev (nokey)) # these are few, so no optimization needed + hdr [l - 1] <- paste (hdr [(l - 1) : l], collapse = " ") + hdr <- hdr [-nokey] + } + + names <- .key2names (sub ("^[[:blank:]]*##(.*)=.*$", "\\1", hdr)) + + hdr <- sub ("^[[:blank:]]*##.*=[[:blank:]]*(.*)[[:blank:]]*$", "\\1", hdr) + hdr <- gsub ("^[\"'[:blank:]]*([^\"'[:blank:]].*[^\"'[:blank:]])[\"'[:blank:]]*$", "\\1", hdr) + i <- grepl ("^[[:blank:]]*[-+]?[.[:digit:]]*[eE]?[-+]?[.[:digit:]]*[[:blank:]]*$", hdr) & + ! names %in% c ("title", "datatype", "owner") + hdr <- as.list (hdr) + hdr [i] <- as.numeric (hdr [i]) + names (hdr) <- names + + ## e.g. Shimadzu does not always save XFACTOR and YFACTOR + if (is.null (hdr$yfactor)) hdr$yfactor <- 1 + if (is.null (hdr$xfactor)) hdr$xfactor <- 1 + + ## we treat XYDATA and PEAK TABLEs the same way + format <- hdr [names (hdr) %in% .key2names (.DATA.START)] + format <- format [! sapply (format, is.null)] + if (length (format) != 1) + stop ("contradicting format specification: please contact the maintainer (", + maintainer ("hyperSpec"), + "supplying the file you just tried to load.") + + hdr$.format <- format [[1]] + + hdr +} + +.jdx.processhdr <- function (spc, hdr, keys, ..., ytol = abs (hdr$yfactor), NA.symbols){ + + ## hdr$xfactor and $yfactor applied by individual reading functions + + ## check Y values + miny <- min (spc@data$spc) + if (! is.null (hdr$miny) && abs (hdr$miny - miny) > ytol) + message (sprintf ("JDX file inconsistency: Minimum of spectrum != MINY: difference = %0.3g (%0.3g * YFACTOR)", + miny - hdr$miny, + (miny - hdr$miny) / hdr$yfactor)) + + maxy <- max (spc@data$spc) + if (! is.null (hdr$maxy) && abs (hdr$maxy - maxy) > ytol) + message (sprintf ("JDX file inconsistency: Maximum of spectrum != MAXY: difference = %0.3g (%0.3g * YFACTOR)", + maxy - hdr$maxy, + (maxy - hdr$maxy) / hdr$yfactor)) + + + spc@label$.wavelength <- .jdx.xunits (hdr$xunits) + spc@label$spc <- .jdx.yunits (hdr$yunits) + + ## CONCENTRATIONS + if ("concentrations" %in% keys) + spc <- .jdx.hdr.concentrations (spc, hdr, NA.symbols = NA.symbols) + + # delete header lines already processed + hdr[c ("jcampdx", "xunits", "yunits", "xfactor", "yfactor", "firstx", "lastx", "npoints", + "firsty", "xydata", "end", "deltax", "maxy", "miny", + "concentrations")] <- NULL + if (is.character (keys)) + keys <- keys [keys %in% names (hdr)] + hdr <- hdr [keys] + + if (length (hdr) > 0L) + spc@data <- cbind (spc@data, hdr) + + spc +} + +### DATA FORMATS ------------------------------------------------------------------------------------ + +.jdx.TABULAR.PAC <- function (hdr, data, ..., xtol = hdr$xfactor){ + + ## regexp for numbers including scientific notation + .PATTERN.number <- "[-+]?[0-9]*[.]?[0-9]*([eE][-+]?[0-9]+)?" + if (is.null (hdr$firstx)) stop ("##FIRSTX= missing.") + if (is.null (hdr$lastx)) stop ("##LASTX= missing.") + if (is.null (hdr$npoints)) stop ("##NPOINTS= missing.") + + wl <- seq (hdr$firstx, hdr$lastx, length.out = hdr$npoints) + + ## remove starting X + y <- sub (paste0 ("^[[:blank:]]*", .PATTERN.number, "[[:blank:]]*(.*)$"), "\\2", data) + + ## add spaces between numbers if necessary + y <- gsub ("([0-9.])([+-])", "\\1 \\2", y) + + y <- strsplit (y, "[[:blank:]]+") + ny <- sapply (y, length) + + y <- as.numeric (unlist (y)) + + + if (length (y) != hdr$npoints) + stop ("mismatch between ##NPOINTS and length of Y data.") + + ## X checkpoints + x <- sub (paste0 ("^[[:blank:]]*(", .PATTERN.number, ")[[:blank:]]*.*$"), "\\1", data) + x <- as.numeric (x) * hdr$xfactor + diffx <- abs (wl [c (1, head (cumsum (ny) + 1, -1))] - x) + if (any (diffx > xtol)) + message ("JDX file inconsistency: X axis differs from checkpoints. ", + sprintf ("Maximum difference = %0.2g (%0.2g * XFACTOR)", + max (diffx), max (diffx) / hdr$xfactor)) + + y <- y * hdr$yfactor + + new ("hyperSpec", spc = y, wavelength = wl) +} + +.jdx.TABULAR.AFFN <- function (hdr, data, ...){ + + data <- strsplit (data, "[,;[:blank:]]+") + data <- unlist (data) + data <- matrix (as.numeric (data), nrow = 2) + + new ("hyperSpec", wavelength = data [1,] * hdr$xfactor, spc = data [2,]*hdr$yfactor) +} + +### UNITS ------------------------------------------------------------------------------------------- + +.jdx.xunits <- function (xunits){ + if (is.null (xunits)) + NULL + else + switch (tolower (xunits), + `1/cm` = expression (tilde (nu) / cm^-1), + micrometers = expression (`/` (lambda, micro * m)), + nanometers = expression (lambda / nm), + seconds = expression (t / s), + xunits) +} + +.jdx.yunits <- function (yunits){ + if (is.null (yunits)) + NULL + + else + switch (tolower (yunits), + transmittance = "T", + reflectance = "R", + absorbance = "A", + `kubelka-munk` = expression (`/` (1 - R^2, 2*R)), + `arbitrary units` = "I / a.u.", + yunits) +} + +## HDR processing functions +.jdx.hdr.concentrations <- function (spc, hdr, NA.symbols){ + + hdr <- strsplit (hdr$concentrations, "[)][[:blank:]]*[(]")[[1]] + hdr [length (hdr)] <- gsub (")$", "", hdr [length (hdr)]) + if (hdr [1] == "(NCU") + hdr <- hdr [-1] + else + message ("Unknown type of concentration specification in JDX file: ", hdr [1], ")") + + hdr <- simplify2array (strsplit (hdr, ",")) + hdr [hdr %in% NA.symbols] <- NA + + ## names + N <- hdr [1,] + N <- sub ("^([^[:alpha:]]*)", "", N) + N <- sub ("([^[:alpha:]]*)$", "", N) + N <- gsub ("([^[:alnum:]_-])", ".", N) + + ## concentrations + C <- t (as.numeric (hdr [2,])) + colnames (C) <- N + C <- as.data.frame (C) + spc@data <- cbind (spc@data, C) + + ## units + U <- as.list (hdr [3,]) + names (U) <- N + + spc@label <- modifyList (spc@label, U) + + spc +} + +## helpers +.DATA.START <- c ("XYDATA", "DATA TABLE", "PEAK TABLE") + +.key2names <- function (key){ + gsub ("[[:blank:]_-]", "", tolower (key)) +} + + +.test (read.jdx) <- function (){ + context ("test-read.jdx") + + files <- c (Sys.glob ("fileio/jcamp-dx/*.DX"), Sys.glob ("fileio/jcamp-dx/*.dx"), + Sys.glob ("fileio/jcamp-dx/*.jdx"), Sys.glob ("fileio/jcamp-dx/*.JCM"), + Sys.glob ("fileio/jcamp-dx/PE-IR/*.DX"), + "fileio/jcamp-dx/GMD_20111121_MDN35_ALK_JCAMP-shortened.txt" # MPI Golm, long version one is *slow* to read and exceeds memory limit + ) + + ## these files need special parameters: + files <- setdiff (files, c ("fileio/jcamp-dx/shimadzu.jdx", "fileio/jcamp-dx/virgilio.jdx")) + + test_that ("JCAMP-DX examples that need particular parameter sets",{ + skip_if_not_fileio_available () + + expect_known_hash(read.jdx ("fileio/jcamp-dx/shimadzu.jdx", encoding = "latin1", keys.hdr2data=TRUE), + "55c392d767f7a7f268e55540d4496fb1") + expect_known_hash(read.jdx ("fileio/jcamp-dx/virgilio.jdx", ytol = 1e-9), + "da4a725d23efe4a1888496f1739294c2") + }) + + unsupported <- c ("fileio/jcamp-dx/BRUKER2.JCM", + "fileio/jcamp-dx/BRUKER1.JCM", + "fileio/jcamp-dx/TESTSPEC.DX", + "fileio/jcamp-dx/TEST32.DX", + "fileio/jcamp-dx/SPECFILE.DX", + "fileio/jcamp-dx/ISAS_MS2.DX", + "fileio/jcamp-dx/ISAS_MS3.DX", # NTUPLES + "fileio/jcamp-dx/BRUKSQZ.DX", + "fileio/jcamp-dx/BRUKDIF.DX", + "fileio/jcamp-dx/BRUKNTUP.DX", # NTUPLES + "fileio/jcamp-dx/ISAS_CDX.DX", # PEAK ASSIGNMENTS= (XYMA) + "fileio/jcamp-dx/TESTFID.DX", # NTUPLES + "fileio/jcamp-dx/TESTNTUP.DX" # NTUPLES + ) + + checksums <- c (`fileio/jcamp-dx/AMA1.DX` = '5e8523b7022ec26cfb2541fdf929e997', + `fileio/jcamp-dx/AMA2.DX` = 'b336f71c592bc81de04d27bbbb9ede52', + `fileio/jcamp-dx/AMA3.DX` = '34344a42a232227c14ab5de5dc04e096', + `fileio/jcamp-dx/br_154_1.DX` = '232ef45bf818221c05927e311ac407a3', + `fileio/jcamp-dx/BRUKAFFN.DX` = '2498cac17635ad21e4998a3e3e7eebfa', + `fileio/jcamp-dx/BRUKPAC.DX` = '401cbaa375b79323ed0dcc30a135d11d', + `fileio/jcamp-dx/IR_S_1.DX` = '8d7032508efaf79fcc955f888d60cd8f', + `fileio/jcamp-dx/ISAS_MS1.DX` = '43017647aa339d8e7aaf3fadbdbbf065', + `fileio/jcamp-dx/LABCALC.DX` = '55ffdb250279aee967b2f65bbbf7dd5e', + `fileio/jcamp-dx/PE1800.DX` = '31ac39a5db243c3aa01e1978b9ab1aa3', + `fileio/jcamp-dx/testjose.dx` = '3b229eb9b8f229acd57783328d36a697', + `fileio/jcamp-dx/sign-rustam.jdx` = '386bf0b94baa5007e11e6af294895012', + `fileio/jcamp-dx/PE-IR/br_1.DX` = 'ab5fa92227625c287871d9e95091c364', + `fileio/jcamp-dx/PE-IR/br_2.DX` = 'eff5a1b37121a8902c0e62ebb5de0013', + `fileio/jcamp-dx/PE-IR/br_3.DX` = '2762712b1317631d32969624c97fa940', + `fileio/jcamp-dx/PE-IR/br_4.DX` = '11ddb20e9f6676f709827ececda360ab', + `fileio/jcamp-dx/PE-IR/br_5.DX` = 'ffa08204bfb2521dd8caa9d286eba519', + `fileio/jcamp-dx/PE-IR/fort_1.DX` = 'e808e243ae646c0526ba009f3ac3f80a', + `fileio/jcamp-dx/PE-IR/fort_2.DX` = 'df90e70f203294c8bfeac7a6141a552d', + `fileio/jcamp-dx/PE-IR/fort_3.DX` = 'd43a2c4fbb2598a5028a1406f83e3c3d', + `fileio/jcamp-dx/PE-IR/fort_4.DX` = '5382afba5c8b7fffdc26f00e129035c7', + `fileio/jcamp-dx/PE-IR/fort_5.DX` = '745c8b0fdad48a945e084d6e6cb9f0c6', + `fileio/jcamp-dx/PE-IR/lp_1.DX` = 'bcb0a1e1150bcd038a3e0e0e5a896b2b', + `fileio/jcamp-dx/PE-IR/lp_2.DX` = '7bc1c53f1363b2b02374442a1e8baa74', + `fileio/jcamp-dx/PE-IR/lp_3.DX` = 'eaa58c46360be604169e979c0fe2caeb', + `fileio/jcamp-dx/PE-IR/lp_4.DX` = '3b8d54eca48095d3f6c3eafc7b903a25', + `fileio/jcamp-dx/PE-IR/lp_5.DX` = 'a0eaa3ca11fb5a0dde83fa01296d72db', + `fileio/jcamp-dx/GMD_20111121_MDN35_ALK_JCAMP-shortened.txt` = 'fd2e686f5dc78691c22033805ed56463' + ) + + + test_that("JCAMP-DX example files", { + skip_if_not_fileio_available () + for (f in files [! files %in% unsupported]) { + spc <- read.jdx (f, ytol = 1e-6) + ## for wholesale updating of hashes (e.g. due to changes in initialize) + ## output filename hash pairs: + #cat (sprintf ("`%s` = '%s',\n", f, digest (spc))) + expect_known_hash(spc, checksums [f]) + } + }) +} \ No newline at end of file diff --git a/R/read.mat.Cytospec.R b/R/read.mat.Cytospec.R new file mode 100644 index 00000000..6ebd8690 --- /dev/null +++ b/R/read.mat.Cytospec.R @@ -0,0 +1,83 @@ +##' Import for Cytospec mat files +##' +##' These functions allow to import .mat (Matlab V5) files written by Cytospec. +##' +##' \code{read.cytomat} has been renamed to \code{read.mat.Cytospec} and is now +##' deprecated. Use \code{read.mat.Cytospec} instead. +##' +##' @param file The complete file name (or a connection to) the .mat file. +##' @param keys2data specifies which elements of the \code{Info} should be +##' transferred into the extra data +##' @param blocks which blocks should be read? \code{TRUE} reads all blocks. +##' @param ... \code{read.cytomat} for now hands all arguments to +##' \code{read.mat.Cytospec} for backwards compatibility. +##' @note This function is an ad-hoc implementation and subject to changes. +##' @return hyperSpec object if the file contains a single spectra block, +##' otherwise a list with one hyperSpec object for each block. +##' @author C. Beleites +##' @rdname read.mat.Cytospec +##' @seealso \code{R.matlab::readMat} +##' @export +##' @keywords IO file +read.mat.Cytospec <- function (file, keys2data = FALSE, blocks = TRUE) { + if (! requireNamespace ("R.matlab")) + stop ("package 'R.matlab' needed.") + + tmp <- R.matlab::readMat(file) + + ## read spectra matrix + spc <- tmp$C + d <- dim (spc) + + ## get wavelength information + fileinfo<-(tmp$Info[[1]]) + lwn <- as.numeric (fileinfo [grep ("LWN", fileinfo) - 1]) + hwn <- as.numeric (fileinfo [grep ("VWN", fileinfo) - 1]) + wn <- seq (lwn, hwn, length.out = dim (spc)[3]) + + ## x + y coordinates + x <- rep (1 : d [1], d [2]) + y <- rep (1 : d [2], each = d [1]) + + extra.data <- data.frame (x = x, y = y) + + nblocks <- d [4] + if (is.na (nblocks)) { # only one block => 3d array + nblocks <- 1 + dim (spc) <- c (dim (spc), 1L) + } + + blocks <- seq (nblocks) [blocks] + + if (any (is.na (blocks))) { + warning ("Dropping requests to unavailable blocks.") + blocks <- blocks [! is.na (blocks)] + } + + if (length (blocks) == 1L) { + result <- .block2hyperSpec (spc, extra.data, wn, blocks, file) + } else { + result <- list () + for (b in blocks) + result [[b]] <- .block2hyperSpec (spc, extra.data, wn, b, file) + } + + ## consistent file import behaviour across import functions + ## .fileio.optional is called inside .block2hyperSpec + + result +} + +.block2hyperSpec <- function (spc, df, wn, block, file) { + spc <- spc [,,, block] + + d <- dim (spc) + dim (spc) <- c (d [1] * d[2], d [3]) + + df$block <- block + + ## consistent file import behaviour across import functions + .fileio.optional (new ("hyperSpec", spc = spc, wavelength = wn, data = df), + filename = file) +} + diff --git a/R/read.mat.Witec.R b/R/read.mat.Witec.R new file mode 100644 index 00000000..c2722379 --- /dev/null +++ b/R/read.mat.Witec.R @@ -0,0 +1,24 @@ +## ' @export +##' @importFrom utils maintainer +read.mat.Witec <- function (file = stop ("filename or connection needed")){ + if (! requireNamespace ("R.matlab")) + stop ("package 'R.matlab' needed.") + + data <- R.matlab::readMat (file) + + if (length (data) > 1L) + stop ("Matlab file contains more than 1 object. This should not happen.\n", + "If it is nevertheless a WITec exported .mat file, please contact the ", + "maintainer (", maintainer("hyperSpec"), ") with\n", + "- output of `sessionInfo ()` and\n", + "- an example file") + spcname <- names (data) + data <- data [[1]] + + spc <- new ("hyperSpec", spc = data$data) + + spc$spcname <- spcname + + ## consistent file import behaviour across import functions + .fileio.optional (spc, file) +} diff --git a/R/read.spc.Kaiser.R b/R/read.spc.Kaiser.R new file mode 100644 index 00000000..024f64e4 --- /dev/null +++ b/R/read.spc.Kaiser.R @@ -0,0 +1,102 @@ +##' Import functions for Kaiser Optical Systems .spc files +##' +##' \code{read.spc.Kaiser} imports sets of .spc files written by Kaiser Optical Systems' Hologram +##' software. It may also serve as an example how to write wrapper functions for \code{read.spc} to +##' conveniently import specialized sets of .spc files. +##' +##' @title read Kaiser .spc files +##' @export +##' @rdname read-spc-Kaiser +##' @param files If \code{glob = TRUE}, \code{filename} can contain wildcards. +##' Thus all files matching the name pattern in \code{filename} can be +##' specified. +##' @param glob If \code{TRUE} the filename is interpreted as a wildcard +##' containing file name pattern and expanded to all matching file names. +##' @param keys.log2data,... All further arguments are handed over directly to \code{\link{read.spc}}. +##' @return hyperSpec +##' @examples +##' ## for examples, please see `vignette ("fileio", package = "hyperSpec")`. + +read.spc.Kaiser <- function (files, ..., glob = TRUE) { + + if (glob) + files <- Sys.glob (files) + + if (length (files) == 0){ + warning ("No files found.") + return (new ("hyperSpec")) + } + + f <- files [1] + + spc <- read.spc (f, no.object = TRUE, ...) + + data <- spc$data [rep (1L, length (files)),, drop = FALSE] + + spc$spc <- spc$spc [rep (1L, length (files)), , drop = FALSE] + + for (f in seq_along (files)){ + tmp <- read.spc (files [f], no.object = TRUE, ...) + + data [f, ] <- tmp$data + spc$spc [f, ] <- tmp$spc + } + + data$filename <- files + + spc <- new ("hyperSpec", wavelength = spc$wavelength, spc = spc$spc, data = data, + labels = tmp$label) + ## consistent file import behaviour across import functions + ## filenames already set + .fileio.optional(spc, file.keep.name = FALSE) +} + +##' \code{read.spc.KaiserMap} is a wrapper for \code{read.spc.Kaiser} with predefined \code{log2data} +##' to fetch the stage position for each file. +##' @rdname read-spc-Kaiser +##' @export +read.spc.KaiserMap <- function (files, keys.log2data = NULL, ...) { + keys.log2data <- c ('Stage_X_Position','Stage_Y_Position','Stage_Z_Position', keys.log2data) + + spc <- read.spc.Kaiser (files, keys.log2data = keys.log2data, ...) + + spc@data <- spc@data [, ! colnames (spc@data) %in% c ("z", "z.end"), drop = FALSE] + + colnames (spc@data) <- gsub ("Stage_(.)_Position", "\\L\\1", colnames (spc@data), perl = TRUE) + for (cln in c ("x", "y", "z")) + spc@data [[cln]] <- as.numeric (spc@data [[cln]]) + + spc@label$x <- expression (`/` (x, micro * m)) + spc@label$y <- expression (`/` (y, micro * m)) + spc@label$z <- expression (`/` (z, micro * m)) + spc@label$z.end <- NULL + + spc +} + +##' \code{read.spc.KaiserLowHigh} is a wrapper for \code{read.spc.Kaiser} for raw data that is saved +##' in separate files for low and high wavenumber range. The wavelength axis holds the pixel +##' numbers, which repeat for low and high wavenumber ranges. +##' +##' @rdname read-spc-Kaiser +##' @param type what kind of measurement was done? If \code{"map"}, \code{read.spc.KaiserMap} is used +##' instead of \code{read.spc.Kaiser}. +##' @export +read.spc.KaiserLowHigh <- function (files = stop ("file names needed"), + type = c ("single", "map"), + ..., glob = TRUE) { + + if (glob) + files <- Sys.glob (files) + + files <- matrix (files, nrow = 2) + + type <- match.arg (type) + switch (type, + single = cbind (read.spc.Kaiser (files [1,], ..., glob = FALSE), + read.spc.Kaiser (files [2,], ..., glob = FALSE)), + map = cbind (read.spc.KaiserMap (files [1,], ..., glob = FALSE), + read.spc.KaiserMap (files [2,], ..., glob = FALSE)) + ) + +} diff --git a/R/read.spc.R b/R/read.spc.R new file mode 100644 index 00000000..e3638df5 --- /dev/null +++ b/R/read.spc.R @@ -0,0 +1,926 @@ +### read.spc - Import Thermo Galactic's .spc file format into an hyperSpec Object +### +### C. Beleites 2009/11/29 +### +##################################################################################################### + +## Define constants --------------------------------------------------------------------------------- + +.nul <- as.raw (0) + +## header sizes +.spc.size <- c (hdr = 512, subhdr = 32, subfiledir = 12, loghdr = 64) + +.spc.default.keys.hdr2data <- c('fexper', 'fres', 'fsource') +.spc.default.keys.log2data <- FALSE + +## axis labeling ------------------------------------------------------------------------------------ + +## x-axis units ..................................................................................... +.spc.FXTYPE <- c (expression (`/` (x, "a. u.")), #0 + expression (`/` (tilde (nu), cm^-1)), + expression (`/` (lambda, (mu * m))), + expression (`/` (lambda, nm)), + expression (`/` (t, s)), + expression (`/` (t, min)), + expression (`/` (f, Hz)), + expression (`/` (f, kHz)), + expression (`/` (f, MHz)), + expression (`/` (frac (m, z), frac (u, e))), + expression (`/` (delta, ppm)), # 10 + expression (`/` (t, d)), + expression (`/` (t, a)), + expression (`/` (Delta*tilde (nu), cm^-1)), + expression (`/` (E, eV)), + NA, # old version file uses label in gcatxt + 'Diode No', + 'Channel', + expression (`/` (x, degree)), + expression (`/` (T, degree*F)), + expression (`/` (T, degree*C)), # 20 + expression (`/` (T, K)), + 'Data Point', + expression (`/` (t, ms)), + expression (`/` (t, micro*s)), + expression (`/` (t, ns)), + expression (`/` (f, GHz)), + expression (`/` (lambda, cm)), + expression (`/` (lambda, m)), + expression (`/` (lambda, mm)), + expression (`/` (t, h)) # 30 +) + +.spc.xlab <- function (x) { + if (is.character (x)) + x + else if (x <= length (.spc.FXTYPE) + 1) + .spc.FXTYPE [x + 1] + else + ## x = 255 is for double interferogram and supposed not to have a label. + ## Thus, returning NA is appropriate + NA +} + +## y-axis units ..................................................................................... +.spc.FYTYPE <- c (expression (`/` (I[Ref], "a. u.")), # -1 + expression (`/` (I, "a. u.")), + expression (`/` (I[IGRM], "a. u.")), + 'A', + expression (frac ((1 - R)^2, 2 * R)), + 'Counts', + expression (`/` (U, V)), + expression (`/` (y, degree)), + expression (`/` (I, mA)), + expression (`/` (l, mm)), + expression (`/` (U, mV)), + expression (-log (R)), # 10 + expression (`/` (y, '%')), + expression (`/` (I, 'a. u.')), + expression (I / I[0]), + expression (`/` (E, J)), + NA, # old version file uses label in gcatxt + expression (`/` (G, dB)), + NA, # old version file uses label in gcatxt + NA, # old version file uses label in gcatxt + expression (`/` (T, degree*F)), + expression (`/` (T, degree*C)), # 20 + expression (`/` (T, K)), + 'n', + 'K', # extinction coeaffictient + expression (Re (y)), + expression (Im (y)), + 'y (complex)', # complex + 'T', + 'R', + expression (`/` (I, 'a. u.')), + expression (`/` (I[Emission], 'a. u.')) +) +.spc.ylab <- function(x){ + if (is.character (x)) + x + else if (x <= 26) + .spc.FYTYPE [x + 2] + else if (x %in% 128 : 131) + .spc.FYTYPE [x - 99] + else + NA +} + +## helper functions --------------------------------------------------------------------------------- +### raw.split.nul - rawToChar conversion, splitting at \0 +##' @importFrom utils tail +raw.split.nul <- function (raw, trunc = c (TRUE, TRUE), firstonly = FALSE, paste.collapse = NULL) { + # todo make better truncation + trunc <- rep (trunc, length.out = 2) + + if (trunc [1] && raw [1] == .nul) + raw <- raw [-1] + if (trunc [2]) { + tmp <- which (raw > .nul) + if (length (tmp) == 0) + return ("") + raw <- raw [1 : tail (tmp, 1)] + } + if (raw [length (raw)] != .nul) + raw <- c (raw , .nul) + + tmp <- c (0, which (raw == .nul)) + + out <- character (length (tmp) - 1) + for (i in 1 : (length (tmp) - 1)) + if (tmp [i] + 1 < tmp [i + 1] - 1) + out [i] <- rawToChar (raw [(tmp [i] + 1) : (tmp [i + 1] - 1)]) + + if (length (out) > 1L){ + if (firstonly){ + + message ("multiple strings encountered in spc file ", paste (out, collapse = ", "), ": using only the first one.") + out <- out [1] + + } else if (! is.null (paste.collapse)){ + + if (hy.getOption ("debuglevel") > 2L) + message ("multiple strings encountered in spc file ", paste (out, collapse = ", "), " => pasting.") + + out <- paste (out, collapse = paste.collapse) + } + } + + out +} + +## file part reading functions ---------------------------------------------------------------------- + +## read file header ................................................................................. +## +## + +##' @importFrom utils maintainer +.spc.filehdr <- function (raw.data) { + ## check file format + + ## Detect Shimadzu SPC (which is effectively a variant of OLE CF format) + if (isTRUE (all.equal ( + raw.data[1:4], + as.raw(c('0xD0', '0xCF', '0x11', '0xE0')) + ))){ + stop ('Support for Shimadzu SPC file format (OLE CF) is not yet implemented') + } + + ## NEW.LSB = 75 supported, + ## NEW.MSB = 76 not supported (neither by many Grams software according to spc doc) + ## OLD = 77 not supported (replaced by new format in 1996) + if (raw.data [2] != 75) + stop ("Wrong spc file format version (or no spc file at all).\n", + "Only 'new' spc files (1996 file format) with LSB word order are supported.") + + hdr <- list (ftflgs = readBin (raw.data [ 1], "integer", 1, 1, signed = FALSE), + ## byte 2 is already interpreted + fexper = readBin (raw.data [ 3], "integer", 1, 1, signed = TRUE ), + fexp = readBin (raw.data [ 4], "integer", 1, 1, signed = TRUE ), + fnpts = readBin (raw.data [ 5 : 8], "integer", 1, 4 ), + ffirst = readBin (raw.data [ 9 : 16], "double", 1, 8 ), + flast = readBin (raw.data [ 17 : 24], "double", 1, 8 ), + fnsub = readBin (raw.data [ 25 : 28], "integer", 1, 4 ), + fxtype = readBin (raw.data [ 29], "integer", 1, 1, signed = FALSE), + fytype = readBin (raw.data [ 30], "integer", 1, 1, signed = FALSE), + fztype = readBin (raw.data [ 31], "integer", 1, 1, signed = FALSE), + fpost = readBin (raw.data [ 32], "integer", 1, 1, signed = TRUE ), + fdate = readBin (raw.data [ 33 : 36], "integer", 1, 4 ), + fres = raw.split.nul (raw.data [ 37 : 45], paste.collapse = "\r\n"), + fsource = raw.split.nul (raw.data [ 46 : 54], paste.collapse = "\r\n"), + fpeakpt = readBin (raw.data [ 55 : 56], "integer", 1, 2, signed = FALSE), + fspare = readBin (raw.data [ 57 : 88], "numeric", 8, 4 ), + fcmnt = raw.split.nul (raw.data [ 89 : 218], paste.collapse = "\r\n"), + fcatxt = raw.split.nul (raw.data [219 : 248], trunc = c (FALSE, TRUE) ), + flogoff = readBin (raw.data [249 : 252], "integer", 1, 4), #, signed = FALSE), + fmods = readBin (raw.data [253 : 256], "integer", 1, 4), #, signed = FALSE), + fprocs = readBin (raw.data [ 257], "integer", 1, 1, signed = TRUE ), + flevel = readBin (raw.data [ 258], "integer", 1, 1, signed = TRUE ), + fsampin = readBin (raw.data [259 : 260], "integer", 1, 2, signed = FALSE), + ffactor = readBin (raw.data [261 : 264], "numeric", 1, 4 ), + fmethod = raw.split.nul (raw.data [265 : 312]), + fzinc = readBin (raw.data [313 : 316], "numeric", 1, 4), #, signed = FALSE), + fwplanes = readBin (raw.data [317 : 320], "integer", 1, 4), #, signed = FALSE), + fwinc = readBin (raw.data [321 : 324], "numeric", 1, 4 ), + fwtype = readBin (raw.data [ 325], "integer", 1, 1, signed = TRUE ), + ## 187 bytes reserved + .last.read = .spc.size ['hdr'] + ) + + ## R doesn't have unsigned long int ................................. + if (any (unlist (hdr [c ("flogoff", "fmods", "fwplanes")]) < 0)) + stop ("error reading header: R does not support unsigned long integers.", + "Please contact the maintainer of the package.") + + + + ## do some post processing .......................................... + + experiments <- c ("General", "Gas Chromatogram", "General Chromatogram", "HPLC Chromatogram", + "NIR Spectrum", "UV-VIS Spectrum", "* reserved *", "X-ray diffraction spectrum", + "Mass Spectrum", "NMR Spectrum", "Raman Spectrum", "Fluorescence Spectrum", + "Atomic Spectrum", "Chroatography Diode Array Data") + hdr$fexper <- factor (hdr$fexper + 1, levels = seq_along (experiments)) + levels (hdr$fexper) <- experiments + + hdr$ftflgs <- .spc.ftflags (hdr$ftflgs) + + hdr$fdate <- ISOdate (year = hdr$fdate %/% 1048560, + month = hdr$fdate %/% 65536 %% 16, + day = hdr$fdate %/% 2048 %% 32, + hour = hdr$fdate %/% 64 %% 32, + min = hdr$fdate %% 64) + + ## interferogram ? + ## if not, hdr$fpeakpt is set to NULL + if (hdr$fytype == 1){ + if (hdr$fpeakpt != 0) + hdr$fpeakpt <- hdr$fpeakpt + 1 + } else { + hdr$fpeakpt <- NULL + } + + ## set the axis labels + if (hdr$ftflgs ['TALABS']) { + # TODO: find test data + tmp <- rep (0, 4) + tmp [seq_along (hdr$fcatxt)] <- nchar (hdr$fcatxt) + + if (tmp [1] > 0) hdr$fxtype <- hdr$fcatxt[1] + if (tmp [2] > 0) hdr$fytype <- hdr$fcatxt[2] + if (tmp [3] > 0) hdr$fztype <- hdr$fcatxt[3] + if (tmp [4] > 0) hdr$fwtype <- hdr$fcatxt[4] + } + hdr$fxtype <- .spc.xlab (hdr$fxtype) + hdr$fytype <- .spc.ylab (hdr$fytype) + hdr$fztype <- .spc.xlab (hdr$fztype) + hdr$fwtype <- .spc.xlab (hdr$fwtype) + + + ## File with subfiles with individual x axes? + ## Then there should be a subfile directory: + if (hdr$ftflgs ['TXYXYS'] && hdr$ftflgs ['TMULTI']){ + ## try to reject impossible values for the subfiledir offset + if (hdr$fnpts > length (raw.data) || + (hdr$fnpts > hdr$flogoff && hdr$flogoff > 0) || + hdr$fnpts < 512) + .spc.error (".spc.read.hdr", list (hdr = hdr), + "file header flags specify TXYXYS and TMULTI, ", + "but fnpts does not give a valid offset for the subfile directory.\n hdr$ftflgs = ", + paste (names (hdr$ftflgs)[hdr$ftflgs], collapse = " | "), + " (", sum (2^(0:7) [hdr$ftflgs]) , ")\n", + "You can try to read the file using hdr$ftflgs & ! TXYXYS (", + sum (2^(0 : 7) [hdr$ftflgs & c (TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, FALSE, TRUE)]), + "). This assumes that all subfiles do have the same x axis.\n\n") + + hdr$subfiledir <- hdr$fnpts + hdr$fnpts <- 0 + } else { + hdr$subfiledir <- 0 + } + + + ## some checks ...................................................... + + if (hdr$ftflgs ['TMULTI']){ + ## multiple spectra in file + if (hdr$fnsub <= 1) + if (hy.getOption ("debuglevel") >= 2L) + message ("spc file header specifies multiple spectra but only zero or one subfile.") + } else { + ## single spectrum file + if (hdr$fnsub == 0) + hdr$fnsub <- 1 + + if (hdr$fnsub > 1) { + warning ("spc file header specifies single spectrum file but ", hdr$fnsub, + " subfiles (spectra).\nOnly first subfile will be read.") + hdr$fnsub <- 1 + } + + if (hdr$ftflgs ['TRANDM']) + message ("spc file header: file type flag TRANDM encountered => Enforcing TMULTI.") + + if (hdr$ftflgs ['TORDRD']) + message ("spc file header: file type flag TORDRD encountered => Enforcing TMULTI.") + + if ((hdr$ftflgs ['TRANDM'] || hdr$ftflgs ['TORDRD']) && hdr$fnsub > 1) + hdr$ftflgs ['TMULTI'] <- TRUE + } + + if (hdr$ftflgs ['TXYXYS'] && ! hdr$ftflgs ['TXVALS']) { + warning ("spc file header: file type flag TXYXYS encountered => Enforcing TXVALS.") + hdr$ftflgs ['TXVALS'] <- TRUE + } + + if (hdr$fwplanes > 0) + warning ("w planes found! This is not yet tested as the developer didn't have access to such files.\n", + "Please contact the package maintainer ", maintainer ("hyperSpec"), + " stating whether the file was imported successfully or not.") + + hdr +} + +## read sub file header ............................................................................. +## +## needs header for consistency checks +## + +.spc.subhdr <- function (raw.data, pos, hdr) { + subhdr <- list (subflgs = raw.data [pos + ( 1)], + subexp = readBin (raw.data [pos + ( 2)], "integer", 1, 1, signed = TRUE), + subindx = readBin (raw.data [pos + ( 3 : 4)], "integer", 1, 2, signed = FALSE), + subtime = readBin (raw.data [pos + ( 5 : 8)], "numeric", 1, 4), + subnext = readBin (raw.data [pos + ( 9 : 12)], "numeric", 1, 4), + subnois = readBin (raw.data [pos + (13 : 16)], "numeric", 1, 4), + subnpts = readBin (raw.data [pos + (17 : 20)], "integer", 1, 4), #, signed = FALSE), + subscan = readBin (raw.data [pos + (21 : 24)], "integer", 1, 4), #, signed = FALSE), + subwlevel = readBin (raw.data [pos + (25 : 28)], "numeric", 1, 4)) + ## 4 bytes reserved + + ## R doesn't have unsigned long int ................................. + if (any (unlist (subhdr [c ("subnpts", "subscan")]) < 0)) + stop ("error reading subheader: R does not support unsigned long integers.", + "Please contact the maintainer of the package.") + + hdr$.last.read <- pos + .spc.size ['subhdr'] + + ## checking + if (subhdr$subexp == -128 && hdr$fexp != -128) + message ("subfile ", subhdr$subindx, " specifies data type float, but file header doesn't.", + "\n=> Data will be interpreted as float unless TMULTI is set.") + + if (subhdr$subnpts > 0 && subhdr$subnpts != hdr$fnpts && ! hdr$ftflgs ['TXYXYS']) + message ('subfile ', subhdr$subindx, ": number of points in file header and subfile header ", + "inconsistent. => Going to use subheader.") + + if (subhdr$subnpts == 0){ + if (hdr$ftflgs ['TXYXYS']) + message ('subfile ', subhdr$subindx, ': number of data points per spectrum not specified. ', + '=> Using file header information (', hdr$fnpts, ').') + subhdr$subnpts <- hdr$fnpts + } + + if (! hdr$ftflgs ['TXYXYS']) + if (hdr$fnpts != subhdr$subnpts) { + .spc.error (".spc.read.subhdr", list (hdr = hdr, subhdr = subhdr), + "hdr and subhdr differ in number of points per spectrum, ", + "but TXYXYS is not specified.\n hdr$ftflgs = ", + paste (names (hdr$ftflgs)[hdr$ftflgs], collapse = " | "), + " (", sum (2^(0:7) [hdr$ftflgs]) , ")\n", + "You can try to read the file using hdr$ftflgs | TMULTI | TXYXYS (", + sum (2^(0 : 7) [hdr$ftflgs | + c (FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, TRUE, FALSE)]), + ").\n\n") + } + +# str (subhdr) + ## according to .spc file documentation: + if (! hdr$ftflgs ['TMULTI']) + subhdr$subexp <- hdr$fexp + else if (hdr$fexp == -128 && subhdr$subexp != -128) { + message ("Header file specifies float data format, but subfile uses integer exponent. ", + "=> Using file header settings.") + subhdr$subexp <- -128 + } + + ## the z values + if (hdr$fzinc == 0) # should only happen for the first subfile... + hdr$fzinc = subhdr$subnext - subhdr$subtime + + if (subhdr$subindx == 0) + hdr$firstz <- subhdr$subtime + + if (subhdr$subtime == 0) + subhdr$subtime = subhdr$subindx * hdr$fzinc + hdr$firstz + + ## the w values + if (hdr$fwplanes > 0) { + if (hdr$fwinc == 0) { ## unevenly spaced w planes + + } + + # if (subhdr$subwlevel != 0) { + # subhdr$w <- subhdr$subwlevel + # + # } else if (subhdr$subindx %% hdr$fwplanes == 1) + # subhdr$w <- hdr$subhdr$w + hdr$fwinc + # else + # subhdr$w <- hdr$subhdr$w + } + + + hdr$subhdr <- subhdr + + hdr +} + +## read subfile directory ........................................................................... +## + +.spc.subfiledir <- function (raw.data, pos, nsub) { + dir <- data.frame (ssfposn = rep (NA, nsub), + ssfsize = rep (NA, nsub), + ssftime = rep (NA, nsub)) + + for (s in seq_len (nsub)){ + dir [s,] <- c (readBin (raw.data [pos + ( 1 : 4)], "integer", 1, 4), # , signed = FALSE), + readBin (raw.data [pos + ( 5 : 8)], "integer", 1, 4), # , signed = FALSE), + readBin (raw.data [pos + ( 9 : 12)], "numeric", 1, 4)) + pos <- pos + .spc.size ['subfiledir'] + } + + ## R doesn't have unsigned long int ................................. + if (any (dir [, 1:2] < 0)) + stop ("error reading subfiledir: R does not support unsigned long integers.", + "Please contact the maintainer of the package.") + +# dir$ssfposn <- dir$ssfposn + dir +} + +## read log block header ............................................................................ +## +##' @importFrom utils head tail +.spc.log <- function (raw.data, pos, log.bin, log.disk, log.txt, keys.log2data, + replace.nul = as.raw (255), iconv.from = "latin1", iconv.to = "utf8") { + + if (pos == 0) # no log block exists + return (list (data = list (), + log = list ())) + + loghdr <- list (logsizd = readBin (raw.data [pos + ( 1 : 4)], "integer", 1, 4), # , signed = FALSE), + logsizm = readBin (raw.data [pos + ( 5 : 8)], "integer", 1, 4), # , signed = FALSE), + logtxto = readBin (raw.data [pos + ( 9 : 12)], "integer", 1, 4), # , signed = FALSE), + logbins = readBin (raw.data [pos + (13 : 16)], "integer", 1, 4), # , signed = FALSE), + logdsks = readBin (raw.data [pos + (17 : 20)], "integer", 1, 4), # , signed = FALSE), + ## 44 bytes reserved + .last.read = pos + .spc.size ['loghdr'] + ) + + ## R doesn't have unsigned long int ................................. + if (any (unlist (loghdr) < 0)) + stop ("error reading log: R does not support unsigned long integers.", + "Please contact the maintainer of the package.") + + log <- list () + data <- list () + + ## read binary part of log + if (log.bin) + log$.log.bin <- raw.data [loghdr$.last.read + seq_len (loghdr$logbins)] + + ## read binary on-disk-only part of log + if (log.disk) + log$.log.disk <- raw.data [loghdr$.last.read + loghdr$logbins + seq_len (loghdr$logdsks)] + + ## read text part of log + if (log.txt & loghdr$logsizd > loghdr$logtxto) { + log.txt <- raw.data [pos + loghdr$logtxto + seq_len (loghdr$logsizd - loghdr$logtxto)] + if (tail (log.txt, 1) == .nul) # throw away nul at the end + log.txt <- head (log.txt, -1) + log.txt [log.txt == .nul] <- replace.nul + log.txt <- readChar (log.txt, length (log.txt), useBytes=T) + log.txt <- gsub (rawToChar (replace.nul), '\r\n', log.txt) + log.txt <- iconv (log.txt, iconv.from, iconv.to) + log.txt <- split.string (log.txt, "\r\n") ## spc file spec says \r\n regardless of OS + log.txt <- split.line (log.txt, "=") + data <- getbynames (log.txt, keys.log2data) + } + + list (log.long = log, extra.data = data) +} + + +## read y data ...................................................................................... +## + +.spc.read.y <- function (raw.data, pos, npts, exponent, word) { + if (exponent == -128) { # 4 byte float + + list (y = readBin (raw.data [pos + seq_len (npts * 4)], "numeric", npts, 4), + .last.read = pos + npts * 4) + + } else if (word) { # 2 byte fixed point integer = word + + list (y = readBin (raw.data [pos + seq_len (npts * 2)], "integer", npts, 2, signed = TRUE) * + 2 ^ (exponent - 16), + .last.read = pos + npts * 2) + + } else { # 4 byte fixed point integer = dword + list (y = readBin (raw.data [pos + seq_len (npts * 4)], "integer", npts, 4) * + 2 ^ (exponent - 32), + .last.read = pos + npts * 4) + } +} + +## read x data ...................................................................................... +## + +.spc.read.x <- function (raw.data, pos, npts) { + list (x = readBin (raw.data [pos + seq_len (npts * 4)], "numeric", npts, 4), + .last.read = pos + 4 * npts) +} + +## error ............................................................................................. +##' @importFrom utils str +.spc.error <- function (fname, objects, ...) { + cat ('ERROR in read.spc function ', fname, '\n\n') + for (i in seq_along (objects)) { + cat (names (objects) [i], ":\n") + str (objects [[i]], vec.len = 20) + } + stop (...) +} + +.spc.ftflags <- function (x) { + ftflgs <- as.logical (x %/% 2^(0 : 7) %% 2) + names (ftflgs) <- c ('TSPREC', 'TCGRAM', 'TMULTI', 'TRANDM', + 'TORDRD', 'TALABS', 'TXYXYS', 'TXVALS') + ftflgs +} + +##################################################################################################### + + +##' Import for Thermo Galactic's spc file format +##' These functions allow to import Thermo Galactic/Grams .spc files. +##' +##' @param filename The complete file name of the .spc file. +##' @param keys.hdr2data,keys.log2data character vectors with the names of parameters in the .spc +##' file's log block (log2xxx) or header (hdr2xxx) that should go into the extra data (yyy2data) of +##' the returned hyperSpec object. +##' +##' All header fields specified in the .spc file format specification (see +##' below) are imported and can be referred to by their de-capitalized names. +##' @param log.txt Should the text part of the .spc file's log block be read? +##' @param log.bin,log.disk Should the normal and on-disk binary parts of the +##' .spc file's log block be read? If so, they will be put as raw vectors +##' into the hyperSpec object's log. +##' @param hdr A list with fileheader fields that overwrite the settings of +##' actual file's header. +##' +##' Use with care, and look into the source code for detailed insight on the +##' elements of this list. +##' @param no.object If \code{TRUE}, a list with wavelengths, spectra, labels, +##' log and data are returned instead of a hyperSpec object. +##' +##' This parameter will likely be subject to change in future - use with care. +##' @return If the file contains multiple spectra with individual wavelength +##' axes, \code{read.spc} returns a list of hyperSpec objects. Otherwise the +##' result is a hyperSpec object. +##' +##' \code{read.spc.KaiserMap} returns a hyperSpec object with data columns x, +##' y, and z containing the stage position as recorded in the .spc files' +##' log. +##' @note Only a restricted set of test files was available for development. +##' Particularly, the w-planes feature could not be tested. +##' +##' If you have .spc files that cannot be read with these function, don't +##' hesitate to contact the package maintainer with your code patch or asking +##' advice. +##' @author C. Beleites +##' @rdname read-spc +##' @seealso \code{\link[hyperSpec]{textio}} +##' @references Source development kit and file format specification of .spc +##' files. +##' @export +##' @keywords IO file +##' @examples +##' +##' ## get the sample .spc files from ftirsearch.com (see above) +##' \dontrun{ +##' # single spectrum +##' spc <- read.spc ("BENZENE.SPC") +##' plot (spc) +##' +##' # multi-spectra .spc file with common wavelength axis +##' spc <- read.spc ('IG_MULTI.SPC') +##' spc +##' +##' # multi-spectra .spc file with individual wavelength axes +##' spc <- read.spc ("BARBITUATES.SPC") +##' plot (spc [[1]], lines.args = list (type = "h")) +##' } +##' +##' @importFrom utils modifyList +read.spc <- function (filename, + keys.hdr2data = FALSE, keys.log2data = FALSE, + log.txt = TRUE, log.bin = FALSE, log.disk = FALSE, + hdr = list (), + no.object = FALSE){ + + ## f contains the raw bytes of the file + + ## fpos marks the position of the last read byte + ## this is the same as the offset from beginning of the file (count 0) in the .spc definition + + f <- readBin (filename, "raw", file.info (filename)$size, 1) + + hdr <- modifyList (.spc.filehdr (f), hdr) + fpos <- hdr$.last.read + + if (! hdr$ftflgs ['TXYXYS']) { + if (! hdr$ftflgs ['TXVALS']) { + ## spectra with common evenly spaced wavelength axis + wavelength <- seq (hdr$ffirst, hdr$flast, length.out = hdr$fnpts) + } else { + ## spectra with common unevenly spaced wavelength axis + # if (! hdr$ftflgs ['TMULTI']) { # also for multifile with common wavelength axis + tmp <- .spc.read.x (f, fpos, hdr$fnpts) + wavelength <- tmp$x + fpos <- tmp$.last.read + } + #} + } + + ## otherwise (TXYXYS set) hdr$fnpts gives offset to subfile directory if that exists + + ## obtain labels from file hdr or from parameter + label <- list (.wavelength = hdr$fxtype, spc = hdr$fytype, + z = hdr$fztype, z.end = hdr$fztype) + + if (hdr$fwplanes > 0) + label$w <- hdr$fwtype + + ## prepare list for hyperSpec log and data.frame for extra data + + data <- list (z = NA, z.end = NA) + if (hdr$fwplanes > 0) + data <- c (data, w = NA) + + ## process the log block + tmp <- .spc.log (f, hdr$flogoff, + log.bin, log.disk, log.txt, + keys.log2data) + ## TODO: remove data2log + + data <- c (data, tmp$extra.data, getbynames (hdr, keys.hdr2data)) + + ## preallocate spectra matrix or list for multispectra file with separate wavelength axes + ## populate extra data + if (hdr$ftflgs ['TXYXYS'] && hdr$ftflgs ['TMULTI']) { + spc <- list () + data <- .prepare.hdr.df (data, nsubfiles = 1L) + } else { + spc <- matrix (NA, nrow = hdr$fnsub, ncol = hdr$fnpts) + data <- .prepare.hdr.df (data, nsubfiles = hdr$fnsub) + } + + ## read subfiles + if (hdr$subfiledir){ ## TXYXYS + hdr$subfiledir <- .spc.subfiledir (f, hdr$subfiledir, hdr$fnsub) + + for (s in seq_len (hdr$fnsub)) { + + hdr <- .spc.subhdr (f, hdr$subfiledir$ssfposn [s], hdr) + fpos <- hdr$.last.read + wavelength <- .spc.read.x (f, fpos, hdr$subhdr$subnpts) + fpos <- wavelength$.last.read + + y <- .spc.read.y (f, fpos, npts = hdr$subhdr$subnpts, exponent = hdr$subhdr$subexp, + word = hdr$ftflgs ['TSPREC']) + fpos <- y$.last.read + + data$z <- hdr$subhdr$subtime + data$z.end <- hdr$subhdr$subnext + + if (hdr$fwplanes > 0) + data$w <- hdr$subhdr$w + + if (! exists ('wavelength')) + .spc.error ("read.spc", list (hdr = hdr), + "wavelength not read. This may be caused by wrong header information.") + + spc [[s]] <- new ("hyperSpec", + spc = y$y, + wavelength = wavelength$x, + data = data, + labels = label) + } + + } else { ## multiple y data blocks behind each other + for (s in seq_len (hdr$fnsub)) { + hdr <- .spc.subhdr (f, fpos, hdr) + fpos <- hdr$.last.read + tmp <- .spc.read.y (f, fpos, npts = hdr$subhdr$subnpts, exponent = hdr$subhdr$subexp, + word = hdr$ftflgs ['TSPREC']) + fpos <- tmp$.last.read + + spc [s, ] <- tmp$y + + data [s, c('z', 'z.end')] <- unlist (hdr$subhdr [c('subtime', 'subnext')]) + + if (hdr$fwplanes > 0) + data [s, "w"] <- hdr$subhdr$w + } + } + + if (hdr$ftflgs ['TXYXYS'] && hdr$ftflgs ['TMULTI']) + ## list of hyperSpec objects + ## consistent file import behaviour across import functions + lapply (spc, .fileio.optional, filename = filename) + else if (no.object) + list (spc = spc, wavelength = wavelength, data = data, labels = label) + else { + if (hdr$fnsub > 1L && nrow (data) == 1L) + data <- data [rep (1L, hdr$fnsub), ] + + spc <- new ("hyperSpec", spc = spc, wavelength = wavelength, + data = data, labels = label) + + ## consistent file import behaviour across import functions + .fileio.optional (spc, filename) + } +} + + +.test (read.spc) <- function (){ + context ("read.spc") + + old.spc <- paste0 ("fileio/spc/", c ('CONTOUR.SPC', 'DEMO 3D.SPC', 'LC DIODE ARRAY.SPC')) + wplanes <- "fileio/spc/wplanes.spc" + other.spc <- setdiff (Sys.glob ("fileio/spc/*.[sS][pP][cC]"), c (old.spc, wplanes)) + + test_that ("old file format -> error", { + skip_if_not_fileio_available() + for (f in old.spc) + expect_error (read.spc (f)) + }) + + test_that("SPC SDK example files", { + skip_if_not_fileio_available() + + checksums <- c (`fileio/spc/BARBITUATES.SPC` = 'f49bbc854c', + `fileio/spc/barbsvd.spc` = '8a4d30672c', + `fileio/spc/BENZENE.SPC` = '6fc7901d15', + `fileio/spc/DRUG SAMPLE_PEAKS.SPC` = 'a600cd05e2', + `fileio/spc/DRUG SAMPLE.SPC` = '981e42bfb8', + `fileio/spc/FID.SPC` = 'ab65b6bb23', + `fileio/spc/HCL.SPC` = 'c657dd8279', + `fileio/spc/HOLMIUM.SPC` = '18dc3b1ca3', + `fileio/spc/IG_BKGND.SPC` = '0b083dab3a', + `fileio/spc/IG_MULTI.SPC` = 'fed652db3b', + `fileio/spc/IG_SAMP.SPC` = 'c72dd5fc70', + `fileio/spc/KKSAM.SPC` = '8e905a5500', + `fileio/spc/POLYR.SPC` = '78b5987d93', + `fileio/spc/POLYS.SPC` = '608c01f69b', + `fileio/spc/SINGLE POLYMER FILM.SPC` = '0e13423de4', + `fileio/spc/SPECTRUM WITH BAD BASELINE.SPC` = 'a05b77fada', + `fileio/spc/time.spc` = '98eabdd347', + `fileio/spc/TOLUENE.SPC` = 'eb08948be8', + `fileio/spc/TriVista-linear.spc` = '31b30dac34', + `fileio/spc/TriVista-normal.spc` = '15d5d219b0', + `fileio/spc/TUMIX.SPC` = '7f8db885fb', + `fileio/spc/TWO POLYMER FILMS.SPC` = '173a0bb6d3', + `fileio/spc/Witec-timeseries.spc` = '65f84533d8', + `fileio/spc/XYTRACE.SPC` = '28594b6078') + + for (f in other.spc ) { + ## for wholesale output of current hashes: + # cat (sprintf ("`%s` = '%s',\n", f, digest (read.spc (f)))) + expect_known_hash (read.spc (f), checksums [f]) + } + }) + + test_that("LabRam spc files", { + skip_if_not_fileio_available() + expect_known_hash (read.spc("fileio/spc.LabRam/LabRam-1.spc"), "d67562e4b4") + expect_known_hash (read.spc("fileio/spc.LabRam/LabRam-2.spc"), "c87094210a") + }) + + test_that ("Shimadzu spc files do not yet work", { + skip_if_not_fileio_available() + expect_error (read.spc("fileio/spc.Shimadzu/F80A20-1.SPC")) + + fname <- "fileio/spc.Shimadzu/UV-2600_labeled_DNA" + # TODO #102 - implement support for Shimadzu files + SHIMADZU_SPC_IMPLEMENTED <- F + if (SHIMADZU_SPC_IMPLEMENTED){ + # Compare data from SPC file and from CSV file. They should be equal + spc <- read.spc( paste0(fname, ".spc")) + expected <- read.txt.long(paste0(fname, '.csv'), sep=',') + expect_true(all.equal(spc$spc, expected$spc)) + }else{ + # IF NOT IMPLEMENTED + #expect_error (read.spc("fileio/spc.Shimadzu/F80A20-1.SPC"), regexp = 'Shimadzu SPC') + expect_error (read.spc(paste0(fname, ".spc")), + regexp = 'Support for Shimadzu SPC file format (OLE CF) is not yet implemented', + fixed = T) + } + }) + + + + test_that("Witec: some files supported", { + skip_if_not_fileio_available() + + expect_error (read.spc("fileio/spc.Witec/P_A32_006_Spec.Data 1.spc")) + expect_error (read.spc("fileio/spc.Witec/P_A32_007_Spec.Data 1.spc")) + + tmp <- read.spc("fileio/spc.Witec/Witec-Map.spc") + expect_known_hash (tmp, "d737a0a777") + ## no spatial information + expect_null(tmp$x) + expect_null(tmp$y) + + ## spectra numbered in z + tmp <- read.spc("fileio/spc.Witec/Witec-timeseries.spc") + expect_known_hash (tmp, "d6879317f2") + }) + + + ## Kaiser spc files tested mostly in Kaiser-specific read.spc.Kaiser* unit tests + + + test_that("wplanes",{ + skip_if_not_fileio_available() + skip ("wplanes do not yet work") + # wplanes + }) + + test_that("option file.keep.name", { + skip_if_not_fileio_available() + file.keep.name <- hy.getOption("file.keep.name") + + hy.setOptions(file.keep.name = FALSE) + expect_null (read.spc("fileio/spc.LabRam/LabRam-2.spc")$filename) + hy.setOptions(file.keep.name = TRUE) + expect_equal (read.spc("fileio/spc.LabRam/LabRam-2.spc")$filename, "fileio/spc.LabRam/LabRam-2.spc") + + hy.setOptions(file.keep.name = file.keep.name) + }) + + test_that("option file.remove.emptyspc", { + skip ("no spc files with empty spectra available so far") + skip_if_not_fileio_available() + file.remove.emptyspc <- hy.getOption("file.remove.emptyspc") + + hy.setOptions(file.remove.emptyspc = FALSE) + expect_equal (nrow (read.spc("")), NA) + hy.setOptions(file.remove.emptyspc = TRUE) + expect_equal (nrow (read.spc("")), NA) + + hy.setOptions(file.keep.name = file.remove.emptyspc) + }) + + test_that ("hdr2data", { + skip_if_not_fileio_available() + expect_equal (colnames (read.spc("fileio/spc.LabRam/LabRam-2.spc", keys.hdr2data = TRUE)), + c("z", "z.end", "ftflgs", "fexper", "fexp", "fnpts", "ffirst", + "flast", "fnsub", "fxtype", "fytype", "fztype", "fpost", "fdate", + "fres", "fsource", "fspare", "fcmnt", "fcatxt", "flogoff", "fmods", + "fprocs", "flevel", "fsampin", "ffactor", "fmethod", "fzinc", + "fwplanes", "fwinc", "fwtype", ".last.read", "subfiledir", "spc", + "filename") + ) + }) + + + test_that ("log2data", { + skip_if_not_fileio_available() + expect_equal(colnames (read.spc ("fileio/spc.Kaisermap/ebroAVII.spc", keys.log2data = TRUE)), + c("z", "z.end", "Grams_File_Name", "HoloGRAMS_File_Name", "Acquisition_Date_Time", + "Lambda", "Accuracy_Mode", "Dark_subtracted", "Dark_File_Name", + "Auto_New_Dark_Curve", "Background_subtracted", "Background_File_Name", + "Intensity_Corrected", "Intensity_Calibration_Available", "Intensity_Correction_File", + "Intensity_Correction_Threshold", "Intensity_Source_Correction", + "Intensity_Source_Correction_File", "Comment", "Cosmic_Ray_Filtering", + "Total_Cosmic_Count", "Exposure_Length", "Accumulations", "Accumulation_Method", + "Calibration_File", "Comment.1", "Temperature_Status", "Temperature", + "HoloGRAMS_File_Version", "File_Type", "Operator", "Stage_X_Position", + "Stage_Y_Position", "Stage_Z_Position", "AutoFocusUsed", "WLInterval", + "CalInterval", "FFTFillFactor", "FFTApT", "SamplingMethod", "Has_MultiPlex_Laser", + "External_Trigger", "Laser_Wavelength", "Default_Laser_Wavelength", + "Laser_Tracking", "Laser_Block_Active", "Pixel_Fill_minimum", + "Pixel_Fill_maximum", "Binning_Start", "Binning_End", "NumPoints", + "First", "last", "spc", "filename")) + }) +} + + +.prepare.hdr.df <- function (data, nsubfiles){ + ## the *type header elements are expressions. They need to be converted to character. + data <- lapply (data, function (x) { + if (mode (x) == "expression") + as.character (x) + else + x + }) + + ## convert vectors to matrix, otherwise the data.frame will contain one row per element. + ## matrices need to be protected during as.data.frame + + vector.entries <- which (sapply (data, length) > 1L) + for (v in vector.entries) + data [[v]] <- I (t (as.matrix (data [[v]]))) + + data <- as.data.frame (data, stringsAsFactors = FALSE) + data <- data [rep (1L, nsubfiles), ] + + for (v in vector.entries) + data [[v]] <- unclass (data [[v]]) # remove AsIs protection + + data +} diff --git a/R/read.spc.Shimadzu.R b/R/read.spc.Shimadzu.R new file mode 100644 index 00000000..d221aed0 --- /dev/null +++ b/R/read.spc.Shimadzu.R @@ -0,0 +1,12 @@ +read.spc.Shimadzu <- function(filename) { + stop ("Import of Shimadzu SPC file format (OLE CF) is not yet implemented.") +} + +.test (read.spc.Shimadzu) <- function (){ + context ("read.spc.Shimadzu") + + test_that("not implemented error", { + expect_error(read.spc.Shimadzu()) + }) + +} \ No newline at end of file diff --git a/R/read.spe.R b/R/read.spe.R new file mode 100644 index 00000000..d2e3625e --- /dev/null +++ b/R/read.spe.R @@ -0,0 +1,390 @@ +# Reading of SPE files, produced by Princeton Instruments spectrometers +# File format version 2.5 (Sept. 2002) + +# C. Beleites +# R. Kiselev +# July 2015 + + +##' Import WinSpec SPE file +##' +##' Import function for WinSpec SPE files (file version up to 3.0). The calibration +##' data (polynome and calibration data pairs) for x-axis are automatically +##' read and applied to the spectra. Note that the y-calibration data structure +##' is not extracted from the file since it is not saved there by WinSpec and is +##' always empty. +##' +##' @param filename Name of the SPE file to read data from +##' @param xaxis Units of x-axis, e.g. \emph{"file"}, \emph{"px"}, +##' \emph{"nm"}, \emph{"energy"}, \emph{"raman"}, \emph{...} +##' \code{read.spe} function automatically checks if the x-calibration data are +##' available and uses them (if possible) to reconstruct the xaxis +##' in the selected units. +##' @param acc2avg whether to divide the actual data set by the number of +##' accumulations, thus transforming \emph{accumulated} spectra to +##' \emph{averaged} spectra. WinSpec does not do this automatically, so the +##' spectral intensity is always proportional to the number of accumulations. +##' The flag \code{@@data$averaged} is automatically set to \code{TRUE}. +##' @param cts_sec whether to divide the actual data set by the exposure time, +##' thus going to count per second unit. +##' @param keys.hdr2data Which metadata from the file header should be saved to +##' the \code{Data} slot of a newly created hyperSpec object +##' +##' @return hyperSpec object +##' +##' @rdname read.spe +##' +##' @author R. Kiselev, C. Beleites +##' @export +read.spe <- function(filename, xaxis="file", acc2avg=F, cts_sec=F, + keys.hdr2data=c("exposure_sec", + "LaserWavelen", + "accumulCount", + "numFrames", + "darkSubtracted")){ + + hdr <- .read.spe.header(filename) + + # This is the size of one data point in bytes. WinSpec uses 2 bytes or 4 bytes only + data_size <- ifelse(hdr$datatype > 2, 2L, 4L) + data_chunk_size <- hdr$xdim * hdr$ydim * hdr$numFrames * data_size + + # Read the part of file that contains actual experimental data + raw.data <- readBin(filename, "raw", data_chunk_size + 4100, 1)[- (1:4100)] + + # Convert raw spectral data according to the datatype defined in the header + spc <- switch(hdr$datatype + 1, + readBin(raw.data, "double", length(raw.data)/4, 4), # float + readBin(raw.data, "integer", length(raw.data)/4, 4, signed=TRUE), # long + readBin(raw.data, "integer", length(raw.data)/2, 2, signed=TRUE), # int + readBin(raw.data, "integer", length(raw.data)/2, 2, signed=FALSE) # uint + ) + + # Create a structured data.frame that accomodates spectral data + dim(spc) <- c(hdr$xdim, hdr$ydim * hdr$numFrames) + extra_data <- data.frame ( + px.y = rep(seq_len(hdr$ydim), hdr$numFrames), + frame = rep(seq_len(hdr$numFrames), each=hdr$ydim) + ) + + # Extract selected items from the header. They will go to a new hyperSpec object + hdr2data <- hdr[keys.hdr2data] + if (length (hdr2data > 0)) + extra_data <- cbind (extra_data, hdr2data) + + # Create hyperSpec object + spc <- new("hyperSpec", spc=t(spc), data=extra_data, + labels = list (spc = "counts", .wavelength = "pixel number")) + + # For SPE 3.0 and above we need to read the XML header + if (hdr$fileFormatVer >= 3.0){ + spc@data$xml <- .read.spe.xml(filename) + } + + # Check if we should use display units specified in the SPE file + if (xaxis == "file") + xaxis = .fixunitname(hdr$xCalDisplayUnit) + + # Create a new x-axis, if required + xaxis <- .fixunitname(xaxis) + if (xaxis == "px") + return(.fileio.optional(spc, filename)) + + + if (! hdr$xCalValid) + warning("The calibration is NOT valid") + + # Recreate calibration function + polyorder <- hdr$xCalPolyOrder + coeffs <- hdr$xCalPolCoeffs[seq(polyorder + 1)] + + vM <- vanderMonde(spc@wavelength, polyorder) + + # Check if we have laser wavelength + if (hdr$LaserWavelen < 10) + hdr$LaserWavelen <- NULL + + # Perform convertion + spc@wavelength <- wlconv(src=.fixunitname(hdr$xCalPolyUnit), + dst=xaxis, + points=as.numeric(vM %*% coeffs), + laser=hdr$LaserWavelen) + + spc@label$.wavelength = switch(xaxis, + nm=expression("Wavelength, nm"), + invcm=expression(tilde(nu) / cm^-1), + ev=expression("Energy / eV"), + freq=expression(nu / THz), + raman=expression(Raman~shift / cm^-1)) + if (acc2avg){ + spc <- spc / hdr$accumulCount + spc@data$averaged <- T + } + if (cts_sec){ + spc <- spc / hdr$exposure_sec + spc@label$spc <- expression("counts / s") + } + + ## consistent file import behaviour across import functions + .fileio.optional (spc, filename) +} + +#' Read XML footer from SPE file format version 3.0 +#' +#' The new SPE file format, introduced in 2012, was designed to be backwards compatible with the +#' previous format 2.5. The most prominent change is the new plain text XML footer holding vast +#' experimental metadata that gets attached at the end of the file. Thus, the file contains 3 +#' blocks: a 4100-bytes long binary header, a chunk with spectral data, and the XML footer. +#' This function retrieves the XML footer converted to R list, and throws error if it is not available. +#' The file format specification is available at Princeton Instruments FTP server under name +#' 'SPE 3.0 File Format Specification'. +#' +#' This function relies on R package xml2 to work correctly +#' +#' @param filename - SPE filename +#' +#' @return xml data from the file converted to R list +#' @importFrom xml2 as_list read_xml +.read.spe.xml <- function(filename){ + as_list(read_xml(.read.spe.xml_string(filename))) +} + + +#' .read.spe.xml_string +#' +#' Read XML footer from SPE file format version 3.0 and return it as a long string +#' for subsequent parsing. Basically the purpose of this function is to check +#' that the file format version is 3.0 or above, and to find and read the +#' correct part of this file. +#' +#' @param filename - SPE filename +#' +#' @return string containing XML footer +.read.spe.xml_string <- function(filename){ + hdr <- .read.spe.header(filename) + + if (hdr$fileFormatVer < 3.0){ + stop(paste("This SPE file contains no XML data: file format version", + round(hdr$fileFormatVer, digits = 3), "< 3.0")) + return() + } + + data_size <- ifelse(hdr$datatype > 2, 2L, 4L) + data_chunk_size <- hdr$xdim * hdr$ydim * hdr$numFrames * data_size + + # Read the part of file that contains actual experimental data + raw_bytes <- readBin(filename, "raw", file.info(filename)$size, 1)[- (1:(4100+data_chunk_size))] + readChar(raw_bytes, length(raw_bytes)) +} + + + +##' @describeIn read.spe Read only header of a WinSpec SPE file (version 2.5) +##' @return hdr list with \code{key=value} pairs +.read.spe.header <- function(filename){ + # Read the 4100-byte long binary header from the SPE file and parse it + + # Load the header + raw.data <- readBin(filename, "raw", 4100, 1) + + # Extract some items from the 4100 bytes-long file header + hdr <- list ( + hwVersion = readBin(raw.data[1 :2 ], "integer", 1, 2, signed=TRUE ), # uint16 + xDimDet = readBin(raw.data[7 :8 ], "integer", 1, 2, signed=FALSE), # uint16 + mode = readBin(raw.data[9 :10 ], "integer", 1, 2, signed=TRUE ), # uint16 + exposure_sec = readBin(raw.data[11 :14 ], "double", 1, 4), # float32 + vChipXDim = readBin(raw.data[15 :16 ], "integer", 1, 2, signed=TRUE ), # int8 + vChipYDim = readBin(raw.data[17 :18 ], "integer", 1, 2, signed=TRUE ), # int8 + yDimDet = readBin(raw.data[19 :20 ], "integer", 1, 2, signed=FALSE), # uint16 + date = readBin(raw.data[21 :30 ], "character", 1, 10 ), # char + detTemperature = readBin(raw.data[37 :40 ], "double", 1, 4), # float32 + xdim = readBin(raw.data[43 :44 ], "integer", 1, 2, signed=FALSE), # uint16 + shutterMode = readBin(raw.data[51 :52 ], "integer", 1, 2, signed=FALSE), # uint16 + specCenterWlNm = readBin(raw.data[73 :76 ], "double", 1, 4), # float32 + datatype = readBin(raw.data[109 :110 ], "integer", 1, 2, signed=TRUE ), # int8 + darkSubtracted = readBin(raw.data[151 :152 ], "integer", 1, 2, signed=FALSE), # int8 + timeLocal = readBin(raw.data[173 :179 ], "character", 1, 7 ), # char + timeUTC = readBin(raw.data[180 :186 ], "character", 1, 7 ), # char + gain = readBin(raw.data[199 :200 ], "integer", 1, 2, signed=FALSE), # uint16 + comments = readBin(raw.data[201 :600 ], "character", 1, 400 ), # char + ydim = readBin(raw.data[657 :658 ], "integer", 1, 2, signed=FALSE), # uint16 + accumulCount = readBin(raw.data[669 :672 ], "integer", 1, 4), # uint32 + readoutTime = readBin(raw.data[673 :676 ], "double", 1, 4), # float32 + swVersion = readBin(raw.data[688 :704 ], "character", 1, 16 ), # char + kinTrigMode = readBin(raw.data[725 :726 ], "integer", 1, 2, signed=TRUE ), # int16 + expRepeatCount = readBin(raw.data[1419:1422], "integer", 1, 4, signed=TRUE ), # int32 + expAccumCount = readBin(raw.data[1423:1426], "integer", 1, 4, signed=TRUE ), # int32 + hwAccumFlag = readBin(raw.data[1433:1434], "integer", 1, 2, signed=TRUE ), # int16 + cosmicApplied = readBin(raw.data[1439:1440], "integer", 1, 2, signed=TRUE ), # int16 + cosmicType = readBin(raw.data[1441:1442], "integer", 1, 2, signed=TRUE ), # int16 + numFrames = readBin(raw.data[1447:1450], "integer", 1, 4), # int32 + shutterType = readBin(raw.data[1475:1476], "integer", 1, 2, signed=TRUE ), # int16 + readoutMode = readBin(raw.data[1481:1482], "integer", 1, 2, signed=TRUE ), # int16 + kinWindowSize = readBin(raw.data[1483:1484], "integer", 1, 2, signed=TRUE ), # int16 + clkSpeed = readBin(raw.data[1485:1486], "integer", 1, 2, signed=TRUE ), # int16 + computerIface = readBin(raw.data[1487:1488], "integer", 1, 2, signed=TRUE ), # int16 + fileFormatVer = readBin(raw.data[1993:1996], "double", 1, 4, signed=TRUE ), # float32 + + # X Calibration Structure + xCalOffset = readBin(raw.data[3001:3008], "double", 1, 8, signed=TRUE ), # float64 + xCalFactor = readBin(raw.data[3009:3016], "double", 1, 8, signed=TRUE ), # float64 + xCalDisplayUnit= readBin(raw.data[3017 ], "integer", 1, 1, signed=FALSE), # uint8 + xCalValid = readBin(raw.data[3099 ], "integer", 1, 1, signed=FALSE), # uint8 + xCalInputUnit = readBin(raw.data[3100 ], "integer", 1, 1, signed=FALSE), # uint8 + xCalPolyUnit = readBin(raw.data[3101 ], "integer", 1, 1, signed=FALSE), # uint8 + xCalPolyOrder = readBin(raw.data[3102 ], "integer", 1, 1, signed=FALSE), # uint8 + xCalPointCount = readBin(raw.data[3103 ], "integer", 1, 1, signed=FALSE), # uint8 + xCalPxPos = readBin(raw.data[3104:3183], "double", 10, 8, signed=TRUE ), # float64 + xCalValues = readBin(raw.data[3184:3263], "double", 10, 8, signed=TRUE ), # float64 + xCalPolCoeffs = readBin(raw.data[3264:3311], "double", 6, 8, signed=TRUE ), # float64 + LaserWavelen = readBin(raw.data[3312:3319], "double", 1, 8, signed=TRUE ) # float64 + ) + + # Convert magic numbers into human-readable unit strings + spe_units <- c("pixel", "pixel", "data", "user units", "nm", "cm-1", "Raman shift") + hdr$xCalDisplayUnit <- spe_units[hdr$xCalDisplayUnit + 1] + hdr$xCalInputUnit <- spe_units[hdr$xCalInputUnit + 1] + hdr$xCalPolyUnit <- spe_units[hdr$xCalPolyUnit + 1] + + return(hdr) +} + + +##' @describeIn read.spe Plot the WinSpec SPE file (version 2.5) and show the +##' calibration points stored inside of it (x-axis calibration) +##' @export +spe.showcalpoints <- function(filename, xaxis="file", acc2avg=F, cts_sec=F){ + + hdr <- .read.spe.header(filename) + xaxis <- .fixunitname(xaxis) + + # Check if we should use display units specified in the SPE file + if (xaxis == "file") + xaxis <- .fixunitname(hdr$xCalDisplayUnit) + if (xaxis == "px"){ + xaxis <- hdr$xCalPolyUnit + warning("Cannot show calibration data in pixels") + } + + # Open file, make plot and mark position of all peaks stored inside the file + # in the x-calibration structure + spc <- read.spe(filename, xaxis, acc2avg, cts_sec) + rng <- max(spc) - min(spc) + ylims <- c(min(spc), max(spc) + 0.3*rng) + if (dim(spc@data$spc)[1] > 1) + plot(spc, plot.args=list(ylim=(ylims)), "spcprctl5") + else + plot(spc, plot.args=list(ylim=(ylims))) + title(basename(filename)) + + + if (hdr$xCalPointCount == 0){ + warning("No calibration data! Nothing to show") + return("") + } + + markpeak(spc, wlconv(src=hdr$xCalInputUnit, + dst=.fixunitname(xaxis), + points=hdr$xCalValues, + laser=hdr$LaserWavelen)) +} + + + +############# UNIT TESTS ################ +.test (read.spe) <- function (){ + + # Filenames + polystyrene <- "fileio/spe/polystyrene.SPE" + blut1 <- "fileio/spe/blut1.SPE" + spe3 <- "fileio/spe/spe_format_3.0.SPE" + + # unit tests for `read.spe` itself + ################################## + test_that("read.spe correctly extracts spectral data from SPE file", { + skip_if_not_fileio_available () + fname <- blut1 + expect_true(file.exists(fname)) + spc <- read.spe(fname) + + # We check that specific values are correctly read from a particular file + # This test is not generic and works with this and only this SPE file + expect_equal(spc$spc[[5, 77]], 1484) + expect_equal(spc$spc[[2, 811]], 606) + expect_equal(spc@wavelength[621], 2618.027) + }) + + + test_that("read.spe detects an XML footer in SPE 3.0 file", { + skip_if_not_fileio_available () + fname <- spe3 + expect_true(file.exists(fname)) + spc <- read.spe(fname) + + expect_true('xml' %in% names(spc@data)) + }) + + + test_that("read.spe correctly parses XML footer with SPE 3.0 file and saves metadata in hyperSpec object", { + skip_if_not_fileio_available () + fname <- spe3 + expect_true(file.exists(fname)) + spc <- read.spe(fname) + + expect_equal(attr(spc$xml$SpeFormat$DataFormat$DataBlock, "pixelFormat"), "MonochromeFloating32") + }) + + + # unit tests for helper functions of `read.spe` (whose name starts with .) + ########################################################################## + test_that (".read.spe.xml_string throws error on SPE format below v3.0", { + skip_if_not_fileio_available () + fname <- blut1 + expect_true(file.exists(fname)) + + expect_error(.read.spe.xml_string(fname), regexp = '*no XML*') + }) + + + test_that ("We can correctly read XML footer from SPE3.0 file", { + skip_if_not_fileio_available () + expect_true(file.exists(spe3)) + + xml_file <- paste0(spe3, "_metadata.xml") + actual_xml_footer <- .read.spe.xml_string(spe3) + expected_xml_footer <- readChar(xml_file, file.info(xml_file)$size) + expect_equal(actual_xml_footer, expected_xml_footer) + }) + + + test_that (".read.spe.xml correctly parses the XML footer and can extract the actual data", { + skip_if_not_fileio_available () + expect_true(file.exists(spe3)) + + # Read XML footer and convert it to R list + x <- .read.spe.xml(spe3) + expect_true(is.list(x)) + + # Check values of some elements + expect_true(is.list(x)) + expect_true('SpeFormat' %in% names(x)) + + # Check file format version and namespace URL + expect_equal(attr(x$SpeFormat, 'version'), "3.0") + expect_equal(attr(x$SpeFormat, 'xmlns'), "http://www.princetoninstruments.com/spe/2009") + + # Check that some children are present + expect_true('DataFormat' %in% names(x$SpeFormat)) + expect_true('Calibrations' %in% names(x$SpeFormat)) + expect_true('DataHistories' %in% names(x$SpeFormat)) + expect_true('GeneralInformation' %in% names(x$SpeFormat)) + + # Check that we can correctly extract file creation date + info <- x$SpeFormat$GeneralInformation$FileInformation + expect_equal(attr(info, 'created'), "2018-01-26T16:31:09.0979397+01:00") + + # Check that we can correctly extract pixel format and laser line + expect_equal(attr(x$SpeFormat$DataFormat$DataBlock, "pixelFormat"), "MonochromeFloating32") + expect_equal(attr(x$SpeFormat$Calibrations$WavelengthMapping, "laserLine"), "785") + }) +} diff --git a/R/read.txt.Horiba.R b/R/read.txt.Horiba.R new file mode 100644 index 00000000..ee6e10b7 --- /dev/null +++ b/R/read.txt.Horiba.R @@ -0,0 +1,48 @@ +##' Read ASCII (.txt) files exported by Horiba's Labspec software (LabRAM spectrometers) +##' +##' \code{read.txt.Horiba.xy} reads maps, i.e. .txt files where the first two columns give x and y coordinates. +##' +##' @title Import Horiba Labspec exported ASCII files +##' @param file connection (file name and path) to the .txt file +##' @param cols,header,sep,row.names,check.names,... further parameters are handed over to \code{\link[hyperSpec]{read.txt.wide}} +##' @rdname read.txt.Horiba +##' @author C. Beleites +##' @return hyperSpec object +##' @export +read.txt.Horiba <- function (file, cols = c (spc = "I / a.u.", + .wavelength = expression (Delta*tilde(nu) / cm^-1)), + header = TRUE, sep = "\t", row.names = NULL, + check.names = FALSE, ...){ + spc <- read.txt.wide (file, cols = cols, + header = header, sep = sep, row.names = row.names, + check.names = check.names, ...) + + ## consistent file import behaviour across import functions + ## is already provided by read.txt.wide + + spc +} + +##' @rdname read.txt.Horiba +##' @export +read.txt.Horiba.xy <- function (file, ...){ + read.txt.Horiba (file = file, + cols = c (x = expression (x / mu*m), + y = expression (y / mu*m), + spc = "I / a.u.", + .wavelength = expression (Delta*tilde(nu) / cm^-1)), + ...) +} + +##' \code{read.txt.Horiba.t} reads time series, i.e. .txt files with the time in the first column +##' @rdname read.txt.Horiba +##' @export +read.txt.Horiba.t <- function (file, header = TRUE, sep = "\t", row.names = NULL, + check.names = FALSE, ...){ + read.txt.Horiba (file, + cols = c (t = "t / s", + spc = "I / a.u.", + .wavelength = expression (Delta*tilde(nu) / cm^-1)), + ...) +} + diff --git a/R/read.txt.Renishaw.R b/R/read.txt.Renishaw.R new file mode 100644 index 00000000..12b169ea --- /dev/null +++ b/R/read.txt.Renishaw.R @@ -0,0 +1,202 @@ +##' import Raman measurements from Renishaw ASCII-files +##' +##' import Raman measurements from Renishaw (possibly compressed) .txt file. +##' +##' The file may be of any file type that can be read by +##' \code{\link[base]{gzfile}} (i.e. text, or zipped by gzip, bzip2, xz or +##' lzma). .zip zipped files need to be read using \code{read.zip.Renishaw}. +##' +##' Renishaw .wxd files are converted to .txt ASCII files by their batch +##' converter. They come in a "long" format with columns (y x | time | z)? +##' wavelength intensity. The first columns depend on the data type. +##' +##' The corresponding possibilities for the \code{data} argument are: +##' \tabular{lll}{ \code{data} \tab columns \tab \cr \code{"spc"} \tab wl int +##' \tab single spectrum \cr \code{"zspc"}, \code{"depth"} \tab z wl int \tab +##' depth profile\cr \code{"ts"} \tab t wl int \tab time series\cr +##' \code{"xyspc"} \tab y x wl int \tab 2d map\cr } +##' +##' This function allows reading very large ASCII files, but it does not work +##' on files with missing values (\code{NA}s are allowed). +##' +##' If the file is so large that it sould be read in chunks and \code{nspc} is +##' not given, \code{read.txt.Renishaw} tries to guess it by using \code{wc} +##' (if installed). +##' +##' @aliases read.txt.Renishaw read.zip.Renishaw +##' @param file file name or connection +##' @param data type of file, one of "spc", "xyspc", "zspc", "depth", "ts", see +##' details. +##' @param nlines number of lines to read in each chunk, if 0 or less read +##' whole file at once. +##' +##' \code{nlines} must cover at least one complete spectrum,i.e. \code{nlines} +##' must be at least the number of data points per spectrum. Reasonable +##' values start at \code{1e6}. +##' @param nspc number of spectra in the file +##' @param ... Arguments for \code{read.txt.Renishaw} +##' @return the \code{hyperSpec} object +##' @export +##' @author C. Beleites +##' @seealso \code{\link{read.txt.long}}, \code{\link{read.txt.wide}}, +##' \code{\link[base]{scan}} +##' @keywords IO file +##' @importFrom utils head +read.txt.Renishaw <- function (file = stop ("file is required"), + data = "xyspc", nlines = 0, nspc = NULL){ + cols <- switch (data, + spc = NULL, + xyspc = list (y = expression ("/" (y, mu * m)), + x = expression ("/" (x, mu * m))), + zspc = , + depth = list (z = expression ("/" (z, mu * m))), + ts = list (t = "t / s"), + stop ("unknown format for Renishaw .txt files.") + ) + cols <- c (cols, list (.wavelength = expression (Delta * tilde(nu) / cm^-1) , + spc = "I / a.u.")) + + if (!is (file, "connection")) + file <- gzfile (file, "r") + + on.exit(close(file)) + + first <- scan(file, nlines = 1, quiet = TRUE) + + ncol <- length (first) + + if (ncol == 0) + return (new ("hyperSpec")) + + if (ncol != length (cols)) + stop (paste ("File has", ncol, "columns, while 'cols' gives", length (cols))) + + fbuf <- matrix (c (first, scan (file, quiet = TRUE, nlines = nlines)), + ncol = ncol, byrow = TRUE) + + ## wavelength axis + wl <- rep (TRUE, nrow (fbuf)) + for (i in seq_len (ncol (fbuf) - 2)) + wl [wl] <- fbuf [wl, i] == fbuf [1, i] + + wl <- fbuf[wl, ncol - 1] + + ## if the file is to be read in chunks + ## try to find out how many lines it has + if (is.null (nspc)) + if (nlines > 0){ + nspc <- count_lines (summary(file)$description, nlines) + if (is.null (nspc)) + stop ("Failed guessing nspc.") + else { + message ("Counted ", nspc, " lines = ", nspc / length (wl), " spectra.") + nspc <- nspc / length (wl) + } + } else { + nspc <- nrow (fbuf) / length (wl) + } + + data <- matrix (NA, ncol = ncol - 2, nrow = nspc) + colnames (data) <- head (names (cols), -2) + pos.data <- 0 + + spc <- numeric (nspc * length (wl)) + pos.spc <- 0 + + while (length (fbuf > 0)){ + if (nlines > 0) cat (".") + spc [pos.spc + seq_len (nrow (fbuf))] <- fbuf [, ncol] + pos.spc <- pos.spc + nrow (fbuf) + + tmp <- fbuf [fbuf[, ncol - 1] == wl [1], seq_len (ncol - 2), drop = FALSE] + + data [pos.data + seq_len (nrow (tmp)), ] <- tmp + pos.data <- pos.data + nrow (tmp) + + fbuf <- matrix (scan (file, quiet = TRUE, nlines = nlines), ncol = ncol, + byrow = TRUE) + + if (length (fbuf > 0) & ! all(unique (fbuf[, ncol - 1]) %in% wl)) + stop ("Wavelengths do not correspond to that of the other chunks. ", + "Is the size of the first chunk large enough to cover a complete ", + "spectrum?") + } + if (nlines > 0) cat ("\n") + + spc <- matrix (spc, ncol = length (wl), nrow = nspc, byrow = TRUE) + + spc <- orderwl (new ("hyperSpec", spc = spc, data = as.data.frame (data), + wavelength = wl, label = cols)) + + ## consistent file import behaviour across import functions + .fileio.optional (spc, file) +} + +.test (read.txt.Renishaw) <- function(){ + context("read.txt.Renishaw") + + test_that("single spectrum", { + skip_if_not_fileio_available () + tmp <- read.txt.Renishaw ("fileio/txt.Renishaw/paracetamol.txt", "spc") + expect_equal(dim (tmp), c(nrow = 1L, ncol = 2L, nwl = 4064L)) + }) + + test_that("time series spectrum, gzipped", { + skip_if_not_fileio_available () + tmp <- read.txt.Renishaw ("fileio/txt.Renishaw/laser.txt.gz", "ts") + expect_equal(dim (tmp), c(nrow = 84L, ncol = 3L, nwl = 140L)) + expect_equal(colnames (tmp), c("t", "spc", "filename")) + }) + + test_that("map (= default)", { + skip_if_not_fileio_available () + tmp <- read.txt.Renishaw ("fileio/txt.Renishaw/chondro.txt", "xyspc") + expect_equal(dim (tmp), c(nrow = 875L, ncol = 4L, nwl = 1272L)) + expect_equal(colnames (tmp), c("y", "x", "spc", "filename")) + + tmp <- read.txt.Renishaw ("fileio/txt.Renishaw/chondro.txt") + expect_equal(dim (tmp), c(nrow = 875L, ncol = 4L, nwl = 1272L)) + expect_equal(colnames (tmp), c("y", "x", "spc", "filename")) + + }) + + test_that("chunked reading", { + skip_if_not_fileio_available () + + ## error on too small chunk size + expect_error (read.txt.Renishaw ("fileio/txt.Renishaw/chondro.txt", nlines = 10), + "Wavelengths do not correspond") + + tmp <- read.txt.Renishaw ("fileio/txt.Renishaw/chondro.txt", nlines = 1e5) + expect_equal(dim (tmp), c(nrow = 875L, ncol = 4L, nwl = 1272L)) + }) + + test_that("compressed files",{ + skip_if_not_fileio_available () + + files <- Sys.glob("fileio/txt.Renishaw/chondro.*") + files <- grep ("[.]zip", files, invert = TRUE, value = TRUE) # .zip is tested with read.zip.Renishaw + for (f in files){ + expect_equal(dim (read.txt.Renishaw (!!f)), c(nrow = 875L, ncol = 4L, nwl = 1272L)) + } + }) +} + +##' @export +##' @param txt.file name of the .txt file in the .zip archive. Defaults to zip +##' file's name with suffix .txt instead of .zip +##' @rdname read.txt.Renishaw +read.zip.Renishaw <- function (file = stop ("filename is required"), + txt.file = sub ("[.]zip", ".txt", basename (file)), ...){ + read.txt.Renishaw (file = unz (file, filename = txt.file, "r"), ...) +} + +.test (read.zip.Renishaw) <- function(){ + context("read.zip.Renishaw") + + test_that("compressed files",{ + skip_if_not_fileio_available () + + expect_equal(dim (read.zip.Renishaw ("fileio/txt.Renishaw/chondro.zip")), c(nrow = 875L, ncol = 4L, nwl = 1272L)) + }) +} diff --git a/R/read.txt.Shimadzu.R b/R/read.txt.Shimadzu.R new file mode 100644 index 00000000..5e54f128 --- /dev/null +++ b/R/read.txt.Shimadzu.R @@ -0,0 +1,204 @@ + ##' Reads Shimadzu GCxGC-qMS - Spectra Files (.txt) as exported by Shimadzu Chrome Solution (v. 2.72) + ##' Mass Spectrometer: Shimadzu GCMS-QP 2010 Ultra (www.shimadzu.com) + ##' + ##' @note This is a first rough import function and the functions may change without notice. + ##' @param filename file name and path of the .txt file + ##' @param encoding encoding of the txt file (used by \code{\link[base]{readLines}}) + ##' @param quiet suppress printing of progress + ##' @return list of spectra tables + ##' @author Bjoern Egert + ##' @export + ##' @importFrom utils read.table + read.txt.Shimadzu <- function(filename, encoding = "", quiet = TRUE) + { + + # A file consists of several sections ([Headers]) + # Each Section consists of: + # [Header] + # [MS Spectrum] + # [MC Peak Table] + # [MS Similarity Search Results for Spectrum Process Table] + + impLines <- readLines(con = filename, n = -1L, ok = TRUE, warn = TRUE, encoding = encoding) + length(impLines) + + # total numbers of pos1 and pos2 and pos3 are equal + pos1 <- which(impLines == "[Header]") # row positions of Headers + pos2 <- which(impLines == "[MC Peak Table]") # row positions of peak info tables + pos3 <- which(impLines == "[MS Similarity Search Results for Spectrum Process Table]") # row positions of peak annotations + pos4 <- which(impLines == "[MS Spectrum]") # row positions of peak spectra + + headers <- length(pos1) # number of header sections + + # link spectra to headers + pos4Li = list() + for (i in 1:(length(pos1))) + { + header <- pos1[i] + headerNext <- pos1[i+1] + tmp <- (pos4>header & pos4headerLast + pos4Li[[length(pos1)]] = pos4[tmp] + + for (i in 1:(length(pos4Li)-1)) + { + tmp <- length(pos4Li[[i]]) + vec <- pos4Li[[i]] + pos4Li[[i]] <- c(vec,pos1[i+1]) + } + #End position + pos4Li[[headers]] <- c(pos4Li[[headers]], length(impLines)) + + # Check + stopifnot(impLines[1] == "[Header]") + stopifnot(length(pos1) == length(pos2)) + stopifnot(length(pos2) == length(pos3)) + stopifnot(length(pos3) == length(pos4Li)) + + + # ----------------- 1. Import: gather [Header] informations + + # gather in lists + res2Li <- list() # Peak Info + res3Li <- list() # Peak Similarity + res4Li <- list() # Mass Spectra + + for(header in 1:headers) + { + + if(!quiet) cat("header:", header, "\n") + + # ----------------- 1a. Import: "[MC Peak Table]" + + start <- pos2[header]+3 + stop <- pos3[header]-2 + + peakMat <- read.table(file = filename, skip = start-1, nrows = stop - start, + header = TRUE, sep = ";", dec =".", comment.char = "", + stringsAsFactors = FALSE, quote = "\"'") + + # maybe faster than above ... + #Peak#;Ret.Time;Proc.From;Proc.To;Mass;Area;Height;A/H;Conc.;Mark;Name;Ret. Index" + #colnames <- strsplit(impLines[start], split = ";")[[1]] + #strsplit(impLines[(start+1):stop], split = ";")[[1]] + #peakMat <- matrix(impLines[(start+1):stop], ncol = length(colnames), byrow = TRUE) + + res2Li[[header]] <- peakMat + + # ----------------- 1b. Import: "[MS Similarity Search Results for Spectrum Process Table]" + + start <- pos3[header]+2 + stop <- pos4Li[[header]][1]-1 + if(stop-start!=0) # no annotation hits + { + simMat <- read.table(file = filename, skip = start - 1, , nrows = stop - start, + header = TRUE, sep = ";", dec =".", comment.char = "", + stringsAsFactors = FALSE, quote = "") # quote = "\"'" + }else simMat <- NA + res3Li[[header]] <- simMat + + # ----------------- 1c. Import: "[MS Spectrum]" + + specLi <- list() # list of all spectra in current header section + for(i in 1:(length(pos4Li[[header]])-1)) + { + + # extract spectra + start <- pos4Li[[header]][i]+5 # data starts 5 rows below + stop <- pos4Li[[header]][i+1]-1 # last data row before new Spectrum + + # debug purposes + if(! quiet) cat("header:", header, "spec:", i, "start:", start, "stop:", stop, "\n") + + # ------------------- + # Catch expeption, when peak is reported in peakMat, but [MS Spectrum] is not available: like example: (keyrow: # of Peaks; 0) + # [MS Spectrum] + # # of Peaks;0 + # Raw Spectrum;38.343 (scan : 68686);Base Peak;m/z 0.00 (Inten : 0) + # Background;38.342 <-> 38.348 (scan : 68684 <-> 68697) + # m/z;Absolute Intensity;Relative Intensity + # [MS Spectrum] + + checkRow <- pos4Li[[header]][i]+1 # check for: "# of Peaks;0" + isEmptySpec <- impLines[checkRow] == "# of Peaks;0" + if(isEmptySpec) + { + # special case: create dummy spectrum with zero intensities + spec <- cbind("m/z" = 1:100, "Absolute Intensity" = 0, "Relative Intensity" = 0) + + } + # ------------------- + + if(!isEmptySpec) + { + spec <- scan(file = filename, sep = ";", skip = start - 1, nlines = (stop - start) + 1, + dec = ".", quiet = TRUE) + spec <- matrix(spec,ncol = 3, byrow = T) + colnames(spec) <- c("m/z", "Absolute Intensity", "Relative Intensity") + } + + specLi[[i]] <- spec + + } + + res4Li[[header]] <- specLi + + } # for(headers) + + + # ----------------- 2. combine all headers sections + + # res2Li --> m2 + m2 <- as.data.frame(res2Li[[1]]) + m2 <- cbind(header = 1, m2) + for(header in 2:headers) + { + tmp <- as.data.frame(res2Li[[header]]) + tmp <- cbind(header=header,tmp) + m2 <- rbind(m2,tmp) + } + + # res3Li --> m3 + # In a header section there may be not annotation tables + m3 <- do.call("rbind", res3Li) + # add header Nr. + tmpMat <- lapply(X = res3Li, FUN = nrow) + tmpMat <- as.matrix(tmpMat) + tmp <- vector(length = 0) + for(i in 1:nrow(tmpMat)) + { + if(tmpMat[i,1] == "NULL" ) tmp <- c(tmp,i) + if(tmpMat[i,1] != "NULL" ) tmp <- c(tmp,rep(x = i, times = tmpMat[i,1])) + } + m3 <- cbind(header = tmp, m3) + m3 <- m3 [, c("header", "Spectrum.", "Hit..", "SI", "CAS..", "Name", "Mol.Weight", "Mol.Form", + "Retention.Index")] # select most important columns + tmp <- complete.cases(m3) + m3 <- m3[tmp,] + + # res4Li --> m4 + tmp <- colnames(res4Li[[1]][[1]]) + m4 <- matrix(NA, nrow = 1, ncol = length(tmp)) + colnames(m4) <- tmp + m4 <- cbind(header = NA, spectra = NA, m4) # header number and spectra number as first columns + for(header in 1:headers) + { + for(spectra in 1:length(res4Li[[header]])) + { + tmp <- as.matrix(res4Li[[header]][[spectra]]) + tmp <- cbind(header, spectra, tmp) + m4 <- rbind(m4, tmp) + }# spectras + }# header + mode(m4) <- "numeric" + m4 <- m4[-1,] + + return(list(peakInfo = m2, peakAnnotate = m3, peakMasses = m4)) + + } + diff --git a/R/read.txt.Witec.R b/R/read.txt.Witec.R new file mode 100644 index 00000000..66f741ed --- /dev/null +++ b/R/read.txt.Witec.R @@ -0,0 +1,434 @@ +##' Import Raman Spectra/Maps from Witec Instrument via ASCII files +##' +##' \code{read.txt.Witec} reads Witec ASCII files where the first column gives the wavelength +##' axes and the other columns the spectra. \code{read.dat.Witec} reads Witec's ASCII exported data +##' which comes in separate files with x and y data. +##' +##' Parameters `nwl` (automatically calculated now) and `remove.zerospc` +##' (use \code{\link{hy.setOptions} (file.remove.emptyspc = TRUE)} instead) have +##' been deprecated and removed. +##' +##' @title File Import Witec Raman +##' @param file filename or connection to ASCII file +##' @param points.per.line number of spectra in x direction of the map +##' @param lines.per.image number of spectra in y direction +##' @param type type of spectra: \code{single} for single spectra (including time series), \code{map} for imaging data. +##' @param hdr.label WITec Project exports the spectra names (contain information of map position or number of spectra) within the \code{file}. +##' @param hdr.units WITec Project exports the spectra units within the \code{file}. +##' @param encoding character encoding, see \code{\link[base]{readLines}} +##' @param ...,quiet handed to \code{\link[base]{scan}} +##' @return a hyperSpec object +##' @author Claudia Beleites and Marcel Dahms +##' @seealso \code{vignette ("fileio")} for more information on file import and +##' +##' \code{\link{options}} for details on options. +##' @export +##' @importFrom utils head +read.txt.Witec <- function (file = stop ("filename or connection needed"), + points.per.line = NULL, + lines.per.image = NULL, + type = c ("single", "map"), + hdr.label = FALSE, + hdr.units = FALSE, + encoding = "unknown", + ..., + quiet = TRUE){ + + ## check for valid data connection + .check.con (file = file) + + ## check for valid input + type <- .check.valid (type, hdr = NULL, points.per.line, lines.per.image) + + ## manage possible header lines by export function 'Table' in WITec Control/Project (version 4) + skip <- hdr.label + hdr.units + + ## read spectra + tmp <- readLines (file, encoding = encoding) + nwl <- length (tmp) - skip + txt <- scan (text = tmp, skip = skip, quiet = quiet, encoding = encoding, ...) + + dim (txt) <- c (length (txt) / nwl, nwl) + + hdr <- head (tmp, skip) + + ## fix: Witec/Andor may have final comma without values + if (all (is.na (txt [nrow (txt), ]))) + txt <- txt [- nrow (txt), ] + + spc <- new ("hyperSpec", wavelength = txt [1, ], spc = txt [-1, ]) + + ## add header information + if (hdr.label | hdr.units) + spc <- .parse.hdr (spc, hdr, hdr.label) + + ## add map information + if (type == "map") + spc <- .parse.xy (spc, hdr, hdr.label, points.per.line, lines.per.image) + + ## consistent file import behaviour across import functions + .fileio.optional (spc, file) +} + +.test (read.txt.Witec) <- function (){ + context ("read.txt.Witec") + + test_that("Map with neither header nor label lines", { + skip_if_not_fileio_available() + expect_error (suppressWarnings (read.txt.Witec("fileio/txt.Witec/Witec-Map_no.txt", + type = "map", hdr.units = TRUE, hdr.label = TRUE) + )) + expect_warning (read.txt.Witec("fileio/txt.Witec/Witec-Map_no.txt", type = "map")) + + spc <- read.txt.Witec("fileio/txt.Witec/Witec-Map_no.txt", type = "map", points.per.line = 5, lines.per.image = 5) + expect_known_hash (spc, hash = "6816a87cf3") + }) + + test_that("Map: one of points.per.line and lines.per.image is sufficient", { + skip_if_not_fileio_available() + spc <- read.txt.Witec("fileio/txt.Witec/Witec-Map_no.txt", type = "map", lines.per.image = 5) + expect_known_hash (spc, hash = "6816a87cf3") + + spc <- read.txt.Witec("fileio/txt.Witec/Witec-Map_no.txt", type = "map", points.per.line = 5) + expect_known_hash (spc, hash = "6816a87cf3") + }) + + test_that("Map with label line but no units header", { + skip_if_not_fileio_available() + spc <- read.txt.Witec("fileio/txt.Witec/Witec-Map_label.txt", type = "map", hdr.units = FALSE, hdr.label = TRUE) + expect_known_hash(spc, "c4a384d6b2") + }) + + test_that("Map with units header line but no labels", { + skip_if_not_fileio_available() + expect_warning (spc <- read.txt.Witec("fileio/txt.Witec/Witec-Map_unit.txt", type = "map", hdr.units = TRUE, hdr.label = FALSE)) + expect_null(spc$x) + expect_null(spc$y) + + spc <- read.txt.Witec("fileio/txt.Witec/Witec-Map_unit.txt", type = "map", hdr.units = TRUE, hdr.label = FALSE, + points.per.line = 5, lines.per.image = 5) + expect_known_hash(spc, "86ecc17360") + }) + + test_that("Map with header and label lines", { + skip_if_not_fileio_available() + spc <- read.txt.Witec("fileio/txt.Witec/Witec-Map_full.txt", type = "map", hdr.units = TRUE, hdr.label = TRUE) + expect_known_hash(spc, "76db6397fc") + }) + + test_that ("Map can be read as time series", { + skip_if_not_fileio_available() + spc <- read.txt.Witec("fileio/txt.Witec/Witec-Map_no.txt") + expect_known_hash(spc, "6213aefc6b") + expect_null(spc$x) + expect_null(spc$y) + }) + + + test_that ("parameter default type = 'single'", { + skip_if_not_fileio_available() + spc <- read.txt.Witec("fileio/txt.Witec/Witec-timeseries_no.txt") + expect_known_hash(spc, "1a8c3be079") + }) + + test_that("Time series with neither header nor label lines", { + skip_if_not_fileio_available() + spc <- read.txt.Witec("fileio/txt.Witec/Witec-timeseries_no.txt") + expect_known_hash(spc, "1a8c3be079") + }) + + test_that("Time series with label line but no units header", { + skip_if_not_fileio_available() + spc <- read.txt.Witec("fileio/txt.Witec/Witec-timeseries_label.txt", hdr.units = FALSE, hdr.label = TRUE) + expect_known_hash(spc, "4cb098a671") + }) + + test_that("Time series with units header line but no labels", { + skip_if_not_fileio_available() + spc <- read.txt.Witec("fileio/txt.Witec/Witec-timeseries_unit.txt", hdr.units = TRUE, hdr.label = FALSE) + + expect_known_hash(spc, "6b6abac4e8") + }) + + test_that("Time series with header and label lines", { + skip_if_not_fileio_available() + expect_error (spc <- read.txt.Witec("fileio/txt.Witec/Witec-timeseries_full.txt")) + + spc <- read.txt.Witec("fileio/txt.Witec/Witec-timeseries_full.txt", hdr.units = TRUE, hdr.label = TRUE) + expect_known_hash(spc, "db5b1a5db0") + }) + + test_that("encoding", { + skip_if_not_fileio_available() + spc <- read.txt.Witec("fileio/txt.Witec/Witec-timeseries_full.txt", hdr.units = TRUE, hdr.label = TRUE, + encoding = "ascii") + expect_known_hash(spc, "db5b1a5db0") + }) +} + +##' @rdname read.txt.Witec +##' @param filex filename wavelength axis file +##' @param filey filename intensity file +##' @export +read.dat.Witec <- function (filex = stop ("filename or connection needed"), + filey = sub ("-x", "-y", filex), + points.per.line = NULL, + lines.per.image = NULL, + type = c ("single", "map"), + encoding = "unknown", + ..., + quiet = hy.getOption ("debuglevel") < 1L){ + ## check valid data connection + .check.con (filex = filex, filey = filey) + + ## check valid input + type <- .check.valid (type = type, points.per.line = points.per.line, + lines.per.image = lines.per.image) + + ## read data + wl <- scan (file = filex, ..., quiet = quiet, encoding = encoding) + spc <- scan (file = filey, ..., quiet = quiet, encoding = encoding) + + dim (spc) <- c (length (wl), length (spc) / length (wl)) + + spc <- new ("hyperSpec", wavelength = wl, spc = t (spc)) + + ## add map information + if (type == "map") + spc <- .parse.xy (spc = spc, points.per.line = points.per.line, lines.per.image = lines.per.image) + + ## consistent file import behaviour across import functions + .fileio.optional (spc, filey) +} + +.test (read.dat.Witec) <- function (){ + context("read.dat.Witec") + + test_that ("-y file guessing", { + skip_if_not_fileio_available() + spc <- read.dat.Witec("fileio/txt.Witec/Witec-timeseries-x.dat") + expect_known_hash(spc, "9562f59323") + }) + + test_that ("encoding", { + skip_if_not_fileio_available() + spc <- read.dat.Witec("fileio/txt.Witec/Witec-timeseries-x.dat", encoding = "ascii") + expect_known_hash(spc, "9562f59323") + }) + + test_that ("Time series", { + skip_if_not_fileio_available() + spc <- read.dat.Witec("fileio/txt.Witec/Witec-timeseries-x.dat", "fileio/txt.Witec/Witec-timeseries-y.dat") + expect_known_hash(spc, "9562f59323") + }) + + test_that ("Map: .dat does not have spatial information", { + skip_if_not_fileio_available() + spc <- read.dat.Witec("fileio/txt.Witec/Witec-Map-x.dat", "fileio/txt.Witec/Witec-Map-y.dat") + expect_null(spc$x) + expect_null(spc$y) + expect_known_hash(spc, "8a7ed06b0b") + }) + + test_that ("Map", { + skip_if_not_fileio_available() + expect_warning(read.dat.Witec("fileio/txt.Witec/Witec-Map-x.dat", "fileio/txt.Witec/Witec-Map-y.dat", + points.per.line = 5, lines.per.image = 5) + ) + + spc <- read.dat.Witec("fileio/txt.Witec/Witec-Map-x.dat", "fileio/txt.Witec/Witec-Map-y.dat", + type = "map", points.per.line = 5, lines.per.image = 5) + expect_known_hash(spc, "3d6339675b") + }) + + +} + + +##' @rdname read.txt.Witec +##' @param headerfile filename or connection to ASCII file with header information +##' @export +read.txt.Witec.Graph <- function (headerfile = stop ("filename or connection needed"), + filex = gsub ("Header", "X-Axis", headerfile), + filey = gsub ("Header", "Y-Axis", headerfile), + type = c ("single", "map"), encoding = "unknown", + ..., quiet = TRUE){ + ## check for valid data connection + .check.con (headerfile, filex, filey) + + ## processing headerfile + hdr <- read.ini (headerfile, skip = 1, encoding = encoding) + hdr <- sapply (hdr, function (x) unlist (x, recursive = FALSE)) # returns a matrix with colnames and rownames for better adressing + + ## check valid input + type <- .check.valid (type = type, hdr = hdr, + ...) + + ## read spectra and header + wl <- scan (filex, quiet = quiet, encoding = encoding) + nwl <- length (wl) + + txt <- scan (filey, quiet = quiet, encoding = encoding) + dim (txt) <- c (nwl, length (txt) / nwl) + + spc <- new ("hyperSpec", wavelength = wl, spc = t (txt)) + + ## cross validation of parameters and information provided by header file + if (nwl != hdr["SizeGraph", ]) + stop (paste ("length of wavelength axis in file '", filex, + "' differs from 'SizeGraph' in header file '", headerfile, "'", sep ="")) + + ## add header information + spc <- .parse.hdr (spc, hdr) + + ## add map information + if (type == "map") + spc <- .parse.xy (spc, hdr, ...) + + ## consistent file import behaviour across import functions + .fileio.optional (spc, filex) +} + +.test (read.txt.Witec.Graph) <- function (){ + context ("read.txt.Witec.Graph") + + test_that ("defaults and (X-Axis)/(Y-Axis) file guessing", { + skip_if_not_fileio_available() + spc <- read.txt.Witec.Graph("fileio/txt.Witec/Witec-timeseries (Header).txt") + expect_known_hash(spc, "295499c43c") + }) + + test_that ("encoding", { + skip_if_not_fileio_available() + expect_warning(read.txt.Witec.Graph("fileio/txt.Witec/nofilename (Header).txt")) + + spc <- read.txt.Witec.Graph("fileio/txt.Witec/nofilename (Header).txt", encoding = "latin1") + expect_known_hash(spc, "2bad36adb3") + }) + + test_that ("Time Series", { + skip_if_not_fileio_available() + spc <- read.txt.Witec.Graph("fileio/txt.Witec/Witec-timeseries (Header).txt", type = "single") + expect_known_hash(spc, "295499c43c") + }) + + test_that ("Map", { + skip_if_not_fileio_available() + expect_warning (read.txt.Witec.Graph("fileio/txt.Witec/Witec-Map (Header).txt")) + expect_warning (read.txt.Witec.Graph("fileio/txt.Witec/Witec-Map (Header).txt", type = "single")) + + spc <- read.txt.Witec.Graph("fileio/txt.Witec/Witec-Map (Header).txt", type = "map") + expect_known_hash(spc, "cb9cd9757a") + }) + + test_that("missing filename", { + skip_if_not_fileio_available() + spc <- read.txt.Witec.Graph("fileio/txt.Witec/nofilename (Header).txt", encoding = "latin1") + expect_known_hash(spc, "2bad36adb3") + }) + + test_that ("wrong combination of file names", { + skip_if_not_fileio_available() + expect_error (read.txt.Witec.Graph("fileio/txt.Witec/Witec-timeseries (Header).txt", "fileio/txt.Witec/Witec-timeseries (Y-Axis).txt")) + }) + +} + +### -------- helpers ------------------------ + +###checking file connection +.check.con <- function (headerfile, filex, filey, file){ + ## check for valid data connection + if (!missing (headerfile) && !file.exists (headerfile)) + stop ("Header file not found!") + + if (!missing (filex) && !file.exists (filex)) + stop ("Wavelength axis file not found!") + + if (!missing (filey) && !file.exists (filey)) + stop ("Intensity file not found!") + + if (!missing (file) && !file.exists (file)) + stop ("Spectra file not found!") +} + +###checking for valid input +.check.valid <- function (type, hdr, points.per.line, lines.per.image){ + ## check valid input + type <- match.arg (type, c ("single", "map")) + + if (type == "single" && !missing (points.per.line) && !is.null (points.per.line) && points.per.line != 1)#TODO: better to prove for values > 1? + warning ("points.per.line != 1 given for single spectrum") + + if (type == "single" && !missing (lines.per.image) && !is.null (lines.per.image) && lines.per.image != 1)#TODO: see above + warning ("lines.per.image != 1 are defined for single spectrum") + + if (type == "single" && !missing (hdr) && !is.null (hdr) && hdr ["SizeY", ] != 1) + warning ("header provides spatial information in y direction for single spectra") + + return (type) +} + +### parsing header information +.parse.hdr <- function (spc, hdr, hdr.label) { + if (!missing (hdr) && !missing (hdr.label)){ + hdr <- strsplit (hdr, "\t") + + if (length (hdr) == 2){ + spc@data$spcname <- hdr [[1]][-1] + labels (spc, ".wavelength") <- hdr [[2]] [1] + labels (spc, "spc") <- unique (hdr [[2]] [-1]) + } else if (length (hdr) == 1 && hdr.label){ + spc@data$spcname <- hdr [[1]][-1] + } else { + labels (spc, ".wavelength") <- hdr [[1]] [1] + labels (spc, "spc") <- unique (hdr [[1]] [-1]) + } + } + + if (!missing (hdr) && missing (hdr.label)){ + spc@data$spcname <- hdr ["GraphName", ] + if ("FileName" %in% rownames (hdr)) + spc@data$WIPname <- hdr ["FileName", ] + labels (spc, "spc") <- hdr ["DataUnit", ] + } + return (spc) +} + +### parsing map information +.parse.xy <- function (spc, hdr, hdr.label, points.per.line, lines.per.image, ...){ + + ## set points.per.line and lines.per.image, if at least one is set unequal NULL + if (xor (!missing (points.per.line) && !is.null (points.per.line), + !missing (lines.per.image) && !is.null(lines.per.image))){ + if ((missing (points.per.line) || is.null (points.per.line)) && + !is.null (lines.per.image)) { + points.per.line <- nrow (spc) / lines.per.image + } else { + lines.per.image <- nrow (spc) / points.per.line + } + + } else if (!missing (points.per.line) && !missing (lines.per.image) && + is.null (points.per.line) && is.null (points.per.line) && + !missing (hdr.label) && hdr.label) {#TODO: only read, if not yet calculated? + x <- sub ("^.*\\(([[:digit:]]+)/[[:digit:]]+\\)$", "\\1", hdr[1]) + y <- sub ("^.*\\([[:digit:]]+/([[:digit:]]+)\\)$", "\\1", hdr[1]) + points.per.line <- as.numeric (x) + 1 + lines.per.image <- as.numeric (y) + 1 + } else if ((missing (points.per.line) || missing (lines.per.image)) && + !missing (hdr) && missing (hdr.label)){#TODO: only read, if not yet calculated? + points.per.line <- as.numeric (hdr ["SizeX", ]) + lines.per.image <- as.numeric (hdr ["SizeY", ]) + } else if (is.null (points.per.line) && is.null (lines.per.image)) { + warning ("no spatial information provided") + return (spc) + } + + if (points.per.line * lines.per.image == nrow (spc)){ + spc@data$x <- rep (seq_len (points.per.line), lines.per.image) + spc@data$y <- rep (- seq_len (lines.per.image), each = points.per.line) + } else + warning ("number of spectra and number of points in map are not equal!") + + return (spc) +} diff --git a/R/read.txt.long.R b/R/read.txt.long.R new file mode 100644 index 00000000..805c850c --- /dev/null +++ b/R/read.txt.long.R @@ -0,0 +1,172 @@ +### --------------------------------------------------------------------------- +### +### read.txt.long: import measurements from .txt file +### +### Format: +### (y x) wl int +### + +##' Import and Export of hyperSpec objects +##' Besides \code{\link[base]{save}} and \code{\link[base]{load}}, two general +##' ways to import and export data into \code{hyperSpec} objects exist. +##' +##' Firstly, hyperSpec objects can be imported and exported as ASCII files. +##' +##' A second option is using the package \code{\link[R.matlab]{R.matlab}} which +##' provides the functions \code{\link[R.matlab]{readMat}} and +##' \code{\link[R.matlab]{writeMat}}. +##' +##' hyperSpec comes with a number of pre-defined functions to import +##' manufacturer specific file formats. For details, see \code{vignette +##' ("file-io")}. +##' +##' \code{\link[hyperSpec]{read.spc}} imports Thermo Galactic's .spc file +##' format, and ENVI files may be read using +##' \code{\link[hyperSpec]{read.ENVI}}. +##' +##' These functions are very flexible and provide lots of arguments. +##' +##' If you use them to read or write manufacturer specific ASCII formats, +##' please consider writing a wrapper function and contributing this function +##' to \pkg{hyperSpec}. An example is in the \dQuote{flu} vignette (see +##' \code{vignette ("flu", package = "hyperSpec"}). +##' +##' Note that R accepts many packed formats for ASCII files, see +##' \code{\link[base]{connections}}. For .zip files, see +##' \code{\link[utils]{unzip}}. +##' +##' For further information, see the examples below and the documentation of +##' \code{\link[R.matlab]{R.matlab}}. +##' +##' @aliases read.txt.long import export +##' @param file filename or connection +##' @param cols the column names specifying the column order. +##' +##' For data import, a list with elements \code{colname = label}; for export a +##' character vector with the colnames. Use \code{wavelength} to specify the +##' wavelengths. +##' @param header the file has (shall have) a header line +##' @param ... arguments handed to \code{\link[utils]{read.table}} and +##' \code{\link[utils]{write.table}}, respectively. +##' @param decreasing logical vector giving the sort order +##' @author C. Beleites +##' @seealso \code{\link[utils]{read.table}} and +##' \code{\link[utils]{write.table}} +##' +##' \code{\link[R.matlab]{R.matlab}} for .mat files +##' +##' \code{\link[hyperSpec]{read.ENVI}} for ENVI data +##' +##' \code{\link[hyperSpec]{read.spc}} for .spc files +##' +##' Manufacturer specific file formats: \code{\link{read.txt.Renishaw}} +##' @rdname textio +##' @keywords IO file +##' @export +##' @examples +##' +##' +##' \dontrun{vignette ("file-io")} +##' +##' ## export & import matlab files +##' if (require (R.matlab)) { +##' # export to matlab file +##' writeMat (paste0 (tempdir(), "/test.mat"), +##' x = flu[[]], wavelength = flu@@wavelength, +##' label = lapply (flu@@label, as.character)) +##' +##' # reading a matlab file +##' data <- readMat (paste0 (tempdir(), "/test.mat")) +##' print (data) +##' mat <- new ("hyperSpec", spc = data$x, +##' wavelength = as.numeric(data$wavelength), +##' label = data$label[,,1]) +##' } +##' +##' ## ascii export & import +##' +##' +##' write.txt.long (flu, +##' file = paste0 (tempdir(), "/flu.txt"), +##' cols = c(".wavelength", "spc", "c"), +##' order = c("c", ".wavelength"), +##' decreasing = c(FALSE, TRUE)) +##' +##' read.txt.long (file = paste0 (tempdir(), "/flu.txt"), +##' cols = list (.wavelength = expression (lambda / nm), +##' spc = "I / a.u", c = expression ("/" (c, (mg/l))))) +##' +##' write.txt.wide (flu, file = paste0 (tempdir(), "/flu.txt"), +##' cols = c("c", "spc"), +##' col.labels = TRUE, header.lines = 2, row.names = TRUE) +##' +##' write.txt.wide (flu, file = paste0 (tempdir(), "/flu.txt"), +##' col.labels = FALSE, row.names = FALSE) +##' +##' read.txt.wide (file = paste0 (tempdir(), "/flu.txt"), +##' # give columns in same order as they are in the file +##' cols = list (spc = "I / a.u", +##' c = expression ("/"("c", "mg/l")), +##' filename = "filename", +##' # plus wavelength label last +##' .wavelength = "lambda / nm"), +##' header = TRUE) +##' +##' +##' @importFrom utils read.table unstack +read.txt.long <- function (file = stop ("file is required"), + cols = list ( + .wavelength = expression (lambda / nm), + spc = "I / a.u." + ), + header = TRUE, + ...) { + txtfile <- read.table (file = file, header = header, ...) + + if (header) { + cln <- match (colnames (txtfile), names (cols)) + cln <- cols[cln] + names (cln) <- colnames (txtfile) + cols <- cln + rm (cln) + } else { + if (ncol (txtfile) != length (cols)) { + warning (paste ( + "cols does not correspond to the columns in", file, + ". Guessing remaining columns." + )) + cols <- c (character (ncol (txtfile) - 2), cols) + } + } + + + if (is.na (match ("spc", names (cols)))) { + stop ("cols$spc must exist.") + } + + wavelength <- match (".wavelength", names (cols)) + if (is.na (wavelength)) { + stop ("cols$.wavelength must exist.") + } + + colnames (txtfile) <- names (cols) + + ## wavelength axis + wavelength <- as.numeric (levels (as.factor (txtfile$.wavelength))) + + spc <- as.matrix (unstack (txtfile, form = spc ~ .wavelength)) + if ((nrow (spc) == length (wavelength)) & (ncol (spc) != length (wavelength))) { + spc <- t (spc) + } + + colnames (spc) <- levels (txtfile$.wavelength) + + txtfile <- txtfile [txtfile$.wavelength == txtfile$.wavelength[1], ] + txtfile$.wavelength <- NULL + txtfile$spc <- I (spc) + + spc <- new ("hyperSpec", wavelength = wavelength, data = txtfile, labels = cols) + + ## consistent file import behaviour across import functions + .fileio.optional (spc, filename = file) +} diff --git a/R/read.txt.wide.R b/R/read.txt.wide.R new file mode 100644 index 00000000..dc84abe5 --- /dev/null +++ b/R/read.txt.wide.R @@ -0,0 +1,84 @@ +###----------------------------------------------------------------------------- +### +### read.txt.wide +### +### Format: +### x y ... int (wl1) int (wl2) ... int (wl p) z ... +### +##' Import/export of hyperSpec objects to/from ASCII files +##' A detailed discussion of hyperSpec's file import and export capabilities is given in vignette \dQuote{fileio}. +##' +##' Besides \code{\link[base]{save}} and \code{\link[base]{load}}, two general ways to import and +##' export data into \code{hyperSpec} objects exist. +##' +##' Firstly, hyperSpec objects can be imported and exported as ASCII files. +##' +##' A second option is using the package \code{\link[R.matlab]{R.matlab}} +##' which provides the functions \code{\link[R.matlab]{readMat}} and +##' \code{\link[R.matlab]{writeMat}}. +##' +##' hyperSpec comes with a number of pre-defined functions to import +##' manufacturer specific file formats. For details, see \code{vignette +##' ("fileio")}. +##' +##' \code{\link[hyperSpec]{read.spc}} imports Thermo Galactic's .spc file +##' format, and ENVI files may be read using +##' \code{\link[hyperSpec]{read.ENVI}}. +##' +##' These functions are very flexible and provide lots of arguments. +##' +##' If you use them to read or write manufacturer specific ASCII formats, +##' please consider writing a wrapper function and contributing this +##' function to \pkg{hyperSpec}. An example is in the \dQuote{flu} vignette +##' (see \code{vignette ("flu", package = "hyperSpec"}). +##' +##' Note that R accepts many packed formats for ASCII files, see +##' \code{\link[base]{connections}}. For .zip files, see \code{\link[utils]{unzip}}. +##' +##' For further information, see the examples below, \code{vignette ("fileio")} and the documentation +##' of \code{\link[R.matlab]{R.matlab}}. +##' @seealso \code{vignette ("fileio")} and \url{http://hyperspec.r-forge.r-project.org/blob/fileio.pdf}, +##' respectively +##' @aliases read.txt.wide +##' @rdname textio +##' @param check.names handed to \code{\link[utils]{read.table}}. Make sure this is \code{FALSE}, if +##' the column names of the spectra are the wavelength values. +##' @export +##' @importFrom utils read.table head +read.txt.wide <- function (file = stop ("file is required"), + cols = list ( + spc = "I / a.u.", + .wavelength = expression (lambda / nm)), + sep = '\t', + row.names = NULL, + check.names = FALSE, + ...){ + + .wavelength <- match (".wavelength", names (cols)) + if (is.na (.wavelength)) + cols <- as.list (c (cols, .wavelength = expression (lambda / nm))) + else + if (.wavelength != length (cols)) # .wavelength should be at the end of cols + cols <- cols [c (seq_along (cols)[-.wavelength], .wavelength)] + + ## columns containing the spectra + spc <- match ("spc", names (cols)) + if (is.na (spc)) + stop ("cols$spc must exist.") + + txtfile <- read.table (file = file, check.names = check.names, row.names = row.names, + sep = sep, ...) + + ispc <- 0 : (ncol (txtfile) - length (cols) + 1) + spc + + spc.data <- as.matrix (txtfile[, ispc]) + txtfile <- txtfile [, -ispc, drop = FALSE] + + ## enforce colnames given by cols + colnames (txtfile) <- head (names (cols) [-spc], -1) + + spc <- new ("hyperSpec", spc = spc.data, data = txtfile, labels = cols) + + ## consistent file import behaviour across import functions + .fileio.optional (spc, filename = file) +} diff --git a/R/regexps.R b/R/regexps.R new file mode 100644 index 00000000..d38b8be7 --- /dev/null +++ b/R/regexps.R @@ -0,0 +1 @@ +.PATTERN.number <- "[-+]?(([0-9]*[.][0-9]+)|([0-9]+[.][0-9]*)|([0-9]+))([eE][-+]?[0-9])?" diff --git a/R/spc.NA.approx.R b/R/spc.NA.approx.R new file mode 100644 index 00000000..e3cbe60a --- /dev/null +++ b/R/spc.NA.approx.R @@ -0,0 +1,142 @@ +##' Impute missing data points +##' +##' Replace \code{NA}s in the spectra matrix by interpolation. With +##' less than 4 points available linear interpolation of the 2 neighbour points is used. For larger numbers of +##' neighbour points, smoothing interpolation is performed by +##' \code{\link[stats]{smooth.spline}}. +##' @note The function has been renamed from \code{spc.NA.linapprox} to \code{spc.NA.approx} +##' @param spc hyperSpec object with spectra matrix containing \code{NA}s +##' @param neighbours how many neighbour data points should be used to fit the +##' line +##' @param w,df,spar see \code{\link[stats]{smooth.spline}} +##' @param debuglevel see \code{\link[hyperSpec]{options}} +##' @return hyperSpec object +##' @export +##' @author Claudia Beleites +##' @examples +##' fluNA <- hyperSpec:::fluNA +##' spc.NA.approx (fluNA [,, min ~ 410], debuglevel = 1) +##' spc.NA.approx (fluNA [1,, min ~ 410], debuglevel = 2) +##' spc.NA.approx (fluNA [4,, min ~ 410], neighbours = 3, df = 4, debuglevel = 2) +spc.NA.approx <- function (spc, neighbours = 1, w = rep (1, 2 * neighbours), df = 1 + .Machine$double.eps, spar = NULL, + debuglevel = hy.getOption("debuglevel")){ + chk.hy (spc) + validObject (spc) + + all.na <- which (apply (is.na (spc@data$spc), 1, all)) + if (length (all.na) > 0){ + warning ("Spectra containing only NAs found. They will not be approximated.") + } + stopifnot (neighbours >= 1L) + + ispc <- which (is.na (spc@data$spc), arr.ind = TRUE) + + ispc <- setdiff (unique (ispc[,"row"]), all.na) + + if (debuglevel == 1L) + plot (spc [ispc], col = "gray") + + for (i in ispc){ + if (debuglevel == 2L) + plot (spc [i], col = "gray") + + nas <- which (is.na (spc@data$spc[i,])) + + start <- c (0, which (diff (nas) > 1)) + 1 + end <- c (start [-1] - 1, length (nas)) + + for (j in seq (along = start)) { + pts <- nas [start [j]] : nas [end [j]] + + xneighbours <- c ( -(1 : neighbours) + nas [start [j]], + (1 : neighbours) + nas [end [j]]) + mask <- xneighbours > 0 & xneighbours <= nwl (spc) + xneighbours <- xneighbours [mask] + + if (sum (mask) == 0) {# should not happen as all NA-only spectra were excluded + stop ("No data to interpolate from.") + + } else if (sum (mask) == 1) { + spc@data$spc [i, pts] <- spc@data$spc [i, xneighbours] + + if (debuglevel == 2L) + points (x = spc@wavelength [xneighbours], y = spc@data$spc [i, xneighbours]) + + } else if (sum (mask) < 4) { # old behaviour using linear interpolation + spc@data$spc [i, pts] <- approx (x = spc@wavelength [xneighbours], + y = spc@data$spc [i, xneighbours], + xout = spc@wavelength [pts], + method = "linear", + rule = 2)$y + if (debuglevel == 2L) + lines (x = spc@wavelength [xneighbours], y = spc@data$spc [i, xneighbours]) + } else {# more neighbours: interpolation spline + spline <- smooth.spline(x = spc@wavelength [xneighbours], + y = spc@data$spc [i, xneighbours], + w = w [mask], df = df, spar = spar, + cv = FALSE, all.knots = TRUE, keep.data = FALSE) + spc@data$spc [i, pts] <- predict (spline, x = spc@wavelength [pts])$y + + if (debuglevel == 2L) { + wlr <- seq (from = min (spc@wavelength [xneighbours]), + to = max (spc@wavelength [xneighbours]), + length.out = 100) + lines (predict (spline, wlr)) + } + + } + + if (debuglevel == 2L) + plot (spc [i,,xneighbours, wl.index = TRUE], add = TRUE, lines.args = list (type = "p", pch = 20), col = 1) + if (debuglevel >= 1L) + plot (spc [i,,pts, wl.index = TRUE], add = TRUE, lines.args = list (type = "p", pch = 20), col = 2) + } + } + + spc +} + +##' @rdname spc.NA.approx +##' @param ... ignored +spc.NA.linapprox <- function (...){ + stop ("spc.NA.linapprox has been renamed to spc.NA.approx") +} + +.test (spc.NA.approx) <- function (){ + context ("spc.NA.approx") + + test_that ("linear interpolation", { + tmp <- spc.NA.approx (fluNA [-2,, min ~ 410]) + expect_equivalent(as.numeric (tmp [[,, 406]]), rowMeans (fluNA [[-2,, 405.5 ~ 406.5]], na.rm = TRUE)) + }) + + test_that ("spline interpolation", { + tmp <- spc.NA.approx (fluNA [-2,, min ~ 410], neighbours = 2) + expect_true (all (abs (tmp [[,, 406]] - rowMeans (fluNA [[-2,, 405 ~ 407]], na.rm = TRUE)) <= 1e-5)) + # version on CRAN throws error on `expect_equal (tolerance = 1e-5)` + # TODO => change back ASAP + }) + + test_that ("edge treatment and debuglevel", { + ranges <- list (405 ~ 407, 405.5 ~ 406.5, 405.6 ~ 406) + for (d in 0 : 2) { + for (r in ranges) { + tmp <- spc.NA.approx (fluNA [-2,, r], neighbours = 3, debuglevel = d) + # expect_equal (round (as.numeric (tmp [[,, 406]]), 5), + # round (rowMeans (fluNA [[-2,, r]], na.rm = TRUE), 5), + # tolerance = 1e-5, + # info = paste0 ("debuglevel = ", d, ", range = ", paste0 (r [c (2, 1, 3)], collapse = ""))) + # version on CRAN throws error on `expect_equal (tolerance = 1e-5)` + # TODO => change back ASAP + expect_true (all (abs (tmp [[,, 406]] - rowMeans (fluNA [[-2,, r]], na.rm = TRUE)) <= 1e-5), + info = paste0 ("debuglevel = ", d, ", range = ", paste0 (r [c (2, 1, 3)], collapse = ""))) + } + } + }) + + + test_that ("spc.NA.linapprox deprecated",{ + expect_error(spc.NA.linapprox (fluNA)) + }) + +} diff --git a/R/spc.bin.R b/R/spc.bin.R new file mode 100644 index 00000000..73760db8 --- /dev/null +++ b/R/spc.bin.R @@ -0,0 +1,79 @@ +##' Wavelength Binning +##' In order to reduce the spectral resolution and thus gain signal to noise +##' ratio or to reduce the dimensionality of the spectral data set, the +##' spectral resolution can be reduced. +##' +##' The mean of every \code{by} data points in the spectra is calculated. +##' +##' Using \code{na.rm = TRUE} always takes about twice as long as \code{na.rm = FALSE}. +##' +##' If the spectra matrix does not contain too many \code{NA}s, \code{na.rm = 2} is faster than +##' \code{na.rm = TRUE}. +##' +##' @param spc the \code{hyperSpec} object +##' @param by reduction factor +##' @param na.rm decides about the treatment of \code{NA}s: +##' +##' if \code{FALSE} or \code{0}, the binning is done using \code{na.rm = FALSE} +##' +##' if \code{TRUE} or \code{1}, the binning is done using \code{na.rm = TRUE} +##' +##' if \code{2}, the binning is done using \code{na.rm = FALSE}, and resulting \code{NA}s are +##' corrected with \code{mean(\dots{}, na.rm = TRUE)}. +##' @param ... ignored +##' @return A \code{hyperSpec} object with \code{ceiling (nwl (spc) / by)} data points per spectrum. +##' @rdname spc-bin +##' @export +##' @author C. Beleites +##' @keywords manip datagen +##' @examples +##' +##' spc <- spc.bin (flu, 5) +##' +##' plot (flu[1,,425:475]) +##' plot (spc[1,,425:475], add = TRUE, col = "blue") +##' +##' nwl (flu) +##' nwl (spc) +##' +spc.bin <- function (spc, by = stop ("reduction factor needed"), na.rm = TRUE, ...) { + chk.hy (spc) + validObject (spc) + + n <- ceiling (nwl (spc) / by) + + small <- nwl (spc) %% by + if (small != 0) + warning (paste (c("Last data point averages only ", small, " points."))) + + bin <- rep (seq_len (n), each = by, length.out = nwl (spc)) + + na <- is.na (spc@data$spc) + + if ((na.rm > 0) && any (na)) { + if (na.rm == 1) { + na <- apply (!na, 1, tapply, bin, sum, na.rm = FALSE) + spc@data$spc <- t (apply (spc@data$spc, 1, tapply, bin, sum, na.rm = TRUE) / na) + } else { # faster for small numbers of NA + tmp <- t (apply (spc@data$spc, 1, tapply, bin, sum, na.rm = FALSE)) + tmp <- sweep (tmp, 2, rle (bin)$lengths, "/") + + na <- which (is.na (tmp), arr.ind = TRUE) + bin <- split (wl.seq (spc), bin) + + for (i in seq_len (nrow (na))){ + tmp [na [i, 1], na [i, 2]] <- mean (spc@data$spc [na [i, 1], bin [[na[i, 2]]]], na.rm = TRUE) + } + spc@data$spc <- tmp + } + } else { # considerably faster + spc@data$spc <- t (apply (spc@data$spc, 1, tapply, bin, sum, na.rm = FALSE)) + spc@data$spc <- sweep (spc@data$spc, 2, rle (bin)$lengths, "/") + } + + .wl (spc) <- as.numeric (tapply (spc@wavelength, bin, mean, na.rm = na.rm > 0)) + + validObject (spc) + spc +} + diff --git a/R/spc.fit.poly.R b/R/spc.fit.poly.R new file mode 100644 index 00000000..b996a93f --- /dev/null +++ b/R/spc.fit.poly.R @@ -0,0 +1,297 @@ +##' Polynomial Baseline Fitting +##' These functions fit polynomal baselines. +##' +##' Both functions fit polynomials to be used as baselines. If \code{apply.to} +##' is \code{NULL}, a \code{hyperSpec} object with the polynomial coefficients +##' is returned, otherwise the polynomials are evaluated on the spectral range +##' of \code{apply.to}. +##' +##' \code{spc.fit.poly} calculates the least squares fit of order +##' \code{poly.order} to the \emph{complete} spectra given in \code{fit.to}. +##' Thus \code{fit.to} needs to be cut appropriately. +##' +##' @rdname baselines +##' @concept baseline +##' @param fit.to \code{hyperSpec} object on which the baselines are fitted +##' @param apply.to \code{hyperSpec} object on which the baselines are evaluted +##' If \code{NULL}, a \code{hyperSpec} object containing the polynomial +##' coefficients rather than evaluted baselines is returned. +##' @param poly.order order of the polynomial to be used +##' @param offset.wl should the wavelength range be mapped to -> [0, delta wl]? +##' This enhances numerical stability. +##' @return \code{hyperspec} object containing the baselines in the spectra +##' matrix, either as polynomial coefficients or as polynomials evaluted on +##' the spectral range of \code{apply.to} +##' @author C. Beleites +##' @seealso \code{vignette ("baseline", package = "hyperSpec")} +##' @keywords manip datagen +##' @export +##' @examples +##' +##' \dontrun{vignette ("baseline", package = "hyperSpec")} +##' +##' spc <- chondro [1 : 10] +##' baselines <- spc.fit.poly(spc [,, c (625 ~ 640, 1785 ~ 1800)], spc) +##' plot(spc - baselines) +##' +spc.fit.poly <- function (fit.to, apply.to = NULL, poly.order = 1, offset.wl = ! (is.null (apply.to))){ + chk.hy (fit.to) + if (! is.null (apply.to)) + chk.hy (apply.to) + + validObject (fit.to) + validObject (apply.to) + + x <- fit.to@wavelength + + if (offset.wl){ + minx <- min (x) + x <- x - min (x) + } else { + minx <- 0 + } + + x <- vanderMonde (x, poly.order) # Vandermonde matrix of x + + p <- apply (fit.to, 1, + function (y, x){ + x <- x [! is.na (y),,drop = FALSE] + y <- y [! is.na (y)] + qr.solve (x, y) + }, + x) + + if (is.null (apply.to)){ + colnames (p@data$spc) <- paste0 ("(x - minx)^", 0 : poly.order) + + p$min.x = minx + return (p) + + } else { + wl <- apply.to@wavelength - minx + + x <- vanderMonde(wl, poly.order) # Vandermonde matrix of x + apply.to@data$spc <- I (t (apply (p[[]], 1, function (p, x) {x %*% p}, x))) + + validObject(apply.to) + + apply.to + } +} + +.test (spc.fit.poly) <- function (){ + context ("spc.fit.poly") + + test_that("no normalization", + bl.nonorm <- spc.fit.poly (flu, flu, poly.order = 3, offset.wl = FALSE) + ) + + # test effect of wavelength axis normalization + # was issue 1 on github + tmp <- flu + wl (tmp) <- wl (tmp) + 1e4 + + test_that("normalization/offset wavelengths", { + expect_error (spc.fit.poly(tmp, poly.order = 3, offset.wl = FALSE)) + + bl.1e4 <- spc.fit.poly(tmp, tmp, poly.order = 3, offset.wl = TRUE) + bl.nonorm <- spc.fit.poly (flu, flu, poly.order = 3, offset.wl = FALSE) + expect_equal (bl.nonorm [[]], bl.1e4 [[]]) + }) + + test_that("spectrum containing NA", { + tmp <- chondro [1] + tmp [[,, 1600]] <- NA + + coefs <- spc.fit.poly (tmp, apply.to = NULL) [[]] + expect_equal( + coefs, + spc.fit.poly(chondro [1,, !is.na (tmp)], apply.to = NULL) [[]] + ) + + ## bug was: all coefficients were silently 0 + expect_true (all (abs (coefs) > sqrt (.Machine$double.eps))) + }) + +} + +##' +##' \code{spc.fit.poly.below} tries to fit the baseline on appropriate spectral +##' ranges of the spectra in \code{fit.to}. For details, see the +##' \code{vignette ("baseline")}. +##' @rdname baselines +##' @param npts.min minimal number of points used for fitting the polynomial +##' @param noise noise level to be considered during the fit. It may be given +##' as one value for all the spectra, or for each spectrum separately. +##' @param max.iter stop at the latest after so many iterations. +##' @param stop.on.increase additional stopping rule: stop if the number of support points would increase, +##' regardless whether npts.min was reached or not. +##' @param debuglevel additional output: +##' \code{1} shows \code{npts.min}, +##' \code{2} plots support points for the final baseline of 1st spectrum, +##' \code{3} plots support points for 1st spectrum, +##' \code{4} plots support points for all spectra. +##' @seealso see \code{\link[hyperSpec]{options}} for more on \code{debuglevel} +##' @export +##' @examples +##' +##' baselines <- spc.fit.poly.below (spc) +##' plot (spc - baselines) +##' +##' spc.fit.poly.below(chondro [1:3], debuglevel = 1) +##' spc.fit.poly.below(chondro [1:3], debuglevel = 2) +##' spc.fit.poly.below(chondro [1:3], debuglevel = 3, noise = sqrt (rowMeans (chondro [[1:3]]))) +##' +spc.fit.poly.below <- function (fit.to, apply.to = fit.to, poly.order = 1, + npts.min = max (round (nwl (fit.to) * 0.05), 3 * (poly.order + 1)), + noise = 0, offset.wl = FALSE, max.iter = nwl (fit.to), + stop.on.increase = FALSE, + debuglevel = hy.getOption("debuglevel")){ + ## for debuglevel >= 2L + cols <- matlab.dark.palette(max.iter) + + chk.hy (fit.to) + if (! is.null (apply.to)) + chk.hy (apply.to) + + validObject (fit.to) + validObject (apply.to) + + if (missing (npts.min) && debuglevel >= 1L) + message ("Fitting with npts.min = ", npts.min, "\n") + + if (npts.min <= poly.order){ + npts.min <- poly.order + 1 + warning (paste ("npts.min too small: adjusted to", npts.min)) + } + + if (length (noise) == 1) + noise <- rep (noise, nrow (fit.to)) + + x <- fit.to@wavelength + + if (offset.wl){ + minx <- min (x) + x <- x - min (x) + } else { + minx <- 0 + } + + vdm <- vanderMonde (x, poly.order) + y <- t (fit.to [[]]) + + p <- matrix (nrow = nrow(fit.to) , ncol = poly.order + 1) + for (i in row.seq (fit.to)){ + use.old <- logical (nwl (fit.to)) + use <- !is.na (y [, i]) + + if (debuglevel %in% c(2L, 3L) && i == 1L || debuglevel >= 4L) { + plot(fit.to [i], title.args = list (main = paste ("spectrum", i))) + message ("start: ", sum (use, na.rm=TRUE), " support points") + } + + for (iter in 1 : max.iter) { + p[i,] <- qr.solve (vdm[use,], y[use, i]) + bl <- vdm %*% p [i,] + use.old <- use + use <- y[, i] < bl + noise [i] & !is.na (y [, i]) + + if (debuglevel == 3L && i == 1L || debuglevel >= 4L) { + plot (fit.to[i,, use], add = TRUE, lines.args = list (pch = 20, type = "p"), col= cols [iter]) + lines (fit.to@wavelength, bl, col = cols [iter]) + lines (fit.to@wavelength, bl + noise, col = cols [iter], lty = 2) + message ("Iteration ", iter, ": ", sum (use, na.rm=TRUE), " support points") + } + + if ((sum (use, na.rm=TRUE) < npts.min) || all (use == use.old, na.rm = TRUE)) + break + + if (sum (use, na.rm=TRUE) > sum (use.old, na.rm=TRUE) && stop.on.increase){ + warning("Iteration ", iter, ": Number of support points is about to increase again. Stopping with ", + sum (use.old, na.rm=TRUE), " support points, but this is a local minimum only.") + break + } + } + + if (iter == max.iter) + if ((sum (use.old, na.rm = TRUE) == npts.min) && + ! all (use == use.old, na.rm = TRUE) && + ! sum (use, na.rm = TRUE) < npts.min){ + warning("Reached npts.min, but the solution is not stable. Stopped after ", iter, " iterations.") + } else if (sum (use, na.rm=TRUE) >= npts.min) { + warning ("Stopped after ", iter, " iterations with ", sum (use.old, na.rm = TRUE), " support points.") + } + + if (debuglevel >= 1L) + message (sprintf ("spectrum % 6i: % 5i support points, noise = %0.1f, %3i iterations", i, sum (use.old, na.rm = TRUE), noise [i], iter)) + if ((debuglevel == 2L) && (i == 1L)){ + plot (fit.to[i,, use.old], add = TRUE, lines.args = list (pch = 20, type = "p"), col= cols [iter]) + lines (fit.to@wavelength, bl, col = cols [iter]) + lines (fit.to@wavelength, bl + noise, col = cols [iter], lty = 2) + } + + } + if (is.null (apply.to)){ + fit.to <- new("hyperSpec", spc=p, wavelength=0 : poly.order) + colnames (fit.to@data$spc) <- paste0 ("(x - minx)^", 0 : poly.order) + + validObject (fit.to) + + fit.to$min.x = minx + return (fit.to) + + } else { + x <- apply.to@wavelength - minx + + vdm <- vanderMonde(x, poly.order) # Vandermonde matrix of x + + apply.to@data$spc <- I (t (apply (p, 1, function (p, x) {x %*% p}, vdm))) + + validObject (apply.to) + + apply.to + } +} + +.test (spc.fit.poly.below) <- function (){ + context ("spc.fit.poly.below") + + test_that("no normalization", + bl.nonorm <- spc.fit.poly.below (flu, flu, poly.order = 3, offset.wl = FALSE, npts.min = 25) + ) + + # test effect of wavelength axis normalization + # was issue 1 on github + tmp <- flu + wl (tmp) <- wl (tmp) + 1e4 + + test_that("normalization/offset wavelengths", { + expect_error (spc.fit.poly.below (tmp, poly.order = 3, offset.wl = FALSE, npts.min = 25)) + + bl.1e4 <- spc.fit.poly.below (tmp, tmp, poly.order = 3, offset.wl = TRUE, npts.min = 25) + bl.nonorm <- spc.fit.poly.below (flu, flu, poly.order = 3, offset.wl = FALSE, npts.min = 25) + + expect_equal (bl.nonorm [[]], bl.1e4 [[]]) + }) + + test_that("requesting 2 support points working - issue #58", { + expect_warning (spc.fit.poly.below(chondro[103], npts.min = 2), "Stopped after") + expect_warning (spc.fit.poly.below(chondro[103], npts.min = 2, stop.on.increase = TRUE), "about to increase again") + }) + + test_that("spectrum containing NA", { + tmp <- chondro [1] + tmp [[,, 1600]] <- NA + + coefs <- spc.fit.poly.below(tmp, apply.to = NULL) [[]] + expect_equal( + coefs, + spc.fit.poly.below(chondro [1,, !is.na (tmp)], apply.to = NULL) [[]] + ) + + ## bug was: all coefficients were silently 0 + expect_true (all (abs (coefs) > sqrt (.Machine$double.eps))) + }) +} + + diff --git a/R/spc.identify.R b/R/spc.identify.R new file mode 100644 index 00000000..cf140607 --- /dev/null +++ b/R/spc.identify.R @@ -0,0 +1,276 @@ +##' Identifying Spectra and Spectral Data Points +##' This function allows to identify the spectrum and the wavelength of a point +##' in a plot produced by \code{\link{plotspc}}. +##' +##' This function first finds the spectrum with a point closest to the clicked +##' position (see \code{\link[graphics]{locator}}). The distance to the clicked +##' point is evaluated relative to the size of the tolerance window. +##' +##' In a second step, \code{max.fn} searches for the actual point to label +##' within the specified wavelength window of that spectrum. This allows to +##' label maxima (or minima) without demanding too precise clicks. Currently, +##' the following functions to determine the precise point: \tabular{ll}{ +##' spc.point.default \tab uses the clicked wavelength together with its +##' spectral intensity\cr spc.point.max \tab the point with the highest +##' intensity in the wavelength window \cr spc.point.min \tab the point with +##' the lowest intensity in the wavelength window \cr spc.point.sqr \tab +##' maximum of a parabola fit throug the point with highest intensity and the +##' two surrounding points \cr } \code{point.fn} is called with the arguments +##' \code{wl} containing the considered wavelength window, \code{spc} the +##' respective intensities of the closest spectrum, and \code{wlclick} the +##' wavelength that was clicked. They return a vector of two elements +##' (wavelength and intensity). +##' +##' As a last step, a label for the point produced by \code{formatter} and +##' plotted using \code{\link[graphics]{text}}. Currently, the following +##' \code{formatter}s are available: \tabular{ll}{ spc.label.default \tab +##' spectrum number, wavelength \cr spc.label.wlonly \tab wavelength\cr } +##' \code{formatter} functions receive the number of the spectrum \code{ispc}, +##' the wavelength \code{wl}, and the spectral intensity \code{spc} and produce +##' a character variable suitable for labelling. The predefined formatters +##' surround the label text by spaces in order to easily have an appropriate +##' offset from the point of the spectrum. +##' +##' The warning issued if no spectral point is inside the tolerance window may +##' be switched of by \code{warn = FALSE}. In that case, the click will produce +##' a row of \code{NA}s in the resulting data.frame. +##' +##' \code{spc.identify} uses option \code{debuglevel} to determine whether debugging output should be +##' produced. \code{debuglevel == 2} will plot the tolerance window for every clicked point, +##' \code{debuglevel == 1} will plot the tolerance window only if no data point was inside. See +##' \code{\link[hyperSpec:options]{hyperSpec options}} for details about retrieving and setting +##' options. +##' +##' You may want to adjust the plot's \code{ylim} to ensure that the labels are +##' not clipped. As a dirty shortcut, \code{xpd = NA} may help. +##' +##' @aliases spc.identify spc.label.default spc.label.wlonly spc.point.default +##' spc.point.max spc.point.min spc.point.sqr +##' @param x either the abscissa coordinates or the list returned by +##' \code{\link{plotspc}} +##' @param y the ordinate values. Giving \code{y} will override any values from +##' \code{x$y}. +##' @param wavelengths the wavelengths for the data points. Giving +##' \code{wavelengths} will override any values from \code{x$wavelengths}. +##' @param tol.wl,tol.spc tolerance in wavelength and spectral intensity to +##' search around the clicked point. See details. +##' @param point.fn \code{function (wl, spc, wlclick)} to determine the actual +##' point to label, see details. +##' @param formatter \code{function (i, wl, spc)} that produces the labels. If +##' \code{NULL}, no labels are displayed. +##' @param ... passed to \code{\link[graphics]{text}} in order to produce the +##' labels +##' @param cex,adj,srt see \code{\link[graphics]{par}} +##' @param warn Should the user be warned if no point is in the considered +##' window? In addition, see the discussion of option \code{debuglevel} in +##' the details. +##' +##' If \code{FALSE}, the resulting data.frame will have a row of \code{NA}s +##' instead. +##' @param delta \code{spc.point.sqr} fits the parabola in the window wlclick +##' \eqn{\pm}{+-} delta points. +##' @return a \code{data.frame} with columns \item{ispc}{spectra indices of the +##' identified points, i.e. the rows of the \code{hyperSpec} object that was +##' plotted. +##' +##' If \code{ispc} is given, \code{ispc [i]} is returned rather than \code{i}. +##' } \item{wavelengths}{the wavelengths of the identified points} +##' \item{spc}{the intensities of the identified points} +##' @author C. Beleites +##' @seealso \code{\link[graphics]{locator}}, \code{\link{plotspc}}, +##' \code{\link[hyperSpec:options]{hyperSpec options}} +##' +##' \code{\link{map.identify}} \code{\link{map.sel.poly}} +##' @keywords iplot +##' @rdname spc-identify +##' @export +##' @examples +##' +##' if (interactive ()){ +##' ispc <- sample (nrow (laser), 10) +##' ispc +##' +##' identified <- spc.identify (plotspc (laser[ispc])) +##' +##' ## convert to the "real" spectra indices +##' ispc [identified$ispc] +##' identified$wl +##' identified$spc +##' +##' ## allow the labels to be plotted into the plot margin +##' spc.identify (plotspc (laser[ispc]), ispc = ispc, xpd = NA) +##' +##' spc.identify (plotspc (paracetamol, xoffset = 1100, +##' wl.range = c (600 ~ 1700, 2900 ~ 3150)), +##' formatter = spc.label.wlonly) +##' +##' ## looking for minima +##' spc.identify (plot (-paracetamol, wl.reverse = TRUE), +##' point.fn = spc.point.min, adj = c (1, 0.5)) +##' +##' } +##' +spc.identify <- function (x, y = NULL, wavelengths = NULL, ispc = NULL, + tol.wl = diff (range (x)) / 200, + tol.spc = diff (range (y)) / 50, + point.fn = spc.point.max, # function to find the maximum + formatter = spc.label.default, # NULL: suppress labels + ..., cex = 0.7, adj = c (0, 0.5), srt = 90, # for the label text + warn = TRUE){ + + if (! interactive ()) + stop ("spc.identify works only on interactive graphics devices.") + + if (is.list (x)) { + if (is.null (wavelengths)) + wavelengths <- x$wavelengths + if (is.null (y)) + y <- x$y + x <- x$x + } + + debuglevel <- hy.getOption ("debuglevel") + + if ((length (x) != length (y)) | (length (x) != length (wavelengths))) + stop ("x, y, and wavelength need to have the same length.") + + if (is.null (ispc)) + ispc <- row (y) + else + ispc <- ispc[row(y)] + + pts <- data.frame (ispc = rep (NA, 50), wl = NA, spc = NA) + pos <- 1 + + while (! is.null (tmp <- locator (n = 1))){ + wl <- approx (x, wavelengths, tmp$x, rule = 2)$y # return wl_min / wl_max for outside pts. + + if (debuglevel == 2L) { + points (tmp$x, tmp$y, pch = ".", col = "red") + rect (tmp$x - tol.wl, tmp$y - tol.spc, tmp$x + tol.wl, tmp$y + tol.spc, + border = "red", col = NA) + } + + i.window <- wavelengths >= wl - tol.wl & # window to search for the closest spectrum + wavelengths <= wl + tol.wl & + y >= tmp$y - tol.spc & + y <= tmp$y + tol.spc + + if (! any (i.window)){ + if (warn) + warning ("No spectra in specified window.") + else + pos <- pos + 1 + + if (debuglevel == 1L) { + points (tmp$x, tmp$y, pch = ".", col = "red") + rect (tmp$x - tol.wl, tmp$y - tol.spc, tmp$x + tol.wl, tmp$y + tol.spc, + border = "red", col = NA) + } + + } else { + + ## find spectrum closest to clicked point. + ## x and y distances are scaled according to tolerance. + tmp <- ((wl - wavelengths [i.window]) / tol.wl)^2 + + ((tmp$y - y [i.window]) / tol.spc)^2 + tmp <- which (i.window) [which.min (tmp)] + + pts [pos, "ispc"] <- ispc [tmp] # closest spectrum; + # this will grow the data.frame if necessary + # no time concern with hand-clicked points + + ## search for the max (min) of spectrum pt within tmp$x +- tol.wl + i.window <- which (ispc == ispc [tmp] & + wavelengths >= wl - tol.wl & + wavelengths <= wl + tol.wl) + + pts [pos, 2 : 3] <- point.fn (wl = wavelengths [i.window], + spc = y [i.window], + wlclick = wl) + + ## label the point + if (! is.null (formatter)){ + lab <- formatter (pts [pos, 1], pts [pos, 2], pts [pos, 3]) + + text (approx (wavelengths, x, pts [pos, 2], rule = 2), + pts [pos, 3], labels = lab, cex = cex, adj = adj, srt = srt, ...) + } + + pos <- pos + 1 + } + + + } + + pts [seq_len (pos - 1),] +} + +##' @rdname spc-identify +##' @param wl the wavelength to label +##' @param spc the intensity to label +##' @param wlclick the clicked wavelength +##' @export +spc.point.max <- function (wl, spc, wlclick){ + i <- which.max (spc) + c (wl = wl [i], spc = spc [i]) +} + +##' @rdname spc-identify +##' @export +spc.point.default <- function (wl, spc, wlclick){ + i <- round (approx (wl, seq_along (wl), wlclick, rule = 2)$y) + c (wl = wl [], spc = spc [i]) +} + +##' @rdname spc-identify +##' @export +spc.point.min <- function (wl, spc, wlclick){ + i <- which.min (spc) + c (wl = wl [i], spc = spc [i]) +} + +##' @rdname spc-identify +##' @export +spc.point.sqr <- function (wl, spc, wlclick, delta = 1L){ + i <- which.max (spc) + + ## points (wl [i], spc [i]) + if (i > 1L && i < length (wl)) { + i <- i + (-delta : delta) + i <- i %in% seq_along (wl) # make sure the indices exist + + p <- outer (wl [i], 0 : 2, "^") # Vandermonde matrix + p <- qr.solve (p, spc [i]) + + i <- -p [2] / p [3] / 2 + + ## lines (wl, outer (wl, 0 : 2, "^") %*% p, col = "red") + c (wl = i, spc = sum (p * c(1, i, i^2))) + + } else { + + c (wl = wl [i], spc = spc [i]) + + } +} + +##' @param ispc if a selection of spectra was plotted, their indices can be +##' given in \code{ispc}. In this case \code{ispc [i]} is returned rather +##' than \code{i}. +##' @param digits how many digits of the wavelength should be displayed? +##' @rdname spc-identify +##' @export +spc.label.default <- function (ispc, wl, spc, digits = 3){ + sprintf(" %i, %s ", ispc, format (wl, digits = digits)) +} + +##' @rdname spc-identify +##' @export +spc.label.wlonly <- function (ispc, wl, spc, digits = 3){ + sprintf(" %s ", format (wl, digits = digits)) +} + + + + diff --git a/R/spc.loess.R b/R/spc.loess.R new file mode 100644 index 00000000..20d393fa --- /dev/null +++ b/R/spc.loess.R @@ -0,0 +1,59 @@ +##' loess smoothing interpolation for spectra +##' Spectra can be smoothed and interpolated on a new wavelength axis using +##' \code{\link[stats]{loess}}. +##' +##' Applying \code{\link[stats]{loess}} to each of the spectra, an interpolation onto a new +##' wavelength axis is performed. At the same time, the specta are smoothed in order to increase the +##' signal : noise ratio. See \code{\link[stats]{loess}} and \code{\link[stats]{loess.control}} on +##' the parameters that control the amount of smoothing. +##' +##' @param spc the \code{hyperSpec} object +##' @param newx wavelengh axis to interpolate on +##' @param enp.target,surface,... parameters for \code{\link[stats]{loess}} and +##' \code{\link[stats]{loess.control}}. +##' @return a new \code{hyperspec} object. +##' @rdname spc-loess +##' @export +##' @author C. Beleites +##' @seealso \code{\link[stats]{loess}}, \code{\link[stats]{loess.control}} +##' @keywords manip datagen +##' @examples +##' +##' plot (flu, col = "darkgray") +##' plot (spc.loess(flu, seq (420, 470, 5)), add = TRUE, col = "red") +##' +##' flu [[3, ]] <- NA_real_ +##' smooth <- spc.loess(flu, seq (420, 470, 5)) +##' smooth [[, ]] +##' plot (smooth, add = TRUE, col = "blue") +##' +spc.loess <- function (spc, newx, enp.target = nwl (spc) / 4, + surface = "direct", ...){ + + .loess <- function (y, x){ + if (all (is.na (y))) + NA + else + loess (y ~ x, enp.target = enp.target, surface = surface, ...) + } + + .predict <- function (loess, x){ + if (!is (loess, "loess") && is.na (loess)) + rep (NA_real_, length (x)) + else + predict (loess, x) + } + + chk.hy (spc) + validObject (spc) + + loess <- apply (t (spc[[]]), 2, .loess, spc@wavelength) + + spc@data$spc <- t (sapply (loess, .predict, newx)) + .wl(spc) <- newx + + if (any (is.na (spc@data$spc))) + warning ("NAs were generated. Probably newx was outside the spectral range covered by spc.") + + spc +} diff --git a/R/spc.rubberband.R b/R/spc.rubberband.R new file mode 100644 index 00000000..45b698b5 --- /dev/null +++ b/R/spc.rubberband.R @@ -0,0 +1,145 @@ +##' Rubberband baseline +##' +##' Baseline with support points determined from a convex hull of the spectrum. +##' +##' Use \code{debuglevel >= 1} to obtain debug plots, either directly via function argument or by setting hyperSpec's \code{debuglevel} option. +##' @title Rubberband baseline correction +##' @param spc hyperSpec object +##' @param ... further parameters handed to \code{\link[stats]{smooth.spline}} +##' @param upper logical indicating whether the lower or upper part of the hull should be used +##' @param noise noise level to be taken into account +##' @param spline logical indicating whether the baseline should be an interpolating spline through +##' the support points or piecewise linear. +##' @return hyperSpec object containing the baselines +##' @rdname spc-rubberband +##' @author Claudia Beleites +##' @seealso \code{\link[hyperSpec]{spc.fit.poly}}, \code{\link[hyperSpec]{spc.fit.poly.below}} +##' +##' \code{vignette ("baseline")} +##' +##' \code{\link[hyperSpec]{hy.setOptions}} +##' +##' @note This function is still experimental +##' @export +##' @examples +##' plot (paracetamol [,, 175 ~ 1800]) +##' bl <- spc.rubberband (paracetamol [,, 175 ~ 1800], noise = 300, df = 20) +##' plot (bl, add = TRUE, col = 2) +##' +##' plot (paracetamol [,, 175 ~ 1800] - bl) + +spc.rubberband <- function (spc, ..., upper = FALSE, noise = 0, spline = TRUE){ + spc <- orderwl (spc) + + if (upper) spc@data$spc <- -spc@data$spc + + spc@data$spc <- .rubberband (spc@wavelength, spc@data$spc, + noise = noise, spline = spline, ...) + + if (upper) spc@data$spc <- -spc@data$spc + + spc +} + +##' @importFrom grDevices chull +.rubberband <- function (x, y, noise, spline, ..., debuglevel = hy.getOption ("debuglevel")){ + for (s in seq_len (nrow (y))){ + use <- which (!is.na (y [s,])) + + pts <- chull (x [use], y [s,use]) + pts <- use [pts] + + if (debuglevel >= 1L){ + plot (x, y [s, ], type = "l") + points (x [pts], y [s, pts], pch = 1, col = matlab.dark.palette (length (pts))) + } + + ## `chull` returns points in cw order + ## => points between ncol (y) and 1 are lower part of hull + imax <- which.max (pts) - 1 + + ## if necessary, rotate pts so that ncol (y) is at position 1 + if (imax > 0L) + pts <- c (pts [- seq_len (imax)], pts [seq_len (imax)]) + + ## now keep only pts until column index 1 + pts <- pts [1 : which.min (pts)] + + ## check whether first and last point are minima, + ## if not remove them. + ## If they are minima, 2nd and 2nd last point do not appear in pts + ## last point: + if (pts [2] == pts [1] - 1) pts <- pts [-1] + + ## now sort ascending (anyways needed later on) + pts <- rev (pts) + + ## fist point: + if (pts [2] == pts [1] + 1) pts <- pts [-1] + + if (debuglevel >= 1L){ + points (x [pts], y [s, pts], pch = 19, col = matlab.dark.palette (length (pts)), cex = 0.7) + } + + tmp <- approx (x = x [pts], y = y [s, pts], xout= x, method="linear")$y + + if (spline){ + pts <- which (y [s,] <= tmp + noise) + + if (length (pts) > 3) + tmp <- predict (smooth.spline (x[pts], y[s, pts], ...)$fit, x, 0)$y + else + tmp <- spline (x [pts], y [s, pts], xout = x)$y + + } + + y [s, ] <- tmp + + } + + y +} + +.test (spc.rubberband) <- function (){ + context ("spc.rubberband") + + ## use data that yields fairly stable baseline solution + paracetamol <- paracetamol [,, 300 ~ 550] + + test_that("spectrum containing NA inside", { + tmp <- paracetamol + tmp [[,, 400]] <- NA + + coefs <- spc.rubberband (tmp) + expect_equal( + coefs [[,, !is.na (tmp)]], + spc.rubberband(paracetamol [,, !is.na (tmp)]) [[]] + ) + + ## bug was: all coefficients were silently 0 + expect_true (all (abs (coefs [[]]) > sqrt (.Machine$double.eps))) + }) + + test_that ("spectrum containing NA at first wavelength (issue #95)", { + tmp <- paracetamol + tmp [[,, 1, wl.index = TRUE]] <- NA + + coefs <- spc.rubberband (tmp) + expect_equal( + coefs [[,, !is.na (tmp)]], + spc.rubberband(paracetamol [,, !is.na (tmp)]) [[]] + ) + }) + + test_that ("spectrum containing NA at end", { + tmp <- paracetamol [1] + tmp [[,, nwl (paracetamol), wl.index = TRUE]] <- NA + + coefs <- spc.rubberband (tmp) + expect_equal( + coefs [[,, !is.na (tmp)]], + spc.rubberband(paracetamol [1,, !is.na (tmp)]) [[]] + ) + }) + +} \ No newline at end of file diff --git a/R/spc.spline.R b/R/spc.spline.R new file mode 100644 index 00000000..d44bbcda --- /dev/null +++ b/R/spc.spline.R @@ -0,0 +1,53 @@ +##' Smoothing splines +##' +##' Spectral smoothing by splines +##' @title Spectral smoothing by splines +##' @param spc hyperSpec object +##' @param newx wavelengh axis to interpolate on +##' @param ... further parameters handed to \code{\link[stats]{smooth.spline}} +##' @return hyperSpec object containing smoothed spectra +##' @rdname spc-spline +##' @author Claudia Beleites +##' @seealso \code{\link[hyperSpec]{spc.loess}} +##' +##' \code{\link[stats]{smooth.spline}} +##' @note This function is still experimental +##' @export +##' @examples +##' p <- paracetamol [,,2200 ~ max] +##' plot (p, col = "gray") +##' smooth <- spc.smooth.spline (p [,, c (2200 ~ 2400, 2500 ~ 2825, 3150 ~ max)], +##' wl (paracetamol [,, 2200 ~ max]), +##' df = 4, spar = 1) +##' plot (smooth, col = "red", add = TRUE) +##' +##' plot (p - smooth) +##' +spc.smooth.spline <- function (spc, newx = wl (spc), ...){ + + .spline <- function (x, y, newx){ + pts <- ! is.na (y) + fit <- smooth.spline (x [pts], y [pts], ...)$fit + predict (fit, newx, deriv = 0)$y + } + + spc <- orderwl (spc) #includes chk.hy and validObject + + newspc <- matrix (NA_real_, ncol = length (newx), nrow = nrow (spc)) + i <- rowSums (is.na (spc@data$spc)) < nwl (spc) + + newspc [i, ] <- t (apply (spc@data$spc [i,, drop = FALSE], 1, + .spline, x = spc@wavelength, newx = newx)) + + if (any (is.na (newspc [i, ]))) + warning ("NAs generated. Probably newx was outside the spectral range covered by spc.") + + spc@data$spc <- newspc + .wl(spc) <- newx + + validObject (spc) + + + spc +} + diff --git a/R/split.string.R b/R/split.string.R new file mode 100644 index 00000000..1c4f3d05 --- /dev/null +++ b/R/split.string.R @@ -0,0 +1,33 @@ +###----------------------------------------------------------------------------- +### +### split.string - split string at pattern +### +### + +split.string <- function (x, separator, trim.blank = TRUE, remove.empty = TRUE) { + pos <- gregexpr (separator, x) + if (length (pos) == 1 && pos [[1]] == -1) + return (x) + + pos <- pos [[1]] + + pos <- matrix (c (1, pos + attr (pos, "match.length"), + pos - 1, nchar (x)), + ncol = 2) + + if (pos [nrow (pos), 1] > nchar (x)) + pos <- pos [- nrow (pos), ] + + x <- apply (pos, 1, function (p, x) substr (x, p [1], p [2]), x) + + if (trim.blank){ + blank.pattern <- "^[[:blank:]]*([^[:blank:]]+.*[^[:blank:]]+)[[:blank:]]*$" + x <- sub (blank.pattern, "\\1", x) + } + + if (remove.empty){ + x <- x [sapply (x, nchar) > 0] + } + + x +} diff --git a/R/splitdots.R b/R/splitdots.R new file mode 100644 index 00000000..6905b526 --- /dev/null +++ b/R/splitdots.R @@ -0,0 +1,35 @@ +## experimental splitting of dots arguments into arg lists for different functions. +## + +##' @noRd +.split.dots <- function (dots, functions, drop = TRUE){ + fun.names <- paste ("^", names (functions), "[.]", sep = "") + dot.names <- names (dots) + + ## sort args to functions according to functionname.argumentname + args <- lapply (fun.names, grep, dot.names) + nomatch <- setdiff (seq_along (dots), unlist (args)) + + ## for now: + if (length (nomatch) > 0) + stop ("unmatched arguments: ", + paste (dot.names [nomatch], dots [nomatch], sep = " = ", collapse =", ") + ) + + args <- lapply (args, function (args, dots) dots [args], dots) + names (args) <- names (functions) + + ## drop the function indicating part of the argument names + args <- mapply (function (args, fname) { + names (args) <- gsub (fname, "", names (args)) + args + }, + args, fun.names) + if (drop) + args [sapply (args, length) > 0] + else + args +} + +## TODO: tests + diff --git a/R/trellis.factor.key.R b/R/trellis.factor.key.R new file mode 100644 index 00000000..8dd909d3 --- /dev/null +++ b/R/trellis.factor.key.R @@ -0,0 +1,50 @@ +##' Color coding legend for factors +##' Modifies a list of lattice arguments (as for \code{\link[lattice]{levelplot}}, etc.) according to +##' the factor levels. The colorkey will shows all levels (including unused), and the drawing colors +##' will be set accordingly. +##' +##' \code{trellis.factor.key} is used during \code{levelplot}-based plotting of factors (for +##' hyperSpec objects) unless \code{transform.factor = FALSE} is specified. +##' +##' @param f the factor that will be color-coded +##' @param levelplot.args a list with levelplot arguments +##' @return the modified list with levelplot arguments. +##' @author C. Beleites +##' @seealso \code{\link[lattice]{levelplot}} +##' @keywords aplot +##' @export +##' @importFrom lattice level.colors +##' @examples +##' +##' chondro$z <- factor (rep (c("a", "a", "d", "c"), +##' length.out = nrow (chondro)), +##' levels = letters [1 : 4]) +##' +##' str (trellis.factor.key (chondro$z)) +##' +##' plotmap (chondro, z ~ x * y) +##' +##' ## switch off using trellis.factor.key: +##' ## note that the factor levels are collapsed to c(1, 2, 3) rather than +##' ## c (1, 3, 4) +##' plotmap (chondro, z ~ x * y, transform.factor = FALSE) +##' +##' plotmap (chondro, z ~ x * y, +##' col.regions = c ("gray", "red", "blue", "dark green")) +##' +##' @importFrom utils modifyList +trellis.factor.key <- function (f, levelplot.args = list ()) { + at <- seq (0, nlevels (f)) + .5 + + if (is.null (levelplot.args$col.regions)) + cols <- level.colors (seq_along (levels (f)), at) + else + cols <- level.colors (seq_along (levels (f)), at, levelplot.args$col.regions) + + modifyList (list (at = at, + col.regions = cols, + colorkey = list (lab = list (at = seq_along (levels (f)), + lab = levels (f)))), + levelplot.args) + +} diff --git a/R/units.R b/R/units.R new file mode 100644 index 00000000..db1494ce --- /dev/null +++ b/R/units.R @@ -0,0 +1,3 @@ +.wn.shift <- expression (Delta*tilde(nu) / cm^-1) +.wn <- expression (tilde(nu) / cm^-1) +.wl <- expression (lambda / nm) diff --git a/R/unittest.R b/R/unittest.R new file mode 100644 index 00000000..f278a359 --- /dev/null +++ b/R/unittest.R @@ -0,0 +1,57 @@ +##' hyperSpec unit tests +##' +##' If \code{\link[testthat]{testthat}} is available, run the unit tests and +##' display the results. +##' +##' @param standalone run the unit test on their own, e.g. from the console +##' (`TRUE`) or within testthat tests (`FALSE`), e.g. via `devtools::test()` +##' @param reporter the reporter to use, defaults to [testthat::ProgressReporter] +##' +##' @rdname unittests +##' @return Invisibly returns a data frame with the test results +##' +##' @author Claudia Beleites +##' +##' @keywords programming utilities +##' @import testthat +##' @export +##' @examples +##' +##' \donttest{hy.unittest ()} +##' +hy.unittest <- function (standalone = TRUE, reporter = "progress"){ + if (!requireNamespace("testthat", quietly=TRUE)) { + warning("testthat required to run the unit tests.") + return(NA) + } + if (! "package:testthat" %in% search ()) + attachNamespace("testthat") + + tests <- eapply(env = getNamespace ("hyperSpec"), FUN = get.test, all.names=TRUE) + tests <- tests [! sapply (tests, is.null)] + + if (standalone) { + with_reporter(reporter = reporter, start_end_reporter = TRUE, + for (t in tests) t()) + } else { + for (t in tests) t() + } +} + +##' @noRd +{ + `.test<-` <- function (f, value) { + attr (f, "test") <- value + f + } + + skip_if_not_fileio_available <- function () { + skip_if_not (file.exists("fileio"), message = "file import test files not installed") + } +} + +##' get test that is attached to object as "test" attribute +##' @noRd +get.test <- function (object) + attr (object, "test") + diff --git a/R/validate.R b/R/validate.R new file mode 100644 index 00000000..934a51f6 --- /dev/null +++ b/R/validate.R @@ -0,0 +1,11 @@ +.validate <- function (object) { + ncol <- ncol (object@data$spc) + + if (is.null (ncol)) + ncol <- 0 + + if (length (object@wavelength) != ncol) + return ("Length of wavelength vector differs from number of data points per spectrum.") + + TRUE + } diff --git a/R/wc.R b/R/wc.R new file mode 100644 index 00000000..3e6ff737 --- /dev/null +++ b/R/wc.R @@ -0,0 +1,11 @@ +##' line/word/character count of ASCII files +##' +##' `wc()` is defunct and will be removed from hyperSpec in future. Consider using [count_lines()] instead for line counting. +##' @seealso [count_lines()] +##' @export +##' @author C. Beleites +wc <- function (){ + .Defunct(new = "count_lines", + msg = "wc() is now defunct and has been removed.") +} + diff --git a/R/wleval.R b/R/wleval.R new file mode 100644 index 00000000..869fc66c --- /dev/null +++ b/R/wleval.R @@ -0,0 +1,75 @@ +##' Evaluate function on wavelengths of hyperSpec object +##' +##' This is useful for generating certain types of baseline "reference spectra". +##' +##' @param x hyperSpec object +##' @param ... hyperSpec method: expressions to be evaluated +##' @param normalize.wl function to transorm the wavelengths before evaluating the polynomial (or +##' other function). Use \code{\link[hyperSpec]{normalize01}} to map the wavelength range to the interval [0, 1]. +##' @return hyperSpec object containing one spectrum for each expression +##' @export +##' @seealso \code{\link[hyperSpec]{vanderMonde}} for polynomials, +##' +##' \code{\link[hyperSpec]{normalize01}} to normalize the wavenumbers before evaluating the function +##' @author C. Beleites +##' @examples +##' plot (wl.eval (laser, exp = function (x) exp (-x))) +wl.eval <- function (x, ..., normalize.wl = I){ + chk.hy (x) + validObject (x) + + fun <- list (...) + + wl <- normalize.wl (x@wavelength) + + x <- decomposition (x, t (sapply (fun, function (f) f (wl))), scores = FALSE) + x$.f <- if (is.null (names (fun))) + rep (NA, length (fun)) + else + names (fun) + x +} + +##' @include unittest.R +.test (wl.eval) <- function (){ + context ("wl.eval") + + test_that("error on function not returning same length as input", { + expect_error (wl.eval (flu, function (x) 1)) + }) + + test_that("wl.eval against manual evaluation", { + expect_equivalent (wl.eval (flu, function (x) rep (5, length (x)), normalize.wl = normalize01) [[]], + matrix (rep (5, nwl (flu)), nrow = 1)) + + expect_equivalent (wl.eval (flu, function (x) x), + vanderMonde(flu, 1)[2]) + + expect_equivalent (wl.eval (flu, function (x) exp (-x)) [[]], + matrix (exp (-flu@wavelength), nrow = 1)) + }) + + test_that("normalization", { + expect_equivalent (wl.eval (flu, function (x) rep (5, length (x)), normalize.wl = normalize01) [[]], + matrix (rep (5, nwl (flu)), nrow = 1)) + + expect_equivalent (wl.eval (flu, function (x) x, normalize.wl = normalize01) [[]], + matrix (seq (0, 1, length.out = nwl (flu)), nrow = 1)) + + expect_equivalent (wl.eval (flu, function (x) exp (x), normalize.wl = normalize01) [[]], + matrix (exp (seq (0, 1, length.out = nwl (flu))), nrow = 1)) + }) + + + test_that("multiple functions", { + expect_equivalent (wl.eval (flu, function (x) rep (1, length (x)), function (x) x), + vanderMonde(flu, 1)) + + }) + + test_that("function names", { + tmp <- wl.eval (flu, f = function (x) x, g = function (x) exp (-x)) + + expect_equal(tmp$.f, c ("f", "g")) + }) +} diff --git a/R/write.txt.long.R b/R/write.txt.long.R new file mode 100644 index 00000000..3cee20ae --- /dev/null +++ b/R/write.txt.long.R @@ -0,0 +1,99 @@ +###----------------------------------------------------------------------------- +### +### write.txt.long +### +### + +##' @param object the \code{hyperSpec} object +##' @param order which columns should be \code{\link[base]{order}}ed? +##' \code{order} is used as index vector into a \code{data.frame} with +##' columns given by \code{cols}. +##' @param na.last handed to \code{\link[base]{order}} by +##' \code{write.txt.long}. +##' @param quote,sep,col.names,row.names have their usual meaning (see +##' \code{\link[utils]{read.table}} and \code{\link[utils]{write.table}}), +##' but different default values. +##' +##' For file import, \code{row.names} should usually be \code{NULL} so that the +##' first column becomes a extra data column (as opposed to row names of the extra data). +##' @param col.labels Should the column labels be used rather than the +##' colnames? +##' @param append Should the output be appended to an existing file? +##' @aliases write.txt.long +##' @rdname textio +##' @export +##' @importFrom utils write.table +write.txt.long <- function (object, + file = "", + order = c (".rownames", ".wavelength"), + na.last = TRUE, decreasing = FALSE, + quote = FALSE, sep = "\t", + row.names = FALSE, + cols = NULL, + col.names = TRUE, + col.labels = FALSE, # use labels instead of column names? + append = FALSE, + ...){ + validObject (object) + + col.spc <- match ("spc", colnames (object@data)) + + X <- as.long.df (object, rownames = TRUE) + + if (!is.null (order)){ + if (is.character (order)) { + tmp <- match (order, colnames (X)) + if (any (is.na (tmp))) + stop ("write.txt.long: no such columns: ", + paste (order [is.na (tmp)], collapse = ", ")) + order <- tmp + } + + + if (length (decreasing) < length (order)) + decreasing <- rep (decreasing, length.out = length (order)) + + order.data <- as.list (X [, order, drop = FALSE]) + + for (i in seq_along (order)){ + if (is.factor(order.data [[i]])) + order.data [[i]] <- rank (order.data [[i]], na.last = na.last | is.na (na.last)) + + if (decreasing [i]) + order.data [[i]] <- - order.data [[i]] + } + + X <- X[do.call ("order", + c (order.data, na.last = na.last | is.na (na.last), decreasing = FALSE) + ), ] + } + + if (is.na (na.last)) + X <- X[! is.na (X$spc), ] + + if (!is.null (cols)) + X <- X [, cols, drop = FALSE] + + if (!row.names) + X$.rownames <- NULL + else + cln [match (".rownames", cln)] <- "row" + + if (col.names){ + if (col.labels){ + cln <- match (colnames (X), names (object@label)) + cln[!is.na (cln)] <- object@label [cln[!is.na(cln)]] + cln[is.na (cln)] <- colnames (X) [is.na(cln)] + cln <- sapply (cln, as.character) + } else { + cln <- colnames (X) + } + + write.table (matrix (cln, nrow = 1), file = file, append = append, + quote = quote, sep = sep, row.names = FALSE, col.names = FALSE) + append <- TRUE + } + + write.table (X, file, append = append, quote = quote, sep = sep, + row.names = FALSE, col.names = FALSE, ...) +} diff --git a/R/write.txt.wide.R b/R/write.txt.wide.R new file mode 100644 index 00000000..53577c39 --- /dev/null +++ b/R/write.txt.wide.R @@ -0,0 +1,88 @@ +###----------------------------------------------------------------------------- +### +### write.txt.wide +### +### +##' @param header.lines Toggle one or two line header (wavelengths in the +##' second header line) for \code{write.txt.wide} +##' @aliases write.txt.wide +##' @rdname textio +##' @export +##' @importFrom utils write.table +##' + +write.txt.wide <- function (object, + file = "", + cols = NULL, + quote = FALSE, sep = "\t", + row.names = FALSE, + col.names = TRUE, + header.lines = 1, # 1 or 2 line header? + # use labels instead of column names? + col.labels = if (header.lines == 1) FALSE else TRUE, + append = FALSE, + ...){ + validObject (object) + + if (! is.null (cols)) + object <- object [, cols] + + if (col.names){ + col.spc <- match ("spc", colnames (object@data)) + + if (col.labels){ + cln <- match (colnames (object@data), names (object@label)) + cln[!is.na (cln)] <- object@label [cln[!is.na(cln)]] + cln[is.na (cln)] <- colnames (object@data) [is.na(cln)] + cln <- sapply (cln, as.character) + #cln [-col.spc] <- object@label [] + } else { + cln <- colnames (object@data) + } + + i <- seq_along (cln) + + if (header.lines == 1){ + write.table (matrix (c(if (row.names) "" else NULL, + cln [i < col.spc], + object@wavelength, + cln [i > col.spc] + ), nrow = 1), + file = file, append = append, quote = quote, sep = sep, + row.names = FALSE, col.names = FALSE) + append = TRUE + } else if (header.lines == 2) { + ## 1st line + write.table (matrix (c ( + if (row.names) "" else NULL, + cln [i < col.spc], + if (col.labels) cln [col.spc] else "", + rep ("", length (object@wavelength) - 1), + cln [i > col.spc]), nrow = 1), + file = file, append = append, quote = quote, sep = sep, + row.names = FALSE, col.names = FALSE) + append = TRUE + ## 2nd line + write.table (matrix (c (if (row.names) (if (col.labels) as.character (object@label$.wavelength) + else "wavelength") + else NULL, + rep ("", sum (i < col.spc)), + object@wavelength, + rep ("", sum (i > col.spc)) + ), nrow = 1), + file = file, append = append, quote, sep, + row.names = FALSE, col.names = FALSE) + + } else { + stop ("Only 1 or 2 line headers supported.") + } + + } + + # no AsIs columns! + for (c in which (sapply (object@data, class) == "AsIs")) + class (object@data [[c]]) <- NULL + + write.table (object@data, file = file, append = append, quote = quote, sep = sep, + row.names = row.names, col.names = FALSE, ...) +} diff --git a/R/y-pastenames.R b/R/y-pastenames.R new file mode 100644 index 00000000..1eb08647 --- /dev/null +++ b/R/y-pastenames.R @@ -0,0 +1,17 @@ +.pastenames <- function (...){ + if (nargs () == 1L & is.list (..1)) + dots <- ..1 + else + dots <- list (...) + + names <- names (dots) + names <- sapply (names, + function (x){ + if (nchar (x) > 0L) + sprintf ("%s = ", x) + else + "" + }) + + paste (names, dots, collapse = ", ", sep = "") +} diff --git a/tests/testthat/_snaps/attached/plot-voronoi.svg b/tests/testthat/_snaps/attached/plot-voronoi.svg new file mode 100644 index 00000000..e979774f --- /dev/null +++ b/tests/testthat/_snaps/attached/plot-voronoi.svg @@ -0,0 +1,185 @@ + + + + + + + + + + + + + +x, px +y, px + + + + + + + + + + + + + + + + + + + + + + + +1 +2 +3 +4 +5 +6 +7 + + + + + + + +1 +2 +3 +4 +5 +6 +7 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +2.4 +2.5 +2.6 +2.7 +2.8 +2.9 +3.0 +3.1 +3.2 + + diff --git a/tests/testthat/test-plotspc.r b/tests/testthat/test-plotspc.r new file mode 100644 index 00000000..b3f09968 --- /dev/null +++ b/tests/testthat/test-plotspc.r @@ -0,0 +1,10 @@ +context ("plotspc") + +test_that("BARBITURATES", { + spc <- do.call (collapse, barbiturates [1:3]) + + plotspc (spc, col = matlab.dark.palette (3), stacked = TRUE, lines.args = list (type = "h")) + +}) + + diff --git a/tests/unittests.R b/tests/unittests.R new file mode 100644 index 00000000..4485df79 --- /dev/null +++ b/tests/unittests.R @@ -0,0 +1,3 @@ +library (testthat) +test_check("hyperSpec") +