Merge pull request #191 from isoverse/dev

update to 1.4.1
isoverse · Jul 31, 2023 · 66bc66c · 66bc66c
2 parents d5c5274 + e7bdd07
commit 66bc66c
Show file tree

Hide file tree

Showing 6 changed files with 92 additions and 36 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: isoreader
 Title: Read Stable Isotope Data Files
 Description: Interface to the raw data file formats commonly encountered in scientific disciplines that make use of stable isotopes.
-Version: 1.4.0
+Version: 1.4.1
 Authors@R: 
     c(person(
         given = "Sebastian", family = "Kopf", 

diff --git a/R/isoread_isodat.R b/R/isoread_isodat.R
@@ -499,6 +499,9 @@ extract_isodat_continuous_flow_vendor_data_table <- function(ds, cap_at_fun = NU
   if (nrow(extracted_dt$cell_values) == 0L) {
     stop("could not find any vendor table data", call. = FALSE)
   }
+
+  # propagated newly registered problems
+  ds <- ds |> set_problems(combined_problems(ds, extracted_dt))
 
   # store vendor data table
   data_table <- full_join(peaks, mutate(extracted_dt$cell_values, .check = TRUE), by = "Nr.")
@@ -727,24 +730,40 @@ extract_isodat_main_vendor_data_table_fast <- function(ds, C_block, cap_at_fun =
     ds$source <- cap_at_fun(ds$source)
   }
 
-  columns <- extract_isodat_main_vendor_data_table_columns(ds, col_include = col_include)
-
-  # safety check: to make sure all columns have the same format specification
-  if (!all(ok <- columns$n_formats == 1)) {
-    formats <- map_chr(columns$data[!ok], ~collapse(unique(.x$format), ", "))
-    problems <- glue("column {columns$column[!ok]} has multiple formats '{formats}'")
+  # start output
+  output <- list()
+
+  # add columns
+  output$columns <- extract_isodat_main_vendor_data_table_columns(ds, col_include = col_include)
+
+  # safety check: to make sure all output$columns have the same format specification
+  if (!all(ok <- output$columns$n_types == 1)) {
+    formats <- map_chr(output$columns$data[!ok], ~collapse(unique(.x$format), ", "))
+    problems <- glue("column {output$columns$column[!ok]} has multiple formats '{formats}'")
     iso_source_file_op_error(ds$source, glue("mismatched data column formats:\n{collapse(problems, '\n')}"))
   }
 
+  # safety check: warn if different precisions
+  if (!all(ok <- output$columns$n_precisions == 1)) {
+    precisions <- map_chr(output$columns$data[!ok], ~collapse(unique(.x$precision), ", "))
+    problems <- glue("column {output$columns$column[!ok]} has multiple precisions '{precisions}'")
+    output <- register_warning(
+      output, 
+      details = glue("mismatched data column formats:\n{collapse(problems, '\n')}"),
+      func = "extract_isodat_main_vendor_data_table"
+    )
+  }
+
   # safety check: to make sure all formats are resolved
-  if (!all(ok <- !is.na(columns$type))) {
-    problems <- glue("column {columns$column[!ok]} has unknown format '{columns$column_format[!ok]}'")
+  if (!all(ok <- !is.na(output$columns$column_type))) {
+    problems <- glue("column {output$columns$column[!ok]} has unknown format '{output$columns$column_format[!ok]}'")
     iso_source_file_op_error(ds$source, glue("unknown column formats:\n{collapse(problems, '\n')}"))
   }
 
-  cell_values <- extract_isodat_main_vendor_data_table_values(ds, columns)
+  # finish output with cell values
+  output$cell_values <- extract_isodat_main_vendor_data_table_values(ds, output$columns)
 
-  return(list(columns = columns, cell_values = cell_values))
+  return(output)
 }
 
 # extract the main (recurring) portion of the vendor data table
@@ -804,32 +823,45 @@ extract_isodat_main_vendor_data_table_columns <- function(ds, pos = ds$source$po
     dplyr::mutate(row = cumsum(.data$column == .data$column[1])) |>
     # remove duplicates
     dplyr::group_by(.data$column, .data$row) |>
-    dplyr::summarize(
-      group = .data$group[1],
-      continue_pos = .data$continue_pos[1],
-      id = .data$id[1],
-      format = .data$format[1],
-      `gas_config?` = .data$`gas_config?`[1],
-      units = .data$units[1],
-      ref_frame = .data$units[1],
-      .groups = "drop"
-    ) |>
+    dplyr::filter(dplyr::row_number() == 1) |>
+    dplyr::ungroup() |>
+    # dplyr::summarize(
+    #   group = .data$group[1],
+    #   continue_pos = .data$continue_pos[1],
+    #   id = .data$id[1],
+    #   format = .data$format[1],
+    #   `gas_config?` = .data$`gas_config?`[1],
+    #   units = .data$units[1],
+    #   ref_frame = .data$units[1],
+    #   .groups = "drop"
+    # ) |>
     dplyr::arrange(.data$group) |>
-    # nest by column and expand column details
-    tidyr::nest(data = c(-"column")) |>
+    # parse column format
     dplyr::mutate(
-      n_formats = purrr::map_int(.data$data, ~length(unique(.x$format))),
-      column_format = purrr::map_chr(.data$data, ~.x$format[1]),
-      column_units = purrr::map_chr(.data$data, ~.x$units[1]),
       type =
         dplyr::case_when(
-          .data$column_format == "%s" ~ "text",
-          .data$column_format %in% c("%u", "%d") ~ "integer",
-          str_detect(.data$column_format, "\\%[0-9.]*f") ~ "double",
+          .data$format == "%s" ~ "text",
+          .data$format %in% c("%u", "%d") ~ "integer",
+          str_detect(.data$format, "\\%[0-9.]*f") ~ "double",
           TRUE ~ NA_character_
-        )
+        ),
+      precision = dplyr::if_else(
+        type == "double", 
+        stringr::str_extract(.data$format, "(?<=\\.)\\d*(?=f)"),
+        NA_character_
+      )
+    ) |>
+    # nest by column and expand column details
+    tidyr::nest(data = c(-"column")) |>
+    dplyr::mutate(
+      n_types = purrr::map_int(.data$data, ~length(unique(.x$type))),
+      n_precisions = purrr::map_int(.data$data, ~length(unique(.x$precision))),
+      line1 = purrr::map(.data$data, ~.x[1,c("format", "units", "type", "precision")])
     ) |>
+    tidyr::unnest(line1) |>
     # naming adjustments
+    dplyr::rename("column_format" = "format", "column_units" = "units", 
+                  "column_type" = "type", "column_precision" = "precision") |>
     dplyr::mutate(
       # avoid issues with delta symbol on different OS
       column = stringr::str_replace(.data$column, fixed("\U03B4"), "d"),
@@ -883,7 +915,7 @@ extract_isodat_main_vendor_data_table_values <- function(ds, columns) {
 
   # get cell values
   columns |>
-    filter(!is.na(type)) |>
+    filter(!is.na(.data$column_type)) |>
     unnest("data") |>
     select("column", "continue_pos", "type", "row") |>
     nest(data = c(-row)) |>

diff --git a/R/isoread_nu.R b/R/isoread_nu.R
@@ -220,7 +220,6 @@ process_nu_parser <- function(ds, parser, options = list()) {
   if (length(matches) > 0) {
     header <- ds$source$header[matches]
     data <- ds$source$data[matches]
-    #if (default(debug)) nu_data <<- data
     value <- rlang::eval_tidy(rlang::get_expr(parser$parse_quo))
     if (n_problems(value) > 0) {
       ds <- set_problems(ds, combined_problems(ds, value))

diff --git a/_pkgdown.yml b/_pkgdown.yml
@@ -7,7 +7,7 @@ template:
     ganalytics: UA-106138361-3
 
 development:
-  mode: devel
+  mode: auto
 
 home:
   strip_header: true    

diff --git a/tests/testthat/test-continuous-flow.R b/tests/testthat/test-continuous-flow.R
@@ -154,11 +154,14 @@ test_that("test that additional continous flow files can be read", {
   iso_turn_reader_caching_off()
 
   # testing wrapper
-  check_continuous_flow_test_file <- function(file, file_info_cols = NULL, data_table_nrow = NULL, data_table_col_units = NULL) {
+  check_continuous_flow_test_file <- function(file, file_info_cols = NULL, data_table_nrow = NULL, data_table_col_units = NULL, n_problems = 0) {
     file_path <- get_isoreader_test_file(file, local_folder = test_folder)
     expect_true(file.exists(file_path))
-    expect_is(file <- iso_read_continuous_flow(file_path, read_cache = FALSE), "continuous_flow")
-    expect_equal(nrow(problems(file)), 0)
+    if (n_problems > 0)
+      expect_warning(file <- iso_read_continuous_flow(file_path, read_cache = FALSE))
+    else
+      expect_is(file <- iso_read_continuous_flow(file_path, read_cache = FALSE), "continuous_flow")
+    expect_equal(nrow(problems(file)), n_problems)
     expect_equal(nrow(file$file_info), 1)
     if (!is.null(file_info_cols)) 
       expect_equal(names(file$file_info), file_info_cols)
@@ -190,6 +193,28 @@ test_that("test that additional continous flow files can be read", {
     )
   )
 
+  dxf2 <- check_continuous_flow_test_file(
+    "dxf_example_H_02.dxf",
+    c("file_id", "file_root", "file_path", "file_subpath", "file_datetime", 
+      "file_size", "Row", "Peak Center", "Check Ref. Dilution", "H3 Stability", 
+      "H3 Factor", "Conditioning", "Seed Oxidation", "GC Method", "AS Sample", 
+      "AS Method", "Identifier 1", "Identifier 2", "Analysis", "Preparation", "Method", 
+      "measurement_info", "MS_integration_time.s"),
+    53,
+    c(Nr. = NA, Start = "s", Rt = "s", End = "s", `Ampl 2` = "mV",
+      `Ampl 3` = "mV", `BGD 2` = "mV", `BGD 3` = "mV", `rIntensity 2` = "mVs",
+      `rIntensity 3` = "mVs", `rIntensity All` = "mVs", `Intensity 2` = "Vs",
+      `Intensity 3` = "Vs", `Intensity All` = "Vs", `Sample Dilution` = "%",
+      `List First Peak` = NA, `rR 3H2/2H2` = NA, `Is Ref.?` = NA, `R 3H2/2H2` = NA,
+      `Ref. Name` = NA, `rd 3H2/2H2` = "permil", `d 3H2/2H2` = "permil",
+      `R 2H/1H` = NA, `d 2H/1H` = "permil", `AT% 2H/1H` = "%", `Rps 3H2/2H2` = NA,
+      `Master Peak` = NA, `DeltaDelta 3H2/2H2` = "permil"
+    ),
+    n_problems = 1L
+  )
+  expect_equal(problems(dxf2)$type, "warning")
+  expect_true(stringr::str_detect(problems(dxf2)$details, "has multiple precisions"))
+
   check_continuous_flow_test_file(
     "dxf_example_HO_01.dxf",
     c("file_id", "file_root", "file_path", "file_subpath", "file_datetime", 

diff --git a/tests/testthat/test_data/dxf_example_H_02.dxf b/tests/testthat/test_data/dxf_example_H_02.dxf