Skip to content

Commit

Permalink
Merge pull request #191 from isoverse/dev
Browse files Browse the repository at this point in the history
update to 1.4.1
  • Loading branch information
sebkopf committed Jul 31, 2023
2 parents d5c5274 + e7bdd07 commit 66bc66c
Show file tree
Hide file tree
Showing 6 changed files with 92 additions and 36 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
@@ -1,7 +1,7 @@
Package: isoreader
Title: Read Stable Isotope Data Files
Description: Interface to the raw data file formats commonly encountered in scientific disciplines that make use of stable isotopes.
Version: 1.4.0
Version: 1.4.1
Authors@R:
c(person(
given = "Sebastian", family = "Kopf",
Expand Down
92 changes: 62 additions & 30 deletions R/isoread_isodat.R
Expand Up @@ -499,6 +499,9 @@ extract_isodat_continuous_flow_vendor_data_table <- function(ds, cap_at_fun = NU
if (nrow(extracted_dt$cell_values) == 0L) {
stop("could not find any vendor table data", call. = FALSE)
}

# propagated newly registered problems
ds <- ds |> set_problems(combined_problems(ds, extracted_dt))

# store vendor data table
data_table <- full_join(peaks, mutate(extracted_dt$cell_values, .check = TRUE), by = "Nr.")
Expand Down Expand Up @@ -727,24 +730,40 @@ extract_isodat_main_vendor_data_table_fast <- function(ds, C_block, cap_at_fun =
ds$source <- cap_at_fun(ds$source)
}

columns <- extract_isodat_main_vendor_data_table_columns(ds, col_include = col_include)

# safety check: to make sure all columns have the same format specification
if (!all(ok <- columns$n_formats == 1)) {
formats <- map_chr(columns$data[!ok], ~collapse(unique(.x$format), ", "))
problems <- glue("column {columns$column[!ok]} has multiple formats '{formats}'")
# start output
output <- list()

# add columns
output$columns <- extract_isodat_main_vendor_data_table_columns(ds, col_include = col_include)

# safety check: to make sure all output$columns have the same format specification
if (!all(ok <- output$columns$n_types == 1)) {
formats <- map_chr(output$columns$data[!ok], ~collapse(unique(.x$format), ", "))
problems <- glue("column {output$columns$column[!ok]} has multiple formats '{formats}'")
iso_source_file_op_error(ds$source, glue("mismatched data column formats:\n{collapse(problems, '\n')}"))
}

# safety check: warn if different precisions
if (!all(ok <- output$columns$n_precisions == 1)) {
precisions <- map_chr(output$columns$data[!ok], ~collapse(unique(.x$precision), ", "))
problems <- glue("column {output$columns$column[!ok]} has multiple precisions '{precisions}'")
output <- register_warning(
output,
details = glue("mismatched data column formats:\n{collapse(problems, '\n')}"),
func = "extract_isodat_main_vendor_data_table"
)
}

# safety check: to make sure all formats are resolved
if (!all(ok <- !is.na(columns$type))) {
problems <- glue("column {columns$column[!ok]} has unknown format '{columns$column_format[!ok]}'")
if (!all(ok <- !is.na(output$columns$column_type))) {
problems <- glue("column {output$columns$column[!ok]} has unknown format '{output$columns$column_format[!ok]}'")
iso_source_file_op_error(ds$source, glue("unknown column formats:\n{collapse(problems, '\n')}"))
}

cell_values <- extract_isodat_main_vendor_data_table_values(ds, columns)
# finish output with cell values
output$cell_values <- extract_isodat_main_vendor_data_table_values(ds, output$columns)

return(list(columns = columns, cell_values = cell_values))
return(output)
}

# extract the main (recurring) portion of the vendor data table
Expand Down Expand Up @@ -804,32 +823,45 @@ extract_isodat_main_vendor_data_table_columns <- function(ds, pos = ds$source$po
dplyr::mutate(row = cumsum(.data$column == .data$column[1])) |>
# remove duplicates
dplyr::group_by(.data$column, .data$row) |>
dplyr::summarize(
group = .data$group[1],
continue_pos = .data$continue_pos[1],
id = .data$id[1],
format = .data$format[1],
`gas_config?` = .data$`gas_config?`[1],
units = .data$units[1],
ref_frame = .data$units[1],
.groups = "drop"
) |>
dplyr::filter(dplyr::row_number() == 1) |>
dplyr::ungroup() |>
# dplyr::summarize(
# group = .data$group[1],
# continue_pos = .data$continue_pos[1],
# id = .data$id[1],
# format = .data$format[1],
# `gas_config?` = .data$`gas_config?`[1],
# units = .data$units[1],
# ref_frame = .data$units[1],
# .groups = "drop"
# ) |>
dplyr::arrange(.data$group) |>
# nest by column and expand column details
tidyr::nest(data = c(-"column")) |>
# parse column format
dplyr::mutate(
n_formats = purrr::map_int(.data$data, ~length(unique(.x$format))),
column_format = purrr::map_chr(.data$data, ~.x$format[1]),
column_units = purrr::map_chr(.data$data, ~.x$units[1]),
type =
dplyr::case_when(
.data$column_format == "%s" ~ "text",
.data$column_format %in% c("%u", "%d") ~ "integer",
str_detect(.data$column_format, "\\%[0-9.]*f") ~ "double",
.data$format == "%s" ~ "text",
.data$format %in% c("%u", "%d") ~ "integer",
str_detect(.data$format, "\\%[0-9.]*f") ~ "double",
TRUE ~ NA_character_
)
),
precision = dplyr::if_else(
type == "double",
stringr::str_extract(.data$format, "(?<=\\.)\\d*(?=f)"),
NA_character_
)
) |>
# nest by column and expand column details
tidyr::nest(data = c(-"column")) |>
dplyr::mutate(
n_types = purrr::map_int(.data$data, ~length(unique(.x$type))),
n_precisions = purrr::map_int(.data$data, ~length(unique(.x$precision))),
line1 = purrr::map(.data$data, ~.x[1,c("format", "units", "type", "precision")])
) |>
tidyr::unnest(line1) |>
# naming adjustments
dplyr::rename("column_format" = "format", "column_units" = "units",
"column_type" = "type", "column_precision" = "precision") |>
dplyr::mutate(
# avoid issues with delta symbol on different OS
column = stringr::str_replace(.data$column, fixed("\U03B4"), "d"),
Expand Down Expand Up @@ -883,7 +915,7 @@ extract_isodat_main_vendor_data_table_values <- function(ds, columns) {

# get cell values
columns |>
filter(!is.na(type)) |>
filter(!is.na(.data$column_type)) |>
unnest("data") |>
select("column", "continue_pos", "type", "row") |>
nest(data = c(-row)) |>
Expand Down
1 change: 0 additions & 1 deletion R/isoread_nu.R
Expand Up @@ -220,7 +220,6 @@ process_nu_parser <- function(ds, parser, options = list()) {
if (length(matches) > 0) {
header <- ds$source$header[matches]
data <- ds$source$data[matches]
#if (default(debug)) nu_data <<- data
value <- rlang::eval_tidy(rlang::get_expr(parser$parse_quo))
if (n_problems(value) > 0) {
ds <- set_problems(ds, combined_problems(ds, value))
Expand Down
2 changes: 1 addition & 1 deletion _pkgdown.yml
Expand Up @@ -7,7 +7,7 @@ template:
ganalytics: UA-106138361-3

development:
mode: devel
mode: auto

home:
strip_header: true
Expand Down
31 changes: 28 additions & 3 deletions tests/testthat/test-continuous-flow.R
Expand Up @@ -154,11 +154,14 @@ test_that("test that additional continous flow files can be read", {
iso_turn_reader_caching_off()

# testing wrapper
check_continuous_flow_test_file <- function(file, file_info_cols = NULL, data_table_nrow = NULL, data_table_col_units = NULL) {
check_continuous_flow_test_file <- function(file, file_info_cols = NULL, data_table_nrow = NULL, data_table_col_units = NULL, n_problems = 0) {
file_path <- get_isoreader_test_file(file, local_folder = test_folder)
expect_true(file.exists(file_path))
expect_is(file <- iso_read_continuous_flow(file_path, read_cache = FALSE), "continuous_flow")
expect_equal(nrow(problems(file)), 0)
if (n_problems > 0)
expect_warning(file <- iso_read_continuous_flow(file_path, read_cache = FALSE))
else
expect_is(file <- iso_read_continuous_flow(file_path, read_cache = FALSE), "continuous_flow")
expect_equal(nrow(problems(file)), n_problems)
expect_equal(nrow(file$file_info), 1)
if (!is.null(file_info_cols))
expect_equal(names(file$file_info), file_info_cols)
Expand Down Expand Up @@ -190,6 +193,28 @@ test_that("test that additional continous flow files can be read", {
)
)

dxf2 <- check_continuous_flow_test_file(
"dxf_example_H_02.dxf",
c("file_id", "file_root", "file_path", "file_subpath", "file_datetime",
"file_size", "Row", "Peak Center", "Check Ref. Dilution", "H3 Stability",
"H3 Factor", "Conditioning", "Seed Oxidation", "GC Method", "AS Sample",
"AS Method", "Identifier 1", "Identifier 2", "Analysis", "Preparation", "Method",
"measurement_info", "MS_integration_time.s"),
53,
c(Nr. = NA, Start = "s", Rt = "s", End = "s", `Ampl 2` = "mV",
`Ampl 3` = "mV", `BGD 2` = "mV", `BGD 3` = "mV", `rIntensity 2` = "mVs",
`rIntensity 3` = "mVs", `rIntensity All` = "mVs", `Intensity 2` = "Vs",
`Intensity 3` = "Vs", `Intensity All` = "Vs", `Sample Dilution` = "%",
`List First Peak` = NA, `rR 3H2/2H2` = NA, `Is Ref.?` = NA, `R 3H2/2H2` = NA,
`Ref. Name` = NA, `rd 3H2/2H2` = "permil", `d 3H2/2H2` = "permil",
`R 2H/1H` = NA, `d 2H/1H` = "permil", `AT% 2H/1H` = "%", `Rps 3H2/2H2` = NA,
`Master Peak` = NA, `DeltaDelta 3H2/2H2` = "permil"
),
n_problems = 1L
)
expect_equal(problems(dxf2)$type, "warning")
expect_true(stringr::str_detect(problems(dxf2)$details, "has multiple precisions"))

check_continuous_flow_test_file(
"dxf_example_HO_01.dxf",
c("file_id", "file_root", "file_path", "file_subpath", "file_datetime",
Expand Down
Binary file added tests/testthat/test_data/dxf_example_H_02.dxf
Binary file not shown.

0 comments on commit 66bc66c

Please sign in to comment.