Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

update to 1.4.1 #191

Merged
merged 3 commits into from Jul 31, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 1 addition & 1 deletion DESCRIPTION
@@ -1,7 +1,7 @@
Package: isoreader
Title: Read Stable Isotope Data Files
Description: Interface to the raw data file formats commonly encountered in scientific disciplines that make use of stable isotopes.
Version: 1.4.0
Version: 1.4.1
Authors@R:
c(person(
given = "Sebastian", family = "Kopf",
Expand Down
92 changes: 62 additions & 30 deletions R/isoread_isodat.R
Expand Up @@ -499,6 +499,9 @@ extract_isodat_continuous_flow_vendor_data_table <- function(ds, cap_at_fun = NU
if (nrow(extracted_dt$cell_values) == 0L) {
stop("could not find any vendor table data", call. = FALSE)
}

# propagated newly registered problems
ds <- ds |> set_problems(combined_problems(ds, extracted_dt))

# store vendor data table
data_table <- full_join(peaks, mutate(extracted_dt$cell_values, .check = TRUE), by = "Nr.")
Expand Down Expand Up @@ -727,24 +730,40 @@ extract_isodat_main_vendor_data_table_fast <- function(ds, C_block, cap_at_fun =
ds$source <- cap_at_fun(ds$source)
}

columns <- extract_isodat_main_vendor_data_table_columns(ds, col_include = col_include)

# safety check: to make sure all columns have the same format specification
if (!all(ok <- columns$n_formats == 1)) {
formats <- map_chr(columns$data[!ok], ~collapse(unique(.x$format), ", "))
problems <- glue("column {columns$column[!ok]} has multiple formats '{formats}'")
# start output
output <- list()

# add columns
output$columns <- extract_isodat_main_vendor_data_table_columns(ds, col_include = col_include)

# safety check: to make sure all output$columns have the same format specification
if (!all(ok <- output$columns$n_types == 1)) {
formats <- map_chr(output$columns$data[!ok], ~collapse(unique(.x$format), ", "))
problems <- glue("column {output$columns$column[!ok]} has multiple formats '{formats}'")
iso_source_file_op_error(ds$source, glue("mismatched data column formats:\n{collapse(problems, '\n')}"))
}

# safety check: warn if different precisions
if (!all(ok <- output$columns$n_precisions == 1)) {
precisions <- map_chr(output$columns$data[!ok], ~collapse(unique(.x$precision), ", "))
problems <- glue("column {output$columns$column[!ok]} has multiple precisions '{precisions}'")
output <- register_warning(
output,
details = glue("mismatched data column formats:\n{collapse(problems, '\n')}"),
func = "extract_isodat_main_vendor_data_table"
)
}

# safety check: to make sure all formats are resolved
if (!all(ok <- !is.na(columns$type))) {
problems <- glue("column {columns$column[!ok]} has unknown format '{columns$column_format[!ok]}'")
if (!all(ok <- !is.na(output$columns$column_type))) {
problems <- glue("column {output$columns$column[!ok]} has unknown format '{output$columns$column_format[!ok]}'")
iso_source_file_op_error(ds$source, glue("unknown column formats:\n{collapse(problems, '\n')}"))
}

cell_values <- extract_isodat_main_vendor_data_table_values(ds, columns)
# finish output with cell values
output$cell_values <- extract_isodat_main_vendor_data_table_values(ds, output$columns)

return(list(columns = columns, cell_values = cell_values))
return(output)
}

# extract the main (recurring) portion of the vendor data table
Expand Down Expand Up @@ -804,32 +823,45 @@ extract_isodat_main_vendor_data_table_columns <- function(ds, pos = ds$source$po
dplyr::mutate(row = cumsum(.data$column == .data$column[1])) |>
# remove duplicates
dplyr::group_by(.data$column, .data$row) |>
dplyr::summarize(
group = .data$group[1],
continue_pos = .data$continue_pos[1],
id = .data$id[1],
format = .data$format[1],
`gas_config?` = .data$`gas_config?`[1],
units = .data$units[1],
ref_frame = .data$units[1],
.groups = "drop"
) |>
dplyr::filter(dplyr::row_number() == 1) |>
dplyr::ungroup() |>
# dplyr::summarize(
# group = .data$group[1],
# continue_pos = .data$continue_pos[1],
# id = .data$id[1],
# format = .data$format[1],
# `gas_config?` = .data$`gas_config?`[1],
# units = .data$units[1],
# ref_frame = .data$units[1],
# .groups = "drop"
# ) |>
dplyr::arrange(.data$group) |>
# nest by column and expand column details
tidyr::nest(data = c(-"column")) |>
# parse column format
dplyr::mutate(
n_formats = purrr::map_int(.data$data, ~length(unique(.x$format))),
column_format = purrr::map_chr(.data$data, ~.x$format[1]),
column_units = purrr::map_chr(.data$data, ~.x$units[1]),
type =
dplyr::case_when(
.data$column_format == "%s" ~ "text",
.data$column_format %in% c("%u", "%d") ~ "integer",
str_detect(.data$column_format, "\\%[0-9.]*f") ~ "double",
.data$format == "%s" ~ "text",
.data$format %in% c("%u", "%d") ~ "integer",
str_detect(.data$format, "\\%[0-9.]*f") ~ "double",
TRUE ~ NA_character_
)
),
precision = dplyr::if_else(
type == "double",
stringr::str_extract(.data$format, "(?<=\\.)\\d*(?=f)"),
NA_character_
)
) |>
# nest by column and expand column details
tidyr::nest(data = c(-"column")) |>
dplyr::mutate(
n_types = purrr::map_int(.data$data, ~length(unique(.x$type))),
n_precisions = purrr::map_int(.data$data, ~length(unique(.x$precision))),
line1 = purrr::map(.data$data, ~.x[1,c("format", "units", "type", "precision")])
) |>
tidyr::unnest(line1) |>
# naming adjustments
dplyr::rename("column_format" = "format", "column_units" = "units",
"column_type" = "type", "column_precision" = "precision") |>
dplyr::mutate(
# avoid issues with delta symbol on different OS
column = stringr::str_replace(.data$column, fixed("\U03B4"), "d"),
Expand Down Expand Up @@ -883,7 +915,7 @@ extract_isodat_main_vendor_data_table_values <- function(ds, columns) {

# get cell values
columns |>
filter(!is.na(type)) |>
filter(!is.na(.data$column_type)) |>
unnest("data") |>
select("column", "continue_pos", "type", "row") |>
nest(data = c(-row)) |>
Expand Down
1 change: 0 additions & 1 deletion R/isoread_nu.R
Expand Up @@ -220,7 +220,6 @@ process_nu_parser <- function(ds, parser, options = list()) {
if (length(matches) > 0) {
header <- ds$source$header[matches]
data <- ds$source$data[matches]
#if (default(debug)) nu_data <<- data
value <- rlang::eval_tidy(rlang::get_expr(parser$parse_quo))
if (n_problems(value) > 0) {
ds <- set_problems(ds, combined_problems(ds, value))
Expand Down
2 changes: 1 addition & 1 deletion _pkgdown.yml
Expand Up @@ -7,7 +7,7 @@ template:
ganalytics: UA-106138361-3

development:
mode: devel
mode: auto

home:
strip_header: true
Expand Down
31 changes: 28 additions & 3 deletions tests/testthat/test-continuous-flow.R
Expand Up @@ -154,11 +154,14 @@ test_that("test that additional continous flow files can be read", {
iso_turn_reader_caching_off()

# testing wrapper
check_continuous_flow_test_file <- function(file, file_info_cols = NULL, data_table_nrow = NULL, data_table_col_units = NULL) {
check_continuous_flow_test_file <- function(file, file_info_cols = NULL, data_table_nrow = NULL, data_table_col_units = NULL, n_problems = 0) {
file_path <- get_isoreader_test_file(file, local_folder = test_folder)
expect_true(file.exists(file_path))
expect_is(file <- iso_read_continuous_flow(file_path, read_cache = FALSE), "continuous_flow")
expect_equal(nrow(problems(file)), 0)
if (n_problems > 0)
expect_warning(file <- iso_read_continuous_flow(file_path, read_cache = FALSE))
else
expect_is(file <- iso_read_continuous_flow(file_path, read_cache = FALSE), "continuous_flow")
expect_equal(nrow(problems(file)), n_problems)
expect_equal(nrow(file$file_info), 1)
if (!is.null(file_info_cols))
expect_equal(names(file$file_info), file_info_cols)
Expand Down Expand Up @@ -190,6 +193,28 @@ test_that("test that additional continous flow files can be read", {
)
)

dxf2 <- check_continuous_flow_test_file(
"dxf_example_H_02.dxf",
c("file_id", "file_root", "file_path", "file_subpath", "file_datetime",
"file_size", "Row", "Peak Center", "Check Ref. Dilution", "H3 Stability",
"H3 Factor", "Conditioning", "Seed Oxidation", "GC Method", "AS Sample",
"AS Method", "Identifier 1", "Identifier 2", "Analysis", "Preparation", "Method",
"measurement_info", "MS_integration_time.s"),
53,
c(Nr. = NA, Start = "s", Rt = "s", End = "s", `Ampl 2` = "mV",
`Ampl 3` = "mV", `BGD 2` = "mV", `BGD 3` = "mV", `rIntensity 2` = "mVs",
`rIntensity 3` = "mVs", `rIntensity All` = "mVs", `Intensity 2` = "Vs",
`Intensity 3` = "Vs", `Intensity All` = "Vs", `Sample Dilution` = "%",
`List First Peak` = NA, `rR 3H2/2H2` = NA, `Is Ref.?` = NA, `R 3H2/2H2` = NA,
`Ref. Name` = NA, `rd 3H2/2H2` = "permil", `d 3H2/2H2` = "permil",
`R 2H/1H` = NA, `d 2H/1H` = "permil", `AT% 2H/1H` = "%", `Rps 3H2/2H2` = NA,
`Master Peak` = NA, `DeltaDelta 3H2/2H2` = "permil"
),
n_problems = 1L
)
expect_equal(problems(dxf2)$type, "warning")
expect_true(stringr::str_detect(problems(dxf2)$details, "has multiple precisions"))

check_continuous_flow_test_file(
"dxf_example_HO_01.dxf",
c("file_id", "file_root", "file_path", "file_subpath", "file_datetime",
Expand Down
Binary file added tests/testthat/test_data/dxf_example_H_02.dxf
Binary file not shown.