diff --git a/.Rbuildignore b/.Rbuildignore index af67788c..e30c098d 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -11,14 +11,14 @@ ^Makefile$ ^NEWS$ ^docs$ -^cache/.* -^tests/testthat/cache/.* +^cache +^tests/testthat/cache ^inst/other$ ^tmp.*$ ^README-.*\.png$ ^README\.Rmd$ ^logo\.png$ -^vignettes/cache/.* +^vignettes/cache ^vignettes/fig_output$ ^doc$ ^.*\.xlsx$ diff --git a/.github/workflows/R-CMD-check-devel.yaml b/.github/workflows/R-CMD-check-devel.yaml deleted file mode 100644 index 8f28a21a..00000000 --- a/.github/workflows/R-CMD-check-devel.yaml +++ /dev/null @@ -1,94 +0,0 @@ -on: - push: - branches: - - master - pull_request: - branches: - - master - schedule: - - cron: "0 0 * * FRI" - -name: devel - -jobs: - devel: - runs-on: ${{ matrix.config.os }} - - name: ${{ matrix.config.os }} (${{ matrix.config.r }}) - - strategy: - fail-fast: false - matrix: - config: - - {os: macOS-latest, r: 'devel'} - - env: - R_REMOTES_NO_ERRORS_FROM_WARNINGS: true - RSPM: ${{ matrix.config.rspm }} - GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} - - steps: - - uses: actions/checkout@v2 - - - uses: r-lib/actions/setup-r@master - with: - r-version: ${{ matrix.config.r }} - - - uses: r-lib/actions/setup-pandoc@v1 - - - name: Remove .Rprofile - run: rm .Rprofile - - - name: Query dependencies - run: | - install.packages('remotes') - saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2) - writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), ".github/R-version") - shell: Rscript {0} - - - name: Cache R packages - if: runner.os != 'Windows' - uses: actions/cache@v1 - with: - path: ${{ env.R_LIBS_USER }} - key: ${{ runner.os }}-r-${{ matrix.config.r }}-${{ hashFiles('depends.Rds') }} - restore-keys: ${{ runner.os }}-r-${{ matrix.config.r }}- - - - name: Install system dependencies - if: runner.os == 'Linux' - env: - RHUB_PLATFORM: linux-x86_64-ubuntu-gcc - run: | - Rscript -e "remotes::install_github('r-hub/sysreqs')" - 
sysreqs=$(Rscript -e "cat(sysreqs::sysreq_commands('DESCRIPTION'))") - sudo -s eval "$sysreqs" - - - name: Install dependencies - run: | - remotes::install_deps(dependencies = TRUE) - remotes::install_cran("rcmdcheck") - shell: Rscript {0} - - - name: Install package - run: | - # install isoreader before check so parallel processing examples work - remotes::install_cran("devtools") - devtools::install(".", dependencies = FALSE) - shell: Rscript {0} - - - name: Check - env: - _R_CHECK_CRAN_INCOMING_REMOTE_: false - run: | - # include test_data by removing it from .Rbuildignore - cat(stringr::str_subset(readr::read_lines(".Rbuildignore"), "test_data", negate = TRUE), file = ".Rbuildignore", sep = "\n") - # run RMD check - rcmdcheck::rcmdcheck(args = c("--no-manual", "--as-cran"), error_on = "warning", check_dir = "check") - shell: Rscript {0} - - - name: Upload check results - if: failure() - uses: actions/upload-artifact@master - with: - name: ${{ runner.os }}-r${{ matrix.config.r }}-results - path: check diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index 8f51fe86..8e615b12 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -26,6 +26,7 @@ jobs: - {os: ubuntu-latest, r: 'release'} env: + R_REMOTES_NO_ERRORS_FROM_WARNINGS: true GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} R_KEEP_PKG_SOURCE: yes @@ -45,6 +46,12 @@ jobs: extra-packages: any::rcmdcheck needs: check + - name: enable additional tests + run: | + # include test_data by removing it from .Rbuildignore + cat(stringr::str_subset(readr::read_lines(".Rbuildignore"), "test_data", negate = TRUE), file = ".Rbuildignore", sep = "\n") + shell: Rscript {0} + - uses: r-lib/actions/check-r-package@v2 with: upload-snapshots: true diff --git a/DESCRIPTION b/DESCRIPTION index 29bd86b6..a74dabf2 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -22,32 +22,32 @@ Authors@R: URL: http://isoreader.isoverse.org, https://github.com/isoverse/isoreader 
BugReports: https://github.com/isoverse/isoreader/issues Depends: - R (>= 4.0.0), + R (>= 4.2.0), stats Imports: methods, R.utils, - magrittr, - rlang (>= 0.4.5), - tidyselect (>= 1.0.0), - vctrs (>= 0.3.4), + rlang (>= 1.1.0), + lifecycle (>= 1.0.0), + tidyselect (>= 1.2.0), + vctrs (>= 0.6.0), tibble (>= 3.0.0), - dplyr (>= 1.0.0), - tidyr (>= 1.0.0), - glue (>= 1.4.0), - stringr (>= 1.4.0), - purrr (>= 0.3.4), - future (>= 1.18.0), - lubridate (>= 1.7.9.2), - readr (>= 1.4.0), + dplyr (>= 1.1.0), + tidyr (>= 1.3.0), + glue (>= 1.6.0), + stringr (>= 1.5.0), + purrr (>= 1.0.0), + future (>= 1.33.0), + lubridate (>= 1.9.0), + readr (>= 2.0.0), progress (>= 1.2.2) Suggests: devtools, testthat, feather (>= 0.3.5), - readxl (>= 1.3.1), - openxlsx (>= 4.1.5), - xml2 (>= 1.3.1), + readxl (>= 1.4.0), + openxlsx (>= 4.2.0), + xml2 (>= 1.3.5), rhdf5 (>= 2.0.0), knitr, markdown, @@ -57,4 +57,4 @@ biocViews: License: GPL (>= 2) Encoding: UTF-8 VignetteBuilder: knitr -RoxygenNote: 7.2.1 +RoxygenNote: 7.2.3 diff --git a/Makefile b/Makefile index b399b4bc..5b21b614 100644 --- a/Makefile +++ b/Makefile @@ -7,16 +7,17 @@ all: docu check docu: Rscript -e "devtools::document(roclets=c('rd', 'collate', 'namespace'))" -# test package functionality without all example files (= as if on CRAN) +# test package functionality without example files (= as if on CRAN) check: - R -q -e "devtools::check(env_vars = c())" + R -q -e "message('\nINFO: running check as if on CRAN\n'); devtools::check(env_vars = c())" +# tests without file tests (= as if on CRAN) auto_test: - R -q -e "rm(list = ls()); Sys.setenv(NOT_CRAN = \"false\"); testthat::auto_test_package()" + R -q -e "rm(list = ls()); options(\"isoreader.skip_file_tests\" = TRUE); message('\nINFO: running tests as if on CRAN\n'); testthat::auto_test_package()" -# test with all example files (= as if not on CRAN) +# tests with file test auto_test_all: - R -q -e "rm(list = ls()); testthat::auto_test_package()" + R -q -e "rm(list = ls()); 
message('\nINFO: running all tests (as if NOT on CRAN)\n'); testthat::auto_test_package()" # check code complexity count: diff --git a/NAMESPACE b/NAMESPACE index 9f7902d7..9a350d86 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -74,7 +74,6 @@ S3method(vec_ptype_abbr,iso_double_with_units) S3method(vec_ptype_full,iso_double_with_units) export("!!!") export("!!") -export("%>%") export(ends_with) export(everything) export(extract_substring) @@ -82,12 +81,11 @@ export(extract_word) export(filter) export(iso_add_file_info) export(iso_as_file_list) -export(iso_calculate_ratios) export(iso_cleanup_reader_cache) -export(iso_convert_signals) -export(iso_convert_time) export(iso_double_with_units) export(iso_expand_paths) +export(iso_export_files_to_excel) +export(iso_export_files_to_feather) export(iso_export_to_excel) export(iso_export_to_feather) export(iso_filter_files) @@ -108,6 +106,7 @@ export(iso_get_reader_examples) export(iso_get_reader_examples_folder) export(iso_get_resistors) export(iso_get_resistors_info) +export(iso_get_source_file_structure) export(iso_get_standards) export(iso_get_standards_info) export(iso_get_supported_file_types) @@ -126,9 +125,6 @@ export(iso_make_units_implicit) export(iso_mutate_file_info) export(iso_omit_files_with_problems) export(iso_parse_file_info) -export(iso_plot_continuous_flow_data) -export(iso_plot_dual_inlet_data) -export(iso_plot_raw_data) export(iso_print_source_file_structure) export(iso_read_continuous_flow) export(iso_read_dual_inlet) @@ -159,7 +155,6 @@ export(iso_turn_info_messages_on) export(iso_turn_reader_caching_off) export(iso_turn_reader_caching_on) export(iso_with_units) -export(isoread) export(matches) export(parse_datetime) export(parse_double) @@ -208,7 +203,6 @@ importFrom(glue,glue) importFrom(lubridate,as_datetime) importFrom(lubridate,duration) importFrom(lubridate,interval) -importFrom(magrittr,"%>%") importFrom(methods,is) importFrom(methods,setOldClass) importFrom(purrr,is_empty) diff --git a/NEWS.md 
b/NEWS.md index 24fb1565..f89dfc0f 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,24 @@ +# isoreader 1.4.0 + +## Breaking changes + +Several previously deprecated functions have been removed: `iso_calculate_ratios()`, `iso_convert_signals()`, `iso_convert_time()`, `iso_plot_continuous_flow_data()`, `iso_plot_dual_inlet_data()`, `iso_plot_raw_data()`, `isoread()`. Older code that still uses these functions instead of their replacements will no longer work. + +## New features + +* simpler cache file names that also removed dependency on the `UNF` package +* faster implementation of binary file structure analysis for isodat files, accessible via the new `iso_get_source_file_structure()` function + +## Enhancements + +* ~20% performance increase in reading isodat files through code optimization +* `iso_export_to_excel()` and `iso_export_to_feather()` renamed to `iso_export_files_to_excel()` and `iso_export_files_to_feather()`, respectively, to avoid ambiguity with other export functions. + +## Bug fixes + +* fixes to be compatible with latest tidyselect and dplyr updates + + # isoreader 1.3.0 ## Major changes @@ -40,6 +61,6 @@ This package provides the following data aggregation and data processing functio This package provides the following data export functionality for all supported data files: - - export to open Excel (.xslx) with `?iso_export_to_excel` - - export to the Python/R cross-over feather file format with `?iso_export_to_feather` + - export to open Excel (.xslx) with `?iso_export_files_to_excel` + - export to the Python/R cross-over feather file format with `?iso_export_files_to_feather` \ No newline at end of file diff --git a/R/aggregate_data.R b/R/aggregate_data.R index 212c3fd8..4d9d5bdf 100644 --- a/R/aggregate_data.R +++ b/R/aggregate_data.R @@ -23,14 +23,14 @@ iso_get_data_summary <- function(iso_files, quiet = default(quiet)) { iso_files <- iso_as_file_list(iso_files) if (!quiet) { - glue("Info: aggregating data summary from {length(iso_files)} data 
file(s)") %>% + glue("Info: aggregating data summary from {length(iso_files)} data file(s)") |> message() } if (length(iso_files) == 0) return(tibble()) # aggregate all the info - tibble( + info <- tibble( file_id = names(iso_files), file_path_ = map_chr( iso_files, @@ -38,18 +38,21 @@ iso_get_data_summary <- function(iso_files, quiet = default(quiet)) { file_subpath = map_chr( iso_files, ~if (col_in_df(.x$file_info, "file_subpath")) { .x$file_info$file_subpath } else { NA_character_ }) - ) %>% - left_join(get_raw_data_info(iso_files), by = "file_id") %>% - left_join(get_file_info_info(iso_files), by = "file_id") %>% - left_join(get_method_info_info(iso_files), by = "file_id") %>% - { - # scan files don't have vendor data table - if (!iso_is_scan(iso_files)) - left_join(., get_vendor_data_table_info(iso_files), by = "file_id") - else . - } %>% - mutate(file_path = ifelse(!is.na(file_subpath), glue("{file_path_}|{file_subpath}"), file_path_)) %>% - select(-file_path_, -file_subpath) + ) |> + left_join(get_raw_data_info(iso_files), by = "file_id") |> + left_join(get_file_info_info(iso_files), by = "file_id") |> + left_join(get_method_info_info(iso_files), by = "file_id") + + + + # scan files don't have vendor data table + if (!iso_is_scan(iso_files)) + info <- info |> left_join(iso_files |> get_vendor_data_table_info(), by = "file_id") + + # wrap up + info |> + mutate(file_path = ifelse(!is.na(file_subpath), glue("{file_path_}|{file_subpath}"), file_path_)) |> + select(-"file_path_", -"file_subpath") } # summary of raw data info @@ -68,14 +71,14 @@ get_raw_data_info <- function(iso_files) { tibble( file_id = names(iso_files), read_raw_data = map_lgl(iso_files, ~.x$read_options$raw_data), - all_ions = map(iso_files, ~names(.x$raw_data) %>% str_subset("^[iIvV]C?(\\d+)\\.")), + all_ions = map(iso_files, ~names(.x$raw_data) |> str_subset("^[iIvV]C?(\\d+)\\.")), n_ions = map_int(.data$all_ions, length), full_ions = map2_chr(.data$all_ions, .data$n_ions, 
~if(.y > 0) { collapse(.x, sep = ", ") } else {""}), - ions = .data$full_ions %>% str_replace_all("[^0-9,]", "") + ions = .data$full_ions |> str_replace_all("[^0-9,]", "") ) if (iso_is_continuous_flow(iso_files)) { - raw_data_sum <- raw_data_sum %>% + raw_data_sum <- raw_data_sum |> mutate( n_tps = map_int(iso_files, ~nrow(.x$raw_data)), label = case_when( @@ -85,7 +88,7 @@ get_raw_data_info <- function(iso_files) { ) ) } else if (iso_is_dual_inlet(iso_files)) { - raw_data_sum <- raw_data_sum %>% + raw_data_sum <- raw_data_sum |> mutate( n_cycles = map_int(iso_files, ~as.integer(floor(nrow(.x$raw_data)/2))), label = case_when( @@ -95,7 +98,7 @@ get_raw_data_info <- function(iso_files) { ) ) } else if (iso_is_scan(iso_files)) { - raw_data_sum <- raw_data_sum %>% + raw_data_sum <- raw_data_sum |> mutate( n_tps = map_int(iso_files, ~nrow(.x$raw_data)), label = case_when( @@ -109,10 +112,10 @@ get_raw_data_info <- function(iso_files) { stop("make_iso_file_data_structure should never be called directly", call. = FALSE) } else { # should not get here - glue("cannot process '{class(iso_files[[1]])[1]}' in get_raw_data_info") %>% stop(call. = FALSE) + glue("cannot process '{class(iso_files[[1]])[1]}' in get_raw_data_info") |> stop(call. 
= FALSE) } - return(dplyr::select(raw_data_sum, .data$file_id, raw_data = .data$label)) + return(dplyr::select(raw_data_sum, "file_id", raw_data = "label")) } # summary of file info @@ -130,7 +133,7 @@ get_file_info_info <- function(iso_files) { file_id = names(iso_files), read_file_info = map_lgl(iso_files, ~.x$read_options$file_info), file_info = ifelse(!.data$read_file_info, "file info not read", paste(map_int(iso_files, ~length(.x$file_info)), "entries")) - ) %>% select(.data$file_id, .data$file_info) + ) |> select("file_id", "file_info") } } @@ -157,7 +160,7 @@ get_method_info_info <- function(iso_files) { has_resistors ~ "resistors", TRUE ~ "no method info" ) - ) %>% select(.data$file_id, .data$method_info) + ) |> select("file_id", "method_info") } } @@ -182,20 +185,23 @@ get_vendor_data_table_info <- function(iso_files) { .data$rows > 0 & .data$cols > 0 ~ sprintf("%d rows, %d columns", .data$rows, .data$cols), TRUE ~ "no vendor data table" ) - ) %>% select(.data$file_id, .data$vendor_data_table) + ) |> select("file_id", "vendor_data_table") } } # Specific data aggregation calls ===== -#' DEPRECATED -#' -#' Please use \link{iso_get_all_data} instead. -#' @param ... forwarded to \link{iso_get_all_data} -#' +#' @rdname deprecated +#' @details \code{iso_get_data}: use \link{iso_get_all_data} instead #' @export iso_get_data <- function(...) { - warning("'iso_get_data()' is deprecated in favor of the more descriptive 'iso_get_all_data()'. Please use 'iso_get_all_data()' directly to avoid this warning.", immediate. = TRUE, call. = FALSE) + lifecycle::deprecate_warn( + "1.3.0", + "iso_get_data()", + "iso_get_all_data()", + details = "Function renamed to be more descriptive.", + always = TRUE + ) iso_get_all_data(...) 
} @@ -225,7 +231,7 @@ iso_get_all_data <- function( # info iso_files <- iso_as_file_list(iso_files) - if (!quiet) sprintf("Info: aggregating all data from %d data file(s)", length(iso_files)) %>% message() + if (!quiet) sprintf("Info: aggregating all data from %d data file(s)", length(iso_files)) |> message() # deprecated parameter if (!missing(with_ratios)) { @@ -253,7 +259,7 @@ iso_get_all_data <- function( file_class <- tibble( file_id = names(iso_files), - file_type = map_chr(iso_files, ~class(.x)[1]) %>% unname() + file_type = map_chr(iso_files, ~class(.x)[1]) |> unname() ) # all file data @@ -262,34 +268,34 @@ iso_get_all_data <- function( # data merge function merge_with_file_class <- function(new_df, col_name) { - nested_df <- nest(new_df[c(),], !!col_name := c(-.data$file_id)) + nested_df <- nest(new_df[c(),], !!col_name := c(-"file_id")) if (ncol(new_df) > 1) - nested_df <- nest(new_df, !!col_name := c(-.data$file_id)) + nested_df <- nest(new_df, !!col_name := c(-"file_id")) nested_df <- bind_rows(nested_df, tibble(file_id = setdiff(file_class$file_id, nested_df$file_id), !!col_name := list(tibble()))) left_join(file_class, nested_df, by = "file_id") } # file info if (include_file_info) { - file_class <- iso_get_file_info(iso_files, select = !!include_file_info_exp, quiet = TRUE) %>% + file_class <- iso_get_file_info(iso_files, select = !!include_file_info_exp, quiet = TRUE) |> merge_with_file_class("file_info") } # raw data if (include_raw_data) { - file_class <- iso_get_raw_data(iso_files, select = !!include_raw_data_exp, gather = gather, quiet = TRUE) %>% + file_class <- iso_get_raw_data(iso_files, select = !!include_raw_data_exp, gather = gather, quiet = TRUE) |> merge_with_file_class("raw_data") } # standards if (include_standards) { - file_class <- iso_get_standards(iso_files, select = !!include_standards_exp, quiet = TRUE) %>% + file_class <- iso_get_standards(iso_files, select = !!include_standards_exp, quiet = TRUE) |> 
merge_with_file_class("standards") } # resistors if (include_resistors) { - file_class <- iso_get_resistors(iso_files, select = !!include_resistors_exp, quiet = TRUE) %>% + file_class <- iso_get_resistors(iso_files, select = !!include_resistors_exp, quiet = TRUE) |> merge_with_file_class("resistors") } @@ -298,13 +304,13 @@ iso_get_all_data <- function( file_class <- iso_get_vendor_data_table( iso_files, with_explicit_units = with_explicit_units, - select = !!include_vendor_data_table_exp, quiet = TRUE) %>% + select = !!include_vendor_data_table_exp, quiet = TRUE) |> merge_with_file_class("vendor_data_table") } # problems if (include_problems) { - file_class <- iso_get_problems(iso_files, select = !!include_problems_exp) %>% + file_class <- iso_get_problems(iso_files, select = !!include_problems_exp) |> merge_with_file_class("problems") } @@ -331,21 +337,21 @@ iso_get_file_info <- function(iso_files, select = everything(), file_specific = if (!quiet) { glue::glue( "Info: aggregating file info from {length(iso_files)} data file(s)", - "{get_info_message_concat(select_exp, prefix = ', selecting info columns ', empty = 'everything()')}") %>% + "{get_info_message_concat(select_exp, prefix = ', selecting info columns ', empty = 'everything()')}") |> message() } check_read_options(iso_files, "file_info") # retrieve info - file_info <- iso_files %>% - { - if (rlang::as_label(select_exp) != "everything()") - # select columns - iso_select_file_info(., !!select_exp, file_specific = file_specific, quiet = TRUE) - else . 
# much faster (if selecting everything) - } %>% + if (rlang::as_label(select_exp) != "everything()") + file_info_files <- iso_files |> + iso_select_file_info(!!select_exp, file_specific = file_specific, quiet = TRUE) + else + file_info_files <- iso_files + + file_info <- file_info_files |> # retrieve file info - map(~.x$file_info) %>% + map(~.x$file_info) |> # combine in data frame (use safe bind to make sure different data column # types of the same name don't trip up the combination) safe_bind_rows() @@ -387,7 +393,7 @@ iso_get_raw_data <- function(iso_files, select = everything(), gather = FALSE, i glue::glue( "Info: aggregating raw data from {length(iso_files)} data file(s)", "{get_info_message_concat(select_exp, prefix = ', selecting data columns ', empty = 'everything()')}", - "{get_info_message_concat(include_file_info_quo, prefix = ', including file info ')}") %>% message() + "{get_info_message_concat(include_file_info_quo, prefix = ', including file info ')}") |> message() } check_read_options(iso_files, "raw_data") @@ -400,33 +406,33 @@ iso_get_raw_data <- function(iso_files, select = everything(), gather = FALSE, i tibble( file_id = names(iso_files), raw_data = map(iso_files, ~.x$raw_data) - ) %>% + ) |> # make sure to include only existing raw data - filter(!map_lgl(raw_data, is.null)) %>% + filter(!map_lgl(raw_data, is.null)) |> # unnest - unnest(raw_data) + unnest("raw_data") # check for rows - if (nrow(data) == 0) return(dplyr::select(data, .data$file_id)) + if (nrow(data) == 0) return(dplyr::select(data, "file_id")) # selecting columns select_cols <- get_column_names(data, select = select_exp, n_reqs = list(select = "*"), cols_must_exist = FALSE)$select if (!"file_id" %in% select_cols) select_cols <- c("file_id", select_cols) # file id always included - data <- data %>% + data <- data |> # focus on selected columns only (also takes care of the rename) - dplyr::select(!!!select_cols) + dplyr::select(dplyr::all_of(select_cols)) # if gathering if 
(gather) { data_cols_re <- "^([^0-9]+)(\\d+/?\\d*)(\\.(.+))?$" gather_cols <- stringr::str_subset(names(data), data_cols_re) - data <- data %>% + data <- data |> # gather all masses and ratios - tidyr::pivot_longer(gather_cols, names_to = "column", values_to = "value", values_drop_na = TRUE) %>% + tidyr::pivot_longer(dplyr::all_of(gather_cols), names_to = "column", values_to = "value", values_drop_na = TRUE) |> # extract unit information - extract(.data$column, into = c("prefix", "data", "extra_parens", "units"), regex = data_cols_re) %>% - dplyr::select(-.data$extra_parens) %>% + extract(.data$column, into = c("prefix", "data", "extra_parens", "units"), regex = data_cols_re) |> + dplyr::select(-"extra_parens") |> mutate( # units cleanup units = ifelse(is.na(units) | nchar(units) == 0, NA_character_, units), @@ -442,8 +448,8 @@ iso_get_raw_data <- function(iso_files, select = everything(), gather = FALSE, i .data$category == "delta" ~ paste0("d", .data$data), TRUE ~ paste0(.data$prefix, .data$data) ) - ) %>% - dplyr::select(-.data$prefix) + ) |> + dplyr::select(-"prefix") } # if file info @@ -475,7 +481,7 @@ iso_get_bgrd_data <- function(iso_files, select = everything(), gather = FALSE, glue( "Info: aggregating background data from {length(iso_files)} data file(s)", "{get_info_message_concat(select_exp, prefix = ', selecting data columns ', empty = 'everything()')}", - "{get_info_message_concat(include_file_info_quo, prefix = ', including file info ')}") %>% message() + "{get_info_message_concat(include_file_info_quo, prefix = ', including file info ')}") |> message() } check_read_options(iso_files, "raw_data") @@ -488,35 +494,35 @@ iso_get_bgrd_data <- function(iso_files, select = everything(), gather = FALSE, tibble( file_id = names(iso_files), bgrd_data = map(iso_files, ~.x$bgrd_data) - ) %>% + ) |> # make sure to include only existing raw data - filter(!map_lgl(bgrd_data, is.null)) %>% + filter(!map_lgl(bgrd_data, is.null)) |> # unnest - unnest(bgrd_data) + 
unnest("bgrd_data") # check for rows - if (nrow(data) == 0) return(dplyr::select(data, .data$file_id)) + if (nrow(data) == 0) return(dplyr::select(data, "file_id")) # selecting columns select_cols <- get_column_names(data, select = select_exp, n_reqs = list(select = "*"), cols_must_exist = FALSE)$select if (!"file_id" %in% select_cols) select_cols <- c("file_id", select_cols) # file info always included - data <- data %>% + data <- data |> # focus on selected columns only (also takes care of the rename) - dplyr::select(!!!select_cols) + dplyr::select(dplyr::all_of(select_cols)) # if gathering if (gather) { column <- value <- extra_parens <- category <- NULL # global vars masses_ratios_re <- "^([vir])(\\d+/?\\d*)(\\.(.+))?$" - data <- data %>% + data <- data |> # gather all masses and ratios - gather(column, value, matches(masses_ratios_re)) %>% + gather(column, value, matches(masses_ratios_re)) |> # extract unit information - extract(.data$column, into = c("category", "data", "extra_parens", "units"), regex = masses_ratios_re) %>% - dplyr::select(-.data$extra_parens) %>% + extract(.data$column, into = c("category", "data", "extra_parens", "units"), regex = masses_ratios_re) |> + dplyr::select(-"extra_parens") |> # remove unknown data - filter(!is.na(.data$value)) %>% + filter(!is.na(.data$value)) |> # assign category mutate(category = ifelse(.data$category == "r", "ratio", "mass")) } @@ -529,14 +535,17 @@ iso_get_bgrd_data <- function(iso_files, select = everything(), gather = FALSE, return(data) } -#' DEPRECATED -#' -#' Please use \link{iso_get_standards} instead. -#' @param ... forwarded to \link{iso_get_standards} -#' +#' @rdname deprecated +#' @details \code{iso_get_standards_info}: use \link{iso_get_standards} instead #' @export iso_get_standards_info <- function(...) { - warning("'iso_get_standards_info()' is deprecated in favor of the simpler 'iso_get_standards()'. Please use 'iso_get_standards()' directly to avoid this warning.", immediate. = TRUE, call. 
= FALSE) + lifecycle::deprecate_warn( + "1.3.0", + "iso_get_standards_info()", + "iso_get_standards()", + details = "Function renamed for simplification.", + always = TRUE + ) iso_get_standards(...) } @@ -562,7 +571,7 @@ iso_get_standards <- function(iso_files, select = everything(), include_file_inf include_file_info_quo <- enquo(include_file_info) if (!quiet) { sprintf("Info: aggregating standards info from %d data file(s)%s", length(iso_files), - get_info_message_concat(include_file_info_quo, prefix = ", including file info ")) %>% message() + get_info_message_concat(include_file_info_quo, prefix = ", including file info ")) |> message() } # deprecated parameter @@ -585,15 +594,15 @@ iso_get_standards <- function(iso_files, select = everything(), include_file_inf ) # check for rows - if (nrow(data) == 0) return(dplyr::select(data, .data$file_id)) + if (nrow(data) == 0) return(dplyr::select(data, "file_id")) # merge info - standards <- data %>% - dplyr::select(.data$file_id, standards) %>% - dplyr::filter(!map_lgl(.data$standards, is.null)) %>% unnest(.data$standards) - ref_ratios <- data %>% dplyr::select(.data$file_id, .data$ref_ratios) %>% - dplyr::filter(!map_lgl(.data$ref_ratios, is.null)) %>% - tidyr::unnest(.data$ref_ratios) + standards <- data |> + dplyr::select("file_id", "standards") |> + dplyr::filter(!map_lgl(.data$standards, is.null)) |> unnest("standards") + ref_ratios <- data |> dplyr::select("file_id", "ref_ratios") |> + dplyr::filter(!map_lgl(.data$ref_ratios, is.null)) |> + tidyr::unnest("ref_ratios") if ("reference" %in% names(ref_ratios)) data <- dplyr::left_join(standards, ref_ratios, by = c("file_id", "reference")) else @@ -607,7 +616,7 @@ iso_get_standards <- function(iso_files, select = everything(), include_file_inf select_cols <- c("file_id", select_cols) # file info always included # focus on selected columns only (also takes care of the rename) - data <- dplyr::select(data, !!!select_cols) %>% unique() + data <- dplyr::select(data, 
dplyr::all_of(select_cols)) |> unique() # if file info if (!quo_is_null(include_file_info_quo)) { @@ -617,16 +626,17 @@ iso_get_standards <- function(iso_files, select = everything(), include_file_inf return(data) } - -#' DEPRECATED -#' -#' Please use \link{iso_get_resistors} instead. -#' -#' @param ... forwarded to \link{iso_get_resistors} -#' +#' @rdname deprecated +#' @details \code{iso_get_resistors_info}: use \link{iso_get_resistors} instead #' @export iso_get_resistors_info <- function(...) { - warning("'iso_get_resistors_info()' is deprecated in favor of the simpler 'iso_get_resistors()'. Please use 'iso_get_resistors()' directly to avoid this warning.", immediate. = TRUE, call. = FALSE) + lifecycle::deprecate_warn( + "1.3.0", + "iso_get_resistors_info()", + "iso_get_resistors()", + details = "Function renamed for simplification.", + always = TRUE + ) iso_get_resistors(...) } @@ -646,7 +656,7 @@ iso_get_resistors <- function(iso_files, select = everything(), include_file_in include_file_info_quo <- enquo(include_file_info) if (!quiet) { sprintf("Info: aggregating resistors info from %d data file(s)%s", length(iso_files), - get_info_message_concat(include_file_info_quo, prefix = ", including file info ")) %>% message() + get_info_message_concat(include_file_info_quo, prefix = ", including file info ")) |> message() } check_read_options(iso_files, "method_info") @@ -660,14 +670,14 @@ iso_get_resistors <- function(iso_files, select = everything(), include_file_in tibble( file_id = names(iso_files), resistors = map(iso_files, ~.x$method_info$resistors) - ) %>% + ) |> # make sure to include only existing raw data - dplyr::filter(!map_lgl(resistors, is.null)) %>% + dplyr::filter(!map_lgl(resistors, is.null)) |> # unnest - tidyr::unnest(resistors) + tidyr::unnest("resistors") # check for rows - if (nrow(data) == 0) return(dplyr::select(data, .data$file_id)) + if (nrow(data) == 0) return(dplyr::select(data, "file_id")) # select columns select_cols <- 
get_column_names(data, select = enquo(select), n_reqs = list(select = "*"), cols_must_exist = FALSE)$select @@ -675,7 +685,7 @@ iso_get_resistors <- function(iso_files, select = everything(), include_file_in select_cols <- c("file_id", select_cols) # file info always included # focus on selected columns only (also takes care of the rename) - data <- dplyr::select(data, !!!select_cols) + data <- dplyr::select(data, dplyr::all_of(select_cols)) # if file info if (!quo_is_null(include_file_info_quo)) { @@ -717,7 +727,7 @@ iso_get_vendor_data_table <- function( sprintf("Info: aggregating vendor data table%s from %d data file(s)%s", if (with_explicit_units) " with explicit units" else "", length(iso_files), - get_info_message_concat(include_file_info_quo, prefix = ", including file info ")) %>% message() + get_info_message_concat(include_file_info_quo, prefix = ", including file info ")) |> message() } check_read_options(iso_files, "vendor_data_table") @@ -740,21 +750,21 @@ iso_get_vendor_data_table <- function( tibble( file_id = names(iso_files), dt = map(iso_files, ~.x$vendor_data_table) - ) %>% + ) |> # make sure to include only existing data filter(map_lgl(dt, ~!is.null(.x) & nrow(.x) > 0)) # check for any rows - if (nrow(vendor_data_table) == 0) return(dplyr::select(vendor_data_table, .data$file_id)) + if (nrow(vendor_data_table) == 0) return(dplyr::select(vendor_data_table, "file_id")) # make units explicit if wanted if (with_explicit_units) { - vendor_data_table <- vendor_data_table %>% + vendor_data_table <- vendor_data_table |> mutate(dt = map(dt, iso_make_units_explicit)) } # unnest - vendor_data_table <- dplyr::select(vendor_data_table, .data$file_id, .data$dt) %>% unnest(.data$dt) + vendor_data_table <- dplyr::select(vendor_data_table, "file_id", "dt") |> unnest("dt") # get include information select_cols <- get_column_names(vendor_data_table, select = enquo(select), n_reqs = list(select = "*"), cols_must_exist = FALSE)$select @@ -762,7 +772,7 @@ 
iso_get_vendor_data_table <- function( select_cols <- c("file_id", select_cols) # file info always included # focus on selected columns only (also takes care of the rename) - vendor_data_table <- dplyr::select(vendor_data_table, !!!select_cols) + vendor_data_table <- dplyr::select(vendor_data_table, dplyr::all_of(select_cols)) # include file info if (!quo_is_null(include_file_info_quo)) { @@ -776,7 +786,7 @@ iso_get_vendor_data_table <- function( # check if read options are compatible check_read_options <- function(iso_files, option) { iso_files <- iso_as_file_list(iso_files) - option_values <- map(iso_files, "read_options") %>% map_lgl(option) + option_values <- map(iso_files, "read_options") |> map_lgl(option) if (!all(option_values)) { warning(sum(!option_values), "/", length(iso_files), " files were read without extracting the ", str_replace_all(option, "_", " "), @@ -801,7 +811,7 @@ convert_file_path_to_rooted <- function(iso_files, root = ".", ...) { # get paths paths <- - map_chr(iso_files[needs_conversion], ~.x$file_info$file_path) %>% + map_chr(iso_files[needs_conversion], ~.x$file_info$file_path) |> iso_root_paths(root = root, check_existence = FALSE) # prepare file info updates @@ -809,8 +819,8 @@ convert_file_path_to_rooted <- function(iso_files, root = ".", ...) { names(file_info_update) <- names(iso_files[needs_conversion]) # make sure to keep format - iso_files <- as.list(iso_files) %>% - modifyList(file_info_update) %>% + iso_files <- as.list(iso_files) |> + modifyList(file_info_update) |> iso_as_file_list(...) } @@ -834,7 +844,7 @@ safe_bind_rows <- function(df_list, exclude = names(make_iso_file_data_structure # @note - could this use the new tidyr::chop more effectively? 
ensure_data_frame_list_columns <- function(x, exclude = names(make_iso_file_data_structure()$file_info)) { # make sure all columns are ready - cols_to_list <- names(x)[!map_lgl(x, is.list)] %>% setdiff(exclude) + cols_to_list <- names(x)[!map_lgl(x, is.list)] |> setdiff(exclude) if(length(cols_to_list) > 0) { func <- if (is.data.frame(x)) as.list else list x[cols_to_list] <- map(x[cols_to_list], func) @@ -881,7 +891,7 @@ unnest_aggregated_data_frame <- function(df) { # warning message about inconsistent data columns with multiple data types if (any(!cols$has_identical_class)) { glue("encountered different value types within the same column(s), they cannot be automatically unnested: ", - "'{collapse(filter(cols, !has_identical_class)$column, sep = \"', '\")}'") %>% + "'{collapse(filter(cols, !has_identical_class)$column, sep = \"', '\")}'") |> warning(immediate. = TRUE, call. = FALSE) } @@ -911,7 +921,7 @@ unnest_aggregated_data_frame <- function(df) { # have to switch to int b/c there is no map2_datetime yet map2_int(!!sym(cols$column[i]), cols$is_missing[[i]], # NA_integer_ is okay here because of the as_datetime wrapper afterwards - ~if (.y) { NA_integer_ } else { as.integer(.x[1]) }) %>% + ~if (.y) { NA_integer_ } else { as.integer(.x[1]) }) |> as_datetime(tz = Sys.timezone())) else if (cols$identical_class[i] == "iso_double_with_units") df <- mutate(df, !!cols$column[i] := @@ -922,7 +932,7 @@ unnest_aggregated_data_frame <- function(df) { ) ) else { - glue("cannot unnest file info column {cols$column[i]}, encountered unusual class {cols$identical_class[i]}") %>% + glue("cannot unnest file info column {cols$column[i]}, encountered unusual class {cols$identical_class[i]}") |> warning(immediate. = TRUE, call. 
= FALSE) } } else if (cols$renest_missing_value[i]) { @@ -940,6 +950,6 @@ unnest_aggregated_data_frame <- function(df) { # helper function to concatenate list columns for export file formats that cannot handle the embedded data collapse_list_columns <- function(df, sep = ", ") { collapse_function <- function(x) collapse(x, sep = sep) - df %>% + df |> mutate_if(.predicate = is.list, .funs = map_chr, collapse_function) } diff --git a/R/calculate_ratios.R b/R/calculate_ratios.R deleted file mode 100644 index 5ad3d96c..00000000 --- a/R/calculate_ratios.R +++ /dev/null @@ -1,8 +0,0 @@ - -#' moved to isoprocessor -#' -#' @param ... deprecated -#' @export -iso_calculate_ratios <- function(...) { - show_isoprocessor_migration_message(match.call()[[1]]) -} diff --git a/R/cleanup.R b/R/cleanup.R index aa849d53..e316c525 100644 --- a/R/cleanup.R +++ b/R/cleanup.R @@ -107,6 +107,6 @@ extract_word <- function(string, capture_n = 1, include_numbers = TRUE, include_ chr_dash <- if(include_dash) "-" else "" chr_space <- if(include_space) " " else "" chr_colon <- if(include_colon) "." else "" - pattern <- glue("[{chr_numbers}A-Za-z{chr_underscore}{chr_dash}{chr_space}{chr_colon}]+") %>% as.character() + pattern <- glue("[{chr_numbers}A-Za-z{chr_underscore}{chr_dash}{chr_space}{chr_colon}]+") |> as.character() extract_substring(string, pattern = pattern, capture_n = capture_n, missing = missing) } diff --git a/R/export.R b/R/export.R index 80e67676..bc0fdef9 100644 --- a/R/export.R +++ b/R/export.R @@ -1,5 +1,19 @@ ## Export functions ======= +#' @rdname deprecated +#' @details \code{iso_export_to_excel}: use \link{iso_export_files_to_excel} instead +#' @export +iso_export_to_excel <- function(...) { + lifecycle::deprecate_warn( + "1.4.0", + "iso_export_to_excel()", + "iso_export_files_to_excel()", + details = "Function renamed to avoid ambiguity with other export functions.", + always = TRUE + ) + iso_export_files_to_excel(...) 
+} + #' Export data to Excel #' #' This function exports the passed in iso_files to Excel. The different kinds of data (raw data, file info, methods info, etc.) are exported to separate tabs within the excel file. Use the various \code{include_...} parameters to specify what information to include. Note that in rare instances where vectorized data columns exist in the file information (e.g. measurement_info), they are concatenated with ', ' in the excel export. Note that the openxlsx package required for this export is not installed automatically as part of isoreader. Please install it manually if missing using \code{install.packages("openxlsx")}. @@ -10,7 +24,7 @@ #' @family export functions #' @return returns the iso_files object invisibly for use in pipelines #' @export -iso_export_to_excel <- function( +iso_export_files_to_excel <- function( iso_files, filepath, include_file_info = everything(), include_raw_data = everything(), include_standards = !!enexpr(include_method_info), include_resistors = !!enquo(include_method_info), @@ -37,7 +51,7 @@ iso_export_to_excel <- function( # info message if (!quiet) { sprintf("Info: exporting data from %d iso_files into Excel '%s'", length(export_iso_files), - str_replace(filepath, "^\\.(/|\\\\)", "")) %>% message() + str_replace(filepath, "^\\.(/|\\\\)", "")) |> message() } # include method info message @@ -70,40 +84,40 @@ iso_export_to_excel <- function( if ("file_info" %in% names(all_data)) { # note: collapse_list_columns takes care of nested vectors, they get concatenated with ', ' file_info <- - all_data %>% select(.data$file_id, .data$file_info) %>% - unnest(.data$file_info) %>% + all_data |> dplyr::select("file_id", "file_info") |> + unnest("file_info") |> collapse_list_columns() add_excel_sheet(wb, "file info", file_info) } # raw data if ("raw_data" %in% names(all_data)) { - raw_data <- all_data %>% select(.data$file_id, .data$raw_data) %>% unnest(.data$raw_data) + raw_data <- all_data |> select("file_id", 
"raw_data") |> unnest("raw_data") add_excel_sheet(wb, "raw data", raw_data) } # standards if ("standards" %in% names(all_data)) { - standards <- all_data %>% select(.data$file_id, standards) %>% unnest(standards) + standards <- all_data |> select("file_id", "standards") |> unnest("standards") add_excel_sheet(wb, "standards", standards) } # resistors if ("resistors" %in% names(all_data)) { - resistors <- all_data %>% select(.data$file_id, .data$resistors) %>% unnest(.data$resistors) + resistors <- all_data |> select("file_id", "resistors") |> unnest("resistors") add_excel_sheet(wb, "resistors", resistors) } # vendor data table if ("vendor_data_table" %in% names(all_data)) { - vendor_data <- all_data %>% select(.data$file_id, .data$vendor_data_table) %>% - unnest(.data$vendor_data_table) %>% iso_strip_units() + vendor_data <- all_data |> select("file_id", "vendor_data_table") |> + unnest("vendor_data_table") |> iso_strip_units() add_excel_sheet(wb, "vendor data table", vendor_data) } # problems if ("problems" %in% names(all_data)) { - problems <- all_data %>% select(.data$file_id, .data$problems) %>% unnest(.data$problems) + problems <- all_data |> select("file_id", "problems") |> unnest("problems") add_excel_sheet(wb, "problems", problems) } openxlsx::saveWorkbook(wb, filepath, overwrite = TRUE) @@ -155,10 +169,10 @@ add_excel_sheet <- function(wb, sheet_name, ..., dbl_digits = 2, col_max_width = # calculate header widths header_widths <- - sheet_data_sets %>% + sheet_data_sets |> # account for bold width purrr::map(~nchar(names(.x))) - max_n_cols <- purrr::map_int(header_widths, length) %>% max() + max_n_cols <- purrr::map_int(header_widths, length) |> max() # calculate data widths if (max_n_cols > 0) { @@ -169,9 +183,9 @@ add_excel_sheet <- function(wb, sheet_name, ..., dbl_digits = 2, col_max_width = return(max(c(0, nchar(x)), na.rm = TRUE)) } data_widths <- - sheet_data_sets %>% + sheet_data_sets |> purrr::map( - ~dplyr::summarise_all(.x, 
list(calculate_data_width)) %>% + ~dplyr::summarise_all(.x, list(calculate_data_width)) |> unlist(use.names = FALSE) ) max_widths <- purrr::map2(header_widths, data_widths , ~{ @@ -185,17 +199,31 @@ add_excel_sheet <- function(wb, sheet_name, ..., dbl_digits = 2, col_max_width = } +#' @rdname deprecated +#' @details \code{iso_export_to_feather}: use \link{iso_export_files_to_feather} instead +#' @export +iso_export_to_feather <- function(...) { + lifecycle::deprecate_warn( + "1.4.0", + "iso_export_to_feather()", + "iso_export_files_to_feather()", + details = "Function renamed to avoid ambiguity with other export functions.", + always = TRUE + ) + iso_export_files_to_feather(...) +} + #' Export to feather #' #' This function exports the passed in iso_files to the Python and R shared feather file format. The different kinds of data (raw data, file info, methods info, etc.) are exported to separate feather files that are saved with the provided \code{filepath_prefix} as prefix. All are only exported if the corresponding \code{include_} parameter is set to \code{TRUE} and only for data types for which this type of data is available and was read (see \code{\link{iso_read_dual_inlet}}, \code{\link{iso_read_continuous_flow}} for details on read parameters). Note that in rare instances where vectorized data columns exist in the file information (e.g. measurement_info), they are concatenated with ', ' in feather output. Note that the feather package required for this export is not installed automatically as part of isoreader. Please install it manually if missing using \code{install.packages("feather")}. #' #' @inheritParams iso_save -#' @inheritParams iso_export_to_excel +#' @inheritParams iso_export_files_to_excel #' @param filepath_prefix what to use as the prefix for the feather file names (e.g. 
name of the data collection or current date) #' @family export functions #' @return returns the iso_files object invisibly for use in pipelines #' @export -iso_export_to_feather <- function( +iso_export_files_to_feather <- function( iso_files, filepath_prefix, include_file_info = everything(), include_raw_data = everything(), include_standards = !!enexpr(include_method_info), include_resistors = !!enquo(include_method_info), @@ -226,7 +254,7 @@ iso_export_to_feather <- function( # info if (!quiet) { sprintf("Info: exporting data from %d iso_files into %s files at '%s'", length(iso_as_file_list(iso_files)), - filepaths[['ext']], str_replace(filepaths[['base']], "^\\.(/|\\\\)", "")) %>% message() + filepaths[['ext']], str_replace(filepaths[['base']], "^\\.(/|\\\\)", "")) |> message() } # get all data @@ -246,40 +274,40 @@ iso_export_to_feather <- function( # file info if ("file_info" %in% names(all_data)) { # note: collapse_list_columns takes care of nested vectors, they get concatenated with ', ' - all_data %>% select(.data$file_id, .data$file_info) %>% - unnest(.data$file_info) %>% - collapse_list_columns() %>% + all_data |> select("file_id", "file_info") |> + unnest("file_info") |> + collapse_list_columns() |> feather::write_feather(filepaths[['file_info']]) } # raw data if ("raw_data" %in% names(all_data)) { - all_data %>% select(.data$file_id, .data$raw_data) %>% unnest(.data$raw_data) %>% + all_data |> dplyr::select("file_id", "raw_data") |> unnest("raw_data") |> feather::write_feather(filepaths[['raw_data']]) } # standards if ("standards" %in% names(all_data)) { - all_data %>% select(.data$file_id, .data$standards) %>% unnest(.data$standards) %>% + all_data |> dplyr::select("file_id", "standards") |> unnest("standards") |> feather::write_feather(filepaths[['method_info_standards']]) } # resistors if ("resistors" %in% names(all_data)) { - all_data %>% select(.data$file_id, .data$resistors) %>% unnest(.data$resistors) %>% + all_data |> dplyr::select("file_id", 
"resistors") |> unnest("resistors") |> feather::write_feather(filepaths[['method_info_resistors']]) } # vendor data table if ("vendor_data_table" %in% names(all_data)) { - all_data %>% select(.data$file_id, .data$vendor_data_table) %>% - unnest(.data$vendor_data_table) %>% iso_strip_units() %>% + all_data |> dplyr::select("file_id", "vendor_data_table") |> + unnest("vendor_data_table") |> iso_strip_units() |> feather::write_feather(filepaths[['vendor_data_table']]) } # problems if ("problems" %in% names(all_data)) { - all_data %>% select(.data$file_id, .data$problems) %>% unnest(.data$problems) %>% + all_data |> dplyr::select("file_id", "problems") |> unnest("problems") |> feather::write_feather(filepaths[['problems']]) } @@ -297,7 +325,7 @@ get_export_filepath <- function(filepath, ext) { folder <- dirname(filepath) if (!file.exists(folder)) stop("the folder '", folder, "' does not exist", call. = FALSE) if (!is.null(ext)) - filename <- filename %>% str_replace(fixed(ext), "") %>% str_c(ext) # to make sure correct extension + filename <- filename |> str_replace(fixed(ext), "") |> str_c(ext) # to make sure correct extension return(file.path(folder, filename)) } diff --git a/R/file_info_operations.R b/R/file_info_operations.R index 2054fa66..cf7db6fc 100644 --- a/R/file_info_operations.R +++ b/R/file_info_operations.R @@ -40,7 +40,7 @@ iso_select_file_info.iso_file_list <- function(iso_files, ..., file_specific = F "Info: selecting/renaming the following file info across {length(iso_files)} data file(s): ", if (length(select_exps) == 0) "keeping only 'file_id'" else get_info_message_concat(select_exps, include_names = TRUE, names_sep = "->", flip_names_and_values = TRUE) - ) %>% message() + ) |> message() } } @@ -83,7 +83,7 @@ iso_select_file_info.iso_file_list <- function(iso_files, ..., file_specific = F ) # select file_info columns - isofile$file_info <- dplyr::select(isofile$file_info, !!!select_cols) + isofile$file_info <- dplyr::select(isofile$file_info, 
dplyr::all_of(select_cols)) # check for file id if (!"file_id" %in% names(isofile$file_info)) { @@ -99,19 +99,19 @@ iso_select_file_info.iso_file_list <- function(iso_files, ..., file_specific = F # summarize individual file updates if (!quiet) { - info <- map(isofiles_select, "vars") %>% - bind_rows() %>% - group_by(.data$file_id) %>% + info <- map(isofiles_select, "vars") |> + bind_rows() |> + group_by(.data$file_id) |> summarize( label = ifelse( .data$changed, sprintf("'%s'->'%s'", .data$from, .data$to), sprintf("'%s'", .data$from) - ) %>% paste(collapse = ", ") - ) %>% - dplyr::count(.data$label) %>% - mutate(label = sprintf(" - for %d file(s): %s", .data$n, .data$label)) %>% + ) |> paste(collapse = ", ") + ) |> + dplyr::count(.data$label) |> + mutate(label = sprintf(" - for %d file(s): %s", .data$n, .data$label)) |> arrange(desc(.data$n)) message(paste(info$label, collapse = "\n")) } @@ -125,9 +125,9 @@ iso_select_file_info.iso_file_list <- function(iso_files, ..., file_specific = F } else { # across all files - fast but less flexible # retrieve info - file_info <- iso_files %>% + file_info <- iso_files |> # retrieve file info - map(~.x$file_info) %>% + map(~.x$file_info) |> # combine in data frame (use safe bind to make sure different data column # types of the same name don't trip up the combination) safe_bind_rows() @@ -146,9 +146,9 @@ iso_select_file_info.iso_file_list <- function(iso_files, ..., file_specific = F # final processing file_info <- - file_info %>% + file_info |> # focus on selected columns only (also takes care of the rename) - dplyr::select(!!!select_cols) + dplyr::select(dplyr::all_of(select_cols)) # check for file id if (!"file_id" %in% names(file_info)) { @@ -157,17 +157,17 @@ iso_select_file_info.iso_file_list <- function(iso_files, ..., file_specific = F # convert back to list format file_info <- - file_info %>% + file_info |> # should still be list columns but doesn't hurt to check - ensure_data_frame_list_columns() %>% + 
ensure_data_frame_list_columns() |> # split by file info - split(seq(nrow(file_info))) %>% + split(seq(nrow(file_info))) |> # clean back out the columns that were only added through the row bind map(~.x[!map_lgl(.x, ~is.list(.x) && all(map_lgl(.x, is.null)))]) # update updated_iso_files <- - map2(iso_files, file_info, ~{ .x$file_info <- .y; .x }) %>% + map2(iso_files, file_info, ~{ .x$file_info <- .y; .x }) |> iso_as_file_list() } else { @@ -231,7 +231,7 @@ iso_rename_file_info.iso_file_list <- function(iso_files, ..., file_specific = F glue::glue( "Info: renaming the following file info across {length(iso_files)} data file(s): ", get_info_message_concat(rename_exps, include_names = TRUE, names_sep = "->", flip_names_and_values = TRUE) - ) %>% message() + ) |> message() } } @@ -271,7 +271,7 @@ iso_rename_file_info.iso_file_list <- function(iso_files, ..., file_specific = F # rename file_info columns if (length(rename_cols) > 0) - isofile$file_info <- dplyr::rename(isofile$file_info, !!!rename_cols) + isofile$file_info <- dplyr::rename(isofile$file_info, dplyr::all_of(rename_cols)) # check for file id if (!"file_id" %in% names(isofile$file_info)) { @@ -287,19 +287,19 @@ iso_rename_file_info.iso_file_list <- function(iso_files, ..., file_specific = F # summarize individual file updates if (!quiet) { - info <- map(isofiles_rename, "vars") %>% - bind_rows() %>% - group_by(.data$file_id) %>% + info <- map(isofiles_rename, "vars") |> + bind_rows() |> + group_by(.data$file_id) |> summarize( label = ifelse( .data$changed, sprintf("'%s'->'%s'", .data$from, .data$to), sprintf("'%s'", .data$from) - ) %>% paste(collapse = ", ") - ) %>% - dplyr::count(.data$label) %>% - mutate(label = sprintf(" - for %d file(s): %s", .data$n, .data$label)) %>% + ) |> paste(collapse = ", ") + ) |> + dplyr::count(.data$label) |> + mutate(label = sprintf(" - for %d file(s): %s", .data$n, .data$label)) |> arrange(desc(.data$n)) message(paste(info$label, collapse = "\n")) } @@ -313,9 +313,9 @@ 
iso_rename_file_info.iso_file_list <- function(iso_files, ..., file_specific = F } else { # across all files - fast but less flexible # retrieve info - file_info <- iso_files %>% + file_info <- iso_files |> # retrieve file info - map(~.x$file_info) %>% + map(~.x$file_info) |> # combine in data frame (use safe bind to make sure different data column # types of the same name don't trip up the combination) safe_bind_rows() @@ -327,7 +327,7 @@ iso_rename_file_info.iso_file_list <- function(iso_files, ..., file_specific = F cols_must_exist = FALSE)$rename # then run the rename - file_info <- dplyr::rename(file_info, !!!rename_cols) + file_info <- dplyr::rename(file_info, dplyr::all_of(rename_cols)) # check for file id if (!"file_id" %in% names(file_info)) { @@ -336,17 +336,17 @@ iso_rename_file_info.iso_file_list <- function(iso_files, ..., file_specific = F # convert back to list format file_info <- - file_info %>% + file_info |> # should still be list columns but doesn't hurt to check - ensure_data_frame_list_columns() %>% + ensure_data_frame_list_columns() |> # split by file info - split(seq(nrow(file_info))) %>% + split(seq(nrow(file_info))) |> # clean back out the columns that were only added through the row bind map(~.x[!map_lgl(.x, ~is.list(.x) && all(map_lgl(.x, is.null)))]) # update updated_iso_files <- - map2(iso_files, file_info, ~{ .x$file_info <- .y; .x }) %>% + map2(iso_files, file_info, ~{ .x$file_info <- .y; .x }) |> iso_as_file_list() } @@ -393,7 +393,7 @@ iso_filter_files.iso_file <- function(iso_files, ..., quiet = default(quiet)) { #' @export iso_filter_files.iso_file_list <- function(iso_files, ..., quiet = default(quiet)) { # filter iso_files by file info - file_info <- iso_get_file_info(iso_files, quiet = TRUE) %>% dplyr::filter(...) + file_info <- iso_get_file_info(iso_files, quiet = TRUE) |> dplyr::filter(...) 
filtered_iso_files <- if (nrow(file_info) == 0) NULL else iso_files[names(iso_files) %in% file_info$file_id] @@ -401,7 +401,7 @@ iso_filter_files.iso_file_list <- function(iso_files, ..., quiet = default(quiet # information if (!quiet) { str_interp("Info: applying file filter, keeping $[d]{n} of $[d]{n_all} files", - list(n = length(filtered_iso_files), n_all = length(iso_files))) %>% message() + list(n = length(filtered_iso_files), n_all = length(iso_files))) |> message() } return(filtered_iso_files) @@ -447,24 +447,24 @@ iso_mutate_file_info.iso_file_list <- function(iso_files, ..., quiet = default(q # information if (!quiet) { - glue::glue("Info: mutating file info for {length(iso_files)} data file(s)") %>% + glue::glue("Info: mutating file info for {length(iso_files)} data file(s)") |> message() } # mutate iso_files' file info file_info <- - iso_get_file_info(iso_files, quiet = TRUE) %>% + iso_get_file_info(iso_files, quiet = TRUE) |> dplyr::mutate(...) # convert back to list format file_info <- - file_info %>% - ensure_data_frame_list_columns() %>% + file_info |> + ensure_data_frame_list_columns() |> split(seq(nrow(file_info))) # mutate mutated_iso_files <- - map2(iso_files, file_info, ~{ .x$file_info <- .y; .x }) %>% + map2(iso_files, file_info, ~{ .x$file_info <- .y; .x }) |> iso_as_file_list() # return @@ -511,7 +511,7 @@ iso_set_file_root <- function(iso_files, root = ".", remove_embedded_root = NULL glue::glue( "Info: setting file root for {length(iso_files)} data file(s)", if(length(root) == 1) {" to '{root}'"} else {""}, - if(!is.null(remove_embedded_root)) {" and removing embedded root '{remove_embedded_root}'"} else {""}) %>% + if(!is.null(remove_embedded_root)) {" and removing embedded root '{remove_embedded_root}'"} else {""}) |> message() } @@ -520,8 +520,8 @@ iso_set_file_root <- function(iso_files, root = ".", remove_embedded_root = NULL embedded_root_simplified <- iso_shorten_relative_paths(remove_embedded_root)$path original_paths <- 
map_chr(iso_files, ~.x$file_info$file_path) paths <- - original_paths %>% - iso_root_paths(root = embedded_root_simplified, check_existence = FALSE) %>% + original_paths |> + iso_root_paths(root = embedded_root_simplified, check_existence = FALSE) |> mutate(original_path = !!original_paths) no_match_paths <- filter(paths, root != !!embedded_root_simplified) @@ -530,11 +530,11 @@ iso_set_file_root <- function(iso_files, root = ".", remove_embedded_root = NULL "%d/%d file paths do not include the embedded root. The following paths could NOT be simplified:\n - %s", nrow(no_match_paths), nrow(paths), paste(no_match_paths$original_path, collapse = "\n - ") - ) %>% warning(immediate. = TRUE, call. = FALSE) + ) |> warning(immediate. = TRUE, call. = FALSE) } # file info updates - paths <- paths %>% mutate( + paths <- paths |> mutate( path = ifelse(.data$root == !!embedded_root_simplified, .data$path, .data$original_path), root = !!root ) @@ -548,8 +548,8 @@ iso_set_file_root <- function(iso_files, root = ".", remove_embedded_root = NULL } # update - iso_files <- as.list(iso_files) %>% - modifyList(file_info_update) %>% + iso_files <- as.list(iso_files) |> + modifyList(file_info_update) |> iso_as_file_list() # return single (if passed in as single) @@ -621,13 +621,13 @@ iso_parse_file_info.iso_file_list <- function(iso_files, number = c(), double = names(file_info)[tidyselect::eval_select(rlang::enexpr(datetime), file_info)], text = names(file_info)[tidyselect::eval_select(rlang::enexpr(text), file_info)] - ) %>% - tibble::enframe(name = "parse", value = "column") %>% - tidyr::unnest(.data$column) %>% + ) |> + tibble::enframe(name = "parse", value = "column") |> + tidyr::unnest("column") |> # find out number of casts per column - group_by(.data$column) %>% mutate(n = n()) %>% ungroup() %>% + group_by(.data$column) |> mutate(n = n()) |> ungroup() |> # get column info - left_join(classes, by = "parse") %>% + left_join(classes, by = "parse") |> mutate( old_class = 
map_chr(.data$column, ~class(file_info[[.x]])[1]), already_cast = .data$new_class == .data$old_class, @@ -637,69 +637,67 @@ iso_parse_file_info.iso_file_list <- function(iso_files, number = c(), double = # check on multi-casts if (any(vars$n > 1)) { probs <- - vars %>% filter(.data$n > 1) %>% group_by(.data$column) %>% - summarize(convert_to = paste(unique(.data$parse), collapse = ", ")) %>% + vars |> filter(.data$n > 1) |> group_by(.data$column) |> + summarize(convert_to = paste(unique(.data$parse), collapse = ", ")) |> mutate(label = sprintf(" - '%s' to %s", .data$column, .data$convert_to)) glue::glue("cannot convert the same column(s) to multiple formats:\n", - "{paste(probs$label, collapse = '\n')}") %>% + "{paste(probs$label, collapse = '\n')}") |> stop(call. = FALSE) } # information if (!quiet) { info <- - vars %>% filter(!.data$problem, !.data$already_cast) %>% - group_by(.data$parse) %>% - summarize(convert = paste(unique(.data$column), collapse = "', '")) %>% + vars |> filter(!.data$problem, !.data$already_cast) |> + group_by(.data$parse) |> + summarize(convert = paste(unique(.data$column), collapse = "', '")) |> mutate(label = sprintf(" - to %s: '%s'", .data$parse, .data$convert)) - already <- filter(vars, .data$already_cast)$column %>% - { if(length(.) 
> 0) - sprintf("\n - already the target data type (and thus ignored): '%s'", - paste(., collapse = "', '")) - else "" - } + + already_cols <- filter(vars, .data$already_cast)$column + already <- "" + if(length(already_cols) > 0) + already <- sprintf( + "\n - already the target data type (and thus ignored): '%s'", + paste(already_cols, collapse = "', '")) + glue::glue( "Info: parsing {nrow(filter(vars, !.data$problem, !.data$already_cast))} ", "file info columns for {length(iso_files)} data file(s)", if (nrow(info) > 0) ":\n{paste(info$label, collapse = '\n')}" else "", - "{already}") %>% + "{already}") |> message() } # check on conversion problems if (any(vars$problem)) { probs <- - vars %>% filter(.data$problem) %>% + vars |> filter(.data$problem) |> mutate(label = sprintf(" - cannot convert '%s' from %s to %s", .data$column, .data$old_class, .data$parse)) glue::glue( "missing automatic parsers for the following type conversions ", - "(columns are ignored):\n{paste(probs$label, collapse = '\n')}") %>% + "(columns are ignored):\n{paste(probs$label, collapse = '\n')}") |> warning(immediate. = TRUE, call. = FALSE) } # cast - mutate_quos <- - vars %>% filter(!.data$problem, !.data$already_cast) %>% - # note for RMD check, since this is a with statement, does not take .data! 
- { - rlang::set_names( - purrr::map2(.$column, .$func, ~quo((!!.y)(!!sym(.x)))), - .$column - ) - } - + mutate_quos <- vars |> filter(!.data$problem, !.data$already_cast) + mutate_quos <- rlang::set_names( + purrr::map2(mutate_quos$column, mutate_quos$func, ~quo((!!.y)(!!sym(.x)))), + mutate_quos$column + ) + # mutate file info file_info <- - file_info %>% - mutate(!!!mutate_quos) %>% - ensure_data_frame_list_columns() %>% + file_info |> + mutate(!!!mutate_quos) |> + ensure_data_frame_list_columns() |> split(seq(nrow(file_info))) # mutate mutated_iso_files <- - map2(iso_files, file_info, ~{ .x$file_info <- .y; .x }) %>% + map2(iso_files, file_info, ~{ .x$file_info <- .y; .x }) |> iso_as_file_list() # return @@ -726,7 +724,7 @@ iso_add_file_info.iso_file_list <- function(iso_files, new_file_info, ..., quiet # add to iso_files' file_info file_info <- - iso_get_file_info(iso_files, quiet = TRUE) %>% + iso_get_file_info(iso_files, quiet = TRUE) |> iso_add_file_info(new_file_info = new_file_info, ..., quiet = quiet) # safety check @@ -736,13 +734,13 @@ iso_add_file_info.iso_file_list <- function(iso_files, new_file_info, ..., quiet # convert back to list format file_info <- - file_info %>% - ensure_data_frame_list_columns() %>% + file_info |> + ensure_data_frame_list_columns() |> split(seq(nrow(file_info))) # mutate updated_iso_files <- - map2(iso_files, file_info, ~{ .x$file_info <- .y; .x }) %>% + map2(iso_files, file_info, ~{ .x$file_info <- .y; .x }) |> iso_as_file_list() return(updated_iso_files) @@ -768,13 +766,13 @@ iso_add_file_info.data.frame <- function(df, new_file_info, ..., quiet = default glue::glue( "Info: adding new file information ('{paste(new_cols, collapse = \"', '\")}') ", "to {n_data_files} data file(s), ", - "joining by '{purrr::map_chr(join_bys, paste, collapse = \"'+'\") %>% paste(collapse = \"' then '\")}'...") %>% + "joining by '{purrr::map_chr(join_bys, paste, collapse = \"'+'\") |> paste(collapse = \"' then '\")}'...") |> message() } # 
additional safety checks if (length(new_cols) == 0) { - glue::glue("no new information columns that don't already exist, returning data unchanged") %>% + glue::glue("no new information columns that don't already exist, returning data unchanged") |> warning(immediate. = TRUE, call. = FALSE) return(df) } @@ -785,7 +783,7 @@ iso_add_file_info.data.frame <- function(df, new_file_info, ..., quiet = default "all join_by (...) columns must exist in both the existing file ", "information and the new_file_info", if(length(missing_df) > 0) "\n - missing in existing file info: '{paste(missing_df, collapse = \"', '\")}'" else "", - if (length(missing_new_fi) > 0) "\n - missing in new file info: '{paste(missing_new_fi, collapse = \"', '\")}'" else "") %>% + if (length(missing_new_fi) > 0) "\n - missing in new file info: '{paste(missing_new_fi, collapse = \"', '\")}'" else "") |> stop(call. = FALSE) } @@ -797,24 +795,28 @@ iso_add_file_info.data.frame <- function(df, new_file_info, ..., quiet = default ) new_data_rows <- - join_by_cols %>% - unnest(.data$join_by_col) %>% + join_by_cols |> + unnest("join_by_col") |> mutate( new_data_idx = map(.data$join_by_col, ~which(!is.na(new_file_info[[.x]]) & nchar(as.character(new_file_info[[.x]])) > 0)) - ) %>% - group_by(.data$..priority) %>% - summarize(new_data_idx = list(Reduce(intersect, .data$new_data_idx))) %>% + ) |> + group_by(.data$..priority) |> + summarize(new_data_idx = list(Reduce(intersect, .data$new_data_idx))) |> right_join(join_by_cols, by = "..priority") # prep for joins - shared_cols <- intersect(names(new_file_info), names(df)) %>% { rlang::set_names(., paste0("..ni_temp_", .)) } + shared_cols <- intersect(names(new_file_info), names(df)) + shared_cols <- rlang::set_names(shared_cols, paste0("..ni_temp_", shared_cols)) + df <- mutate(df, ..df_id = dplyr::row_number()) new_file_info <- mutate(new_file_info, ..ni_id = dplyr::row_number()) # join new file info based on the join by and new row indices join_new_file_info <- 
function(join_by, new_rows, shared_cols) { if (length(join_by) > 0 && length(new_rows) > 0) { - dplyr::inner_join(df, rename(new_file_info[new_rows, ], !!!shared_cols), by = join_by) + # allow many to many here -- duplicates are caught later + # consider catching the issue here instead for speed + dplyr::inner_join(df, rename(new_file_info[new_rows, ], dplyr::all_of(shared_cols)), by = join_by, relationship = "many-to-many") } else { tibble() } @@ -822,7 +824,7 @@ iso_add_file_info.data.frame <- function(df, new_file_info, ..., quiet = default # generate joined data joined_data <- - new_data_rows %>% + new_data_rows |> mutate( n_ni_considered = map_int(.data$new_data_idx, length), shared_cols = map(.data$join_by_col, ~shared_cols[!shared_cols %in% .x]), @@ -833,27 +835,27 @@ iso_add_file_info.data.frame <- function(df, new_file_info, ..., quiet = default # select data based on priority final_data <- - joined_data %>% - select(.data$..priority, .data$data) %>% + joined_data |> + select("..priority", "data") |> # avoid problems with the temp columns during unnest - mutate(data = map(.data$data, ~select(.x, -starts_with("..ni_temp_")))) %>% - unnest(.data$data) %>% - select(-starts_with("..ni_temp_")) %>% - group_by(.data$..df_id) %>% - filter(.data$..priority == max(.data$..priority)) %>% + mutate(data = map(.data$data, ~select(.x, -starts_with("..ni_temp_")))) |> + unnest("data") |> + dplyr::select(-dplyr::starts_with("..ni_temp_")) |> + group_by(.data$..df_id) |> + filter(.data$..priority == max(.data$..priority)) |> ungroup() # make sure all data is present (even those not matched by any join) - final_data <- final_data %>% - vctrs::vec_rbind(filter(df, !.data$..df_id %in% final_data$..df_id)) %>% + final_data <- final_data |> + vctrs::vec_rbind(filter(df, !.data$..df_id %in% final_data$..df_id)) |> arrange(.data$..df_id) # safety checks - dup_data <- final_data %>% group_by(.data$..df_id) %>% mutate(n = n()) %>% filter(.data$n > 1L) + dup_data <- final_data |> 
group_by(.data$..df_id) |> mutate(n = n()) |> filter(.data$n > 1L) if (nrow(dup_data) > 0) { - error_data <- dup_data %>% - left_join(joined_data, by = "..priority") %>% - group_by(.data$..priority) %>% + error_data <- dup_data |> + left_join(joined_data, by = "..priority") |> + group_by(.data$..priority) |> summarize( label = sprintf( "'%s' join: %d/%d new info rows match %d/%d data files but would lead to the duplication of %d data files.", @@ -868,18 +870,18 @@ iso_add_file_info.data.frame <- function(df, new_file_info, ..., quiet = default glue::glue( "join operation(s) would create duplicate entries:\n - ", - "{paste(error_data$label, collapse = '\n - ')}") %>% + "{paste(error_data$label, collapse = '\n - ')}") |> stop(call. = FALSE) } # info summary info_sum <- - final_data %>% group_by(.data$..priority) %>% + final_data |> group_by(.data$..priority) |> summarize( n_ni_actual = length(unique(.data$..ni_id)), n_df_actual = length(unique(.data$file_id)) - ) %>% - right_join(joined_data, by = "..priority") %>% + ) |> + right_join(joined_data, by = "..priority") |> mutate( n_ni_actual = ifelse(is.na(.data$n_ni_actual), 0L, .data$n_ni_actual), n_df_actual = ifelse(is.na(.data$n_df_actual), 0L, .data$n_df_actual), @@ -907,7 +909,7 @@ iso_add_file_info.data.frame <- function(df, new_file_info, ..., quiet = default message(" - ", paste(info_sum$label, collapse = "\n - ")) } - return(select(final_data, -.data$..df_id, -.data$..ni_id, -.data$..priority)) + return(select(final_data, -"..df_id", -"..ni_id", -"..priority")) } # check doesn't work unless it's at the beginning diff --git a/R/isodata_structures.R b/R/isodata_structures.R index e65abfd4..515f0e0f 100644 --- a/R/isodata_structures.R +++ b/R/isodata_structures.R @@ -22,7 +22,7 @@ make_iso_file_data_structure <- function(file_id = NA_character_) { raw_data = tibble::tibble() # all mass data ), class = c("iso_file") - ) %>% + ) |> initialize_problems_attribute() } @@ -70,20 +70,20 @@ 
get_last_structure_update_version <- function() { # get version for all objects get_iso_object_versions <- function(iso_obj) { - iso_obj %>% iso_as_file_list() %>% + iso_obj |> iso_as_file_list() |> purrr::map(~if (!is.null(.x$version)) { .x$version } else { as.package_version("0.0.0") }) } # get outdated boolean vector get_iso_object_outdated <- function(iso_obj) { - iso_obj %>% - get_iso_object_versions() %>% + iso_obj |> + get_iso_object_versions() |> purrr::map_lgl(~.x < get_last_structure_update_version()) } # test whether an iso object structure is outdated is_iso_object_outdated <- function(iso_obj) { - iso_obj %>% get_iso_object_outdated() %>% any() + iso_obj |> get_iso_object_outdated() |> any() } @@ -163,17 +163,18 @@ iso_as_file_list <- function(..., discard_duplicates = TRUE) { if (length(iso_objs) == 0) { # empty list iso_list <- list() - all_problems <- get_problems_structure() %>% mutate(file_id = character()) %>% select(.data$file_id, everything()) + all_problems <- get_problems_structure() |> mutate(file_id = character()) |> + select("file_id", dplyr::everything()) } else { # check if everything is an iso object if(!all(is_iso <- map_lgl(iso_objs, iso_is_object))) { stop("can only process iso_file and iso_file_list objects, encountered incompatible data type(s): ", - unlist(lapply(iso_objs[!is_iso], class)) %>% unique() %>% str_c(collapse = ", "), + unlist(lapply(iso_objs[!is_iso], class)) |> unique() |> str_c(collapse = ", "), call. 
= FALSE) } # flatten isofiles and isofile lists to make one big isofile list - iso_list <- map(iso_objs, ~if(iso_is_file_list(.x)) { .x } else { list(.x) }) %>% unlist(recursive = FALSE) + iso_list <- map(iso_objs, ~if(iso_is_file_list(.x)) { .x } else { list(.x) }) |> unlist(recursive = FALSE) # reset file ids file_ids <- map_chr(iso_list, ~.x$file_info$file_id) @@ -185,8 +186,8 @@ iso_as_file_list <- function(..., discard_duplicates = TRUE) { # check if al elements are the same data type classes <- map_chr(iso_list, ~class(.x)[1]) if (!all(classes == classes[1])) { - wrong_dt <- classes[classes != classes[1]] %>% unique %>% collapse(", ") - glue("can only process iso_file objects with the same data type (first: {classes[1]}), encountered: {wrong_dt}") %>% + wrong_dt <- classes[classes != classes[1]] |> unique() |> collapse(", ") + glue("can only process iso_file objects with the same data type (first: {classes[1]}), encountered: {wrong_dt}") |> stop(call. = FALSE) } list_classes <- c(paste0(classes[1], "_list"), list_classes) @@ -196,10 +197,10 @@ iso_as_file_list <- function(..., discard_duplicates = TRUE) { tibble( idx = 1:length(iso_list), file_id = names(iso_list) - ) %>% - group_by(.data$file_id) %>% - mutate(n = 1:n(), has_duplicates = any(n > 1)) %>% - ungroup() %>% + ) |> + group_by(.data$file_id) |> + mutate(n = 1:n(), has_duplicates = any(n > 1)) |> + ungroup() |> filter(has_duplicates) # process duplicates @@ -229,8 +230,9 @@ iso_as_file_list <- function(..., discard_duplicates = TRUE) { } # propagate problems - all_problems <- map(iso_list, ~get_problems(.x) %>% mutate(file_id = .x$file_info$file_id)) %>% - bind_rows() %>% dplyr::select(.data$file_id, everything()) + all_problems <- map(iso_list, ~get_problems(.x) |> mutate(file_id = .x$file_info$file_id)) |> + bind_rows() |> + dplyr::select("file_id", dplyr::everything()) } # problems @@ -243,7 +245,7 @@ iso_as_file_list <- function(..., discard_duplicates = TRUE) { structure( iso_list, class = 
unique(list_classes) - ) %>% set_problems(all_problems) + ) |> set_problems(all_problems) } @@ -260,10 +262,13 @@ print.iso_file_list <- function(x, ...) { # what type of iso files if (length(x) == 0) data_type <- "unknown" - else data_type <- class(x[[1]]) %>% { .[.!="iso_file"][1] } %>% str_replace("_", " ") + else { + data_types <- class(x[[1]]) + data_type <- data_types[data_types != "iso_file"][1] |> str_replace("_", " ") + } # print summary - glue("Data from {length(x)} {data_type} iso files:") %>% cat("\n") + glue("Data from {length(x)} {data_type} iso files:") |> cat("\n") print(iso_get_data_summary(x, quiet = TRUE)) if (n_problems(x) > 0) { @@ -278,9 +283,12 @@ print.iso_file_list <- function(x, ...) { #' @rdname iso_printing #' @export print.iso_file <- function(x, ..., show_problems = TRUE) { - data_type <- class(x) %>% { .[.!="iso_file"][1] } %>% str_to_title() %>% str_replace("_", " ") + + data_types <- class(x) + data_type <- data_types[data_types != "iso_file"][1] |> str_to_title() |> str_replace("_", " ") + if (is.na(data_type)) data_type <- "Iso" - glue("{data_type} iso file '{x$file_info$file_id}': {get_raw_data_info(x)$raw_data}") %>% cat("\n") + glue("{data_type} iso file '{x$file_info$file_id}': {get_raw_data_info(x)$raw_data}") |> cat("\n") if (show_problems && n_problems(x) > 0) { cat("Problems:\n") print(iso_get_problems(x), ...) 
@@ -344,7 +352,7 @@ get_ds_file_path <- function(ds, include_root = TRUE) { update_read_options <- function(ds, read_options) { # remove read_ prefix in function parameters if(!is.list(read_options)) read_options <- as.list(read_options) - names(read_options) <- names(read_options) %>% str_replace("^read_", "") + names(read_options) <- names(read_options) |> str_replace("^read_", "") update <- read_options[names(read_options) %in% names(ds$read_options)] # update all that exist in the read options ds$read_options <- modifyList(ds$read_options, update) @@ -377,7 +385,7 @@ set_ds_file_size <- function(ds) { # make sure file size is at the proper position if it is introduced for the first time if (!col_exists) { - ds$file_info <- dplyr::select(ds$file_info, starts_with("file_"), everything()) + ds$file_info <- dplyr::select(ds$file_info, dplyr::starts_with("file_"), everything()) } return(ds) } diff --git a/R/isoread.R b/R/isoread.R index 62b10b89..f729e3b0 100644 --- a/R/isoread.R +++ b/R/isoread.R @@ -53,7 +53,7 @@ register_file_reader <- function(type, call, extension, func, description, softw if (length(env) > 1) glue::glue("function '{func}' exists in more than one environment ", "({paste(env, collapse = ', ')})", - ", please specify parameter 'env' to clarify") %>% + ", please specify parameter 'env' to clarify") |> stop(call. = FALSE) frs <- default("file_readers", allow_null = TRUE) @@ -77,12 +77,12 @@ register_file_reader <- function(type, call, extension, func, description, softw # already exists but don't overwrite --> error glue::glue( "file reader for extension '{extension}' already exists, specify overwrite = TRUE to replace the existing file reader" - ) %>% + ) |> stop(call. = FALSE) } # already exists and will be overwritten - glue::glue("file reader for extension '{extension}' already exists and will be overwritten") %>% + glue::glue("file reader for extension '{extension}' already exists and will be overwritten") |> warning(immediate. = TRUE, call. 
= FALSE) frs <- dplyr::filter(frs, extension != !!extension) } @@ -93,7 +93,8 @@ register_file_reader <- function(type, call, extension, func, description, softw # convenience function to find packages where function is located find_func <- function(func) { if (!is.character(func)) stop("please provide the function name rather than the function itself", call. = FALSE) - methods::findFunction(func) %>% map_chr(environmentName) %>% str_replace("^package:", "") %>% { .[!str_detect(., "^imports:")] } %>% unique() + funcs <- methods::findFunction(func) |> map_chr(environmentName) |> str_replace("^package:", "") + return(funcs[!str_detect(funcs, "^imports:")] |> unique()) } #' Supported file types @@ -103,40 +104,28 @@ find_func <- function(func) { #' @family file_types #' @export iso_get_supported_file_types <- function() { - default("file_readers") %>% - dplyr::select(.data$type, .data$extension, .data$software, .data$description, .data$call) %>% + default("file_readers") |> + dplyr::select("type", "extension", "software", "description", "call") |> dplyr::arrange(.data$type, .data$extension) } get_supported_di_files <- function() { - dplyr::filter(default("file_readers"), .data$type == "dual inlet") %>% + dplyr::filter(default("file_readers"), .data$type == "dual inlet") |> dplyr::arrange(.data$extension) } get_supported_cf_files <- function() { - dplyr::filter(default("file_readers"), .data$type == "continuous flow") %>% + dplyr::filter(default("file_readers"), .data$type == "continuous flow") |> dplyr::arrange(.data$extension) } get_supported_scan_files <- function() { - dplyr::filter(default("file_readers"), .data$type == "scan") %>% + dplyr::filter(default("file_readers"), .data$type == "scan") |> dplyr::arrange(.data$extension) } # file reading =========== -#' Read isotope data file -#' -#' This function from the original isoread package is deprecated, please use \link{iso_read_dual_inlet}, \link{iso_read_continuous_flow} and \link{iso_read_scan} instead. 
-#' -#' @param ... original isoread parameters -#' @export -isoread <- function(...) { - stop( - "Deprecated, use iso_read_dual_inlet(), iso_read_continuous_flow() or iso_read_scan() instead.", - call. = FALSE) -} - #' Load dual inlet data #' #' @inheritParams iso_read_files @@ -326,7 +315,7 @@ iso_read_files <- function(paths, root, supported_extensions, data_structure, glue::glue( "{parallel_cores} cores were requested for parallel processing ", "but only {available_cores} are available" - ) %>% warning(immediate. = TRUE, call. = FALSE) + ) |> warning(immediate. = TRUE, call. = FALSE) } cores <- min(parallel_cores, available_cores) oplan <- plan(parallel_plan) @@ -367,22 +356,22 @@ iso_read_files <- function(paths, root, supported_extensions, data_structure, "preparing to read {nrow(filepaths)} data files", if (cache) { " (all will be cached)" } else {""}, if (parallel) { ", setting up {min(cores, nrow(filepaths))} parallel processes..." } - else {"..."}) %>% + else {"..."}) |> log_message() } # generate read files overview files <- - filepaths %>% + filepaths |> mutate( file_n = 1:n(), files_n = n(), cachepath = generate_cache_filepaths(file.path(.data$root, .data$path)), process = if(!parallel) NA_integer_ else ((.data$file_n - 1) %% cores) + 1L, reader_options = list(!!reader_options) - ) %>% + ) |> # merge in supported extensions with reader and cacheable info - match_to_supported_file_types(supported_extensions) %>% + match_to_supported_file_types(supported_extensions) |> # make cache read/write decisions mutate( read_from_cache = read_cache & .data$cacheable & file.exists(.data$cachepath), @@ -405,8 +394,8 @@ iso_read_files <- function(paths, root, supported_extensions, data_structure, # setup up processes set_temp("parallel_process", NA_integer_) # mark the main process processes <- - files %>% - nest(data = c(-.data$process)) %>% + files |> + nest(data = c(-"process")) |> mutate( result = purrr::map2( .data$process, @@ -420,12 +409,12 @@ iso_read_files <- 
function(paths, root, supported_extensions, data_structure, # evaluate result for sequential vs. parallel processing if (!parallel) { # sequential - iso_files <- processes$result %>% unlist(recursive = FALSE) + iso_files <- processes$result |> unlist(recursive = FALSE) } else { # parallel monitor_parallel_logs(processes) cleanup_parallel_logs() - iso_files <- processes$result %>% lapply(future::value) %>% + iso_files <- processes$result |> lapply(future::value) |> unlist(recursive = FALSE) } @@ -438,7 +427,7 @@ iso_read_files <- function(paths, root, supported_extensions, data_structure, sprintf( "finished reading %s files in %.2f %s", nrow(files), as.numeric(end_time - start_time), - attr(end_time - start_time, "units")) %>% + attr(end_time - start_time, "units")) |> log_message() } @@ -450,7 +439,7 @@ iso_read_files <- function(paths, root, supported_extensions, data_structure, fixed("outdated version of the isoreader package")))) { glue::glue( "some files were read from outdated cache files. They were checked for compatibility and should work without issues. However, to avoid this warning and improve read spead, please call iso_reread_outdated_files() on your collection of iso files to refresh outdated cache files." - ) %>% + ) |> warning(immediate. = TRUE, call. = FALSE) } ## unix file creation date @@ -459,7 +448,7 @@ iso_read_files <- function(paths, root, supported_extensions, data_structure, fixed("file creation date cannot be accessed on this Linux system")))) { glue::glue( "file creation date could not be accessed for all files because this information is not available on some Linux systems, reporting last modified time for file_datetime instead. To turn these warnings off, call iso_turn_datetime_warnings_off() and reread these files with iso_reread_all_files()." - ) %>% + ) |> warning(immediate. = TRUE, call. 
= FALSE) } @@ -468,10 +457,10 @@ iso_read_files <- function(paths, root, supported_extensions, data_structure, # bring files into the correct order after potential parallel processing jumble indices <- - tibble(path = purrr::map_chr(iso_files, ~.x$file_info$file_path) %>% unname(), idx = 1:length(.data$path)) %>% - dplyr::left_join(files, by = "path") %>% - dplyr::arrange(.data$file_n) %>% - dplyr::pull(.data$idx) %>% + tibble(path = purrr::map_chr(iso_files, ~.x$file_info$file_path) |> unname(), idx = 1:length(.data$path)) |> + dplyr::left_join(files, by = "path") |> + dplyr::arrange(.data$file_n) |> + dplyr::pull(.data$idx) |> unique() iso_files <- iso_files[indices] @@ -488,13 +477,13 @@ iso_read_files <- function(paths, root, supported_extensions, data_structure, create_read_process <- function(process, data_structure, files) { # specify relevant files columns to match read_iso_file parameters - files <- files %>% + files <- files |> select( - .data$root, .data$path, .data$file_n, .data$files_n, - .data$read_from_cache, .data$reread_outdated_cache, - .data$write_to_cache, .data$cachepath, - .data$post_read_check, ext = .data$extension, - reader_fun = .data$func, reader_options = .data$reader_options, reader_fun_env = .data$env + "root", "path", "file_n", "files_n", + "read_from_cache", "reread_outdated_cache", + "write_to_cache", "cachepath", + "post_read_check", ext = "extension", + reader_fun = "func", reader_options = "reader_options", reader_fun_env = "env" ) # parallel @@ -502,9 +491,14 @@ create_read_process <- function(process, data_structure, files) { # session options all_opts <- get_all_options() # find required global functions and packages from the used readers - func_globals <- filter(files, .data$reader_fun_env == "R_GlobalEnv")$reader_fun %>% - unique() %>% { rlang::set_names(purrr::map(., ~rlang::eval_tidy(rlang::sym(.x))), .) 
} - packages <- c("isoreader", "purrr", filter(files, .data$reader_fun_env != "R_GlobalEnv")$reader_fun_env) %>% unique() + func_globals <- filter(files, .data$reader_fun_env == "R_GlobalEnv")$reader_fun |> + unique() + func_globals <- rlang::set_names( + purrr::map(func_globals, ~rlang::eval_tidy(rlang::sym(.x))), + func_globals + ) + + packages <- c("isoreader", "purrr", filter(files, .data$reader_fun_env != "R_GlobalEnv")$reader_fun_env) |> unique() log_file <- get_temp("parallel_log_file") progress_file <- get_temp("parallel_progress_file") # parallel via futures @@ -615,13 +609,13 @@ read_iso_file <- function( ) # warning and compatibility checks iso_file <- - iso_file %>% + iso_file |> # warning register_warning( details = outdated_message(iso_file), func = "read_iso_file", warn = FALSE - ) %>% + ) |> # compatibility ensure_iso_file_backwards_compatibility() } @@ -677,9 +671,8 @@ read_iso_file <- function( } - # cleanup any binary and source content depending on debug setting + # cleanup source content depending on debug setting if (!default(debug)) { - iso_file$binary <- NULL # @FIXME: binary should be renamed to source throughout iso_file$source <- NULL iso_file$temp <- NULL } @@ -722,7 +715,7 @@ ensure_file_info_list_columns <- function(iso_files) { iso_files <- map(iso_files, ~{ .x$file_info <- ensure_data_frame_list_columns(.x$file_info, exclude = standard_fields) .x - }) %>% iso_as_file_list() + }) |> iso_as_file_list() } else { iso_files$file_info <- ensure_data_frame_list_columns(iso_files$file_info, exclude = standard_fields) } @@ -750,7 +743,7 @@ ensure_iso_file_backwards_compatibility <- function(iso_files) { # check list vs. 
single file if (iso_is_file_list(iso_files)) { - iso_files <- map(iso_files, ensure_compatibility) %>% iso_as_file_list() + iso_files <- map(iso_files, ensure_compatibility) |> iso_as_file_list() } else { iso_files <- ensure_compatibility(iso_files) } @@ -795,8 +788,8 @@ reread_iso_files <- function( all_files <- names(iso_files) old_files <- all_files[get_iso_object_outdated(iso_files)] trouble_files <- problems(iso_files) - error_files <- dplyr::filter(trouble_files, .data$type == "error") %>% dplyr::pull(.data$file_id) - warning_files <- dplyr::filter(trouble_files, .data$type == "warning") %>% dplyr::pull(.data$file_id) + error_files <- dplyr::filter(trouble_files, .data$type == "error") |> dplyr::pull(.data$file_id) + warning_files <- dplyr::filter(trouble_files, .data$type == "warning") |> dplyr::pull(.data$file_id) good_files <- setdiff(all_files, c(error_files, warning_files)) reread_file_ids <- c() if (reread_files_without_problems) reread_file_ids <- c(reread_file_ids, good_files) @@ -809,21 +802,22 @@ reread_iso_files <- function( file_paths <- tibble( file_id = reread_file_ids, - file_root = iso_files[reread_file_ids] %>% map_chr(get_ds_file_root) %>% as.character(), - file_path = iso_files[reread_file_ids] %>% map_chr(get_ds_file_path, include_root = FALSE) %>% as.character(), - file_exists = file.path(.data$file_root, .data$file_path) %>% map_lgl(file.exists) + file_root = iso_files[reread_file_ids] |> map_chr(get_ds_file_root) |> as.character(), + file_path = iso_files[reread_file_ids] |> map_chr(get_ds_file_path, include_root = FALSE) |> as.character(), + file_exists = file.path(.data$file_root, .data$file_path) |> map_lgl(file.exists) ) # safety check for non existent data files if (!all(file_paths$file_exists)) { msg <- # 'unique' paths to account for IARC type multi-file re-reads - file_paths %>% select(-.data$file_id) %>% filter(!.data$file_exists) %>% unique() %>% - { - sprintf( - "%d file(s) do not exist at their referenced location and can 
not be re-read. Consider setting a new root directory with iso_set_file_root() first:\n - %s\n", - length(.$file_exists), paste(sprintf("'%s' in root '%s'", .$file_path, .$file_root), collapse = "\n - ")) - } + file_paths |> select(-"file_id") |> filter(!.data$file_exists) |> unique() + + msg <- + sprintf( + "%d file(s) do not exist at their referenced location and can not be re-read. Consider setting a new root directory with iso_set_file_root() first:\n - %s\n", + length(msg$file_exists), paste(sprintf("'%s' in root '%s'", msg$file_path, msg$file_root), collapse = "\n - ")) + if (stop_if_missing) { stop(msg, call. = FALSE) } else { @@ -841,9 +835,9 @@ reread_iso_files <- function( # finalize file paths file_paths <- - file_paths %>% + file_paths |> # don't re-read non-existent - filter(.data$file_exists) %>% + filter(.data$file_exists) |> # check if has cache mutate( cachepath = generate_cache_filepaths(file.path(.data$file_root, .data$file_path)), @@ -875,14 +869,14 @@ reread_iso_files <- function( } sprintf("found %d %s%sdata file(s)%s, re-reading %d/%d%s", nrow(file_paths), changed, outdated, reread_sum, nrow(file_paths), length(all_files), - if(nrow(file_paths) > 0) { ":" } else {"."}) %>% + if(nrow(file_paths) > 0) { ":" } else {"."}) |> log_message() } # reread files if (nrow(file_paths) > 0) { # 'unique' paths to account for IARC type multi-file re-reads - reread_file_paths <- file_paths %>% select(-.data$file_id) %>% unique() + reread_file_paths <- file_paths |> select(-"file_id") |> unique() args <- c(list( paths = reread_file_paths$file_path, root = reread_file_paths$file_root, read_cache = reread_only_outdated_files, @@ -943,14 +937,14 @@ iso_reread_files <- function(iso_files, ...) 
{ #'saved_files_path <- "saved_isofile.scan.rds" #' #'# create saved collection -#'iso_get_reader_examples_folder() %>% -#' iso_read_scan() %>% +#'iso_get_reader_examples_folder() |> +#' iso_read_scan() |> #' iso_save(saved_files_path) #' #'# load collection -#'iso_read_scan(saved_files_path) %>% +#'iso_read_scan(saved_files_path) |> #' # reread outdated files (alternatively "_all_" or "_changed_") -#' iso_reread_outdated_files() %>% +#' iso_reread_outdated_files() |> #' # re-save collection to its original location #' iso_save(saved_files_path) #' @@ -1054,12 +1048,12 @@ iso_reread_archive <- function(...) { generate_cache_filepaths <- function(filepaths) { # generate cache filepaths - file.info(filepaths) %>% - tibble::rownames_to_column(var = "filepath") %>% + file.info(filepaths) |> + tibble::rownames_to_column(var = "filepath") |> dplyr::mutate( cache_file = sprintf("%s_%s_%.0f.rds", basename(.data$filepath), format(.data$mtime, "%Y%m%d%H%M%S"), .data$size), cache_filepath = file.path(default("cache_dir"), .data$cache_file) - ) %>% + ) |> dplyr::pull(.data$cache_filepath) } @@ -1079,7 +1073,7 @@ load_cached_iso_file <- function(filepath) { # make sure object in file was loaded properly if (!(iso_is_object(iso_file))) { - sprintf("cached file '%s' does not contain iso_file(s)", basename(filepath)) %>% stop(call. = FALSE) + sprintf("cached file '%s' does not contain iso_file(s)", basename(filepath)) |> stop(call. = FALSE) } # return diff --git a/R/isoread_caf.R b/R/isoread_caf.R index d24c1df9..7b0021fe 100644 --- a/R/isoread_caf.R +++ b/R/isoread_caf.R @@ -8,7 +8,7 @@ iso_read_caf <- function(ds, options = list()) { stop("data structure must be a 'dual_inlet' iso_file", call. 
= FALSE) # read binary file - ds$binary <- get_ds_file_path(ds) %>% read_binary_isodat_file() + ds$source <- get_ds_file_path(ds) |> read_binary_isodat_file() # process file info if(ds$read_options$file_info) { @@ -44,60 +44,61 @@ iso_read_caf <- function(ds, options = list()) { extract_caf_raw_voltage_data <- function(ds) { # locate masses - ds$binary <- ds$binary %>% - set_binary_file_error_prefix("cannot identify measured masses") %>% + ds$source <- ds$source |> + set_binary_file_error_prefix("cannot identify measured masses") |> move_to_C_block_range("CResultData", "CEvalDataIntTransferPart") # read all masses masses_re <- re_combine(re_x_000(), re_text_x(), re_unicode("rIntensity")) masses <- tibble( - pos = find_next_patterns(ds$binary, masses_re) + masses_re$size, + pos = find_next_patterns(ds$source, masses_re) + masses_re$size, # capture cup and mass data = map(.data$pos, function(pos) { - ds$binary %>% - move_to_pos(pos) %>% - capture_data_till_pattern("cup", "text", re_text_x(), data_bytes_max = 8, move_past_dots = TRUE) %>% - move_to_next_pattern(re_unicode("rIntensity "), max_gap = 0L) %>% - capture_data_till_pattern("mass", "text", re_text_x(), data_bytes_max = 8) %>% - { dplyr::as_tibble(.$data[c("cup", "mass")]) } + capture <- + ds$source |> + move_to_pos(pos) |> + capture_data_till_pattern("cup", "text", re_text_x(), data_bytes_max = 8, move_past_dots = TRUE) |> + move_to_next_pattern(re_unicode("rIntensity "), max_gap = 0L) |> + capture_data_till_pattern("mass", "text", re_text_x(), data_bytes_max = 8) + dplyr::as_tibble(capture$data[c("cup", "mass")]) }) - ) %>% + ) |> # unnest data - unnest(.data$data) %>% + unnest("data") |> mutate( cup = as.integer(.data$cup), column = str_c("v", .data$mass, ".mV") ) # locate voltage data - ds$binary <- ds$binary %>% - set_binary_file_error_prefix("cannot locate voltage data") %>% + ds$source <- ds$source |> + set_binary_file_error_prefix("cannot locate voltage data") |> 
move_to_C_block_range("CDualInletRawData", "CResultData") # find binary positions for voltage standards and samples standard_block_start <- find_next_pattern( - ds$binary, re_combine(re_unicode("Standard Block"), re_null(4), re_x_000())) + ds$source, re_combine(re_unicode("Standard Block"), re_null(4), re_x_000())) sample_block_start <- find_next_pattern( - ds$binary, re_combine(re_unicode("Sample Block"), re_null(4), re_x_000())) + ds$source, re_combine(re_unicode("Sample Block"), re_null(4), re_x_000())) # safety checks if (is.null(standard_block_start) || is.null(sample_block_start) || standard_block_start > sample_block_start) { - iso_source_file_op_error(ds$binary, "cannot find standard and sample voltage data blocks at expected positions") + iso_source_file_op_error(ds$source, "cannot find standard and sample voltage data blocks at expected positions") } # read voltage data - ds$binary <- set_binary_file_error_prefix(ds$binary, "cannot process voltage data") + ds$source <- set_binary_file_error_prefix(ds$source, "cannot process voltage data") # right before this sequence there is a 4 byte sequence that could be a date, the last block is the # of masses read_blocks_re <- re_combine(re_null(4), re_block("etx"), re_x_000()) - positions <- find_next_patterns(ds$binary, read_blocks_re) + positions <- find_next_patterns(ds$source, read_blocks_re) # function to capture voltages capture_voltages <- function(pos) { - bin <- ds$binary %>% - move_to_pos(pos - 4) %>% + bin <- ds$source |> + move_to_pos(pos - 4) |> capture_n_data("n_masses", "integer", n = 1) # safety check @@ -105,7 +106,7 @@ extract_caf_raw_voltage_data <- function(ds) { iso_source_file_op_error(bin, glue("inconsistent number of voltage measurements encountered ({bin$data$n_masses}), expected {nrow(masses)}")) } - bin <- bin %>% + bin <- bin |> capture_n_data("voltage", "double", n = nrow(masses), sensible = c(-1000, 100000)) # return voltage data @@ -117,28 +118,28 @@ extract_caf_raw_voltage_data <- 
function(ds) { pos = positions + read_blocks_re$size, # note last read in the sample block is actually the "pre"-read of the standard type = ifelse(.data$pos < sample_block_start | .data$pos==max(.data$pos), "standard", "sample") - ) %>% - group_by(.data$type) %>% + ) |> + group_by(.data$type) |> mutate( cycle = as.integer(ifelse(.data$type[1] == "standard" & .data$pos == max(.data$pos), 0L, 1L:n())), # capture voltages voltages = map(.data$pos, capture_voltages) - ) %>% ungroup() %>% + ) |> ungroup() |> # unnest voltager data - unnest(.data$voltages) %>% + unnest("voltages") |> # combine with cup/mass information - left_join(select(masses, .data$cup, .data$column), by = "cup") + left_join(select(masses, "cup", "column"), by = "cup") # safety check if (any(notok <- is.na(voltages$column))) { - iso_source_file_op_error(ds$binary, glue("inconsistent cup designations: {collapse(voltages$cup[notok], ', ')}")) + iso_source_file_op_error(ds$source, glue("inconsistent cup designations: {collapse(voltages$cup[notok], ', ')}")) } # voltages data frame ds$raw_data <- - voltages %>% - select(-.data$pos, -.data$cup) %>% - spread(.data$column, .data$voltage) %>% + voltages |> + select(-"pos", -"cup") |> + spread(.data$column, .data$voltage) |> arrange(desc(.data$type), .data$cycle) return(ds) @@ -148,11 +149,11 @@ extract_caf_raw_voltage_data <- function(ds) { extract_caf_vendor_data_table <- function(ds) { # reset navigation - ds$binary <- reset_binary_file_navigation(ds$binary) + ds$source <- reset_binary_file_navigation(ds$source) # get data table extracted_dt <- - ds %>% + ds |> # FIXME: see testing.Rmd for trying to switch to vendor_data_table2 (not possible yet) extract_isodat_main_vendor_data_table( C_block = "CResultData", cap_at_fun = NULL, @@ -163,12 +164,12 @@ extract_caf_vendor_data_table <- function(ds) { # safety check req_cols <- c("Nr.", "Is Ref.?") if (!all(ok <- req_cols %in% names(vendor_dt))) { - glue("not all required columns found, missing: 
{collapse(req_cols[!ok], '. ')}") %>% + glue("not all required columns found, missing: {collapse(req_cols[!ok], '. ')}") |> stop(call. = FALSE) } # divided row columns (some are in the first block, some in the second) - second_block_cols <- vendor_dt[1,] %>% map_lgl(~is.na(.x)) + second_block_cols <- vendor_dt[1,] |> map_lgl(~is.na(.x)) if (sum(second_block_cols) > 0) { # separate and merge the two blocks condition <- as.name(names(vendor_dt)[which(second_block_cols)[1]]) @@ -179,11 +180,11 @@ extract_caf_vendor_data_table <- function(ds) { } # assign data table - ds$vendor_data_table <- vendor_dt %>% - arrange(!!as.name("Nr.")) %>% - mutate(cycle = as.integer(1:n())) %>% - select(-!!as.name("Nr."), -!!as.name("Is Ref.?")) %>% - select(!!as.name("cycle"), everything()) + ds$vendor_data_table <- vendor_dt |> + arrange(!!as.name("Nr.")) |> + mutate(cycle = as.integer(1:n())) |> + select(-"Nr.", -"Is Ref.?") |> + select("cycle", dplyr::everything()) # save information on the column units attr(ds$vendor_data_table, "units") <- diff --git a/R/isoread_cf.R b/R/isoread_cf.R index bcb6a9b7..196c3c76 100644 --- a/R/isoread_cf.R +++ b/R/isoread_cf.R @@ -8,7 +8,7 @@ iso_read_cf <- function(ds, options = list()) { stop("data structure must be a 'continuous_flow' iso_file", call. = FALSE) # read binary file - ds$binary <- get_ds_file_path(ds) %>% read_binary_isodat_file() + ds$source <- get_ds_file_path(ds) |> read_binary_isodat_file() # process file info if(ds$read_options$file_info) { @@ -53,13 +53,13 @@ iso_read_cf <- function(ds, options = list()) { # extract voltage data in cf file extract_cf_raw_voltage_data <- function(ds) { # move to beginning of intensity information (the larger block coming - ds$binary <- ds$binary %>% - set_binary_file_error_prefix("cannot identify measured masses") %>% + ds$source <- ds$source |> + set_binary_file_error_prefix("cannot identify measured masses") |> # can have data in multiple positions (e.g. 
if peak jumping) throughout the rest of the binary move_to_C_block("CRawDataScanStorage", reset_cap = TRUE) # get trace positions - gas_positions <- ds$binary %>% + gas_positions <- ds$source |> find_next_patterns(re_text_0(), re_text_x(), re_unicode("Trace Data "), re_block("text"), re_null(4), re_block("stx")) # raw_data @@ -67,39 +67,39 @@ extract_cf_raw_voltage_data <- function(ds) { # loop through gas positions for (gas_pos in gas_positions) { - ds$binary <- ds$binary %>% - move_to_pos(gas_pos) %>% - skip_pos(30) %>% + ds$source <- ds$source |> + move_to_pos(gas_pos) |> + skip_pos(30) |> capture_data_till_pattern("gas", "text", re_null(4), re_block("stx")) - gas_config <- ds$binary$data$gas + gas_config <- ds$source$data$gas # data start data_start_re <- re_combine( re_block("stx"), re_text_0(), re_block("stx"), re_direct(".{4}", size = 4, label = ".{4}")) - ds$binary <- ds$binary %>% move_to_next_pattern(data_start_re) - data_start <- ds$binary$pos + ds$source <- ds$source |> move_to_next_pattern(data_start_re) + data_start <- ds$source$pos # find all masses at end of data data_end_re <- re_combine( re_direct(".{2}", size = 2, label = ".{2}"), re_block("stx"), re_text_0(), re_block("stx"), re_null(4)) - ds$binary <- ds$binary %>% move_to_next_pattern(data_end_re) - data_end <- ds$binary$pos - data_end_re$size + ds$source <- ds$source |> move_to_next_pattern(data_end_re) + data_end <- ds$source$pos - data_end_re$size mass_re <- re_combine(re_text_x(), re_unicode("Mass ")) - mass_positions <- ds$binary %>% - cap_at_next_pattern(re_unicode("MS/Clock")) %>% + mass_positions <- ds$source |> + cap_at_next_pattern(re_unicode("MS/Clock")) |> find_next_patterns(mass_re) masses <- c() for (pos in mass_positions) { # a bit tricky to capture but this should do the trick reliably raw_mass <- - ds$binary %>% move_to_pos(pos + mass_re$size) %>% - capture_data_till_pattern("mass", "raw", re_text_x(), ignore_trailing_zeros = FALSE) %>% - { .$data$mass } + ds$source |> 
move_to_pos(pos + mass_re$size) |> + capture_data_till_pattern("mass", "raw", re_text_x(), ignore_trailing_zeros = FALSE) |> + purrr::pluck("data", "mass") text_mass <- parse_raw_data(grepRaw("^([0-9]\\x00)+", raw_mass, value = TRUE), type = "text") masses <- c(masses, text_mass) } @@ -112,10 +112,10 @@ extract_cf_raw_voltage_data <- function(ds) { if (n_data_points %% 1 > 0) stop("number of data points for ", gas_config, " is not an integer (", n_data_points, ")", call. = FALSE) - ds$binary<- ds$binary %>% - move_to_pos(data_start) %>% + ds$source<- ds$source |> + move_to_pos(data_start) |> capture_n_data("voltages", c("float", rep("double", length(masses))), n_data_points) - voltages <- bind_rows(ds$binary$data$voltages %>% dplyr::as_tibble() %>% rlang::set_names(c("time.s", masses_columns))) + voltages <- bind_rows(ds$source$data$voltages |> dplyr::as_tibble() |> rlang::set_names(c("time.s", masses_columns))) # check for data if (nrow(voltages) == 0) @@ -128,9 +128,9 @@ extract_cf_raw_voltage_data <- function(ds) { # add time point column ds$raw_data <- - raw_data %>% arrange(.data$time.s) %>% - mutate(tp = 1:n()) %>% - select(.data$tp, .data$time.s, everything()) + raw_data |> arrange(.data$time.s) |> + mutate(tp = 1:n()) |> + select("tp", "time.s", everything()) return(ds) } diff --git a/R/isoread_did.R b/R/isoread_did.R index a0c3a250..a8ca3296 100644 --- a/R/isoread_did.R +++ b/R/isoread_did.R @@ -8,7 +8,7 @@ iso_read_did <- function(ds, options = list()) { stop("data structure must be a 'dual_inlet' iso_file", call. 
= FALSE) # read binary file - ds$binary <- get_ds_file_path(ds) %>% read_binary_isodat_file() + ds$source <- get_ds_file_path(ds) |> read_binary_isodat_file() # process file info if(ds$read_options$file_info) { @@ -39,47 +39,47 @@ iso_read_did <- function(ds, options = list()) { extract_did_raw_voltage_data <- function(ds) { # mass information - ds$binary <- ds$binary %>% - set_binary_file_error_prefix("cannot identify measured masses") %>% - move_to_C_block("CBinary") %>% + ds$source <- ds$source |> + set_binary_file_error_prefix("cannot identify measured masses") |> + move_to_C_block("CBinary") |> move_to_next_C_block_range("CTraceInfoEntry", "CPlotRange") # read all masses masses_re <- re_combine(re_text_x(), re_unicode("Mass ")) - masses_positions <- find_next_patterns(ds$binary, masses_re) + masses_positions <- find_next_patterns(ds$source, masses_re) masses <- map_chr(masses_positions, function(pos) { - ds$binary %>% - move_to_pos(pos + masses_re$size) %>% + ds$source |> + move_to_pos(pos + masses_re$size) |> capture_data_till_pattern("mass", "text", re_or(re_text_x(), re_block("C-block")), - data_bytes_max = 8, move_past_dots = FALSE) %>% - { .$data$mass } + data_bytes_max = 8, move_past_dots = FALSE) |> + purrr::pluck("data", "mass") }) # mass column formatting masses_columns <- str_c("v", masses, ".mV") # locate voltage data - ds$binary <- ds$binary %>% - set_binary_file_error_prefix("cannot locate voltage data") %>% - move_to_C_block_range("CDualInletRawData", "CTwoDoublesArrayData") %>% - move_to_next_C_block("CIntegrationUnitTransferPart") %>% + ds$source <- ds$source |> + set_binary_file_error_prefix("cannot locate voltage data") |> + move_to_C_block_range("CDualInletRawData", "CTwoDoublesArrayData") |> + move_to_next_C_block("CIntegrationUnitTransferPart") |> set_binary_file_error_prefix("cannot process voltage data") # find binary positions for voltage standards and samples voltages <- list() standard_voltage_start_re <- re_combine(re_unicode("/"), 
re_text_x(), re_text_x(), re_unicode("Standard ")) - standard_positions <- find_next_patterns(ds$binary, standard_voltage_start_re) + standard_positions <- find_next_patterns(ds$source, standard_voltage_start_re) sample_voltage_start_re <- re_combine(re_unicode("/"), re_text_0(), re_text_x(), re_unicode("Sample ")) - sample_positions <- find_next_patterns(ds$binary, sample_voltage_start_re) + sample_positions <- find_next_patterns(ds$source, sample_voltage_start_re) # function to capture voltages capture_voltages <- function(pos) { - bin <- ds$binary %>% - move_to_pos(pos) %>% - capture_data_till_pattern("cycle", "text", re_null(4), re_block("stx"), move_past_dots = TRUE) %>% - move_to_next_pattern(re_unicode("/"), re_text_0(), re_text_0(), re_null(4), re_block("stx")) %>% - move_to_next_pattern(re_x_000(), re_x_000()) %>% + bin <- ds$source |> + move_to_pos(pos) |> + capture_data_till_pattern("cycle", "text", re_null(4), re_block("stx"), move_past_dots = TRUE) |> + move_to_next_pattern(re_unicode("/"), re_text_0(), re_text_0(), re_null(4), re_block("stx")) |> + move_to_next_pattern(re_x_000(), re_x_000()) |> capture_data_till_pattern("voltage", "double", re_null(6),re_x_000(), sensible = c(-1000, 100000)) # safety check @@ -96,16 +96,16 @@ extract_did_raw_voltage_data <- function(ds) { bind_rows( tibble(pos = standard_positions + standard_voltage_start_re$size, type = "standard"), tibble(pos = sample_positions + sample_voltage_start_re$size, type = "sample") - ) %>% + ) |> mutate( voltages = map(.data$pos, capture_voltages) - ) %>% - unnest(voltages) %>% + ) |> + unnest("voltages") |> # join in the mass information - left_join(tibble(cup = 1:length(masses), mass = masses_columns), by = "cup") %>% + left_join(tibble(cup = 1:length(masses), mass = masses_columns), by = "cup") |> # spread out the volrages - select(-.data$pos, -.data$cup) %>% - spread(.data$mass, .data$voltage) %>% + select(-"pos", -"cup") |> + spread(.data$mass, .data$voltage) |> # update cycle 
mutate(cycle = as.integer(ifelse(.data$cycle == "Pre", -1L, .data$cycle)) + 1L) @@ -119,14 +119,14 @@ extract_did_raw_voltage_data <- function(ds) { extract_did_vendor_data_table <- function(ds) { # find vendor data table - ds$binary <- ds$binary %>% - set_binary_file_error_prefix("cannot process vendor computed data table") %>% + ds$source <- ds$source |> + set_binary_file_error_prefix("cannot process vendor computed data table") |> move_to_C_block_range("CDualInletEvaluatedData", "CParsedEvaluationString") # cap - if (!is.null(pos <- find_next_pattern(ds$binary, re_unicode("Gas Indices")))) { - ds$binary <- ds$binary %>% cap_at_pos(pos - 20) - } else iso_source_file_op_error(ds$binary, "cannot find data deliminter 'Gas Indices'") + if (!is.null(pos <- find_next_pattern(ds$source, re_unicode("Gas Indices")))) { + ds$source <- ds$source |> cap_at_pos(pos - 20) + } else iso_source_file_op_error(ds$source, "cannot find data delimiter 'Gas Indices'") # find data positions column_header_re <- re_combine(re_block("etx"), re_unicode("/"), re_text_x(), re_block("text"), # Delta or AT% @@ -134,45 +134,45 @@ extract_did_vendor_data_table <- function(ds) { re_null(4), re_block("stx")) column_data_re <- re_combine(re_unicode("/"), re_text_0(), re_text_x(), re_block("text"), re_null(4), re_x_000(), re_x_000()) # data comes after this - column_header_positions <- find_next_patterns(ds$binary, column_header_re) - column_data_positions <- find_next_patterns(ds$binary, column_data_re) + column_header_positions <- find_next_patterns(ds$source, column_header_re) + column_data_positions <- find_next_patterns(ds$source, column_data_re) # safety checks if (length(column_header_positions) == 0) { - iso_source_file_op_error(ds$binary, "no column headers found") + iso_source_file_op_error(ds$source, "no column headers found") } else if (length(column_header_positions) != length(column_data_positions)) { - iso_source_file_op_error(ds$binary, sprintf("unequal number of column headers (%d) 
and data entries (%d) found", + iso_source_file_op_error(ds$source, sprintf("unequal number of column headers (%d) and data entries (%d) found", length(column_header_positions), length(column_data_positions))) } else if (!all(column_header_positions < column_data_positions)) { - iso_source_file_op_error(ds$binary, "found column headers not interspersed with data entries") + iso_source_file_op_error(ds$source, "found column headers not interspersed with data entries") } # read the data vendor_dt <- list() for (i in 1:length(column_header_positions)) { - ds$binary <- ds$binary %>% - move_to_pos(column_header_positions[i] + 10) %>% # skip initial / at the start of header + ds$source <- ds$source |> + move_to_pos(column_header_positions[i] + 10) |> # skip initial / at the start of header # capture column type (typically Delta or AT%) # could skip this to speed up - capture_data_till_pattern("type", "text", re_text_x(), move_past_dots = TRUE) %>% + capture_data_till_pattern("type", "text", re_text_x(), move_past_dots = TRUE) |> # capture actual colum name - capture_data_till_pattern("column", "text", re_null(4), re_block("stx")) %>% + capture_data_till_pattern("column", "text", re_null(4), re_block("stx")) |> # capture column data - move_to_pos(column_data_positions[i]) %>% - move_to_next_pattern(column_data_re, max_gap = 0) %>% # move to start of data - capture_n_data("n_values", "integer", n = 1, sensible = c(1, 1000)) %>% # NOTE: this assumes more than 1000 cycles are unrealistic in dual inlet + move_to_pos(column_data_positions[i]) |> + move_to_next_pattern(column_data_re, max_gap = 0) |> # move to start of data + capture_n_data("n_values", "integer", n = 1, sensible = c(1, 1000)) |> # NOTE: this assumes more than 1000 cycles are unrealistic in dual inlet capture_data_till_pattern("values", "double", re_text_0(), re_block("stx"), sensible = c(-1e10, 1e10)) # safety check - if (length(ds$binary$data$values) != 2 * ds$binary$data$n_values) - glue::glue("inconsistent 
number of data entries recovered ({length(ds$binary$data$values)}) - ", - "expected {2 * ds$binary$data$n_values} values from {ds$binary$data$n_values} cycles") %>% + if (length(ds$source$data$values) != 2 * ds$source$data$n_values) + glue::glue("inconsistent number of data entries recovered ({length(ds$source$data$values)}) - ", + "expected {2 * ds$source$data$n_values} values from {ds$source$data$n_values} cycles") |> stop(call. = FALSE) table_column <- list( list( - cycle = as.integer(ds$binary$data$values[c(TRUE, FALSE)] + 1L), - value = ds$binary$data$values[c(FALSE, TRUE)] - )) %>% rlang::set_names(str_replace(ds$binary$data$column, "\\s*$", "")) # remove trailing white spaces in column names + cycle = as.integer(ds$source$data$values[c(TRUE, FALSE)] + 1L), + value = ds$source$data$values[c(FALSE, TRUE)] + )) |> rlang::set_names(str_replace(ds$source$data$column, "\\s*$", "")) # remove trailing white spaces in column names vendor_dt <- c(vendor_dt, table_column) } @@ -185,7 +185,7 @@ extract_did_vendor_data_table <- function(ds) { # vendor table ds$vendor_data_table <- bind_cols( tibble(cycle = vendor_dt[[1]][[1]]), - lapply(vendor_dt, `[[`, 2) %>% dplyr::as_tibble()) + lapply(vendor_dt, `[[`, 2) |> dplyr::as_tibble()) attr(ds$vendor_data_table, "units") <- NULL # units do not apply return(ds) } diff --git a/R/isoread_dxf.R b/R/isoread_dxf.R index cfdb5dd4..86abb913 100644 --- a/R/isoread_dxf.R +++ b/R/isoread_dxf.R @@ -8,7 +8,7 @@ iso_read_dxf <- function(ds, options = list()) { stop("data structure must be a 'continuous_flow' iso_file", call. 
= FALSE) # read binary file - ds$binary <- get_ds_file_path(ds) %>% read_binary_isodat_file() + ds$source <- get_ds_file_path(ds) |> read_binary_isodat_file() # process file info if(ds$read_options$file_info) { @@ -49,31 +49,31 @@ extract_dxf_raw_voltage_data <- function(ds) { # search gas configurations # (the larger block coming afterwards is not always present so not used as max pos here) - ds$binary <- ds$binary %>% - set_binary_file_error_prefix("cannot identify measured masses") %>% + ds$source <- ds$source |> + set_binary_file_error_prefix("cannot identify measured masses") |> move_to_C_block_range("CEvalDataIntTransferPart", "CBinary") # find all gas configurations configs <- list() gas_config_name_re <- re_combine(re_text_x(), re_block("alpha"), re_text_0(), re_text_x()) - config_positions <- ds$binary %>% find_next_patterns(gas_config_name_re) - config_caps <- c(config_positions[-1], ds$binary$max_pos) + config_positions <- ds$source |> find_next_patterns(gas_config_name_re) + config_caps <- c(config_positions[-1], ds$source$max_pos) if (length(config_positions) == 0) return(ds) for(i in 1:length(config_positions)) { # find name of gas configuration - ds$binary <- ds$binary %>% - move_to_pos(config_positions[i]) %>% - move_to_next_pattern(re_text_x(), max_gap = 0) %>% + ds$source <- ds$source |> + move_to_pos(config_positions[i]) |> + move_to_next_pattern(re_text_x(), max_gap = 0) |> capture_data_till_pattern("gas", "text", re_text_0(), re_text_x()) # make sure we have the right starts and caps for each configuration - if (ds$binary$data$gas %in% names(configs)) { + if (ds$source$data$gas %in% names(configs)) { # raise cap on previous - configs[[ds$binary$data$gas]]$cap <- config_caps[i] + configs[[ds$source$data$gas]]$cap <- config_caps[i] } else { # new config - configs[[ds$binary$data$gas]] <- list(pos = config_positions[i], cap = config_caps[i], masses = c()) + configs[[ds$source$data$gas]] <- list(pos = config_positions[i], cap = config_caps[i], 
masses = c()) } } @@ -84,45 +84,47 @@ extract_dxf_raw_voltage_data <- function(ds) { for (config in names(configs)) { if (default(debug)) log_message("processing config '", config, "' (", configs[[config]]$pos, "-", configs[[config]]$cap, ")") - ds$binary <- ds$binary %>% - move_to_pos(configs[[config]]$pos) %>% + ds$source <- ds$source |> + move_to_pos(configs[[config]]$pos) |> cap_at_pos(configs[[config]]$cap) intensity_id <- 1 - while(!is.null(find_next_pattern(ds$binary, re_unicode(str_c("rIntensity", intensity_id))))) { - ds$binary <- ds$binary %>% - move_to_next_pattern(re_unicode(str_c("rIntensity", intensity_id))) %>% - move_to_next_pattern(re_text_x(), re_unicode("rIntensity "), max_gap = 0) %>% + while(!is.null(find_next_pattern(ds$source, re_unicode(str_c("rIntensity", intensity_id))))) { + ds$source <- ds$source |> + move_to_next_pattern(re_unicode(str_c("rIntensity", intensity_id))) |> + move_to_next_pattern(re_text_x(), re_unicode("rIntensity "), max_gap = 0) |> capture_data_till_pattern("mass", "text", re_text_x(), move_past_dots = TRUE) - configs[[config]]$masses <- c(configs[[config]]$masses, ds$binary$data$mass) + configs[[config]]$masses <- c(configs[[config]]$masses, ds$source$data$mass) intensity_id <- intensity_id + 1 } } # find gas config alternative names (sometimes set, sometimes not) - ds$binary <- ds$binary %>% + ds$source <- ds$source |> move_to_C_block_range("CPeakFindParameter", "CResultArray") - smoothing_positions <- find_next_patterns(ds$binary, re_unicode("Smoothing")) + smoothing_positions <- find_next_patterns(ds$source, re_unicode("Smoothing")) gas_name_end_re <- re_combine(re_null(4), re_direct("[\x01-\xff]", label = "x01-xff")) gas_name_re <- re_combine(re_text_x(), re_block("text0"), gas_name_end_re) for (pos in smoothing_positions) { - ds$binary <- ds$binary %>% - move_to_pos(pos, reset_cap = TRUE) %>% - { cap_at_pos(., find_next_pattern(., re_unicode("Peak Center"))) } %>% - move_to_next_pattern(gas_name_re, move_to_end = 
FALSE) %>% - skip_pos(4) %>% # skip the fef-x at the beginning + ds$source <- ds$source |> + move_to_pos(pos, reset_cap = TRUE) + + ds$source <- ds$source |> + cap_at_pos(find_next_pattern(ds$source, re_unicode("Peak Center"))) |> + move_to_next_pattern(gas_name_re, move_to_end = FALSE) |> + skip_pos(4) |> # skip the fef-x at the beginning capture_data_till_pattern("gas_name1", "text", gas_name_end_re, data_bytes_max = 50) - gas_name1 <- ds$binary$data$gas_name1 + gas_name1 <- ds$source$data$gas_name1 # gas name 2 - next_gas_name <- find_next_pattern(ds$binary, gas_name_re) - if (!is.null(next_gas_name) && next_gas_name < ds$binary$max_pos) { - ds$binary <- ds$binary %>% - move_to_next_pattern(gas_name_re, move_to_end = FALSE) %>% - skip_pos(4) %>% # skip the fef-x at the beginning + next_gas_name <- find_next_pattern(ds$source, gas_name_re) + if (!is.null(next_gas_name) && next_gas_name < ds$source$max_pos) { + ds$source <- ds$source |> + move_to_next_pattern(gas_name_re, move_to_end = FALSE) |> + skip_pos(4) |> # skip the fef-x at the beginning capture_data_till_pattern("gas_name2", "text", gas_name_end_re, data_bytes_max = 50) - gas_name2 <- ds$binary$data$gas_name2 + gas_name2 <- ds$source$data$gas_name2 # update config with alternative name if (gas_name2 != "" && gas_name1 != gas_name2 && @@ -130,7 +132,7 @@ extract_dxf_raw_voltage_data <- function(ds) { if (default(debug)) glue::glue( "renaming config '{gas_name1}' to '{gas_name2}' ", - "(non-standard config name)") %>% + "(non-standard config name)") |> log_message() names(configs)[config_idx] <- gas_name2 } @@ -138,10 +140,10 @@ extract_dxf_raw_voltage_data <- function(ds) { } # move to beginning of original data to get voltages - ds$binary <- ds$binary %>% - set_binary_file_error_prefix("cannot recover raw voltages") %>% - move_to_C_block_range("CAllMoleculeWeights", "CMethod") %>% - move_to_next_C_block("CStringArray") %>% + ds$source <- ds$source |> + set_binary_file_error_prefix("cannot recover raw 
voltages") |> + move_to_C_block_range("CAllMoleculeWeights", "CMethod") |> + move_to_next_C_block("CStringArray") |> move_to_next_pattern(re_unicode("OrigDataBlock"), re_null(4), re_block("stx")) # find all data sets @@ -153,31 +155,32 @@ extract_dxf_raw_voltage_data <- function(ds) { re_text_0(), re_block("stx")) gas_config_re <- re_combine(re_text_x(), re_block("text"), re_text_0()) voltages <- tibble() - positions <- find_next_patterns(ds$binary, data_start_re) + positions <- find_next_patterns(ds$source, data_start_re) for (pos in positions) { # move to beginning of data - ds$binary <- ds$binary %>% move_to_pos(pos + data_start_re$size + 4L) # 4 byte gap before data - start_pos <- ds$binary$pos + ds$source <- ds$source |> move_to_pos(pos + data_start_re$size + 4L) # 4 byte gap before data + start_pos <- ds$source$pos # find gas configuration name - gas_data_block_end <- ds$binary %>% - move_to_next_pattern(data_end_re) %>% - move_to_next_pattern(gas_config_re, move_to_end = FALSE, max_gap = 20) %>% - skip_pos(4) %>% # skip the fef-x at the beginning - capture_data_till_pattern("gas", "text", re_text_0(), data_bytes_max = 50) %>% - { list(gas = .$data$gas, pos = .$pos) } + capture <- ds$source |> + move_to_next_pattern(data_end_re) |> + move_to_next_pattern(gas_config_re, move_to_end = FALSE, max_gap = 20) |> + skip_pos(4) |> # skip the fef-x at the beginning + capture_data_till_pattern("gas", "text", re_text_0(), data_bytes_max = 50) + + gas_data_block_end <- list(gas = capture$data$gas, pos = capture$pos) gas_config <- gas_data_block_end$gas # debug message if (default(debug)) - glue::glue("processing data for '{gas_config}' ({start_pos}-{gas_data_block_end$pos})") %>% + glue::glue("processing data for '{gas_config}' ({start_pos}-{gas_data_block_end$pos})") |> log_message() # find gas configuration if (!gas_config %in% names(configs)) glue::glue("could not find gas configuration for gas '{gas_config}', ", - "available: '{paste(names(configs), collapse = \"', 
'\")}'") %>% + "available: '{paste(names(configs), collapse = \"', '\")}'") |> stop(call. = FALSE) # find gas configuration masses @@ -186,11 +189,11 @@ extract_dxf_raw_voltage_data <- function(ds) { masses_columns <- str_c("v", masses, ".mV") # save voltage data - ds$binary <- ds$binary %>% + ds$source <- ds$source |> capture_data_till_pattern("voltages", c("float", rep("double", length(masses))), data_end_re) voltages <- bind_rows(voltages, - ds$binary$data$voltages %>% - dplyr::as_tibble() %>% rlang::set_names(c("time.s", masses_columns))) + ds$source$data$voltages |> + dplyr::as_tibble() |> rlang::set_names(c("time.s", masses_columns))) } # check for data @@ -198,9 +201,9 @@ extract_dxf_raw_voltage_data <- function(ds) { # add time point column ds$raw_data <- - voltages %>% arrange(.data$time.s) %>% - mutate(tp = 1:n()) %>% - select(.data$tp, .data$time.s, everything()) + voltages |> arrange(.data$time.s) |> + mutate(tp = 1:n()) |> + select("tp", "time.s", everything()) return(ds) } diff --git a/R/isoread_flow_iarc.R b/R/isoread_flow_iarc.R index 5352fb44..4314d0ae 100644 --- a/R/isoread_flow_iarc.R +++ b/R/isoread_flow_iarc.R @@ -26,7 +26,9 @@ iso_read_flow_iarc <- function(ds, options = list()) { } # unzipping iarc archive ==== - folder_name <- ds$file_info$file_path %>% basename() %>% { str_replace(., fixed(get_file_ext(.)), "") } + folder_name <- ds$file_info$file_path |> basename() + folder_name <- stringr::str_remove(folder_name, fixed(get_file_ext(folder_name))) + folder_path <- file.path(tempdir(), folder_name) if (!file.exists(folder_path)) { if (!default("quiet")) log_message("unpacking isoprime archive file...", prefix = " ") @@ -56,22 +58,22 @@ iso_read_flow_iarc <- function(ds, options = list()) { } tasks <- exec_func_with_error_catch(process_iarc_tasks_xml, task_files, method_params) col_check(c("GlobalIdentifier", "Name", "Id", "ProcessingListTypeIdentifier"), - map(tasks, "info") %>% bind_rows(), msg = "iarc tasks' information insufficient") + 
map(tasks, "info") |> bind_rows(), msg = "iarc tasks' information insufficient") # processing lists / gas configuration ==== all_processing_lists <- - tasks %>% map("info") %>% bind_rows() %>% - group_by(.data$ProcessingListTypeIdentifier) %>% - summarize(samples = n()) %>% - ungroup() %>% + tasks |> map("info") |> bind_rows() |> + group_by(.data$ProcessingListTypeIdentifier) |> + summarize(samples = n()) |> + ungroup() |> full_join(processing_lists, by = c("ProcessingListTypeIdentifier" = "DefinitionUniqueIdentifier")) # safety check on processing lists (make sure all processing lists defined in tasks have a ProcessingListId) if (any(is.na(all_processing_lists$ProcessingListId))) { sprintf("mismatch between processing lists in tasks ('%s') and in iarc info ('%s')", - all_processing_lists$ProcessingListTypeIdentifier[is.na(all_processing_lists$samples)] %>% str_c(collapse = "', '"), - all_processing_lists$ProcessingListTypeIdentifier[is.na(all_processing_lists$ProcessingListId)] %>% str_c(collapse = "', '") - ) %>% stop(call. = FALSE) + all_processing_lists$ProcessingListTypeIdentifier[is.na(all_processing_lists$samples)] |> str_c(collapse = "', '"), + all_processing_lists$ProcessingListTypeIdentifier[is.na(all_processing_lists$ProcessingListId)] |> str_c(collapse = "', '") + ) |> stop(call. 
= FALSE) } # get gas configurations @@ -106,12 +108,12 @@ process_iarc_samples <- function(iso_file_template, tasks, gas_configs, folder_p # sort task to process in order #FIXME: sorting no longer works because purrr removed the function #not sorting at all right now but revisit this - #tasks <- tasks %>% sort_by(generate_task_sample_id) + #tasks <- tasks |> sort_by(generate_task_sample_id) # loop through and process info and data sapply(tasks, function(task) { # prepare iso_file object - iso_file <- iso_file_template %>% + iso_file <- iso_file_template |> # set file path parameters set_ds_file_path( file_root = iso_file_template$file_info$file_root, @@ -121,14 +123,14 @@ process_iarc_samples <- function(iso_file_template, tasks, gas_configs, folder_p # processing info if (!default("quiet")) { - sprintf("processing sample '%s' (IRMS data '%s')", - generate_task_sample_id(task), - task$data_files %>% - dplyr::filter(!!sym("TypeIdentifier") == "Acquire") %>% - { .$DataFile } %>% - { if(length(.) > 0) str_c(., collapse = "', '") else "" } - #task$info$GlobalIdentifier - ) %>% + "processing sample '%s' (IRMS data '%s')" |> + sprintf( + generate_task_sample_id(task), + task$data_files |> + dplyr::filter(!!sym("TypeIdentifier") == "Acquire") |> + dplyr::pull(.data$DataFile) |> + if_not_empty_then(str_c, collapse = "', '", empty = "") + ) |> log_message(prefix = " ") } @@ -163,7 +165,7 @@ process_iarc_sample_info <- function(iso_file, task) { process_iarc_sample_data <- function(iso_file, task, gas_configs, folder_path) { # aquire = IRMS data - irms_data <- task$data_files %>% dplyr::filter(!!sym("TypeIdentifier") == "Acquire") + irms_data <- task$data_files |> dplyr::filter(!!sym("TypeIdentifier") == "Acquire") if (nrow(irms_data) == 0) stop("no IRMS acquisitions associated with this sample", call. 
= FALSE) # check for gas configurations @@ -176,7 +178,7 @@ process_iarc_sample_data <- function(iso_file, task, gas_configs, folder_path) { for (i in 1:nrow(irms_data)) { iso_file <- with(irms_data[i,], { filepath <- file.path(folder_path, DataFile) - run_time.s <- difftime(parse_datetime(AcquireEndDate, format = dt_format), parse_datetime(AcquireStartDate, format = dt_format), units = "s") %>% as.numeric() + run_time.s <- difftime(parse_datetime(AcquireEndDate, format = dt_format), parse_datetime(AcquireStartDate, format = dt_format), units = "s") |> as.numeric() read_irms_data_file(iso_file, filepath, gas_config, run_time.s, data_units = "nA", data_scaling = 1e-9) }) } @@ -198,27 +200,29 @@ read_irms_data_file <- function(iso_file, filepath, gas_config, run_time.s, data config <- gas_config$species[[dataset_attributes$Species]] # read irms data and determine which beams are used - irms_data <- rhdf5::h5read(filepath, "DataSet") %>% dplyr::as_tibble() + irms_data <- rhdf5::h5read(filepath, "DataSet") |> dplyr::as_tibble() rhdf5::H5close() # garbage collect if (!"Scan" %in% names(irms_data)) stop("Scan column missing from data file ", basename(filepath), call. = FALSE) - data_channels <- irms_data %>% names() %>% str_subset("^Beam") - config_channels <- config$channels %>% filter(.data$channel %in% data_channels) + data_channels <- irms_data |> names() |> str_subset("^Beam") + config_channels <- config$channels |> filter(.data$channel %in% data_channels) # safety check for channels (if no channels defined in the config at all, we've got problems) if ( nrow(config_channels) == 0) - stop("no channel information in gas configuration for ", data_channels %>% str_c(collapse = ", "), call. = FALSE) + stop("no channel information in gas configuration for ", data_channels |> str_c(collapse = ", "), call. 
= FALSE) # proceed only with the channels that are config defined irms_data <- irms_data[c("Scan", config_channels$channel)] - multiple <- config_channels %>% group_by(.data$channel) %>% summarize(n = n(), masses = str_c(.data$mass, collapse = ", ")) + multiple <- config_channels |> group_by(.data$channel) |> summarize(n = n(), masses = str_c(.data$mass, collapse = ", ")) if (any(multiple$n > 1)) { stop("cannot process beam channels, some channels assigned to more than one mass: ", - multiple %>% filter(n > 1) %>% mutate(label = paste0(.data$channel, ": ", .data$masses)) %>% - { .$label } %>% str_c(collapse = "; "), call. = FALSE) + multiple |> filter(n > 1) |> + mutate(label = paste0(.data$channel, ": ", .data$masses)) |> + dplyr::pull(.data$label) |> + str_c(collapse = "; "), call. = FALSE) } # h3 factor @@ -226,23 +230,26 @@ read_irms_data_file <- function(iso_file, filepath, gas_config, run_time.s, data iso_file$file_info$H3_factor <- config$H3_factor # rename channels - rename_dots <- config_channels %>% { rlang::set_names(.$channel, str_c("i", .$mass, ".", data_units)) } - irms_data <- irms_data %>% dplyr::rename(!!!rename_dots) + rename_dots <- rlang::set_names( + config_channels$channel, + str_c("i", config_channels$mass, ".", data_units) + ) + irms_data <- irms_data |> dplyr::rename(dplyr::all_of(rename_dots)) # scale currents scale_data <- function(x) x / data_scaling - irms_data <- irms_data %>% mutate_at(vars(starts_with("i")), scale_data) + irms_data <- irms_data |> mutate_at(vars(starts_with("i")), scale_data) # scale time dt <- run_time.s / nrow(irms_data) - irms_data <- irms_data %>% - rename(tp = .data$Scan) %>% - mutate(tp = as.integer(.data$tp), time.s = dt * .data$tp) %>% - select(.data$tp, .data$time.s, everything()) + irms_data <- irms_data |> + rename(tp = "Scan") |> + mutate(tp = as.integer(.data$tp), time.s = dt * .data$tp) |> + select("tp", "time.s", everything()) # store mass data if (nrow(iso_file$raw_data) > 0) { - existing <- 
iso_file$raw_data %>% select(starts_with("i")) %>% names() + existing <- iso_file$raw_data |> select(starts_with("i")) |> names() if ( any(dups <- existing %in% names(irms_data)) ) stop("same ions reported in multiple data files, cannot reconcile duplicate data: ", str_c(existing[dups], collapse = ", "), call. = FALSE) diff --git a/R/isoread_isodat.R b/R/isoread_isodat.R index a7b1690e..9e51a391 100644 --- a/R/isoread_isodat.R +++ b/R/isoread_isodat.R @@ -1,16 +1,17 @@ + # isodat file information common to multiple file types ===== # extract the datetime of the run extract_isodat_datetime <- function(ds) { # find date time - ds$binary <- ds$binary %>% - set_binary_file_error_prefix("cannot recover run datetime") %>% - move_to_C_block("CTimeObject") %>% - move_to_next_pattern(re_null(4), re_x_000()) %>% + ds$source <- ds$source |> + set_binary_file_error_prefix("cannot recover run datetime") |> + move_to_C_block("CTimeObject") |> + move_to_next_pattern(re_null(4), re_x_000()) |> capture_n_data("date", "integer", 1, sensible = c(0,1000*365*24*3600)) # 1000 years as sensible limit # store as POSIXct (converting seconds from CTimeObject) - use system time zone - ds$file_info$file_datetime <- as_datetime(ds$binary$data$date, tz = Sys.timezone()) + ds$file_info$file_datetime <- as_datetime(ds$source$data$date, tz = Sys.timezone()) return(ds) } @@ -18,15 +19,15 @@ extract_isodat_datetime <- function(ds) { extract_isodat_resistors <- function(ds) { # move to resistor information - ds$binary <- ds$binary %>% - set_binary_file_error_prefix("cannot recover resistors") %>% + ds$source <- ds$source |> + set_binary_file_error_prefix("cannot recover resistors") |> move_to_C_block("CEvalIntegrationUnitHWInfo") # cap depends on dxf vs did if (iso_is_continuous_flow(ds)) { - ds$binary <- cap_at_next_C_block(ds$binary, "CConfiguration") + ds$source <- cap_at_next_C_block(ds$source, "CConfiguration") } else if (iso_is_dual_inlet(ds)) { - ds$binary <- cap_at_next_C_block(ds$binary, 
"CGasConfiguration") + ds$source <- cap_at_next_C_block(ds$source, "CGasConfiguration") } # find resistors @@ -34,15 +35,15 @@ extract_isodat_resistors <- function(ds) { re_text_0(), re_text_0(), re_null(4), re_x_000()) R_post_re <- re_combine(re_x_000()) - positions <- find_next_patterns(ds$binary, R_pre_re, re_direct(".{20}", label = ".{20}"), R_post_re) + positions <- find_next_patterns(ds$source, R_pre_re, re_direct(".{20}", label = ".{20}"), R_post_re) resistors <- list() for (pos in positions) { - ds$binary <- ds$binary %>% - move_to_pos(pos + R_pre_re$size) %>% - capture_n_data("mass", "double", n = 1) %>% - capture_n_data("cup", "integer", n = 1) %>% + ds$source <- ds$source |> + move_to_pos(pos + R_pre_re$size) |> + capture_n_data("mass", "double", n = 1) |> + capture_n_data("cup", "integer", n = 1) |> capture_n_data("R.Ohm", "double", n = 1) - resistors <- c(resistors, list(ds$binary$data[c("cup", "R.Ohm", "mass")])) + resistors <- c(resistors, list(ds$source$data[c("cup", "R.Ohm", "mass")])) } ds$method_info$resistors <- bind_rows(resistors) if (nrow(ds$method_info$resistors) > 0) { @@ -56,19 +57,18 @@ extract_isodat_resistors <- function(ds) { # if mass data is read, double check that it's the right number of resistors if (ds$read_options$raw_data && nrow(ds$raw_data) > 0) { mass_column_pattern <- "^[vi](\\d+)\\.(.*)$" - masses <- ds$raw_data %>% - names() %>% - str_subset(mass_column_pattern) %>% - { if(length(.) == 0) return (NULL) else - str_match(., mass_column_pattern) %>% { .[,2] } - } + masses <- ds$raw_data |> + names() |> + str_subset(mass_column_pattern) + if (length(masses) == 0) masses <- NULL + else masses <- str_match(masses, mass_column_pattern)[,2] if (!setequal(masses, resistor_masses)) { ds <- register_warning( ds, func = "extract_isodat_resistors", details = sprintf("masses found for resistors (%s) do not match those found for the raw data (%s)", - resistor_masses %>% { if (length(.) 
> 0) str_c(., collapse = ",") else "none" }, - masses %>% { if (length(.) > 0) str_c(., collapse = ",") else "none" })) + if (length(resistor_masses) > 0) str_c(resistor_masses, collapse = ",") else "none", + if (length(masses) > 0) str_c(masses, collapse = ",") else "none")) } } return(ds) @@ -78,13 +78,13 @@ extract_isodat_resistors <- function(ds) { extract_isodat_reference_values <- function(ds, cap_at_fun = NULL) { # get secondar standard values - ds$binary <- ds$binary %>% - set_binary_file_error_prefix("cannot recover references") %>% + ds$source <- ds$source |> + set_binary_file_error_prefix("cannot recover references") |> move_to_C_block("CSecondaryStandardMethodPart", reset_cap = TRUE) # run cap at function if provided if (!is.null(cap_at_fun)) { - ds$binary <- cap_at_fun(ds$binary) + ds$source <- cap_at_fun(ds$source) } # instrument reference name reg exps @@ -94,18 +94,18 @@ extract_isodat_reference_values <- function(ds, cap_at_fun = NULL) { # capture reference names capture_ref_names <- function(pos) { - bin <- ds$binary %>% - move_to_pos(pos) %>% - move_to_next_pattern(instrument_pre1, max_gap = 0) %>% + bin <- ds$source |> + move_to_pos(pos) |> + move_to_next_pattern(instrument_pre1, max_gap = 0) |> capture_data_till_pattern("ref_name", "text", instrument_pre2, move_past_dots = TRUE) instrument_post1 <- re_combine(re_block("etx"), re_text_x(), re_unicode(bin$data$ref_name), re_text_x()) # check for gas configuration name if(!is.null(pos <- find_next_pattern(bin, re_combine(instrument_post1, re_block("text"), instrument_post2), max_gap = 0))) { - bin <- bin %>% - move_to_pos(pos) %>% - move_to_next_pattern(instrument_post1, max_gap = 0) %>% + bin <- bin |> + move_to_pos(pos) |> + move_to_next_pattern(instrument_post1, max_gap = 0) |> capture_data_till_pattern("config", "text", instrument_post2) } else { bin$data$config <- "" @@ -117,46 +117,46 @@ extract_isodat_reference_values <- function(ds, cap_at_fun = NULL) { # run refs capture start_pos <- 
find_next_patterns( - ds$binary, re_combine(instrument_pre1, re_block("text"), instrument_pre2)) + ds$source, re_combine(instrument_pre1, re_block("text"), instrument_pre2)) if (length(start_pos) == 0) { - stop("could not find reference names at position ", ds$binary$pos, + stop("could not find reference names at position ", ds$source$pos, ", no match for search ", instrument_pre1$label, "", instrument_pre2$label, call. = FALSE) } refs <- tibble( start_pos = start_pos, data = map(.data$start_pos, capture_ref_names) - ) %>% unnest(.data$data) + ) |> unnest("data") ### deltas # get reference delta values - ds$binary <- ds$binary %>% - set_binary_file_error_prefix("cannot recover reference delta values") %>% + ds$source <- ds$source |> + set_binary_file_error_prefix("cannot recover reference delta values") |> move_to_C_block("CSecondaryStandardMethodPart", reset_cap = FALSE) # find delta values delta_re <- re_combine(re_null(4), re_x_000(), re_text_x(), re_unicode("Delta ")) - positions <- find_next_patterns(ds$binary, delta_re) + positions <- find_next_patterns(ds$source, delta_re) # capture delta values capture_delta_values <- function(pos) { - bin <- ds$binary %>% - move_to_pos(pos) %>% - capture_data_till_pattern("delta_code", "text", re_text_x(), move_past_dots = TRUE) %>% - capture_data_till_pattern("delta_name", "text", re_text_x(), move_past_dots = TRUE) %>% - capture_data_till_pattern("delta_format", "text", re_text_x(), move_past_dots = TRUE) %>% - capture_data_till_pattern("gas", "text", re_text_0(), re_text_x(), move_past_dots = TRUE) %>% - #capture_data_till_pattern("delta_units", "text", re_text_x(), move_past_dots = TRUE) %>% - move_to_next_pattern(re_x_000(), re_x_000()) %>% - capture_n_data("delta_value", "double", 1) %>% - move_to_next_pattern(re_block("stx"), re_text_x()) %>% + bin <- ds$source |> + move_to_pos(pos) |> + capture_data_till_pattern("delta_code", "text", re_text_x(), move_past_dots = TRUE) |> + capture_data_till_pattern("delta_name", 
"text", re_text_x(), move_past_dots = TRUE) |> + capture_data_till_pattern("delta_format", "text", re_text_x(), move_past_dots = TRUE) |> + capture_data_till_pattern("gas", "text", re_text_0(), re_text_x(), move_past_dots = TRUE) |> + #capture_data_till_pattern("delta_units", "text", re_text_x(), move_past_dots = TRUE) |> + move_to_next_pattern(re_x_000(), re_x_000()) |> + capture_n_data("delta_value", "double", 1) |> + move_to_next_pattern(re_block("stx"), re_text_x()) |> capture_data_till_pattern("reference", "text", re_null(12), re_direct("([^\\x00]{2})?", label = "[^00]{2}"), re_x_000()) # return as data frame dplyr::as_tibble( bin$data[c("gas", "delta_code", "delta_name", "delta_value", "delta_format", "reference")] - ) %>% mutate( + ) |> mutate( standard = refs$name[max(which(bin$pos > refs$pos))] #config = refs$config[max(which(bin$pos > refs$pos))], # not actually used, usually the same as the $gas ) @@ -166,35 +166,35 @@ extract_isodat_reference_values <- function(ds, cap_at_fun = NULL) { deltas <- tibble( pos = positions + delta_re$size, data = map(.data$pos, capture_delta_values) - ) %>% unnest(.data$data) %>% + ) |> unnest("data") |> # delta_code is very isodat specific and not stored in final, delta_format does not really hold additional information - select(.data$standard, .data$gas, .data$delta_name, .data$delta_value, .data$reference) + select("standard", "gas", "delta_name", "delta_value", "reference") ### ratios # get reference delta values - ds$binary <- ds$binary %>% - set_binary_file_error_prefix("cannot recover reference ratio values") %>% + ds$source <- ds$source |> + set_binary_file_error_prefix("cannot recover reference ratio values") |> move_to_C_block("CSecondaryStandardMethodPart", reset_cap = FALSE) # find ratios ratio_re <- re_combine(re_null(4), re_x_000(), re_text_x(), re_unicode("Ratio ")) - positions <- find_next_patterns(ds$binary, ratio_re) + positions <- find_next_patterns(ds$source, ratio_re) # capture ratio values 
capture_ratio_values <- function(pos) { - bin <- ds$binary %>% - move_to_pos(pos) %>% - capture_data_till_pattern("ratio_code", "text", re_text_x(), move_past_dots = TRUE) %>% - capture_data_till_pattern("ratio_name", "text", re_text_x(), move_past_dots = TRUE) %>% - capture_data_till_pattern("ratio_format", "text", re_text_0(), re_text_x(), move_past_dots = TRUE) %>% - capture_data_till_pattern("element", "text", re_text_x(), move_past_dots = TRUE) %>% - move_to_next_pattern(re_x_000(), re_x_000()) %>% ### + bin <- ds$source |> + move_to_pos(pos) |> + capture_data_till_pattern("ratio_code", "text", re_text_x(), move_past_dots = TRUE) |> + capture_data_till_pattern("ratio_name", "text", re_text_x(), move_past_dots = TRUE) |> + capture_data_till_pattern("ratio_format", "text", re_text_0(), re_text_x(), move_past_dots = TRUE) |> + capture_data_till_pattern("element", "text", re_text_x(), move_past_dots = TRUE) |> + move_to_next_pattern(re_x_000(), re_x_000()) |> ### capture_n_data("ratio_value", "double", 1) # return as data frame - dplyr::as_tibble(bin$data[c("ratio_code", "element", "ratio_name", "ratio_value", "ratio_format")]) %>% + dplyr::as_tibble(bin$data[c("ratio_code", "element", "ratio_name", "ratio_value", "ratio_format")]) |> mutate( reference = refs$name[max(which(bin$pos > refs$pos))] ) @@ -205,9 +205,9 @@ extract_isodat_reference_values <- function(ds, cap_at_fun = NULL) { ratios <- tibble( pos = positions + ratio_re$size, data = map(.data$pos, capture_ratio_values) - ) %>% - unnest(.data$data) %>% - select(.data$reference, .data$element, .data$ratio_name, .data$ratio_value) + ) |> + unnest("data") |> + select("reference", "element", "ratio_name", "ratio_value") } else { # no ratios defined ratios <- tibble(reference = character(0), element = character(0), @@ -225,9 +225,9 @@ extract_isodat_reference_values <- function(ds, cap_at_fun = NULL) { extract_isodat_sequence_line_info <- function(ds) { # find sequence line information - ds$binary <- ds$binary 
%>% - set_binary_file_error_prefix("cannot process sequence line info") %>% - move_to_C_block_range("CParsedEvaluationString", "CBinary") %>% + ds$source <- ds$source |> + set_binary_file_error_prefix("cannot process sequence line info") |> + move_to_C_block_range("CParsedEvaluationString", "CBinary") |> move_to_next_pattern(re_unicode("Sequence Line Information")) seq_line_info <- list() @@ -235,9 +235,9 @@ extract_isodat_sequence_line_info <- function(ds) { re_null(4), re_or(re_combine(re_not_null(2), re_block("etx")), re_block("C-block"))) - caps <- find_next_patterns(ds$binary, re_end_of_info) + caps <- find_next_patterns(ds$source, re_end_of_info) if (length(caps) == 0) stop("could not any data", call. = FALSE) - positions <- c(ds$binary$pos, head(caps, -1)) + positions <- c(ds$source$pos, head(caps, -1)) # note: fef-x block seems to be used in .dxf, nl in .did re_val_var_break <- re_or(re_text_x(), re_block("nl")) re_val_var_break$size <- 4 @@ -245,22 +245,22 @@ extract_isodat_sequence_line_info <- function(ds) { # loop through all for (i in 1:length(positions)) { # capture value - ds$binary <- ds$binary %>% - move_to_pos(positions[i], reset_cap = TRUE) %>% - cap_at_pos(caps[i]) %>% - move_to_next_pattern(re_or(re_unicode("/"), re_unicode(".")), re_text_x()) %>% + ds$source <- ds$source |> + move_to_pos(positions[i], reset_cap = TRUE) |> + cap_at_pos(caps[i]) |> + move_to_next_pattern(re_or(re_unicode("/"), re_unicode(".")), re_text_x()) |> capture_data_till_pattern("value", "text", re_val_var_break, data_bytes_max = 500, move_past_dots = TRUE) # capture info name - info_length <- (ds$binary$max_pos - ds$binary$pos)/2 + info_length <- (ds$source$max_pos - ds$source$pos)/2 if (info_length %% 1 > 0) - stop("length of sequence info for value '", ds$binary$data$value, "' is not an integer (", info_length, ")", call. 
= FALSE) - ds$binary <- ds$binary %>% - capture_n_data("info", "text", (ds$binary$max_pos - ds$binary$pos)/2) + stop("length of sequence info for value '", ds$source$data$value, "' is not an integer (", info_length, ")", call. = FALSE) + ds$source <- ds$source |> + capture_n_data("info", "text", (ds$source$max_pos - ds$source$pos)/2) # store info - if (!is.null(ds$binary$data$info)) - ds$file_info[[ds$binary$data$info]] <- ds$binary$data$value + if (!is.null(ds$source$data$info)) + ds$file_info[[ds$source$data$info]] <- ds$source$data$value } return(ds) @@ -270,31 +270,31 @@ extract_isodat_sequence_line_info <- function(ds) { extract_isodat_old_sequence_line_info <- function(ds) { # find sequence line information - ds$binary <- ds$binary %>% - set_binary_file_error_prefix("cannot process sequence line info") %>% - move_to_C_block("CSequenceLineInformationGridStorage") %>% + ds$source <- ds$source |> + set_binary_file_error_prefix("cannot process sequence line info") |> + move_to_C_block("CSequenceLineInformationGridStorage") |> move_to_next_pattern(re_direct("\xff{12}", label = "ff{12}")) # block delimiter cap_pos <- find_next_pattern( - ds$binary, + ds$source, re_direct("\x86{3}\\x00\x96{3}\\x00\xCB{3}\\x00\xB2{3}\\x00\xD7{3}\\x00\xDD{3}\\x00", label = "86{3}0096{3}00cb{3}00b2{3}00d7{3}00dd{3}00") ) if (!is.null(cap_pos)) { - ds$binary <- ds$binary %>% cap_at_pos(cap_pos) - } else iso_source_file_op_error(ds$binary, "cannot find binary delimiter for end of Sequence Information") + ds$source <- ds$source |> cap_at_pos(cap_pos) + } else iso_source_file_op_error(ds$source, "cannot find binary delimiter for end of Sequence Information") # first line marker line_re <- re_combine( re_x_000(), re_direct(".{2,8}", label = ".{2,8}"), re_text_x(), re_unicode("Line")) - ds$binary <- ds$binary %>% - move_to_next_pattern(line_re, move_to_end = FALSE) %>% + ds$source <- ds$source |> + move_to_next_pattern(line_re, move_to_end = FALSE) |> capture_n_data("info_marker", "raw", 4) 
# regular expressions - re_entry_start <- re_control(ds$binary$data$info_marker) + re_entry_start <- re_control(ds$source$data$info_marker) label_pre_re <- re_combine(re_direct(".{2,8}", size = 8, label = ".{2,8}"), re_text_x()) # NOTE: all of these seem to be valid end blocks for text segements in this part of the file, any way to make this simpler? label_post_re <- re_or( @@ -305,21 +305,22 @@ extract_isodat_old_sequence_line_info <- function(ds) { re_combine(re_null(4), re_direct("\xff{3}\\x00", size = 4, label = "ff{3}00"))) # extract information - positions <- find_next_patterns(ds$binary, re_entry_start) + positions <- find_next_patterns(ds$source, re_entry_start) label <- value <- NULL # global vars labels <- list(list(label = "Line", label_marker = NA_character_)) values <- list() reached_values <- FALSE for (pos in positions) { - ds$binary <- ds$binary %>% move_to_pos(pos + re_entry_start$size) - if (!is.null(find_next_pattern(ds$binary, label_pre_re, max_gap = 0))) { - ds$binary <- ds$binary %>% - move_to_next_pattern(label_pre_re) %>% - { move_to_pos(., .$pos - 1) } %>% - capture_n_data("marker", "raw", 1) %>% + ds$source <- ds$source |> move_to_pos(pos + re_entry_start$size) + if (!is.null(find_next_pattern(ds$source, label_pre_re, max_gap = 0))) { + ds$source <- ds$source |> + move_to_next_pattern(label_pre_re) + ds$source <- ds$source |> + move_to_pos(ds$source$pos - 1) |> + capture_n_data("marker", "raw", 1) |> capture_data_till_pattern("text", "text", label_post_re) - text <- ds$binary$data$text - marker <- as.character(ds$binary$data$marker) + text <- ds$source$data$text + marker <- as.character(ds$source$data$marker) } else { text <- marker <- NA_character_ } @@ -350,8 +351,8 @@ extract_isodat_old_sequence_line_info <- function(ds) { # store file info file_info <- left_join( - bind_rows(labels) %>% mutate(n = 1:n()), - bind_rows(values) %>% mutate(n = 1:n()), + bind_rows(labels) |> mutate(n = 1:n()), + bind_rows(values) |> mutate(n = 1:n()), by = 
"n" ) @@ -371,17 +372,17 @@ extract_isodat_old_sequence_line_info <- function(ds) { extract_isodat_measurement_info <- function(ds) { # find measurement info - ds$binary <- ds$binary %>% - set_binary_file_error_prefix("cannot process measurement info") %>% - move_to_C_block_range("CMeasurmentInfos", "CMeasurmentErrors") %>% + ds$source <- ds$source |> + set_binary_file_error_prefix("cannot process measurement info") |> + move_to_C_block_range("CMeasurmentInfos", "CMeasurmentErrors") |> move_to_next_C_block("CISLScriptMessageData") isl_info_msgs <- c() - while(!is.null(find_next_pattern(ds$binary, re_unicode("CUserInfo")))) { - ds$binary <- ds$binary %>% - move_to_next_pattern(re_x_000(), re_text_x()) %>% + while(!is.null(find_next_pattern(ds$source, re_unicode("CUserInfo")))) { + ds$source <- ds$source |> + move_to_next_pattern(re_x_000(), re_text_x()) |> capture_data_till_pattern("info", "text", re_text_x(), re_unicode("CUserInfo"), move_past_dots = TRUE) - isl_info_msgs <- c(isl_info_msgs, ds$binary$data$info) + isl_info_msgs <- c(isl_info_msgs, ds$source$data$info) } # store all in one information set @@ -393,16 +394,16 @@ extract_isodat_measurement_info <- function(ds) { # extract H3 factor info extract_H3_factor_info <- function(ds) { # H3 factor (if available) - if ("CH3FactorResult" %in% ds$binary$C_blocks$block) { + if ("CH3FactorResult" %in% ds$source$C_blocks$block) { # extract H3 factor value (note H3 stability is not present) - ds$binary <- ds$binary %>% - set_binary_file_error_prefix("cannot extract H3 factor") %>% - move_to_C_block("CH3FactorResult") %>% - move_to_next_pattern(re_unicode("H3 Factor")) %>% - move_to_next_pattern(re_x_000()) %>% + ds$source <- ds$source |> + set_binary_file_error_prefix("cannot extract H3 factor") |> + move_to_C_block("CH3FactorResult") |> + move_to_next_pattern(re_unicode("H3 Factor")) |> + move_to_next_pattern(re_x_000()) |> capture_n_data("H3_factor", "double", 1) # this is a text field for compatibility with other 
file formats - ds$file_info$`H3 Factor` <- as.character(ds$binary$data$H3_factor) + ds$file_info$`H3 Factor` <- as.character(ds$source$data$H3_factor) } return(ds) } @@ -410,14 +411,14 @@ extract_H3_factor_info <- function(ds) { # extract MS integration time extract_MS_integration_time_info <- function(ds) { - ds$binary <- ds$binary %>% - set_binary_file_error_prefix("cannot extract MS integration time") %>% - move_to_C_block("CActionPeakCenter", move_to_end = FALSE) %>% - skip_pos(-5) %>% + ds$source <- ds$source |> + set_binary_file_error_prefix("cannot extract MS integration time") |> + move_to_C_block("CActionPeakCenter", move_to_end = FALSE) |> + skip_pos(-5) |> capture_n_data("ms_integration_time", "integer", 1, sensible = c(0L, 3600000L)) # store ms integration time (should this be text for compatibility with other formats?) - ds$file_info$MS_integration_time.s <- ds$binary$data$ms_integration_time/1000 + ds$file_info$MS_integration_time.s <- ds$source$data$ms_integration_time/1000 return(ds) } @@ -426,13 +427,13 @@ extract_MS_integration_time_info <- function(ds) { extract_isodat_continuous_flow_vendor_data_table <- function(ds, cap_at_fun = NULL) { # find vendor data table - ds$binary <- ds$binary %>% - set_binary_file_error_prefix("cannot process vendor identified peaks") %>% + ds$source <- ds$source |> + set_binary_file_error_prefix("cannot process vendor identified peaks") |> move_to_C_block("CGCPeakList") # run cap at function if provided if (!is.null(cap_at_fun)) { - ds$binary <- cap_at_fun(ds$binary) + ds$source <- cap_at_fun(ds$source) } ### basic peak info @@ -442,25 +443,25 @@ extract_isodat_continuous_flow_vendor_data_table <- function(ds, cap_at_fun = NU re_direct("(\\x00|[\x01-\x1f])\\x00{3}", size = 4, label = "00|[01-1f]00{3}"), re_x_000()) rt_re <- re_combine(rt_pre_re, re_direct("..\\x00{2}", label = "..00{2}"), re_x_000()) - positions <- find_next_patterns(ds$binary, rt_re) + positions <- find_next_patterns(ds$source, rt_re) rts <- list() 
for (pos in positions) { - ds$binary <- ds$binary %>% - move_to_pos(pos + rt_pre_re$size) %>% - capture_n_data("mass", "integer", 1) %>% - move_to_next_pattern(re_x_000(), max_gap = 0) %>% - capture_n_data("peak", "integer", 1) %>% - skip_pos(4) %>% # random xx00 follows - capture_n_data("start", "double", 1) %>% - capture_n_data("bg", "double", 1) %>% - skip_pos(4) %>% - capture_n_data("rt", "double", 1) %>% - capture_n_data("amp", "double", 1) %>% - skip_pos(4) %>% + ds$source <- ds$source |> + move_to_pos(pos + rt_pre_re$size) |> + capture_n_data("mass", "integer", 1) |> + move_to_next_pattern(re_x_000(), max_gap = 0) |> + capture_n_data("peak", "integer", 1) |> + skip_pos(4) |> # random xx00 follows + capture_n_data("start", "double", 1) |> + capture_n_data("bg", "double", 1) |> + skip_pos(4) |> + capture_n_data("rt", "double", 1) |> + capture_n_data("amp", "double", 1) |> + skip_pos(4) |> capture_n_data("end", "double", 1) # NOTE: after this there is another unknown value similar to 'bg', then the rIntensity, then the Intensity (but we get those from the data table too) # FIXME: is this maybe the more accurate background value? 
main vendor data table seems to not always pull the right one - rts <- c(rts, list(c(ini = pos, pos = ds$binary$pos, ds$binary$data[c("mass", "peak", "start", "rt", "end", "amp", "bg")]))) + rts <- c(rts, list(c(ini = pos, pos = ds$source$pos, ds$source$data[c("mass", "peak", "start", "rt", "end", "amp", "bg")]))) } # NOTE: the retention time is only ALMOST repeated each time, if there are significant @@ -474,24 +475,24 @@ extract_isodat_continuous_flow_vendor_data_table <- function(ds, cap_at_fun = NU # filter out false matches rts_df <- dplyr::filter(rts_df, .data$mass > 0) # retention times - peaks <- rts_df %>% - select(.data$peak, Start = .data$start, Rt = .data$rt, End = .data$end) %>% - distinct(.data$peak, .keep_all = TRUE) %>% + peaks <- rts_df |> + select("peak", Start = "start", Rt = "rt", End = "end") |> + distinct(.data$peak, .keep_all = TRUE) |> # add in amplitudes left_join( - rts_df %>% - select(.data$peak, Ampl = .data$mass, .data$amp) %>% + rts_df |> + select("peak", Ampl = "mass", "amp") |> spread(.data$Ampl, .data$amp, sep = " "), by = "peak" - ) %>% + ) |> # add in backgrounds left_join ( - rts_df %>% - select(.data$peak, BGD = .data$mass, .data$bg) %>% + rts_df |> + select("peak", BGD = "mass", "bg") |> spread(.data$BGD, .data$bg, sep = " "), by = "peak" - ) %>% - rename(`Nr.` = .data$peak) + ) |> + rename(`Nr.` = "peak") ### rest of data table extracted_dt <- extract_isodat_main_vendor_data_table_fast(ds, C_block = "CGCPeakList", cap_at_fun = cap_at_fun) @@ -504,15 +505,15 @@ extract_isodat_continuous_flow_vendor_data_table <- function(ds, cap_at_fun = NU if (any(is.na(data_table$Start) | any(is.na(data_table$.check)))) { ds <- register_warning(ds, details = "vendor data table has unexpected empty cells, process vendor table with care") } - ds$vendor_data_table <- select(data_table, -.data$.check) + ds$vendor_data_table <- select(data_table, -".check") # safe information on the column units attr(ds$vendor_data_table, "units") <- bind_rows( - 
dplyr::select(extracted_dt$columns, .data$column, units = .data$column_units), + dplyr::select(extracted_dt$columns, "column", units = "column_units"), tibble::tibble(column = c("Start", "Rt", "End"), units = "[s]"), - tibble::tibble(column = peaks %>% select(starts_with("Ampl"), starts_with("BGD")) %>% names(), units = "[mV]") - ) %>% + tibble::tibble(column = peaks |> select(starts_with("Ampl"), starts_with("BGD")) |> names(), units = "[mV]") + ) |> mutate(units = ifelse(units == " ", "", units)) # FIXME: do this directly @@ -531,21 +532,21 @@ extract_isodat_main_vendor_data_table <- function(ds, C_block, cap_at_fun = NULL skip_row_check = function(column, value) FALSE) { # main data table - ds$binary <- ds$binary %>% - set_binary_file_error_prefix("cannot process vendor data table") %>% - move_to_C_block(C_block, reset_cap = FALSE) %>% # important to NOT reset position cap + ds$source <- ds$source |> + set_binary_file_error_prefix("cannot process vendor data table") |> + move_to_C_block(C_block, reset_cap = FALSE) |> # important to NOT reset position cap move_to_next_C_block("CEvalDataIntTransferPart") # run cap at function if provided if (!is.null(cap_at_fun)) { - ds$binary <- cap_at_fun(ds$binary) + ds$source <- cap_at_fun(ds$source) } # find columns and row data for the whole data table pre_column_re <- re_combine( re_or(re_unicode("/"), re_unicode("-"), re_unicode(","), re_unicode("."), size = 2), re_text_0(), re_text_0(), re_null(4), re_x_000(), re_text_x()) - positions <- find_next_patterns(ds$binary, pre_column_re) + positions <- find_next_patterns(ds$source, pre_column_re) # collection variables columns <- list() @@ -555,19 +556,19 @@ extract_isodat_main_vendor_data_table <- function(ds, C_block, cap_at_fun = NULL for(pos in positions) { # get column name - ds$binary <- ds$binary %>% - move_to_pos(pos + pre_column_re$size) %>% + ds$source <- ds$source |> + move_to_pos(pos + pre_column_re$size) |> # skip ID column since it is not unique in peak jumping 
files move_to_next_pattern(re_text_x()) # capture column - ds$binary <- ds$binary %>% + ds$source <- ds$source |> capture_data_till_pattern("column", "raw", re_text_x(), move_past_dots = TRUE, ignore_trailing_zeros = FALSE) # check for columns starting with delta symbol, replace with d instead of delta symbol - if (identical(ds$binary$data$column[1:2], as.raw(c(180, 03)))) - ds$binary$data$column[1:2] <- as.raw(c(100, 00)) - col <- parse_raw_data(ds$binary$data$column, "text") + if (identical(ds$source$data$column[1:2], as.raw(c(180, 03)))) + ds$source$data$column[1:2] <- as.raw(c(100, 00)) + col <- parse_raw_data(ds$source$data$column, "text") # skip columns that don't fit the include criteria right away if (!grepl(col_include, col)) next #skip @@ -583,80 +584,80 @@ extract_isodat_main_vendor_data_table <- function(ds, C_block, cap_at_fun = NULL if (skip_row) next # skip # check if have a proper units next - if (is.null(find_next_pattern(ds$binary, re_block("text"), re_text_x(), max_gap = 0))) { + if (is.null(find_next_pattern(ds$source, re_block("text"), re_text_x(), max_gap = 0))) { # this is something else, not a proper units block next # skip } # get column formatting - ds$binary <- ds$binary %>% + ds$source <- ds$source |> # retrieve format (!not always the same) capture_data_till_pattern("format", "text", re_text_x(), move_past_dots = TRUE) # skip data columns without propre formatting infromation right away - if(ds$binary$data$format %in% c("", " ") || nchar(ds$binary$data$format) > 4) { + if(ds$source$data$format %in% c("", " ") || nchar(ds$source$data$format) > 4) { next # skip } # store information about new column if not already stored if (!col %in% names(columns)) { - ds$binary <- ds$binary %>% + ds$source <- ds$source |> # skip what looks like it might be the gas configuration and an unknown piece of information - move_to_next_pattern(re_block("text0"), re_text_x(), re_block("text0"), re_text_x()) %>% + move_to_next_pattern(re_block("text0"), 
re_text_x(), re_block("text0"), re_text_x()) |> capture_data_till_pattern("units", "raw", re_text_x(), move_past_dots = TRUE, ignore_trailing_zeros = FALSE) # retrieve units # process isodat units - ds$binary$data$units <- process_isodat_units(ds$binary$data$units) + ds$source$data$units <- process_isodat_units(ds$source$data$units) # data format type <- - if (ds$binary$data$format == "%s") { "text" - } else if (ds$binary$data$format %in% c("%u", "%d")) { "integer" - } else if (str_detect(ds$binary$data$format, "\\%[0-9.]*f")) { "double" - } else { iso_source_file_op_error(ds$binary, + if (ds$source$data$format == "%s") { "text" + } else if (ds$source$data$format %in% c("%u", "%d")) { "integer" + } else if (str_detect(ds$source$data$format, "\\%[0-9.]*f")) { "double" + } else { iso_source_file_op_error(ds$source, sprintf("could not process data table column format '%s' for column '%s'", - ds$binary$data$format, col)) + ds$source$data$format, col)) } # store - new_col <- c(list(pos = ds$binary$pos, type = type, column = col), ds$binary$data[c("format", "units")]) + new_col <- c(list(pos = ds$source$pos, type = type, column = col), ds$source$data[c("format", "units")]) columns[[col]] <- new_col - } else if (ds$binary$data$format != columns[[col]]$format) { + } else if (ds$source$data$format != columns[[col]]$format) { # double check formatting - iso_source_file_op_error(ds$binary, + iso_source_file_op_error(ds$source, sprintf("mismatched data column format for column '%s', found '%s' but expected '%s'", - col, ds$binary$data$format, columns[[col]]$format)) + col, ds$source$data$format, columns[[col]]$format)) } # capture data if (columns[[col]]$type == "text") { - ds$binary <- - ds$binary %>% move_to_next_pattern( + ds$source <- + ds$source |> move_to_next_pattern( re_x_000(), re_direct("\\x00{4,6}", label = "00{4,6}"), - re_x_000(), re_x_000()) %>% + re_x_000(), re_x_000()) |> capture_data_till_pattern("value", "text", re_null(2), re_direct("..", label = ".."), 
re_block("etx")) } else { - ds$binary <- - ds$binary %>% move_to_next_pattern(re_x_000(), re_x_000()) %>% + ds$source <- + ds$source |> move_to_next_pattern(re_x_000(), re_x_000()) |> capture_data_till_pattern("value", columns[[col]]$type, re_x_000(), data_bytes_min = 4) # read at least one number # sanity checks - if (is.nan(ds$binary$data$value)) { - ds$binary$data$value <- NA # safety to catch things that are not valid numbers at all - } else if (length(ds$binary$data$value) > 1) { - iso_source_file_op_error(ds$binary, sprintf("expected one value for cell '%s' but found %d", col, length(ds$binary$data$value))) - } else if (ds$binary$data$value != 0 && (abs(ds$binary$data$value) < 1e-100 || abs(ds$binary$data$value) > 1e100)) { - iso_source_file_op_error(ds$binary, sprintf("found cell value '%s' for cell '%s' which is not a sensible numeric value", str_c(ds$binary$data$value), col)) + if (is.nan(ds$source$data$value)) { + ds$source$data$value <- NA # safety to catch things that are not valid numbers at all + } else if (length(ds$source$data$value) > 1) { + iso_source_file_op_error(ds$source, sprintf("expected one value for cell '%s' but found %d", col, length(ds$source$data$value))) + } else if (ds$source$data$value != 0 && (abs(ds$source$data$value) < 1e-100 || abs(ds$source$data$value) > 1e100)) { + iso_source_file_op_error(ds$source, sprintf("found cell value '%s' for cell '%s' which is not a sensible numeric value", str_c(ds$source$data$value), col)) } } # check whether row should be skipped - skip_row <- do.call(skip_row_check, args = list(col, ds$binary$data$value)) + skip_row <- do.call(skip_row_check, args = list(col, ds$source$data$value)) if (skip_row) { rows[[rows_i]] <- NULL } else { - rows[[rows_i]][[col]] <- ds$binary$data$value + rows[[rows_i]][[col]] <- ds$source$data$value } } @@ -682,18 +683,18 @@ process_isodat_units <- function(raw_units) { pos1 = raw_units, pos2 = c(raw_units[-1], as.raw(0x00)), idx = 1:length(.data$pos1) - ) %>% + ) |> 
mutate( is_permil = purrr::map2_lgl(.data$pos1, .data$pos2, ~ identical(c(.x, .y), raw_permil)), pos1 = purrr::map2(.data$pos1, .data$is_permil, ~ { if(.y) text_permil else .x }) - ) %>% + ) |> filter( !.data$idx %in% (.data$idx[.data$is_permil] + 1L) - ) %>% - dplyr::pull(.data$pos1) %>% + ) |> + dplyr::pull(.data$pos1) |> unlist() } @@ -716,14 +717,14 @@ process_isodat_units <- function(raw_units) { extract_isodat_main_vendor_data_table_fast <- function(ds, C_block, cap_at_fun = NULL, col_include = NULL) { # main data table - ds$binary <- ds$binary %>% - set_binary_file_error_prefix("cannot process vendor data table") %>% - move_to_control_block(block = C_block, reset_cap = TRUE) %>% + ds$source <- ds$source |> + set_binary_file_error_prefix("cannot process vendor data table") |> + move_to_control_block(block = C_block, reset_cap = TRUE) |> move_to_next_control_block(block = "CEvalDataIntTransferPart") # run cap at function if provided if (!is.null(cap_at_fun)) { - ds$binary <- cap_at_fun(ds$binary) + ds$source <- cap_at_fun(ds$source) } columns <- extract_isodat_main_vendor_data_table_columns(ds, col_include = col_include) @@ -732,13 +733,13 @@ extract_isodat_main_vendor_data_table_fast <- function(ds, C_block, cap_at_fun = if (!all(ok <- columns$n_formats == 1)) { formats <- map_chr(columns$data[!ok], ~collapse(unique(.x$format), ", ")) problems <- glue("column {columns$column[!ok]} has multiple formats '{formats}'") - iso_source_file_op_error(ds$binary, glue("mismatched data column formats:\n{collapse(problems, '\n')}")) + iso_source_file_op_error(ds$source, glue("mismatched data column formats:\n{collapse(problems, '\n')}")) } # safety check: to make sure all formats are resolved if (!all(ok <- !is.na(columns$type))) { problems <- glue("column {columns$column[!ok]} has unknown format '{columns$column_format[!ok]}'") - iso_source_file_op_error(ds$binary, glue("unknown column formats:\n{collapse(problems, '\n')}")) + iso_source_file_op_error(ds$source, 
glue("unknown column formats:\n{collapse(problems, '\n')}")) } cell_values <- extract_isodat_main_vendor_data_table_values(ds, columns) @@ -747,32 +748,32 @@ extract_isodat_main_vendor_data_table_fast <- function(ds, C_block, cap_at_fun = } # extract the main (recurring) portion of the vendor data table -extract_isodat_main_vendor_data_table_columns <- function(ds, pos = ds$binary$pos, max = ds$binary$max_pos, col_include = NULL) { +extract_isodat_main_vendor_data_table_columns <- function(ds, pos = ds$source$pos, max = ds$source$max_pos, col_include = NULL) { # error message - ds$binary <- ds$binary %>% - set_pos_and_cap(pos = pos, max = max) %>% + ds$source <- ds$source |> + set_pos_and_cap(pos = pos, max = max) |> set_binary_file_error_prefix("cannot retrieve vendor data table columns") # find columns and row data for the whole data table pre_column_re <- re_combine( re_or(re_unicode("/"), re_unicode("-"), re_unicode(","), re_unicode("."), size = 2), re_text_0(), re_text_0(), re_null(4), re_x_000(), re_text_x()) - positions <- find_next_patterns(ds$binary, pre_column_re) + pre_column_re$size - 4L + positions <- find_next_patterns(ds$source, pre_column_re) + pre_column_re$size - 4L if (length(positions) == 0) { - iso_source_file_op_error(ds$binary, "vendor data column pattern yielded no matches, missing data table?") + iso_source_file_op_error(ds$source, "vendor data column pattern yielded no matches, missing data table?") } # make sure to skip Raw Data to Overwritten block overwritten_block <- integer(0) - raw_data_block <- fetch_block_idx(ds$binary, type = "text", block = "Raw Data", min_pos = ds$binary$pos, max_pos = ds$binary$max_pos, occurence = 1) + raw_data_block <- fetch_block_idx(ds$source, type = "text", block = "Raw Data", min_pos = ds$source$pos, max_pos = ds$source$max_pos, occurence = 1) if (length(raw_data_block) > 0) { - overwritten_block <- fetch_block_idx(ds$binary, type = "text", block = "Overwritten", - min_block_idx = raw_data_block, max_pos 
= ds$binary$max_pos, occurence = 1) + overwritten_block <- fetch_block_idx(ds$source, type = "text", block = "Overwritten", + min_block_idx = raw_data_block, max_pos = ds$source$max_pos, occurence = 1) } if(length(raw_data_block) > 0 && length(overwritten_block) > 0) { - filter_expr <- rlang::expr(block_idx < !!raw_data_block | block_idx > !!overwritten_block) + filter_expr <- rlang::expr(.data$block_idx < !!raw_data_block | .data$block_idx > !!overwritten_block) } else { filter_expr <- rlang::expr(TRUE) } @@ -785,23 +786,24 @@ extract_isodat_main_vendor_data_table_columns <- function(ds, pos = ds$binary$po # pull out all the columns columns <- - ds$binary %>% - fetch_block_entry(filter = !!filter_expr, type = "text", min_pos = positions[1], max_pos = ds$binary$max_pos) %>% - dplyr::mutate(group = purrr::map_int(.data$start, ~sum(.x >= positions))) %>% - dplyr::group_by(group) %>% + ds$source |> + fetch_block_entry(filter = !!filter_expr, type = "text", min_pos = positions[1], max_pos = ds$source$max_pos) |> + dplyr::mutate(group = purrr::map_int(.data$start, ~sum(.x >= positions))) |> + dplyr::group_by(.data$group) |> dplyr::mutate( idx = 1:n(), continue_pos = max(.data$end[.data$idx <= nrow(cols_info)]) - ) %>% - dplyr::ungroup() %>% - dplyr::inner_join(cols_info, by = "idx") %>% - tidyr::pivot_wider(id_cols = c(.data$group, .data$continue_pos), names_from = .data$col, values_from = .data$block) %>% + ) |> + dplyr::ungroup() |> + dplyr::inner_join(cols_info, by = "idx") |> + tidyr::pivot_wider(id_cols = c("group", "continue_pos"), + names_from = "col", values_from = "block") |> # skip entries that don't have formats - filter(!is.na(format), nchar(format) > 1) %>% + filter(!is.na(format), nchar(format) > 1) |> # row numbers - dplyr::mutate(row = cumsum(.data$column == .data$column[1])) %>% + dplyr::mutate(row = cumsum(.data$column == .data$column[1])) |> # remove duplicates - dplyr::group_by(column, row) %>% + dplyr::group_by(.data$column, .data$row) |> 
dplyr::summarize( group = .data$group[1], continue_pos = .data$continue_pos[1], @@ -811,10 +813,10 @@ extract_isodat_main_vendor_data_table_columns <- function(ds, pos = ds$binary$po units = .data$units[1], ref_frame = .data$units[1], .groups = "drop" - ) %>% - dplyr::arrange(group) %>% + ) |> + dplyr::arrange(.data$group) |> # nest by column and expand column details - tidyr::nest(data = c(-.data$column)) %>% + tidyr::nest(data = c(-"column")) |> dplyr::mutate( n_formats = purrr::map_int(.data$data, ~length(unique(.x$format))), column_format = purrr::map_chr(.data$data, ~.x$format[1]), @@ -826,20 +828,21 @@ extract_isodat_main_vendor_data_table_columns <- function(ds, pos = ds$binary$po str_detect(.data$column_format, "\\%[0-9.]*f") ~ "double", TRUE ~ NA_character_ ) - ) %>% + ) |> # naming adjustments dplyr::mutate( # avoid issues with delta symbol on different OS - column = stringr::str_replace(column, fixed("\U03B4"), "d"), + column = stringr::str_replace(.data$column, fixed("\U03B4"), "d"), # and rename per mil and \U2030 to permil column_units = - stringr::str_replace(column_units, fixed("per mil"), "permil") %>% + stringr::str_replace(.data$column_units, fixed("per mil"), "permil") |> stringr::str_replace(fixed("\U2030"), "permil") ) # apply col_include parameter if provided - if (!is.null(col_include)) columns <- filter(columns, str_detect(column, !!col_include)) + if (!is.null(col_include)) + columns <- filter(columns, str_detect(.data$column, !!col_include)) return(columns) } @@ -855,40 +858,41 @@ extract_isodat_main_vendor_data_table_values <- function(ds, columns) { # NOTES: adds about 1s to a 300 cell table) capture_table_cell_value <- function(pos, type) { # capture data - bin <- ds$binary %>% move_to_pos(pos) + bin <- ds$source |> move_to_pos(pos) if (type == "text") { bin <- - bin %>% move_to_next_pattern( + bin |> move_to_next_pattern( re_x_000(), re_direct("\\x00{4,6}", label = "00{4,6}"), - re_x_000(), re_x_000()) %>% + re_x_000(), re_x_000()) |> 
capture_data_till_pattern("value", "text", re_null(2), re_direct("..", label = ".."), re_x_000()) } else { bin <- - bin %>% move_to_next_pattern(re_x_000(), re_x_000()) %>% + bin |> move_to_next_pattern(re_x_000(), re_x_000()) |> capture_data_till_pattern("value", type, re_x_000(), data_bytes_min = 4) # read at least one number # sanity checks if (length(bin$data$value) > 1) { - iso_source_file_op_error(ds$binary, sprintf("expected one cell value but found %d", length(bin$data$value))) + iso_source_file_op_error(ds$source, sprintf("expected one cell value but found %d", length(bin$data$value))) } else if (!is.nan(bin$data$value) && bin$data$value != 0 && (abs(bin$data$value) < 1e-100 || abs(bin$data$value) > 1e100)) { - iso_source_file_op_error(ds$binary, sprintf("found cell value '%s' which is not a sensible numeric value", str_c(ds$binary$data$value))) + iso_source_file_op_error(ds$source, sprintf("found cell value '%s' which is not a sensible numeric value", str_c(ds$source$data$value))) } } return(bin$data$value) } # get cell values - columns %>% - filter(!is.na(type)) %>% - unnest(data) %>% - select(column, continue_pos, type, row) %>% - nest(data = c(-row)) %>% + columns |> + filter(!is.na(type)) |> + unnest("data") |> + select("column", "continue_pos", "type", "row") |> + nest(data = c(-row)) |> mutate( data = map(data, function(row) { # note: have to do it this way (by row instead of in long format) to keep correct data types for each column mapply(function(x,y,z) list(capture_table_cell_value(y, z)), row$column, row$continue_pos, row$type) }) - ) %>% - { bind_rows(.$data) } + ) |> + dplyr::pull(data) |> + bind_rows() } diff --git a/R/isoread_nu.R b/R/isoread_nu.R index 64fb1384..0e8f6a16 100644 --- a/R/isoread_nu.R +++ b/R/isoread_nu.R @@ -109,7 +109,7 @@ iso_read_nu <- function(ds, options = list()) { if (!is.null(ds$raw_data) && nrow(ds$raw_data) > 0) { if (any(n_channels <- stringr::str_detect(names(ds$raw_data), "^[iIvV]C(\\d+)"))) { # only have 
channel information, provide warning - ds <- ds %>% register_warning( + ds <- ds |> register_warning( glue::glue( "found {sum(n_channels)} channels but {length(options$nu_masses)} masses were specified ", "- the raw data will be reported in channels instead of masses. ", @@ -130,9 +130,10 @@ read_nu_data_file <- function(filepath) { stop("file does not exist or is a directory: ", filepath, call. = TRUE) # read file data - readLines(con = filepath) %>% - group_lines("^\\\"") %>% - return() + lines <- readLines(con = filepath) |> + group_lines("^\\\"") + + return(lines) } # parser functions ======= @@ -211,7 +212,7 @@ process_nu_parser <- function(ds, parser, options = list()) { else FALSE if (!meets_n_req) { - glue::glue("capture failed, parser expected {parser$n_req} value(s) but found {length(matches)}") %>% + glue::glue("capture failed, parser expected {parser$n_req} value(s) but found {length(matches)}") |> stop(call. = FALSE) } @@ -248,7 +249,7 @@ parse_nu_data <- function(data, n_blocks, n_channels, masses = c()) { # sanity checks on block number if (nrow(raw_data) != n_blocks) { - glue::glue("found {nrow(raw_data)} data blocks, expected {n_blocks}") %>% stop(call. = FALSE) + glue::glue("found {nrow(raw_data)} data blocks, expected {n_blocks}") |> stop(call. 
= FALSE) } # read in zeros @@ -256,18 +257,18 @@ parse_nu_data <- function(data, n_blocks, n_channels, masses = c()) { # prepare raw data raw_data <- - raw_data %>% - select(block = group, data) %>% + raw_data |> + select(block = "group", "data") |> # unpack the blocks mutate( n_channels = !!n_channels, - n_cycles = map_int(data, ~str_subset(.x, fixed("No_C_O_Cycles")) %>% readr::parse_number() %>% as.integer()), - cycle_length = map_int(data, ~str_subset(.x, fixed("Cycle_Length")) %>% readr::parse_number() %>% as.integer()), - zero_length = map_int(data, ~str_subset(.x, fixed("Zero_Measurement_Length")) %>% readr::parse_number() %>% as.integer()), + n_cycles = map_int(data, ~str_subset(.x, fixed("No_C_O_Cycles")) |> readr::parse_number() |> as.integer()), + cycle_length = map_int(data, ~str_subset(.x, fixed("Cycle_Length")) |> readr::parse_number() |> as.integer()), + zero_length = map_int(data, ~str_subset(.x, fixed("Zero_Measurement_Length")) |> readr::parse_number() |> as.integer()), data = map(data, group_lines, "^\\s*Gas") - ) %>% + ) |> # unpack the 'Gas...' 
data chunks - unnest(data) %>% + unnest("data") |> mutate( is_ref = str_detect(header, "^\\s*Gas\\s+Ref"), is_sample = str_detect(header, "^\\s*Gas\\s+Sam"), @@ -282,7 +283,7 @@ parse_nu_data <- function(data, n_blocks, n_channels, masses = c()) { # check for problems (log & return empty dta frames) if (n_problems(zero_data) > 0 || n_problems(raw_data) > 0) { - retval <- list(bgrd_data = tibble(), raw_data = tibble()) %>% + retval <- list(bgrd_data = tibble(), raw_data = tibble()) |> set_problems(combined_problems(zero_data, raw_data)) return(retval) } @@ -292,20 +293,20 @@ parse_nu_data <- function(data, n_blocks, n_channels, masses = c()) { data_blocks <- unique(raw_data$block) if (length(zero_blocks) == 1) { # single zero block for all - zero_data <- select(zero_data, -block) %>% - tidyr::crossing(select(raw_data, block) %>% unique) %>% - select(block, everything()) + zero_data <- select(zero_data, -"block") |> + tidyr::crossing(select(raw_data, "block") |> unique()) |> + select("block", everything()) } else if (!setequal(zero_blocks, data_blocks)) { - glue::glue("found {length(zero_blocks)} zero blocks, expected {length(data_blocks)}") %>% + glue::glue("found {length(zero_blocks)} zero blocks, expected {length(data_blocks)}") |> stop(call. 
= FALSE) } # subtract zeros from data raw_data <- - raw_data %>% - left_join(rename(zero_data, background = intensity), by = c("block", "channel")) %>% - mutate(intensity = intensity - background) %>% - select(-background) + raw_data |> + left_join(rename(zero_data, background = "intensity"), by = c("block", "channel")) |> + mutate(intensity = intensity - background) |> + select(-"background") # spread data zero_data <- tidyr::spread(zero_data, channel, intensity) @@ -323,16 +324,16 @@ parse_nu_zero_data <- function(raw_data, masses = c()) { # process raw data for zeros df <- - raw_data %>% - group_by(block) %>% - mutate(data = map(data, ~tibble(channel = seq_along(.x), intensities = stringr::str_split(.x, "\\s+")))) %>% - select(block, n_channels, zero_length, data) + raw_data |> + group_by(block) |> + mutate(data = map(data, ~tibble(channel = seq_along(.x), intensities = stringr::str_split(.x, "\\s+")))) |> + select("block", "n_channels", "zero_length", "data") # safety checks check_channels(df) # safety check on cycle length and first value - df_channels <- unnest(df, data) + df_channels <- unnest(df, "data") check_cycle_length(df_channels, "zero_length") # calculate intensities @@ -349,27 +350,27 @@ parse_nu_raw_data <- function(raw_data, masses = c()) { # process raw data df <- - raw_data %>% + raw_data |> # determine cycles - group_by(block, is_ref) %>% - mutate(cycle = 1:n() - is_ref) %>% ungroup() %>% + group_by(block, is_ref) |> + mutate(cycle = 1:n() - is_ref) |> ungroup() |> # convert string data to numeric data mutate( type = ifelse(is_ref, "standard", "sample"), data = map(data, ~tibble(channel = seq_along(.x), intensities = stringr::str_split(.x, "\\s+"))) - ) %>% - select(block, n_channels, n_cycles, cycle_length, data, cycle, type) + ) |> + select("block", "n_channels", "n_cycles", "cycle_length", "data", "cycle", "type") # safety check on cycles cycles_count <- - dplyr::count(df, block, n_cycles, type) %>% + dplyr::count(df, block, n_cycles, type) 
|> mutate( n = n - (type == "standard"), # the inital REF cycle add 1 to the "standard" check = n == n_cycles) if (!all(cycles_count$check)) { glue::glue( "found data for {str_c(unique(cycles_count$n), collapse = ', ')} cycles, ", - "expected {cycles_count$n_cycles[1]}") %>% + "expected {cycles_count$n_cycles[1]}") |> stop(call. = FALSE) } @@ -377,7 +378,7 @@ parse_nu_raw_data <- function(raw_data, masses = c()) { check_channels(df) # safety check on cycle length and first value - df_channels <- unnest(df, data) + df_channels <- unnest(df, "data") check_cycle_length(df_channels, "cycle_length") # calculate intensities @@ -393,12 +394,12 @@ check_channels <- function(df) { data <- channel_n <- n_channels <- NULL channels_count <- - mutate(df, channel_n = map_int(data, nrow)) %>% + mutate(df, channel_n = map_int(data, nrow)) |> mutate(check = channel_n == n_channels) if (!all(channels_count$check)) { glue::glue( "found data for {str_c(unique(channels_count$channel_n), collapse = ', ')} channels, ", - "expected {channels_count$n_channels[1]}") %>% + "expected {channels_count$n_channels[1]}") |> stop(call. = FALSE) } } @@ -409,7 +410,7 @@ check_cycle_length <- function(df_channels, length_column) { # global vars intensities <- intensities_n <- first_value_0 <- NULL - intensities_check <- df_channels %>% + intensities_check <- df_channels |> mutate( intensities_n = map_int(intensities, length), check = intensities_n == !!sym(length_column) + 1L, @@ -420,14 +421,14 @@ check_cycle_length <- function(df_channels, length_column) { if (!all(intensities_check$check)) { glue::glue( "found {str_c(unique(intensities_check$intensities_n - 1L), collapse = ', ')} measurements, ", - "expected {intensities_check[[length_column]][1]}") %>% + "expected {intensities_check[[length_column]][1]}") |> stop(call. 
= FALSE) } if (!all(intensities_check$first_value_0)) { - wrong_first_value <- filter(intensities_check, !first_value_0)$intensities %>% map_chr(~.x[1]) %>% unique() + wrong_first_value <- filter(intensities_check, !first_value_0)$intensities |> map_chr(~.x[1]) |> unique() glue::glue( "found {str_c(wrong_first_value, collapse = ', ')} as first value(s), ", - "expected 0.000000E+00") %>% + "expected 0.000000E+00") |> stop(call. = FALSE) } } @@ -436,11 +437,11 @@ check_cycle_length <- function(df_channels, length_column) { calculate_intensities <- function(df_channels, grouping, masses = c()) { # calculate raw data intensities - df_intensities <- df_channels %>% - unnest(.data$intensities) %>% - mutate(intensities = as.numeric(.data$intensities)) %>% - group_by(!!!map(grouping, sym)) %>% - summarize(intensity = mean(.data$intensities[-1])) %>% + df_intensities <- df_channels |> + unnest("intensities") |> + mutate(intensities = as.numeric(.data$intensities)) |> + group_by(!!!map(grouping, sym)) |> + summarize(intensity = mean(.data$intensities[-1])) |> ungroup() # convert channels to masses @@ -452,14 +453,14 @@ calculate_intensities <- function(df_channels, grouping, masses = c()) { channel = 1L:n_channels, mass = paste0("i", unname(!!masses), ".A") ) - df_intensities <- df_intensities %>% - left_join(masses, by = "channel") %>% - select(-.data$channel) %>% - rename(channel = .data$mass) + df_intensities <- df_intensities |> + left_join(masses, by = "channel") |> + select(-"channel") |> + rename(channel = "mass") } else { # don't have the right number - df_intensities <- df_intensities %>% + df_intensities <- df_intensities |> mutate(channel = sprintf("iC%d.A", .data$channel)) } @@ -478,9 +479,9 @@ group_lines <- function(lines, group_regexp) { lines = lines, line = seq_along(lines), group = cumsum(str_detect(lines, group_regexp)) - ) %>% - filter(group > 0) %>% - group_by(group) %>% + ) |> + filter(group > 0) |> + group_by(group) |> summarize( start = min(line), end 
= max(line), diff --git a/R/isoread_rds.R b/R/isoread_rds.R index 2ea274c7..ab956544 100644 --- a/R/isoread_rds.R +++ b/R/isoread_rds.R @@ -14,7 +14,7 @@ iso_read_rds <- function(ds, options = list()) { if (!is.list(isofiles)) isofiles <- as.list(isofiles) is_old_isofile <- map_lgl(isofiles, ~is(.x, "isofile")) isofiles[is_old_isofile] <- map(isofiles[is_old_isofile], function(isofile) { - class(isofile) <- class(isofile) %>% { .[.!="isofile"] } %>% c("iso_file") + class(isofile) <- class(isofile)[class(isofile)!="isofile"] |> c("iso_file") return(isofile) }) iso_files <- iso_as_file_list(isofiles) @@ -24,14 +24,14 @@ iso_read_rds <- function(ds, options = list()) { iso_files <- iso_as_file_list(iso_files) # make sure all are the appropriate classes - if (!all(ok <- lapply(iso_files, class) %>% sapply(identical, class(ds)))) + if (!all(ok <- lapply(iso_files, class) |> sapply(identical, class(ds)))) sprintf("Mismatched file types, expected '%s' but encountered '%s'", - str_c(class(ds)[1]), str_c(iso_files[!ok] %>% sapply(function(i) class(i)[1]) %>% unique(), collapse = ", ")) %>% + str_c(class(ds)[1]), str_c(iso_files[!ok] |> sapply(function(i) class(i)[1]) |> unique(), collapse = ", ")) |> stop(call. = FALSE) # information if (!default("quiet")) { - sprintf("loaded %d data files from R Data Storage", length(iso_files)) %>% + sprintf("loaded %d data files from R Data Storage", length(iso_files)) |> log_message() } diff --git a/R/isoread_scn.R b/R/isoread_scn.R index 42c72644..940ae208 100644 --- a/R/isoread_scn.R +++ b/R/isoread_scn.R @@ -8,7 +8,7 @@ iso_read_scn <- function(ds, options = list()) { stop("data structure must be a 'scan' iso_file", call. 
= FALSE) # read binary file - ds$binary <- get_ds_file_path(ds) %>% read_binary_isodat_file() + ds$source <- get_ds_file_path(ds) |> read_binary_isodat_file() # get scan file type ds <- exec_func_with_error_catch(extract_scn_file_type, ds) @@ -26,12 +26,12 @@ iso_read_scn <- function(ds, options = list()) { } # process raw data - if (ds$read_option$raw_data && !is.null(ds$binary$data$config)) { + if (ds$read_option$raw_data && !is.null(ds$source$data$config)) { ds <- exec_func_with_error_catch(extract_scn_raw_voltage_data, ds) } # process method info - if (ds$read_options$method_info && !is.null(ds$binary$data$config)) { + if (ds$read_options$method_info && !is.null(ds$source$data$config)) { ds <- exec_func_with_error_catch(extract_scn_resistors, ds) } @@ -41,31 +41,31 @@ iso_read_scn <- function(ds, options = list()) { # extract scan file type extract_scn_file_type <- function(ds) { # find type (= x-axis label) - ds$binary <- ds$binary %>% - set_binary_file_error_prefix("cannot identify scan type") %>% + ds$source <- ds$source |> + set_binary_file_error_prefix("cannot identify scan type") |> move_to_C_block_range("CPlotInfo", "CTraceInfo") - ds$binary <- ds$binary %>% - move_to_next_pattern(re_text_x(), re_unicode("Arial"), re_text_x()) %>% + ds$source <- ds$source |> + move_to_next_pattern(re_text_x(), re_unicode("Arial"), re_text_x()) |> capture_data_till_pattern("type", "text", re_text_x()) - ds$file_info$type <- ds$binary$data$type + ds$file_info$type <- ds$source$data$type return(ds) } # extract file info in scn file extract_scn_file_info <- function(ds) { # find comment - ds$binary <- ds$binary %>% - set_binary_file_error_prefix("cannot extrat comment") %>% + ds$source <- ds$source |> + set_binary_file_error_prefix("cannot extrat comment") |> move_to_C_block_range("CScanStorage", "CBinary") - ds$binary <- ds$binary %>% + ds$source <- ds$source |> move_to_next_pattern(re_x_000(), re_text_x()) - end_pos <- ds$binary %>% find_next_pattern(re_direct("\xff\xff")) 
+ end_pos <- ds$source |> find_next_pattern(re_direct("\xff\xff")) # comment - if ((text_length <- end_pos - ds$binary$pos - 8) > 0) { - ds$file_info$comment <- ds$binary %>% - capture_n_data("comment", "text", text_length/2) %>% - { .$data$comment } + if ((text_length <- end_pos - ds$source$pos - 8) > 0) { + ds$file_info$comment <- ds$source |> + capture_n_data("comment", "text", text_length/2) |> + purrr::pluck("comment") } else { ds$file_info$comment <- NA_character_ } @@ -76,43 +76,43 @@ extract_scn_file_info <- function(ds) { # extract mass and cup info extract_scn_mass_cup_info <- function(ds){ # find masses and cups - ds$binary <- ds$binary %>% - set_binary_file_error_prefix("cannot identify masses/cups") %>% - move_to_C_block("^CPlotRange", regexp_match = TRUE) %>% + ds$source <- ds$source |> + set_binary_file_error_prefix("cannot identify masses/cups") |> + move_to_C_block("^CPlotRange", regexp_match = TRUE) |> cap_at_next_pattern(re_unicode("Administrator")) # masses - mass_positions <- find_next_patterns(ds$binary, re_unicode("Mass")) + mass_positions <- find_next_patterns(ds$source, re_unicode("Mass")) masses <- c() cups <- c() if (length(mass_positions) > 0) { for (pos in mass_positions) { - ds$binary <- ds$binary %>% move_to_pos(pos) %>% + ds$source <- ds$source |> move_to_pos(pos) |> capture_data_till_pattern("mass", "text", re_not_null(2)) - masses <- c(masses, ds$binary$data$mass) + masses <- c(masses, ds$source$data$mass) } cups <- c(stringr::str_extract(masses, "C\\d+")) } else { # cups - cup_positions <- find_next_patterns(ds$binary, re_unicode("Cup")) + cup_positions <- find_next_patterns(ds$source, re_unicode("Cup")) for (pos in cup_positions) { - ds$binary <- ds$binary %>% move_to_pos(pos) %>% + ds$source <- ds$source |> move_to_pos(pos) |> capture_data_till_pattern("cup", "text", re_not_null(2)) - cups <- c(cups, ds$binary$data$cup) + cups <- c(cups, ds$source$data$cup) } masses <- rep(NA_character_, length(cups)) } - ds$binary$data$config 
<- tibble( - cup = parse_number(cups) %>% as.integer(), - mass = parse_number(masses) %>% as.character(), + ds$source$data$config <- tibble( + cup = parse_number(cups) |> as.integer(), + mass = parse_number(masses) |> as.character(), mass_column = ifelse( !is.na(.data$mass), sprintf("v%s.mV", .data$mass), # Note: okay to designate cups in this way? sprintf("vC%s.mV", .data$cup) ) - ) %>% filter(!is.na(.data$cup)) + ) |> filter(!is.na(.data$cup)) return(ds) } @@ -121,69 +121,69 @@ extract_scn_mass_cup_info <- function(ds){ extract_scn_raw_voltage_data <- function(ds) { # data points - ds$binary <- ds$binary %>% - set_binary_file_error_prefix("cannot identify number of scan points") %>% + ds$source <- ds$source |> + set_binary_file_error_prefix("cannot identify number of scan points") |> move_to_C_block_range("CScanStorage", "CBinary") - ds$binary <- ds$binary %>% + ds$source <- ds$source |> move_to_next_pattern(re_x_000(), re_text_x()) - end_pos <- ds$binary %>% find_next_pattern(re_direct("\xff\xff", label = "xffxff")) + end_pos <- ds$source |> find_next_pattern(re_direct("\xff\xff", label = "xffxff")) - ds$binary <- ds$binary %>% - skip_pos(end_pos - ds$binary$pos - 8) %>% # skip comment - capture_n_data("n_points", "integer", 1) %>% + ds$source <- ds$source |> + skip_pos(end_pos - ds$source$pos - 8) |> # skip comment + capture_n_data("n_points", "integer", 1) |> capture_n_data("n_traces", "integer", 1) # find units - ds$binary <- ds$binary %>% - set_binary_file_error_prefix("cannot identify scan units") %>% + ds$source <- ds$source |> + set_binary_file_error_prefix("cannot identify scan units") |> move_to_C_block_range("CVisualisationData", "CIntegrationUnitScanPart") - ds$binary <- ds$binary %>% - move_to_next_pattern(re_unicode("Arial")) %>% + ds$source <- ds$source |> + move_to_next_pattern(re_unicode("Arial")) |> move_to_next_pattern( # seems to begin with this unique 88 c3 40 sequence re_direct("\x88\xc3\x40", label = "x88xc3x40") - ) %>% + ) |> 
move_to_next_pattern( # but this could be sufficient too if the above turns too specific re_x_000(), re_text_x() - ) %>% + ) |> capture_data_till_pattern("units", "text", re_null(4), re_not_null(1)) # range - ds$binary <- ds$binary %>% - set_binary_file_error_prefix("cannot identify scan range") %>% - move_to_C_block("^CPlotRange", regexp_match = TRUE, move_to_end = FALSE) %>% + ds$source <- ds$source |> + set_binary_file_error_prefix("cannot identify scan range") |> + move_to_C_block("^CPlotRange", regexp_match = TRUE, move_to_end = FALSE) |> skip_pos(16) - ds$binary <- ds$binary %>% - capture_n_data("min", "float", 1) %>% + ds$source <- ds$source |> + capture_n_data("min", "float", 1) |> capture_n_data("max", "float", 1) # raw data (=voltages) - ds$binary <- ds$binary %>% - set_binary_file_error_prefix("cannot read raw data") %>% - move_to_C_block_range("CBinary", "CPlotInfo") %>% - skip_pos(16) %>% + ds$source <- ds$source |> + set_binary_file_error_prefix("cannot read raw data") |> + move_to_C_block_range("CBinary", "CPlotInfo") |> + skip_pos(16) |> capture_n_data( - "voltages", c("float", rep("double", ds$binary$data$n_traces)), - ds$binary$data$n_points + "voltages", c("float", rep("double", ds$source$data$n_traces)), + ds$source$data$n_points ) - voltages <- dplyr::as_tibble(ds$binary$data$voltages) + voltages <- dplyr::as_tibble(ds$source$data$voltages) # safety check - if (ncol(voltages) - 1L != nrow(ds$binary$data$config)) { + if (ncol(voltages) - 1L != nrow(ds$source$data$config)) { if (default("debug")) { log_message("voltages:\n", voltages, prefix = "DEBUG: ") - log_message("config:\n", ds$binary$data$config, prefix = "DEBUG: ") + log_message("config:\n", ds$source$data$config, prefix = "DEBUG: ") } glue::glue( "inconsistent number of data traces ({ncol(voltages) - 1L}) ", - "and raw data masses/cups ({nrow(ds$binary$data$config)}) recovered") %>% + "and raw data masses/cups ({nrow(ds$source$data$config)}) recovered") |> stop(call. 
= FALSE) } # set column names - voltages <- rlang::set_names(voltages, c("step", ds$binary$data$config$mass_column)) + voltages <- rlang::set_names(voltages, c("step", ds$source$data$config$mass_column)) # calculate x values from step convert_step_to_x <- function(step) { @@ -197,21 +197,21 @@ extract_scn_raw_voltage_data <- function(ds) { } else { # calculate based on max and min return( - ds$binary$data$min + (step - min(step)) / - diff(range(step)) * (ds$binary$data$max - ds$binary$data$min) + ds$source$data$min + (step - min(step)) / + diff(range(step)) * (ds$source$data$max - ds$source$data$min) ) } } # set raw data - ds$raw_data <- voltages %>% + ds$raw_data <- voltages |> dplyr::mutate( # calculate x values x = convert_step_to_x(.data$step), # set x units - x_units = ds$binary$data$units - ) %>% - dplyr::select(.data$step, .data$x, .data$x_units, everything()) + x_units = ds$source$data$units + ) |> + dplyr::select("step", "x", "x_units", everything()) return(ds) @@ -222,32 +222,32 @@ extract_scn_raw_voltage_data <- function(ds) { # not the same format as other isodat files extract_scn_resistors <- function(ds) { - ds$binary <- ds$binary %>% + ds$source <- ds$source |> move_to_C_block_range("CCupHardwarePart", "CChannelHardwarePart") - cup_positions <- find_next_patterns(ds$binary, re_text_x(), re_unicode("Cup")) - cup_caps <- c(cup_positions[-1], ds$binary$max_pos) + cup_positions <- find_next_patterns(ds$source, re_text_x(), re_unicode("Cup")) + cup_caps <- c(cup_positions[-1], ds$source$max_pos) cups <- c() ohms <- c() for (i in 1:length(cup_positions)) { - ds$binary <- ds$binary %>% move_to_pos(cup_positions[i]) %>% - cap_at_pos(cup_caps[i]) %>% - skip_pos(4) %>% - capture_data_till_pattern("cup", "text", re_x_000()) %>% - move_to_next_pattern(re_null(16), re_direct("(\xff\xfe\xff\\x00)?"), re_x_000(), re_not_null(1)) %>% + ds$source <- ds$source |> move_to_pos(cup_positions[i]) |> + cap_at_pos(cup_caps[i]) |> + skip_pos(4) |> + 
capture_data_till_pattern("cup", "text", re_x_000()) |> + move_to_next_pattern(re_null(16), re_direct("(\xff\xfe\xff\\x00)?"), re_x_000(), re_not_null(1)) |> capture_n_data("R.Ohm", "double", 1) - cups <- c(cups, ds$binary$data$cup) - ohms <- c(ohms, ds$binary$data$R.Ohm) + cups <- c(cups, ds$source$data$cup) + ohms <- c(ohms, ds$source$data$R.Ohm) } ds$method_info$resistors <- tibble::tibble( - cup = parse_number(cups) %>% as.integer(), + cup = parse_number(cups) |> as.integer(), R.Ohm = ohms - ) %>% + ) |> dplyr::right_join( - select(ds$binary$data$config, .data$cup, .data$mass), + select(ds$source$data$config, "cup", "mass"), by = "cup" ) diff --git a/R/isosave.R b/R/isosave.R index be5ff553..61dab119 100644 --- a/R/isosave.R +++ b/R/isosave.R @@ -19,7 +19,7 @@ iso_save <- function(iso_files, filepath, quiet = default(quiet)) { if (!quiet) { sprintf("Info: exporting data from %d iso_files into R Data Storage '%s'", length(iso_as_file_list(iso_files)), - str_replace(filepath, "^\\.(/|\\\\)", "")) %>% message() + str_replace(filepath, "^\\.(/|\\\\)", "")) |> message() } saveRDS(iso_files, file = filepath) diff --git a/R/nse.R b/R/nse.R index eac0ab54..88d5d80d 100644 --- a/R/nse.R +++ b/R/nse.R @@ -13,16 +13,19 @@ get_column_names <- function(df, ..., df_name = rlang::as_label(rlang::enexpr(df df_name <- force(df_name) df <- force(df) if (!is.data.frame(df)) - sprintf("parameter '%s' is not a data frame", df_name) %>% stop(call. = FALSE) + sprintf("parameter '%s' is not a data frame", df_name) |> stop(call. = FALSE) # get colum name expressions from ... - cols_exps <- list(...) %>% + cols_exps <- list(...) |> # make sure to evaluate calls to default - resolve_defaults() %>% + resolve_defaults() |> # convert quos to expressions (to ensure evaluation inside the df data frame to avoid name conflicts) - map(~{if (rlang::is_quosure(.x)) rlang::quo_get_expr(.x) else .x}) %>% - # naming - { if(is.null(names(.))) rlang::set_names(., rep("", length(.))) else . 
} + map(~{if (rlang::is_quosure(.x)) rlang::quo_get_expr(.x) else .x}) + + # naming + cols_exps <- + if(is.null(names(cols_exps))) rlang::set_names(cols_exps, rep("", length(cols_exps))) + else cols_exps # find column positions pos_results <- map(cols_exps, safe_local_eval_select, data = df) @@ -34,9 +37,9 @@ get_column_names <- function(df, ..., df_name = rlang::as_label(rlang::enexpr(df map2_chr(names(cols_exps)[!ok], cols_exps[!ok], function(var, val) { if (nchar(var) > 0 && var != rlang::as_label(val)) str_c(var, " = ", rlang::as_label(val)) else rlang::as_label(val) - }) %>% + }) |> collapse("', '", last = "' and '") - errors <- map_chr(pos_results[!ok], ~stringr::str_replace(.x$error, "\n", " ")) %>% + errors <- map_chr(pos_results[!ok], ~stringr::str_replace(.x$error, "\n", " ")) |> paste(collapse = "\n- ") # check for unique names error @@ -62,11 +65,15 @@ get_column_names <- function(df, ..., df_name = rlang::as_label(rlang::enexpr(df # check on the number requirements for each column match cols <- map(pos_results, ~eval_select_pos_to_cols(.x$result, data = df)) if (any(missing <- !names(n_reqs) %in% names(cols))) - glue("column requirements for unknow parameter(s) provided: {collapse(names(n_reqs[missing]), ', ')}") %>% + glue("column requirements for unknow parameter(s) provided: {collapse(names(n_reqs[missing]), ', ')}") |> stop(call. = FALSE) ## reqs labels - all_n_reqs <- rep(1, length(cols)) %>% as.list() %>% rlang::set_names(names(cols)) %>% modifyList(n_reqs) %>% { .[names(cols)] } + all_n_reqs <- rep(1, length(cols)) |> as.list() |> + rlang::set_names(names(cols)) |> + modifyList(n_reqs) + all_n_reqs <- all_n_reqs[names(cols)] + n_req_types <- c("*" = "any number", "+" = "at least one", "?" 
= "none or one", "integer" = "the specified number") all_n_req_types <- map_chr(all_n_reqs, function(req) { if (is_integerish(req)) return("integer") @@ -75,7 +82,7 @@ get_column_names <- function(df, ..., df_name = rlang::as_label(rlang::enexpr(df }) if ( any(unknown <- map_lgl(all_n_req_types, is.na))) { n_req_unknown <- map_chr(all_n_reqs[unknown], as.character) - glue("unknown number requirement specification(s): '{collapse(n_req_unknown, \"', '\")}'. Allowed are: {collapse(names(n_req_types), ', ')}") %>% + glue("unknown number requirement specification(s): '{collapse(n_req_unknown, \"', '\")}'. Allowed are: {collapse(names(n_req_types), ', ')}") |> stop(call. = FALSE) } @@ -90,15 +97,16 @@ get_column_names <- function(df, ..., df_name = rlang::as_label(rlang::enexpr(df ## report missing columns if (!all(col_meets_n_reqs)) { + missing_cols_names <- names(cols_exps)[!col_meets_n_reqs] n_errors <- sprintf("'%s%s' refers to %d column(s) instead of %s (%s)", - names(cols_exps)[!col_meets_n_reqs] %>% { ifelse(nchar(.) > 0, str_c(., " = "), .) }, + ifelse(nchar(missing_cols_names) > 0, str_c(missing_cols_names, " = "), missing_cols_names), map_chr(cols_exps[!col_meets_n_reqs], rlang::as_label), map_int(cols[!col_meets_n_reqs], length), n_req_types[all_n_req_types[!col_meets_n_reqs]], - map_chr(all_n_reqs[!col_meets_n_reqs], as.character)) %>% + map_chr(all_n_reqs[!col_meets_n_reqs], as.character)) |> collapse("\n- ") - glue("not all parameters refer to the correct number of columns in data frame '{df_name}':\n- {n_errors}") %>% + glue("not all parameters refer to the correct number of columns in data frame '{df_name}':\n- {n_errors}") |> stop(call. 
= FALSE) } @@ -111,12 +119,14 @@ get_column_names <- function(df, ..., df_name = rlang::as_label(rlang::enexpr(df character = "text ()", logical = "logical ()") if (!all(ok <- unlist(type_reqs) %in% names(types))) { type_req_unknown <- unlist(type_reqs)[!ok] - glue("unknown type requirement specification(s): '{collapse(type_req_unknown, \"', '\")}'. Allowed are: {collapse(names(types), ', ')}") %>% + glue("unknown type requirement specification(s): '{collapse(type_req_unknown, \"', '\")}'. Allowed are: {collapse(names(types), ', ')}") |> stop(call. = FALSE) } # find type requirement problems - all_type_reqs <- rep(NA_character_, length(cols)) %>% as.list() %>% rlang::set_names(names(cols)) %>% modifyList(type_reqs) %>% { .[names(cols)] } + all_type_reqs <- rep(NA_character_, length(cols)) |> as.list() |> + rlang::set_names(names(cols)) |> modifyList(type_reqs) + all_type_reqs <- all_type_reqs[names(cols)] all_df_types <- map_chr(df, ~class(.x)[1]) col_meets_type_reqs <- map2_lgl(cols, all_type_reqs, function(col, req) { if (is.na(req)) return(TRUE) @@ -128,14 +138,15 @@ get_column_names <- function(df, ..., df_name = rlang::as_label(rlang::enexpr(df ## report type mismatches if (!all(col_meets_type_reqs)) { + mismatch_cols_names <- names(cols_exps)[!col_meets_type_reqs] n_errors <- sprintf("'%s%s' refers to column(s) of type '%s' instead of '%s'", - names(cols_exps)[!col_meets_type_reqs] %>% { ifelse(nchar(.) > 0, str_c(., " = "), .) 
}, + ifelse(nchar(mismatch_cols_names) > 0, str_c(mismatch_cols_names, " = "), mismatch_cols_names), map_chr(cols_exps[!col_meets_type_reqs], rlang::as_label), map_chr(cols[!col_meets_type_reqs], ~collapse(all_df_types[.x], "/")), - map_chr(all_type_reqs[!col_meets_type_reqs], ~types[.x])) %>% + map_chr(all_type_reqs[!col_meets_type_reqs], ~types[.x])) |> collapse("\n- ") - glue("not all parameters refer to the correct column types in data frame '{df_name}':\n- {n_errors}") %>% + glue("not all parameters refer to the correct column types in data frame '{df_name}':\n- {n_errors}") |> stop(call. = FALSE) } diff --git a/R/package.R b/R/package.R index c8e08765..7d83fea6 100644 --- a/R/package.R +++ b/R/package.R @@ -23,11 +23,6 @@ rlang::`!!` #' @export rlang::`!!!` -# re-export magrittr functions -#' @importFrom magrittr %>% -#' @export -magrittr::`%>%` - # re-export select/rename functions #' @export tidyselect::everything @@ -59,4 +54,10 @@ release_questions <- function() { c( "Is it passing travis, appveyor and win-builder?" ) -} \ No newline at end of file +} + +# deprecated documentation block +#' These functions have been deprecated and will be removed in future versions of isoreader. +#' @name deprecated +#' @param ... deprecated (passed on to new function if superseded) +NULL \ No newline at end of file diff --git a/R/plotting.R b/R/plotting.R deleted file mode 100644 index 2f96e473..00000000 --- a/R/plotting.R +++ /dev/null @@ -1,25 +0,0 @@ -# plotting functions (deprecated) ===== - -#' moved to isoprocessor -#' -#' @param ... deprecated -#' @export -iso_plot_raw_data <- function(...) { - show_isoprocessor_migration_message(match.call()[[1]]) -} - -#' moved to isoprocessor -#' -#' @param ... deprecated -#' @export -iso_plot_continuous_flow_data <- function(...) { - show_isoprocessor_migration_message(match.call()[[1]]) -} - -#' moved to isoprocessor -#' -#' @param ... deprecated -#' @export -iso_plot_dual_inlet_data <- function(...) 
{ - show_isoprocessor_migration_message(match.call()[[1]]) -} diff --git a/R/problems.R b/R/problems.R index afcdddee..170b28ed 100644 --- a/R/problems.R +++ b/R/problems.R @@ -48,7 +48,7 @@ iso_get_problems <- function(iso_files, select = everything()) { select_cols <- get_column_names(probs, select = enquo(select), n_reqs = list(select = "*"), cols_must_exist = FALSE)$select if (!"file_id" %in% select_cols) select_cols <- c("file_id", select_cols) # file info always included - return(dplyr::select(probs, !!!select_cols)) + return(dplyr::select(probs, dplyr::all_of(select_cols))) } #' @importFrom readr stop_for_problems @@ -72,13 +72,13 @@ iso_get_problems_summary <- function(iso_files, problem_files_only = TRUE, inclu # tally up problems probs_templ <- tibble(file_id = character(0), error = integer(0), warning = integer(0)) if (n_problems(iso_files) > 0) { - probs <- problems(iso_files) %>% + probs <- problems(iso_files) |> # tally up number of warnings/errors per file - group_by(.data$file_id, .data$type) %>% - tally() %>% - spread(.data$type, .data$n) %>% + group_by(.data$file_id, .data$type) |> + tally() |> + spread(.data$type, .data$n) |> # to ensure these columns exists - bind_rows(probs_templ) %>% + bind_rows(probs_templ) |> ungroup() } else { probs <- probs_templ @@ -88,12 +88,12 @@ iso_get_problems_summary <- function(iso_files, problem_files_only = TRUE, inclu # merge with file list to get all listed probs <- tibble( file_id = names(iso_files) - ) %>% + ) |> left_join(probs, by = "file_id") } # finalize data frame - probs <- probs %>% + probs <- probs |> mutate( warning = ifelse(!is.na(.data$warning), .data$warning, 0L), error = ifelse(!is.na(.data$error), .data$error, 0L) @@ -114,14 +114,14 @@ warn_problems <- function(x, cutoff = 5L, width = getOption("width")) { n <- n_problems(x) if (n == 0) return(invisible(x)) has_many_problems <- n > cutoff - probs <- iso_get_problems(x) %>% - dplyr::mutate(i = as.character(dplyr::row_number())) %>% - 
dplyr::select(c("i", "file_id", "type", "func", "details")) + probs <- iso_get_problems(x) |> + dplyr::mutate(i = as.character(dplyr::row_number())) |> + dplyr::select("i", "file_id", "type", "func", "details") probs_list <- base::rbind( c("#", "FILE", "PROBLEM", "OCCURRED IN", "DETAILS"), if (has_many_problems) utils::head(probs, cutoff - 1L) else probs - ) %>% as.list() + ) |> as.list() # add .... entries at end if there are too many problems if (has_many_problems) { @@ -135,7 +135,7 @@ warn_problems <- function(x, cutoff = 5L, width = getOption("width")) { probs_f <- purrr::map(probs_list, format, justify = "left") # format lines to account for max display width - probs_lines <- do.call(paste, c(probs_f, list(sep = " | "))) %>% + probs_lines <- do.call(paste, c(probs_f, list(sep = " | "))) |> stringr::str_trim(side = "right") line_widths <- purrr::map_int(probs_lines, nchar) probs_lines[line_widths > width] <- @@ -153,13 +153,17 @@ warn_problems <- function(x, cutoff = 5L, width = getOption("width")) { return(invisible(x)) } -#' Renamed to iso_filter_files_with_problems -#' -#' This function has been renamed to \link{iso_filter_files_with_problems} for naming consistency. -#' @param ... deprecated +#' @rdname deprecated +#' @details \code{iso_omit_files_with_problems}: use \link{iso_filter_files_with_problems} instead #' @export iso_omit_files_with_problems <- function(...) { - warning("iso_filter_files_with_problems() was renamed and will be removed in a future version of the isoreader package. Please use iso_filter_files_with_problems() directly instead to make your code future-proof.", immediate. = TRUE, call. = FALSE) + lifecycle::deprecate_warn( + "1.3.0", + "iso_omit_files_with_problems()", + "iso_filter_files_with_problems()", + details = "Function renamed for simplification.", + always = TRUE + ) iso_filter_files_with_problems(...) 
} @@ -181,9 +185,10 @@ iso_filter_files_with_problems <- function(iso_files, remove_files_with_errors = iso_files <- iso_as_file_list(iso_files) # find trouble file ids - trouble_files <- problems(iso_files) %>% - filter(.data$type %in% types) %>% - { unique(.$file_id) } + trouble_files <- problems(iso_files) |> + filter(.data$type %in% types) |> + dplyr::pull(.data$file_id) |> + unique() # exclude exclude <- names(iso_files) %in% trouble_files @@ -191,7 +196,7 @@ iso_filter_files_with_problems <- function(iso_files, remove_files_with_errors = sprintf("Info: removing %d/%d files that have any %s (keeping %d)", sum(exclude), length(iso_files), collapse(types, ", ", last = " or "), - length(iso_files) - sum(exclude)) %>% message() + length(iso_files) - sum(exclude)) |> message() } return(iso_files[!exclude]) } @@ -218,7 +223,7 @@ register_problem <- function(obj, type = NA_character_, details = NA_character_, } else { all_problems <- suppressWarnings(bind_rows(get_problems(obj), problem)) if (!keep_duplicates) all_problems <- unique(all_problems) - obj <- obj %>% set_problems(all_problems) + obj <- obj |> set_problems(all_problems) } return(obj) } @@ -261,7 +266,7 @@ get_problems <- function(x) { combined_problems <- function(...) { objs <- list(...) 
suppressWarnings( - lapply(objs, get_problems) %>% + lapply(objs, get_problems) |> bind_rows() ) } diff --git a/R/settings.R b/R/settings.R index 3afee8d5..7813c287 100644 --- a/R/settings.R +++ b/R/settings.R @@ -31,7 +31,7 @@ resolve_defaults <- function(q) { # set package setting, internal function, not exported set_default <- function(name, value, overwrite = TRUE) { if (overwrite || !str_c("isoreader.", name) %in% names(options())) - options(list(value) %>% rlang::set_names(str_c("isoreader.", name))) + options(list(value) |> rlang::set_names(str_c("isoreader.", name))) return(invisible(value)) } @@ -50,7 +50,7 @@ get_temp <- function(name, allow_null = TRUE) { #' @param value value of the temporary option #' @export set_temp <- function(name, value) { - options(list(value) %>% rlang::set_names(str_c("isoreader_temp.", name))) + options(list(value) |> rlang::set_names(str_c("isoreader_temp.", name))) return(invisible(value)) } @@ -71,12 +71,13 @@ get_all_options <- function(with_temp = FALSE) { #' @family settings functions #' @export iso_get_default_reader_parameters <- function() { - c("quiet", "cache", "cache_dir", "read_raw_data", "read_file_info", "read_method_info", "read_vendor_data_table") %>% - sapply(function(x) list(default(!!x))) %>% - { - tibble(parameter = names(.), - value = as.character(unlist(.))) - } + params <- c("quiet", "cache", "cache_dir", "read_raw_data", "read_file_info", "read_method_info", "read_vendor_data_table") |> + sapply(function(x) list(default(!!x))) + + tibble( + parameter = names(params), + value = as.character(unlist(params)) + ) } #' Turn caching on/off @@ -124,14 +125,14 @@ iso_set_default_read_parameters <- function(data = NULL, read_raw_data, read_fil # safety check if (!all(ok <- map_lgl(read_params, is.logical))){ - glue("read parameters must be TRUE or FALSE, provided: {collapse(as.character(unlist(read_params[!ok])), ', ')}") %>% + glue("read parameters must be TRUE or FALSE, provided: 
{collapse(as.character(unlist(read_params[!ok])), ', ')}") |> stop(call. = FALSE) } # info message if(!quiet) { params <- sprintf("%s = %s", names(read_params), read_params) - glue("Info: setting read parameter(s) '{collapse(params, \"', '\", last = \"' and '\")}'") %>% message() + glue("Info: setting read parameter(s) '{collapse(params, \"', '\", last = \"' and '\")}'") |> message() } # set values @@ -211,7 +212,7 @@ iso_turn_debug_on <- function(data = NULL, catch_errors = TRUE, cache = FALSE) { glue( "Info: debug mode turned on, ", "error catching turned {if(catch_errors) 'on' else 'off'}, ", - "caching turned {if(cache) 'on' else 'off'}") %>% + "caching turned {if(cache) 'on' else 'off'}") |> message() if (!missing(data)) return(data) } diff --git a/R/unit_conversion.R b/R/unit_conversion.R deleted file mode 100644 index f93625a5..00000000 --- a/R/unit_conversion.R +++ /dev/null @@ -1,20 +0,0 @@ -# unit conversion functions (deprecated) ---- - -#' moved to isoprocessor -#' -#' @param ... deprecated -#' @export -iso_convert_time <- function(...) { - show_isoprocessor_migration_message(match.call()[[1]]) -} - -#' moved to isoprocessor -#' -#' @param ... deprecated -#' @export -iso_convert_signals <- function(...) 
{ - show_isoprocessor_migration_message(match.call()[[1]]) -} - - - diff --git a/R/units.R b/R/units.R index feb023c8..2615da7e 100644 --- a/R/units.R +++ b/R/units.R @@ -120,7 +120,7 @@ iso_make_units_explicit <- function(df, prefix = " [", suffix = "]") { #' iso_make_units_implicit(df) #' #' # convert back and forth -#' iso_make_units_implicit(df) %>% iso_make_units_explicit() +#' iso_make_units_implicit(df) |> iso_make_units_explicit() #' #' # implicit units from custom prefix & suffix #' df <- tibble(peak = 1:5, height.V = 1:5) @@ -139,8 +139,8 @@ iso_make_units_implicit <- function(df, prefix = " [", suffix = "]") { ends_with_suffix <- rep(TRUE, length(col_names)) col_names <- stringr::str_sub(col_names, 1L, -1L - ends_with_suffix * nchar(suffix)) prefix <- stringr::str_locate_all(col_names, fixed(prefix)) - prefix_start <- prefix %>% purrr::map(~.x[,1]) %>% purrr::map_int(~if(length(.x) == 0) { NA_integer_ } else { max(.x) }) - prefix_end <- prefix %>% purrr::map(~.x[,2]) %>% purrr::map_int(~if(length(.x) == 0) { NA_integer_ } else { max(.x) }) + prefix_start <- prefix |> purrr::map(~.x[,1]) |> purrr::map_int(~if(length(.x) == 0) { NA_integer_ } else { max(.x) }) + prefix_end <- prefix |> purrr::map(~.x[,2]) |> purrr::map_int(~if(length(.x) == 0) { NA_integer_ } else { max(.x) }) has_units <- ends_with_suffix & !is.na(prefix_end) # update units @@ -162,7 +162,7 @@ check_units_identical <- function(x, y, warn_if_not = FALSE) { if (!check && warn_if_not) { glue::glue( "don't know how to reconcile different units '{iso_get_units(x)}' and ", - "'{iso_get_units(y)}', converting to double without units to continue") %>% + "'{iso_get_units(y)}', converting to double without units to continue") |> warning(call. = FALSE, immediate. 
= TRUE) } return(check) @@ -320,7 +320,7 @@ downcast_for_unknown_op <- function(op, x, y, warn = TRUE) { glue::glue( "don't know how to calculate <{vctrs::vec_ptype_full(x)}> {op} <{vctrs::vec_ptype_full(y)}>, ", "converting to double without units to continue" - ) %>% warning(call. = FALSE, immediate. = TRUE) + ) |> warning(call. = FALSE, immediate. = TRUE) } vctrs::vec_arith_base(op, x, y) } @@ -400,31 +400,31 @@ convert_df_units_attr_to_implicit_units <- function(df) { } # process units - units <- units %>% + units <- units |> # find out which columns are numeric dplyr::left_join( - purrr::map_lgl(df, is.numeric) %>% tibble::enframe("column", "numeric"), + purrr::map_lgl(df, is.numeric) |> tibble::enframe("column", "numeric"), by = "column" - ) %>% + ) |> filter(nchar(units) > 0) # info check if (nrow(problematic <- filter(units, !numeric)) > 0) { glue::glue("encountered non-numeric data table columns with units: ", "{paste(problematic$units, collapse = ', ')}. Only numeric column ", - "units can be preserved.") %>% + "units can be preserved.") |> warning(immediate. = TRUE, call. 
= FALSE) } # convert columns into double_with_units - units <- dplyr::filter(units, numeric) %>% - dplyr::mutate(units = stringr::str_remove(units, "^\\[") %>% stringr::str_remove("\\]$")) + units <- dplyr::filter(units, numeric) |> + dplyr::mutate(units = stringr::str_remove(units, "^\\[") |> stringr::str_remove("\\]$")) # construct the conversion quos unit_quos <- with(units, purrr::map2(column, units, - ~quo(iso_double_with_units(!!sym(.x), units = !!.y))) %>% + ~quo(iso_double_with_units(!!sym(.x), units = !!.y))) |> rlang::set_names(column)) # convert the units @@ -480,7 +480,7 @@ iso_format <- function(..., signif = 3, format_names = "%s: ", format_units="%s" # full text return( - do.call(paste, args = c(values, list(sep = sep))) %>% + do.call(paste, args = c(values, list(sep = sep))) |> stringr::str_replace_all(fixed("permil"), "\u2030") ) } diff --git a/R/utils.R b/R/utils.R index 1e082ce9..8c0b1244 100644 --- a/R/utils.R +++ b/R/utils.R @@ -1,3 +1,24 @@ +# pipe (|>) compatible syntax helpers ======= + +# if x is not empty, apply `then` (with ... params), otherwise return `empty` value +if_not_empty_then <- function(x, then = identity, ..., empty) { + stopifnot(!missing(x), rlang::is_function(then), !missing(empty)) + if(!rlang::is_empty(x)) { + # not empty, run thention with it + return(then(x, ...)) + } else { + # return empty + return(empty) + } +} + +# if y is true, apply true_func (with ... 
params), if y is false, apply false func (no params) +if_y_then <- function(x, y, ..., true_func = identity, false_func = identity) { + stopifnot(!missing(x), !missing(y), rlang::is_function(true_func), rlang::is_function(false_func)) + if(y) return(true_func(x, ...)) + else return(false_func(x)) +} + # general helper functions =========== # check if a column is in a data frame @@ -27,7 +48,7 @@ col_check <- function(cols, data, fun = sys.call(-1), msg = "You may have to cha # helper function for showing a message via progress bar or logging it in log file (parallel) log_message <- function(..., type = "info", prefix = "Info: ", quiet = default(quiet)) { if (!quiet) { - msg <- purrr::map_chr(list(...), ~paste(format(.x), collapse = "\n")) %>% paste(collapse = "") + msg <- purrr::map_chr(list(...), ~paste(format(.x), collapse = "\n")) |> paste(collapse = "") pb <- get_temp("progress_bar", allow_null = TRUE) process <- get_temp("parallel_process", allow_null = FALSE) if (!is.na(process)) { @@ -35,7 +56,7 @@ log_message <- function(..., type = "info", prefix = "Info: ", quiet = default(q log_file <- get_temp("parallel_log_file") if (!is.null(log_file)) { sprintf("\"%s\",%d,\"%s\"\n", type, process, - str_replace_all(msg, fixed("\""), "\\\"")) %>% + str_replace_all(msg, fixed("\""), "\\\"")) |> cat(file = log_file, append = TRUE) } } else if (!is.null(pb) && !pb$finished) { @@ -86,7 +107,7 @@ setup_parallel_logs <- function() { if (default(debug)) { glue::glue("\n\nDEBUG (log files will not be deleted afer run):\n\t", - "log file path '{log}'\n\tprogress file path '{progress}'") %>% + "log file path '{log}'\n\tprogress file path '{progress}'") |> message() } } @@ -142,15 +163,15 @@ process_parallel_logs <- function(status) { } else if (nrow(logs) > 0) { # display logs status$log_n <- status$log_n + nrow(logs) - logs %>% + display_logs <- logs |> mutate( X2 = as.character(.data$X2), prefix = case_when( X1 == "info" ~ sprintf("Info (process %s): ", .data$X2), X1 == 
"warning" ~ sprintf("Warning (process %s): ", .data$X2), TRUE ~ sprintf("Process %s: ", .data$X2) - )) %>% - { purrr::walk2(.$X3, .$prefix, ~log_message(.x, prefix = .y)) } + )) + purrr::walk2(display_logs$X3, display_logs$prefix, ~log_message(.x, prefix = .y)) } } @@ -187,7 +208,7 @@ cleanup_parallel_logs <- function() { iso_get_reader_example <- function(filename) { filepath <- system.file(package = "isoreader", "extdata", filename) if(!file.exists(filepath)) - sprintf("The example file '%s' does not exist. Please use iso_get_reader_examples() to see a list of all available example files.", filename) %>% + sprintf("The example file '%s' does not exist. Please use iso_get_reader_examples() to see a list of all available example files.", filename) |> stop(call. = FALSE) return(filepath) } @@ -200,11 +221,11 @@ iso_get_reader_example <- function(filename) { iso_get_reader_examples <- function() { file_types <- iso_get_supported_file_types() iso_expand_paths( - ".", extensions = file_types$extension, root = system.file(package = "isoreader", "extdata")) %>% - mutate(filename = basename(.data$path)) %>% - match_to_supported_file_types(file_types) %>% - arrange(.data$type, .data$extension, .data$filename) %>% - select(.data$filename, .data$type, .data$software, .data$description) + ".", extensions = file_types$extension, root = system.file(package = "isoreader", "extdata")) |> + mutate(filename = basename(.data$path)) |> + match_to_supported_file_types(file_types) |> + arrange(.data$type, .data$extension, .data$filename) |> + select("filename", "type", "software", "description") } #' @rdname iso_get_reader_example @@ -312,16 +333,16 @@ find_common_different_from_start <- function(vectors, empty = character(0)) { map2( 1:length(vectors), vectors, ~tibble(v = .x, i = 1:length(.y), entry = .y) - ) %>% + ) |> bind_rows() # common segments - commons <- vectors %>% - filter(i <= min_length) %>% - group_by(i) %>% - summarize(same = all(entry == entry[1])) %>% - arrange(i) %>% 
- mutate(diff = cumsum(abs(c(same[1] == FALSE,diff(!same))))) %>% + commons <- vectors |> + filter(i <= min_length) |> + group_by(i) |> + summarize(same = all(entry == entry[1])) |> + arrange(i) |> + mutate(diff = cumsum(abs(c(same[1] == FALSE,diff(!same))))) |> filter(diff == 0) # common vector @@ -330,20 +351,20 @@ find_common_different_from_start <- function(vectors, empty = character(0)) { # differences vector different <- - filter(vectors, !i %in% commons$i) %>% - select(v, entry) %>% - nest(data = c(-v)) %>% + filter(vectors, !i %in% commons$i) |> + select("v", "entry") |> + nest(data = c(-v)) |> full_join(tibble( v = unique(vectors$v), - empty = list(entry = empty)), by = "v") %>% + empty = list(entry = empty)), by = "v") |> mutate( missing = map_lgl(data, is.null), data = map2(missing, data, ~if(.x) { NULL } else { .y$entry }), result = ifelse(missing, empty, data) - ) %>% - select(v, result) %>% - arrange(v) %>% - tibble::deframe() %>% + ) |> + select("v", "result") |> + arrange(v) |> + tibble::deframe() |> unname() return( @@ -372,7 +393,7 @@ get_path_segments <- function(path) { # unlist paths unlist_paths <- function(path_list) { if (!all(ok <- purrr::map_lgl(path_list, is.character))) { - not_ok <- path_list[!ok] %>% purrr::map_chr(~class(.x)[1]) + not_ok <- path_list[!ok] |> purrr::map_chr(~class(.x)[1]) stop("paths must be character vectors, encountered: ", paste(not_ok, collapse = ", "), call. = FALSE) } unlist(path_list, use.names = FALSE) @@ -401,8 +422,9 @@ iso_expand_paths <- function(path, extensions = c(), root = ".") { # extensions check if(length(extensions) == 0) stop("no extensions provided for retrieving file paths", call. 
= FALSE) - pattern <- extensions %>% str_replace_all("\\.", "\\\\.") %>% str_c(collapse = "|") %>% { str_c("(", ., ")$") } - paths <- paths %>% + pattern <- extensions |> str_replace_all("\\.", "\\\\.") |> str_c(collapse = "|") + pattern <- str_c("(", pattern, ")$") + paths <- paths |> mutate( is_dir = dir.exists(full_path), has_ext = ifelse(is_dir, TRUE, str_detect(basename(full_path), pattern)) @@ -410,15 +432,15 @@ iso_expand_paths <- function(path, extensions = c(), root = ".") { if (!all(paths$has_ext)) { stop("some file(s) do not have one of the supported extensions (", str_c(extensions, collapse = ", "), - "):\n\t", with(paths, path[!has_ext]) %>% str_c(collapse = "\n\t"), call. = FALSE) + "):\n\t", with(paths, path[!has_ext]) |> str_c(collapse = "\n\t"), call. = FALSE) } # retrieve all the files filepaths <- - paths %>% - filter(is_dir) %>% - mutate(file = map(full_path, ~list.files(.x, pattern = pattern, recursive = TRUE, include.dirs = FALSE))) %>% - unnest(file) + paths |> + filter(is_dir) |> + mutate(file = map(full_path, ~list.files(.x, pattern = pattern, recursive = TRUE, include.dirs = FALSE))) |> + unnest("file") if (nrow(filepaths) > 0) filepaths <- mutate(filepaths, path = file.path(path, file)) @@ -427,10 +449,10 @@ iso_expand_paths <- function(path, extensions = c(), root = ".") { paths <- bind_rows( filter(paths, !is_dir), - select(filepaths, i, root, path) - ) %>% - arrange(i) %>% - select(root, path) %>% + select(filepaths, "i", "root", "path") + ) |> + arrange(i) |> + select("root", "path") |> unique() # make sure all unique files # double check that filenames are unique @@ -438,7 +460,7 @@ iso_expand_paths <- function(path, extensions = c(), root = ".") { if (anyDuplicated(filenames)) { dups <- duplicated(filenames) | duplicated(filenames, fromLast = T) warning("some files from different folders have identical file names:\n\t", - paths$path[dups] %>% str_c(collapse = "\n\t"), immediate. = TRUE, call. 
= FALSE) + paths$path[dups] |> str_c(collapse = "\n\t"), immediate. = TRUE, call. = FALSE) } return(paths) @@ -496,39 +518,39 @@ iso_shorten_relative_paths <- function(path, root = ".") { root = root, absolute = R.utils::isAbsolutePath(path), path_folders = map(path, get_path_segments) - ) %>% + ) |> # put roots into working directory context if possible - group_by(root) %>% + group_by(root) |> mutate( root_folders_all = map(root[1], get_path_segments), root_folders_rel = find_common_different_from_start(c(list(wd_folders), root_folders_all[1]))$different[-1], root_folders = if (has_common_start(root_folders_all[1], wd_folders)) root_folders_rel else root_folders_all - ) %>% + ) |> ungroup() # shorten relative paths - rel_paths <- paths %>% filter(!absolute) + rel_paths <- paths |> filter(!absolute) if (nrow(rel_paths) > 0) { - rel_paths <- rel_paths %>% + rel_paths <- rel_paths |> # shorten with most possible overlap - group_by(root, path) %>% + group_by(root, path) |> mutate( root_folders = list(find_common_different_from_start(c(root_folders[1], path_folders[1]))$common), path_folders = find_common_different_from_start(c(root_folders[1], path_folders[1]))$different[-1] - ) %>% - ungroup() %>% + ) |> + ungroup() |> # assmple paths mutate( - path = path_folders %>% map_chr( + path = path_folders |> map_chr( ~if(length(.x) == 0) { "." } else { do.call(file.path, args = as.list(.x))}) ) } # return all - paths <- bind_rows(rel_paths, filter(paths, absolute)) %>% arrange(i) %>% + paths <- bind_rows(rel_paths, filter(paths, absolute)) |> arrange(i) |> # simplify root path - mutate(root = root_folders %>% map_chr(~if(length(.x) == 0) { "." } else { do.call(file.path, args = as.list(.x))})) - return(select(paths, root, path)) + mutate(root = root_folders |> map_chr(~if(length(.x) == 0) { "." 
} else { do.call(file.path, args = as.list(.x))})) + return(select(paths, "root", "path")) } #' Find roots for absolute paths @@ -551,15 +573,15 @@ iso_find_absolute_path_roots <- function(path, root = ".", check_existence = TRU paths <- get_paths_data_frame(path, root, check_existence = check_existence) # process absolute paths - abs_paths <- paths %>% filter(absolute) + abs_paths <- paths |> filter(absolute) if (nrow(abs_paths) > 0) { # determine root folders - abs_paths <- abs_paths %>% + abs_paths <- abs_paths |> # get path folders - mutate(path_folders = ifelse(is_dir, full_path, dirname(full_path)) %>% map(get_path_segments)) %>% + mutate(path_folders = ifelse(is_dir, full_path, dirname(full_path)) |> map(get_path_segments)) |> # get root folders - group_by(root) %>% + group_by(root) |> mutate( rel_root_folders = map(root, get_path_segments), abs_root_folders = map2( @@ -567,15 +589,15 @@ iso_find_absolute_path_roots <- function(path, root = ".", check_existence = TRU ~if(R.utils::isAbsolutePath(.x)) { .y } else { get_path_segments(file.path(getwd(), .x)) } ), has_rel_root = has_common_start(path_folders, abs_root_folders[[1]]) - ) %>% + ) |> ungroup() # absolute paths that share relative root - abs_rel_paths <- abs_paths %>% filter(has_rel_root) + abs_rel_paths <- abs_paths |> filter(has_rel_root) if (nrow(abs_rel_paths) > 0) { - abs_rel_paths <- abs_rel_paths %>% - group_by(root) %>% - mutate(new_path = find_common_different_from_start(c(abs_root_folders[1], path_folders))$different[-1]) %>% + abs_rel_paths <- abs_rel_paths |> + group_by(root) |> + mutate(new_path = find_common_different_from_start(c(abs_root_folders[1], path_folders))$different[-1]) |> ungroup() } @@ -583,7 +605,7 @@ iso_find_absolute_path_roots <- function(path, root = ".", check_existence = TRU abs_paths <- filter(abs_paths, !has_rel_root) if (nrow(abs_paths) > 0) { common_diff <- find_common_different_from_start(abs_paths$path_folders) - abs_paths <- abs_paths %>% + abs_paths <- 
abs_paths |> mutate( new_path = common_diff$different, root = do.call(file.path, args = as.list(common_diff$common)) @@ -592,7 +614,7 @@ iso_find_absolute_path_roots <- function(path, root = ".", check_existence = TRU # reassemble absolute paths abs_paths <- - bind_rows(abs_paths, abs_rel_paths) %>% + bind_rows(abs_paths, abs_rel_paths) |> # expand the paths mutate( path = @@ -607,16 +629,16 @@ iso_find_absolute_path_roots <- function(path, root = ".", check_existence = TRU return(".") # current directory else return(path) - }) %>% + }) |> # combine into file path map_chr(~do.call(file.path, args = as.list(.x))) ) } # combine all - paths <- bind_rows(abs_paths, filter(paths, !absolute)) %>% arrange(i) + paths <- bind_rows(abs_paths, filter(paths, !absolute)) |> arrange(i) - return(select(paths, root, path)) + return(select(paths, "root", "path")) } # file extensions ====== @@ -624,14 +646,14 @@ iso_find_absolute_path_roots <- function(path, root = ".", check_existence = TRU # get file extension get_file_ext <- function(filepath) { - basename(filepath) %>% str_extract("\\.[^.]+$") + basename(filepath) |> str_extract("\\.[^.]+$") } # match file extension # returns the longest extension that matches match_file_ext <- function(filepath, extensions) { - exts_regexp <- extensions %>% stringr::str_to_lower() %>% - stringr::str_replace_all("\\.", "\\\\.") %>% str_c("$") + exts_regexp <- extensions |> stringr::str_to_lower() |> + stringr::str_replace_all("\\.", "\\\\.") |> str_c("$") exts <- extensions[str_detect(stringr::str_to_lower(filepath), exts_regexp)] if (length(exts) == 0) return(NA_character_) else return(exts[stringr::str_length(exts) == max(stringr::str_length(exts))][1]) @@ -648,21 +670,21 @@ match_to_supported_file_types <- function(filepaths_df, extensions_df) { path <- .ext_exists <- NULL files <- - filepaths_df %>% - mutate(extension = map_chr(path, match_file_ext, extensions_df$extension)) %>% + filepaths_df |> + mutate(extension = map_chr(path, 
match_file_ext, extensions_df$extension)) |> left_join(mutate(extensions_df, .ext_exists = TRUE), by = "extension") # safety check if ( nrow(missing <- dplyr::filter(files, is.na(.ext_exists))) > 0) { - exts <- missing$path %>% get_file_ext() %>% unique() %>% str_c(collapse = ", ") + exts <- missing$path |> get_file_ext() |> unique() |> str_c(collapse = ", ") glue::glue( "unexpected file extension(s): {exts} ", "(expected one of the following: ", - "{str_c(extensions_df$extension, collapse = ', ')})") %>% + "{str_c(extensions_df$extension, collapse = ', ')})") |> stop(call. = FALSE) } - return(dplyr::select(files, -.ext_exists)) + return(dplyr::select(files, -".ext_exists")) } # function execution with error catching ===== @@ -673,7 +695,7 @@ match_to_supported_file_types <- function(filepaths_df, extensions_df) { # @note: maybe could use tidyverse::safely for this at some point? exec_func_with_error_catch <- function(func, obj, ..., env = asNamespace("isoreader"), msg_prefix = "") { if (is.character(func)) func_name <- func - else func_name <- substitute(func) %>% deparse() + else func_name <- substitute(func) |> deparse() if (!default("catch_errors")) { # debug mode, don't catch any errors obj <- do.call(func, args = c(list(obj), list(...)), envir = env) @@ -697,7 +719,7 @@ find_parent_call <- function(current_func) { calls <- sapply(calls, as.character) is_trycatch <- sapply(calls, function(x) any(str_detect(x, "tryCatch"))) calls <- calls[!is_trycatch] - has_func <- sapply(calls, function(x) any(str_detect(x, current_func))) %>% which() + has_func <- sapply(calls, function(x) any(str_detect(x, current_func))) |> which() if (has_func[1] == 1) return("") # called from top-level calls[[has_func[1] - 1]][1] } @@ -748,19 +770,6 @@ get_info_message_concat <- function(variable, prefix = "", suffix = "", empty = } -# migration to isoprocessor ==== - -# migration message for function moved to isoprocessor -show_isoprocessor_migration_message <- function(func) { - 
glue::glue("as of isoreader version 1.0, '{func}' has moved to ", - "the isoprocessor package (isoprocessor.isoverse.org) to re-focus ", - "isoreader on its core functionality. Please install and load ", - "isoprocessor to access this function:\n", - "--> devtools::install_github(\"isoverse/isoprocessor\") # install\n", - "--> library(isoprocessor) # load") %>% - stop(call. = FALSE) -} - # testing utilities ==== # utility function to get a test file path diff --git a/R/utils_binary_files.R b/R/utils_binary_files.R index 28349483..e3c2d9a1 100644 --- a/R/utils_binary_files.R +++ b/R/utils_binary_files.R @@ -111,7 +111,7 @@ cap_at_pos <- function(bfile, pos) { # set pos and cap set_pos_and_cap <- function(bfile, pos, max) { - move_to_pos(bfile, pos = pos, reset_cap = TRUE) %>% + move_to_pos(bfile, pos = pos, reset_cap = TRUE) |> cap_at_pos(pos = max) } diff --git a/R/utils_binary_isodat_files.R b/R/utils_binary_isodat_files.R index 914fa5ae..36b9c049 100644 --- a/R/utils_binary_isodat_files.R +++ b/R/utils_binary_isodat_files.R @@ -22,9 +22,9 @@ read_binary_isodat_file <- function(filepath) { # read file bfile <- read_binary_file(filepath, bfile = template_binary_isodat_file_object()) - # find C_blocks + # find structure blocks bfile$blocks <- find_isodat_structure_blocks(bfile) - bfile$C_blocks <- dplyr::filter(bfile$blocks, type == "C block") + bfile$C_blocks <- dplyr::filter(bfile$blocks, .data$type == "C block") return(bfile) } @@ -73,16 +73,16 @@ fetch_block_idx <- function(bfile, filter = NULL, type = NULL, block = NULL, blo # get block ids block_idx <- - bfile$blocks %>% - { if(!is.null(min_pos)) dplyr::filter(., .data$start >= min_pos) else . } %>% - { if(!is.null(max_pos)) dplyr::filter(., .data$end <= max_pos) else . } %>% - { if(!is.null(min_block_idx)) dplyr::filter(., .data$block_idx >= min_block_idx) else . } %>% - { if(!is.null(max_block_idx)) dplyr::filter(., .data$block_idx <= max_block_idx) else . 
} %>% - { if (!rlang::quo_is_null(filter_quo)) dplyr::filter(., !!filter_quo) else . } %>% - { if (!is.null(type)) dplyr::filter(., .data$type %in% !!type) else . } %>% - { if (!is.null(block) && !block_regex_match) dplyr::filter(., .data$block %in% !!block) else . } %>% - { if (!is.null(block) && block_regex_match) dplyr::filter(., stringr::str_detect(.data$block, !!block)) else . } %>% - { if (!is.null(occurence)) dplyr::filter(., dplyr::row_number() %in% occurence) else . } %>% + bfile$blocks |> + if_y_then(!is.null(min_pos), true_func = dplyr::filter, .data$start >= !!min_pos) |> + if_y_then(!is.null(max_pos), true_func = dplyr::filter, .data$end <= !!max_pos) |> + if_y_then(!is.null(min_block_idx), true_func = dplyr::filter, .data$block_idx >= !!min_block_idx) |> + if_y_then(!is.null(max_block_idx), true_func = dplyr::filter, .data$block_idx <= !!max_block_idx) |> + if_y_then(!rlang::quo_is_null(filter_quo), true_func = dplyr::filter, !!filter_quo) |> + if_y_then(!is.null(type), true_func = dplyr::filter, .data$type %in% !!type) |> + if_y_then(!is.null(block) && !block_regex_match, true_func = dplyr::filter, .data$block %in% !!block) |> + if_y_then(!is.null(block) && block_regex_match, true_func = dplyr::filter, stringr::str_detect(.data$block, !!block)) |> + if_y_then(!is.null(occurence), true_func = dplyr::filter, dplyr::row_number() %in% !!occurence) |> dplyr::pull(block_idx) # check requirements @@ -120,7 +120,7 @@ fetch_block_idx <- function(bfile, filter = NULL, type = NULL, block = NULL, blo # get the block where the binary file is currently at fetch_current_block_idx <- function(bfile, pos = bfile$pos) { - fetch_block_idx(bfile, filter = start <= !!pos & !!pos <= end, occurence = 1) + fetch_block_idx(bfile, filter = .data$start <= !!pos & !!pos <= .data$end, occurence = 1) } # retrieve block entry/entries @@ -138,8 +138,8 @@ fetch_block_entry <- function(bfile, ..., block_idx = NULL) { # @param reset_cap whether to reset the cap # @param 
update_current_nav_block whether to update the navigation block position (typically/default yes) # @param ... additional parameters passed to fetch_block_idx (e.g. for using regex matching or a broader filter criterion) -# move_to_control_block(my_test$binary, "NO") -# move_to_control_block(my_test$binary, "CData", occurence = 2) +# move_to_control_block(my_test$source, "NO") +# move_to_control_block(my_test$source, "CData", occurence = 2) move_to_control_block <- function(bfile, block = NULL, type = "C block", min_pos = 1, occurence = 1, require_n = 1, move_to_end = FALSE, reset_cap = TRUE, update_current_nav_block = TRUE, ...) { # fetch right C block @@ -199,8 +199,8 @@ fetch_C_block <- function(bfile, C_block, min_pos = 1, occurence = NULL, regexp_ # @inheritParams fetch_C_block # @param reset_cap whether to reset the cap # @FIXME: testing -# move_to_C_block(my_test$binary, "NO") -# move_to_C_block(my_test$binary, "CData", occurence = 2) +# move_to_C_block(my_test$source, "NO") +# move_to_C_block(my_test$source, "CData", occurence = 2) move_to_C_block <- function(bfile, C_block, min_pos = 1, occurence = 1, move_to_end = TRUE, reset_cap = TRUE, regexp_match = FALSE) { # fetch right C block cblock <- fetch_C_block(bfile, C_block, min_pos = min_pos, occurence = occurence, regexp_match = regexp_match) @@ -246,8 +246,8 @@ move_to_C_block_range <- function(bfile, from_C_block, to_C_block, min_pos = 1){ } # move to blocks - bfile %>% - move_to_C_block(from_C_block, min_pos = min_pos, reset_cap = TRUE) %>% + bfile |> + move_to_C_block(from_C_block, min_pos = min_pos, reset_cap = TRUE) |> cap_at_C_block(to_C_block, min_pos = min_pos) } @@ -340,7 +340,7 @@ re_control <- function(raw) { structure( list( label = sprintf("{%s}", str_c(ctrls, collapse = " ")), - regexp = hex[ctrls] %>% str_c(collapse = ""), + regexp = hex[ctrls] |> str_c(collapse = ""), size = length(raw) ), class = "binary_regexp") @@ -355,7 +355,7 @@ re_unicode <- function(text) { structure( list( label = 
sprintf("{%s}", text), - regexp = hex[as.character(charToRaw(text))] %>% str_c("\\x00") %>% str_c(collapse = ""), + regexp = hex[as.character(charToRaw(text))] |> str_c("\\x00") |> str_c(collapse = ""), size = 2*nchar(text) ), class = "binary_regexp") @@ -460,10 +460,12 @@ move_to_next_pattern <- function(bfile, ..., max_gap = NULL, move_to_end = TRUE) sprintf("could not find '%s'%s in search interval %.0f to %.0f, found '%s...'", regexps$label, gap_text, bfile$pos, bfile$max_pos, - bfile %>% - map_binary_structure(length = regexps$size + - (if(!is.null(max_gap)) max_gap else 50) + 10) %>% - generate_binary_structure_map_printout())) + bfile |> + iso_print_source_file_structure( + length = regexps$size + (if(!is.null(max_gap)) max_gap else 50) + 10 + ) + ) + ) } # cap at next regular expression pattern @@ -491,10 +493,12 @@ cap_at_next_pattern <- function(bfile, ..., max_gap = NULL) { sprintf("could not find '%s'%s in search interval %.0f to %.0f, found '%s...'", regexps$label, gap_text, bfile$pos, bfile$max_pos, - bfile %>% - map_binary_structure(length = regexps$size + - (if(!is.null(max_gap)) max_gap else 50) + 10) %>% - generate_binary_structure_map_printout())) + bfile |> + iso_print_source_file_structure( + length = regexps$size + (if(!is.null(max_gap)) max_gap else 50) + 10 + ) + ) + ) } # capture data block data in specified type @@ -627,7 +631,7 @@ find_C_blocks <- function(raw) { id2 = as.character(readBin(x[5], "raw")), block = str_c(readBin(x[7:length(x)], "character", n = length(x) - 7), collapse = "") ) - }) %>% bind_rows() %>% + }) |> bind_rows() |> # byte positions mutate( start = re_positions, @@ -688,17 +692,15 @@ get_ctrl_blocks_config <- function() { # helper function to get the control blokcs as a data frame get_ctrl_blocks_config_df <- function() { - get_ctrl_blocks_config() %>% - { - tibble( - block = names(.), - regexp = map_chr(., "regexp"), - hexadecimal = map_chr( - ., - ~.x$regexp %>% charToRaw() %>% as.character() %>% paste(collapse = " 
") - ) - ) - } + blocks <- get_ctrl_blocks_config() + tibble( + block = names(blocks), + regexp = map_chr(blocks, "regexp"), + hexadecimal = map_chr( + blocks, + ~.x$regexp |> charToRaw() |> as.character() |> paste(collapse = " ") + ) + ) } # get configuration information for the data blocks @@ -794,13 +796,16 @@ parse_raw_data <- function(raw, type, n = full_raw(), ignore_trailing_zeros = FA non_text_pos <- grepRaw("([\x20-\xff][\x01-\xff])|(\\x00\\x00)", raw_trim) actual_text <- intToUtf8(raw_trim[1:(non_text_pos - 1)]) if (!is.null(errors)) { + err_raw <- raw_trim[(non_text_pos + 1):length(raw_trim)] stop( sprintf("%sexpected unicode data for %.0f bytes but found only %.0f ('%s'), non-text raw data afterwards: %s", error_prefix, n*2, non_text_pos, actual_text, - raw_trim[(non_text_pos + 1):length(raw_trim)] %>% { - if (length(.) > 10) c(as.character(head(., 10)), sprintf("+%d more", length(.) - 10)) - else as.character(.) - } %>% paste(collapse = " ") + if (length(err_raw) > 10) + c(as.character(head(err_raw, 10)), sprintf("+%d more", length(err_raw) - 10)) |> + paste(collapse = " ") + else + as.character(err_raw) |> + paste(collapse = " ") ), call. 
= FALSE) } @@ -809,12 +814,12 @@ parse_raw_data <- function(raw, type, n = full_raw(), ignore_trailing_zeros = FA } # process data - type_bytes <- seq_along(dbc) %>% rep(times = map_int(dbc, "size")) %>% rep(times = n) + type_bytes <- seq_along(dbc) |> rep(times = map_int(dbc, "size")) |> rep(times = n) data <- list() for (i in 1:length(dbc)) { if (dbc[[i]]$type == "character") { - parsed_data <- raw[type_bytes == i] %>% intToUtf8() + parsed_data <- raw[type_bytes == i] |> intToUtf8() } else { parsed_data <- readBin(raw[type_bytes == i], what = dbc[[i]]$type, size = dbc[[i]]$size, n = n) } @@ -832,8 +837,8 @@ parse_raw_data <- function(raw, type, n = full_raw(), ignore_trailing_zeros = FA # check all data for (i in 1:length(type)) { - if (class(data[[i]]) != class(sensible[[i]]) && - class(data[[i]]) != "integer" && class(sensible[[i]]) != "numeric" ) # allow integer to numeric comparison + if (!inherits(data[[i]], class(sensible[[i]])) && + !is(data[[i]], "integer") && !is(sensible[[i]], "numeric") ) # allow integer to numeric comparison stop(sprintf("%scannot compare data (%s) to expected values (%s), data type mismatch", error_prefix, class(data[[i]]), class(sensible[[i]])), call. = FALSE) if (is.character(sensible[[i]]) && !all(good_data <- str_detect(sensible[[i]], data[[i]]))) @@ -865,7 +870,7 @@ remove_trailing_zeros <- function(raw, size) { # find number of trailing 0s (i.e. everything after the highest byte that is not 00) is_null_block <- raw == as.raw(0) if (any(is_null_block == FALSE)) - trailing_00s <- is_null_block %>% { length(.) - max(which(. 
== FALSE)) } + trailing_00s <- length(is_null_block) - max(which(is_null_block == FALSE)) else trailing_00s <- length(is_null_block) @@ -893,35 +898,37 @@ remove_trailing_zeros <- function(raw, size) { # @TODO: write tests # @NOTE: speed optimized -#' returns a tibble with control blacks for isodat files -#' @return tibble with control blocks for isodat +# returns a tibble with control blacks for isodat files +# @return tibble with control blocks for isodat get_isodat_control_blocks_config <- function() { + # global vars + regex <- NULL bind_rows( # C blocks list( type = "C block", regex = "\xff\xff(\\x00|[\x01-\x0f])\\x00.\\x00\x43[\x20-\x7e]", - start_expr = rlang::exprs(pos), + start_expr = rlang::exprs(.data$pos), len_expr = rlang::exprs( - 6L + readBin(raw[rep(start, each = 2) + c(4,5)], "int", size = 2, n = length(start)) + 6L + readBin(raw[rep(.data$start, each = 2) + c(4,5)], "int", size = 2, n = length(.data$start)) ), - data_len_expr = rlang::exprs(len - 6L), + data_len_expr = rlang::exprs(.data$len - 6L), block_expr = rlang::exprs( - purrr::map2_chr(start + 5L, len - 6L, ~intToUtf8(raw[.x+c(1L:.y)])) + purrr::map2_chr(.data$start + 5L, .data$len - 6L, ~intToUtf8(raw[.x+c(1L:.y)])) ) ), # text blocks list( type = "text", regex = "\xff\xfe\xff", - start_expr = rlang::exprs(pos), + start_expr = rlang::exprs(.data$pos), len_expr = rlang::exprs( - 4L + readBin(raw[start + 3L], "int", size = 1,n = length(start)) * 2L + 4L + readBin(raw[.data$start + 3L], "int", size = 1,n = length(.data$start)) * 2L ), - data_len_expr = rlang::exprs((len - 4L)/2L), + data_len_expr = rlang::exprs((.data$len - 4L)/2L), block_expr = rlang::exprs( purrr::map2_chr( - start + 3L, len - 4L, + .data$start + 3L, .data$len - 4L, ~if (.y > 0L) { intToUtf8(readBin(raw[.x + c(1:.y)], "int", n = .y/2L, size = 2)) } else { @@ -934,32 +941,32 @@ get_isodat_control_blocks_config <- function() { list( type = "x-000", regex = "[\x01-\x1f]\\x00{3}", - start_expr = rlang::exprs(pos), + start_expr 
= rlang::exprs(.data$pos), len_expr = rlang::exprs(4L), data_len_expr = rlang::exprs(0L), - block_expr = rlang::exprs(sprintf("%s-000", raw[start])) + block_expr = rlang::exprs(sprintf("%s-000", raw[.data$start])) ), # 0000+ blocks (zeros in multiples of 2, at least 4 at a time ending in a non-zero) list( type = "0000+", regex = "(\\x00\\x00){2,}[\x01-\xff]", - start_expr = rlang::exprs(pos), + start_expr = rlang::exprs(.data$pos), len_expr = rlang::exprs(lengths(grepRaw(regex, raw, all = TRUE, value = TRUE)) - 1L), data_len_expr = rlang::exprs(0L), - block_expr = rlang::exprs(sprintf("%dx00", len)) + block_expr = rlang::exprs(sprintf("%dx00", .data$len)) ) ) } # @TODO: write tests -#' find regular expression pattern and turn into a block tibble -#' @param raw binary vector -#' @param regular expression to match -#' @param start_expr expression to calculate starting point (relative to the regexp match var 'pos') -#' @param len_expr expression to calculate the length of the block -#' @param data_len_expr expression to calculate the length of the data in the block -#' @param block_expr expression to construct the block text +# find regular expression pattern and turn into a block tibble +# @param raw binary vector +# @param regex regular expression to match +# @param start_expr expression to calculate starting point (relative to the regexp match var 'pos') +# @param len_expr expression to calculate the length of the block +# @param data_len_expr expression to calculate the length of the data in the block +# @param block_expr expression to construct the block text find_pattern_blocks <- function(raw, regex, start_expr, len_expr, data_len_expr, block_expr) { # safety checks stopifnot(rlang::is_expression(start_expr)) @@ -975,45 +982,44 @@ find_pattern_blocks <- function(raw, regex, start_expr, len_expr, data_len_expr, pos = re_positions, start = rlang::eval_tidy(start_expr), len = rlang::eval_tidy(len_expr), - end = start + len - 1L, + end = .data$start + .data$len - 
1L, data_len = rlang::eval_tidy(data_len_expr), block = rlang::eval_tidy(block_expr) - ) %>% - filter(len > 0) + ) |> + filter(.data$len > 0) } # @TODO: write tests -# @NOTE: speed optimized -#' find unknown patterns and turn into a block tibble -#' @param raw binary vector -#' @param blocks tibble with identified blocks, must have columns start & end +# find unknown patterns and turn into a block tibble (speed optimized) +# @param raw binary vector +# @param blocks tibble with identified blocks, must have columns start & end find_unknown_blocks <- function(raw, blocks) { # blocks inbetween the identified ones - blocks <- arrange(blocks, start) - max <- 8 + blocks <- arrange(blocks, .data$start) tibble( type = "unknown", start = c(1L, blocks$end + 1L), end = c(blocks$start - 1L, length(raw)), - len = end - start + 1L, - data_len = len, + len = .data$end - .data$start + 1L, + data_len = .data$len, priority = max(blocks$priority) + 1L, block = NA_character_ - ) %>% - filter(len > 0) + ) |> + filter(.data$len > 0) } -#' updates block information for unknown blocks +# updates block information for unknown blocks +# @param unknown_block_n_chars how many characters before abbreviating with ... 
get_unknown_blocks_text <- function(blocks, raw, unknown_block_n_chars = 8L) { # block text for unknown blocks - blocks %>% + blocks |> mutate( block = dplyr::case_when( type == "unknown" & start + unknown_block_n_chars < length(raw) ~ rlang::eval_tidy(rlang::expr(paste(!!!map( 0:(unknown_block_n_chars - 1L), ~ rlang::expr(raw[start+!!.x]) - ), "..."))) %>% + ), "..."))) |> stringr::str_sub(end = data_len * 3L - 1L), TRUE ~ block ) @@ -1022,64 +1028,65 @@ get_unknown_blocks_text <- function(blocks, raw, unknown_block_n_chars = 8L) { # @TODO: write tests -#' find all isodat structure blocks -#' @param bfile the isodat binary file object (must have $raw set) -#' @param unknown_block_n_chars the number of chars to preview as 'block' text in the resulting tibble +# find all isodat structure blocks - main function called by read_binary_isodat_file +# @param bfile the isodat binary file object (must have $raw set) +# @param unknown_block_n_chars the number of chars to preview as 'block' text in the resulting tibble find_isodat_structure_blocks <- function(bfile, unknown_block_n_chars = 8L) { # safety checks if (!is(bfile, "binary_isodat_file")) stop("this function is for isodat binary files only", call. 
= FALSE) ctrl_blocks <- - get_isodat_control_blocks_config() %>% + get_isodat_control_blocks_config() |> mutate( priority = dplyr::row_number(), blocks = purrr::pmap( list( - regex = regex, - start_expr = start_expr, - len_expr = len_expr, - data_len_expr = data_len_expr, - block_expr = block_expr + regex = .data$regex, + start_expr = .data$start_expr, + len_expr = .data$len_expr, + data_len_expr = .data$data_len_expr, + block_expr = .data$block_expr ), find_pattern_blocks, raw = bfile$raw ) - ) %>% - dplyr::select(-start_expr, -block_expr, -len_expr) %>% - tidyr::unnest(blocks) + ) |> + dplyr::select(-"start_expr", -"block_expr", -"len_expr") |> + tidyr::unnest("blocks") unknown_blocks <- - find_unknown_blocks(raw = bfile$raw, blocks = ctrl_blocks) %>% + find_unknown_blocks(raw = bfile$raw, blocks = ctrl_blocks) |> get_unknown_blocks_text(raw = bfile$raw, unknown_block_n_chars = unknown_block_n_chars) all_blocks <- dplyr::bind_rows( ctrl_blocks, unknown_blocks - ) %>% - dplyr::arrange(start) %>% + ) |> + dplyr::arrange(.data$start) |> dplyr::mutate( block_idx = dplyr::row_number() - ) %>% - dplyr::select(block_idx, start, end, len, data_len, type, priority, block) %>% + ) |> + dplyr::select("block_idx", "start", "end", "len", "data_len", "type", "priority", "block") + return(all_blocks) } # @TODO: write tests -#' format isodat structure blocks for printout -#' @param bfile the isodat binary file object (must have $raw set) -#' @param new_line_blocks expression when to create a new line -#' @param indent_blocks expression when to indent a line (only if also matched by new_line_blocks) -#' @param unknown_block_n_chars the number of chars to preview as 'block' text in the resulting tibble -#' @param data_blocks expression to mark data blocks -#' @param data_highlight expression to insert a 'HIGHLIGHT' marker in the text, example `len > 1000` to highlight large data blocks -#' @param pos_info whether to include position information for each line (highly recommended) 
+# format isodat structure blocks for printout +# @param bfile the isodat binary file object (must have $raw set) +# @param new_line_blocks expression when to create a new line +# @param indent_blocks expression when to indent a line (only if also matched by new_line_blocks) +# @param unknown_block_n_chars the number of chars to preview as 'block' text in the resulting tibble +# @param data_blocks expression to mark data blocks +# @param data_highlight expression to insert a 'HIGHLIGHT' marker in the text, example `len > 1000` to highlight large data blocks +# @param pos_info whether to include position information for each line (highly recommended) format_isodat_structure_blocks <- function( bfile, - new_line_blocks = type %in% c("C block", "x-000"), - indent_blocks = type == "x-000", + new_line_blocks = .data$type %in% c("C block", "x-000"), + indent_blocks = .data$type == "x-000", unknown_block_n_chars = 8L, - data_blocks = type %in% c("text", "unknown"), + data_blocks = .data$type %in% c("text", "unknown"), data_highlight = FALSE, pos_info = TRUE) { @@ -1095,95 +1102,34 @@ format_isodat_structure_blocks <- function( # generate printout indent_width <- 2 - blocks_formatted <- bfile$blocks %>% - get_unknown_blocks_text(raw = bfile$raw, unknown_block_n_chars = unknown_block_n_chars) %>% + blocks_formatted <- bfile$blocks |> + get_unknown_blocks_text(raw = bfile$raw, unknown_block_n_chars = unknown_block_n_chars) |> mutate( # new lines nl_block = !!new_line_blocks_expr, - nl_text = ifelse(c(FALSE, nl_block[-1]), "\n", ""), + nl_text = ifelse(c(FALSE, .data$nl_block[-1]), "\n", ""), # indents indent_block = !!indent_blocks_expr, - indent_text = ifelse(nl_block & indent_block, sprintf("%%%ds", indent_width) %>% sprintf(""), ""), + indent_text = ifelse(.data$nl_block & .data$indent_block, sprintf("%%%ds", !!indent_width) |> sprintf(""), ""), # position markers - pos_text = ifelse(!!pos_info & nl_block, sprintf("%07d: ", start), ""), + pos_text = ifelse(!!pos_info & 
.data$nl_block, sprintf("%07d: ", .data$start), ""), # block text data_block = !!data_blocks_expr, data_highlight = !!data_highlight_expr, block_text = case_when( - data_block & data_highlight ~ sprintf("{HIGHLIGHT: '%s'; %d: '%s'}", data_highlight_text, len, block), - data_block ~ sprintf("{%s-%d: '%s'}", type, data_len, block), - TRUE ~ sprintf("<%s>", block) + .data$data_block & .data$data_highlight ~ + sprintf("{HIGHLIGHT: '%s'; %d: '%s'}", data_highlight_text, .data$len, .data$block), + .data$data_block ~ + sprintf("{%s-%d: '%s'}", .data$type, .data$data_len, .data$block), + TRUE ~ sprintf("<%s>", .data$block) ), # everything - block_formatted = sprintf("%s%s%s%s", nl_text, pos_text, indent_text, block_text) + block_formatted = sprintf( + "%s%s%s%s", + .data$nl_text, .data$pos_text, .data$indent_text, .data$block_text + ) ) return(blocks_formatted) } -# Print Source File Structure ====== - -#' Print source file structure -#' -#' Debugging function to print a representation of the structure of the source file underlying an isofile (if there is one). -#' -#' @param object the object for which to print the source file structure. -#' @param ... additional parameters depending on source file types -#' @param save_to_file whether to save the source file structure to a text file (provide file path, will overwrite anything already in the file!) -#' @return the source file structure (invisibly if it is also saved to a file via \code{save_to_file}) -#' @export -iso_print_source_file_structure <- function(object, ..., save_to_file = NULL) { - UseMethod("iso_print_source_file_structure") -} - -#' @export -iso_print_source_file_structure.default <- function(object, ..., save_to_file = NULL) { - stop("this function is not defined for objects of type '", - class(object)[1], "'", call. = FALSE) -} - -#' @export -iso_print_source_file_structure.iso_file <- function(object, ..., save_to_file = NULL) { - # FIXME: should be $source instead of $binary!! 
- check_bfile(object$binary) - iso_print_source_file_structure(object$binary, ..., save_to_file = save_to_file) -} - -#' @rdname iso_print_source_file_structure -#' @param start starting position in the binary file to print from (prints the first block that spans this range) -#' @param length length in the binary file to print to (by default \code{NULL}, which means print everything) -#' @param end until which point in the binary file to print to. If provided, overrides whatever is specified in \code{length} -#' @export -iso_print_source_file_structure.binary_isodat_file <- function(object, start = 1, length = NULL, end = start + length, ..., save_to_file = NULL) { - if(rlang::is_empty(end)) end <- max(object$blocks$end) - partial <- start > 1 | end < max(object$blocks$end, 1) - if(end <= start) stop("'end' cannot be smaller than 'start'", call. = FALSE) - object$blocks <- filter(object$blocks, start >= !!start | (start < !!start & end > !!start), start <= !!end) - file_structure <- format_isodat_structure_blocks(object, ...) - - # save to file - if (!is.null(save_to_file)) { - sprintf("Writing binary isodat file structure to file '%s'... ", save_to_file) %>% cat() - cat(file_structure$block_formatted, sep = "", file = save_to_file) - cat("complete.") - } else { - if (partial) - sprintf( - "# Textual representation of the partial structure (bytes %d - %d) of the isodat file.\n# Print more/less by specifying the 'start', 'length' or 'end' parameters.\n", - min(file_structure$start), max(file_structure$end)) %>% cat() - else - cat("# Textual representation of the complete structure of the isodat file\n") - cat(file_structure$block_formatted, sep = "") - } -} - -#' Print formatted isodat structure, first 200 bytes by default -#' @param x object to show -#' @param start where to start to print (set by default by the current position of the file) -#' @param length how much to print -#' @param ... 
additional parameters (passed to iso_print_source_file_structure) -#' @export -print.binary_isodat_file <- function(x, start = x$pos, length = 200, ...) { - iso_print_source_file_structure(x, start = start, length = length, ...) -} - diff --git a/R/utils_files.R b/R/utils_files.R index 5376f441..33a18b3a 100644 --- a/R/utils_files.R +++ b/R/utils_files.R @@ -20,7 +20,7 @@ extract_os_file_creation_datetime <- function(ds) { # last modification time the only info that's available if (default("datetime_warnings")) { # report warning if requested - ds <- ds %>% register_warning( + ds <- ds |> register_warning( paste0( "file creation date cannot be accessed on this Linux system, using last modified time for file_datetime instead" ), @@ -38,9 +38,9 @@ extract_os_file_creation_datetime <- function(ds) { cmd <- paste0('stat -f "%DB" "', path, '"') # use BSD stat command ds$file_info$file_datetime <- # retrieve birth date in seconds from start of epoch (%DB) - system(cmd, intern=TRUE, ignore.stderr = TRUE) %>% as.integer() %>% + system(cmd, intern=TRUE, ignore.stderr = TRUE) |> as.integer() |> # convert to POSIXct - as.POSIXct(origin = "1970-01-01", tz = "") %>% + as.POSIXct(origin = "1970-01-01", tz = "") |> # force local timezone as_datetime(tz = Sys.timezone()) ds diff --git a/R/utils_source_information.R b/R/utils_source_information.R new file mode 100644 index 00000000..ec0b79d6 --- /dev/null +++ b/R/utils_source_information.R @@ -0,0 +1,97 @@ +# Get Source File Structure ======== + +#' Get source file and structure +#' +#' If an iso file is read with the \link[=iso_turn_debug_on]{debug mode on}, the source data (e.g. binary file) is stored with the file. By default this is not the case because it makes file objects unnecessarily large. Use these functions to retrieve and explore source structures. +#' +#' Typically these functions are used for debugging purposes only. 
#'
#' @param iso_file iso file object
#' @examples
#' isoreader:::iso_turn_debug_on()
#' iso_get_reader_example("dual_inlet_example.did") |>
#'   iso_read_dual_inlet() |>
#'   iso_get_source_file_structure() |>
#'   iso_print_source_file_structure(length = 500)
#' \dontrun{
#' isoreader:::iso_turn_debug_on()
#' iso_get_reader_example("dual_inlet_example.did") |>
#'   iso_read_dual_inlet() |>
#'   iso_get_source_file_structure() |>
#'   iso_print_source_file_structure(save_to_file = "structure.txt")
#' }
#' @export
iso_get_source_file_structure <- function(iso_file) {

  # checks
  stopifnot(
    "`iso_file` has to be an iso file object" = !missing(iso_file) && iso_is_file(iso_file),
    "the provided `iso_file` does not have any source information. If it should, make sure to turn debug mode on to preserve it during file read." = !is.null(iso_file$source)
  )

  # reset position so structure exploration always starts at the
  # beginning of the source, regardless of where parsing left off
  if (is.list(iso_file$source) && !is.null(iso_file$source$pos))
    iso_file$source$pos <- 1L

  return(iso_file$source)
}

# Print Source File Structure ======

#' @rdname iso_get_source_file_structure
#' @param x the object for which to print the source file structure.
#' @param ... additional parameters depending on source file types
#' @param save_to_file whether to save the source file structure to a text file (provide file path, will overwrite anything already in the file!) in addition to printing it out
#' @export
iso_print_source_file_structure <- function(x, ..., save_to_file = NULL) {
  UseMethod("iso_print_source_file_structure")
}

#' @export
iso_print_source_file_structure.default <- function(x, ..., save_to_file = NULL) {
  stop("this function is not defined for objects of type '",
       class(x)[1], "'", call. = FALSE)
}

#' @rdname iso_get_source_file_structure
#' @export
iso_print_source_file_structure.iso_file <- function(x, ..., save_to_file = NULL) {
  # pass the stored source (only present if the file was read in debug
  # mode) on to the source-type-specific method
  check_bfile(x$source)
  iso_print_source_file_structure(x$source, ..., save_to_file = save_to_file)
}

#' @rdname iso_get_source_file_structure
#' @param start starting position in the binary file to print from (prints the first block that spans this range)
#' @param length length in the binary file to print to (by default \code{NULL}, which means print everything)
#' @param end until which point in the binary file to print to. If provided, overrides whatever is specified in \code{length}
#' @export
iso_print_source_file_structure.binary_isodat_file <- function(x, start = 1, length = NULL, end = start + length, ..., save_to_file = NULL) {
  # `end` is numeric(0) when `length` is NULL --> print everything
  if (rlang::is_empty(end)) end <- max(x$blocks$end)
  # scalar condition --> use `||`, not the vectorized `|`
  partial <- start > 1 || end < max(x$blocks$end, 1)
  if (end <= start) stop("'end' cannot be smaller than 'start'", call. = FALSE)
  # keep blocks that overlap the requested [start, end] range; use the
  # `.data$` pronoun to disambiguate the block columns from the
  # same-named `start`/`end` function arguments (injected via `!!`)
  x$blocks <- filter(
    x$blocks,
    .data$start >= !!start | (.data$start < !!start & .data$end > !!start),
    .data$start <= !!end
  )
  file_structure <- format_isodat_structure_blocks(x, ...)

  # save to file
  if (!is.null(save_to_file)) {
    sprintf("Writing binary isodat file structure to file '%s'... ", save_to_file) |> cat()
    cat(file_structure$block_formatted, sep = "", file = save_to_file)
    cat("complete.")
  } else {
    if (partial)
      sprintf(
        "# Textual representation of the partial structure (bytes %d - %d) of the isodat file.\n# Print more/less by specifying the 'start', 'length' or 'end' parameters.\n",
        min(file_structure$start), max(file_structure$end)) |> cat()
    else
      cat("# Textual representation of the complete structure of the isodat file\n")
    cat(file_structure$block_formatted, sep = "")
  }
}

#' @rdname iso_get_source_file_structure
#' @export
print.binary_isodat_file <- function(x, start = x$pos, length = 200, ...) {
  iso_print_source_file_structure(x, start = start, length = length, ...)
+} + diff --git a/R/utils_xml_files.R b/R/utils_xml_files.R index 9b4b66ec..58483178 100644 --- a/R/utils_xml_files.R +++ b/R/utils_xml_files.R @@ -2,23 +2,25 @@ # maps nodes' children as text map_xml_children <- function(nodes, select = NULL) { - nodes %>% + nodes |> map_df(function(node) { - xml2::as_list(node) %>% - # if select is specific, only take the children specific - { if(is.null(select)) . else .[select[select %in% names(.)]] } %>% + nodes_list <- xml2::as_list(node) + # if select is specific, only take the children specific + if(!is.null(select)) + nodes_list <- nodes_list[select[select %in% names(nodes_list)]] + nodes_list |> # map all as text ignoring everything that does not have exactly 1 value - map_chr(function(x) if (length(x) == 1) x[[1]] else NA_character_) %>% + map_chr(function(x) if (length(x) == 1) x[[1]] else NA_character_) |> # convert to data frame - as.list() %>% dplyr::as_tibble() + as.list() |> dplyr::as_tibble() }) } # retrieve Identifier/Value pairs from 'container' type children of current node xml_fetch_container_value <- function(xml, ids, container = "PersistedPropertyBagProperty") { sapply(ids, function(id) { - xml %>% xml2::xml_find_all(str_c(".//", container, "[Identifier[.='", id, "']]")) %>% - xml2::xml_child("Value") %>% xml2::xml_text() %>% list() + xml |> xml2::xml_find_all(str_c(".//", container, "[Identifier[.='", id, "']]")) |> + xml2::xml_child("Value") |> xml2::xml_text() |> list() }) } @@ -27,12 +29,12 @@ xml_fetch_container_value <- function(xml, ids, container = "PersistedPropertyBa # process iarc info xml file process_iarc_info_xml <- function(filepath) { info_xml <- xml2::read_xml(filepath, encoding = "UTF-8") - info_version <- info_xml %>% xml2::xml_child("Version") %>% xml2::xml_text() + info_version <- info_xml |> xml2::xml_child("Version") |> xml2::xml_text() # retrieve processing lists information processing_lists <- - info_xml %>% xml2::xml_child("ProcessingLists") %>% - xml2::xml_children() %>% + 
info_xml |> xml2::xml_child("ProcessingLists") |> + xml2::xml_children() |> map_xml_children() # version safety check @@ -54,7 +56,7 @@ process_iarc_info_xml <- function(filepath) { if (!default("quiet")) { sprintf("found %d processing list(s) in .iarc: '%s'", nrow(processing_lists), - str_c("ProcessingList_", processing_lists$ProcessingListId, collapse = "', '")) %>% + str_c("ProcessingList_", processing_lists$ProcessingListId, collapse = "', '")) |> log_message(prefix = " ") } @@ -67,26 +69,26 @@ process_iarc_methods_xml <- function(filepaths) { if (length(filepaths) == 0) return(tibble()) method_params <- - filepaths %>% + filepaths |> lapply(function(methods_file) { method_xml <- xml2::read_xml(methods_file, encoding = "UTF-8") # id - method_id <- method_xml %>% xml2::xml_child("Id") %>% xml2::xml_text() + method_id <- method_xml |> xml2::xml_child("Id") |> xml2::xml_text() # method parameters - method_xml %>% - xml2::xml_find_all(".//SerialisedFlowParameter") %>% - map_xml_children() %>% + method_xml |> + xml2::xml_find_all(".//SerialisedFlowParameter") |> + map_xml_children() |> mutate(MethodId = method_id, MethodFile = basename(methods_file)) - }) %>% + }) |> bind_rows() # info if (!default("quiet")) { - method_files <- method_params$MethodFile %>% unique() + method_files <- method_params$MethodFile |> unique() sprintf("found %d method(s) in .iarc: '%s'", - method_files %>% length(), - str_c(method_files, collapse = "', '")) %>% + method_files |> length(), + str_c(method_files, collapse = "', '")) |> log_message(prefix = " ") } @@ -107,20 +109,20 @@ process_iarc_tasks_xml <- function(filepaths, method_parameters) { task_info <- c("GlobalIdentifier", "Name", "Id", "AcquisitionStartDate", "AcquisitionEndDate", # not sure these are useful - "CompletionState", "MethodId", "ProcessingListTypeIdentifier") %>% - sapply(function(child) task_xml %>% xml2::xml_child(child) %>% xml2::xml_text() %>% list()) + "CompletionState", "MethodId", 
"ProcessingListTypeIdentifier") |> + sapply(function(child) task_xml |> xml2::xml_child(child) |> xml2::xml_text() |> list()) # retrieve task values based on methods information (if there is any) if (nrow(method_parameters) > 0) { task_values <- - task_xml %>% - xml2::xml_find_all(".//SerialisableTaskValue") %>% - map_xml_children() %>% + task_xml |> + xml2::xml_find_all(".//SerialisableTaskValue") |> + map_xml_children() |> # link with parameters defined in methods mutate( MethodId = task_info[["MethodId"]], GlobalIdentifier = task_info[["GlobalIdentifier"]] - ) %>% + ) |> left_join(method_parameters, by = c("MethodId" = "MethodId", "ParameterIdentifier" = "Id")) } else { task_values <- tibble() @@ -130,45 +132,49 @@ process_iarc_tasks_xml <- function(filepaths, method_parameters) { # retrieve task data (where the real information is recorded) task_data <- - task_xml %>% - xml2::xml_find_all(".//SerialisableDataSet") %>% + task_xml |> + xml2::xml_find_all(".//SerialisableDataSet") |> map_xml_children( - select = c("Id", "AcquireDataStatus", "AcquireStartDate", "AcquireEndDate", "TypeIdentifier")) %>% + select = c("Id", "AcquireDataStatus", "AcquireStartDate", "AcquireEndDate", "TypeIdentifier")) |> mutate( GlobalIdentifier = task_info[["GlobalIdentifier"]], DataFile = str_c(Id, ".hdf5") - ) %>% - select(-Id) + ) |> + select(-"Id") # prepare return Value <- NULL # global variables + + # task info tibble + task_info_tibble <- task_info |> dplyr::as_tibble() + if (nrow(task_values) > 0) { + task_info_tibble <- task_info_tibble |> + left_join( + # wide format for task values + task_values |> select("GlobalIdentifier", "DisplayName", "Value") |> + group_by(!!sym("GlobalIdentifier"), !!sym("DisplayName")) |> + summarize(Value = str_c(Value, collapse = ", ")) |> # make sure multiple values are collapsed properly + ungroup() |> + spread("DisplayName", "Value"), + by = "GlobalIdentifier" + ) + } + + # return list list( filename = basename(task_file), # combine task info 
with task values - info = - task_info %>% dplyr::as_tibble() %>% - { - if (nrow(task_values) > 0) { - left_join(., - # wide format for task values - task_values %>% select("GlobalIdentifier", "DisplayName", "Value") %>% - group_by(!!sym("GlobalIdentifier"), !!sym("DisplayName")) %>% - summarize(Value = str_c(Value, collapse = ", ")) %>% # make sure multiple values are collapsed properly - ungroup() %>% - spread("DisplayName", "Value"), - by = "GlobalIdentifier") - } else . - }, + info = task_info_tibble, # task data data_files = task_data ) } # for all task files, run the processing function - tasks <- filepaths %>% lapply(process_iarc_task_xml) + tasks <- filepaths |> lapply(process_iarc_task_xml) if (!default("quiet")) { - sprintf("found %d sample(s) in .iarc", length(tasks)) %>% + sprintf("found %d sample(s) in .iarc", length(tasks)) |> log_message(prefix = " ") } @@ -180,7 +186,7 @@ process_iarc_tasks_xml <- function(filepaths, method_parameters) { process_iarc_processing_xml <- function(processing_list_id, filepath) { if (!file.exists(filepath)) stop("invalid processing list file path: ", filepath, call. = FALSE) if (!default("quiet")) { - sprintf("searching processing list '%s' for gas configurations...", basename(filepath)) %>% + sprintf("searching processing list '%s' for gas configurations...", basename(filepath)) |> log_message(prefix = " ") } @@ -189,47 +195,47 @@ process_iarc_processing_xml <- function(processing_list_id, filepath) { # read file xml <- xml2::read_xml(filepath, encoding = "UTF-8") - global_id <- xml %>% xml2::xml_child("DefinitionUniqueIdentifier") %>% xml2::xml_text() + global_id <- xml |> xml2::xml_child("DefinitionUniqueIdentifier") |> xml2::xml_text() # safety check if (global_id != processing_list_id) { sprintf("mismatch between Info processing list ID ('%s') and processing list file id ('%s')", - processing_list_id, global_id) %>% stop(call. = FALSE) + processing_list_id, global_id) |> stop(call. 
= FALSE) } ## helper functions ## # find the species xml_find_species <- function(node) { # potentially useful(?): DetectionBeamChannel - node %>% xml2::xml_child("SerialisedPropertyBagProperties") %>% - xml_fetch_container_value("Species") %>% { .$Species } + node |> xml2::xml_child("SerialisedPropertyBagProperties") |> + xml_fetch_container_value("Species") |> purrr::pluck("Species") } # find the channel masses from the beam ratio definitions xml_find_channel_masses <- function(node) { # find the beam ratio definitions ratio_defs <- - node %>% xml2::xml_child("SerialisedChildPropertyBags") %>% - xml2::xml_find_all(".//SerialisablePropertyBag[Identifier[.='{42D28191-A6E9-4B7B-8C3D-0F0037624F7D}']]") %>% - map(xml_fetch_container_value, c("NumeratorBeamChannel", "DenominatorBeamChannel", "Label")) %>% + node |> xml2::xml_child("SerialisedChildPropertyBags") |> + xml2::xml_find_all(".//SerialisablePropertyBag[Identifier[.='{42D28191-A6E9-4B7B-8C3D-0F0037624F7D}']]") |> + map(xml_fetch_container_value, c("NumeratorBeamChannel", "DenominatorBeamChannel", "Label")) |> bind_rows() if (nrow(ratio_defs) == 0) return (tibble(channel = character(), mass = character())) # derive channel defintions channel_defs <- - ratio_defs %>% + ratio_defs |> # find masses from label mutate( - numerator_mass = str_match(Label, "^(\\d+)/") %>% {.[,2]}, - denominator_mass = str_match(Label, "/(\\d+)$") %>% {.[,2]} - ) %>% + numerator_mass = str_match(Label, "^(\\d+)/")[,2], + denominator_mass = str_match(Label, "/(\\d+)$")[,2] + ) + + channel_defs <- # channel to mass matches - { - bind_rows( - select(., channel=NumeratorBeamChannel, mass=numerator_mass), - select(., channel=DenominatorBeamChannel, mass=denominator_mass) - ) - } %>% + bind_rows( + select(channel_defs, channel="NumeratorBeamChannel", mass="numerator_mass"), + select(channel_defs, channel="DenominatorBeamChannel", mass="denominator_mass") + ) |> unique() return(channel_defs) } @@ -237,7 +243,7 @@ process_iarc_processing_xml 
<- function(processing_list_id, filepath) { # find the H3 factor xml_find_H3_factor <- function(node) { H3_factor <- - node %>% xml2::xml_child("SerialisedPropertyBagProperties") %>% + node |> xml2::xml_child("SerialisedPropertyBagProperties") |> xml_fetch_container_value(c("ApplyH3CorrectionFactor", "H3CorrectionFactor")) if (!is.na(H3_factor$ApplyH3CorrectionFactor) && H3_factor$ApplyH3CorrectionFactor == "True") return(as.numeric(H3_factor$H3CorrectionFactor)) @@ -245,8 +251,8 @@ process_iarc_processing_xml <- function(processing_list_id, filepath) { } # process channel configurations - species_config <- xml %>% - xml2::xml_find_all("//SerialisablePropertyBag[Identifier[.='10DC1602-5ED4-4D62-BAB0-2693E3FBC3AF']]") %>% + species_config <- xml |> + xml2::xml_find_all("//SerialisablePropertyBag[Identifier[.='10DC1602-5ED4-4D62-BAB0-2693E3FBC3AF']]") |> sapply(function(node) { species <- xml_find_species(node) if (is.null(species) || is.na(species)) # no species definition found @@ -255,13 +261,13 @@ process_iarc_processing_xml <- function(processing_list_id, filepath) { config <- list(channels = xml_find_channel_masses(node)) if (!is.null(H3_factor <- xml_find_H3_factor(node))) config$H3_factor <- H3_factor - config %>% list() %>% rlang::set_names(species) + config |> list() |> rlang::set_names(species) }) # info if (!default("quiet")) { sprintf("found configurations for '%s'", - species_config %>% names() %>% str_c(collapse = "', '")) %>% + species_config |> names() |> str_c(collapse = "', '")) |> log_message(prefix = " ") } diff --git a/README.Rmd b/README.Rmd index e5d80e16..829c03b6 100644 --- a/README.Rmd +++ b/README.Rmd @@ -16,11 +16,9 @@ version <- as.character(packageVersion("isoreader")) # isoreader -[![CRAN_Status_Badge](https://www.r-pkg.org/badges/version/isoreader)](https://cran.r-project.org/package=isoreader) -[![Documentation](https://img.shields.io/badge/docs-online-green.svg)](https://isoreader.isoverse.org/) [![R build 
status](https://github.com/isoverse/isoreader/workflows/R-CMD-check/badge.svg)](https://github.com/isoverse/isoreader/actions?workflow=R-CMD-check) -[![Binder](https://img.shields.io/badge/explore%20online-in%20RStudio-blue.svg)](https://mybinder.org/v2/gh/isoverse/isoreader/binder?urlpath=rstudio) -[![Binder](https://img.shields.io/badge/explore%20online-in%20Jupyter-orange.svg)](https://mybinder.org/v2/gh/isoverse/isoreader/binder?urlpath=lab) +[![Documentation](https://img.shields.io/badge/docs-online-green.svg)](https://isoreader.isoverse.org/) +[![DOI](https://joss.theoj.org/papers/10.21105/joss.02878/status.svg)](https://doi.org/10.21105/joss.02878) ## About @@ -30,8 +28,6 @@ This package is intended as a unified one-stop command line interface to all com ## Installation -You can install the latest release of isoreader from [CRAN](https://cran.r-project.org/package=isoreader): - ```{r cran-installation, eval = FALSE} # Note: isoreader is temporarily not available on CRAN because of a missing dependency, please install directly from GitHub using the commands below # install.packages("isoreader") @@ -77,8 +73,8 @@ Currently supported file types: ```{r, echo=FALSE, warning=FALSE, message=FALSE} library(isoreader) -iso_get_supported_file_types() %>% - dplyr::select(-call) %>% +iso_get_supported_file_types() |> + dplyr::select(-"call") |> knitr::kable() ``` diff --git a/README.md b/README.md index 92523d96..26330583 100644 --- a/README.md +++ b/README.md @@ -3,12 +3,10 @@ # isoreader -[![CRAN_Status_Badge](https://www.r-pkg.org/badges/version/isoreader)](https://cran.r-project.org/package=isoreader) -[![Documentation](https://img.shields.io/badge/docs-online-green.svg)](https://isoreader.isoverse.org/) [![R build status](https://github.com/isoverse/isoreader/workflows/R-CMD-check/badge.svg)](https://github.com/isoverse/isoreader/actions?workflow=R-CMD-check) 
-[![Binder](https://img.shields.io/badge/explore%20online-in%20RStudio-blue.svg)](https://mybinder.org/v2/gh/isoverse/isoreader/binder?urlpath=rstudio) -[![Binder](https://img.shields.io/badge/explore%20online-in%20Jupyter-orange.svg)](https://mybinder.org/v2/gh/isoverse/isoreader/binder?urlpath=lab) +[![Documentation](https://img.shields.io/badge/docs-online-green.svg)](https://isoreader.isoverse.org/) +[![DOI](https://joss.theoj.org/papers/10.21105/joss.02878/status.svg)](https://doi.org/10.21105/joss.02878) ## About @@ -45,9 +43,6 @@ with [tidyverse](https://www.tidyverse.org/) packages such as ## Installation -You can install the latest release of isoreader from -[CRAN](https://cran.r-project.org/package=isoreader): - ``` r # Note: isoreader is temporarily not available on CRAN because of a missing dependency, please install directly from GitHub using the commands below # install.packages("isoreader") @@ -101,17 +96,17 @@ iso_files <- iso_read_scan(data_folder) #> Info: reading file 'full_scan_example.scn' with '.scn' reader... #> Info: reading file 'peak_shape_scan_example.scn' with '.scn' reader... #> Info: reading file 'time_scan_example.scn' with '.scn' reader... 
-#> Info: finished reading 4 files in 2.40 secs +#> Info: finished reading 4 files in 1.30 secs iso_files #> Data from 4 scan iso files: -#> # A tibble: 4 × 5 -#> file_id raw_data file_info method_info file_path -#> -#> 1 background_scan_example.scn 525 measurements,… 8 entries resistors backgrou… -#> 2 full_scan_example.scn 799 measurements,… 8 entries resistors full_sca… -#> 3 peak_shape_scan_example.scn 220 measurements,… 8 entries resistors peak_sha… -#> 4 time_scan_example.scn 5532 measurements… 8 entries resistors time_sca… +#> # A tibble: 4 × 6 +#> file_id file_path_ file_subpath raw_data file_info method_info +#> +#> 1 background_scan_exampl… backgroun… 525 mea… 8 entries resistors +#> 2 full_scan_example.scn full_scan… 799 mea… 8 entries resistors +#> 3 peak_shape_scan_exampl… peak_shap… 220 mea… 7 entries resistors +#> 4 time_scan_example.scn time_scan… 5532 me… 8 entries resistors ``` ## Supported File Types diff --git a/_pkgdown.yml b/_pkgdown.yml index 44db328a..282aaec2 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -96,8 +96,8 @@ reference: These functions provide tools to save the data to specific file formats. 
contents: - iso_save - - iso_export_to_excel - - iso_export_to_feather + - iso_export_files_to_excel + - iso_export_files_to_feather - title: Package information and default parameters desc: > @@ -136,37 +136,30 @@ reference: contents: - iso_read_files - iso_register_dual_inlet_file_reader + - iso_register_continuous_flow_file_reader + - iso_register_scan_file_reader - iso_is_file - iso_is_file_list - iso_is_continuous_flow - iso_is_dual_inlet + - iso_is_scan - iso_debug_mode + - iso_source_file_op_error + - iso_get_source_file_structure + - iso_print_source_file_structure - read_iso_file - reread_iso_files - set_temp - - map_binary_structure - - print.binary_structure_map + - print.binary_isodat_file - print.iso_file_list - print.iso_file - vec_arith.iso_double_with_units - vec_cast.iso_double_with_units - vec_ptype2.iso_double_with_units -- title: Moved to isoprocessor or deprecated +- title: Deprecated contents: - - iso_get_data - - iso_get_resistors_info - - iso_get_standards_info - - iso_omit_files_with_problems - - iso_calculate_ratios - - iso_convert_signals - - iso_convert_time - - iso_plot_continuous_flow_data - - iso_plot_dual_inlet_data - - iso_plot_raw_data - - isoread - - + - deprecated # additional modification notes on # https://www.r-bloggers.com/building-a-website-with-pkgdown-a-short-guide/ diff --git a/man/deprecated.Rd b/man/deprecated.Rd new file mode 100644 index 00000000..c14a75ac --- /dev/null +++ b/man/deprecated.Rd @@ -0,0 +1,44 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/aggregate_data.R, R/export.R, R/package.R, +% R/problems.R +\name{iso_get_data} +\alias{iso_get_data} +\alias{iso_get_standards_info} +\alias{iso_get_resistors_info} +\alias{iso_export_to_excel} +\alias{iso_export_to_feather} +\alias{deprecated} +\alias{iso_omit_files_with_problems} +\title{These functions have been deprecated and will be removed in future versions of isoreader.} +\usage{ +iso_get_data(...) 
+ +iso_get_standards_info(...) + +iso_get_resistors_info(...) + +iso_export_to_excel(...) + +iso_export_to_feather(...) + +iso_omit_files_with_problems(...) +} +\arguments{ +\item{...}{deprecated (passed on to new function if superseded)} +} +\description{ +These functions have been deprecated and will be removed in future versions of isoreader. +} +\details{ +\code{iso_get_data}: use \link{iso_get_all_data} instead + +\code{iso_get_standards_info}: use \link{iso_get_standards} instead + +\code{iso_get_resistors_info}: use \link{iso_get_resistors} instead + +\code{iso_export_to_excel}: use \link{iso_export_files_to_excel} instead + +\code{iso_export_to_feather}: use \link{iso_export_files_to_feather} instead + +\code{iso_omit_files_with_problems}: use \link{iso_filter_files_with_problems} instead +} diff --git a/man/find_isodat_structure_blocks.Rd b/man/find_isodat_structure_blocks.Rd deleted file mode 100644 index 609ca43a..00000000 --- a/man/find_isodat_structure_blocks.Rd +++ /dev/null @@ -1,16 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utils_binary_isodat_files.R -\name{find_isodat_structure_blocks} -\alias{find_isodat_structure_blocks} -\title{find all isodat structure blocks} -\usage{ -find_isodat_structure_blocks(bfile, unknown_block_n_chars = 8L) -} -\arguments{ -\item{bfile}{the isodat binary file object (must have $raw set)} - -\item{unknown_block_n_chars}{the number of chars to preview as 'block' text in the resulting tibble} -} -\description{ -find all isodat structure blocks -} diff --git a/man/find_pattern_blocks.Rd b/man/find_pattern_blocks.Rd deleted file mode 100644 index cf0e9a7a..00000000 --- a/man/find_pattern_blocks.Rd +++ /dev/null @@ -1,31 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utils_binary_isodat_files.R -\name{find_pattern_blocks} -\alias{find_pattern_blocks} -\title{find regular expression pattern and turn into a block tibble} -\usage{ 
-find_pattern_blocks( - raw, - regex, - start_expr, - len_expr, - data_len_expr, - block_expr -) -} -\arguments{ -\item{raw}{binary vector} - -\item{start_expr}{expression to calculate starting point (relative to the regexp match var 'pos')} - -\item{len_expr}{expression to calculate the length of the block} - -\item{data_len_expr}{expression to calculate the length of the data in the block} - -\item{block_expr}{expression to construct the block text} - -\item{regular}{expression to match} -} -\description{ -find regular expression pattern and turn into a block tibble -} diff --git a/man/find_unknown_blocks.Rd b/man/find_unknown_blocks.Rd deleted file mode 100644 index 35e0ef20..00000000 --- a/man/find_unknown_blocks.Rd +++ /dev/null @@ -1,16 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utils_binary_isodat_files.R -\name{find_unknown_blocks} -\alias{find_unknown_blocks} -\title{find unknown patterns and turn into a block tibble} -\usage{ -find_unknown_blocks(raw, blocks) -} -\arguments{ -\item{raw}{binary vector} - -\item{blocks}{tibble with identified blocks, must have columns start & end} -} -\description{ -find unknown patterns and turn into a block tibble -} diff --git a/man/format_isodat_structure_blocks.Rd b/man/format_isodat_structure_blocks.Rd deleted file mode 100644 index a074f5ec..00000000 --- a/man/format_isodat_structure_blocks.Rd +++ /dev/null @@ -1,34 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utils_binary_isodat_files.R -\name{format_isodat_structure_blocks} -\alias{format_isodat_structure_blocks} -\title{format isodat structure blocks for printout} -\usage{ -format_isodat_structure_blocks( - bfile, - new_line_blocks = type \%in\% c("C block", "x-000"), - indent_blocks = type == "x-000", - unknown_block_n_chars = 8L, - data_blocks = type \%in\% c("text", "unknown"), - data_highlight = FALSE, - pos_info = TRUE -) -} -\arguments{ -\item{bfile}{the isodat binary 
file object (must have $raw set)} - -\item{new_line_blocks}{expression when to create a new line} - -\item{indent_blocks}{expression when to indent a line (only if also matched by new_line_blocks)} - -\item{unknown_block_n_chars}{the number of chars to preview as 'block' text in the resulting tibble} - -\item{data_blocks}{expression to mark data blocks} - -\item{data_highlight}{expression to insert a 'HIGHLIGHT' marker in the text, example `len > 1000` to highlight large data blocks} - -\item{pos_info}{whether to include position information for each line (highly recommended)} -} -\description{ -format isodat structure blocks for printout -} diff --git a/man/get_isodat_control_blocks_config.Rd b/man/get_isodat_control_blocks_config.Rd deleted file mode 100644 index 29f27dc7..00000000 --- a/man/get_isodat_control_blocks_config.Rd +++ /dev/null @@ -1,14 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utils_binary_isodat_files.R -\name{get_isodat_control_blocks_config} -\alias{get_isodat_control_blocks_config} -\title{returns a tibble with control blacks for isodat files} -\usage{ -get_isodat_control_blocks_config() -} -\value{ -tibble with control blocks for isodat -} -\description{ -returns a tibble with control blacks for isodat files -} diff --git a/man/get_unknown_blocks_text.Rd b/man/get_unknown_blocks_text.Rd deleted file mode 100644 index 13c85b4d..00000000 --- a/man/get_unknown_blocks_text.Rd +++ /dev/null @@ -1,11 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utils_binary_isodat_files.R -\name{get_unknown_blocks_text} -\alias{get_unknown_blocks_text} -\title{updates block information for unknown blocks} -\usage{ -get_unknown_blocks_text(blocks, raw, unknown_block_n_chars = 8L) -} -\description{ -updates block information for unknown blocks -} diff --git a/man/iso_calculate_ratios.Rd b/man/iso_calculate_ratios.Rd deleted file mode 100644 index bee95234..00000000 --- 
a/man/iso_calculate_ratios.Rd +++ /dev/null @@ -1,14 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/calculate_ratios.R -\name{iso_calculate_ratios} -\alias{iso_calculate_ratios} -\title{moved to isoprocessor} -\usage{ -iso_calculate_ratios(...) -} -\arguments{ -\item{...}{deprecated} -} -\description{ -moved to isoprocessor -} diff --git a/man/iso_convert_signals.Rd b/man/iso_convert_signals.Rd deleted file mode 100644 index d674f436..00000000 --- a/man/iso_convert_signals.Rd +++ /dev/null @@ -1,14 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/unit_conversion.R -\name{iso_convert_signals} -\alias{iso_convert_signals} -\title{moved to isoprocessor} -\usage{ -iso_convert_signals(...) -} -\arguments{ -\item{...}{deprecated} -} -\description{ -moved to isoprocessor -} diff --git a/man/iso_convert_time.Rd b/man/iso_convert_time.Rd deleted file mode 100644 index 03aaca74..00000000 --- a/man/iso_convert_time.Rd +++ /dev/null @@ -1,14 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/unit_conversion.R -\name{iso_convert_time} -\alias{iso_convert_time} -\title{moved to isoprocessor} -\usage{ -iso_convert_time(...) 
-} -\arguments{ -\item{...}{deprecated} -} -\description{ -moved to isoprocessor -} diff --git a/man/iso_export_to_excel.Rd b/man/iso_export_files_to_excel.Rd similarity index 96% rename from man/iso_export_to_excel.Rd rename to man/iso_export_files_to_excel.Rd index 01edd794..bcd2c5a4 100644 --- a/man/iso_export_to_excel.Rd +++ b/man/iso_export_files_to_excel.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/export.R -\name{iso_export_to_excel} -\alias{iso_export_to_excel} +\name{iso_export_files_to_excel} +\alias{iso_export_files_to_excel} \title{Export data to Excel} \usage{ -iso_export_to_excel( +iso_export_files_to_excel( iso_files, filepath, include_file_info = everything(), @@ -52,7 +52,7 @@ This function exports the passed in iso_files to Excel. The different kinds of d } \seealso{ Other export functions: -\code{\link{iso_export_to_feather}()}, +\code{\link{iso_export_files_to_feather}()}, \code{\link{iso_save}()} } \concept{export functions} diff --git a/man/iso_export_to_feather.Rd b/man/iso_export_files_to_feather.Rd similarity index 96% rename from man/iso_export_to_feather.Rd rename to man/iso_export_files_to_feather.Rd index bf3283e3..6ec38cac 100644 --- a/man/iso_export_to_feather.Rd +++ b/man/iso_export_files_to_feather.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/export.R -\name{iso_export_to_feather} -\alias{iso_export_to_feather} +\name{iso_export_files_to_feather} +\alias{iso_export_files_to_feather} \title{Export to feather} \usage{ -iso_export_to_feather( +iso_export_files_to_feather( iso_files, filepath_prefix, include_file_info = everything(), @@ -49,7 +49,7 @@ This function exports the passed in iso_files to the Python and R shared feather } \seealso{ Other export functions: -\code{\link{iso_export_to_excel}()}, +\code{\link{iso_export_files_to_excel}()}, \code{\link{iso_save}()} } \concept{export functions} diff --git 
a/man/iso_get_data.Rd b/man/iso_get_data.Rd deleted file mode 100644 index 5b638ef6..00000000 --- a/man/iso_get_data.Rd +++ /dev/null @@ -1,14 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/aggregate_data.R -\name{iso_get_data} -\alias{iso_get_data} -\title{DEPRECATED} -\usage{ -iso_get_data(...) -} -\arguments{ -\item{...}{forwarded to \link{iso_get_all_data}} -} -\description{ -Please use \link{iso_get_all_data} instead. -} diff --git a/man/iso_get_resistors_info.Rd b/man/iso_get_resistors_info.Rd deleted file mode 100644 index 74dc1cf5..00000000 --- a/man/iso_get_resistors_info.Rd +++ /dev/null @@ -1,14 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/aggregate_data.R -\name{iso_get_resistors_info} -\alias{iso_get_resistors_info} -\title{DEPRECATED} -\usage{ -iso_get_resistors_info(...) -} -\arguments{ -\item{...}{forwarded to \link{iso_get_resistors}} -} -\description{ -Please use \link{iso_get_resistors} instead. 
-} diff --git a/man/iso_get_source_file_structure.Rd b/man/iso_get_source_file_structure.Rd new file mode 100644 index 00000000..3e3ecaec --- /dev/null +++ b/man/iso_get_source_file_structure.Rd @@ -0,0 +1,62 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils_source_information.R +\name{iso_get_source_file_structure} +\alias{iso_get_source_file_structure} +\alias{iso_print_source_file_structure} +\alias{iso_print_source_file_structure.iso_file} +\alias{iso_print_source_file_structure.binary_isodat_file} +\alias{print.binary_isodat_file} +\title{Get source file and structure} +\usage{ +iso_get_source_file_structure(iso_file) + +iso_print_source_file_structure(x, ..., save_to_file = NULL) + +\method{iso_print_source_file_structure}{iso_file}(x, ..., save_to_file = NULL) + +\method{iso_print_source_file_structure}{binary_isodat_file}( + x, + start = 1, + length = NULL, + end = start + length, + ..., + save_to_file = NULL +) + +\method{print}{binary_isodat_file}(x, start = x$pos, length = 200, ...) +} +\arguments{ +\item{iso_file}{iso file object} + +\item{x}{the object for which to print the source file structure.} + +\item{...}{additional parameters depending on source file types} + +\item{save_to_file}{whether to save the source file structure to a text file (provide file path, will overwrite anything already in the file!) in addition to printing it out} + +\item{start}{starting position in the binary file to print from (prints the first block that spans this range)} + +\item{length}{length in the binary file to print to (by default \code{NULL}, which means print everything)} + +\item{end}{until which point in the binary file to print to. If provided, overrides whatever is specified in \code{length}} +} +\description{ +If an iso file is read with the \link[=iso_turn_debug_on]{debug mode on}, the source data (e.g. binary file) is stored with the file. 
By default this is not the case because it makes file objects unnecessarily large. Use these functions to retrieve and explore source structures. +} +\details{ +Typically these functions are used for debugging purposes only. +} +\examples{ +isoreader:::iso_turn_debug_on() +iso_get_reader_example("dual_inlet_example.did") |> + iso_read_dual_inlet() |> + iso_get_source_file_structure() |> + iso_print_source_file_structure(length = 500) +\dontrun{ +isoreader:::iso_turn_debug_on() +iso_get_reader_example("dual_inlet_example.did") |> + iso_read_dual_inlet() |> + iso_get_source_file_structure() |> + iso_print_source_file_structure(save_to_file = "structure.txt") +} +} diff --git a/man/iso_get_standards_info.Rd b/man/iso_get_standards_info.Rd deleted file mode 100644 index 2596f470..00000000 --- a/man/iso_get_standards_info.Rd +++ /dev/null @@ -1,14 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/aggregate_data.R -\name{iso_get_standards_info} -\alias{iso_get_standards_info} -\title{DEPRECATED} -\usage{ -iso_get_standards_info(...) -} -\arguments{ -\item{...}{forwarded to \link{iso_get_standards}} -} -\description{ -Please use \link{iso_get_standards} instead. 
-} diff --git a/man/iso_make_units_implicit.Rd b/man/iso_make_units_implicit.Rd index d4ef41b1..7cc92046 100644 --- a/man/iso_make_units_implicit.Rd +++ b/man/iso_make_units_implicit.Rd @@ -22,7 +22,7 @@ df <- tibble(peak = 1:5, `height [V]` = 1:5) iso_make_units_implicit(df) # convert back and forth -iso_make_units_implicit(df) \%>\% iso_make_units_explicit() +iso_make_units_implicit(df) |> iso_make_units_explicit() # implicit units from custom prefix & suffix df <- tibble(peak = 1:5, height.V = 1:5) diff --git a/man/iso_omit_files_with_problems.Rd b/man/iso_omit_files_with_problems.Rd deleted file mode 100644 index b998467a..00000000 --- a/man/iso_omit_files_with_problems.Rd +++ /dev/null @@ -1,14 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/problems.R -\name{iso_omit_files_with_problems} -\alias{iso_omit_files_with_problems} -\title{Renamed to iso_filter_files_with_problems} -\usage{ -iso_omit_files_with_problems(...) -} -\arguments{ -\item{...}{deprecated} -} -\description{ -This function has been renamed to \link{iso_filter_files_with_problems} for naming consistency. -} diff --git a/man/iso_plot_continuous_flow_data.Rd b/man/iso_plot_continuous_flow_data.Rd deleted file mode 100644 index 33bc617d..00000000 --- a/man/iso_plot_continuous_flow_data.Rd +++ /dev/null @@ -1,14 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/plotting.R -\name{iso_plot_continuous_flow_data} -\alias{iso_plot_continuous_flow_data} -\title{moved to isoprocessor} -\usage{ -iso_plot_continuous_flow_data(...) 
-} -\arguments{ -\item{...}{deprecated} -} -\description{ -moved to isoprocessor -} diff --git a/man/iso_plot_dual_inlet_data.Rd b/man/iso_plot_dual_inlet_data.Rd deleted file mode 100644 index 5c5547e7..00000000 --- a/man/iso_plot_dual_inlet_data.Rd +++ /dev/null @@ -1,14 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/plotting.R -\name{iso_plot_dual_inlet_data} -\alias{iso_plot_dual_inlet_data} -\title{moved to isoprocessor} -\usage{ -iso_plot_dual_inlet_data(...) -} -\arguments{ -\item{...}{deprecated} -} -\description{ -moved to isoprocessor -} diff --git a/man/iso_plot_raw_data.Rd b/man/iso_plot_raw_data.Rd deleted file mode 100644 index de544382..00000000 --- a/man/iso_plot_raw_data.Rd +++ /dev/null @@ -1,14 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/plotting.R -\name{iso_plot_raw_data} -\alias{iso_plot_raw_data} -\title{moved to isoprocessor} -\usage{ -iso_plot_raw_data(...) -} -\arguments{ -\item{...}{deprecated} -} -\description{ -moved to isoprocessor -} diff --git a/man/iso_print_source_file_structure.Rd b/man/iso_print_source_file_structure.Rd deleted file mode 100644 index e54223ce..00000000 --- a/man/iso_print_source_file_structure.Rd +++ /dev/null @@ -1,37 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utils_binary_isodat_files.R -\name{iso_print_source_file_structure} -\alias{iso_print_source_file_structure} -\alias{iso_print_source_file_structure.binary_isodat_file} -\title{Print source file structure} -\usage{ -iso_print_source_file_structure(object, ..., save_to_file = NULL) - -\method{iso_print_source_file_structure}{binary_isodat_file}( - object, - start = 1, - length = NULL, - end = start + length, - ..., - save_to_file = NULL -) -} -\arguments{ -\item{object}{the object for which to print the source file structure.} - -\item{...}{additional parameters depending on source file types} - -\item{save_to_file}{whether to 
save the source file structure to a text file (provide file path, will overwrite anything already in the file!)} - -\item{start}{starting position in the binary file to print from (prints the first block that spans this range)} - -\item{length}{length in the binary file to print to (by default \code{NULL}, which means print everything)} - -\item{end}{until which point in the binary file to print to. If provided, overrides whatever is specified in \code{length}} -} -\value{ -the source file structure (invisibly if it is also saved to a file via \code{save_to_file}) -} -\description{ -Debugging function to print a representation of the structure of the source file underlying an isofile (if there is one). -} diff --git a/man/iso_reread_files.Rd b/man/iso_reread_files.Rd index f4a0b899..f1b30514 100644 --- a/man/iso_reread_files.Rd +++ b/man/iso_reread_files.Rd @@ -83,14 +83,14 @@ iso_turn_reader_caching_off() saved_files_path <- "saved_isofile.scan.rds" # create saved collection -iso_get_reader_examples_folder() \%>\% - iso_read_scan() \%>\% +iso_get_reader_examples_folder() |> + iso_read_scan() |> iso_save(saved_files_path) # load collection -iso_read_scan(saved_files_path) \%>\% +iso_read_scan(saved_files_path) |> # reread outdated files (alternatively "_all_" or "_changed_") - iso_reread_outdated_files() \%>\% + iso_reread_outdated_files() |> # re-save collection to its original location iso_save(saved_files_path) diff --git a/man/iso_save.Rd b/man/iso_save.Rd index f6f7493f..45f5f158 100644 --- a/man/iso_save.Rd +++ b/man/iso_save.Rd @@ -21,7 +21,7 @@ This function saves the passed in iso_files to an R Data Storage (.rds) file, wh } \seealso{ Other export functions: -\code{\link{iso_export_to_excel}()}, -\code{\link{iso_export_to_feather}()} +\code{\link{iso_export_files_to_excel}()}, +\code{\link{iso_export_files_to_feather}()} } \concept{export functions} diff --git a/man/isoread.Rd b/man/isoread.Rd deleted file mode 100644 index 702ad8bc..00000000 --- 
a/man/isoread.Rd +++ /dev/null @@ -1,14 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/isoread.R -\name{isoread} -\alias{isoread} -\title{Read isotope data file} -\usage{ -isoread(...) -} -\arguments{ -\item{...}{original isoread parameters} -} -\description{ -This function from the original isoread package is deprecated, please use \link{iso_read_dual_inlet}, \link{iso_read_continuous_flow} and \link{iso_read_scan} instead. -} diff --git a/man/print.binary_isodat_file.Rd b/man/print.binary_isodat_file.Rd deleted file mode 100644 index e57292d4..00000000 --- a/man/print.binary_isodat_file.Rd +++ /dev/null @@ -1,20 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utils_binary_isodat_files.R -\name{print.binary_isodat_file} -\alias{print.binary_isodat_file} -\title{Print formatted isodat structure, first 200 bytes by default} -\usage{ -\method{print}{binary_isodat_file}(x, start = x$pos, length = 200, ...) -} -\arguments{ -\item{x}{object to show} - -\item{start}{where to start to print (set by default by the current position of the file)} - -\item{length}{how much to print} - -\item{...}{additional parameters (passed to iso_print_source_file_structure)} -} -\description{ -Print formatted isodat structure, first 200 bytes by default -} diff --git a/man/reexports.Rd b/man/reexports.Rd index 85179b3d..44f1d057 100644 --- a/man/reexports.Rd +++ b/man/reexports.Rd @@ -10,7 +10,6 @@ \alias{parse_datetime} \alias{!!} \alias{!!!} -\alias{\%>\%} \alias{everything} \alias{starts_with} \alias{ends_with} @@ -28,8 +27,6 @@ below to see their documentation. 
\describe{ \item{dplyr}{\code{\link[dplyr]{filter}}} - \item{magrittr}{\code{\link[magrittr:pipe]{\%>\%}}} - \item{readr}{\code{\link[readr]{parse_datetime}}, \code{\link[readr:parse_atomic]{parse_double}}, \code{\link[readr:parse_atomic]{parse_integer}}, \code{\link[readr:parse_atomic]{parse_logical}}, \code{\link[readr]{parse_number}}, \code{\link[readr]{problems}}, \code{\link[readr:problems]{stop_for_problems}}} \item{rlang}{\code{\link[rlang:injection-operator]{!!}}, \code{\link[rlang:splice-operator]{!!!}}} diff --git a/tests/testthat/test-aggregate-data.R b/tests/testthat/test-aggregate-data.R index ce630c12..bfb2723d 100644 --- a/tests/testthat/test-aggregate-data.R +++ b/tests/testthat/test-aggregate-data.R @@ -65,8 +65,8 @@ test_that("test that unnesting of aggregated data works properly", { expect_true((unnest_aggregated_data_frame(tibble(dt = list(dt)))$dt - dt) < 10) # unnest even with NULLs present expect_equal( - bind_rows(df, select(df, -int)) %>% unnest_aggregated_data_frame(), - bind_rows(unnest(df, cols = everything()), unnest(select(df, -int), cols = everything())) + bind_rows(df, select(df, -"int")) |> unnest_aggregated_data_frame(), + bind_rows(unnest(df, cols = everything()), unnest(select(df, -"int"), cols = everything())) ) # don't unnest mixed type columns (throw warning instead) expect_warning( @@ -86,19 +86,19 @@ test_that("test that unnesting of aggregated data works properly", { ) # replace missing entries with NA instead (for string) expect_equal( - unnest_aggregated_data_frame(bind_rows(select(df, -chr), df2))$chr, + unnest_aggregated_data_frame(bind_rows(select(df, -"chr"), df2))$chr, c(NA_character_, df2$chr) ) # replace missing entries with NA instead (for integer) df2 <- mutate(df, int = map(int, ~c(1L,2L))) expect_equal( - unnest_aggregated_data_frame(bind_rows(select(df, -int), df2))$int, + unnest_aggregated_data_frame(bind_rows(select(df, -"int"), df2))$int, c(NA_integer_, df2$int) ) # replace missing entries with NA instead 
(for double) df2 <- mutate(df, dbl = map(dbl, ~c(1.0, 4.2))) expect_equal( - unnest_aggregated_data_frame(bind_rows(select(df, -dbl), df2))$dbl, + unnest_aggregated_data_frame(bind_rows(select(df, -"dbl"), df2))$dbl, c(NA_real_, df2$dbl) ) @@ -136,9 +136,9 @@ test_that("test that aggregating file info works", { expect_silent(agg <- iso_get_file_info(c(iso_file1, iso_file2), quiet = TRUE)) expect_equal(names(agg), unique(names(iso_file1$file_info), names(iso_file2$file_info))) expect_true(identical( - iso_get_file_info(c(iso_file1, iso_file2)) %>% unnest(multi_value), + iso_get_file_info(c(iso_file1, iso_file2)) |> unnest(multi_value), bind_rows(unnest(iso_file1$file_info, multi_value), - unnest(iso_file2$file_info, multi_value)) %>% + unnest(iso_file2$file_info, multi_value)) |> mutate(file_datetime = as_datetime(file_datetime, tz = Sys.timezone())) )) @@ -192,17 +192,17 @@ test_that("test that aggregeting raw data works", { expect_silent(iso_get_raw_data(c(iso_file1, iso_file2), quiet = TRUE)) expect_equal(iso_get_raw_data(c(iso_file1, iso_file2)), data <- bind_rows(mutate(iso_file1$raw_data, file_id="a"), - mutate(iso_file2$raw_data, file_id = "b")) %>% - select(file_id, everything())) + mutate(iso_file2$raw_data, file_id = "b")) |> + select("file_id", everything())) expect_equal(iso_get_raw_data(c(iso_file1, iso_file2), gather = TRUE), - data %>% tidyr::pivot_longer(matches("^[virdx]"), names_to = "column", values_to = "value", values_drop_na = TRUE) %>% + data |> tidyr::pivot_longer(dplyr::matches("^[virdx]"), names_to = "column", values_to = "value", values_drop_na = TRUE) |> left_join(tibble( column = c("v44.mV", "v46.mV", "i47.mA", "vC1.mV", "r46/44", "d46/44.permil", "x45.mA", "v45.mV"), category = c("mass", "mass", "mass", "channel", "ratio", "delta", "other", "mass"), data = c("44", "46", "47", "1", "46/44", "d46/44", "x45", "45"), units = c("mV", "mV", "mA", "mV", NA_character_, "permil", "mA", "mV") - ), by = "column") %>% + ), by = "column") |> 
select(file_id, tp, time.s, data, units, value, category) ) @@ -216,7 +216,7 @@ test_that("test that aggregeting raw data works", { expect_equal( suppressWarnings(iso_get_raw_data( c(make_iso_file_data_structure("NA"), iso_file1, iso_file2), - include_file_info = c("test_info")))$test_info %>% unique(), + include_file_info = c("test_info")))$test_info |> unique(), c("x", "y") ) }) @@ -263,7 +263,7 @@ test_that("test that aggregating of methods standards works", { # select_specific columns expect_equal(iso_get_standards(c(iso_file1, iso_file2), select = file_id:reference), data <- bind_rows(mutate(iso_file1$method_info$standards, file_id="a"), - mutate(iso_file2$method_info$standards, file_id="b")) %>% + mutate(iso_file2$method_info$standards, file_id="b")) |> select(file_id, everything())) expect_equal(iso_get_standards(c(iso_file1, iso_file2)), left_join(data, ref_ratios, by = "reference")) @@ -282,7 +282,7 @@ test_that("test that aggregating of methods standards works", { expect_equal( suppressWarnings(iso_get_standards( c(make_di_data_structure("NA"), iso_file1, iso_file2), - include_file_info = c("test_info")))$test_info %>% unique(), + include_file_info = c("test_info")))$test_info |> unique(), c("x", "y") ) @@ -309,7 +309,7 @@ test_that("test that aggregating of resistors works", { expect_silent(iso_get_resistors (c(iso_file1, iso_file2), quiet = TRUE)) expect_equal(iso_get_resistors (c(iso_file1, iso_file2)), data <- bind_rows(mutate(iso_file1$method_info$resistors, file_id="a"), - mutate(iso_file2$method_info$resistors, file_id="b")) %>% + mutate(iso_file2$method_info$resistors, file_id="b")) |> select(file_id, everything())) # select specific columns @@ -328,7 +328,7 @@ test_that("test that aggregating of resistors works", { expect_equal( suppressWarnings(iso_get_resistors ( c(make_iso_file_data_structure("NA"), iso_file1, iso_file2), - include_file_info = c("test_info")))$test_info %>% unique(), + include_file_info = c("test_info")))$test_info |> 
unique(), c("x", "y") ) @@ -372,12 +372,12 @@ test_that("test that aggregating of vendor data table works", { expect_message(agg <- iso_get_vendor_data_table(c(iso_file1, iso_file2), with_explicit_units = TRUE, quiet = FALSE), "aggregating") expect_equal(agg, vctrs::vec_rbind(mutate(iso_file1$vendor_data_table, file_id="a"), - mutate(iso_file2$vendor_data_table, file_id="b")) %>% - iso_make_units_explicit() %>% + mutate(iso_file2$vendor_data_table, file_id="b")) |> + iso_make_units_explicit() |> select(file_id, everything())) expect_equal(iso_get_vendor_data_table(c(iso_file1, iso_file2), with_explicit_units = FALSE), vctrs::vec_rbind(mutate(iso_file1$vendor_data_table, file_id="a"), - mutate(iso_file2$vendor_data_table, file_id="b")) %>% + mutate(iso_file2$vendor_data_table, file_id="b")) |> select(file_id, everything())) # selecting/renaming specific columns @@ -401,7 +401,7 @@ test_that("test that aggregating of vendor data table works", { expect_equal( suppressWarnings(iso_get_vendor_data_table( c(make_cf_data_structure("NA"), iso_file1, iso_file2), - include_file_info = c("test_info")))$test_info %>% unique(), + include_file_info = c("test_info")))$test_info |> unique(), c("x", "y") ) @@ -463,62 +463,62 @@ test_that("test that total data aggregation works", { # file_info expect_warning(iso_get_all_data(c(iso_file1, iso_file2), include_file_info = x), "unknown column") iso_file1$file_info$a <- 42 - expect_equal(iso_get_all_data(c(iso_file1, iso_file2), include_file_info = c(x = a)) %>% unnest(file_info) %>% dplyr::pull(x), c(42, NA_real_)) + expect_equal(iso_get_all_data(c(iso_file1, iso_file2), include_file_info = c(x = a)) |> unnest(file_info) |> dplyr::pull(x), c(42, NA_real_)) # raw data - expect_equal(iso_get_all_data(c(iso_file1, iso_file2)) %>% unnest(raw_data) %>% nrow(), 0L) + expect_equal(iso_get_all_data(c(iso_file1, iso_file2)) |> unnest(raw_data) |> nrow(), 0L) iso_file1$raw_data <- tibble(tp = 1:10, time.s = tp*0.2, v44.mV = runif(10), v46.mV = 
runif(10), `r46/44` = v46.mV/v44.mV) - expect_equal(iso_get_all_data(c(iso_file1, iso_file2)) %>% unnest(raw_data) %>% dplyr::pull(file_id) %>% unique(), "a") + expect_equal(iso_get_all_data(c(iso_file1, iso_file2)) |> unnest(raw_data) |> dplyr::pull(file_id) |> unique(), "a") iso_file2$raw_data <- tibble(tp = 1:10, time.s = tp*0.2, v44.mV = runif(10), v46.mV = runif(10), v45.mV = runif(10)) - expect_equal(iso_get_all_data(c(iso_file1, iso_file2)) %>% unnest(raw_data) %>% dplyr::pull(file_id) %>% unique(), c("a", "b")) - expect_false("v44.mV" %in% (iso_get_all_data(c(iso_file1, iso_file2), include_raw_data = c(-v44.mV)) %>% unnest(raw_data) %>% names())) + expect_equal(iso_get_all_data(c(iso_file1, iso_file2)) |> unnest(raw_data) |> dplyr::pull(file_id) |> unique(), c("a", "b")) + expect_false("v44.mV" %in% (iso_get_all_data(c(iso_file1, iso_file2), include_raw_data = c(-v44.mV)) |> unnest(raw_data) |> names())) # standards - expect_equal(iso_get_all_data(c(iso_file1, iso_file2)) %>% unnest(standards) %>% nrow(), 0L) + expect_equal(iso_get_all_data(c(iso_file1, iso_file2)) |> unnest(standards) |> nrow(), 0L) iso_file1 <- modifyList(iso_file, list(file_info = list(file_id = "a"), method_info = list(standards = tibble(standard = "test a")))) - expect_equal(iso_get_all_data(c(iso_file1, iso_file2)) %>% unnest(standards) %>% dplyr::pull(file_id) %>% unique(), "a") + expect_equal(iso_get_all_data(c(iso_file1, iso_file2)) |> unnest(standards) |> dplyr::pull(file_id) |> unique(), "a") iso_file2 <- modifyList(iso_file, list(file_info = list(file_id = "b"), method_info = list(standards = tibble(standard = "test a")))) - expect_equal(iso_get_all_data(c(iso_file1, iso_file2)) %>% unnest(standards) %>% dplyr::pull(file_id) %>% unique(), c("a", "b")) - expect_true(is_tibble(out <- iso_get_all_data(c(iso_file1, iso_file2)) %>% unnest(standards))) + expect_equal(iso_get_all_data(c(iso_file1, iso_file2)) |> unnest(standards) |> dplyr::pull(file_id) |> unique(), c("a", "b")) + 
expect_true(is_tibble(out <- iso_get_all_data(c(iso_file1, iso_file2)) |> unnest(standards))) expect_equal(select(out, standard), bind_rows(iso_file1$method_info$standards, iso_file2$method_info$standards)) - expect_false("standard" %in% names(iso_get_all_data(c(iso_file1, iso_file2), include_standards = c(-standard)) %>% unnest(standards))) + expect_false("standard" %in% names(iso_get_all_data(c(iso_file1, iso_file2), include_standards = c(-standard)) |> unnest(standards))) # resistors - expect_equal(iso_get_all_data(c(iso_file1, iso_file2)) %>% unnest(resistors) %>% nrow(), 0L) + expect_equal(iso_get_all_data(c(iso_file1, iso_file2)) |> unnest(resistors) |> nrow(), 0L) iso_file1 <- modifyList(iso_file, list(file_info = list(file_id = "a"), method_info = list(resistors = tibble(cup = 1:3, R.Ohm = c(1e9, 1e10, 1e11))))) - expect_equal(iso_get_all_data(c(iso_file1, iso_file2)) %>% unnest(resistors) %>% dplyr::pull(file_id) %>% unique(), "a") + expect_equal(iso_get_all_data(c(iso_file1, iso_file2)) |> unnest(resistors) |> dplyr::pull(file_id) |> unique(), "a") iso_file2 <- modifyList(iso_file, list(file_info = list(file_id = "b"), method_info = list(resistors = tibble(cup = 1:3, R.Ohm = c(3e9, 1e11, 1e12))))) - expect_equal(iso_get_all_data(c(iso_file1, iso_file2)) %>% unnest(resistors) %>% dplyr::pull(file_id) %>% unique(), c("a", "b")) - expect_true(is_tibble(out <- iso_get_all_data(c(iso_file1, iso_file2)) %>% unnest(resistors))) + expect_equal(iso_get_all_data(c(iso_file1, iso_file2)) |> unnest(resistors) |> dplyr::pull(file_id) |> unique(), c("a", "b")) + expect_true(is_tibble(out <- iso_get_all_data(c(iso_file1, iso_file2)) |> unnest(resistors))) expect_equal(select(out, cup, R.Ohm), bind_rows(iso_file1$method_info$resistors, iso_file2$method_info$resistors)) - expect_true(is_tibble(out <- iso_get_all_data(c(iso_file1, iso_file2), include_resistors = c(-cup)) %>% unnest(resistors))) + expect_true(is_tibble(out <- iso_get_all_data(c(iso_file1, iso_file2), 
include_resistors = c(-cup)) |> unnest(resistors))) expect_false("cup" %in% names(out)) # vendor data table - expect_equal(iso_get_all_data(c(iso_file1, iso_file2)) %>% unnest(vendor_data_table) %>% nrow(), 0L) + expect_equal(iso_get_all_data(c(iso_file1, iso_file2)) |> unnest(vendor_data_table) |> nrow(), 0L) iso_file1$vendor_data_table <- tibble(column1 = "col1 a", column2 = "col2 a") - expect_equal(iso_get_all_data(c(iso_file1, iso_file2)) %>% unnest(vendor_data_table) %>% dplyr::pull(file_id) %>% unique(), "a") + expect_equal(iso_get_all_data(c(iso_file1, iso_file2)) |> unnest(vendor_data_table) |> dplyr::pull(file_id) |> unique(), "a") iso_file2$vendor_data_table <- tibble(column1 = "col1 b", column2 = "col2 b") - expect_equal(iso_get_all_data(c(iso_file1, iso_file2)) %>% unnest(vendor_data_table) %>% dplyr::pull(file_id) %>% unique(), c("a", "b")) - expect_true(is_tibble(out <- iso_get_all_data(c(iso_file1, iso_file2)) %>% unnest(vendor_data_table))) + expect_equal(iso_get_all_data(c(iso_file1, iso_file2)) |> unnest(vendor_data_table) |> dplyr::pull(file_id) |> unique(), c("a", "b")) + expect_true(is_tibble(out <- iso_get_all_data(c(iso_file1, iso_file2)) |> unnest(vendor_data_table))) expect_equal(select(out,column1, column2), bind_rows(iso_file1$vendor_data_table, iso_file2$vendor_data_table) ) - expect_true(is_tibble(out <- iso_get_all_data(c(iso_file1, iso_file2), include_vendor_data_table = c(x = column1)) %>% unnest(vendor_data_table))) + expect_true(is_tibble(out <- iso_get_all_data(c(iso_file1, iso_file2), include_vendor_data_table = c(x = column1)) |> unnest(vendor_data_table))) expect_equal(out$x, bind_rows(iso_file1$vendor_data_table, iso_file2$vendor_data_table)$column1) # problems - expect_equal(iso_get_all_data(c(iso_file1, iso_file2), include_problems = everything()) %>% unnest(problems) %>% nrow(), 0L) - iso_file1 <- iso_file1 %>% register_error("test", warn = FALSE) - expect_equal(iso_get_all_data(c(iso_file1, iso_file2), include_problems = 
everything()) %>% unnest(problems) %>% dplyr::pull(file_id) %>% unique(), "a") - iso_file2 <- iso_file2 %>% register_error("test2", warn = FALSE) - expect_equal(iso_get_all_data(c(iso_file1, iso_file2), include_problems = everything()) %>% unnest(problems) %>% dplyr::pull(file_id) %>% unique(), c("a", "b")) - expect_equal(iso_get_all_data(c(iso_file1, iso_file2), include_problems = everything()) %>% select(file_id, problems) %>% unnest(problems), + expect_equal(iso_get_all_data(c(iso_file1, iso_file2), include_problems = everything()) |> unnest(problems) |> nrow(), 0L) + iso_file1 <- iso_file1 |> register_error("test", warn = FALSE) + expect_equal(iso_get_all_data(c(iso_file1, iso_file2), include_problems = everything()) |> unnest(problems) |> dplyr::pull(file_id) |> unique(), "a") + iso_file2 <- iso_file2 |> register_error("test2", warn = FALSE) + expect_equal(iso_get_all_data(c(iso_file1, iso_file2), include_problems = everything()) |> unnest(problems) |> dplyr::pull(file_id) |> unique(), c("a", "b")) + expect_equal(iso_get_all_data(c(iso_file1, iso_file2), include_problems = everything()) |> select(file_id, problems) |> unnest(problems), iso_get_problems(c(iso_file1, iso_file2))) - expect_equal(iso_get_all_data(c(iso_file1, iso_file2), include_problems = c(test = type)) %>% select(file_id, problems) %>% unnest(problems), - iso_get_problems(c(iso_file1, iso_file2)) %>% select(file_id, test = type)) + expect_equal(iso_get_all_data(c(iso_file1, iso_file2), include_problems = c(test = type)) |> select(file_id, problems) |> unnest(problems), + iso_get_problems(c(iso_file1, iso_file2)) |> select(file_id, test = type)) }) diff --git a/tests/testthat/test-continuous-flow.R b/tests/testthat/test-continuous-flow.R index b5ba249b..fb7d0992 100644 --- a/tests/testthat/test-continuous-flow.R +++ b/tests/testthat/test-continuous-flow.R @@ -1,11 +1,13 @@ context("Continuous flow") +# data structures ======== + test_that("test that supported cf files are correct", { 
initialize_options() expect_is(exts <- get_supported_cf_files(), "data.frame") expect_equal(exts$extension, c(".cf", ".cf.rds", ".dxf", ".iarc")) - expect_true(all(exts$func %>% sapply(class) == "character")) - expect_true(all(exts$func %>% map_lgl(exists, mode = "function", where = asNamespace("isoreader")))) + expect_true(all(exts$func |> sapply(class) == "character")) + expect_true(all(exts$func |> map_lgl(exists, mode = "function", where = asNamespace("isoreader")))) }) test_that("test that parameter checks are performed", { @@ -18,12 +20,18 @@ test_that("test that parameter checks are performed", { }) +# example files ======== + test_that("test that continous flow files can be read", { - # skip on CRAN to reduce checktime to below 10 minutes + # skip on CRAN to reduce check time to below 10 minutes skip_on_cran() - # test specific files + # skip if flag is set + if (identical(getOption("isoreader.skip_file_tests"), TRUE)) + skip("Currently not testing continuous flow example files.") + + # start tests for files iso_turn_reader_caching_off() # .cf file @@ -60,7 +68,7 @@ test_that("test that continous flow files can be read", { ) ) expect_equal( - iso_get_units(iso_get_vendor_data_table(cf)) %>% as.character(), + iso_get_units(iso_get_vendor_data_table(cf)) |> as.character(), c(NA, NA, "s", "s", "s", "mV", "mV", "mV", "mV", "mVs", "mVs", "mVs", "Vs", "Vs", "Vs", NA, NA, NA, NA, NA, "permil", "permil", NA, "permil", "%", NA) @@ -109,7 +117,7 @@ test_that("test that continous flow files can be read", { "d 17O/16O", "Rps 45CO2/44CO2", "Rps 46CO2/44CO2") ) expect_equal( - iso_get_units(iso_get_vendor_data_table(dxf)) %>% as.character(), + iso_get_units(iso_get_vendor_data_table(dxf)) |> as.character(), c(NA, NA, "s", "s", "s", "mV", "mV", "mV", "mV", "mV", "mV", "mV", "mV", "mV", "mV", "mV", "mV", "mVs", "mVs", "mVs", "mVs", "Vs", "Vs", "Vs", "Vs", "%", NA, NA, NA, NA, NA, "permil", "permil", @@ -127,10 +135,21 @@ test_that("test that continous flow files can be 
read", { }) +# additional test files ====== + test_that("test that additional continous flow files can be read", { - # additional test files (skip on CRAN because test files not includes due to tarball size limits) ===== + # skip on CRAN (test files too big and check time too long) skip_on_cran() + + # skip if flag is set + if (identical(getOption("isoreader.skip_file_tests"), TRUE)) + skip("Currently not testing additional continuous flow files.") + + # start tests for files + iso_turn_reader_caching_off() + + test_folder <- file.path("test_data") # test_folder <- file.path("tests", "testthat", "test_data") # direct iso_turn_reader_caching_off() @@ -470,13 +489,13 @@ test_that("test that additional continous flow files can be read", { `d 2H/1H` = "permil", `AT% 2H/1H` = "%", `Rps 3H2/2H2` = NA) ) - # test re-reading ======= + ## test re-reading ======= # NOTE: ideally this should also include an iarc file iso_files <- c(dxf1, dxf2, cf1) expect_true(iso_is_continuous_flow(reread_dxf <- reread_iso_files(iso_files))) expect_equal(nrow(problems(reread_dxf)), 0) - # test parallel processing ====== + ## test parallel processing ====== # multisession file_paths <- file.path(test_folder, diff --git a/tests/testthat/test-data-structures.R b/tests/testthat/test-data-structures.R index 096ce4a7..2f24c37f 100644 --- a/tests/testthat/test-data-structures.R +++ b/tests/testthat/test-data-structures.R @@ -109,17 +109,17 @@ test_that("test that data structure can be printed", { test_that("test that iso_file list checks work", { # empty iso file list doesn't break anything expect_is(iso_files <- iso_as_file_list(), "iso_file_list") - expect_error(make_cf_data_structure() %>% iso_as_file_list(), "encountered undefined.*file ID") + expect_error(make_cf_data_structure() |> iso_as_file_list(), "encountered undefined.*file ID") - expect_is(make_cf_data_structure("NA") %>% iso_as_file_list(), "iso_file_list") - expect_is(make_cf_data_structure("NA") %>% iso_as_file_list() %>% + 
expect_is(make_cf_data_structure("NA") |> iso_as_file_list(), "iso_file_list") + expect_is(make_cf_data_structure("NA") |> iso_as_file_list() |> iso_as_file_list(), "iso_file_list") - expect_equal(iso_as_file_list() %>% iso_get_problems() %>% nrow(), 0) - expect_equal(iso_as_file_list() %>% iso_get_problems() %>% names(), c("file_id", "type", "func", "details")) - expect_equal(iso_as_file_list() %>% iso_get_data_summary() %>% nrow(), 0) - expect_equal(iso_as_file_list() %>% iso_get_raw_data() %>% nrow(), 0) - expect_equal(iso_as_file_list() %>% iso_get_file_info() %>% nrow(), 0) - expect_equal(iso_as_file_list() %>% iso_get_resistors() %>% nrow(), 0) + expect_equal(iso_as_file_list() |> iso_get_problems() |> nrow(), 0) + expect_equal(iso_as_file_list() |> iso_get_problems() |> names(), c("file_id", "type", "func", "details")) + expect_equal(iso_as_file_list() |> iso_get_data_summary() |> nrow(), 0) + expect_equal(iso_as_file_list() |> iso_get_raw_data() |> nrow(), 0) + expect_equal(iso_as_file_list() |> iso_get_file_info() |> nrow(), 0) + expect_equal(iso_as_file_list() |> iso_get_resistors() |> nrow(), 0) # combining data structures with filled and unfilled file_datetime cf1 <- make_cf_data_structure("A") @@ -131,8 +131,8 @@ test_that("test that iso_file list checks work", { as_datetime(c(NA, cf2$file_info$file_datetime), tz = Sys.timezone())) # expected errors - expect_error(iso_as_file_list() %>% iso_get_vendor_data_table(), "only dual inlet.*continuous flow") - expect_error(iso_as_file_list() %>% iso_get_standards(), "only dual inlet.*continuous flow") + expect_error(iso_as_file_list() |> iso_get_vendor_data_table(), "only dual inlet.*continuous flow") + expect_error(iso_as_file_list() |> iso_get_standards(), "only dual inlet.*continuous flow") expect_error(iso_as_file_list(1, error = "test"), "encountered incompatible data type") expect_false(iso_is_file_list(42)) expect_false(iso_is_file_list(make_iso_file_data_structure("NA"))) @@ -193,9 +193,9 @@ 
test_that("test that can update read options", { test_that("test that isofils objects can be combined and subset", { expect_is(iso_file <- make_iso_file_data_structure("NA"), "iso_file") - expect_equal({ iso_fileA <- iso_file %>% { .$file_info$file_id <- "A"; . }; iso_fileA$file_info$file_id }, "A") - expect_equal({ iso_fileB <- iso_file %>% { .$file_info$file_id <- "B"; . }; iso_fileB$file_info$file_id }, "B") - expect_equal({ iso_fileC <- iso_file %>% { .$file_info$file_id <- "C"; . }; iso_fileC$file_info$file_id }, "C") + expect_equal({ iso_fileA <- iso_file; iso_fileA$file_info$file_id <- "A"; iso_fileA$file_info$file_id }, "A") + expect_equal({ iso_fileB <- iso_file; iso_fileB$file_info$file_id <- "B"; iso_fileB$file_info$file_id }, "B") + expect_equal({ iso_fileC <- iso_file; iso_fileC$file_info$file_id <- "C"; iso_fileC$file_info$file_id }, "C") # combinining iso_files expect_error(c(iso_fileA, 5), "can only process iso_file and iso_file\\_list") @@ -210,10 +210,10 @@ test_that("test that isofils objects can be combined and subset", { "duplicate files kept") expect_is(iso_filesABABC, "iso_file_list") expect_equal(names(iso_filesABABC), c("A#1", "B#1", "A#2", "B#2", "C")) - expect_equal(problems(iso_filesABABC) %>% select(file_id, type), tibble(file_id = c("A#1", "B#1", "A#2", "B#2"), type = "warning")) - expect_equal(problems(iso_filesABABC[[1]]) %>% select(type), tibble(type = "warning")) - expect_equal(problems(iso_filesABABC[[2]]) %>% select(type), tibble(type = "warning")) - expect_equal(problems(iso_filesABABC[[5]]) %>% select(type), tibble(type = character(0))) + expect_equal(problems(iso_filesABABC) |> select(file_id, type), tibble(file_id = c("A#1", "B#1", "A#2", "B#2"), type = "warning")) + expect_equal(problems(iso_filesABABC[[1]]) |> select(type), tibble(type = "warning")) + expect_equal(problems(iso_filesABABC[[2]]) |> select(type), tibble(type = "warning")) + expect_equal(problems(iso_filesABABC[[5]]) |> select(type), tibble(type = 
character(0))) expect_warning(warn_problems(iso_filesABABC), "encountered 4 problems") expect_warning(warn_problems(iso_filesABABC), "4 \\|") expect_warning(warn_problems(iso_filesABABC, cutoff = 3), "3-4") @@ -221,7 +221,7 @@ test_that("test that isofils objects can be combined and subset", { ## combining identical files (with discarding duplicates, i.e. default behavior) expect_message(iso_filesABA <- c(iso_fileA, iso_fileB, iso_fileA), "duplicate files encountered, only first kept") - expect_equal(problems(iso_filesABA) %>% select(file_id, type), tibble(file_id = "A", type = "warning")) + expect_equal(problems(iso_filesABA) |> select(file_id, type), tibble(file_id = "A", type = "warning")) expect_equal(names(iso_filesABA), c("A", "B")) expect_warning(warn_problems(iso_filesABA), "encountered 1 problem\\.") expect_equal(problems(c(iso_fileA, iso_fileA)), problems(c(iso_fileA, iso_fileA, iso_fileA))) @@ -233,10 +233,10 @@ test_that("test that isofils objects can be combined and subset", { register_warning(iso_fileB, "warning B", warn=FALSE)), "iso_file_list" ) - expect_equal(problems(iso_filesAB_probs) %>% select(file_id, details), + expect_equal(problems(iso_filesAB_probs) |> select(file_id, details), tibble(file_id = c("A", "B"), details = paste("warning", c("A", "B")))) expect_message(iso_files_ABB_probs <- c(iso_filesAB_probs, iso_fileB), "duplicate files encountered") - expect_equal(problems(iso_files_ABB_probs) %>% select(file_id, details), + expect_equal(problems(iso_files_ABB_probs) |> select(file_id, details), tibble(file_id = c("A", "B", "B"), details = c("warning A", "warning B", "duplicate files encountered, only first kept: B"))) @@ -266,7 +266,7 @@ test_that("test that isofils objects can be combined and subset", { expect_equal(names(iso_files), "B") # convertion to list - expect_equal(as.list(iso_filesABC) %>% class(), "list") + expect_equal(as.list(iso_filesABC) |> class(), "list") expect_equal(as.list(iso_filesABC)[[1]], iso_filesABC[[1]]) }) diff 
--git a/tests/testthat/test-deprecated.R b/tests/testthat/test-deprecated.R index 6e4e2b03..5e5da91a 100644 --- a/tests/testthat/test-deprecated.R +++ b/tests/testthat/test-deprecated.R @@ -1,30 +1,12 @@ context("Deprecated functions") test_that("test that deprecated functions give the appropriate error", { - expect_error(isoread(), "Deprecated") + + expect_warning(tryCatch(iso_get_data(), error = function(e){}), "deprecated") + expect_warning(tryCatch(iso_get_standards_info(), error = function(e){}), "deprecated") + expect_warning(tryCatch(iso_get_resistors_info(), error = function(e){}), "deprecated") + expect_warning(tryCatch(iso_omit_files_with_problems(), error = function(e){}), "deprecated") + expect_warning(tryCatch(iso_export_to_excel(), error = function(e){}), "deprecated") + expect_warning(tryCatch(iso_export_to_feather(), error = function(e){}), "deprecated") + }) - - -test_that("test that ratio functions throw deprecation error", { - expect_error(iso_calculate_ratios(), "moved to the isoprocessor") -}) - - -test_that("test that plottings functions throw deprecation error", { - expect_error(iso_convert_time(), "moved to the isoprocessor") - expect_error(iso_convert_signals(), "moved to the isoprocessor") -}) - - -test_that("test that plottings functions throw deprecation error", { - expect_error(iso_plot_raw_data(), "moved to the isoprocessor") - expect_error(iso_plot_dual_inlet_data(), "moved to the isoprocessor") - expect_error(iso_plot_continuous_flow_data(), "moved to the isoprocessor") -}) - - -test_that("test that cache files with errors is deprecated", { - expect_warning(tryCatch(iso_read_continuous_flow("NA", cache_files_with_errors = FALSE), error = function(e) {}, warning = function(w){ warning(w) }), "deprecated") - expect_warning(tryCatch(iso_read_dual_inlet("NA", cache_files_with_errors = FALSE), error = function(e) {}, warning = function(w){ warning(w) }), "deprecated") - expect_warning(tryCatch(iso_read_scan("NA", cache_files_with_errors = 
FALSE), error = function(e) {}, warning = function(w){ warning(w) }), "deprecated") -}) \ No newline at end of file diff --git a/tests/testthat/test-dual-inlet.R b/tests/testthat/test-dual-inlet.R index 853ca2d7..c0d73460 100644 --- a/tests/testthat/test-dual-inlet.R +++ b/tests/testthat/test-dual-inlet.R @@ -6,78 +6,82 @@ test_that("test that supported di files are correct", { initialize_options() expect_is(exts <- get_supported_di_files(), "data.frame") expect_equal(exts$extension, c(".caf", ".di.rds", ".did", ".txt")) - expect_true(all(exts$func %>% sapply(class) == "character")) - expect_true(all(exts$func %>% map_lgl(exists, mode = "function", where = asNamespace("isoreader")))) + expect_true(all(exts$func |> sapply(class) == "character")) + expect_true(all(exts$func |> map_lgl(exists, mode = "function", where = asNamespace("isoreader")))) }) test_that("test that parameter checks are performed", { - expect_error(iso_read_did(make_cf_data_structure("NA")), + expect_error(iso_read_did(make_cf_data_structure("NA")), "data structure must be a \\'dual_inlet\\' iso_file") - + }) # nu files ====== test_that("test that nu file processor works properly", { - + expect_error(group_lines(list())) expect_error(group_lines("")) - + }) -# actual files ======== +# example files ======== -test_that("test that dual inlet files can be read", { - - # skip on CRAN to reduce checktime to below 10 minutes +test_that("test that dual inlet example files can be read", { + + # skip on CRAN to reduce check time to below 10 minutes skip_on_cran() - # test specific files + # skip if flag is set + if (identical(getOption("isoreader.skip_file_tests"), TRUE)) + skip("Currently not testing dual inlet example files.") + + # start tests for files iso_turn_reader_caching_off() - + # .did expect_true(file.exists(file <- iso_get_reader_example("dual_inlet_example.did"))) expect_is(did <- iso_read_dual_inlet(file), "dual_inlet") expect_equal(nrow(problems(did)), 0) expect_equal(nrow(did$file_info), 
1) expect_equal( - names(did$file_info), - c("file_id", "file_root", "file_path", "file_subpath", "file_datetime", - "file_size", "Line", "Peak Center", "Pressadjust", "Background", - "Identifier 1", "Identifier 2", "Analysis", "Method", "measurement_info", + names(did$file_info), + c("file_id", "file_root", "file_path", "file_subpath", "file_datetime", + "file_size", "Line", "Peak Center", "Pressadjust", "Background", + "Identifier 1", "Identifier 2", "Analysis", "Method", "measurement_info", "MS_integration_time.s") ) expect_equal(nrow(did$vendor_data_table), 7) expect_equal( iso_get_units(did$vendor_data_table), - c(cycle = NA_character_, `d 45CO2/44CO2` = NA_character_, `d 46CO2/44CO2` = NA_character_, - `d 13C/12C` = NA_character_, `d 18O/16O` = NA_character_, `d 17O/16O` = NA_character_, + c(cycle = NA_character_, `d 45CO2/44CO2` = NA_character_, `d 46CO2/44CO2` = NA_character_, + `d 13C/12C` = NA_character_, `d 18O/16O` = NA_character_, `d 17O/16O` = NA_character_, `AT% 13C/12C` = NA_character_, `AT% 18O/16O` = NA_character_) ) - + # .caf expect_true(file.exists(file <- iso_get_reader_example("dual_inlet_example.caf"))) expect_is(caf <- iso_read_dual_inlet(file), "dual_inlet") expect_equal(nrow(problems(caf)), 0) expect_equal(nrow(caf$file_info), 1) expect_equal( - names(caf$file_info), - c("file_id", "file_root", "file_path", "file_subpath", "file_datetime", - "file_size", "Line", "Peak Center", "Pressadjust", "Background", - "Reference Refill", "Weight [mg]", "Sample", "Identifier 1", - "Identifier 2", "Analysis", "Comment", "Preparation", "Pre Script", + names(caf$file_info), + c("file_id", "file_root", "file_path", "file_subpath", "file_datetime", + "file_size", "Line", "Peak Center", "Pressadjust", "Background", + "Reference Refill", "Weight [mg]", "Sample", "Identifier 1", + "Identifier 2", "Analysis", "Comment", "Preparation", "Pre Script", "Post Script", "Method", "MS_integration_time.s") ) expect_equal(nrow(caf$vendor_data_table), 8) expect_equal( 
iso_get_units(caf$vendor_data_table), - c(cycle = NA, `d 45CO2/44CO2` = "permil", `d 46CO2/44CO2` = "permil", - `d 13C/12C` = "permil", `d 18O/16O` = "permil", `d 17O/16O` = " ", + c(cycle = NA, `d 45CO2/44CO2` = "permil", `d 46CO2/44CO2` = "permil", + `d 13C/12C` = "permil", `d 18O/16O` = "permil", `d 17O/16O` = " ", `d 47CO2/44CO2` = "permil", `d 48CO2/44CO2` = "permil", `d 49CO2/44CO2` = "permil" ) ) - + # .txt (nu) expect_true(file.exists(file <- iso_get_reader_example("dual_inlet_nu_example.txt"))) expect_is(did <- iso_read_dual_inlet(file, nu_masses = 49:44, read_cache = FALSE), "dual_inlet") @@ -85,161 +89,170 @@ test_that("test that dual inlet files can be read", { expect_warning(did <- iso_read_dual_inlet(file, read_cache = FALSE), "encountered 1 problem\\.") expect_true(stringr::str_detect(iso_get_problems(did)$details, fixed("found 6 channels but 0 masses were specified"))) expect_equal(nrow(problems(did)), 1) - + }) +# additional test files ====== + test_that("test that additional dual inlet files can be read", { - - # additional test files (skip on CRAN because test files not includes due to tarball size limits) ===== + + # skip on CRAN (test files too big and check time too long) skip_on_cran() - iso_turn_reader_caching_off() + # skip if flag is set + if (identical(getOption("isoreader.skip_file_tests"), TRUE)) + skip("Currently not testing additional dual inlet files.") + + # start tests for files + iso_turn_reader_caching_off() + # testing wrapper check_dual_inlet_test_file <- function(file, file_info_cols = NULL, data_table_nrow = NULL, data_table_col_units = NULL, ...) 
{ file_path <- get_isoreader_test_file(file, local_folder = test_data) expect_true(file.exists(file_path)) expect_is(file <- iso_read_dual_inlet(file_path, ...), "dual_inlet") expect_equal(nrow(problems(file)), 0) - if (!is.null(file_info_cols)) + if (!is.null(file_info_cols)) expect_equal(names(file$file_info), file_info_cols) else dput(names(file$file_info)) - if (!is.null(data_table_nrow)) + if (!is.null(data_table_nrow)) expect_equal(nrow(file$vendor_data_table), data_table_nrow) else cat(nrow(file$vendor_data_table), ",\n") - if (!is.null(data_table_col_units)) + if (!is.null(data_table_col_units)) expect_equal(iso_get_units(file$vendor_data_table), data_table_col_units) else dput(iso_get_units(file$vendor_data_table)) return(invisible(file)) } - + # .did files test_data <- file.path("test_data") # test_data <- file.path("tests", "testthat", "test_data") # direct check_dual_inlet_test_file( "did_example_air.did", - c("file_id", "file_root", "file_path", "file_subpath", "file_datetime", - "file_size", "Row", "Peak Center", "Background", "Pressadjust", - "Identifier 1", "Identifier 2", "Analysis", "Comment", "Method", + c("file_id", "file_root", "file_path", "file_subpath", "file_datetime", + "file_size", "Row", "Peak Center", "Background", "Pressadjust", + "Identifier 1", "Identifier 2", "Analysis", "Comment", "Method", "measurement_info", "MS_integration_time.s"), 15, - c(cycle = NA_character_, `d 32O2/28N2` = NA_character_, `d 32O2/29N2` = NA_character_, - `d 40Ar/28N2` = NA_character_, `d 32O2/40Ar` = NA_character_, + c(cycle = NA_character_, `d 32O2/28N2` = NA_character_, `d 32O2/29N2` = NA_character_, + `d 40Ar/28N2` = NA_character_, `d 32O2/40Ar` = NA_character_, `d 44CO2/28N2` = NA_character_, `d 44CO2/40Ar` = NA_character_ ) ) check_dual_inlet_test_file( "did_example_CO2_clumped_01.did", - c("file_id", "file_root", "file_path", "file_subpath", "file_datetime", - "file_size", "Row", "Peak Center", "Background", "Pressadjust", - "Reference Refill", 
"Identifier 1", "Identifier 2", "Analysis", + c("file_id", "file_root", "file_path", "file_subpath", "file_datetime", + "file_size", "Row", "Peak Center", "Background", "Pressadjust", + "Reference Refill", "Identifier 1", "Identifier 2", "Analysis", "Comment", "Method", "measurement_info", "MS_integration_time.s" ), 7, - c(cycle = NA_character_, `d 45CO2/44CO2` = NA_character_, `d 46CO2/44CO2` = NA_character_, - `d 13C/12C` = NA_character_, `d 18O/16O` = NA_character_, `d 17O/16O` = NA_character_, + c(cycle = NA_character_, `d 45CO2/44CO2` = NA_character_, `d 46CO2/44CO2` = NA_character_, + `d 13C/12C` = NA_character_, `d 18O/16O` = NA_character_, `d 17O/16O` = NA_character_, `AT% 13C/12C` = NA_character_, `AT% 18O/16O` = NA_character_) ) check_dual_inlet_test_file( "did_example_many_cycles.did", - c("file_id", "file_root", "file_path", "file_subpath", "file_datetime", - "file_size", "Row", "Peak Center", "Background", "Pressadjust", - "Reference Refill", "Identifier 1", "Identifier 2", "Analysis", + c("file_id", "file_root", "file_path", "file_subpath", "file_datetime", + "file_size", "Row", "Peak Center", "Background", "Pressadjust", + "Reference Refill", "Identifier 1", "Identifier 2", "Analysis", "Comment", "Preparation", "Method", "measurement_info", "MS_integration_time.s" ), 50, - c(cycle = NA_character_, `d 45CO2/44CO2` = NA_character_, `d 46CO2/44CO2` = NA_character_, - `d 13C/12C` = NA_character_, `d 18O/16O` = NA_character_, `d 17O/16O` = NA_character_, + c(cycle = NA_character_, `d 45CO2/44CO2` = NA_character_, `d 46CO2/44CO2` = NA_character_, + `d 13C/12C` = NA_character_, `d 18O/16O` = NA_character_, `d 17O/16O` = NA_character_, `AT% 13C/12C` = NA_character_, `AT% 18O/16O` = NA_character_) ) check_dual_inlet_test_file( "did_example_unicode.did", - c("file_id", "file_root", "file_path", "file_subpath", "file_datetime", - "file_size", "Row", "Peak Center", "Background", "Pressadjust", - "Reference Refill", "Line", "Sample", "Weight [mg]", "Identifier 
1", - "Analysis", "Comment", "Preparation", "Method", "measurement_info", + c("file_id", "file_root", "file_path", "file_subpath", "file_datetime", + "file_size", "Row", "Peak Center", "Background", "Pressadjust", + "Reference Refill", "Line", "Sample", "Weight [mg]", "Identifier 1", + "Analysis", "Comment", "Preparation", "Method", "measurement_info", "MS_integration_time.s"), 60, - c(cycle = NA_character_, `d 45CO2/44CO2` = NA_character_, `d 46CO2/44CO2` = NA_character_, - `d 47CO2/44CO2` = NA_character_, `d 48CO2/44CO2` = NA_character_, - `d 49CO2/44CO2` = NA_character_, `d 13C/12C` = NA_character_, - `d 18O/16O` = NA_character_, `d 17O/16O` = NA_character_, `AT% 13C/12C` = NA_character_, + c(cycle = NA_character_, `d 45CO2/44CO2` = NA_character_, `d 46CO2/44CO2` = NA_character_, + `d 47CO2/44CO2` = NA_character_, `d 48CO2/44CO2` = NA_character_, + `d 49CO2/44CO2` = NA_character_, `d 13C/12C` = NA_character_, + `d 18O/16O` = NA_character_, `d 17O/16O` = NA_character_, `AT% 13C/12C` = NA_character_, `AT% 18O/16O` = NA_character_) ) check_dual_inlet_test_file( "did_ultra_example.did", - c("file_id", "file_root", "file_path", "file_subpath", "file_datetime", - "file_size", "Row", "Peak Center", "Switch Cup Positions", "Baseline", - "Background", "Pressadjust", "Identifier 1", "Analysis", "Method", + c("file_id", "file_root", "file_path", "file_subpath", "file_datetime", + "file_size", "Row", "Peak Center", "Switch Cup Positions", "Baseline", + "Background", "Pressadjust", "Identifier 1", "Analysis", "Method", "measurement_info", "MS_integration_time.s"), 10, c(cycle = NA_character_, `d 17CH4/16CH4` = NA_character_) ) check_dual_inlet_test_file( "caf_example_CO2_01.caf", - c("file_id", "file_root", "file_path", "file_subpath", "file_datetime", - "file_size", "Line", "Peak Center", "Pressadjust", "Background", - "Reference Refill", "Weight [mg]", "Sample", "Identifier 1", - "Identifier 2", "Analysis", "Comment", "Preparation", "Pre Script", + c("file_id", 
"file_root", "file_path", "file_subpath", "file_datetime", + "file_size", "Line", "Peak Center", "Pressadjust", "Background", + "Reference Refill", "Weight [mg]", "Sample", "Identifier 1", + "Identifier 2", "Analysis", "Comment", "Preparation", "Pre Script", "Post Script", "Method", "MS_integration_time.s"), 8, - c(cycle = NA, `d 45CO2/44CO2` = "permil", `d 46CO2/44CO2` = "permil", - `d 13C/12C` = "permil", `d 18O/16O` = "permil", `d 17O/16O` = " ", + c(cycle = NA, `d 45CO2/44CO2` = "permil", `d 46CO2/44CO2` = "permil", + `d 13C/12C` = "permil", `d 18O/16O` = "permil", `d 17O/16O` = " ", `d 47CO2/44CO2` = "permil", `d 48CO2/44CO2` = "permil", `d 49CO2/44CO2` = "permil" ) ) - + # minimal files test_data <- file.path("minimal_data") # test_data <- file.path("tests", "testthat", "minimal_data") # direct did1 <- check_dual_inlet_test_file( "minimal_01.did", - c("file_id", "file_root", "file_path", "file_subpath", "file_datetime", - "file_size", "Row", "Peak Center", "Background", "Pressadjust", - "Reference Refill", "Identifier 1", "Identifier 2", "Analysis", + c("file_id", "file_root", "file_path", "file_subpath", "file_datetime", + "file_size", "Row", "Peak Center", "Background", "Pressadjust", + "Reference Refill", "Identifier 1", "Identifier 2", "Analysis", "Comment", "Method", "measurement_info", "MS_integration_time.s" ), 7, - c(cycle = NA_character_, `d 45CO2/44CO2` = NA_character_, `d 46CO2/44CO2` = NA_character_, - `d 13C/12C` = NA_character_, `d 18O/16O` = NA_character_, `d 17O/16O` = NA_character_, + c(cycle = NA_character_, `d 45CO2/44CO2` = NA_character_, `d 46CO2/44CO2` = NA_character_, + `d 13C/12C` = NA_character_, `d 18O/16O` = NA_character_, `d 17O/16O` = NA_character_, `AT% 13C/12C` = NA_character_, `AT% 18O/16O` = NA_character_) ) did2 <- check_dual_inlet_test_file( "minimal_02.did", - c("file_id", "file_root", "file_path", "file_subpath", "file_datetime", - "file_size", "Row", "Peak Center", "Background", "Pressadjust", - "Reference Refill", 
"Identifier 1", "Identifier 2", "Analysis", + c("file_id", "file_root", "file_path", "file_subpath", "file_datetime", + "file_size", "Row", "Peak Center", "Background", "Pressadjust", + "Reference Refill", "Identifier 1", "Identifier 2", "Analysis", "Comment", "Method", "measurement_info", "MS_integration_time.s" ), 7, - c(cycle = NA_character_, `d 45CO2/44CO2` = NA_character_, `d 46CO2/44CO2` = NA_character_, - `d 13C/12C` = NA_character_, `d 18O/16O` = NA_character_, `d 17O/16O` = NA_character_, + c(cycle = NA_character_, `d 45CO2/44CO2` = NA_character_, `d 46CO2/44CO2` = NA_character_, + `d 13C/12C` = NA_character_, `d 18O/16O` = NA_character_, `d 17O/16O` = NA_character_, `AT% 13C/12C` = NA_character_, `AT% 18O/16O` = NA_character_) ) did3 <- check_dual_inlet_test_file( "minimal_03.did", - c("file_id", "file_root", "file_path", "file_subpath", "file_datetime", - "file_size", "Row", "Peak Center", "Background", "Pressadjust", - "Reference Refill", "Identifier 1", "Identifier 2", "Analysis", + c("file_id", "file_root", "file_path", "file_subpath", "file_datetime", + "file_size", "Row", "Peak Center", "Background", "Pressadjust", + "Reference Refill", "Identifier 1", "Identifier 2", "Analysis", "Comment", "Method", "measurement_info", "MS_integration_time.s" ), 7, - c(cycle = NA_character_, `d 45CO2/44CO2` = NA_character_, `d 46CO2/44CO2` = NA_character_, - `d 13C/12C` = NA_character_, `d 18O/16O` = NA_character_, `d 17O/16O` = NA_character_, + c(cycle = NA_character_, `d 45CO2/44CO2` = NA_character_, `d 46CO2/44CO2` = NA_character_, + `d 13C/12C` = NA_character_, `d 18O/16O` = NA_character_, `d 17O/16O` = NA_character_, `AT% 13C/12C` = NA_character_, `AT% 18O/16O` = NA_character_) ) did4 <- check_dual_inlet_test_file( "minimal_04.did", - c("file_id", "file_root", "file_path", "file_subpath", "file_datetime", - "file_size", "Row", "Peak Center", "Background", "Pressadjust", - "Reference Refill", "Identifier 1", "Identifier 2", "Analysis", + c("file_id", 
"file_root", "file_path", "file_subpath", "file_datetime", + "file_size", "Row", "Peak Center", "Background", "Pressadjust", + "Reference Refill", "Identifier 1", "Identifier 2", "Analysis", "Comment", "Method", "measurement_info", "MS_integration_time.s" ), 7, - c(cycle = NA_character_, `d 45CO2/44CO2` = NA_character_, `d 46CO2/44CO2` = NA_character_, - `d 13C/12C` = NA_character_, `d 18O/16O` = NA_character_, `d 17O/16O` = NA_character_, + c(cycle = NA_character_, `d 45CO2/44CO2` = NA_character_, `d 46CO2/44CO2` = NA_character_, + `d 13C/12C` = NA_character_, `d 18O/16O` = NA_character_, `d 17O/16O` = NA_character_, `AT% 13C/12C` = NA_character_, `AT% 18O/16O` = NA_character_) ) expect_true(iso_is_file_list(dids <- c(did1, did2, did3, did4))) - expect_true(dids %>% iso_get_file_info(select = c(Comment, starts_with("MS"))) %>% - mutate(MSIT_correct = Comment == paste(MS_integration_time.s, "sec")) %>% - { all(.$MSIT_correct) }) - + expect_true(dids |> iso_get_file_info(select = c(Comment, starts_with("MS"))) |> + mutate(MSIT_correct = Comment == paste(MS_integration_time.s, "sec")) |> + dplyr::pull(MSIT_correct) |> + all()) + }) diff --git a/tests/testthat/test-export.R b/tests/testthat/test-export.R index 773f89d1..4352d6c4 100644 --- a/tests/testthat/test-export.R +++ b/tests/testthat/test-export.R @@ -36,7 +36,7 @@ test_that("test that export to rda works properly", { expect_true(file.exists(paste0(filepath, ".cf.rds"))) expect_message(cf_back <- iso_read_continuous_flow(paste0(filepath, ".cf.rds"), quiet = FALSE), "reading file") expect_equal(cf$raw_data, cf_back$raw_data) - expect_equal(cf$file_info %>% unnest(vector_test), cf_back$file_info %>% unnest(vector_test)) + expect_equal(cf$file_info |> unnest(vector_test), cf_back$file_info |> unnest(vector_test)) expect_equal(cf$method_info$standards, cf_back$method_info$standards) expect_equal(cf$method_info$resistors, cf_back$method_info$resistors) expect_equal(cf$vendor_data_table, cf_back$vendor_data_table) @@ 
-59,8 +59,8 @@ test_that("test that export to rda works properly", { expect_true(file.exists(str_c(filepath, ".di.rds"))) expect_message(di_example_back <- iso_read_dual_inlet(str_c(filepath, ".di.rds"), quiet = FALSE), "reading file") expect_equal(di_example$raw_data, di_example_back$raw_data) - expect_equal(di_example$file_info %>% unnest_aggregated_data_frame() %>% unnest(measurement_info), - di_example_back$file_info %>% unnest_aggregated_data_frame() %>% unnest(measurement_info)) + expect_equal(di_example$file_info |> unnest_aggregated_data_frame() |> unnest(measurement_info), + di_example_back$file_info |> unnest_aggregated_data_frame() |> unnest(measurement_info)) expect_equal(di_example$method_info$standards, di_example_back$method_info$standards) expect_equal(di_example$method_info$resistors, di_example_back$method_info$resistors) expect_equal(di_example$vendor_data_table, di_example_back$vendor_data_table) @@ -71,8 +71,8 @@ test_that("test that export to rda works properly", { expect_true(file.exists(str_c(filepath, ".cf.rds"))) expect_message(cf_examples_back <- suppressWarnings(iso_read_continuous_flow(str_c(filepath, ".cf.rds"), quiet = FALSE)), "reading 1 file") expect_equal(cf_example$raw_data, cf_examples_back[[1]]$raw_data) - expect_equal(cf_example$file_info %>% unnest_aggregated_data_frame(), - cf_examples_back[[1]]$file_info %>% unnest_aggregated_data_frame()) + expect_equal(cf_example$file_info |> unnest_aggregated_data_frame(), + cf_examples_back[[1]]$file_info |> unnest_aggregated_data_frame()) expect_equal(cf_example$method_info$standards, cf_examples_back[[1]]$method_info$standards) expect_equal(cf_example$method_info$resistors, cf_examples_back[[1]]$method_info$resistors) expect_equal(cf_example$vendor_data_table, cf_examples_back[[1]]$vendor_data_table) @@ -84,8 +84,8 @@ test_that("test that export to rda works properly", { expect_true(file.exists(str_c(filepath, ".scan.rds"))) expect_message(scan_example_back <- 
iso_read_scan(str_c(filepath, ".scan.rds"), quiet = FALSE), "reading file") expect_equal(scan_example$raw_data, scan_example_back$raw_data) - expect_equal(scan_example$file_info %>% unnest_aggregated_data_frame(), - scan_example_back$file_info %>% unnest_aggregated_data_frame()) + expect_equal(scan_example$file_info |> unnest_aggregated_data_frame(), + scan_example_back$file_info |> unnest_aggregated_data_frame()) expect_equal(scan_example$method_info$resistors, scan_example_back$method_info$resistors) expect_true(file.remove(str_c(filepath, ".scan.rds"))) }) @@ -103,9 +103,9 @@ test_that("test that export to Excel works properly", { skip("'readxl' package not installed - skipping Excel export tests") } - expect_error(iso_export_to_excel(42), "can only export iso files") - expect_error(iso_export_to_excel(make_cf_data_structure("NA")), "no filepath provided") - expect_error(iso_export_to_excel(make_cf_data_structure("NA"), file.path("DOESNOTEXIST", "test")), + expect_error(iso_export_files_to_excel(42), "can only export iso files") + expect_error(iso_export_files_to_excel(make_cf_data_structure("NA")), "no filepath provided") + expect_error(iso_export_files_to_excel(make_cf_data_structure("NA"), file.path("DOESNOTEXIST", "test")), "folder .* does not exist") # test data @@ -116,11 +116,12 @@ test_that("test that export to Excel works properly", { cf$raw_data <- tibble(time = (1:10)*0.1, m44 = (1:10)*0.2, m45 = (1:10)*0.3) cf$method_info$standards <- tibble(standard = "test a") cf$method_info$resistors <- tibble(cup = 1:3, R.Ohm = c(1e9, 1e10, 1e11)) - cf$vendor_data_table <- tibble(x = 1:5, y = letters[1:5]) %>% { attr(., "units") <- tibble(column=c("x", "y"), units = ""); . 
} + cf$vendor_data_table <- tibble(x = 1:5, y = letters[1:5]) + attr(cf$vendor_data_table, "units") <- tibble(column=c("x", "y"), units = "") filepath <- file.path(tempdir(), "test") # export and check - expect_message(cf_out <- iso_export_to_excel(cf, filepath, quiet = FALSE), "exporting data .* into Excel") + expect_message(cf_out <- iso_export_files_to_excel(cf, filepath, quiet = FALSE), "exporting data .* into Excel") expect_equal(names(cf), names(cf_out)) expect_true(is_tibble(cf$file_info)) expect_true(is_tibble(cf_out$file_info)) @@ -130,111 +131,111 @@ test_that("test that export to Excel works properly", { expect_equal(cf$vendor_data_table, cf_out$vendor_data_table) expect_true(file.exists(str_c(filepath, ".cf.xlsx"))) # note for comparisons: rounding is necessary because storage is not perfect numerically - expect_equal(iso_get_raw_data(cf) %>% + expect_equal(iso_get_raw_data(cf) |> dplyr::mutate_if(.predicate = is.numeric, .funs = signif), - readxl::read_excel(str_c(filepath, ".cf.xlsx"), "raw data") %>% + readxl::read_excel(str_c(filepath, ".cf.xlsx"), "raw data") |> dplyr::mutate_if(.predicate = is.numeric, .funs = signif)) - expect_equal(iso_get_file_info(cf) %>% collapse_list_columns(), + expect_equal(iso_get_file_info(cf) |> collapse_list_columns(), readxl::read_excel(str_c(filepath, ".cf.xlsx"), "file info", - col_types = c("text", "text", "text", "text", "numeric", "numeric", "text")) %>% + col_types = c("text", "text", "text", "text", "numeric", "numeric", "text")) |> mutate(file_datetime = as_datetime(file_datetime, tz = Sys.timezone()), file_size = as.integer(file_size))) expect_equal(iso_get_standards(cf), readxl::read_excel(str_c(filepath, ".cf.xlsx"), "standards", col_types = c("text", "text"))) expect_equal(iso_get_resistors(cf), - readxl::read_excel(str_c(filepath, ".cf.xlsx"), "resistors", col_types = c("text", "numeric", "numeric")) %>% + readxl::read_excel(str_c(filepath, ".cf.xlsx"), "resistors", col_types = c("text", "numeric", 
"numeric")) |> mutate(cup = as.integer(cup))) expect_equal(iso_get_vendor_data_table(cf), - readxl::read_excel(str_c(filepath, ".cf.xlsx"), "vendor data table") %>% + readxl::read_excel(str_c(filepath, ".cf.xlsx"), "vendor data table") |> mutate(x = as.integer(x))) - expect_equal(iso_get_problems(cf) %>% select(file_id), + expect_equal(iso_get_problems(cf) |> select(file_id), readxl::read_excel(str_c(filepath, ".cf.xlsx"), "problems", col_types = c("text"))) expect_true(file.remove(str_c(filepath, ".cf.xlsx"))) # export real data files - dual inlet - expect_message(iso_export_to_excel(di_example, filepath, quiet = FALSE), "exporting data .* into Excel") + expect_message(iso_export_files_to_excel(di_example, filepath, quiet = FALSE), "exporting data .* into Excel") expect_true(file.exists(str_c(filepath, ".di.xlsx"))) - expect_equal(iso_get_raw_data(di_example) %>% + expect_equal(iso_get_raw_data(di_example) |> dplyr::mutate_if(.predicate = is.numeric, .funs = signif), - readxl::read_excel(str_c(filepath, ".di.xlsx"), "raw data") %>% + readxl::read_excel(str_c(filepath, ".di.xlsx"), "raw data") |> dplyr::mutate_if(.predicate = is.numeric, .funs = signif)) - expect_equal(iso_get_file_info(di_example) %>% collapse_list_columns() %>% - dplyr::mutate_if(.predicate = is.numeric, .funs = signif) %>% - dplyr::select_if(function(x) !is.na(x)) %>% + expect_equal(iso_get_file_info(di_example) |> collapse_list_columns() |> + dplyr::mutate_if(.predicate = is.numeric, .funs = signif) |> + dplyr::select_if(function(x) !is.na(x)) |> select(-file_datetime), # never exactly identical, - readxl::read_excel(str_c(filepath, ".di.xlsx"), "file info") %>% - dplyr::mutate_if(.predicate = is.numeric, .funs = signif) %>% - dplyr::select_if(function(x) !is.na(x)) %>% + readxl::read_excel(str_c(filepath, ".di.xlsx"), "file info") |> + dplyr::mutate_if(.predicate = is.numeric, .funs = signif) |> + dplyr::select_if(function(x) !is.na(x)) |> select(-file_datetime)) 
expect_equal(iso_get_standards(di_example), readxl::read_excel(str_c(filepath, ".di.xlsx"), "standards")) expect_equal(iso_get_resistors(di_example), - readxl::read_excel(str_c(filepath, ".di.xlsx"), "resistors") %>% + readxl::read_excel(str_c(filepath, ".di.xlsx"), "resistors") |> mutate(cup = as.integer(cup))) - expect_equal(iso_get_vendor_data_table(di_example) %>% + expect_equal(iso_get_vendor_data_table(di_example) |> dplyr::mutate_if(.predicate = is.numeric, .funs = signif), - readxl::read_excel(str_c(filepath, ".di.xlsx"), "vendor data table") %>% + readxl::read_excel(str_c(filepath, ".di.xlsx"), "vendor data table") |> dplyr::mutate_if(.predicate = is.numeric, .funs = signif)) - expect_equal(iso_get_problems(di_example) %>% select(file_id), + expect_equal(iso_get_problems(di_example) |> select(file_id), readxl::read_excel(str_c(filepath, ".di.xlsx"), "problems", col_types = c("text"))) expect_true(file.remove(str_c(filepath, ".di.xlsx"))) # export real data files - continuous flow cf_examples <- c(cf_example, cf_err_example) - expect_message(iso_export_to_excel(cf_examples, filepath, quiet = FALSE), "exporting data .* into Excel") + expect_message(iso_export_files_to_excel(cf_examples, filepath, quiet = FALSE), "exporting data .* into Excel") expect_true(file.exists(str_c(filepath, ".cf.xlsx"))) - expect_equal(iso_get_raw_data(cf_examples) %>% + expect_equal(iso_get_raw_data(cf_examples) |> dplyr::mutate_if(.predicate = is.numeric, .funs = signif), - readxl::read_excel(str_c(filepath, ".cf.xlsx"), "raw data") %>% + readxl::read_excel(str_c(filepath, ".cf.xlsx"), "raw data") |> dplyr::mutate_if(.predicate = is.numeric, .funs = signif)) - expect_equal(iso_get_file_info(cf_examples) %>% collapse_list_columns() %>% - dplyr::mutate_if(.predicate = is.numeric, .funs = signif) %>% - dplyr::select_if(function(x) !all(is.na(x))) %>% + expect_equal(iso_get_file_info(cf_examples) |> collapse_list_columns() |> + dplyr::mutate_if(.predicate = is.numeric, .funs = signif) 
|> + dplyr::select_if(function(x) !all(is.na(x))) |> select(-file_datetime), # never exactly identical, - readxl::read_excel(str_c(filepath, ".cf.xlsx"), "file info") %>% - dplyr::mutate_if(.predicate = is.numeric, .funs = signif) %>% - dplyr::select_if(function(x) !all(is.na(x))) %>% + readxl::read_excel(str_c(filepath, ".cf.xlsx"), "file info") |> + dplyr::mutate_if(.predicate = is.numeric, .funs = signif) |> + dplyr::select_if(function(x) !all(is.na(x))) |> select(-file_datetime)) expect_equal(iso_get_standards(cf_examples), readxl::read_excel(str_c(filepath, ".cf.xlsx"), "standards")) expect_equal(iso_get_resistors(cf_examples), - readxl::read_excel(str_c(filepath, ".cf.xlsx"), "resistors") %>% + readxl::read_excel(str_c(filepath, ".cf.xlsx"), "resistors") |> mutate(cup = as.integer(cup))) - expect_equal(iso_get_vendor_data_table(cf_examples) %>% - dplyr::mutate_if(.predicate = is.numeric, .funs = signif) %>% + expect_equal(iso_get_vendor_data_table(cf_examples) |> + dplyr::mutate_if(.predicate = is.numeric, .funs = signif) |> iso_strip_units(), - readxl::read_excel(str_c(filepath, ".cf.xlsx"), "vendor data table") %>% + readxl::read_excel(str_c(filepath, ".cf.xlsx"), "vendor data table") |> dplyr::mutate_if(.predicate = is.numeric, .funs = signif)) expect_equal(iso_get_problems(cf_examples), readxl::read_excel(str_c(filepath, ".cf.xlsx"), "problems", col_types = c("text", "text", "text", "text"))) expect_true(file.remove(str_c(filepath, ".cf.xlsx"))) # export real data files with explicit units - expect_message(iso_export_to_excel(cf_example, filepath, with_explicit_units = TRUE, quiet = FALSE), "exporting data .* into Excel") + expect_message(iso_export_files_to_excel(cf_example, filepath, with_explicit_units = TRUE, quiet = FALSE), "exporting data .* into Excel") expect_true(file.exists(str_c(filepath, ".cf.xlsx"))) - expect_equal(iso_get_vendor_data_table(cf_example, with_explicit_units = TRUE) %>% + expect_equal(iso_get_vendor_data_table(cf_example, 
with_explicit_units = TRUE) |> dplyr::mutate_if(.predicate = is.numeric, .funs = signif), - readxl::read_excel(str_c(filepath, ".cf.xlsx"), "vendor data table") %>% + readxl::read_excel(str_c(filepath, ".cf.xlsx"), "vendor data table") |> dplyr::mutate_if(.predicate = is.numeric, .funs = signif)) expect_true(file.remove(str_c(filepath, ".cf.xlsx"))) # export real data files - scan - expect_message(iso_export_to_excel(scan_example, filepath, quiet = FALSE), "exporting data .* into Excel") + expect_message(iso_export_files_to_excel(scan_example, filepath, quiet = FALSE), "exporting data .* into Excel") expect_true(file.exists(str_c(filepath, ".scan.xlsx"))) - expect_equal(iso_get_raw_data(scan_example) %>% + expect_equal(iso_get_raw_data(scan_example) |> dplyr::mutate_if(.predicate = is.numeric, .funs = signif), - readxl::read_excel(str_c(filepath, ".scan.xlsx"), "raw data") %>% + readxl::read_excel(str_c(filepath, ".scan.xlsx"), "raw data") |> dplyr::mutate_if(.predicate = is.numeric, .funs = signif)) - expect_equal(iso_get_file_info(scan_example) %>% collapse_list_columns() %>% - dplyr::mutate_if(.predicate = is.numeric, .funs = signif) %>% - dplyr::select_if(function(x) !is.na(x)) %>% + expect_equal(iso_get_file_info(scan_example) |> collapse_list_columns() |> + dplyr::mutate_if(.predicate = is.numeric, .funs = signif) |> + dplyr::select_if(function(x) !is.na(x)) |> select(-file_datetime), # never exactly identical, - readxl::read_excel(str_c(filepath, ".scan.xlsx"), "file info") %>% - dplyr::mutate_if(.predicate = is.numeric, .funs = signif) %>% - dplyr::select_if(function(x) !is.na(x)) %>% + readxl::read_excel(str_c(filepath, ".scan.xlsx"), "file info") |> + dplyr::mutate_if(.predicate = is.numeric, .funs = signif) |> + dplyr::select_if(function(x) !is.na(x)) |> select(-file_datetime)) - expect_equal(iso_get_resistors(scan_example) %>% + expect_equal(iso_get_resistors(scan_example) |> dplyr::mutate_if(.predicate = is.numeric, .funs = signif), - 
readxl::read_excel(str_c(filepath, ".scan.xlsx"), "resistors") %>% + readxl::read_excel(str_c(filepath, ".scan.xlsx"), "resistors") |> dplyr::mutate_if(.predicate = is.numeric, .funs = signif)) expect_true(file.remove(str_c(filepath, ".scan.xlsx"))) @@ -249,9 +250,9 @@ test_that("test that export to Feather works properly", { skip("'feather' package not installed - skipping feather export tests") } - expect_error(iso_export_to_feather(42), "can only export iso files") - expect_error(iso_export_to_feather(make_cf_data_structure("NA")), "no filepath provided") - expect_error(iso_export_to_feather(make_cf_data_structure("NA"), file.path("DOESNOTEXIST", "test")), + expect_error(iso_export_files_to_feather(42), "can only export iso files") + expect_error(iso_export_files_to_feather(make_cf_data_structure("NA")), "no filepath provided") + expect_error(iso_export_files_to_feather(make_cf_data_structure("NA"), file.path("DOESNOTEXIST", "test")), "folder .* does not exist") # test data @@ -262,11 +263,13 @@ test_that("test that export to Feather works properly", { cf$raw_data <- tibble(time = (1:10)*0.1, m44 = (1:10)*0.2, m45 = (1:10)*0.3) cf$method_info$standards <- tibble(standard = "test a") cf$method_info$resistors <- tibble(cup = 1:3, R.Ohm = c(1e9, 1e10, 1e11)) - cf$vendor_data_table <- tibble(x = 1:5, y = letters[1:5]) %>% { attr(., "units") <- tibble(column=c("x", "y"), units = ""); . 
} %>% convert_df_units_attr_to_implicit_units() + cf$vendor_data_table <- tibble(x = 1:5, y = letters[1:5]) + attr(cf$vendor_data_table, "units") <- tibble(column=c("x", "y"), units = "") + cf$vendor_data_table <- cf$vendor_data_table |> convert_df_units_attr_to_implicit_units() filepath <- file.path(tempdir(), "test") # export and check - expect_message(cf_out <- iso_export_to_feather(cf, filepath, quiet = FALSE), "exporting data .* into .cf.feather") + expect_message(cf_out <- iso_export_files_to_feather(cf, filepath, quiet = FALSE), "exporting data .* into .cf.feather") expect_equal(names(cf), names(cf_out)) expect_true(is_tibble(cf$file_info)) expect_true(is_tibble(cf_out$file_info)) @@ -283,7 +286,7 @@ test_that("test that export to Feather works properly", { # note for comparisons: rounding is NOT necessary because storage is equivalent to values in R expect_equal(iso_get_raw_data(cf), feather::read_feather(str_c(filepath, "_raw_data.cf.feather"))) expect_true(identical( - iso_get_file_info(cf) %>% collapse_list_columns(), feather::read_feather(str_c(filepath, "_file_info.cf.feather")) + iso_get_file_info(cf) |> collapse_list_columns(), feather::read_feather(str_c(filepath, "_file_info.cf.feather")) )) expect_equal(iso_get_standards(cf), feather::read_feather(str_c(filepath, "_standards.cf.feather"))) expect_equal(iso_get_resistors (cf), feather::read_feather(str_c(filepath, "_resistors.cf.feather"))) @@ -291,7 +294,7 @@ test_that("test that export to Feather works properly", { expect_true(all(file.remove(list.files(dirname(filepath), pattern = "\\.cf\\.feather$", full.names = TRUE)))) # export real data files - dual inlet - expect_message(iso_export_to_feather(di_example, filepath, quiet = FALSE), "exporting data .* into .di.feather") + expect_message(iso_export_files_to_feather(di_example, filepath, quiet = FALSE), "exporting data .* into .di.feather") expect_true(file.exists(str_c(filepath, "_raw_data.di.feather"))) expect_true(file.exists(str_c(filepath, 
"_file_info.di.feather"))) expect_true(file.exists(str_c(filepath, "_standards.di.feather"))) @@ -300,16 +303,16 @@ test_that("test that export to Feather works properly", { expect_true(file.exists(str_c(filepath, "_problems.di.feather"))) # note for comparisons: rounding is NOT necessary because storage is equivalent to values in R expect_equal(iso_get_raw_data(di_example), feather::read_feather(str_c(filepath, "_raw_data.di.feather"))) - expect_equal(iso_get_file_info(di_example) %>% collapse_list_columns(), feather::read_feather(str_c(filepath, "_file_info.di.feather"))) + expect_equal(iso_get_file_info(di_example) |> collapse_list_columns(), feather::read_feather(str_c(filepath, "_file_info.di.feather"))) expect_equal(iso_get_standards(di_example), feather::read_feather(str_c(filepath, "_standards.di.feather"))) expect_equal(iso_get_resistors (di_example), feather::read_feather(str_c(filepath, "_resistors.di.feather"))) expect_equal(iso_get_vendor_data_table(di_example), feather::read_feather(str_c(filepath, "_vendor_data_table.di.feather"))) - expect_equal(iso_get_problems(di_example) %>% select(file_id), feather::read_feather(str_c(filepath, "_problems.di.feather"))) + expect_equal(iso_get_problems(di_example) |> select(file_id), feather::read_feather(str_c(filepath, "_problems.di.feather"))) expect_true(all(file.remove(list.files(dirname(filepath), pattern = "\\.di\\.feather$", full.names = TRUE)))) # export real data files - continuous flow cf_examples <- c(cf_example, cf_err_example) - expect_message(iso_export_to_feather(cf_examples, filepath, quiet = FALSE), "exporting data .* into .cf.feather") + expect_message(iso_export_files_to_feather(cf_examples, filepath, quiet = FALSE), "exporting data .* into .cf.feather") expect_true(file.exists(str_c(filepath, "_raw_data.cf.feather"))) expect_true(file.exists(str_c(filepath, "_file_info.cf.feather"))) expect_true(file.exists(str_c(filepath, "_standards.cf.feather"))) @@ -318,29 +321,29 @@ test_that("test that 
export to Feather works properly", { expect_true(file.exists(str_c(filepath, "_problems.cf.feather"))) # note for comparisons: rounding is NOT necessary because storage is equivalent to values in R expect_equal(iso_get_raw_data(cf_examples), feather::read_feather(str_c(filepath, "_raw_data.cf.feather"))) - expect_equal(iso_get_file_info(cf_examples) %>% collapse_list_columns(), feather::read_feather(str_c(filepath, "_file_info.cf.feather"))) + expect_equal(iso_get_file_info(cf_examples) |> collapse_list_columns(), feather::read_feather(str_c(filepath, "_file_info.cf.feather"))) expect_equal(iso_get_standards(cf_examples), feather::read_feather(str_c(filepath, "_standards.cf.feather"))) expect_equal(iso_get_resistors (cf_examples), feather::read_feather(str_c(filepath, "_resistors.cf.feather"))) - expect_equal(iso_get_vendor_data_table(cf_examples) %>% iso_strip_units(), + expect_equal(iso_get_vendor_data_table(cf_examples) |> iso_strip_units(), feather::read_feather(str_c(filepath, "_vendor_data_table.cf.feather"))) expect_equal(iso_get_problems(cf_examples), feather::read_feather(str_c(filepath, "_problems.cf.feather"))) expect_true(all(file.remove(list.files(dirname(filepath), pattern = "\\.cf\\.feather$", full.names = TRUE)))) # export with explicit units - expect_message(iso_export_to_feather(cf_example, filepath, with_explicit_units = TRUE, quiet = FALSE), "exporting data .* into .cf.feather") + expect_message(iso_export_files_to_feather(cf_example, filepath, with_explicit_units = TRUE, quiet = FALSE), "exporting data .* into .cf.feather") expect_true(file.exists(str_c(filepath, "_vendor_data_table.cf.feather"))) expect_equal(iso_get_vendor_data_table(cf_example, with_explicit_units = TRUE), feather::read_feather(str_c(filepath, "_vendor_data_table.cf.feather"))) expect_true(all(file.remove(list.files(dirname(filepath), pattern = "\\.cf\\.feather$", full.names = TRUE)))) # export real data files - scan - expect_message(iso_export_to_feather(scan_example, 
filepath, quiet = FALSE), "exporting data .* into .scan.feather") + expect_message(iso_export_files_to_feather(scan_example, filepath, quiet = FALSE), "exporting data .* into .scan.feather") expect_true(file.exists(str_c(filepath, "_raw_data.scan.feather"))) expect_true(file.exists(str_c(filepath, "_file_info.scan.feather"))) expect_true(file.exists(str_c(filepath, "_resistors.scan.feather"))) # note for comparisons: rounding is NOT necessary because storage is equivalent to values in R expect_equal(iso_get_raw_data(scan_example), feather::read_feather(str_c(filepath, "_raw_data.scan.feather"))) - expect_equal(iso_get_file_info(scan_example) %>% collapse_list_columns(), feather::read_feather(str_c(filepath, "_file_info.scan.feather"))) + expect_equal(iso_get_file_info(scan_example) |> collapse_list_columns(), feather::read_feather(str_c(filepath, "_file_info.scan.feather"))) expect_equal(iso_get_resistors (scan_example), feather::read_feather(str_c(filepath, "_resistors.scan.feather"))) expect_true(all(file.remove(list.files(dirname(filepath), pattern = "\\.scan\\.feather$", full.names = TRUE)))) diff --git a/tests/testthat/test-file-info-operations.R b/tests/testthat/test-file-info-operations.R index 7262fae8..986cc47c 100644 --- a/tests/testthat/test-file-info-operations.R +++ b/tests/testthat/test-file-info-operations.R @@ -43,27 +43,27 @@ test_that("Test that selecting/renaming file info works", { # select outcomes expect_equal( - iso_select_file_info(iso_files, y = new_info2, y = new_info3, file_specific = TRUE) %>% iso_get_file_info(), + iso_select_file_info(iso_files, y = new_info2, y = new_info3, file_specific = TRUE) |> iso_get_file_info(), tibble(file_id = c("A", "B", "C"), y = c(NA, 2, 3)) ) expect_equal( - iso_select_file_info(iso_files, -starts_with("file"), -new_info) %>% iso_get_file_info(), + iso_select_file_info(iso_files, -starts_with("file"), -new_info) |> iso_get_file_info(), tibble(file_id = c("A", "B", "C"), new_info2 = c(NA, 2, NA), new_info3 
= c(NA, NA, 3)) ) expect_equal( - iso_select_file_info(iso_files, -starts_with("file"), -new_info, file_specific = TRUE) %>% iso_get_file_info(), + iso_select_file_info(iso_files, -starts_with("file"), -new_info, file_specific = TRUE) |> iso_get_file_info(), tibble(file_id = c("A", "B", "C"), new_info2 = c(NA, 2, NA), new_info3 = c(NA, NA, 3)) ) expect_equal( - iso_select_file_info(iso_files, newer_info = new_info) %>% iso_get_file_info(), + iso_select_file_info(iso_files, newer_info = new_info) |> iso_get_file_info(), tibble(file_id = c("A", "B", "C"), newer_info = 42) ) expect_equal( - iso_select_file_info(iso_files, newer_info = new_info, file_specific = TRUE) %>% iso_get_file_info(), + iso_select_file_info(iso_files, newer_info = new_info, file_specific = TRUE) |> iso_get_file_info(), tibble(file_id = c("A", "B", "C"), newer_info = 42) ) expect_equal( - iso_select_file_info(iso_files, newest_info = new_info, new_info2, new_info2 = new_info3, file_specific = TRUE) %>% iso_get_file_info(), + iso_select_file_info(iso_files, newest_info = new_info, new_info2, new_info2 = new_info3, file_specific = TRUE) |> iso_get_file_info(), tibble(file_id = c("A", "B", "C"), newest_info = 42, new_info2 = c(NA, 2, 3)) ) @@ -86,16 +86,16 @@ test_that("Test that selecting/renaming file info works", { # rename outcomes expect_equal( - iso_rename_file_info(iso_files, y = new_info2, y = new_info3, file_specific = TRUE) %>% - iso_get_file_info() %>% select(file_id, new_info, y), + iso_rename_file_info(iso_files, y = new_info2, y = new_info3, file_specific = TRUE) |> + iso_get_file_info() |> select(file_id, new_info, y), tibble(file_id = c("A", "B", "C"), new_info = 42, y = c(NA, 2, 3)) ) expect_equal( - iso_rename_file_info(iso_files, newer_info = new_info) %>% iso_get_file_info() %>% select(file_id, newer_info), + iso_rename_file_info(iso_files, newer_info = new_info) |> iso_get_file_info() |> select(file_id, newer_info), tibble(file_id = c("A", "B", "C"), newer_info = 42) ) 
expect_equal( - iso_rename_file_info(iso_files, newest_info = new_info, new_info2 = new_info3, file_specific = TRUE) %>% iso_get_file_info() %>% select(file_id, newest_info, new_info2), + iso_rename_file_info(iso_files, newest_info = new_info, new_info2 = new_info3, file_specific = TRUE) |> iso_get_file_info() |> select(file_id, newest_info, new_info2), tibble(file_id = c("A", "B", "C"), newest_info = 42, new_info2 = c(NA, 2, 3)) ) @@ -161,14 +161,14 @@ test_that("Test that mutating file info works", { # mutating expect_equal( - mutate(iso_files) %>% iso_get_file_info(), - iso_files %>% iso_get_file_info()) + mutate(iso_files) |> iso_get_file_info(), + iso_files |> iso_get_file_info()) expect_equal( - mutate(iso_files, new_info = as.character(new_info)) %>% iso_get_file_info(), - iso_files %>% iso_get_file_info() %>% mutate(new_info = as.character(new_info))) + mutate(iso_files, new_info = as.character(new_info)) |> iso_get_file_info(), + iso_files |> iso_get_file_info() |> mutate(new_info = as.character(new_info))) expect_equal( - mutate(iso_files, new_info = iso_double_with_units(1:3, "s")) %>% iso_get_file_info(), - iso_files %>% iso_get_file_info() %>% mutate(new_info = iso_double_with_units(1:3, "s"))) + mutate(iso_files, new_info = iso_double_with_units(1:3, "s")) |> iso_get_file_info(), + iso_files |> iso_get_file_info() |> mutate(new_info = iso_double_with_units(1:3, "s"))) expect_true( iso_is_file_list( @@ -176,8 +176,8 @@ test_that("Test that mutating file info works", { mutate(iso_files, newest_info = case_when(new_info2 == 2 ~ 20, new_info3 == 3 ~ 30, TRUE ~ 00))) ) expect_equal( - mutated_iso_files %>% iso_get_file_info(), - iso_files %>% iso_get_file_info() %>% + mutated_iso_files |> iso_get_file_info(), + iso_files |> iso_get_file_info() |> mutate(newest_info = case_when(new_info2 == 2 ~ 20, new_info3 == 3 ~ 30, TRUE ~ 00)) ) @@ -189,8 +189,8 @@ test_that("Test that mutating file info works", { # mutate and iso_mutate_file_info equivalence 
expect_equal( - mutate(iso_files, newest_info = "A") %>% iso_get_file_info(), - iso_mutate_file_info(iso_files, newest_info = "A") %>% iso_get_file_info()) + mutate(iso_files, newest_info = "A") |> iso_get_file_info(), + iso_mutate_file_info(iso_files, newest_info = "A") |> iso_get_file_info()) # file root update ===== @@ -201,7 +201,7 @@ test_that("Test that mutating file info works", { expect_message(rerooted_files <- iso_set_file_root(iso_files, root = "test"), "setting file root for 3 data file.*to.*test") expect_equal(iso_get_file_info(rerooted_files, c(file_root, file_path)), - iso_get_file_info(iso_files, c(file_root, file_path)) %>% mutate(file_root = "test")) + iso_get_file_info(iso_files, c(file_root, file_path)) |> mutate(file_root = "test")) # with removing embedded root iso_files[[1]]$file_info$file_path <- "A/B/C/a.cf" @@ -211,12 +211,12 @@ test_that("Test that mutating file info works", { expect_warning(rerooted_files <- iso_set_file_root(iso_files, remove_embedded_root = "DNE"), "3/3 file paths do not include the embedded root") expect_equal(iso_get_file_info(rerooted_files, c(file_root, file_path)), - iso_get_file_info(iso_files, c(file_root, file_path)) %>% mutate(file_root = ".")) + iso_get_file_info(iso_files, c(file_root, file_path)) |> mutate(file_root = ".")) expect_warning(rerooted_files <- iso_set_file_root(iso_files, root = "test", remove_embedded_root = "A/B/C"), "1/3 file paths do not include the embedded root") expect_equal(iso_get_file_info(rerooted_files, c(file_root, file_path)), - iso_get_file_info(iso_files, c(file_root, file_path)) %>% + iso_get_file_info(iso_files, c(file_root, file_path)) |> mutate(file_path = str_replace(file_path, "A/B/C/", ""), file_root = "test")) expect_message(rerooted_files <- iso_set_file_root(iso_files, remove_embedded_root = "A/B"), @@ -224,7 +224,7 @@ test_that("Test that mutating file info works", { expect_message(rerooted_files <- iso_set_file_root(iso_files, root = "test", remove_embedded_root = 
"././A/B"), "setting file root for 3 data file.*to.*test.*removing embedded root.*A/B") expect_equal(iso_get_file_info(rerooted_files, c(file_root, file_path)), - iso_get_file_info(iso_files, c(file_root, file_path)) %>% + iso_get_file_info(iso_files, c(file_root, file_path)) |> mutate(file_path = str_replace(file_path, "A/B/", ""), file_root = "test")) @@ -286,17 +286,17 @@ test_that("Test that file info parsing works", { # outcome expect_equal(iso_get_file_info(no_effect_isos), iso_get_file_info(iso_files)) - expect_equal(iso_get_file_info(text_isos), iso_get_file_info(iso_files) %>% + expect_equal(iso_get_file_info(text_isos), iso_get_file_info(iso_files) |> mutate(new_info = as.character(new_info))) - expect_equal(iso_get_file_info(number_isos), iso_get_file_info(iso_files) %>% + expect_equal(iso_get_file_info(number_isos), iso_get_file_info(iso_files) |> mutate(new_info2 = parse_number(new_info2), new_info3 = parse_number(new_info3))) - expect_equal(iso_get_file_info(integer_isos), iso_get_file_info(iso_files) %>% + expect_equal(iso_get_file_info(integer_isos), iso_get_file_info(iso_files) |> mutate(new_info2 = parse_integer(new_info2))) - expect_equal(iso_get_file_info(double_isos), iso_get_file_info(iso_files) %>% + expect_equal(iso_get_file_info(double_isos), iso_get_file_info(iso_files) |> mutate(new_info2 = parse_double(new_info2))) - expect_equal(iso_get_file_info(datetime_isos)$new_info3, (iso_get_file_info(iso_files) %>% - mutate(new_info3 = parse_datetime(new_info3) %>% lubridate::with_tz(Sys.timezone())))$new_info3) + expect_equal(iso_get_file_info(datetime_isos)$new_info3, (iso_get_file_info(iso_files) |> + mutate(new_info3 = parse_datetime(new_info3) |> lubridate::with_tz(Sys.timezone())))$new_info3) }) @@ -373,7 +373,7 @@ test_that("Test that file info addition works", { # test with isofiles (not just in data frame) template <- make_cf_data_structure("NA") template$read_options$file_info <- TRUE - iso_files <- map(split(file_info, 
seq(nrow(file_info))), ~{ x <- template; x$file_info <- .x; x }) %>% + iso_files <- map(split(file_info, seq(nrow(file_info))), ~{ x <- template; x$file_info <- .x; x }) |> iso_as_file_list() expect_message( @@ -389,7 +389,7 @@ test_that("Test that file info addition works", { "'file_id' join.*3/3 new info.*matched 3" ) # check that iso files and df derived are the same - expect_equal(iso_files_out %>% iso_get_file_info(), df_out) + expect_equal(iso_files_out |> iso_get_file_info(), df_out) }) diff --git a/tests/testthat/test-file-utils.R b/tests/testthat/test-file-utils.R index 35d2d06c..c2d10029 100644 --- a/tests/testthat/test-file-utils.R +++ b/tests/testthat/test-file-utils.R @@ -23,7 +23,7 @@ test_that("binary isodat file navigation", { expect_error(move_to_pos(tl, 9), "exceeds position max") expect_equal(move_to_pos(tl, 9, reset_cap = TRUE)$pos, 9L) expect_error(cap_at_pos(tl, NULL), "cannot cap at position") - expect_error(move_to_pos(tl, 5) %>% cap_at_pos(4), "smaller than current position") + expect_error(move_to_pos(tl, 5) |> cap_at_pos(4), "smaller than current position") expect_equal(cap_at_pos(tl, 5)$max_pos, 5L) expect_equal(set_pos_and_cap(tl, 2, 4)[c("pos", "max_pos")], list(pos = 2L, max_pos = 4L)) diff --git a/tests/testthat/test-isoread.R b/tests/testthat/test-isoread.R index e2b3e5fd..c6ef30e0 100644 --- a/tests/testthat/test-isoread.R +++ b/tests/testthat/test-isoread.R @@ -6,16 +6,16 @@ test_that("test that file reader registration works", { initialize_options() expect_error(iso_register_continuous_flow_file_reader(".new", nrow), "please provide the function name") - expect_equal(iso_register_continuous_flow_file_reader(".new", "nrow") %>% dplyr::filter(extension == ".new") %>% nrow(), 1) - expect_equal(iso_register_continuous_flow_file_reader(".new", "nrow") %>% dplyr::filter(extension == ".new") %>% nrow(), 1) + expect_equal(iso_register_continuous_flow_file_reader(".new", "nrow") |> dplyr::filter(extension == ".new") |> nrow(), 1) + 
expect_equal(iso_register_continuous_flow_file_reader(".new", "nrow") |> dplyr::filter(extension == ".new") |> nrow(), 1) expect_error(iso_register_continuous_flow_file_reader(".new", "mean"), "already exists") expect_warning(new <- iso_register_continuous_flow_file_reader(".new", "mean", overwrite = TRUE), "will be overwritte") - expect_equal(new %>% dplyr::filter(extension == ".new") %>% nrow(), 1) + expect_equal(new |> dplyr::filter(extension == ".new") |> nrow(), 1) expect_error(iso_register_continuous_flow_file_reader(".new2", "THISFUNCTIONDOESNOTEXIST"), "could not find function") .GlobalEnv$iso_is_file <- function() stop("testing") expect_error(iso_register_continuous_flow_file_reader(".new2", "iso_is_file"), "exists in more than one environment") - expect_equal(iso_register_continuous_flow_file_reader(".new2", "iso_is_file", env = "isoreader") %>% - dplyr::filter(extension == ".new") %>% nrow(), 1) + expect_equal(iso_register_continuous_flow_file_reader(".new2", "iso_is_file", env = "isoreader") |> + dplyr::filter(extension == ".new") |> nrow(), 1) rm("iso_is_file", envir = .GlobalEnv) }) @@ -121,7 +121,7 @@ test_that("test that version checking and re-reads are working properly", { temp_cache <- file.path(tempdir(), "cache_files") temp_storage <- file.path(tempdir(), "scan_storage_old.scan.rds") dir.create(temp_cache, showWarnings = FALSE) - save_files <- files %>% iso_set_file_root(remove_embedded_root = data_folder) + save_files <- files |> iso_set_file_root(remove_embedded_root = data_folder) readr::write_rds(save_files[[1]], file.path(temp_cache, basename(generate_cache_filepaths(test_files)[1]))) readr::write_rds(save_files[[2]], file.path(temp_cache, basename(generate_cache_filepaths(test_files)[2]))) readr::write_rds(save_files[[3]], file.path(temp_cache, basename(generate_cache_filepaths(test_files)[3]))) @@ -188,11 +188,11 @@ test_that("test that version checking and re-reads are working properly", { expect_true(is_iso_object_outdated(re_files)) # 
re-read with changed root - expect_message(files %>% iso_set_file_root(root = "DNE") %>% reread_iso_files(), "Warning.*3 file.*not exist") - expect_error(files %>% iso_set_file_root(root = "DNE") %>% reread_iso_files(stop_if_missing = TRUE), "3 file.*not exist") + expect_message(files |> iso_set_file_root(root = "DNE") |> reread_iso_files(), "Warning.*3 file.*not exist") + expect_error(files |> iso_set_file_root(root = "DNE") |> reread_iso_files(stop_if_missing = TRUE), "3 file.*not exist") expect_message( - files %>% - iso_set_file_root(root = data_folder, remove_embedded_root = data_folder) %>% iso_reread_all_files(), + files |> + iso_set_file_root(root = data_folder, remove_embedded_root = data_folder) |> iso_reread_all_files(), "found 3.*re-reading 3/3" ) @@ -206,17 +206,17 @@ test_that("test that version checking and re-reads are working properly", { expect_true(nrow(problems(files)) == 4) expect_true(is_iso_object_outdated(files)) expect_false(is_iso_object_outdated(files[[3]])) - expect_message(re_files <- files %>% iso_set_file_root(data_folder) %>% iso_reread_outdated_files(), + expect_message(re_files <- files |> iso_set_file_root(data_folder) |> iso_reread_outdated_files(), "found 2 outdated.*re-reading 2/3") expect_false(is_iso_object_outdated(re_files)) expect_true(nrow(problems(re_files)) == 1) - expect_message(re_files <- files %>% iso_set_file_root(data_folder) %>% iso_reread_changed_files(), + expect_message(re_files <- files |> iso_set_file_root(data_folder) |> iso_reread_changed_files(), "found 0 changed.*re-reading 0/3") - expect_message(re_files <- files %>% iso_set_file_root(data_folder) %>% iso_reread_problem_files(), + expect_message(re_files <- files |> iso_set_file_root(data_folder) |> iso_reread_problem_files(), "found 1.*with errors.*re-reading 1/3") expect_true(is_iso_object_outdated(re_files)) expect_true(nrow(problems(re_files)) == 3) - expect_message(re_files <- files %>% iso_set_file_root(data_folder) %>% 
iso_reread_problem_files(reread_files_with_warnings = TRUE), + expect_message(re_files <- files |> iso_set_file_root(data_folder) |> iso_reread_problem_files(reread_files_with_warnings = TRUE), "found 3.*with warnings or errors.*re-reading 3/3") expect_false(is_iso_object_outdated(re_files)) expect_true(nrow(problems(re_files)) == 0) diff --git a/tests/testthat/test-nse.R b/tests/testthat/test-nse.R index 2c1c45e4..46b5f220 100644 --- a/tests/testthat/test-nse.R +++ b/tests/testthat/test-nse.R @@ -2,7 +2,7 @@ context("Standard and non-standard evaluation") test_that("Getting column names (with # and type requirement checks) works", { - df <- dplyr::as_tibble(mtcars) %>% tibble::rownames_to_column() + df <- dplyr::as_tibble(mtcars) |> tibble::rownames_to_column() # basic errors expect_error(get_column_names(), "no data frame supplied") diff --git a/tests/testthat/test-problems.R b/tests/testthat/test-problems.R index 7150c8b0..10748d71 100644 --- a/tests/testthat/test-problems.R +++ b/tests/testthat/test-problems.R @@ -11,7 +11,7 @@ test_that("Test that problem registration and reporting works properly", { as.character(y) }, x) expect_equal(n_problems(y), 0L) - expect_equal(problems(y) %>% names(), c("type", "func", "details")) + expect_equal(problems(y) |> names(), c("type", "func", "details")) # test auto initialize of get_problems expect_equal(get_problems(x), get_problems(y)) @@ -24,7 +24,7 @@ test_that("Test that problem registration and reporting works properly", { }, x) expect_equal(n_problems(y), 1) - expect_equal(problems(y) %>% select(func, details), tibble(func = "testing", details = "problem")) + expect_equal(problems(y) |> select(func, details), tibble(func = "testing", details = "problem")) # add another problem expect_equal({ @@ -32,7 +32,7 @@ test_that("Test that problem registration and reporting works properly", { as.character(z) }, x) expect_equal(n_problems(z), 2) - expect_equal(problems(z) %>% select(details, code), tibble(details = c("problem", 
"problem2"), code = c(NA, 5))) + expect_equal(problems(z) |> select(details, code), tibble(details = c("problem", "problem2"), code = c(NA, 5))) # stop for problems expect_error(stop_for_problems(z), "2 parsing failures") @@ -42,19 +42,19 @@ test_that("Test that problem registration and reporting works properly", { test_that("Test that problems set for iso_file lists get propagated to all files", { # propagate problems for iso_files expect_is(iso_file <- make_iso_file_data_structure("NA"), "iso_file") - iso_file1 <- iso_file %>% { .$file_info$file_id <- "A"; . } - iso_file2 <- iso_file %>% { .$file_info$file_id <- "B"; . } + iso_file1 <- iso_file; iso_file1$file_info$file_id <- "A" + iso_file2 <- iso_file; iso_file2$file_info$file_id <- "B" expect_is(iso_files <- c(iso_file1, iso_file2), "iso_file_list") - expect_equal(problems(iso_files) %>% nrow(), 0L) + expect_equal(problems(iso_files) |> nrow(), 0L) expect_error(iso_has_problems(), "provide iso_files") expect_false(iso_has_problems(iso_files)) expect_is(iso_files_w_probs <- register_problem(iso_files, type = "test"), "iso_file_list") expect_true(iso_has_problems(iso_files_w_probs)) - expect_equal(problems(iso_files_w_probs) %>% select(file_id, type), + expect_equal(problems(iso_files_w_probs) |> select(file_id, type), tibble(file_id = c("A", "B"), type = c("test"))) - expect_equal(problems(iso_files_w_probs[[1]]) %>% select(type), tibble(type = "test")) - expect_equal(problems(iso_files_w_probs[[2]]) %>% select(type), tibble(type = "test")) + expect_equal(problems(iso_files_w_probs[[1]]) |> select(type), tibble(type = "test")) + expect_equal(problems(iso_files_w_probs[[2]]) |> select(type), tibble(type = "test")) }) test_that("Test that warning and error registration works properly", { @@ -65,14 +65,14 @@ test_that("Test that warning and error registration works properly", { expect_silent(y <- register_warning(x, details = "problem", warn = FALSE)) expect_equal(as.character(y), x) expect_equal(n_problems(y), 1) 
- expect_equal(problems(y) %>% select(type, details), tibble(type = "warning", details = "problem")) + expect_equal(problems(y) |> select(type, details), tibble(type = "warning", details = "problem")) # add an error expect_message(y <- register_error(x, details = "problem", warn = TRUE), "caught error - problem") expect_silent(y <- register_error(x, details = "problem", warn = FALSE)) expect_equal(as.character(y), x) expect_equal(n_problems(y), 1) - expect_equal(problems(y) %>% select(type, details), tibble(type = "error", details = "problem")) + expect_equal(problems(y) |> select(type, details), tibble(type = "error", details = "problem")) }) test_that("Combing problems works properly", { @@ -98,24 +98,21 @@ test_that("Test that removing files with errors works properly", { expect_equal(iso_filter_files_with_problems(iso_files, remove_files_with_errors = FALSE, remove_files_with_warnings = FALSE), iso_files) expect_message(iso_filter_files_with_problems(iso_files, quiet = FALSE), "removing") expect_silent(iso_filter_files_with_problems(iso_files, quiet = TRUE)) - expect_equal(iso_filter_files_with_problems(iso_files) %>% # default parameters - sapply(function(x) x$file_info$file_id) %>% as.character(), + expect_equal(iso_filter_files_with_problems(iso_files) |> # default parameters + sapply(function(x) x$file_info$file_id) |> as.character(), c("A", "B")) - expect_equal(iso_filter_files_with_problems(iso_files, remove_files_with_errors = FALSE, remove_files_with_warnings = TRUE) %>% - sapply(function(x) x$file_info$file_id) %>% as.character(), + expect_equal(iso_filter_files_with_problems(iso_files, remove_files_with_errors = FALSE, remove_files_with_warnings = TRUE) |> + sapply(function(x) x$file_info$file_id) |> as.character(), c("A", "C")) - expect_equal(iso_filter_files_with_problems(iso_files, remove_files_with_warnings = TRUE) %>% - sapply(function(x) x$file_info$file_id) %>% as.character(), + expect_equal(iso_filter_files_with_problems(iso_files, 
remove_files_with_warnings = TRUE) |> + sapply(function(x) x$file_info$file_id) |> as.character(), c("A")) - # deprecated iso_omit_files_with_problems - expect_warning(iso_omit_files_with_problems(iso_files), "renamed.*will be removed") - }) test_that("actual problem file works", { expect_warning( - err <- iso_read_continuous_flow(system.file("errdata", "cf_without_data.dxf", package = "isoreader")), + err <- iso_read_continuous_flow(system.file("errdata", "cf_without_data.dxf", package = "isoreader"), read_cache = FALSE), "encountered 1 problem\\.") expect_warning( # warnings cannot be quieted with quiet err <- iso_read_continuous_flow(system.file("errdata", "cf_without_data.dxf", package = "isoreader"), quiet = TRUE), @@ -127,7 +124,8 @@ test_that("actual problem file works", { file_id = "cf_without_data.dxf", type = "error", func = "extract_dxf_raw_voltage_data", - details = "cannot identify measured masses - block 'CEvalDataIntTransferPart' not found after position 1 (pos 65327)" + details = "cannot identify measured masses - block 'CEvalDataIntTransferPart' not found after position 1 (nav block#1 'CFileHeader', pos 65327, max 119237)" ) ) + }) diff --git a/tests/testthat/test-scan.R b/tests/testthat/test-scan.R index 5b0dbf41..6f328140 100644 --- a/tests/testthat/test-scan.R +++ b/tests/testthat/test-scan.R @@ -4,8 +4,8 @@ test_that("test that supported scan files are correct", { initialize_options() expect_is(exts <- get_supported_scan_files(), "data.frame") expect_equal(exts$extension, c(".scan.rds", ".scn")) - expect_true(all(exts$func %>% sapply(class) == "character")) - expect_true(all(exts$func %>% map_lgl(exists, mode = "function", where = asNamespace("isoreader")))) + expect_true(all(exts$func |> sapply(class) == "character")) + expect_true(all(exts$func |> map_lgl(exists, mode = "function", where = asNamespace("isoreader")))) }) test_that("test that parameter checks are performed", { diff --git a/tests/testthat/test-settings.R 
b/tests/testthat/test-settings.R index 833b7845..75cfb446 100644 --- a/tests/testthat/test-settings.R +++ b/tests/testthat/test-settings.R @@ -46,8 +46,8 @@ test_that("info messages can be switched for just one function", { test_that("info message functions can be part of a pipeline", { df <- tibble(a = 1:5) - expect_equal(df %>% iso_turn_info_messages_on(), df) - expect_equal(df %>% iso_turn_info_messages_off(), df) + expect_equal(df |> iso_turn_info_messages_on(), df) + expect_equal(df |> iso_turn_info_messages_off(), df) }) test_that("test that caching can be turned on/off", { diff --git a/tests/testthat/test-units.R b/tests/testthat/test-units.R index d170fc8d..ad2a74e7 100644 --- a/tests/testthat/test-units.R +++ b/tests/testthat/test-units.R @@ -40,8 +40,8 @@ test_that("test that units class works properly", { expect_warning(out <- vctrs::vec_c(x, y), "different units") expect_equal(out, c(data, data)) expect_is(iso_double_with_units(x), "iso_double_with_units") - expect_equal(iso_double_with_units(x) %>% iso_get_units(), "undefined units") - expect_equal(iso_double_with_units(x, units = "new units") %>% iso_get_units(), "new units") + expect_equal(iso_double_with_units(x) |> iso_get_units(), "undefined units") + expect_equal(iso_double_with_units(x, units = "new units") |> iso_get_units(), "new units") # double expect_is(vctrs::vec_ptype2(x, double()), "numeric") expect_is(vctrs::vec_ptype2(double(), x), "numeric") @@ -69,23 +69,23 @@ test_that("test that units class works properly", { expect_equal(iso_get_units(iso_double_with_units()), "undefined units") expect_equal(iso_get_units(x), "permil") expect_equal(iso_get_units(y), "not permil") - expect_equal(data.frame(x = x, y = y, z = 42) %>% iso_get_units(), + expect_equal(data.frame(x = x, y = y, z = 42) |> iso_get_units(), c(x = "permil", y = "not permil", z = NA_character_)) # strip units expect_equal(iso_strip_units(42.), 42.) 
- expect_equal(iso_strip_units(42) %>% iso_get_units(), NA_character_) + expect_equal(iso_strip_units(42) |> iso_get_units(), NA_character_) expect_equal(iso_strip_units(x), data) - expect_equal(iso_strip_units(x) %>% iso_get_units(), NA_character_) - expect_equal(data.frame(x = x, y = y, z = 42) %>% iso_strip_units() %>% iso_get_units(), + expect_equal(iso_strip_units(x) |> iso_get_units(), NA_character_) + expect_equal(data.frame(x = x, y = y, z = 42) |> iso_strip_units() |> iso_get_units(), c(x = NA_character_, y = NA_character_, z = NA_character_)) - expect_equal(data.frame(x = x, y = y, z = 42) %>% iso_strip_units(), + expect_equal(data.frame(x = x, y = y, z = 42) |> iso_strip_units(), data.frame(x = data, y = data, z = 42)) # is this what should happen for lists? - expect_equal(list(x = x, y = y, z = 42) %>% iso_get_units(), NA_character_) - expect_equal(list(x = x, y = y, z = 42) %>% purrr::map_chr(iso_get_units), c(x = "permil", y = "not permil", z = NA_character_)) - expect_equal(list(x = x, y = y, z = 42) %>% iso_strip_units() %>% purrr::map_chr(iso_get_units), c(x = "permil", y = "not permil", z = NA_character_)) + expect_equal(list(x = x, y = y, z = 42) |> iso_get_units(), NA_character_) + expect_equal(list(x = x, y = y, z = 42) |> purrr::map_chr(iso_get_units), c(x = "permil", y = "not permil", z = NA_character_)) + expect_equal(list(x = x, y = y, z = 42) |> iso_strip_units() |> purrr::map_chr(iso_get_units), c(x = "permil", y = "not permil", z = NA_character_)) # implicit / explicit units expect_error(iso_make_units_explicit(42), "only.*data frames") @@ -99,7 +99,7 @@ test_that("test that units class works properly", { expect_equal(names(out), c("x.permil", "y.not permil")) expect_equal( iso_make_units_explicit(tibble(x = x, y = y)), - iso_make_units_explicit(tibble(x = x, y = y)) %>% iso_make_units_explicit() + iso_make_units_explicit(tibble(x = x, y = y)) |> iso_make_units_explicit() ) expect_error(iso_make_units_implicit(42), "only.*data frames") 
expect_error(iso_make_units_implicit(tibble(), prefix = ""), "must be at least 1") @@ -113,9 +113,9 @@ test_that("test that units class works properly", { expect_is(out <- iso_make_units_implicit(tibble(x.permil = data), prefix = ".", suffix = ""), "tbl_df") expect_equal(names(out), "x") expect_equal(iso_get_units(out), c(x = "permil")) - expect_equal(iso_make_units_implicit(tibble(`x [permil]` = data, `y [not permil]` = data, other = data)) %>% iso_make_units_explicit(), + expect_equal(iso_make_units_implicit(tibble(`x [permil]` = data, `y [not permil]` = data, other = data)) |> iso_make_units_explicit(), tibble(`x [permil]` = data, `y [not permil]` = data, other = data)) - expect_equal(iso_make_units_explicit(tibble(x = x, y = y)) %>% iso_make_units_implicit(), tibble(x = x, y = y)) + expect_equal(iso_make_units_explicit(tibble(x = x, y = y)) |> iso_make_units_implicit(), tibble(x = x, y = y)) # printout expect_equal(vctrs::vec_ptype_full(x), "double in 'permil'") @@ -133,11 +133,11 @@ test_that("test that units class works properly", { expect_equal( vctrs::vec_rbind(tibble(x = x), tibble(y = y))$x, vctrs::vec_c(x, rep(NA, length(y)))) expect_equal( vctrs::vec_rbind(tibble(x = x), tibble(y = y))$y, vctrs::vec_c(rep(NA, length(x)), y)) expect_equal( - tibble(a = c("a", "b"), x = purrr::map(a, ~x)) %>% tidyr::unnest(x), + tibble(a = c("a", "b"), x = purrr::map(a, ~x)) |> tidyr::unnest(x), tibble(a = rep(c("a", "b"), each = length(data)), x = iso_double_with_units(c(data, data), "permil")) ) expect_equal( - tibble(a = c("a", "b"), x = purrr::map(a, ~tibble(x=x, y=y))) %>% tidyr::unnest(x), + tibble(a = c("a", "b"), x = purrr::map(a, ~tibble(x=x, y=y))) |> tidyr::unnest(x), tibble(a = rep(c("a", "b"), each = length(data)), x = iso_double_with_units(c(data, data), "permil"), y = iso_double_with_units(c(data, data), "not permil")) ) diff --git a/tests/testthat/test-utils-source-information.R b/tests/testthat/test-utils-source-information.R new file mode 100644 index 
00000000..9a88b307 --- /dev/null +++ b/tests/testthat/test-utils-source-information.R @@ -0,0 +1,11 @@ +# file info ==== + +context("Source File Information") + +test_that("test iso_get_source_file_structure()", { + + expect_error(iso_get_source_file_structure(), "`iso_file` has to be an iso file object") + expect_error(iso_get_source_file_structure(42), "`iso_file` has to be an iso file object") + + +}) \ No newline at end of file diff --git a/tests/testthat/test-utils.R b/tests/testthat/test-utils.R index 47cd3805..cdc2e8b7 100644 --- a/tests/testthat/test-utils.R +++ b/tests/testthat/test-utils.R @@ -255,18 +255,18 @@ test_that("test that retrieving file paths works correctly", { expect_error(iso_expand_paths(c(".", ".", "."), root = c(".", ".")), "one entry or be of the same length") expect_error(iso_expand_paths(""), "empty paths .* are not valid") expect_error(iso_expand_paths(system.file("extdata", package = "isoreader")), "no extensions") - expect_error(iso_expand_paths(system.file("extdata", package = "isoreader") %>% list.files(full.names = TRUE), "did"), + expect_error(iso_expand_paths(system.file("extdata", package = "isoreader") |> list.files(full.names = TRUE), "did"), "do not have one of the supported extensions") # check expected result - direct_list <- system.file("extdata", package = "isoreader") %>% list.files(full.names = T, pattern = "\\.(dxf|did|cf)$") + direct_list <- system.file("extdata", package = "isoreader") |> list.files(full.names = T, pattern = "\\.(dxf|did|cf)$") expect_identical( tibble(root = ".", path = direct_list), - system.file("extdata", package = "isoreader") %>% iso_expand_paths(c("did", "dxf", "cf")) + system.file("extdata", package = "isoreader") |> iso_expand_paths(c("did", "dxf", "cf")) ) expect_identical( - tibble(root = ".", path = direct_list %>% {.[c(2,1,3:length(.))]}), - c(direct_list[2], system.file("extdata", package = "isoreader")) %>% iso_expand_paths(c("did", "dxf", "cf")) + tibble(root = ".", path = 
direct_list[c(2,1,3:length(direct_list))]), + c(direct_list[2], system.file("extdata", package = "isoreader")) |> iso_expand_paths(c("did", "dxf", "cf")) ) expect_warning( @@ -287,7 +287,7 @@ test_that("test that column name checks work correctly", { test_that("test that get support file types are listed", { expect_true(is.data.frame(iso_get_supported_file_types())) expect_equal( - iso_get_supported_file_types() %>% names(), + iso_get_supported_file_types() |> names(), c("type", "extension", "software", "description", "call") ) }) @@ -311,7 +311,7 @@ test_that("test that error catching works correctly", { expect_error(exec_func_with_error_catch(function(x) stop("problem"), 1), "problem") expect_equal( {suppressMessages(iso_turn_debug_off()); default(debug)}, FALSE) expect_message(y <- exec_func_with_error_catch(function(x) stop("problem"), 1), "problem") - expect_equal(problems(y) %>% select(type, details), tibble(type = "error", details = "problem")) + expect_equal(problems(y) |> select(type, details), tibble(type = "error", details = "problem")) }) diff --git a/vignettes/continuous_flow.Rmd b/vignettes/continuous_flow.Rmd index e1fde121..525cbb08 100644 --- a/vignettes/continuous_flow.Rmd +++ b/vignettes/continuous_flow.Rmd @@ -40,7 +40,7 @@ Reading continuous flow files is as simple as passing one or multiple file or fo ```{r, message = FALSE} # all available examples -iso_get_reader_examples() %>% knitr::kable() +iso_get_reader_examples() |> knitr::kable() ``` ```{r} @@ -58,7 +58,7 @@ cf_files <- The `cf_files` variable now contains a set of isoreader objects, one for each file. Take a look at what information was retrieved from the files using the `iso_get_data_summary()` function. 
```{r} -cf_files %>% iso_get_data_summary() %>% knitr::kable() +cf_files |> iso_get_data_summary() |> knitr::kable() ``` ## Problems @@ -66,8 +66,8 @@ cf_files %>% iso_get_data_summary() %>% knitr::kable() In case there was any trouble with reading any of the files, the following functions provide an overview summary as well as details of all errors and warnings, respectively. The examples here contain no errors but if you run into any unexpected file read problems, please file a bug report in the [isoreader issue tracker](https://github.com/isoverse/isoreader/issues). ```{r} -cf_files %>% iso_get_problems_summary() %>% knitr::kable() -cf_files %>% iso_get_problems() %>% knitr::kable() +cf_files |> iso_get_problems_summary() |> knitr::kable() +cf_files |> iso_get_problems() |> knitr::kable() ``` # File Information @@ -76,9 +76,9 @@ Detailed file information can be aggregated for all isofiles using the `iso_get_ ```{r} # all file information -cf_files %>% iso_get_file_info(select = c(-file_root)) %>% knitr::kable() +cf_files |> iso_get_file_info(select = c(-file_root)) |> knitr::kable() # select file information -cf_files %>% +cf_files |> iso_get_file_info( select = c( # rename sample id columns from the different file types to a new ID column @@ -90,7 +90,7 @@ cf_files %>% ), # explicitly allow for file specific rename (for the new ID column) file_specific = TRUE - ) %>% knitr::kable() + ) |> knitr::kable() ``` ## Select/Rename @@ -99,7 +99,7 @@ Rather than retrieving specific file info columns using the above example of `is ```{r} # select + rename specific file info columns -cf_files2 <- cf_files %>% +cf_files2 <- cf_files |> iso_select_file_info( ID = `Identifier 1`, ID = `Name`, Analysis, `Peak Center`, `H3 Factor`, `Date & Time` = file_datetime, @@ -109,7 +109,7 @@ cf_files2 <- cf_files %>% ) # fetch all file info -cf_files2 %>% iso_get_file_info() %>% knitr::kable() +cf_files2 |> iso_get_file_info() |> knitr::kable() ``` ## Filter @@ -118,14 +118,14 @@ Any 
collection of isofiles can also be filtered based on the available file info ```{r} # find files that have 'acetanilide' in the new ID field -cf_files2 %>% iso_filter_files(grepl("acetanilide", ID)) %>% - iso_get_file_info() %>% +cf_files2 |> iso_filter_files(grepl("acetanilide", ID)) |> + iso_get_file_info() |> knitr::kable() # find files that were run since 2015 -cf_files2 %>% - iso_filter_files(`Date & Time` > "2015-01-01") %>% - iso_get_file_info() %>% +cf_files2 |> + iso_filter_files(`Date & Time` > "2015-01-01") |> + iso_get_file_info() |> knitr::kable() ``` @@ -135,19 +135,19 @@ The file information in any collection of isofiles can also be mutated using the ```{r} cf_files3 <- - cf_files2 %>% + cf_files2 |> iso_mutate_file_info( # update existing column ID = paste("ID:", ID), # introduce new column `Run since 2015?` = `Date & Time` > "2015-01-01", # parse weight as a number and turn into a column with units - `Sample Weight` = `Sample Weight` %>% parse_number() %>% iso_with_units("mg") + `Sample Weight` = `Sample Weight` |> parse_number() |> iso_with_units("mg") ) -cf_files3 %>% - iso_get_file_info() %>% - iso_make_units_explicit() %>% +cf_files3 |> + iso_get_file_info() |> + iso_make_units_explicit() |> knitr::kable() ``` @@ -171,12 +171,12 @@ new_info <- "6617_IAEA600", "no", "did not inject properly" ) ) -new_info %>% knitr::kable() +new_info |> knitr::kable() # adding it to the isofiles -cf_files3 %>% - iso_add_file_info(new_info, by1 = "Run since 2015?", by2 = "file_id") %>% - iso_get_file_info(select = !!names(new_info)) %>% +cf_files3 |> + iso_add_file_info(new_info, by1 = "Run since 2015?", by2 = "file_id") |> + iso_get_file_info(select = !!names(new_info)) |> knitr::kable() ``` @@ -187,7 +187,7 @@ Most file information is initially read as text to avoid cumbersome specificatio ```{r} # use parsing and extraction in iso_mutate_file_info -cf_files2 %>% +cf_files2 |> iso_mutate_file_info( # change type of Peak Center to logical `Peak Center` = 
parse_logical(`Peak Center`), @@ -197,24 +197,24 @@ cf_files2 %>% file_id_2nd = extract_word(file_id, 2), # retrieve file extension from the file_id using regular expression name = extract_substring(ID, "(\\w+)-?(.*)?", capture_bracket = 1) - ) %>% - iso_get_file_info(select = c(matches("file_id"), ID, name, `Peak Center`)) %>% + ) |> + iso_get_file_info(select = c(matches("file_id"), ID, name, `Peak Center`)) |> knitr::kable() # use parsing in iso_filter_file_info -cf_files2 %>% - iso_filter_files(parse_number(`H3 Factor`) > 2) %>% - iso_get_file_info() %>% +cf_files2 |> + iso_filter_files(parse_number(`H3 Factor`) > 2) |> + iso_get_file_info() |> knitr::kable() # use iso_parse_file_info for simplified parsing of column data types -cf_files2 %>% +cf_files2 |> iso_parse_file_info( integer = Analysis, number = `H3 Factor`, logical = `Peak Center` - ) %>% - iso_get_file_info() %>% + ) |> + iso_get_file_info() |> knitr::kable() ``` @@ -223,7 +223,7 @@ cf_files2 %>% Additionally, some IRMS data files contain resistor information that are useful for downstream calculations (see e.g. 
section on signal conversion later in this vignette): ```{r} -cf_files %>% iso_get_resistors() %>% knitr::kable() +cf_files |> iso_get_resistors() |> knitr::kable() ``` # Reference values @@ -232,9 +232,9 @@ As well as isotopic reference values for the different gases: ```{r} # reference delta values without ratio values -cf_files %>% iso_get_standards(file_id:reference) %>% knitr::kable() +cf_files |> iso_get_standards(file_id:reference) |> knitr::kable() # reference values with ratios -cf_files %>% iso_get_standards() %>% knitr::kable() +cf_files |> iso_get_standards() |> knitr::kable() ``` @@ -244,17 +244,17 @@ The raw data read from the IRMS files can be retrieved similarly using the `iso_ ```{r} # get raw data with default selections (all raw data, no additional file info) -cf_files %>% iso_get_raw_data() %>% head(n=10) %>% knitr::kable() +cf_files |> iso_get_raw_data() |> head(n=10) |> knitr::kable() # get specific raw data and add some file information -cf_files %>% +cf_files |> iso_get_raw_data( # select just time and the m/z 2 and 3 ions select = c(time.s, v2.mV, v3.mV), # include the Analysis number fron the file info and rename it to 'run' include_file_info = c(run = Analysis) - ) %>% + ) |> # look at first few records only - head(n=10) %>% knitr::kable() + head(n=10) |> knitr::kable() ``` # Data Processing @@ -269,28 +269,28 @@ As with most data retrieval functions, the `iso_get_vendor_data_table()` functio ```{r} # entire vendor data table -cf_files %>% iso_get_vendor_data_table() %>% knitr::kable() +cf_files |> iso_get_vendor_data_table() |> knitr::kable() # get specific parts and add some file information -cf_files %>% +cf_files |> iso_get_vendor_data_table( # select peak number, ret. 
time, overall intensity and all H delta columns select = c(Nr., Rt, area = `rIntensity All`, matches("^d \\d+H")), # include the Analysis number fron the file info and rename it to 'run' include_file_info = c(run = Analysis) - ) %>% + ) |> knitr::kable() # the data table also provides units if included in the original data file # which can be made explicit using the function iso_make_units_explicit() -cf_files %>% +cf_files |> iso_get_vendor_data_table( # select peak number, ret. time, overall intensity and all H delta columns select = c(Nr., Rt, area = `rIntensity All`, matches("^d \\d+H")), # include the Analysis number fron the file info and rename it to 'run' include_file_info = c(run = Analysis) - ) %>% + ) |> # make column units explicit - iso_make_units_explicit() %>% + iso_make_units_explicit() |> knitr::kable() ``` @@ -299,7 +299,7 @@ cf_files %>% For users familiar with the nested data frames from the [tidyverse](https://www.tidyverse.org/) (particularly [tidyr](https://tidyr.tidyverse.org/)'s `nest` and `unnest`), there is an easy way to retrieve all data from the iso file objects in a single nested data frame: ```{r} -all_data <- cf_files %>% iso_get_all_data() +all_data <- cf_files |> iso_get_all_data() # not printed out because this data frame is very big ``` @@ -309,11 +309,11 @@ Saving entire collections of isofiles for retrieval at a later point is easily d ```{r} # export to R data archive -cf_files %>% iso_save("cf_files_export.cf.rds") +cf_files |> iso_save("cf_files_export.cf.rds") # read back the exported R data archive cf_files <- iso_read_continuous_flow("cf_files_export.cf.rds") -cf_files %>% iso_get_data_summary() %>% knitr::kable() +cf_files |> iso_get_data_summary() |> knitr::kable() ``` @@ -323,7 +323,7 @@ At the moment, isoreader supports export of all data to Excel and the [Feather f ```{r, eval = FALSE} # export to excel -cf_files %>% iso_export_to_excel("cf_files_export") +cf_files |> iso_export_files_to_excel("cf_files_export") # 
data sheets available in the exported data file: readxl::excel_sheets("cf_files_export.cf.xlsx") @@ -331,7 +331,7 @@ readxl::excel_sheets("cf_files_export.cf.xlsx") ```{r, eval=FALSE} # export to feather -cf_files %>% iso_export_to_feather("cf_files_export") +cf_files |> iso_export_files_to_feather("cf_files_export") # exported feather files list.files(pattern = ".cf.feather") diff --git a/vignettes/development.Rmd b/vignettes/development.Rmd index 3c4dc2de..06515952 100644 --- a/vignettes/development.Rmd +++ b/vignettes/development.Rmd @@ -44,7 +44,7 @@ readers <- iso_register_dual_inlet_file_reader(".new.did", "new_reader") knitr::kable(readers) # copy an example file from the package with the new extension -iso_get_reader_example("dual_inlet_example.did") %>% file.copy(to = "example.new.did") +iso_get_reader_example("dual_inlet_example.did") |> file.copy(to = "example.new.did") # read the file iso_read_dual_inlet("example.new.did", read_cache = FALSE) @@ -70,7 +70,7 @@ isoreader:::set_finish_file_event_expr({ c( iso_get_reader_example("dual_inlet_example.did"), iso_get_reader_example("dual_inlet_example.caf") -) %>% iso_read_dual_inlet(read_cache = FALSE) +) |> iso_read_dual_inlet(read_cache = FALSE) isoreader:::initialize_options() # reset all isoreader options ``` @@ -80,42 +80,40 @@ isoreader:::initialize_options() # reset all isoreader options The best way to start debugging an isoreader call is to switch the package into debug mode. This is done using the internal `iso_turn_debug_on()` function. This enables debug messages, turns caching off by default so files are always read anew, and makes the package keep more information in the isofile objects. 
It continues to catch errors inside file readers (keeping track of them in the [problems](operations.html#dealing-with-file-read-problems)) unless you set `iso_turn_debug_on(catch_errors = FALSE)`, in which case no errors are caught and stop the processing so you get the full traceback and debugging options of your IDE. -## Debugging binary file reads (Isodat) +# Debugging binary file reads (Isodat) -Errors during the binary file reads usually indicate the approximate position in the file where the error was encountered. The easiest way to get started on figuring out what the file looks like at that position is to use a binary file editor and jump to the position. For a sense of the interpreted structure around that position, one can use the internal function `map_binary_structure` which tries to apply all frequently occurring binary patterns recognized by isoreader. The binary representation of the source file is only available if in debug mode but if debug mode is ON, it can be accessed as follows: +Errors during the binary file reads usually indicate the approximate position in the file where the error was encountered. The easiest way to get started on figuring out what the file looks like at that position is to use a binary file editor and jump to the position. For a sense of the interpreted structure around that position, one can use `iso_print_source_file_structure()` which shows what binary patterns isoreader recognized. 
This binary representation of the source file is only available if the file is read while in debug mode, otherwise file objects would get unnecessarily large: ```{r} # turn on debug mode isoreader:::iso_turn_debug_on() # read example file -ex <- iso_get_reader_example("dual_inlet_example.did") %>% +ex <- iso_get_reader_example("dual_inlet_example.did") |> iso_read_dual_inlet(quiet = TRUE) -# access binary -bin <- ex$binary -# use structure mapping -bin %>% - isoreader:::move_to_pos(1340) %>% - isoreader:::map_binary_structure(length = 200) +# retrieve source structure and print a part of it +bin <- ex |> iso_get_source_file_structure() +bin |> iso_print_source_file_structure(length = 500) ``` -This structure representation shows recognized control elements in `<...>` and data elements in `{...}` which are converted to text or numeric representation if the interpretation is unambiguous, or plain hexadecimal characters if the nature of the data cannot be determined with certainty. Because this function tries all possible control elements and data interpretations, it is quite slow and may take a while if run for large stretches of binary code (i.e. if the `length` parameter is very long). +This structure representation shows recognized control elements in `<...>` and data elements in `{...}` which are converted to text or numeric representation if the interpretation is unambiguous, or plain hexadecimal characters if the nature of the data cannot be determined with certainty. You can adjust `start` and `length` to look at different parts of the binary file or save the structure to a text file with `save_to_file`. -For an overview of all the control elements that are currently consider, use the internal `get_ctrl_blocks_config_df()` function. 
+For an overview of all the elements (`blocks`) identified in the binary file as a tibble, use: ```{r} -isoreader:::get_ctrl_blocks_config_df() +bin$blocks |> head(20) ``` -Additional information can be gleaned from the so-called control blocks, which are larger structural elements of Isodat binary files and are kept in a data frame within the binary object (again only available in debug mode). +While this provides all elements, the top level structure is provided by the so-called control blocks: ```{r} -bin$C_blocks +bin$blocks |> dplyr::filter(type == "C block") |> head(20) ``` -Same as for specific byte positions, one can use the control blocks to navigate the file and `map_binary_structure`. +To look at specific control-blocks, simply provide the relevant start position to `iso_print_source_file_structure()`: ```{r} -bin %>% - isoreader:::move_to_C_block("CMethod") %>% - isoreader:::map_binary_structure(length = 200) +cdata <- bin$blocks |> dplyr::filter(block == "CData") +cdata + +bin |> iso_print_source_file_structure(start = cdata$start, length = 500) ``` diff --git a/vignettes/dual_inlet.Rmd b/vignettes/dual_inlet.Rmd index b9452300..dec7cfec 100644 --- a/vignettes/dual_inlet.Rmd +++ b/vignettes/dual_inlet.Rmd @@ -43,7 +43,7 @@ Reading dual inlet files is as simple as passing one or multiple file or folder ```{r, message=FALSE} # all available examples -iso_get_reader_examples() %>% knitr::kable() +iso_get_reader_examples() |> knitr::kable() ``` ```{r} @@ -62,7 +62,7 @@ di_files <- The `di_files` variable now contains a set of isoreader objects, one for each file. Take a look at what information was retrieved from the files using the `iso_get_data_summary()` function. 
```{r} -di_files %>% iso_get_data_summary() %>% knitr::kable() +di_files |> iso_get_data_summary() |> knitr::kable() ``` ## Problems @@ -70,8 +70,8 @@ di_files %>% iso_get_data_summary() %>% knitr::kable() In case there was any trouble with reading any of the files, the following functions provide an overview summary as well as details of all errors and warnings, respectively. The examples here contain no errors but if you run into any unexpected file read problems, please file a bug report in the [isoreader issue tracker](https://github.com/isoverse/isoreader/issues). ```{r} -di_files %>% iso_get_problems_summary() %>% knitr::kable() -di_files %>% iso_get_problems() %>% knitr::kable() +di_files |> iso_get_problems_summary() |> knitr::kable() +di_files |> iso_get_problems() |> knitr::kable() ``` # File Information @@ -80,9 +80,9 @@ Detailed file information can be aggregated for all isofiles using the `iso_get_ ```{r} # all file information -di_files %>% iso_get_file_info(select = c(-file_root)) %>% knitr::kable() +di_files |> iso_get_file_info(select = c(-file_root)) |> knitr::kable() # select file information -di_files %>% +di_files |> iso_get_file_info( select = c( # rename sample id columns from the different file types to a new ID column @@ -96,7 +96,7 @@ di_files %>% ), # explicitly allow for file specific rename (for the new ID column) file_specific = TRUE - ) %>% knitr::kable() + ) |> knitr::kable() ``` ## Select/Rename @@ -105,7 +105,7 @@ Rather than retrieving specific file info columns using the above example of `is ```{r} # select + rename specific file info columns -di_files2 <- di_files %>% +di_files2 <- di_files |> iso_select_file_info( ID = `Identifier 1`, ID = `Sample Name`, Analysis, Method, `Peak Center`, `Date & Time` = file_datetime, @@ -114,7 +114,7 @@ di_files2 <- di_files %>% ) # fetch all file info -di_files2 %>% iso_get_file_info() %>% knitr::kable() +di_files2 |> iso_get_file_info() |> knitr::kable() ``` ## Filter @@ -123,14 +123,14 @@ 
Any collection of isofiles can also be filtered based on the available file info ```{r} # find files that have 'CIT' in the new ID field -di_files2 %>% iso_filter_files(grepl("CIT", ID)) %>% - iso_get_file_info() %>% +di_files2 |> iso_filter_files(grepl("CIT", ID)) |> + iso_get_file_info() |> knitr::kable() # find files that were run in 2017 -di_files2 %>% - iso_filter_files(`Date & Time` > "2017-01-01" & `Date & Time` < "2018-01-01") %>% - iso_get_file_info() %>% +di_files2 |> + iso_filter_files(`Date & Time` > "2017-01-01" & `Date & Time` < "2018-01-01") |> + iso_get_file_info() |> knitr::kable() ``` @@ -139,19 +139,19 @@ di_files2 %>% The file information in any collection of isofiles can also be mutated using the function `iso_mutate_file_info`. This function can introduce new columns and operate on/overwrite any existing columns available in the file information (even if it does not exist in all files) and supports full [dplyr](https://dplyr.tidyverse.org/reference/mutate.html) syntax. It can also be used in conjunction with `iso_with_unit` to generate values with implicit units. 
```{r} -di_files3 <- di_files2 %>% +di_files3 <- di_files2 |> iso_mutate_file_info( # update existing column ID = paste("ID:", ID), # introduce new column `Run in 2017?` = `Date & Time` > "2017-01-01" & `Date & Time` < "2018-01-01", # parse weight as a number and turn into a column with units - `Sample Weight` = `Sample Weight` %>% parse_number() %>% iso_with_units("mg") + `Sample Weight` = `Sample Weight` |> parse_number() |> iso_with_units("mg") ) -di_files3 %>% - iso_get_file_info() %>% - iso_make_units_explicit() %>% +di_files3 |> + iso_get_file_info() |> + iso_make_units_explicit() |> knitr::kable() ``` @@ -175,12 +175,12 @@ new_info <- "16068", "no", "did not inject properly" ) ) -new_info %>% knitr::kable() +new_info |> knitr::kable() # adding it to the isofiles -di_files3 %>% - iso_add_file_info(new_info, by1 = "Run in 2017?", by2 = "Analysis") %>% - iso_get_file_info(select = !!names(new_info)) %>% +di_files3 |> + iso_add_file_info(new_info, by1 = "Run in 2017?", by2 = "Analysis") |> + iso_get_file_info(select = !!names(new_info)) |> knitr::kable() ``` @@ -191,7 +191,7 @@ Most file information is initially read as text to avoid cumbersome specificatio ```{r} # use parsing and extraction in iso_mutate_file_info -di_files2 %>% +di_files2 |> iso_mutate_file_info( # change type of Peak Center to logical `Peak Center` = parse_logical(`Peak Center`), @@ -201,24 +201,24 @@ di_files2 %>% Method_2nd = extract_word(Method, 2), # retrieve file extension from the file_id using regular expression extension = extract_substring(file_id, "\\.(\\w+)$", capture_bracket = 1) - ) %>% - iso_get_file_info(select = c(extension, `Peak Center`, matches("Method"))) %>% + ) |> + iso_get_file_info(select = c(extension, `Peak Center`, matches("Method"))) |> knitr::kable() # use parsing in iso_filter_file_info -di_files2 %>% - iso_filter_files(parse_integer(Analysis) > 1500) %>% - iso_get_file_info() %>% +di_files2 |> + iso_filter_files(parse_integer(Analysis) > 1500) |> + 
iso_get_file_info() |> knitr::kable() # use iso_parse_file_info for simplified parsing of column data types -di_files2 %>% +di_files2 |> iso_parse_file_info( integer = Analysis, number = `Sample Weight`, logical = `Peak Center` - ) %>% - iso_get_file_info() %>% + ) |> + iso_get_file_info() |> knitr::kable() ``` @@ -227,7 +227,7 @@ di_files2 %>% Additionally, some IRMS data files contain resistor information that are useful for downstream calculations (see e.g. section on signal conversion later in this vignette): ```{r} -di_files %>% iso_get_resistors() %>% knitr::kable() +di_files |> iso_get_resistors() |> knitr::kable() ``` # Reference values @@ -236,9 +236,9 @@ As well as isotopic reference values for the different gases: ```{r} # reference delta values without ratio values -di_files %>% iso_get_standards(file_id:reference) %>% knitr::kable() +di_files |> iso_get_standards(file_id:reference) |> knitr::kable() # reference values with ratios -di_files %>% iso_get_standards() %>% knitr::kable() +di_files |> iso_get_standards() |> knitr::kable() ``` # Raw Data @@ -247,17 +247,17 @@ The raw data read from the IRMS files can be retrieved similarly using the `iso_ ```{r} # get raw data with default selections (all raw data, no additional file info) -di_files %>% iso_get_raw_data() %>% head(n=10) %>% knitr::kable() +di_files |> iso_get_raw_data() |> head(n=10) |> knitr::kable() # get specific raw data and add some file information -di_files %>% +di_files |> iso_get_raw_data( # select just time and the two ions select = c(type, cycle, v44.mV, v45.mV), # include the Analysis number fron the file info and rename it to 'run' include_file_info = c(run = Analysis) - ) %>% + ) |> # look at first few records only - head(n=10) %>% knitr::kable() + head(n=10) |> knitr::kable() ``` @@ -273,15 +273,15 @@ As with most data retrieval functions, the `iso_get_vendor_data_table()` functio ```{r} # entire vendor data table -di_files %>% iso_get_vendor_data_table() %>% knitr::kable() 
+di_files |> iso_get_vendor_data_table() |> knitr::kable() # get specific parts and add some file information -di_files %>% +di_files |> iso_get_vendor_data_table( # select cycle and all carbon columns select = c(cycle, matches("C")), # include the Identifier 1 fron the file info and rename it to 'id' include_file_info = c(id = `Identifier 1`) - ) %>% knitr::kable() + ) |> knitr::kable() ``` # For expert users: retrieving all data @@ -289,7 +289,7 @@ di_files %>% For users familiar with the nested data frames from the [tidyverse](https://www.tidyverse.org/) (particularly [tidyr](https://tidyr.tidyverse.org/)'s `nest` and `unnest`), there is an easy way to retrieve all data from the iso file objects in a single nested data frame: ```{r} -all_data <- di_files %>% iso_get_all_data() +all_data <- di_files |> iso_get_all_data() # not printed out because this data frame is very big ``` @@ -299,7 +299,7 @@ Saving entire collections of isofiles for retrieval at a later point is easily d ```{r} # export to R data archive -di_files %>% iso_save("di_files_export.di.rds") +di_files |> iso_save("di_files_export.di.rds") # read back the exported R data storage iso_read_dual_inlet("di_files_export.di.rds") @@ -312,7 +312,7 @@ At the moment, isoreader supports export of all data to Excel and the [Feather f ```{r, eval = FALSE} # export to excel -di_files %>% iso_export_to_excel("di_files_export") +di_files |> iso_export_files_to_excel("di_files_export") # data sheets available in the exported data file: readxl::excel_sheets("di_files_export.di.xlsx") @@ -320,7 +320,7 @@ readxl::excel_sheets("di_files_export.di.xlsx") ```{r, eval=FALSE} # export to feather -di_files %>% iso_export_to_feather("di_files_export") +di_files |> iso_export_files_to_feather("di_files_export") # exported feather files list.files(pattern = ".di.feather") diff --git a/vignettes/operations.Rmd b/vignettes/operations.Rmd index f1c5c2f2..e6580e35 100644 --- a/vignettes/operations.Rmd +++ 
b/vignettes/operations.Rmd @@ -39,8 +39,8 @@ library(isoreader) ```{r} # list all suported file types -iso_get_supported_file_types() %>% - dplyr::select(extension, software, description, type) %>% +iso_get_supported_file_types() |> + dplyr::select(extension, software, description, type) |> knitr::kable() ``` @@ -50,25 +50,25 @@ By default, isoreader is quite verbose to let the user know what is happening. H ```{r} # read a file in the default verbose mode -iso_get_reader_example("dual_inlet_example.did") %>% - iso_read_dual_inlet() %>% - iso_select_file_info(file_datetime, `Identifier 1`) %>% - iso_get_file_info() %>% +iso_get_reader_example("dual_inlet_example.did") |> + iso_read_dual_inlet() |> + iso_select_file_info(file_datetime, `Identifier 1`) |> + iso_get_file_info() |> knitr::kable() # read the same file but make the read process quiet -iso_get_reader_example("dual_inlet_example.did") %>% - iso_read_dual_inlet(quiet = TRUE) %>% - iso_select_file_info(file_datetime, `Identifier 1`) %>% - iso_get_file_info() %>% +iso_get_reader_example("dual_inlet_example.did") |> + iso_read_dual_inlet(quiet = TRUE) |> + iso_select_file_info(file_datetime, `Identifier 1`) |> + iso_get_file_info() |> knitr::kable() # read the same file but turn all isoreader messages off iso_turn_info_messages_off() -iso_get_reader_example("dual_inlet_example.did") %>% - iso_read_dual_inlet(quiet = TRUE) %>% - iso_select_file_info(file_datetime, `Identifier 1`) %>% - iso_get_file_info() %>% +iso_get_reader_example("dual_inlet_example.did") |> + iso_read_dual_inlet(quiet = TRUE) |> + iso_select_file_info(file_datetime, `Identifier 1`) |> + iso_get_file_info() |> knitr::kable() # turn message back on @@ -84,18 +84,18 @@ By default, isoreader caches files as R objects to make access faster in the fut iso_cleanup_reader_cache() # read a new file (notice the time elapsed) -cf_file <- iso_get_reader_example("continuous_flow_example.dxf") %>% +cf_file <- 
iso_get_reader_example("continuous_flow_example.dxf") |> iso_read_continuous_flow() # re-read the same file much faster (it will be read from cache) -cf_file <- iso_get_reader_example("continuous_flow_example.dxf") %>% +cf_file <- iso_get_reader_example("continuous_flow_example.dxf") |> iso_read_continuous_flow() # turn reader caching off iso_turn_reader_caching_off() # re-read the same file (it will NOT be read from cache) -cf_file <- iso_get_reader_example("continuous_flow_example.dxf") %>% +cf_file <- iso_get_reader_example("continuous_flow_example.dxf") |> iso_read_continuous_flow() # turn reader caching back on @@ -142,7 +142,7 @@ di_files[c(1,3)] di_files[c("dual_inlet_example.did", "dual_inlet_example.caf")] # same result using iso_filter_files (more flexible + verbose output) -di_files %>% iso_filter_files( +di_files |> iso_filter_files( file_id %in% c("dual_inlet_example.did", "dual_inlet_example.caf") ) @@ -166,13 +166,13 @@ iso_files <- ) # retrieve problem summary -iso_files %>% iso_get_problems_summary() %>% knitr::kable() +iso_files |> iso_get_problems_summary() |> knitr::kable() # retrieve problem details -iso_files %>% iso_get_problems() %>% knitr::kable() +iso_files |> iso_get_problems() |> knitr::kable() # filter out erroneous files -iso_files <- iso_files %>% iso_filter_files_with_problems() +iso_files <- iso_files |> iso_filter_files_with_problems() ``` # Re-reading files @@ -183,13 +183,13 @@ Similar functions can be used to re-read outdated files from an older isoreader ```{r} # re-read the 3 dual inlet files from their original location if any have changed -di_files %>% +di_files |> iso_reread_changed_files() # update the file_root for the files before re-read (in this case to a location # that does not hold these files and hence will lead to a warning) -di_files %>% - iso_set_file_root(root = ".") %>% +di_files |> + iso_set_file_root(root = ".") |> iso_reread_all_files() ``` @@ -199,28 +199,28 @@ Isoreader provides a built in data type with 
units (`iso_with_units`) that can b ```{r} # strip all units -cf_file %>% - iso_get_vendor_data_table(select = c(`Ampl 28`, `rIntensity 28`, `d 15N/14N`)) %>% - iso_strip_units() %>% head(3) +cf_file |> + iso_get_vendor_data_table(select = c(`Ampl 28`, `rIntensity 28`, `d 15N/14N`)) |> + iso_strip_units() |> head(3) # make units explicit -cf_file %>% - iso_get_vendor_data_table(select = c(`Ampl 28`, `rIntensity 28`, `d 15N/14N`)) %>% - iso_make_units_explicit() %>% head(3) +cf_file |> + iso_get_vendor_data_table(select = c(`Ampl 28`, `rIntensity 28`, `d 15N/14N`)) |> + iso_make_units_explicit() |> head(3) # introduce new unit columns e.g. in the file info -cf_file %>% - iso_mutate_file_info(weight = iso_with_units(0.42, "mg")) %>% +cf_file |> + iso_mutate_file_info(weight = iso_with_units(0.42, "mg")) |> iso_get_vendor_data_table(select = c(`Ampl 28`, `rIntensity 28`, `d 15N/14N`), - include_file_info = weight) %>% - iso_make_units_explicit() %>% head(3) + include_file_info = weight) |> + iso_make_units_explicit() |> head(3) # or turn a column e.g. 
with custom format units in the header into implicit units -cf_file %>% - iso_mutate_file_info(weight.mg = 0.42) %>% +cf_file |> + iso_mutate_file_info(weight.mg = 0.42) |> iso_get_vendor_data_table(select = c(`Ampl 28`, `rIntensity 28`, `d 15N/14N`), - include_file_info = weight.mg) %>% - iso_make_units_implicit(prefix = ".", suffix = "") %>% head(3) + include_file_info = weight.mg) |> + iso_make_units_implicit(prefix = ".", suffix = "") |> head(3) ``` # Formatting @@ -238,10 +238,10 @@ iso_format( ) # example inside a data frame -cf_file %>% - iso_get_vendor_data_table(select = c(`Nr.`, `Ampl 28`, `d 15N/14N`)) %>% - dplyr::select(-file_id) %>% - head(3) %>% +cf_file |> + iso_get_vendor_data_table(select = c(`Nr.`, `Ampl 28`, `d 15N/14N`)) |> + dplyr::select(-file_id) |> + head(3) |> # introduce new label columns using iso_format dplyr::mutate( # default concatenation of values diff --git a/vignettes/quick_start.Rmd b/vignettes/quick_start.Rmd index e2efa54b..c273bfb2 100644 --- a/vignettes/quick_start.Rmd +++ b/vignettes/quick_start.Rmd @@ -51,7 +51,7 @@ For demonstration purposes, this vignette simply reads all supported dual inlet, ```{r} # all available examples -iso_get_reader_examples() %>% knitr::kable() +iso_get_reader_examples() |> knitr::kable() ``` # Dual Inlet Files @@ -68,7 +68,7 @@ iso_save(di_files, filepath = "di_save") ```{r, eval = FALSE} # export to excel -iso_export_to_excel(di_files, filepath = "di_export") +iso_export_files_to_excel(di_files, filepath = "di_export") ``` # Continuous Flow Files @@ -85,7 +85,7 @@ iso_save(cf_files, filepath = "cf_save") ```{r, eval = FALSE} # export to excel -iso_export_to_excel(cf_files, filepath = "cf_export") +iso_export_files_to_excel(cf_files, filepath = "cf_export") ``` # Scan Files @@ -102,6 +102,6 @@ iso_save(scan_files, filepath = "scan_save") ```{r, eval = FALSE} # export to excel -iso_export_to_excel(scan_files, filepath = "scan_export") +iso_export_files_to_excel(scan_files, filepath = 
"scan_export") ``` diff --git a/vignettes/scan.Rmd b/vignettes/scan.Rmd index ac398b16..5f5b1448 100644 --- a/vignettes/scan.Rmd +++ b/vignettes/scan.Rmd @@ -45,7 +45,7 @@ Reading scan files is as simple as passing one or multiple file or folder paths ```{r, message=FALSE} # all available examples -iso_get_reader_examples() %>% knitr::kable() +iso_get_reader_examples() |> knitr::kable() ``` ```{r} @@ -64,7 +64,7 @@ scan_files <- The `scan_files` variable now contains a set of isoreader objects, one for each file. Take a look at what information was retrieved from the files using the `iso_get_data_summary()` function. ```{r} -scan_files %>% iso_get_data_summary() %>% knitr::kable() +scan_files |> iso_get_data_summary() |> knitr::kable() ``` ## Problems @@ -72,8 +72,8 @@ scan_files %>% iso_get_data_summary() %>% knitr::kable() In case there was any trouble with reading any of the files, the following functions provide an overview summary as well as details of all errors and warnings, respectively. The examples here contain no errors but if you run into any unexpected file read problems, please file a bug report in the [isoreader issue tracker](https://github.com/isoverse/isoreader/issues). 
```{r} -scan_files %>% iso_get_problems_summary() %>% knitr::kable() -scan_files %>% iso_get_problems() %>% knitr::kable() +scan_files |> iso_get_problems_summary() |> knitr::kable() +scan_files |> iso_get_problems() |> knitr::kable() ``` # File Information @@ -82,7 +82,7 @@ Detailed file information can be aggregated for all isofiles using the `iso_get_ ```{r} # all file information -scan_files %>% iso_get_file_info(select = c(-file_root)) %>% knitr::kable() +scan_files |> iso_get_file_info(select = c(-file_root)) |> knitr::kable() ``` ## Select/Rename @@ -91,12 +91,12 @@ File information can also be modified across an entire collection of isofiles us ```{r} # select + rename specific file info columns -scan_files2 <- scan_files %>% - iso_select_file_info(-file_root) %>% +scan_files2 <- scan_files |> + iso_select_file_info(-file_root) |> iso_rename_file_info(`Date & Time` = file_datetime) # fetch all file info -scan_files2 %>% iso_get_file_info() %>% knitr::kable() +scan_files2 |> iso_get_file_info() |> knitr::kable() ``` @@ -106,9 +106,9 @@ Any collection of isofiles can also be filtered based on the available file info ```{r} # find files that have 'CIT' in the new ID field -scan_files2 %>% - iso_filter_files(type == "High Voltage") %>% - iso_get_file_info() %>% +scan_files2 |> + iso_filter_files(type == "High Voltage") |> + iso_get_file_info() |> knitr::kable() ``` @@ -117,14 +117,14 @@ scan_files2 %>% The file information in any collection of isofiles can also be mutated using the function `iso_mutate_file_info`. This function can introduce new columns and operate on any existing columns available in the file information (even if it does not exist in all files) and supports full [dplyr](https://dplyr.tidyverse.org/reference/mutate.html) syntax. 
```{r} -scan_files3 <- scan_files2 %>% +scan_files3 <- scan_files2 |> iso_mutate_file_info( # introduce new column `Run in 2019?` = `Date & Time` > "2019-01-01" & `Date & Time` < "2020-01-01" ) -scan_files3 %>% - iso_get_file_info() %>% +scan_files3 |> + iso_get_file_info() |> knitr::kable() ``` @@ -133,7 +133,7 @@ scan_files3 %>% Additionally, some IRMS data files contain resistor information that are useful for downstream calculations (see e.g. section on signal conversion later in this vignette): ```{r} -scan_files %>% iso_get_resistors() %>% knitr::kable() +scan_files |> iso_get_resistors() |> knitr::kable() ``` # Raw Data @@ -142,17 +142,17 @@ The raw data read from the scan files can be retrieved similarly using the `iso_ ```{r} # get raw data with default selections (all raw data, no additional file info) -scan_files %>% iso_get_raw_data() %>% head(n=10) %>% knitr::kable() +scan_files |> iso_get_raw_data() |> head(n=10) |> knitr::kable() # get specific raw data and add some file information -scan_files %>% +scan_files |> iso_get_raw_data( # select just time and the two ions select = c(x, x_units, v44.mV, v45.mV), # include the scan type and rename the column include_file_info = c(`Scan Type` = type) - ) %>% + ) |> # look at first few records only - head(n=10) %>% knitr::kable() + head(n=10) |> knitr::kable() ``` # For expert users: retrieving all data @@ -160,7 +160,7 @@ scan_files %>% For users familiar with the nested data frames from the [tidyverse](https://www.tidyverse.org/) (particularly [tidyr](https://tidyr.tidyverse.org/)'s `nest` and `unnest`), there is an easy way to retrieve all data from the iso file objects in a single nested data frame: ```{r} -all_data <- scan_files %>% iso_get_all_data() +all_data <- scan_files |> iso_get_all_data() # not printed out because this data frame is very big ``` @@ -171,7 +171,7 @@ Saving entire collections of isofiles for retrieval at a later point is easily d ```{r} # export to R data archive -scan_files %>% 
iso_save("scan_files_export.scan.rds") +scan_files |> iso_save("scan_files_export.scan.rds") # read back the exported R data storage iso_read_scan("scan_files_export.scan.rds") @@ -183,7 +183,7 @@ At the moment, isoreader supports export of all data to Excel and the [Feather f ```{r, eval=FALSE} # export to excel -scan_files %>% iso_export_to_excel("scan_files_export") +scan_files |> iso_export_files_to_excel("scan_files_export") # data sheets available in the exported data file: readxl::excel_sheets("scan_files_export.scan.xlsx") @@ -191,7 +191,7 @@ readxl::excel_sheets("scan_files_export.scan.xlsx") ```{r, eval=FALSE} # export to feather -scan_files %>% iso_export_to_feather("scan_files_export") +scan_files |> iso_export_files_to_feather("scan_files_export") # exported feather files list.files(pattern = ".scan.feather")