Merge pull request #99 from isoverse/dev

upgrade to version 1.1.1
isoverse · Feb 18, 2020 · 9ebf1ea · 9ebf1ea
2 parents 881590e + 8bb320d
commit 9ebf1ea
Show file tree

Hide file tree

Showing 123 changed files with 778 additions and 481 deletions.
diff --git a/.Rbuildignore b/.Rbuildignore
@@ -4,7 +4,7 @@
 ^appveyor\.yml$
 ^codecov\.yml$
 ^_pkgdown\.yml$
-^\.httr-oauth$
+^\.httr-oauth*
 ^Makefile$
 ^NEWS$
 ^docs$

diff --git a/.gitignore b/.gitignore
@@ -31,7 +31,7 @@ vignettes/*.R
 vignettes/**/*.png
 vignettes/**/*.pdf
 # OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3
-.httr-oauth
+.httr-oauth*
 # knitr and R markdown default cache directories
 /*_cache/
 /cache/

diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,12 +1,12 @@
 Package: isoreader
 Title: Read IRMS data files
 Description: R interface to IRMS (isotope ratio mass spectrometry) file formats typically used in stable isotope geochemistry.
-Version: 1.1.0
+Version: 1.1.1
 Authors@R: person("Sebastian", "Kopf", email = "sebastian.kopf@colorado.edu", role = c("aut", "cre"), comment = c(ORCID = "0000-0002-2044-0201"))
 URL: https://github.com/isoverse/isoreader
 BugReports: https://github.com/isoverse/isoreader/issues
 Depends:
-    R (>= 3.1)
+    R (>= 3.5)
 Imports:
     methods,
     R.utils,
@@ -16,7 +16,7 @@ Imports:
     tidyselect (>= 1.0.0),
     vctrs,
     tibble,
-    dplyr (>= 0.7.4),
+    dplyr (>= 0.8.4),
     tidyr (>= 1.0.0),
     stringr,
     purrr,
@@ -29,7 +29,7 @@ Imports:
     feather,
     UNF,
     openxlsx,
-    lazyeval
+    stats
 Suggests:
     testthat,
     readxl,

diff --git a/NAMESPACE b/NAMESPACE
@@ -190,7 +190,6 @@ importFrom(future,plan)
 importFrom(future,resolved)
 importFrom(future,value)
 importFrom(glue,glue)
-importFrom(lazyeval,as.lazy)
 importFrom(lubridate,as_datetime)
 importFrom(lubridate,duration)
 importFrom(lubridate,interval)

diff --git a/R/aggregate_data.R b/R/aggregate_data.R
@@ -55,9 +55,6 @@ iso_get_data_summary <- function(iso_files, quiet = default(quiet)) {
 # summary of raw data info
 get_raw_data_info <- function(iso_files) {
 
-  # global vars
-  all_ions <- n_ions <- label <- read_raw_data <- NULL
-
   # make sure to convert to file list
   iso_files <- iso_as_file_list(iso_files)
 
@@ -72,8 +69,8 @@ get_raw_data_info <- function(iso_files) {
       file_id = names(iso_files),
       read_raw_data = map_lgl(iso_files, ~.x$read_options$raw_data),
       all_ions = map(iso_files, ~names(.x$raw_data) %>% str_subset("^[iIvV](\\d+)\\.")),
-      n_ions = map_int(all_ions, length),
-      ions = map2_chr(all_ions, n_ions, ~if(.y > 0) { collapse(.x, sep = ", ") } else {""}) %>% 
+      n_ions = map_int(.data$all_ions, length),
+      ions = map2_chr(.data$all_ions, .data$n_ions, ~if(.y > 0) { collapse(.x, sep = ", ") } else {""}) %>% 
         str_replace_all("[^0-9,]", "")
     )
 
@@ -103,15 +100,12 @@ get_raw_data_info <- function(iso_files) {
     glue("cannot process '{class(iso_files[[1]])[1]}' in get_raw_data_info") %>% stop(call. = FALSE)
   }
 
-  return(dplyr::select(raw_data_sum, file_id, raw_data = label))
+  return(dplyr::select(raw_data_sum, .data$file_id, raw_data = .data$label))
 }
 
 # summary of file info
 get_file_info_info <- function(iso_files) {
 
-  # global vars
-  read_file_info <- file_info <- NULL
-
   # make sure to convert to file list
   iso_files <- iso_as_file_list(iso_files)
 
@@ -124,16 +118,13 @@ get_file_info_info <- function(iso_files) {
       file_id = names(iso_files),
       read_file_info = map_lgl(iso_files, ~.x$read_options$file_info),
       file_info = ifelse(!read_file_info, "file info not read", paste(map_int(iso_files, ~length(.x$file_info)), "entries"))
-    ) %>% select(file_id, file_info)
+    ) %>% select(.data$file_id, .data$file_info)
   }
 }
 
 # summary of method info
 get_method_info_info <- function(iso_files) {
 
-  # global vars
-  method_info <- NULL
-
   # make sure to convert to file list
   iso_files <- iso_as_file_list(iso_files) 
 
@@ -154,7 +145,7 @@ get_method_info_info <- function(iso_files) {
         has_resistors ~ "resistors",
         TRUE ~ "no method info"
       ) 
-    ) %>% select(file_id, method_info)
+    ) %>% select(.data$file_id, .data$method_info)
   }
 
 }
@@ -163,10 +154,7 @@ get_method_info_info <- function(iso_files) {
 get_vendor_data_table_info <- function(iso_files) {
   # make sure to convert to file list
   iso_files <- iso_as_file_list(iso_files) 
-
-  # global vars
-  vendor_data_table <- NULL
-
+
   # make sure to not process empty list
   if (length(iso_files) == 0) {
     tibble(file_id = character(), vendor_data_table = character())
@@ -179,10 +167,10 @@ get_vendor_data_table_info <- function(iso_files) {
       cols = map_int(iso_files, ~ncol(.x$vendor_data_table)),
       vendor_data_table = case_when(
         !read_vendor_data_table ~ "vendor data table not read",
-        rows > 0 & cols > 0 ~ sprintf("%d rows, %d columns", rows, cols),
+        .data$rows > 0 & .data$cols > 0 ~ sprintf("%d rows, %d columns", .data$rows, .data$cols),
         TRUE ~ "no vendor data table"
       ) 
-    ) %>% select(file_id, vendor_data_table)
+    ) %>% select(.data$file_id, .data$vendor_data_table)
   }
 }
 
@@ -191,6 +179,7 @@ get_vendor_data_table_info <- function(iso_files) {
 #' DEPRECATED
 #' 
 #' Please use \link{iso_get_all_data} instead.
+#' @param ... forwarded to \link{iso_get_all_data}
 #' 
 #' @export
 iso_get_data <- function(...) {
@@ -207,7 +196,7 @@ iso_get_data <- function(...) {
 #' @inheritParams iso_get_vendor_data_table
 #' @param include_raw_data which columns from the raw data to include. Use \code{c(...)} to select multiple columns, supports all \link[dplyr]{select} syntax including renaming columns. Includes all columns by default. Set to NULL to include no raw data.
 #' @param include_standards which columns from the standards info to include. Use \code{c(...)} to select multiple columns, supports all \link[dplyr]{select} syntax including renaming columns. By default, everything is included (both standards and ratios). To omit the ratios, change to \code{select = file_id:reference}. Set to NULL to include no standards info.
-#' #' @param include_resistors which columns from the resistors info to include. Use \code{c(...)} to select multiple columns, supports all \link[dplyr]{select} syntax including renaming columns. Includes all columns by default. Set to NULL to include no resistors info.
+#' @param include_resistors which columns from the resistors info to include. Use \code{c(...)} to select multiple columns, supports all \link[dplyr]{select} syntax including renaming columns. Includes all columns by default. Set to NULL to include no resistors info.
 #' @param include_vendor_data_table which columns from the vendor data table to include. Use \code{c(...)} to select multiple columns, supports all \link[dplyr]{select} syntax including renaming columns. Includes all columns by default. Set parameter \code{with_explicit_units = TRUE} to make column units explicit (keep in mind that this will require specific \code{include_vendor_data_table} column selections to reflect the column names including the units). Set to NULL to include no vendor data table.
 #' @param include_problems which columns from problems to include. Use \code{c(...)} to select multiple columns, supports all \link[dplyr]{select} syntax including renaming columns. Includes none of the read problems by default. Set to \code{include_problems = everything()} to include all columns.
 #' @return data_frame with file_ids, file_types and nested data frames for each data type (file_info, raw_data, vendor_data_table, etc.)
@@ -312,48 +301,46 @@ iso_get_all_data <- function(
 
 #' Aggregate file info
 #'
-#' Combine file information from multiple iso_files. By default all information is included but specific columns can be targeted using the \code{select} parameter, which uses the \code{\link{iso_select_file_info}} function to select and/or rename columns. File information beyond \code{file_id}, \code{file_root}, \code{file_path} and \code{file_datetime} is only available if the \code{iso_files} were read with parameter \code{read_file_info=TRUE}.
+#' Combine file information from multiple iso_files. By default all information is included but specific columns can be targeted using the \code{select} parameter to select and/or rename columns. File information beyond \code{file_id}, \code{file_root}, \code{file_path} and \code{file_datetime} is only available if the \code{iso_files} were read with parameter \code{read_file_info=TRUE}.
 #'
 #' @inheritParams iso_get_raw_data
+#' @inheritParams iso_select_file_info
 #' @param select which columns to select - use \code{c(...)} to select multiple, supports all \link[dplyr]{select} syntax including renaming columns. File id is always included and cannot be renamed. 
+#' @param simplify if set to TRUE (the default), nested value columns in the file info will be unnested as long as they are compatible across file types. Note that file info entries with multiple values still remain nested multi-value (=list) columns even with \code{simplify=TRUE}. These can be unnested using \link[tidyr]{unnest}.
 #' @family data retrieval functions
-#' @note File info entries with multiple values remain nested multi-value (=list) columns and can be unnested using \link[tidyr]{unnest}.
+#' @note this function used to allow selecting/renaming different file_info_columns in different files to the same column. This was a significant speed impediment and only covered very rare use cases. It is still available in the related function \code{\link{iso_select_file_info}} with a special flag but is no longer the default and not incouraged for use in the frequently called \code{iso_get_file_info}.
 #' @export
-iso_get_file_info <- function(iso_files, select = everything(), quiet = default(quiet)) {
+iso_get_file_info <- function(iso_files, select = everything(), file_specific = FALSE, simplify = TRUE, quiet = default(quiet)) {
 
   # make sure it's an iso file list
   iso_files <- iso_as_file_list(iso_files)
   select_exp <- rlang::enexpr(select)
 
-  if (!quiet) { 
+  if (!quiet) {
     glue::glue(
       "Info: aggregating file info from {length(iso_files)} data file(s)",
-      "{get_info_message_concat(select_exp, prefix = ', selecting info columns ', empty = 'everything()')}") %>% message()
+      "{get_info_message_concat(select_exp, prefix = ', selecting info columns ', empty = 'everything()')}") %>%
+      message()
   }
   check_read_options(iso_files, "file_info")
 
   # retrieve info
   file_info <- iso_files %>% 
+    # select files
+    iso_select_file_info(!!select_exp, file_specific = file_specific, quiet = TRUE) %>% 
     # retrieve file info
     map(~.x$file_info) %>% 
     # combine in data frame (use safe bind to make sure different data column 
     # types of the same name don't trip up the combination)
     safe_bind_rows() 
 
+  # check if empty
   if(nrow(file_info) == 0) return(tibble(file_id = character(0)))
 
-  # selecting columns
-  select_cols <- get_column_names(file_info, select = select_exp, n_reqs = list(select = "*"), cols_must_exist = FALSE)$select
-  if (!"file_id" %in% select_cols) 
-    select_cols <- c("file_id", select_cols) # file id always included
-
-  # final processing
-  file_info <- 
-    file_info %>% 
-    # focus on selected columns only (also takes care of the rename)
-    dplyr::select(!!!select_cols) %>%
+  # simplify by disaggregated columns
+  if (simplify)
     # unnest aggregated columns
-    unnest_aggregated_data_frame()
+    file_info <- unnest_aggregated_data_frame(file_info)
 
   return(file_info)
 }
@@ -489,7 +476,7 @@ iso_get_bgrd_data <- function(iso_files, select = everything(), gather = FALSE,
     unnest(bgrd_data)
 
   # check for rows
-  if (nrow(data) == 0) return(dplyr::select(data, file_id))
+  if (nrow(data) == 0) return(dplyr::select(data, .data$file_id))
 
   # selecting columns
   select_cols <- get_column_names(data, select = select_exp, n_reqs = list(select = "*"), cols_must_exist = FALSE)$select
@@ -507,12 +494,12 @@ iso_get_bgrd_data <- function(iso_files, select = everything(), gather = FALSE,
       # gather all masses and ratios
       gather(column, value, matches(masses_ratios_re)) %>% 
       # extract unit information
-      extract(column, into = c("category", "data", "extra_parens", "units"), regex = masses_ratios_re) %>% 
-      dplyr::select(-extra_parens) %>% 
+      extract(.data$column, into = c("category", "data", "extra_parens", "units"), regex = masses_ratios_re) %>% 
+      dplyr::select(-.data$extra_parens) %>% 
       # remove unknown data
-      filter(!is.na(value)) %>% 
+      filter(!is.na(.data$value)) %>% 
       # assign category
-      mutate(category = ifelse(category == "r", "ratio", "mass"))
+      mutate(category = ifelse(.data$category == "r", "ratio", "mass"))
   } 
 
   # if file info
@@ -526,6 +513,7 @@ iso_get_bgrd_data <- function(iso_files, select = everything(), gather = FALSE,
 #' DEPRECATED
 #' 
 #' Please use \link{iso_get_standards} instead.
+#' @param ... forwarded to \link{iso_get_standards}
 #' 
 #' @export
 iso_get_standards_info <- function(...) {
@@ -615,6 +603,8 @@ iso_get_standards <- function(iso_files, select = everything(), include_file_inf
 #' 
 #' Please use \link{iso_get_resistors} instead.
 #' 
+#' @param ... forwarded to \link{iso_get_resistors}
+#' 
 #' @export
 iso_get_resistors_info <- function(...) {
   warning("'iso_get_resistors_info()' is deprecated in favor of the simpler 'iso_get_resistors()'. Please use 'iso_get_resistors()' directly to avoid this warning.", immediate. = TRUE, call. = FALSE)

diff --git a/R/export.R b/R/export.R
@@ -78,15 +78,15 @@ iso_export_to_excel <- function(
   if ("file_info" %in% names(all_data)) {
     # note: collapse_list_columns takes care of nested vectors, they get concatenated with ', '
     file_info <- 
-      all_data %>% select(file_id, file_info) %>% 
-      unnest(file_info) %>% 
+      all_data %>% select(.data$file_id, .data$file_info) %>% 
+      unnest(.data$file_info) %>% 
       collapse_list_columns()
     add_excel_sheet(wb, "file info", file_info)
   }
 
   # raw data
   if ("raw_data" %in% names(all_data)) {
-    raw_data <- all_data %>% select(file_id, raw_data) %>% unnest(raw_data)
+    raw_data <- all_data %>% select(.data$file_id, .data$raw_data) %>% unnest(.data$raw_data)
     add_excel_sheet(wb, "raw data", raw_data)
   }
 
@@ -98,20 +98,20 @@ iso_export_to_excel <- function(
 
   # resistors
   if ("resistors" %in% names(all_data)) {
-    resistors <- all_data %>% select(file_id, resistors) %>% unnest(resistors)
+    resistors <- all_data %>% select(.data$file_id, .data$resistors) %>% unnest(.data$resistors)
     add_excel_sheet(wb, "resistors", resistors)
   } 
 
   # vendor data table
   if ("vendor_data_table" %in% names(all_data)) {
-    vendor_data <- all_data %>% select(file_id, vendor_data_table) %>% 
-      unnest(vendor_data_table) %>% iso_strip_units()
+    vendor_data <- all_data %>% select(.data$file_id, .data$vendor_data_table) %>% 
+      unnest(.data$vendor_data_table) %>% iso_strip_units()
     add_excel_sheet(wb, "vendor data table", vendor_data)
   }
 
   # problems
   if ("problems" %in% names(all_data)) {
-    problems <- all_data %>% select(file_id, problems) %>% unnest(problems)
+    problems <- all_data %>% select(.data$file_id, .data$problems) %>% unnest(.data$problems)
     add_excel_sheet(wb, "problems", problems)
   }
   saveWorkbook(wb, filepath, overwrite = TRUE)
@@ -199,6 +199,7 @@ add_excel_sheet <- function(wb, sheet_name, ..., dbl_digits = 2, col_max_width =
 #' 
 #' @inheritParams iso_save
 #' @inheritParams iso_export_to_excel
+#' @param filepath_prefix what to use as the prefix for the feather file names (e.g. name of the data collection or current date)
 #' @family export functions
 #' @return returns the iso_files object invisibly for use in pipelines
 #' @export
@@ -244,40 +245,40 @@ iso_export_to_feather <- function(
   # file info
   if ("file_info" %in% names(all_data)) {
     # note: collapse_list_columns takes care of nested vectors, they get concatenated with ', '
-    all_data %>% select(file_id, file_info) %>% 
-      unnest(file_info) %>% 
+    all_data %>% select(.data$file_id, .data$file_info) %>% 
+      unnest(.data$file_info) %>% 
       collapse_list_columns() %>% 
       write_feather(filepaths[['file_info']])
   }
 
   # raw data
   if ("raw_data" %in% names(all_data)) {
-    all_data %>% select(file_id, raw_data) %>% unnest(raw_data) %>% 
+    all_data %>% select(.data$file_id, .data$raw_data) %>% unnest(.data$raw_data) %>% 
       write_feather(filepaths[['raw_data']])
   }
 
   # standards
   if ("standards" %in% names(all_data)) {
-   all_data %>% select(file_id, standards) %>% unnest(standards) %>% 
+   all_data %>% select(.data$file_id, .data$standards) %>% unnest(.data$standards) %>% 
       write_feather(filepaths[['method_info_standards']])
   } 
 
   # resistors
   if ("resistors" %in% names(all_data)) {
-    all_data %>% select(file_id, resistors) %>% unnest(resistors) %>% 
+    all_data %>% select(.data$file_id, .data$resistors) %>% unnest(.data$resistors) %>% 
       write_feather(filepaths[['method_info_resistors']])
   } 
 
   # vendor data table
   if ("vendor_data_table" %in% names(all_data)) {
-    all_data %>% select(file_id, vendor_data_table) %>% 
-      unnest(vendor_data_table) %>% iso_strip_units() %>% 
+    all_data %>% select(.data$file_id, .data$vendor_data_table) %>% 
+      unnest(.data$vendor_data_table) %>% iso_strip_units() %>% 
       write_feather(filepaths[['vendor_data_table']])
   }
 
   # problems
   if ("problems" %in% names(all_data)) {
-   all_data %>% select(file_id, problems) %>% unnest(problems) %>% 
+   all_data %>% select(.data$file_id, .data$problems) %>% unnest(.data$problems) %>% 
       write_feather(filepaths[['problems']])
   }