Skip to content

Commit

Permalink
Merge pull request #99 from isoverse/dev
Browse files Browse the repository at this point in the history
upgrade to version 1.1.1
  • Loading branch information
sebkopf committed Feb 18, 2020
2 parents 881590e + 8bb320d commit 9ebf1ea
Show file tree
Hide file tree
Showing 123 changed files with 778 additions and 481 deletions.
2 changes: 1 addition & 1 deletion .Rbuildignore
Expand Up @@ -4,7 +4,7 @@
^appveyor\.yml$
^codecov\.yml$
^_pkgdown\.yml$
^\.httr-oauth$
^\.httr-oauth*
^Makefile$
^NEWS$
^docs$
Expand Down
2 changes: 1 addition & 1 deletion .gitignore
Expand Up @@ -31,7 +31,7 @@ vignettes/*.R
vignettes/**/*.png
vignettes/**/*.pdf
# OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3
.httr-oauth
.httr-oauth*
# knitr and R markdown default cache directories
/*_cache/
/cache/
Expand Down
8 changes: 4 additions & 4 deletions DESCRIPTION
@@ -1,12 +1,12 @@
Package: isoreader
Title: Read IRMS data files
Description: R interface to IRMS (isotope ratio mass spectrometry) file formats typically used in stable isotope geochemistry.
Version: 1.1.0
Version: 1.1.1
Authors@R: person("Sebastian", "Kopf", email = "sebastian.kopf@colorado.edu", role = c("aut", "cre"), comment = c(ORCID = "0000-0002-2044-0201"))
URL: https://github.com/isoverse/isoreader
BugReports: https://github.com/isoverse/isoreader/issues
Depends:
R (>= 3.1)
R (>= 3.5)
Imports:
methods,
R.utils,
Expand All @@ -16,7 +16,7 @@ Imports:
tidyselect (>= 1.0.0),
vctrs,
tibble,
dplyr (>= 0.7.4),
dplyr (>= 0.8.4),
tidyr (>= 1.0.0),
stringr,
purrr,
Expand All @@ -29,7 +29,7 @@ Imports:
feather,
UNF,
openxlsx,
lazyeval
stats
Suggests:
testthat,
readxl,
Expand Down
1 change: 0 additions & 1 deletion NAMESPACE
Expand Up @@ -190,7 +190,6 @@ importFrom(future,plan)
importFrom(future,resolved)
importFrom(future,value)
importFrom(glue,glue)
importFrom(lazyeval,as.lazy)
importFrom(lubridate,as_datetime)
importFrom(lubridate,duration)
importFrom(lubridate,interval)
Expand Down
74 changes: 32 additions & 42 deletions R/aggregate_data.R
Expand Up @@ -55,9 +55,6 @@ iso_get_data_summary <- function(iso_files, quiet = default(quiet)) {
# summary of raw data info
get_raw_data_info <- function(iso_files) {

# global vars
all_ions <- n_ions <- label <- read_raw_data <- NULL

# make sure to convert to file list
iso_files <- iso_as_file_list(iso_files)

Expand All @@ -72,8 +69,8 @@ get_raw_data_info <- function(iso_files) {
file_id = names(iso_files),
read_raw_data = map_lgl(iso_files, ~.x$read_options$raw_data),
all_ions = map(iso_files, ~names(.x$raw_data) %>% str_subset("^[iIvV](\\d+)\\.")),
n_ions = map_int(all_ions, length),
ions = map2_chr(all_ions, n_ions, ~if(.y > 0) { collapse(.x, sep = ", ") } else {""}) %>%
n_ions = map_int(.data$all_ions, length),
ions = map2_chr(.data$all_ions, .data$n_ions, ~if(.y > 0) { collapse(.x, sep = ", ") } else {""}) %>%
str_replace_all("[^0-9,]", "")
)

Expand Down Expand Up @@ -103,15 +100,12 @@ get_raw_data_info <- function(iso_files) {
glue("cannot process '{class(iso_files[[1]])[1]}' in get_raw_data_info") %>% stop(call. = FALSE)
}

return(dplyr::select(raw_data_sum, file_id, raw_data = label))
return(dplyr::select(raw_data_sum, .data$file_id, raw_data = .data$label))
}

# summary of file info
get_file_info_info <- function(iso_files) {

# global vars
read_file_info <- file_info <- NULL

# make sure to convert to file list
iso_files <- iso_as_file_list(iso_files)

Expand All @@ -124,16 +118,13 @@ get_file_info_info <- function(iso_files) {
file_id = names(iso_files),
read_file_info = map_lgl(iso_files, ~.x$read_options$file_info),
file_info = ifelse(!read_file_info, "file info not read", paste(map_int(iso_files, ~length(.x$file_info)), "entries"))
) %>% select(file_id, file_info)
) %>% select(.data$file_id, .data$file_info)
}
}

# summary of method info
get_method_info_info <- function(iso_files) {

# global vars
method_info <- NULL

# make sure to convert to file list
iso_files <- iso_as_file_list(iso_files)

Expand All @@ -154,7 +145,7 @@ get_method_info_info <- function(iso_files) {
has_resistors ~ "resistors",
TRUE ~ "no method info"
)
) %>% select(file_id, method_info)
) %>% select(.data$file_id, .data$method_info)
}

}
Expand All @@ -163,10 +154,7 @@ get_method_info_info <- function(iso_files) {
get_vendor_data_table_info <- function(iso_files) {
# make sure to convert to file list
iso_files <- iso_as_file_list(iso_files)

# global vars
vendor_data_table <- NULL


# make sure to not process empty list
if (length(iso_files) == 0) {
tibble(file_id = character(), vendor_data_table = character())
Expand All @@ -179,10 +167,10 @@ get_vendor_data_table_info <- function(iso_files) {
cols = map_int(iso_files, ~ncol(.x$vendor_data_table)),
vendor_data_table = case_when(
!read_vendor_data_table ~ "vendor data table not read",
rows > 0 & cols > 0 ~ sprintf("%d rows, %d columns", rows, cols),
.data$rows > 0 & .data$cols > 0 ~ sprintf("%d rows, %d columns", .data$rows, .data$cols),
TRUE ~ "no vendor data table"
)
) %>% select(file_id, vendor_data_table)
) %>% select(.data$file_id, .data$vendor_data_table)
}
}

Expand All @@ -191,6 +179,7 @@ get_vendor_data_table_info <- function(iso_files) {
#' DEPRECATED
#'
#' Please use \link{iso_get_all_data} instead.
#' @param ... forwarded to \link{iso_get_all_data}
#'
#' @export
iso_get_data <- function(...) {
Expand All @@ -207,7 +196,7 @@ iso_get_data <- function(...) {
#' @inheritParams iso_get_vendor_data_table
#' @param include_raw_data which columns from the raw data to include. Use \code{c(...)} to select multiple columns, supports all \link[dplyr]{select} syntax including renaming columns. Includes all columns by default. Set to NULL to include no raw data.
#' @param include_standards which columns from the standards info to include. Use \code{c(...)} to select multiple columns, supports all \link[dplyr]{select} syntax including renaming columns. By default, everything is included (both standards and ratios). To omit the ratios, change to \code{select = file_id:reference}. Set to NULL to include no standards info.
#' #' @param include_resistors which columns from the resistors info to include. Use \code{c(...)} to select multiple columns, supports all \link[dplyr]{select} syntax including renaming columns. Includes all columns by default. Set to NULL to include no resistors info.
#' @param include_resistors which columns from the resistors info to include. Use \code{c(...)} to select multiple columns, supports all \link[dplyr]{select} syntax including renaming columns. Includes all columns by default. Set to NULL to include no resistors info.
#' @param include_vendor_data_table which columns from the vendor data table to include. Use \code{c(...)} to select multiple columns, supports all \link[dplyr]{select} syntax including renaming columns. Includes all columns by default. Set parameter \code{with_explicit_units = TRUE} to make column units explicit (keep in mind that this will require specific \code{include_vendor_data_table} column selections to reflect the column names including the units). Set to NULL to include no vendor data table.
#' @param include_problems which columns from problems to include. Use \code{c(...)} to select multiple columns, supports all \link[dplyr]{select} syntax including renaming columns. Includes none of the read problems by default. Set to \code{include_problems = everything()} to include all columns.
#' @return data_frame with file_ids, file_types and nested data frames for each data type (file_info, raw_data, vendor_data_table, etc.)
Expand Down Expand Up @@ -312,48 +301,46 @@ iso_get_all_data <- function(

#' Aggregate file info
#'
#' Combine file information from multiple iso_files. By default all information is included but specific columns can be targeted using the \code{select} parameter, which uses the \code{\link{iso_select_file_info}} function to select and/or rename columns. File information beyond \code{file_id}, \code{file_root}, \code{file_path} and \code{file_datetime} is only available if the \code{iso_files} were read with parameter \code{read_file_info=TRUE}.
#' Combine file information from multiple iso_files. By default all information is included but specific columns can be targeted using the \code{select} parameter to select and/or rename columns. File information beyond \code{file_id}, \code{file_root}, \code{file_path} and \code{file_datetime} is only available if the \code{iso_files} were read with parameter \code{read_file_info=TRUE}.
#'
#' @inheritParams iso_get_raw_data
#' @inheritParams iso_select_file_info
#' @param select which columns to select - use \code{c(...)} to select multiple, supports all \link[dplyr]{select} syntax including renaming columns. File id is always included and cannot be renamed.
#' @param simplify if set to TRUE (the default), nested value columns in the file info will be unnested as long as they are compatible across file types. Note that file info entries with multiple values still remain nested multi-value (=list) columns even with \code{simplify=TRUE}. These can be unnested using \link[tidyr]{unnest}.
#' @family data retrieval functions
#' @note File info entries with multiple values remain nested multi-value (=list) columns and can be unnested using \link[tidyr]{unnest}.
#' @note this function used to allow selecting/renaming different file_info_columns in different files to the same column. This was a significant speed impediment and only covered very rare use cases. It is still available in the related function \code{\link{iso_select_file_info}} with a special flag but is no longer the default and not incouraged for use in the frequently called \code{iso_get_file_info}.
#' @export
iso_get_file_info <- function(iso_files, select = everything(), quiet = default(quiet)) {
iso_get_file_info <- function(iso_files, select = everything(), file_specific = FALSE, simplify = TRUE, quiet = default(quiet)) {

# make sure it's an iso file list
iso_files <- iso_as_file_list(iso_files)
select_exp <- rlang::enexpr(select)

if (!quiet) {
if (!quiet) {
glue::glue(
"Info: aggregating file info from {length(iso_files)} data file(s)",
"{get_info_message_concat(select_exp, prefix = ', selecting info columns ', empty = 'everything()')}") %>% message()
"{get_info_message_concat(select_exp, prefix = ', selecting info columns ', empty = 'everything()')}") %>%
message()
}
check_read_options(iso_files, "file_info")

# retrieve info
file_info <- iso_files %>%
# select files
iso_select_file_info(!!select_exp, file_specific = file_specific, quiet = TRUE) %>%
# retrieve file info
map(~.x$file_info) %>%
# combine in data frame (use safe bind to make sure different data column
# types of the same name don't trip up the combination)
safe_bind_rows()

# check if empty
if(nrow(file_info) == 0) return(tibble(file_id = character(0)))

# selecting columns
select_cols <- get_column_names(file_info, select = select_exp, n_reqs = list(select = "*"), cols_must_exist = FALSE)$select
if (!"file_id" %in% select_cols)
select_cols <- c("file_id", select_cols) # file id always included

# final processing
file_info <-
file_info %>%
# focus on selected columns only (also takes care of the rename)
dplyr::select(!!!select_cols) %>%
# simplify by disaggregated columns
if (simplify)
# unnest aggregated columns
unnest_aggregated_data_frame()
file_info <- unnest_aggregated_data_frame(file_info)

return(file_info)
}
Expand Down Expand Up @@ -489,7 +476,7 @@ iso_get_bgrd_data <- function(iso_files, select = everything(), gather = FALSE,
unnest(bgrd_data)

# check for rows
if (nrow(data) == 0) return(dplyr::select(data, file_id))
if (nrow(data) == 0) return(dplyr::select(data, .data$file_id))

# selecting columns
select_cols <- get_column_names(data, select = select_exp, n_reqs = list(select = "*"), cols_must_exist = FALSE)$select
Expand All @@ -507,12 +494,12 @@ iso_get_bgrd_data <- function(iso_files, select = everything(), gather = FALSE,
# gather all masses and ratios
gather(column, value, matches(masses_ratios_re)) %>%
# extract unit information
extract(column, into = c("category", "data", "extra_parens", "units"), regex = masses_ratios_re) %>%
dplyr::select(-extra_parens) %>%
extract(.data$column, into = c("category", "data", "extra_parens", "units"), regex = masses_ratios_re) %>%
dplyr::select(-.data$extra_parens) %>%
# remove unknown data
filter(!is.na(value)) %>%
filter(!is.na(.data$value)) %>%
# assign category
mutate(category = ifelse(category == "r", "ratio", "mass"))
mutate(category = ifelse(.data$category == "r", "ratio", "mass"))
}

# if file info
Expand All @@ -526,6 +513,7 @@ iso_get_bgrd_data <- function(iso_files, select = everything(), gather = FALSE,
#' DEPRECATED
#'
#' Please use \link{iso_get_standards} instead.
#' @param ... forwarded to \link{iso_get_standards}
#'
#' @export
iso_get_standards_info <- function(...) {
Expand Down Expand Up @@ -615,6 +603,8 @@ iso_get_standards <- function(iso_files, select = everything(), include_file_inf
#'
#' Please use \link{iso_get_resistors} instead.
#'
#' @param ... forwarded to \link{iso_get_resistors}
#'
#' @export
iso_get_resistors_info <- function(...) {
warning("'iso_get_resistors_info()' is deprecated in favor of the simpler 'iso_get_resistors()'. Please use 'iso_get_resistors()' directly to avoid this warning.", immediate. = TRUE, call. = FALSE)
Expand Down
31 changes: 16 additions & 15 deletions R/export.R
Expand Up @@ -78,15 +78,15 @@ iso_export_to_excel <- function(
if ("file_info" %in% names(all_data)) {
# note: collapse_list_columns takes care of nested vectors, they get concatenated with ', '
file_info <-
all_data %>% select(file_id, file_info) %>%
unnest(file_info) %>%
all_data %>% select(.data$file_id, .data$file_info) %>%
unnest(.data$file_info) %>%
collapse_list_columns()
add_excel_sheet(wb, "file info", file_info)
}

# raw data
if ("raw_data" %in% names(all_data)) {
raw_data <- all_data %>% select(file_id, raw_data) %>% unnest(raw_data)
raw_data <- all_data %>% select(.data$file_id, .data$raw_data) %>% unnest(.data$raw_data)
add_excel_sheet(wb, "raw data", raw_data)
}

Expand All @@ -98,20 +98,20 @@ iso_export_to_excel <- function(

# resistors
if ("resistors" %in% names(all_data)) {
resistors <- all_data %>% select(file_id, resistors) %>% unnest(resistors)
resistors <- all_data %>% select(.data$file_id, .data$resistors) %>% unnest(.data$resistors)
add_excel_sheet(wb, "resistors", resistors)
}

# vendor data table
if ("vendor_data_table" %in% names(all_data)) {
vendor_data <- all_data %>% select(file_id, vendor_data_table) %>%
unnest(vendor_data_table) %>% iso_strip_units()
vendor_data <- all_data %>% select(.data$file_id, .data$vendor_data_table) %>%
unnest(.data$vendor_data_table) %>% iso_strip_units()
add_excel_sheet(wb, "vendor data table", vendor_data)
}

# problems
if ("problems" %in% names(all_data)) {
problems <- all_data %>% select(file_id, problems) %>% unnest(problems)
problems <- all_data %>% select(.data$file_id, .data$problems) %>% unnest(.data$problems)
add_excel_sheet(wb, "problems", problems)
}
saveWorkbook(wb, filepath, overwrite = TRUE)
Expand Down Expand Up @@ -199,6 +199,7 @@ add_excel_sheet <- function(wb, sheet_name, ..., dbl_digits = 2, col_max_width =
#'
#' @inheritParams iso_save
#' @inheritParams iso_export_to_excel
#' @param filepath_prefix what to use as the prefix for the feather file names (e.g. name of the data collection or current date)
#' @family export functions
#' @return returns the iso_files object invisibly for use in pipelines
#' @export
Expand Down Expand Up @@ -244,40 +245,40 @@ iso_export_to_feather <- function(
# file info
if ("file_info" %in% names(all_data)) {
# note: collapse_list_columns takes care of nested vectors, they get concatenated with ', '
all_data %>% select(file_id, file_info) %>%
unnest(file_info) %>%
all_data %>% select(.data$file_id, .data$file_info) %>%
unnest(.data$file_info) %>%
collapse_list_columns() %>%
write_feather(filepaths[['file_info']])
}

# raw data
if ("raw_data" %in% names(all_data)) {
all_data %>% select(file_id, raw_data) %>% unnest(raw_data) %>%
all_data %>% select(.data$file_id, .data$raw_data) %>% unnest(.data$raw_data) %>%
write_feather(filepaths[['raw_data']])
}

# standards
if ("standards" %in% names(all_data)) {
all_data %>% select(file_id, standards) %>% unnest(standards) %>%
all_data %>% select(.data$file_id, .data$standards) %>% unnest(.data$standards) %>%
write_feather(filepaths[['method_info_standards']])
}

# resistors
if ("resistors" %in% names(all_data)) {
all_data %>% select(file_id, resistors) %>% unnest(resistors) %>%
all_data %>% select(.data$file_id, .data$resistors) %>% unnest(.data$resistors) %>%
write_feather(filepaths[['method_info_resistors']])
}

# vendor data table
if ("vendor_data_table" %in% names(all_data)) {
all_data %>% select(file_id, vendor_data_table) %>%
unnest(vendor_data_table) %>% iso_strip_units() %>%
all_data %>% select(.data$file_id, .data$vendor_data_table) %>%
unnest(.data$vendor_data_table) %>% iso_strip_units() %>%
write_feather(filepaths[['vendor_data_table']])
}

# problems
if ("problems" %in% names(all_data)) {
all_data %>% select(file_id, problems) %>% unnest(problems) %>%
all_data %>% select(.data$file_id, .data$problems) %>% unnest(.data$problems) %>%
write_feather(filepaths[['problems']])
}

Expand Down

0 comments on commit 9ebf1ea

Please sign in to comment.