diff --git a/DESCRIPTION b/DESCRIPTION index 092b02f..08714df 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: tzara Type: Package Title: Cluster long amplicons using dada2 denoising on variable regions -Version: 0.0.7.3 +Version: 0.0.7.5 Authors@R: person(given = "Brendan", family = "Furneaux", email = "brendan.furneaux@gmail.com", role = c("aut", "cre")) @@ -50,4 +50,4 @@ Suggests: testthat (>= 2.1.0), covr, lintr -RoxygenNote: 6.1.1 +RoxygenNote: 7.0.0 diff --git a/NAMESPACE b/NAMESPACE index 07f742d..34bd8bd 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -8,6 +8,7 @@ S3method(dadamap,derep) S3method(dadamap,list) S3method(extract_region,ShortRead) S3method(extract_region,character) +S3method(extract_region,list) S3method(seqhash,XStringSet) S3method(seqhash,character) S3method(summarize_sread,ShortReadQ) diff --git a/R/tzara.R b/R/tzara.R index 19c7d08..4f4e267 100644 --- a/R/tzara.R +++ b/R/tzara.R @@ -96,11 +96,13 @@ combine_derep <- function(dereps, .data = NULL, ...) { # preserve the sequence names as "seq.id" if they are present oldmap <- dereps oldmap[["oldmap"]] <- lapply(oldmap[["derep"]], `[[`, "map") + nestedcols <- "oldmap" if (all(vapply(oldmap[["derep"]], assertthat::has_name, TRUE, "names"))) { oldmap[["seq.id"]] <- lapply(oldmap[["derep"]], `[[`, "names") + nestedcols <- c(nestedcols, "seq.id") } oldmap <- dplyr::select(oldmap, -"derep") %>% - tidyr::unnest() %>% + tidyr::unnest(cols = nestedcols) %>% dplyr::group_by_at(gps) %>% dplyr::mutate(idx = 1:dplyr::n()) %>% dplyr::ungroup() @@ -110,7 +112,7 @@ combine_derep <- function(dereps, .data = NULL, ...) { dplyr::mutate_at("derep", ~purrr::map(., .f = ~tibble(seq = names(.$uniques), n = .$uniques))) %>% - tidyr::unnest() + tidyr::unnest(cols = "derep") # combine duplicate sequences among all files. newuniques <- olduniques %>% @@ -129,7 +131,7 @@ combine_derep <- function(dereps, .data = NULL, ...) { dplyr::mutate(oldmap = seq_along(seq)) %>% dplyr::ungroup(), newuniques, - .by = "seq") + by = "seq") } out <- list() @@ -701,6 +703,8 @@ extract_region.ShortRead <- function(seq, positions, region, region2 = region, return(out) } +#' @rdname extract_region +#' @export extract_region.list <- function(seq, positions, region, region2 = region, outfile = NULL, ...) { diff --git a/man/cluster_consensus.Rd b/man/cluster_consensus.Rd index c71a481..fe439d6 100644 --- a/man/cluster_consensus.Rd +++ b/man/cluster_consensus.Rd @@ -8,11 +8,16 @@ \usage{ cluster_consensus(seq, ..., ncpus = 1, simplify = TRUE) -\method{cluster_consensus}{character}(seq, names = names(seq), - dna2rna = TRUE, ..., ncpus = 1, simplify = TRUE) +\method{cluster_consensus}{character}( + seq, + names = names(seq), + dna2rna = TRUE, + ..., + ncpus = 1, + simplify = TRUE +) -\method{cluster_consensus}{XStringSet}(seq, ..., ncpus = 1, - simplify = TRUE) +\method{cluster_consensus}{XStringSet}(seq, ..., ncpus = 1, simplify = TRUE) } \arguments{ \item{seq}{(\code{character} vector or \code{\link[Biostrings]{XStringSet}}) diff --git a/man/extract_region.Rd b/man/extract_region.Rd index b6edab5..a83c5ca 100644 --- a/man/extract_region.Rd +++ b/man/extract_region.Rd @@ -4,17 +4,33 @@ \alias{extract_region} \alias{extract_region.character} \alias{extract_region.ShortRead} +\alias{extract_region.list} \title{Extract regions from a set of sequences (maybe with qualities)} \usage{ -extract_region(seq, positions, region, region2 = region, - outfile = NULL, ...) +extract_region(seq, positions, region, region2 = region, outfile = NULL, ...) -\method{extract_region}{character}(seq, positions, region, - region2 = region, outfile = NULL, qualityType = "FastqQuality", - append = FALSE, ...) +\method{extract_region}{character}( + seq, + positions, + region, + region2 = region, + outfile = NULL, + qualityType = "FastqQuality", + append = FALSE, + ... +) -\method{extract_region}{ShortRead}(seq, positions, region, - region2 = region, outfile = NULL, append = FALSE, ...) +\method{extract_region}{ShortRead}( + seq, + positions, + region, + region2 = region, + outfile = NULL, + append = FALSE, + ... +) + +\method{extract_region}{list}(seq, positions, region, region2 = region, outfile = NULL, ...) } \arguments{ \item{seq}{(\code{character} (a file name) or a diff --git a/man/find_all_region_chimeras.Rd b/man/find_all_region_chimeras.Rd index 8e30d2c..e3186aa 100644 --- a/man/find_all_region_chimeras.Rd +++ b/man/find_all_region_chimeras.Rd @@ -4,8 +4,14 @@ \alias{find_all_region_chimeras} \title{Find chimeras in adjacent variable-conserved-variable domain triplets} \usage{ -find_all_region_chimeras(region_table, order, sample_column = NULL, - read_column = "read_id", chimera_offset = 0, ...) +find_all_region_chimeras( + region_table, + order, + sample_column = NULL, + read_column = "read_id", + chimera_offset = 0, + ... +) } \arguments{ \item{region_table}{(\code{data.frame} containing all the regions in diff --git a/man/find_region_chimeras.Rd b/man/find_region_chimeras.Rd index db046ad..e360581 100644 --- a/man/find_region_chimeras.Rd +++ b/man/find_region_chimeras.Rd @@ -4,8 +4,7 @@ \alias{find_region_chimeras} \title{Check for bimeras in a subset of regions} \usage{ -find_region_chimeras(region_table, chimset, sample_column, read_column, - ...) +find_region_chimeras(region_table, chimset, sample_column, read_column, ...) } \arguments{ \item{region_table}{(\code{data.frame} containing all the regions in diff --git a/man/map_or_consensus.Rd b/man/map_or_consensus.Rd index c336588..fefcfa9 100644 --- a/man/map_or_consensus.Rd +++ b/man/map_or_consensus.Rd @@ -4,8 +4,15 @@ \alias{map_or_consensus} \title{Assign consensus sequences to unmapped reads} \usage{ -map_or_consensus(asvs, raw, maxdist = 10, allow_map = TRUE, - allow_consensus = TRUE, allow_raw = FALSE, ...) +map_or_consensus( + asvs, + raw, + maxdist = 10, + allow_map = TRUE, + allow_consensus = TRUE, + allow_raw = FALSE, + ... +) } \arguments{ \item{asvs}{(\code{character} vector) ASV sequences mapped to a set of reads. diff --git a/man/reconstruct.Rd b/man/reconstruct.Rd index 388232c..7a62592 100644 --- a/man/reconstruct.Rd +++ b/man/reconstruct.Rd @@ -5,14 +5,28 @@ \title{Reconstruct a longer region out of ASVs or consensus sequence of individual domains.} \usage{ -reconstruct(seqtabs, regions = names(seqtabs), regions_regex = NULL, - regions_replace = NULL, output = "concat", use_output = c("first", - "second", "no"), order = setdiff(regions, output), - read_column = "seq.id", asv_column = "dada.seq", rawtabs = seqtabs, - raw_column = NULL, raw_regions = names(rawtabs), - sample_column = NULL, sample_regex = NULL, sample_replace = NULL, - chimera_offset = 0, allow_map = TRUE, allow_consensus = TRUE, - allow_raw = FALSE, ...) +reconstruct( + seqtabs, + regions = names(seqtabs), + regions_regex = NULL, + regions_replace = NULL, + output = "concat", + use_output = c("first", "second", "no"), + order = setdiff(regions, output), + read_column = "seq.id", + asv_column = "dada.seq", + rawtabs = seqtabs, + raw_column = NULL, + raw_regions = names(rawtabs), + sample_column = NULL, + sample_regex = NULL, + sample_replace = NULL, + chimera_offset = 0, + allow_map = TRUE, + allow_consensus = TRUE, + allow_raw = FALSE, + ... +) } \arguments{ \item{seqtabs}{(\code{list} of \code{data.frame}) with columns diff --git a/man/seqhash.Rd b/man/seqhash.Rd index 7f76a5b..65568b5 100644 --- a/man/seqhash.Rd +++ b/man/seqhash.Rd @@ -8,11 +8,9 @@ \usage{ seqhash(seq, algo = "xxhash32", len = NA, preserve_na = TRUE) -\method{seqhash}{character}(seq, algo = "xxhash32", len = NA, - preserve_na = TRUE) +\method{seqhash}{character}(seq, algo = "xxhash32", len = NA, preserve_na = TRUE) -\method{seqhash}{XStringSet}(seq, algo = "xxhash32", len = NA, - preserve_na = TRUE) +\method{seqhash}{XStringSet}(seq, algo = "xxhash32", len = NA, preserve_na = TRUE) } \arguments{ \item{seq}{(\code{character} or \code{\link[Biostrings]{XStringSet}}) the