Skip to content

Commit

Permalink
Allow secondary filter when searching for off-targets
Browse files Browse the repository at this point in the history
  • Loading branch information
EricEdwardBryant committed Jul 4, 2017
1 parent e1d24eb commit 9350709
Show file tree
Hide file tree
Showing 2 changed files with 65 additions and 15 deletions.
51 changes: 36 additions & 15 deletions R/search-off-target.R
Original file line number Diff line number Diff line change
@@ -1,40 +1,61 @@
#' Search for candidate off-target sequences.
#'
#' Search the genome for candidate off-target sequences. A fixed band in which no mismatches are
#' tolerated reduces computation time; a set number of mismatches is allowed outside of that band.
#'
#' @param guides Character vector of sequences to search for fuzzy matches in the genome.
#' @param genome A BSgenome
#' @param chromosomes Character vector of chromosome names to consider when searching the genome.
#' @param fixed_start Beginning of fixed band (no tolerated mismatches). Larger band reduces search time.
#' @param fixed_end End of fixed band (no tolerated mismatches). Larger band reduces search time.
#' @param max_mismatch Number of tolerated mismatches outside of the fixed band.
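#' @param secondary_filter A regular expression passed to [stringr::str_detect] used to filter for
#' off-targets that match this pattern (e.g. 'GG$' will require the matching sequence to end with 'GG').
#' If `NULL` (the default), no secondary filter is applied.
#' @param cores Number of child processes used for parallel evaluation, passed as `cl` to
#' [pbapply::pblapply]. Values of 1 or less are converted to `NULL` (no parallel evaluation).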
#'
#' @export
#' @md

search_off_target <- function(seqs, genome, chromosomes,
search_off_target <- function(guides, genome, chromosomes,
fixed_start = 9,
fixed_end = 20,
max_mismatch = 2,
secondary_filter = NULL,
cores = 1) {

message('Counting possible off targets\n',
' Maximum mismatches: ', max_mismatch, '\n',
' Fixed band (no tolerated mismatch): ',
fixed_start, '-', fixed_end)
message('Counting possible off targets\n',
' Maximum mismatches: ', max_mismatch, '\n',
' Fixed band (no tolerated mismatch): ',
fixed_start, '-', fixed_end)

# Progress bar setup: cores <= 1 becomes NULL for `cl`; previous pbapply options are restored on exit
if (cores <= 1L) cores <- NULL
pbo <- pbapply::pboptions(type = 'timer', char = '=')
on.exit(pbapply::pboptions(pbo), add = TRUE)

# Search each chromosome
chromosomes %>%
result <-
chromosomes %>%
purrr::set_names(chromosomes) %>% # give the result list names
pbapply::pblapply(function(chrm) {
chr <- genome[[chrm]] # brings chromosome into memory
bind_rows(
search_off_target_chr(seqs, chr, '+', fixed_start, fixed_end, max_mismatch),
search_off_target_chr(seqs, chr, '-', fixed_start, fixed_end, max_mismatch)
search_off_target_chr(guides, chr, '+', fixed_start, fixed_end, max_mismatch),
search_off_target_chr(guides, chr, '-', fixed_start, fixed_end, max_mismatch)
)
}, cl = cores) %>%
bind_rows(.id = 'chr')
bind_rows(.id = 'chr') %>%
mutate(match = Biostrings::getSeq(genome, chr, start, end, strand = strand) %>% as.character()) %>%
select(guide, match, chr, strand, start, end)

if (!is.null(secondary_filter)) result <- filter(result, stringr::str_detect(match, secondary_filter))
return(result)
}

search_off_target_chr <- function(seqs, chromosome, orientation,
search_off_target_chr <- function(guides, chromosome, orientation,
fixed_start, fixed_end,
max_mismatch) {

width <- unique(stringr::str_length(seqs))
width <- unique(stringr::str_length(guides))
assertthat::assert_that(
length(width) == 1,
msg = stringr::str_c(
Expand All @@ -43,11 +64,11 @@ search_off_target_chr <- function(seqs, chromosome, orientation,
)

if (orientation == '+') {
search <- seqs
search <- guides
start <- fixed_start
end <- fixed_end
} else {
search <- Biostrings::reverseComplement(Biostrings::DNAStringSet(seqs))
search <- Biostrings::reverseComplement(Biostrings::DNAStringSet(guides))
start <- width - (fixed_end - 1)
end <- width - (fixed_start - 1)
}
Expand All @@ -64,7 +85,7 @@ search_off_target_chr <- function(seqs, chromosome, orientation,
fixed = 'subject'
) %>%
as.list %>%
purrr::set_names(seqs) %>%
purrr::map_df(as_data_frame, .id = 'sequence') %>%
purrr::set_names(guides) %>%
purrr::map_df(as_data_frame, .id = 'guide') %>%
mutate(strand = orientation)
}
29 changes: 29 additions & 0 deletions man/search_off_target.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 9350709

Please sign in to comment.