Skip to content

Commit

Permalink
New version (0.6.0). Removed emcncf2 and added udef option for genome…
Browse files Browse the repository at this point in the history
… build.
  • Loading branch information
veseshan committed Dec 14, 2018
1 parent d002c51 commit 9352925
Show file tree
Hide file tree
Showing 8 changed files with 48 additions and 26 deletions.
4 changes: 2 additions & 2 deletions DESCRIPTION
Expand Up @@ -2,12 +2,12 @@ Package: facets
Type: Package
Title: Cellular Fraction and Copy Numbers from Tumor Sequencing
Version: 0.6.0
Date: 2018-02-28
Date: 2018-12-14
Author: Venkatraman E. Seshan and Ronglai Shen
Maintainer: Venkatraman E. Seshan <seshanv@mskcc.org>
Description: Algorithm to implement Fraction and Allelic Copy number
Estimate from Tumor/normal Sequencing.
License: GPL (>= 2)
Depends: R (>= 2.10), pctGCdata (>= 0.2.0)
Depends: R (>= 3.0.0), pctGCdata (>= 0.3.0)
Remotes: veseshan/pctGCdata
NeedsCompilation: yes
2 changes: 1 addition & 1 deletion NAMESPACE
@@ -1,6 +1,6 @@
useDynLib(facets)
import(stats,graphics)
importFrom("utils", "read.csv")
importFrom("grDevices", "colorRampPalette")
importFrom("grDevices", "colorRampPalette", "rainbow")
importFrom("pctGCdata", "getGCpct")
export("readSnpMatrix", "preProcSample", "procSample", "emcncf", "emcncf2", "plotSample","logRlogORspider")
17 changes: 9 additions & 8 deletions R/facets-procreads.R
@@ -1,11 +1,8 @@
# heterozygous and keep flags of the SNPs
procSnps <- function(rcmat, ndepth=35, het.thresh=0.25, snp.nbhd=250, gbuild="hg19", unmatched=FALSE, ndepthmax=1000) {
procSnps <- function(rcmat, ndepth=35, het.thresh=0.25, snp.nbhd=250, nX=23, unmatched=FALSE, ndepthmax=1000) {
# keep only chromsomes 1-22 & X for humans and 1-19, X for mice
if (gbuild %in% c("hg19", "hg38", "hg18")) {
chromlevels <- c(1:22,"X")
} else {
chromlevels <- c(1:19,"X")
}
# for other genomes (gbuild = udef) nX is number of autosomes plus 1
chromlevels <- c(1:(nX-1),"X")
chr.keep <- rcmat$Chromosome %in% chromlevels
# keep only snps with normal read depth between ndepth and 1000
depthN.keep <- (rcmat$NOR.DP >= ndepth) & (rcmat$NOR.DP < ndepthmax)
Expand Down Expand Up @@ -46,7 +43,7 @@ scanSnp <- function(maploc, het, nbhd) {
}

# obtain logR and logOR from read counts and GC-correct logR
counts2logROR <- function(mat, gbuild, unmatched=FALSE, f=0.2) {
counts2logROR <- function(mat, gbuild, unmatched=FALSE, ugcpct=NULL, f=0.2) {
out <- mat[mat$keep==1,]
# gc percentage
out$gcpct <- rep(NA_real_, nrow(out))
Expand All @@ -57,7 +54,11 @@ counts2logROR <- function(mat, gbuild, unmatched=FALSE, f=0.2) {
ii <- which(out$chrom==i)
# allow for chromosomes with no SNPs i.e. not targeted
if (length(ii) > 0) {
out$gcpct[ii] <- getGCpct(i, out$maploc[ii], gbuild)
if (gbuild == "udef") {
out$gcpct[ii] <- getGCpct(i, out$maploc[ii], gbuild, ugcpct)
} else {
out$gcpct[ii] <- getGCpct(i, out$maploc[ii], gbuild)
}
}
}
##### log-ratio with gc correction and maf log-odds ratio steps
Expand Down
30 changes: 21 additions & 9 deletions R/facets-wrapper.R
Expand Up @@ -22,13 +22,24 @@ readSnpMatrix <- function(filename, skip=0L, err.thresh=Inf, del.thresh=Inf, per
rcmat
}

preProcSample <- function(rcmat, ndepth=35, het.thresh=0.25, snp.nbhd=250, cval=25, deltaCN=0, gbuild=c("hg19", "hg38", "hg18", "mm9", "mm10"), hetscale=TRUE, unmatched=FALSE, ndepthmax=1000) {
preProcSample <- function(rcmat, ndepth=35, het.thresh=0.25, snp.nbhd=250, cval=25, deltaCN=0, gbuild=c("hg19", "hg38", "hg18", "mm9", "mm10", "udef"), ugcpct=NULL, hetscale=TRUE, unmatched=FALSE, ndepthmax=1000) {
gbuild <- match.arg(gbuild)
# integer value for chromosome X depends on the genome
if (gbuild %in% c("hg19", "hg38", "hg18")) nX <- 23
if (gbuild %in% c("mm9", "mm10")) nX <- 20
pmat <- procSnps(rcmat, ndepth, het.thresh, snp.nbhd, gbuild, unmatched, ndepthmax)
dmat <- counts2logROR(pmat[pmat$rCountT>0,], gbuild, unmatched)
if (gbuild == "udef") {
if (missing(ugcpct)) {
stop("GC percent data should be supplied if udef option is used")
} else {
nX <- length(ugcpct)
}
}
pmat <- procSnps(rcmat, ndepth, het.thresh, snp.nbhd, nX, unmatched, ndepthmax)
if (gbuild == "udef") {
dmat <- counts2logROR(pmat[pmat$rCountT>0,], gbuild, unmatched, ugcpct)
} else {
dmat <- counts2logROR(pmat[pmat$rCountT>0,], gbuild, unmatched)
}
tmp <- segsnps(dmat, cval, hetscale, deltaCN)
out <- list(pmat=pmat, gbuild=gbuild, nX=nX)
c(out, tmp)
Expand All @@ -49,8 +60,7 @@ procSample <- function(x, cval=150, min.nhet=15, dipLogR=NULL) {
chrs <- x$chromlevels
nchr <- length(chrs)
# get chromlevels from chrs
if (x$gbuild %in% c("hg19", "hg38", "hg18")) chromlevels <- c(1:22,"X")[chrs]
if (x$gbuild %in% c("mm9", "mm10")) chromlevels <- c(1:19,"X")[chrs]
chromlevels <- c(1:(nX-1), "X")[chrs]
# get the segment summary for the fit in seg.tree
nsegs <- 0
for (i in 1:nchr) {
Expand Down Expand Up @@ -162,8 +172,8 @@ plotSample <- function(x, emfit=NULL, clustered=FALSE, plot.type=c("em","naive",
if (length(ii)>0) out$lcn[ii] <- 5 + log10(out$lcn[ii])
plot(c(0,length(jseg$cnlr)), c(0,max(out$tcn)), type="n", ylab="copy number (nv)", xaxt="n")
abline(v=chrbdry, lwd=0.25)
segments(segstart, out$tcn, segend, out$tcn, lwd=1.75, col=1)
segments(segstart, out$lcn, segend, out$lcn, lwd=1.75, col=2)
segments(segstart, out$tcn, segend, out$tcn, lwd=1.75, col=1)
# add the cf
plot(c(0,length(jseg$cnlr)), 0:1, type="n", ylab="", xaxt="n", yaxt="n")
mtext("cf-nv", side=2, at=0.5, line=0.3, las=2, cex=0.75)
Expand All @@ -178,8 +188,8 @@ plotSample <- function(x, emfit=NULL, clustered=FALSE, plot.type=c("em","naive",
if (length(ii)>0) out$lcn.em[ii] <- 5 + log10(out$lcn.em[ii])
plot(c(0,length(jseg$cnlr)), c(0,max(out$tcn.em)), type="n", ylab="copy number (em)", xaxt="n")
abline(v=chrbdry, lwd=0.25)
segments(segstart, out$tcn.em, segend, out$tcn.em, lwd=1.75, col=1)
segments(segstart, out$lcn.em, segend, out$lcn.em, lwd=1.75, col=2)
segments(segstart, out$tcn.em, segend, out$tcn.em, lwd=1.75, col=1)
# add the cf
plot(c(0,length(jseg$cnlr)), 0:1, type="n", ylab="", xaxt="n", yaxt="n")
mtext("cf-em", side=2, at=0.5, line=0.2, las=2, cex=0.75)
Expand Down Expand Up @@ -215,7 +225,7 @@ logRlogORspider <- function(cncf, dipLogR=0, nfrac=0.005) {
}
}

plot(c(-0.95, 1.8), c(0, 5), type="n", xlab="Expected(logR - dipLogR)", ylab=" Expected(|logOR|)")
plot(c(-0.95, 1.8), c(0, 5.2), type="n", xlab="Expected(logR - dipLogR)", ylab=" Expected(|logOR|)")
l <- 1; i <-1; j <-0
linecols <- c("black","cyan3","green3","blue")
lines(logCNR[,l], logACR[,l], lty=1, col=j+1, lwd=1.25)
Expand All @@ -232,5 +242,7 @@ logRlogORspider <- function(cncf, dipLogR=0, nfrac=0.005) {
nhets <- sum(cncf$nhet)
ii <- cncf$num.mark > nfrac*nsnps & cncf$nhet > nfrac*nhets
cex <- 0.3 + 2.7*(cncf$num.mark[ii]/sum(0.1*cncf$num.mark[ii]))
points(cncf$cnlr.median[ii] - dipLogR, sqrt(abs(cncf$mafR[ii])), cex=cex, col="magenta4", lwd=1.5)
chrcol <- rainbow(24)
points(cncf$cnlr.median[ii] - dipLogR, sqrt(abs(cncf$mafR[ii])), cex=cex, pch=10, col=chrcol[cncf$chrom[ii]], lwd=1.5)
legend(-1, 5.25, paste("chr", c(1:22, "X"), sep=""), ncol=4, pch=10, col=chrcol[1:23], cex=0.65)
}
5 changes: 4 additions & 1 deletion inst/ChangeLog
@@ -1,6 +1,9 @@
02/28/2018: v0.6.0
12/13/2018: v0.6.0

o Removed emcncf2 and marked as defunct
o Changed the order of tcn-lcn lines being drawn to address (0,0) masking
o Added udef option for gbuild to enable analyzing other genomes (say
dog) with user supplied GC percentage data

02/28/2018: v0.5.14

Expand Down
4 changes: 2 additions & 2 deletions man/facets-internal.Rd
Expand Up @@ -22,9 +22,9 @@
}
\usage{
jointsegsummary(jointseg)
procSnps(rcmat, ndepth=35, het.thresh=0.25, snp.nbhd=250, gbuild="hg19",
procSnps(rcmat, ndepth=35, het.thresh=0.25, snp.nbhd=250, nX=23,
unmatched=FALSE, ndepthmax=1000)
counts2logROR(mat, gbuild, unmatched=FALSE, f=0.2)
counts2logROR(mat, gbuild, unmatched=FALSE, ugcpct = NULL, f=0.2)
scanSnp(maploc, het, nbhd)
fit.cpt.tree(genomdat, edgelim=10, cval=25, hscl=1, delta=0)
prune.cpt.tree(seg.tree, cval=25)
Expand Down
12 changes: 9 additions & 3 deletions man/preProcSample.Rd
Expand Up @@ -6,8 +6,8 @@
}
\usage{
preProcSample(rcmat, ndepth=35, het.thresh=0.25, snp.nbhd=250, cval=25,
deltaCN=0, gbuild=c("hg19", "hg38", "hg18", "mm9", "mm10"),
hetscale=TRUE, unmatched=FALSE, ndepthmax=1000)
deltaCN=0, gbuild=c("hg19", "hg38", "hg18", "mm9", "mm10", "udef"),
ugcpct = NULL, hetscale=TRUE, unmatched=FALSE, ndepthmax=1000)
}
\arguments{
\item{rcmat}{data frame with 6 required columns: \code{Chrom},
Expand All @@ -21,7 +21,13 @@
\item{gbuild}{genome build used for the alignment of the genome.
Default value is human genome build hg19. Other possibilities are
hg38 & hg18 for human and mm9 & mm10 for mouse. Chromosomes used for
analysis are \code{1-22, X} for humans and \code{1-19} for mouse.}
analysis are \code{1-22, X} for humans and \code{1-19} for mouse.
Option udef can be used to analyze other genomes.}
\item{ugcpct}{If udef is chosen for gbuild then appropriate GC
percentage date should be provided through this option. This is a
list of numeric vectors that gives the GC percentage windows of
width 1000 bases in steps of 100 i.e. 1-1000, 101-1100 etc. for the
autosomes and the X chromosome.}
\item{hetscale}{logical variable to indicate if logOR should get more
weight in the test statistics for segmentation and clustering. Usually
only 10\% of snps are hets and hetscale gives the logOR contribution
Expand Down
Binary file modified vignettes/FACETS.pdf
Binary file not shown.

0 comments on commit 9352925

Please sign in to comment.