Skip to content

Commit

Permalink
results: small changes to generating plots
Browse files Browse the repository at this point in the history
  • Loading branch information
noporpoise committed Aug 20, 2018
1 parent 78c7850 commit 400c0e3
Show file tree
Hide file tree
Showing 9 changed files with 373 additions and 88 deletions.
4 changes: 2 additions & 2 deletions results/kmer_size_experiment/Makefile
Expand Up @@ -19,8 +19,8 @@ GENREADS=$(CTXDIR)/scripts/python/generate-reads.py
COUNT_BAD_EDGES=python $(CTXDIR)/scripts/python/count-bad-edges.py
TIME=/usr/bin/time -l

# REF=$(CTXDIR)/results/data/chr22/chr22_17M_18M.fa
REF=$(CTXDIR)/results/data/chr22/chr22_28M_29M.fa
REF=$(CTXDIR)/results/data/chr22/chr22_17M_18M.fa
# REF=$(CTXDIR)/results/data/chr22/chr22_28M_29M.fa
PERFECT=data/perfect_cov
STOCH=data/stoch_cov
STOCHERR=data/stocherr_cov
Expand Down
3 changes: 3 additions & 0 deletions results/kmer_size_experiment/notes.txt
Expand Up @@ -5,6 +5,9 @@ generated if you have it installed (e.g. with `brew install science/sga`).
Specify the absolue path to the reference FASTA:

REF=../data/chr22/chr22_28M_29M.fa
# OR
REF=../data/chr22/chr22_17M_18M.fa
# Then convert to absolute path
REF=$(cd "$(dirname "$REF")"; pwd)/$(basename "$REF")

Sample reads and assemble with:
Expand Down
178 changes: 106 additions & 72 deletions results/kmer_size_experiment/results/generate-results.sh
@@ -1,120 +1,154 @@
#!/bin/bash
set -eou pipefail
set -x

# Set to 1 to filter out k=99, else include k99
HIDE_K99=0

if (($HIDE_K99)); then
OUTDIR=latest_k91
else
OUTDIR=latest_k99
fi

mkdir -p $OUTDIR

# hide k99 files
echo "-- hiding k=99"
for d in perfect_cov stoch_cov stocherr_cov stocherr_corr
do
if [ -d ../$d/k99 ]
then
mv ../$d/k99 ../$d/hidden_k99
fi
done
if (($HIDE_K99)); then
echo "-- hiding k=99"
for d in perfect_cov stoch_cov stocherr_cov stocherr_corr
do
if [ -d ../$d/k99 ]
then
mv ../$d/k99 ../$d/hidden_k99
fi
done
fi

mkdir -p latest
echo "-- Perfect Coverage"
./make-csv.sh ../perfect_cov/k*/stats.plain.txt > latest/perfect.plain.csv
./make-csv.sh ../perfect_cov/k*/stats.links.txt > latest/perfect.links.csv
./make-csv.sh ../perfect_cov/k*/stats.pe.txt > latest/perfect.pe.csv
./plot-ng50-and-errs.R "Perfect cov. (100X, 100bp reads)" latest/perfect.pdf \
latest/perfect.plain.csv latest/perfect.links.csv latest/perfect.pe.csv
./make-csv.sh ../perfect_cov/k*/stats.plain.txt > $OUTDIR/perfect.plain.csv
./make-csv.sh ../perfect_cov/k*/stats.links.txt > $OUTDIR/perfect.links.csv
./make-csv.sh ../perfect_cov/k*/stats.pe.txt > $OUTDIR/perfect.pe.csv
./plot-ng50-and-errs.R "Perfect cov. (100X, 100bp reads)" $OUTDIR/perfect.pdf \
$OUTDIR/perfect.plain.csv $OUTDIR/perfect.links.csv $OUTDIR/perfect.pe.csv

./plot-ng50-and-errs.R "Perfect cov. (100X, 100bp reads)" latest/perfect_no_pe.pdf \
latest/perfect.plain.csv latest/perfect.links.csv
./plot-ng50-and-errs.R "Perfect cov. (100X, 100bp reads)" $OUTDIR/perfect_no_pe.pdf \
$OUTDIR/perfect.plain.csv $OUTDIR/perfect.links.csv

echo "-- Stochastic Coverage"
./make-csv.sh ../stoch_cov/k*/stats.plain.txt > latest/stoch.plain.csv
./make-csv.sh ../stoch_cov/k*/stats.links.txt > latest/stoch.links.csv
./make-csv.sh ../stoch_cov/k*/stats.pe.txt > latest/stoch.pe.csv
./plot-ng50-and-errs.R "Stochastic cov. (100X, 100bp reads)" latest/stoch.pdf \
latest/stoch.plain.csv latest/stoch.links.csv latest/stoch.pe.csv
./make-csv.sh ../stoch_cov/k*/stats.plain.txt > $OUTDIR/stoch.plain.csv
./make-csv.sh ../stoch_cov/k*/stats.links.txt > $OUTDIR/stoch.links.csv
./make-csv.sh ../stoch_cov/k*/stats.pe.txt > $OUTDIR/stoch.pe.csv
./plot-ng50-and-errs.R "Stochastic cov. (100X, 100bp reads)" $OUTDIR/stoch.pdf \
$OUTDIR/stoch.plain.csv $OUTDIR/stoch.links.csv $OUTDIR/stoch.pe.csv

echo "-- Stochastic Coverage + Error"
./make-csv.sh ../stocherr_cov/k*/stats.plain.txt > latest/stocherr.plain.csv
./make-csv.sh ../stocherr_cov/k*/stats.links.txt > latest/stocherr.links.csv
./make-csv.sh ../stocherr_cov/k*/stats.pe.txt > latest/stocherr.pe.csv
./plot-ng50-and-errs.R "Stochastic cov. + 0.5% err (100X, 100bp reads)" latest/stocherr.pdf \
latest/stocherr.plain.csv latest/stocherr.links.csv latest/stocherr.pe.csv
./make-csv.sh ../stocherr_cov/k*/stats.plain.txt > $OUTDIR/stocherr.plain.csv
./make-csv.sh ../stocherr_cov/k*/stats.links.txt > $OUTDIR/stocherr.links.csv
./make-csv.sh ../stocherr_cov/k*/stats.pe.txt > $OUTDIR/stocherr.pe.csv
./plot-ng50-and-errs.R "Stochastic cov. + 0.5% err (100X, 100bp reads)" $OUTDIR/stocherr.pdf \
$OUTDIR/stocherr.plain.csv $OUTDIR/stocherr.links.csv $OUTDIR/stocherr.pe.csv

echo "-- Stochastic Coverage + Error + Error Correction"
./make-csv.sh ../stocherr_corr/k*/stats.plain.txt > latest/stocherrcorr.plain.csv
./make-csv.sh ../stocherr_corr/k*/stats.links.txt > latest/stocherrcorr.links.csv
./make-csv.sh ../stocherr_corr/k*/stats.pe.txt > latest/stocherrcorr.pe.csv
./plot-ng50-and-errs.R "Stochastic cov. + 0.5% err + correct (100X, 100bp reads)" latest/stocherrcorr.pdf \
latest/stocherr.plain.csv latest/stocherr.links.csv latest/stocherr.pe.csv \
latest/stocherrcorr.plain.csv latest/stocherrcorr.links.csv latest/stocherrcorr.pe.csv
./make-csv.sh ../stocherr_corr/k*/stats.plain.txt > $OUTDIR/stocherrcorr.plain.csv
./make-csv.sh ../stocherr_corr/k*/stats.links.txt > $OUTDIR/stocherrcorr.links.csv
./make-csv.sh ../stocherr_corr/k*/stats.pe.txt > $OUTDIR/stocherrcorr.pe.csv
./plot-ng50-and-errs.R "Stochastic cov. + 0.5% err + correct (100X, 100bp reads)" $OUTDIR/stocherrcorr.pdf \
$OUTDIR/stocherr.plain.csv $OUTDIR/stocherr.links.csv $OUTDIR/stocherr.pe.csv \
$OUTDIR/stocherrcorr.plain.csv $OUTDIR/stocherrcorr.links.csv $OUTDIR/stocherrcorr.pe.csv

# Gather SGA results
if [ -d ../stocherr_cov/sga ]
then
echo "-- SGA plots"
# ../stocherr_cov/sga/k21/stats.k21.txt
./make-csv.sh ../stocherr_cov/sga/k*/stats.k*.txt > latest/stocherr.sga.csv
./plot-mccortex-vs-sga.R latest/links-vs-sga-ng50.pdf latest/links-vs-sga-errs.pdf latest/stocherr.links.csv latest/stocherr.sga.csv
./plot-mccortex-vs-sga.R latest/pe-vs-sga-ng50.pdf latest/pe-vs-sga-errs.pdf latest/stocherr.pe.csv latest/stocherr.sga.csv
./make-csv.sh ../stocherr_cov/sga/k*/stats.k*.txt > $OUTDIR/stocherr.sga.csv
./plot-mccortex-vs-sga.R $OUTDIR/links-vs-sga-ng50.pdf $OUTDIR/links-vs-sga-errs.pdf $OUTDIR/stocherr.links.csv $OUTDIR/stocherr.sga.csv
./plot-mccortex-vs-sga.R $OUTDIR/pe-vs-sga-ng50.pdf $OUTDIR/pe-vs-sga-errs.pdf $OUTDIR/stocherr.pe.csv $OUTDIR/stocherr.sga.csv
fi

if [ -d ../stocherr_corr/sga ]
then
echo "-- SGA + Error Correction plots"
# ../stocherr_corr/sga/k21/stats.k21.txt
./make-csv.sh ../stocherr_corr/sga/k*/stats.k*.txt > latest/stocherrcorr.sga.csv
./plot-mccortex-vs-sga.R latest/corr-links-vs-sga-ng50.pdf latest/corr-links-vs-sga-errs.pdf latest/stocherrcorr.links.csv latest/stocherrcorr.sga.csv
./plot-mccortex-vs-sga.R latest/corr-pe-vs-sga-ng50.pdf latest/corr-pe-vs-sga-errs.pdf latest/stocherrcorr.pe.csv latest/stocherrcorr.sga.csv
./make-csv.sh ../stocherr_corr/sga/k*/stats.k*.txt > $OUTDIR/stocherrcorr.sga.csv
./plot-mccortex-vs-sga.R $OUTDIR/corr-links-vs-sga-ng50.pdf $OUTDIR/corr-links-vs-sga-errs.pdf $OUTDIR/stocherrcorr.links.csv $OUTDIR/stocherrcorr.sga.csv
./plot-mccortex-vs-sga.R $OUTDIR/corr-pe-vs-sga-ng50.pdf $OUTDIR/corr-pe-vs-sga-errs.pdf $OUTDIR/stocherrcorr.pe.csv $OUTDIR/stocherrcorr.sga.csv
# McCortex (SE) vs McCortex (PE) vs SGA (all with bfc corrected reads)
./plot-mccortex-se-pe-vs-sga.R $OUTDIR/corr-both-vs-sga-ng50.pdf $OUTDIR/corr-both-vs-sga-errs.pdf $OUTDIR/stocherrcorr.links.csv $OUTDIR/stocherrcorr.pe.csv $OUTDIR/stocherrcorr.sga.csv
# McCortex (SE) vs McCortex (PE) vs SGA (not with bfc corrected reads)
./plot-mccortex-se-pe-vs-sga.R $OUTDIR/both-vs-sga-ng50.pdf $OUTDIR/both-vs-sga-errs.pdf $OUTDIR/stocherr.links.csv $OUTDIR/stocherr.pe.csv $OUTDIR/stocherr.sga.csv
# Corrected + McCortex vs raw + SGA
./plot-mccortex-vs-sga.R latest/corr-links-vs-raw-sga-ng50.pdf latest/corr-links-vs-raw-sga-errs.pdf latest/stocherrcorr.links.csv latest/stocherr.sga.csv
./plot-mccortex-vs-sga.R latest/corr-pe-vs-raw-sga-ng50.pdf latest/corr-pe-vs-raw-sga-errs.pdf latest/stocherrcorr.pe.csv latest/stocherr.sga.csv
./plot-mccortex-vs-sga.R $OUTDIR/corr-links-vs-raw-sga-ng50.pdf $OUTDIR/corr-links-vs-raw-sga-errs.pdf $OUTDIR/stocherrcorr.links.csv $OUTDIR/stocherr.sga.csv
./plot-mccortex-vs-sga.R $OUTDIR/corr-pe-vs-raw-sga-ng50.pdf $OUTDIR/corr-pe-vs-raw-sga-errs.pdf $OUTDIR/stocherrcorr.pe.csv $OUTDIR/stocherr.sga.csv
fi

echo "-- Plain vs links"
./plot-ng50-three-sets.R latest/plain-vs-links.pdf \
latest/perfect.plain.csv latest/perfect.links.csv \
latest/stoch.plain.csv latest/stoch.links.csv \
latest/stocherr.plain.csv latest/stocherr.links.csv
./plot-ng50-three-sets.R $OUTDIR/plain-vs-links-ng50.pdf $OUTDIR/plain-vs-links-errs.pdf \
$OUTDIR/perfect.plain.csv $OUTDIR/perfect.links.csv \
$OUTDIR/stoch.plain.csv $OUTDIR/stoch.links.csv \
$OUTDIR/stocherr.plain.csv $OUTDIR/stocherr.links.csv

echo "-- Plain vs PE"
./plot-ng50-three-sets.R latest/plain-vs-pe.pdf \
latest/perfect.plain.csv latest/perfect.pe.csv \
latest/stoch.plain.csv latest/stoch.pe.csv \
latest/stocherr.plain.csv latest/stocherr.pe.csv
./plot-ng50-three-sets.R $OUTDIR/plain-vs-pe-ng50.pdf $OUTDIR/plain-vs-pe-errs.pdf \
$OUTDIR/perfect.plain.csv $OUTDIR/perfect.pe.csv \
$OUTDIR/stoch.plain.csv $OUTDIR/stoch.pe.csv \
$OUTDIR/stocherr.plain.csv $OUTDIR/stocherr.pe.csv

echo "-- Plain vs links (with error correction)"
./plot-ng50-three-sets.R latest/plain-vs-links-corr.pdf \
latest/perfect.plain.csv latest/perfect.links.csv \
latest/stoch.plain.csv latest/stoch.links.csv \
latest/stocherrcorr.plain.csv latest/stocherrcorr.links.csv
./plot-ng50-three-sets.R $OUTDIR/plain-vs-links-corr-ng50.pdf $OUTDIR/plain-vs-links-corr-errs.pdf \
$OUTDIR/perfect.plain.csv $OUTDIR/perfect.links.csv \
$OUTDIR/stoch.plain.csv $OUTDIR/stoch.links.csv \
$OUTDIR/stocherrcorr.plain.csv $OUTDIR/stocherrcorr.links.csv

echo "-- Plain vs PE links (with error correction)"
./plot-ng50-three-sets.R latest/plain-vs-pe-corr.pdf \
latest/perfect.plain.csv latest/perfect.pe.csv \
latest/stoch.plain.csv latest/stoch.pe.csv \
latest/stocherrcorr.plain.csv latest/stocherrcorr.pe.csv
./plot-ng50-three-sets.R $OUTDIR/plain-vs-pe-corr-ng50.pdf $OUTDIR/plain-vs-pe-corr-errs.pdf \
$OUTDIR/perfect.plain.csv $OUTDIR/perfect.pe.csv \
$OUTDIR/stoch.plain.csv $OUTDIR/stoch.pe.csv \
$OUTDIR/stocherrcorr.plain.csv $OUTDIR/stocherrcorr.pe.csv

echo "-- Plain vs links (perfect, stochastic, error, corrected)"
./plot-ng50-four-sets.R $OUTDIR/plain-vs-links-fourway-ng50.pdf $OUTDIR/plain-vs-links-fourway-errs.pdf \
$OUTDIR/perfect.plain.csv $OUTDIR/perfect.links.csv \
$OUTDIR/stoch.plain.csv $OUTDIR/stoch.links.csv \
$OUTDIR/stocherr.plain.csv $OUTDIR/stocherr.links.csv \
$OUTDIR/stocherrcorr.plain.csv $OUTDIR/stocherrcorr.links.csv

echo "-- Plain vs PE (perfect, stochastic, error, corrected)"
./plot-ng50-four-sets.R $OUTDIR/plain-vs-pe-fourway-ng50.pdf $OUTDIR/plain-vs-pe-fourway-errs.pdf \
$OUTDIR/perfect.plain.csv $OUTDIR/perfect.pe.csv \
$OUTDIR/stoch.plain.csv $OUTDIR/stoch.pe.csv \
$OUTDIR/stocherr.plain.csv $OUTDIR/stocherr.pe.csv \
$OUTDIR/stocherrcorr.plain.csv $OUTDIR/stocherrcorr.pe.csv

echo "-- Making cleaning tables"
./make-cleaning-table.py ../stocherr_cov/k*/graph.k*.dist.txt > latest/cleaning.table.csv
./make-cleaning-table.py ../stocherr_corr/k*/graph.k*.dist.txt > latest/cleaning.corr.table.csv
./make-cleaning-table.py ../stocherr_cov/k*/graph.k*.dist.txt > $OUTDIR/cleaning.table.csv
./make-cleaning-table.py ../stocherr_corr/k*/graph.k*.dist.txt > $OUTDIR/cleaning.corr.table.csv

echo "-- Make link count csv"
for t in se pe; do
cat ../perfect_cov/k*/graph.k*.$t.raw.ctp.gz.log | ./count-links.pl > latest/perfect.linkcounts.$t.csv
cat ../stoch_cov/k*/graph.k*.$t.raw.ctp.gz.log | ./count-links.pl > latest/stoch.linkcounts.$t.csv
cat ../stocherr_cov/k*/graph.k*.$t.raw.ctp.gz.log | ./count-links.pl > latest/stocherr.linkcounts.$t.csv
cat ../stocherr_corr/k*/graph.k*.$t.raw.ctp.gz.log | ./count-links.pl > latest/stocherrcorr.linkcounts.$t.csv
cat ../perfect_cov/k*/graph.k*.$t.raw.ctp.gz.log | ./count-links.pl > $OUTDIR/perfect.linkcounts.$t.csv
cat ../stoch_cov/k*/graph.k*.$t.raw.ctp.gz.log | ./count-links.pl > $OUTDIR/stoch.linkcounts.$t.csv
cat ../stocherr_cov/k*/graph.k*.$t.raw.ctp.gz.log | ./count-links.pl > $OUTDIR/stocherr.linkcounts.$t.csv
cat ../stocherr_corr/k*/graph.k*.$t.raw.ctp.gz.log | ./count-links.pl > $OUTDIR/stocherrcorr.linkcounts.$t.csv
done
for t in se pe; do
for s in perfect stoch stocherr stocherrcorr; do
./plot-link-counts.R latest/$s.linkcounts.$t.pdf latest/$s.linkcounts.$t.csv
./plot-link-counts.R "$OUTDIR/$s"_linkcounts_$t.pdf $OUTDIR/$s.linkcounts.$t.csv
done
./plot-link-counts-together.R latest/linkcounts.$t.pdf latest/{perfect,stoch,stocherr,stocherrcorr}.linkcounts.$t.csv
./plot-link-counts-threeway.R $OUTDIR/linkcounts_$t\_threeway.pdf $OUTDIR/{perfect,stoch,stocherr}.linkcounts.$t.csv
./plot-link-counts-together.R $OUTDIR/linkcounts_$t\_fourway.pdf $OUTDIR/{perfect,stoch,stocherr,stocherrcorr}.linkcounts.$t.csv
done


# unhide k99 files
echo "-- recovering k=99"
for d in perfect_cov stoch_cov stocherr_cov stocherr_corr
do
if [ -d ../$d/hidden_k99 ]
then
mv ../$d/hidden_k99 ../$d/k99
fi
done
if (($HIDE_K99)); then
echo "-- recovering k=99"
for d in perfect_cov stoch_cov stocherr_cov stocherr_corr
do
if [ -d ../$d/hidden_k99 ]
then
mv ../$d/hidden_k99 ../$d/k99
fi
done
fi
62 changes: 62 additions & 0 deletions results/kmer_size_experiment/results/plot-link-counts-threeway.R
@@ -0,0 +1,62 @@
#!/usr/bin/env Rscript --vanilla

# Isaac Turner 2017-02-16

args <- commandArgs(trailingOnly=TRUE)
if(length(args) != 4) {
stop("Usage: ./plot-link-counts.R <linkcounts.pdf> <perfect.csv> <stoch.csv> <stocherr.csv>\n")
}


plot_path <- "latest/linkcounts.se.pdf"
perf_path <- "latest/perfect.linkcounts.se.csv"
stoch_path <- "latest/stoch.linkcounts.se.csv"
error_path <- "latest/stocherr.linkcounts.se.csv"

plot_path = args[1]
perf_path <- args[2]
stoch_path <- args[3]
error_path <- args[4]

a <- read.table(perf_path, sep='\t',head=T,comment.char='#',as.is=T)
b <- read.table(stoch_path, sep='\t',head=T,comment.char='#',as.is=T)
c <- read.table(error_path, sep='\t',head=T,comment.char='#',as.is=T)

nlinkkmers_ylim <- ceiling(max(a$n_link_kmers, b$n_link_kmers, c$n_link_kmers))
kmers <- a$K

# * joins with no spaces, ~ joins with a space
xlabel = expression(italic('k'))
ylabel = expression('no. of '*italic('k')*'mers with links (log)')

pdf(plot_path, width=6, height=6)

# Remove empty title space
par(mar=c(4,5,2,2)+0.1) # set margins: bottom, left, top and right
par(xpd=TRUE)

legend_labels <- c("perfect", "stochastic", "error")
cols <- c('#1b9e77', '#d95f02', '#7570b3') # from color brewer
pnts <- c(19,4,17) # point styles pch=
jf <- 0.2 # jitter factor
lt <- 2 # line thickness

plot(1, type='n', bty="n", xlab='', ylab='', log='y',
xlim=c(20,100), ylim=c(1,nlinkkmers_ylim), axes=F)

points(jitter(a$K,jf), a$n_link_kmers, type='b', lwd=lt, pch=pnts[1], col=cols[1], lty=1)
points(jitter(b$K,jf), b$n_link_kmers, type='b', lwd=lt, pch=pnts[2], col=cols[2], lty=1)
points(jitter(c$K,jf), c$n_link_kmers, type='b', lwd=lt, pch=pnts[3], col=cols[3], lty=1)

mtext(side=1, text=xlabel, line=2)
mtext(side=2, text=ylabel, line=4)
axis(1, at=a$K)
axis(2, las=2)

par(xpd=TRUE)
legend("topright", bty="n", inset=c(0.2,0),
legend=legend_labels,
col=cols, lwd=lt, lty=c(1,1,1),
pch=pnts)

dev.off()
3 changes: 2 additions & 1 deletion results/kmer_size_experiment/results/plot-mccortex-vs-sga.R
Expand Up @@ -76,7 +76,8 @@ plot(1, type='n', bty="n", log="y", axes=F,
points(jitter(a$K,jf), a$AssemblyErrors+1, type='b', lwd=lt, pch=pnts[3], col=cols[3], lty=1)
points(jitter(b$K,jf), b$AssemblyErrors+1, type='b', lwd=lt, pch=pnts[4], col=cols[4], lty=1)
axis(1, at=kmers)
axis(2, at=c(1,2,11,51,101,201,501), labels=c(0,1,10,50,100,200,500), las=2)
axis(2, at=c(1,2,11,51,101,201,501,1001,2001,4001),
labels=c(0,1,10,50,100,200,500,1000,2000,4000), las=2)

par(xpd=TRUE)
legend("topright", bty="n", inset=c(0.2,-0.15),
Expand Down
2 changes: 1 addition & 1 deletion results/kmer_size_experiment/results/plot-ng50-and-errs.R
Expand Up @@ -107,7 +107,7 @@ N50_ylim <- 100
asm_ylim <- 5
if(use_pe) {
N50_ylim <- 150
asm_ylim <- 1000
asm_ylim <- 2000
}

xlabel = expression(italic('k'))
Expand Down

0 comments on commit 400c0e3

Please sign in to comment.