Skip to content

Commit

Permalink
Copying latest version from Google Code
Browse files Browse the repository at this point in the history
  • Loading branch information
mahmoudibrahim committed Aug 4, 2015
1 parent 508dca1 commit 4fa4548
Show file tree
Hide file tree
Showing 12 changed files with 3,167 additions and 0 deletions.
542 changes: 542 additions & 0 deletions JAMM.sh

Large diffs are not rendered by default.

333 changes: 333 additions & 0 deletions SignalGenerator.sh
@@ -0,0 +1,333 @@
########################################################################
# JAMMv1.0.7rev2 is a peak finder for joint analysis of NGS replicates.
# Copyright (C) 2014-2015 Mahmoud Ibrahim
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# Contact: mahmoud.ibrahim@mdc-berlin.de
########################################################################

##Finding out the path
sPath="`dirname \"$0\"`"
sPath="`( cd \"$sPath\" && pwd )`"



usage()
{
cat << EOF
Welcome to JAMM v1.0.7rev2 Signal Generator Script (GNU GPLv3). Copyright (C) 2014-2015 Mahmoud Ibrahim.
This program comes with ABSOLUTELY NO WARRANTY; for details visit http://www.gnu.org/licenses/gpl.html. This is free software, and you are welcome to redistribute it under certain conditions; visit http://www.gnu.org/licenses/gpl.html for details.
OPTIONS:
-s Directory containing sample files (required)
-g Genome size file (required)
-o Output Directory (required)
-c directory containing input or Control files
-r file with Regions to get signal for (required)
-b Bin size for signal generation (default: 10)
-f Fragment lengths (required if -t is "single")
-p Number of processors used by R scripts (default: 1)
-t Alignment type, paired or single (default: single)
-n Normalization method, chromAverage or depth (default: chromAverage)
EOF
}


# =========================
# Process Input parameters
# =========================


sdir=""
gsize=""
out=""
signal="10"
regs=""
fraglen=""
wdir=$(mktemp -d)
ran=$RANDOM
cores="1"
type="single"
normal="chromAverage"

while getopts "s:o:c:r:b:f:g:p:t:n:" OPTION
do
case $OPTION in
s) sdir=$OPTARG
;;
g) gsize=$OPTARG
;;
o) out=$OPTARG
;;
c) bdir=$OPTARG
;;
r) regs=$OPTARG
;;
b) signal=$OPTARG
;;
f) fraglen=$OPTARG
;;
p) cores=$OPTARG
;;
t) type=$OPTARG
;;
n) normal=$OPTARG
;;
?)
usage
exit
;;
esac
done


if [[ -z $sdir ]] || [[ -z $regs ]] || [[ -z $out ]]
then
usage
exit 1
fi

if [[ -z $fraglen ]]
then
if [ $type == "single" ]
then
usage
exit 1
fi
fi
if [[ -d "$out/signal" ]]; then
printf "\n\nOutput directory $out/signal already exists. I can't override existing results!\n\n"
exit 0
fi
#=======================> DONE!



# =============================
# Step One: Initial Processing
# =============================
printf "\n\n==========================================================\nStarted JAMM1.0.7rev2 Signal Generator Pipeline...Hang on!\n==========================================================\n\n"

if [ ! -d "$wdir" ]; then
mkdir $wdir #make working directory
fi
if [ ! -d "$out" ]; then
mkdir $out #make working directory
fi
mkdir $wdir/bkgd.$ran/ #directory to store background files
mkdir $wdir/sizes.$ran/ #chromosomes and sizes
mkdir $wdir/samples.$ran/ #store sample files

dupnum=$(ls -1 $sdir | wc -l) #count how many sample files


#separate chromosome sizes
printf "Loading genome size file..."
ext="$wdir/sizes.$ran/"
awk -v ext="$ext" '{ print >> ext"/size." $1 ".bed" }' $gsize
printf "Done!\n"


printf "Processing sample files..."
#load each chromosome from each sample file
for i in $sdir/*.bed; do
samplefile=$(basename $i)
for f in $wdir/sizes.$ran/*; do
sizefile=$(basename $f)
chr=$(echo $sizefile | awk -F"." '{print $2}' | awk -F"." '{print $1}');
awk -v chr="$chr" -v ext="$wdir/samples.$ran/" -v samplefile="$samplefile" -F"\t" '$1 == chr { print $2"\t"$6 >> ext"sample."chr"."samplefile }' "$i"
done
done
printf "Done!\n"


if [ ! -z $bdir ]; then
#concatenate all background files into one file
printf "Processing control files..."
cat $bdir/*.bed > $wdir/bkgd.$ran/ctrl.bed

for f in $wdir/sizes.$ran/*; do
sizefile=$(basename $f)
chr=$(echo $sizefile | awk -F"." '{print $2}' | awk -F"." '{print $1}');
awk -v chr="$chr" -v ext="$wdir/bkgd.$ran/" -F"\t" '$1 == chr { print $2"\t"$6 >> ext"bkgd."chr".ctrl.bed" }' "$wdir/bkgd.$ran/ctrl.bed"
done


printf "Done!\n"
fi


#determine average read lengths
if [ $type == "single" ]; then
printf "Getting average read lengths..."
if [ ! -z $bdir ]; then
readC=$(awk '{a=$3-$2;print a;}' "$wdir/bkgd.$ran/ctrl.bed" | perl -lane '$a+=$_;END{print $a/$.}' | awk '{print int($1)}')
fi
readL=""
for s in $sdir/*.bed; do #and for each sample file
read=$(awk '{a=$3-$2;print a;}' "$s" | perl -lane '$a+=$_;END{print $a/$.}' | awk '{print int($1)}')
readL="$readL,$read"
done
readL=${readL#","}
printf "Done!\n"
fi
#=======================> DONE!






# ===========================
# Step Three: Getting Signal
# ===========================
mkdir $wdir/signal.$ran/
mkdir $out/signal #store signal


printf "Generating Signal...(bin size: $signal)\n"

if [ $type == "single" ]; then
counting=1;
for f in $wdir/sizes.$ran/*; do #for each chromosome
samplelist=""
frag=""
frag=$fraglen
k=1

sizefile=$(basename $f)
chr=$(echo $sizefile | awk -F"." '{print $2}' | awk -F"." '{print $1}');
chrSize=$(cat $f | cut -f2);
printf "Chromosome $chr: "

#list of sample bed files and fragment lengths
for s in $wdir/samples.$ran/*.bed; do #and for each sample file
samplefile=$(basename $s)
chr2=$(echo $samplefile | awk -F"." '{print $2}');
if [ $chr == $chr2 ] #belonging to this chromosome
then
samplelist="$samplelist,$wdir/samples.$ran/ext.$samplefile"
samplename=$(echo $samplefile | awk -F"." '{ print $3 }')
samplefilename=$(echo $samplefile | cut -d'.' -f 3-)
shift=$(echo "$frag" | cut -f "$k" -d ",")
read=$(echo "$readL" | cut -f "$k" -d ",")
k=$(($k+1))
perl "$sPath/readshifter.pl" "$wdir/samples.$ran/$samplefile" $shift $read > "$wdir/samples.$ran/ext.$samplefile"
fi
done

#control file
bkgdfile="None"
if [ ! -z $bdir ]; then
l=$(($dupnum+1))
bshift=$(echo $frag | cut -f "$l" -d ",")
perl "$sPath/readshifter.pl" "$wdir/bkgd.$ran/bkgd.$chr.ctrl.bed" $bshift $readC > "$wdir/bkgd.$ran/ext.bkgd.$chr.ctrl.bed"
bkgdfile="$wdir/bkgd.$ran/ext.bkgd.$chr.ctrl.bed"
fi

#remove leading comma
samplelist=${samplelist#","}
frag=${frag#","}

depth="0";
if [ $normal == "depth" ]; then
genomeSize=$(awk '{a=$2;print a;}' "$gsize" | perl -lane '$a+=$_;END{print $a}' | awk '{print int($1)}');
depthTemp="";
for s in $sdir/*.bed; do #and for each sample file
readnum=$(wc -l $s | cut -d" " -f1);
depthTemping=$(awk -v rn=$readnum -v gs=$genomeSize 'BEGIN { print rn / gs }');
depthTemp="$depthTemp,$depthTemping";
done
if [ ! -z $bdir ]; then
readnum=$(wc -l "$wdir/bkgd.$ran/ctrl.bed" | cut -d" " -f1);
depthTemping=$(awk -v rn=$readnum -v gs=$genomeSize 'BEGIN { print rn / gs }');
depthTemp="$depthTemp,$depthTemping";
fi
depth=${depthTemp#","}
fi
#call the peak calling R script
Rscript "$sPath/signalmaker.r" -chromoS="$chrSize" -chromo="$chr" -bednames=$samplelist -frag=$frag -bkgd=$bkgdfile -out="$wdir/signal.$ran/" -sig="$signal" -regions="$regs" -p="$cores" -chrcount="$counting" -t="$type" -normal="$normal" -depth="$depth"
cat $wdir/signal.$ran/*.bedSignal | awk -F"\t" -v j=0 '$4 > j' > "$out/signal/$chr.bedGraph"
rm $wdir/signal.$ran/*.bedSignal
counting=$(($counting+1));
done
counting=1;
fi



if [ $type == "paired" ]; then
counting=1;
for f in $wdir/sizes.$ran/*; do #for each chromosome
samplelist=""

sizefile=$(basename $f)
chr=$(echo $sizefile | awk -F"." '{print $2}' | awk -F"." '{print $1}');
chrSize=$(cat $f | cut -f2);
printf "Chromosome $chr: "

#list of sample bed files and fragment lengths
for s in $wdir/samples.$ran/*.bed; do #and for each sample file
samplefile=$(basename $s)
chr2=$(echo $samplefile | awk -F"." '{print $2}');
if [ $chr == $chr2 ] #belonging to this chromosome
then
samplelist="$samplelist,$wdir/samples.$ran/$samplefile"
samplename=$(echo $samplefile | awk -F"." '{ print $3 }')
samplefilename=$(echo $samplefile | cut -d'.' -f 3-)
x="$sdir/$samplefilename"
fi
done

#control file
bkgdfile="None"
if [ ! -z $bdir ]; then
bkgdfile="$wdir/bkgd.$ran/bkgd.$chr.ctrl.bed"
fi

#remove leading comma
samplelist=${samplelist#","}
frag=${frag#","}

depth="0";
if [ $normal == "depth" ]; then
genomeSize=$(awk '{a=$2;print a;}' "$gsize" | perl -lane '$a+=$_;END{print $a}' | awk '{print int($1)}');
depthTemp="";
for s in $sdir/*.bed; do #and for each sample file
readnum=$(wc -l $s | cut -d" " -f1);
depthTemping=$(awk -v rn=$readnum -v gs=$genomeSize 'BEGIN { print rn / gs }');
depthTemp="$depthTemp,$depthTemping";
done
if [ ! -z $bdir ]; then
readnum=$(wc -l "$wdir/bkgd.$ran/ctrl.bed" | cut -d" " -f1);
depthTemping=$(awk -v rn=$readnum -v gs=$genomeSize 'BEGIN { print rn / gs }');
depthTemp="$depthTemp,$depthTemping";
fi
depth=${depthTemp#","}
fi
#call the peak calling R script
Rscript "$sPath/signalmaker.r" -chromoS="$chrSize" -chromo="$chr" -bednames=$samplelist -frag=$frag -bkgd=$bkgdfile -out="$wdir/signal.$ran/" -sig="$signal" -regions="$regs" -p="$cores" -chrcount="$counting" -t="$type" -normal="$normal" -depth="$depth"
cat $wdir/signal.$ran/*.bedSignal | awk -F"\t" -v j=0 '$4 > j' > "$out/signal/$chr.bedGraph"
rm $wdir/signal.$ran/*.bedSignal
counting=$(($counting+1));
done
counting=1;
fi
#=======================> DONE!

rm -rf $wdir

printf "\n\n========================================\nWe're done...Congratulations!\n========================================\n\n"

0 comments on commit 4fa4548

Please sign in to comment.