/
LjAt_NC.sh
executable file
·126 lines (100 loc) · 3.49 KB
/
LjAt_NC.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
#!/bin/bash
# Scripts to obtain an ASV table from 16S rRNA sequencing
# data from natural community experiments
# by Rui Guan <guan@mpipz.mpg.de>
# modified from the DADA2 tutorial v.1.12

# abort as soon as any unguarded command exits with a non-zero status
set -e

# Load the helper files that live next to this script.
# The rest of the script reads $working_dir, $data_dir and $l_list_miseq
# without assigning them first, so they are presumably provided by these
# files -- TODO confirm.
# Quoting keeps this working when the script path contains blanks.
scripts_dir=$(dirname -- "$0")
source "$scripts_dir/activate"
source "$scripts_dir/config.sh"
#######################################
# Append a UTC-timestamped message to the current log file.
# Globals:   logfile (read) - path of the file to append to
# Arguments: $1 - message text
# Outputs:   one line "<output of date -u>: <message>" appended to $logfile
#######################################
log() {
  # Quoting keeps the message verbatim: the "[lib]" prefix used by callers
  # would otherwise be treated as a glob pattern (and replaced by any matching
  # file name in the current directory), and runs of blanks would collapse.
  printf '%s: %s\n' "$(date -u)" "$1" >> "$logfile"
}
# Step 1 workspace: <working_dir>/01.split_fq, emptied on every run.
# ${var:?} aborts with a message instead of operating on an empty path.
mkdir -p "${working_dir:?working_dir must be set}"
working_dir="$working_dir/01.split_fq"
output="$working_dir/output.txt"
logfile="$working_dir/log.txt"
mkdir -p "$working_dir"
# start from a clean slate; the glob stays outside the quotes so it expands
rm -f -r "${working_dir:?}"/*
#######################################
# Print the length of the barcode found in the last line of a mapping file.
# The barcode is the second whitespace-separated field of that line.
# Arguments: $1 - path to the mapping file
# Outputs:   barcode length on stdout
#######################################
barcode_length() {
  local mapping_file=$1
  local barcode
  barcode=$(tail -n 1 "$mapping_file" | awk '{print $2}')
  printf '%s\n' "${#barcode}"
}

# demultiplexing reads1 and reads2 files
# shellcheck disable=SC2086 -- the list is split on whitespace on purpose
for l in $l_list_miseq; do
  lib_dir="$working_dir/$l"
  mapping_file="$data_dir/${l}_mapping.txt"

  # initialize lib. results directory
  log "[$l] initializing the working_dir for miseq step 1..."
  rm -f -r "$lib_dir"
  mkdir "$lib_dir"

  # decompress the per-library inputs into the library directory
  log "[$l] decompressing forward/reverse/barcode files..."
  for read_kind in forward_reads reverse_reads barcodes; do
    gzip -d -c "$data_dir/${l}_${read_kind}.fastq.gz" \
      > "$lib_dir/${read_kind}.fastq"
  done

  ## for Bacteria
  log "[$l] demultiplexing Bacteria reads..."
  # the barcode length is passed on as --barcode_type
  bc_len=$(barcode_length "$mapping_file")

  # demultiplex forward and reverse reads with identical settings
  for direction in forward reverse; do
    split_libraries_fastq.py \
      -i "$lib_dir/${direction}_reads.fastq" \
      -b "$lib_dir/barcodes.fastq" \
      -m "$mapping_file" \
      --rev_comp_mapping_barcodes \
      --barcode_type "$bc_len" \
      --max_barcode_errors 0 \
      --store_demultiplexed_fastq \
      -q 0 -r 300 -p 0.01 -n 300 \
      --phred_offset 33 \
      -o "$lib_dir/Bac_${direction}" \
      &>> "$output"
  done

  # split each demultiplexed fastq by sample id
  for direction in forward reverse; do
    split_sequence_file_on_sample_ids.py --file_type fastq \
      -i "$lib_dir/Bac_${direction}/seqs.fastq" \
      -o "$lib_dir/Bac_${direction}/out" \
      &>> "$output"
  done

  log "[$l] step 0 finished."

  # Remove the decompressed inputs. This must be one plain rm command: the
  # earlier backtick-wrapped form lacked a line continuation, so the second
  # path was executed as a command and aborted the script under set -e.
  rm -- "$lib_dir/forward_reads.fastq" \
    "$lib_dir/reverse_reads.fastq" \
    "$lib_dir/barcodes.fastq"
done
# Step 2: run the DADA2 script on the step-1 output of every library.
working_dir_s1=$working_dir
# NOTE(review): working_dir still points at the step-1 directory here, so
# 02.dada2 is created inside it (and is removed by step 1's cleanup on a
# re-run) -- confirm this nesting is intended.
working_dir="$working_dir/02.dada2"
output="$working_dir/output.txt"
logfile="$working_dir/log.txt"
mkdir -p "$working_dir"
# ${var:?} keeps the glob from ever expanding against an empty prefix
rm -f -r "${working_dir:?}"/*

# shellcheck disable=SC2086 -- the list is split on whitespace on purpose
for l in $l_list_miseq; do
  # initialize lib. results directory
  rm -f -r "$working_dir/$l"
  mkdir -p "$working_dir/$l/Bacteria"
  # arguments: step-1 directory of the library, step-2 directory of the library
  "$scripts_dir/dada2_bacteria.R" "$working_dir_s1/$l/" "$working_dir/$l/"
done
# Step 3: build the ASV table from the step-2 results and export a FASTA file.
working_dir_s2=$working_dir
# NOTE(review): as in step 2, this directory is nested inside the previous
# step's directory -- confirm this is intended.
working_dir="$working_dir/03.ASV_tab"
output="$working_dir/output.txt"
logfile="$working_dir/log.txt"
mkdir -p "$working_dir"
rm -f -r "${working_dir:?}"/*
mkdir -p "$working_dir/Bacteria"

# NOTE(review): $l_list_miseq is left unquoted, so every library name becomes
# its own argument ahead of the two directories -- verify against the R script.
# shellcheck disable=SC2086
"$scripts_dir/get_ASV_tab_bacteria.R" $l_list_miseq \
  "$working_dir_s2/" "$working_dir/Bacteria"

# Turn each line of ASV_map.txt into a FASTA record: prefix the first "ASV"
# with ">" and replace the first tab with a newline (\t and \n in the sed
# script rely on GNU sed). Reading the file directly makes a missing or
# unreadable input abort the script instead of silently producing an empty
# file.
# NOTE(review): ASV.fasta is written to the current directory, not to
# $working_dir -- confirm.
sed 's/ASV/>ASV/; s/\t/\n/' "$working_dir/Bacteria/ASV_map.txt" > ASV.fasta