-
Notifications
You must be signed in to change notification settings - Fork 13
/
gawn
executable file
·119 lines (98 loc) · 3.5 KB
/
gawn
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
#!/bin/bash
# Run full GAWN pipeline
# Copy script and config files as they were run
# Run metadata, captured once so every archived file of this run shares the
# same timestamp.
TIMESTAMP=$(date +%Y%m%d_%H%M%S)
SCRIPT=$0
SCRIPT_NAME=$(basename "$SCRIPT")
# First positional argument: path to the config file (validated further down)
CONFIG_FILE=$1
CONFIG_FILENAME=$(basename "$CONFIG_FILE")
LOG_FOLDER="00_archive/log_files"

# Log all output: stdout and stderr are both sent through tee, which copies
# them to the terminal and to a timestamped log file.
# tee cannot create a missing directory; without this mkdir the run would go
# on but the log file would silently never be written.
mkdir -p 99_logs
exec > >(tee 99_logs/gawn_"$TIMESTAMP".log) 2>&1
# Functions
# Print the GAWN banner: a blank line, then the program name and version
# framed by two identical rule lines.
print_logo() {
  local rule=" ------------------------------------------------ "
  printf '%s\n' \
    "" \
    "$rule" \
    "GAWN v0.3.6 - Genome Annotation Without Nightmares" \
    "$rule"
}
# Print the opening frame of a pipeline section.
# Arguments: $1 - section title, printed verbatim after "### "
# Outputs:   six lines on stdout (blank, top border, two frame lines
#            around the title, blank)
print_begin() {
  local title=$1
  printf '%s\n' \
    "" \
    ",########################################################," \
    "##### #" \
    "### $title" \
    "#" \
    ""
}
# Print the closing frame of a pipeline section.
# Arguments: $1 - closing message, printed verbatim after "### " (may be empty)
# Outputs:   five lines on stdout (blank, frame line, message, frame line,
#            bottom border)
print_finished() {
  local message=$1
  printf '%s\n' \
    "" \
    "#" \
    "### $message" \
    "##### #" \
    "'########################################################'"
}
# Print the final "annotation completed" message, preceded by a blank line
# and framed by two identical rule lines.
print_end() {
  local rule=" -------------------------------"
  printf '%s\n' \
    "" \
    "$rule" \
    "GAWN: Genome annotation completed" \
    "$rule"
}
# Print the banner before any pipeline output
print_logo

# Import the config file and keep a timestamped copy in $LOG_FOLDER.
# The file must be a regular, non-empty file; anything else (missing
# argument, directory, empty file) is rejected with an error message.
if [[ -f "$CONFIG_FILE" && -s "$CONFIG_FILE" ]]; then
  source "$CONFIG_FILE"

  # Make sure the archive folder exists so the copy cannot fail silently
  mkdir -p "$LOG_FOLDER"
  cp "$CONFIG_FILE" "$LOG_FOLDER/${TIMESTAMP}_${CONFIG_FILENAME}"

  # Print the config parameters, hiding comment lines and empty lines.
  # The path is quoted so config files with spaces in their name work.
  print_begin "Config file:"
  grep -v -e "^ *#" -e "^$" "$CONFIG_FILE"
  print_finished "End of config file"
else
  echo "GAWN: Config file does not exist or is empty." >&2
  echo " Please specify a valid config file." >&2
  exit 1
fi
# Index genome, depending on SKIP_GENOME_INDEXING from the config file:
#   "0" -> run the indexing script
#   "1" -> skip indexing
#   anything else (including unset) -> report the invalid value and abort
case "$SKIP_GENOME_INDEXING" in
  0)
    print_begin "GAWN: Indexing genome"
    # Quoted so the genome name is always passed as exactly one argument,
    # even if it contains spaces or glob characters
    ./01_scripts/01_index_genome.sh "$GENOME_NAME"
    print_finished "Finished: Indexing genome"
    ;;
  1)
    print_begin "GAWN: Skipping genome indexing"
    echo " nothing to do..."
    print_finished ""
    ;;
  *)
    echo "WARNING: Invalid value in config file for SKIP_GENOME_INDEXING"
    exit 1
    ;;
esac
# Annotate genome with GMAP: run the annotation script on the genome and
# transcriptome named in the config file, then copy the resulting .gff3
# (genome name with its ".fasta" suffix removed) into 05_results.
print_begin "GAWN: Finding transcript positions on genome"
./01_scripts/02_annotate_genome.sh "$GENOME_NAME" "$TRANSCRIPTOME_NAME" "$NCPUS"

# Ensure the results directory exists and address it with a trailing slash:
# with a bare "05_results" target, cp would create a regular FILE of that
# name if the directory were missing, breaking every later step.
mkdir -p 05_results
cp "04_annotation/${GENOME_NAME%.fasta}.gff3" 05_results/
print_finished "Finished: Finding transcript positions on genome"
# Create transcriptome annotation table (.tsv).
# Three helper scripts run in sequence, each announced by a progress line;
# the last one writes <transcriptome>_annotation_table.tsv into 05_results.
print_begin "GAWN: Annotating transcriptome with swissprot"

printf '%s\n' " blasting transcriptome..."
./01_scripts/04_blast_transcriptome_on_swissprot.sh \
  "${TRANSCRIPTOME_NAME}" "${SWISSPROT_DB}" "${NCPUS}"

printf '\n%s\n' " getting uniprot infos..."
./01_scripts/05_get_uniprot_info.sh "${TRANSCRIPTOME_NAME}"

printf '\n%s\n' " annotating transcriptome..."
./01_scripts/06_annotate_transcriptome.py \
  "03_data/${TRANSCRIPTOME_NAME}" \
  04_annotation/genbank_info \
  "05_results/${TRANSCRIPTOME_NAME%.fasta}_annotation_table.tsv"
printf '\n'

print_finished "Finished: Annotating transcriptome with swissprot"
# Create genome annotation table (.tsv).
# The helper script receives, in order: the genome file, its .gff3
# annotation, the transcriptome annotation table, and the output path
# for the genome annotation table.
print_begin "GAWN: Annotating genome using transcriptome"
printf '%s\n' " creating annotation table..."
./01_scripts/07_create_genome_annotation_table.py \
  "03_data/${GENOME_NAME}" \
  "04_annotation/${GENOME_NAME%.fasta}.gff3" \
  "05_results/${TRANSCRIPTOME_NAME%.fasta}_annotation_table.tsv" \
  "05_results/${GENOME_NAME%.fasta}_annotation_table.tsv"
print_finished "Finished: Annotating genome using transcriptome"

# Closing message for the whole pipeline
print_end