/
find_apo_holo.sh
executable file
·103 lines (90 loc) · 4.86 KB
/
find_apo_holo.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
#!/bin/bash
#$ -l h_vmem=2G
#$ -l mem_free=2G
#$ -t 1-1
#$ -l h_rt=100:00:00
#$ -R yes
#$ -V
'''
This script will take the information on the cystallographic and sequence information of each holo and apo structure and decide if they are pairs.
INPUT:
OUTPUT: Text file with a list of paired PDBs with the resolution of each of them.
'''
#____________________________________________SOURCE REQUIREMENTS____________________________________
source phenix_env.sh #source phenix (fill in phenix location)
source activate qfit #conda env with qFit
base_dir = '/this/is/where/your/PDB/folders/are'
apo_file=PDB_Apo.txt #this is an output file from get_PDB_info or get_PDB_info_parallel
cd ${base_dir}
mkdir potential_pairs
while read apo_PDB; do
echo ${apo_PDB}
if [ -f ${base_dir}/${apo_PDB}/${apo_PDB}.pdb ]; then
RESO1=$(grep ${apo_PDB} ${base_dir}/space_unit_reso.txt | head -n 1 | awk '{print $2}')
SPACE1=$(grep ${apo_PDB} ${base_dir}/space_unit_reso.txt | head -n 1 | awk '{print $3}')
RESO1_lower=$(echo ${RESO1}-0.1 | bc -l)
RESO1_upper=$(echo ${RESO1}+0.1 | bc -l)
UNIT1=$(grep ${apo_PDB} ${base_dir}/space_unit_reso.txt | tail -n 1 | sed "s/[(),]//g" | awk '{print $4,$5,$6,$7,$8,$9}')
UNIT1_out=$UNIT1
UNIT1=( $UNIT1 )
UNIT1_0_lower=$(echo ${UNIT1[0]}-1 | bc -l)
UNIT1_0_upper=$(echo ${UNIT1[0]}+1 | bc -l)
UNIT1_1_lower=$(echo ${UNIT1[1]}-1 | bc -l)
UNIT1_1_upper=$(echo ${UNIT1[1]}+1 | bc -l)
UNIT1_2_lower=$(echo ${UNIT1[2]}-1 | bc -l)
UNIT1_2_upper=$(echo ${UNIT1[2]}+1 | bc -l)
UNIT1_3_lower=$(echo ${UNIT1[3]}-1 | bc -l)
UNIT1_3_upper=$(echo ${UNIT1[3]}+1 | bc -l)
UNIT1_4_lower=$(echo ${UNIT1[4]}-1 | bc -l)
UNIT1_4_upper=$(echo ${UNIT1[4]}+1 | bc -l)
UNIT1_5_lower=$(echo ${UNIT1[5]}-1 | bc -l)
UNIT1_5_upper=$(echo ${UNIT1[5]}+1 | bc -l)
holo_file=holo_PDBs.txt
while read holo_PDB; do
if [ -f ${base_dir}/${holo_PDB}/${holo_PDB}.pdb ]; then
RESO2=$(grep ${holo_PDB} space_unit_reso.txt | head -n 1 | awk '{print $2}')
SPACE2=$(grep ${holo_PDB} space_unit_reso.txt | head -n 1 | awk '{print $3}')
if (( `echo ${RESO2}'<='${RESO1_upper} | bc` )) && (( `echo ${RESO2}'>='${RESO1_lower} | bc` )); then
if [ $SPACE1 == $SPACE2 ]; then
UNIT2=$(grep ${holo_PDB} space_unit_reso.txt | tail -n 1 | sed "s/[(),]//g" | awk '{print $4,$5,$6,$7,$8,$9}')
UNIT2=( $UNIT2 )
if (( $(echo "${UNIT2[0]} <= ${UNIT1_0_upper}" |bc -l) )) && (( $(echo "${UNIT2[0]} >= ${UNIT1_0_lower}" |bc -l) )) && (( $(echo "${UNIT2[1]} <= ${UNIT1_1_upper}" |bc -l) )) && (( $(echo "${UNIT2[1]} >= ${UNIT1_1_lower}" |bc -l) )) && (( $(echo "${UNIT2[2]} <= ${UNIT1_2_upper}" |bc -l) )) && (( $(echo "${UNIT2[2]} >= ${UNIT1_2_lower}" |bc -l) )); then
if (( $(echo "${UNIT2[3]} <= ${UNIT1_3_upper}"|bc -l) )) && (( $(echo "${UNIT2[3]} >= ${UNIT1_3_lower}" |bc -l) )) && (( $(echo "${UNIT2[4]} <= ${UNIT1_4_upper}" |bc -l) )) && (( $(echo "${UNIT2[4]} >= ${UNIT1_4_lower}" |bc -l) )) && (( $(echo "${UNIT2[5]} <= ${UNIT1_5_upper}" |bc -l) )) && (( $(echo "${UNIT2[5]} >= ${UNIT1_5_lower}" |bc -l) )); then
echo ${holo_PDB} >> ${base_dir}/potential_pairs/${apo_PDB}_potential_pairs.txt
fi
fi
fi
fi
fi
done<$holo_file
#now we have a list of potential pairs, and we are going to go through and match up the more difficult things
SEQ1=$(grep ${apo_PDB} ${base_dir}/sequences.txt | head -n 1 | awk '{print $2}')
pot_pairs=${base_dir}/potential_pairs/${apo_PDB}_potential_pairs.txt
for pair in $(cat $pot_pairs); do
if [ ! -f ${base_dir}/${pair}/${pair}.pdb ]; then
echo 'PDB not found'
continue
fi
RESO2=$(grep ${pair} ${base_dir}/space_unit_reso.txt | head -n 1 | awk '{print $2}')
UNIT2=$(grep ${pair} ${base_dir}space_unit_reso.txt | tail -n 1 | sed "s/[(),]//g" | awk '{print $4,$5,$6,$7,$8,$9}')
SEQ2=$(grep ${pair} ${base_dir}/sequences.txt | head -n 1 | awk '{print $2}')
if [[ -z ${SEQ1} ]]; then
continue
elif [[ -z ${SEQ2} ]]; then
continue
else
if [ "$SEQ1" = "$SEQ2" ]; then
echo $line $line2 $RESO1 $RESO2 >> ${base_dir}/holo_apo_pairs.txt
else
SEQ1_end5=${SEQ1:-5}
SEQ2_end5=${SEQ2:-5}
SEQ1_begin5=${SEQ1:5}
SEQ2_begin5=${SEQ2:5}
if [ "$SEQ1" = "$SEQ2_begin5" ] || [ "$SEQ1" = "$SEQ2_end5" ] || [ "$SEQ2" = "$SEQ1_end5" ] || [ "$SEQ2" = "$SEQ1_begin5" ]; then
echo $line $line2 $RESO1 $RESO2 >> ${base_dir}/holo_apo_pairs.txt
fi
fi
fi
done
fi
done <$apo_file