-
Notifications
You must be signed in to change notification settings - Fork 4
/
alphafold-analyser.py
executable file
·142 lines (105 loc) · 5.13 KB
/
alphafold-analyser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
#!/usr/local/bin/python3.7
# Import all relevant libraries
import argparse
import os
import pickle
import subprocess

import matplotlib.pyplot as plt
# Create a PAE plot from the pkl file produced by AlphaFold - N.B code taken from AlphaFold CoLab
def pae_plotter(pickle_input, output):
    """Plot the predicted aligned error (PAE) stored in an AlphaFold pkl file.

    The plot is saved as ``pae.png`` inside ``output``. Problems with the
    input file are reported on stdout instead of being raised, so the caller
    can carry on with any remaining analysis steps.

    Args:
        pickle_input: Path to the pkl results file produced by AlphaFold.
        output: Directory in which ``pae.png`` is written.

    Returns:
        None.
    """
    fig = None
    try:
        # NOTE: pickle.load can execute arbitrary code - only open pkl files
        # that come from a trusted AlphaFold run.
        # The context manager closes the file even when unpickling fails.
        with open(pickle_input, 'rb') as data:
            prediction_result = pickle.load(data)

        # Read both values before any plotting so that a pkl file without
        # PAE data fails early with a clear message
        pae = prediction_result['predicted_aligned_error']
        max_pae = prediction_result['max_predicted_aligned_error']

        # Output file_path for the plot
        pae_output = os.path.join(output, 'pae.png')

        # Plot predicted align error results for each aligned residue
        fig = plt.figure()  # generate figure
        fig.set_facecolor('white')  # color background white
        plt.imshow(pae, vmin=0., vmax=max_pae)  # plot pae
        plt.colorbar(fraction=0.46, pad=0.04)  # create color bar
        plt.title('Predicted Aligned Error')  # plot title
        plt.xlabel('Scored residue')  # plot x-axis label
        plt.ylabel('Aligned residue')  # plot y-axis label
        plt.savefig(pae_output, dpi=1000, bbox_inches='tight')  # save plot to output directory
        print('\n predicted aligned error plotted\n')
    except EOFError:
        print(' Error: Data could not be found, predicted aligned error plotting failed\n')
    except FileNotFoundError:
        print(' Error: File could not be found, predicted aligned error plotting failed\n')
    except pickle.UnpicklingError:
        print(' Error: File is not a valid pkl file, predicted aligned error plotting failed\n')
    except KeyError:
        print(' Error: No predicted aligned error data in pkl file, predicted aligned error plotting failed\n')
    finally:
        # Release the figure so it does not linger in pyplot's global state
        if fig is not None:
            plt.close(fig)
# Create a PyMOL session from the pdb file generated by AlphaFold
def protein_painter(pdb_input, output):
    """Create a PyMOL session of the structure coloured by pLDDT.

    Runs the ``PyMol`` executable on ``pdb_input``, colours the structure by
    b-factor with the ``yellow_green_blue`` spectrum and saves the session as
    ``pLDDT.pse`` inside ``output``. The outcome is reported on stdout.

    Args:
        pdb_input: Path to the pdb file generated by AlphaFold.
        output: Directory in which ``pLDDT.pse`` is written.

    Returns:
        None.
    """
    # File path for the PyMol session
    session_path = f'{output}/pLDDT.pse'
    # Build an argument list rather than a shell string: paths containing
    # spaces or shell metacharacters reach PyMol as single, literal arguments
    pymol_command = [
        'PyMol',
        '-cq',
        str(pdb_input),
        '-d',
        f'spectrum b, yellow_green_blue; save {session_path}',
    ]
    try:
        subprocess.run(pymol_command, check=False)
    except OSError:
        # Executable missing or not runnable: report it instead of crashing
        print('\n Error: PyMol could not be run, visualisation failed\n')
        return
    # Success is judged by the session file being present
    if os.path.isfile(session_path):
        print('\n pLDDT data visualised\n')
    else:
        print('\n Error: visualisation failed\n')
# Generate CLI and define arguments with Argparse
def cmd_lineparser(argv=None):
    """Build the command-line interface, then parse and validate the arguments.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) the arguments are taken from ``sys.argv``.

    Returns:
        argparse.Namespace with the attributes ``pdb``, ``pkl`` and
        ``output``. ``pdb`` and ``pkl`` are ``None`` when not supplied.

    Raises:
        SystemExit: After printing help when no argument is given, or via
            ``parser.error`` when an input has the wrong file extension or
            the output directory is missing or does not exist.
    """
    parser = argparse.ArgumentParser(
        prog='AlphaFold Analyser',
        add_help=False,
        formatter_class=argparse.RawTextHelpFormatter,
    )

    group_inputs = parser.add_argument_group('Inputs')
    # Get pdb structure path
    group_inputs.add_argument(
        '-p', '--pdb', metavar='\b', type=str, action='store',
        help='path to pdb file - generates pLDDT coloured structure',
        default=None)
    # Get pkl file path
    group_inputs.add_argument(
        '-l', '--pkl', metavar='\b', type=str, action='store',
        help='path to pkl file - generates predicted aligned error plot',
        default=None)

    group_output = parser.add_argument_group('Outputs')
    # Get output directory
    group_output.add_argument(
        '-o', '--output', metavar='\b', type=str, action='store',
        help='directory to store all generated outputs', default=None)

    group_options = parser.add_argument_group('Options')
    # Get Version
    group_options.add_argument('-v', '--version', action='version', version='%(prog)s v1.0')
    # Get help
    group_options.add_argument("-h", "--help", action="help", help="show this help message and exit\n ")

    # Parse arguments
    args = parser.parse_args(argv)

    # If no argument was supplied at all, display the help text and exit
    if args.pkl is None and args.pdb is None and args.output is None:
        parser.print_help()
        parser.exit()

    # Check arg.pdb input is a pdb file
    if args.pdb is not None and not args.pdb.endswith('.pdb'):
        parser.error('ERROR: --pdb requires pdb file as input')

    # Check arg.pkl input is a pkl file
    if args.pkl is not None and not args.pkl.endswith('.pkl'):
        parser.error('ERROR: --pkl requires pkl file as input')

    # An output directory is needed by every analysis step; test for None
    # first because os.path.isdir(None) raises TypeError
    if args.output is None:
        parser.error('ERROR: --output directory is required')

    # Check output directory exists
    if not os.path.isdir(args.output):
        parser.error('ERROR: Output directory not found')

    return args
# Perform analysis of alphafold results
def main():
    """Parse the command line and run each analysis whose input was supplied."""
    options = cmd_lineparser()

    # pLDDT visualisation: needs a pdb structure, otherwise the step is skipped
    if options.pdb is None:
        print('\n no pdb file provided, skipping pLDDT data visualisation...\n')
    else:
        print('\n Visualising pLDDT data...\n')
        protein_painter(options.pdb, options.output)

    # Predicted aligned error plot: needs a pkl file, otherwise the step is skipped
    if options.pkl is None:
        print(' no pickle file provided, skipping predicted aligned error visualisation...\n')
    else:
        print(' plotting predicted aligned error...')
        pae_plotter(options.pkl, options.output)

    print(' all processes finished, shutting down...\n')
# Run analysis
# Only start the analysis when executed as a script, not when imported
if __name__ == "__main__":
    main()