/
VariableSequenceMonteCarloJob.java
358 lines (317 loc) · 16.7 KB
/
VariableSequenceMonteCarloJob.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
import java.io.*;
import java.util.*;
import com.google.common.collect.*;
import java.util.concurrent.*;
import java.util.concurrent.atomic.*;
import org.apache.commons.math3.geometry.euclidean.threed.Vector3D;
import org.apache.commons.io.FileUtils;
import org.apache.commons.math3.distribution.NormalDistribution;
/**
* Optimizes the non-active-site residues of a catalyst design.
*/
public class VariableSequenceMonteCarloJob extends MonteCarloJob implements Serializable
{
/** For serialization. */
public static final long serialVersionUID = 1L;
/** How many rotamers should be minimized on each iteration. */
public final int rotamersPerIteration;
/** The current peptide. */
public Peptide currentPeptide;
/** Stores the best results. */
public final MonteCarloJob.PeptideList bestPeptides;
/** Where to save the result to periodically. */
public final String checkpointFilename;
/** Unique ID. */
private long serverID;
/** The current iteration. */
public int currentIteration = 0;
/**
* Sets up job but doesn't run it.
* @param startingPeptide the starting structure
* @param deltaAlpha the cooling rate
* @param maxIterations how many MC iterations to perform
* @param maxSize the number of peptides to keep
* @param how many rotamers should be minimized on each iteration
* @param checkpointFilename job will be serialized here when done
*/
public VariableSequenceMonteCarloJob(Peptide startingPeptide, double deltaAlpha, int maxIterations, int maxSize,
int rotamersPerIteration, String checkpointFilename)
{
super(startingPeptide, deltaAlpha, maxIterations);
this.bestPeptides = new MonteCarloJob.PeptideList(maxSize);
this.rotamersPerIteration = rotamersPerIteration;
this.checkpointFilename = checkpointFilename;
this.serverID = RemoteWorkUnit.ID_GENERATOR.incrementAndGet();
this.currentPeptide = startingPeptide;
}
public boolean isDone()
{
return currentIteration >= maxIterations;
}
/**
* Mutates a residue at random without changing the backbone. The procedure is:<p>
* (1) Select a non-active-site residue at random.<p>
* (2) Mutate to a different amino acid.<p>
* (3) Rotamer pack, minimize, and retain only catalytic results.<p>
* Expects and outputs close contact peptides.
* @param peptide the peptide whose backbone we should perturb
* @return the perturbed peptide
*/
public Peptide mutate(Peptide inputPeptide)
{
//Peptide.writeGJFs(bestPeptides.getList(), "test_peptides/best_", 2, 10);
//Peptide.writeCHKs(bestPeptides.getList(), "test_peptides/best_", 2, 10);
// random number generator
//System.out.println(inputPeptide.name);
ThreadLocalRandom random = ThreadLocalRandom.current();
Peptide peptide = HydrogenBondMutator.unmutate(inputPeptide);
// figure out which positions can be mutated
List<Integer> validPositions = new ArrayList<>(peptide.sequence.size());
for (int i=0; i < peptide.sequence.size(); i++)
{
Residue residue = peptide.sequence.get(i);
if ( residue.isHairpin || residue.aminoAcid == AminoAcid.TS ||
residue.aminoAcid == AminoAcid.HIS || residue.aminoAcid == AminoAcid.ARG )
continue;
validPositions.add(i);
}
// create a list of positions in the peptide sequence that can be mutated and what they can be mutated to
List<Pair<Integer,ProtoAminoAcid>> mutationOutcomes = new ArrayList<>();
for (Integer i : validPositions)
{
// make a list of all the amino acids that we can mutate to at this position
Residue residue = peptide.sequence.get(i);
AminoAcid currentAminoAcid = residue.aminoAcid;
List<AminoAcid> excludeAminoAcids = new ArrayList<>();
excludeAminoAcids.add(currentAminoAcid);
int charge = PeptideChargeCalculator.getCharge(peptide, i);
if ( charge == 0 )
{
excludeAminoAcids.add(AminoAcid.ASP);
excludeAminoAcids.add(AminoAcid.GLU);
}
for (ProtoAminoAcid paa : CatalystRotamerSpace.MUTATION_OUTCOMES)
{
AminoAcid aa = paa.residue.aminoAcid;
if ( excludeAminoAcids.contains(aa) )
continue;
Pair<Integer,ProtoAminoAcid> pair = new Pair<>(i, paa);
mutationOutcomes.add(pair);
}
}
Collections.shuffle(mutationOutcomes);
// run through the first structure in each possibility
List<Pair<Peptide,RotamerIterator.Node>> otherSolutionsToTry = new ArrayList<>();
for (int mutationCount=0; mutationCount < mutationOutcomes.size(); mutationCount++)
{
Pair<Integer,ProtoAminoAcid> mutationOutcome = mutationOutcomes.get(mutationCount);
int i = mutationOutcome.getFirst();
ProtoAminoAcid paa = mutationOutcome.getSecond();
System.out.printf("[%3d] Mutating position %d to %s (%d of %d possibilities).\n", serverID, i, paa.residue.description, mutationCount+1, mutationOutcomes.size());
// make the mutation
Residue residue = peptide.sequence.get(i);
Peptide newPeptide = SidechainMutator.mutateSidechain(peptide, residue, paa);
// pack the peptide
VariableSequenceRotamerSpace variableSequenceRotamerSpace = null;
try
{
// note that the includeHN should be set to false if we want to do A* here
variableSequenceRotamerSpace = new VariableSequenceRotamerSpace(newPeptide, false, false);
}
catch (Exception e)
{
if ( e instanceof IllegalArgumentException )
System.out.printf("[%3d] microiteration rejected (packing failure: %s)\n", serverID, e.getMessage());
else
{
System.out.printf("[%3d] microiteration rejected (rotamer packing unknown error)\n", serverID);
e.printStackTrace();
}
continue;
}
// check for empty nodes
boolean empty = true;
for (List<Rotamer> list : variableSequenceRotamerSpace.rotamerSpace)
{
if ( list.size() > 1 )
{
empty = false;
break;
}
}
if ( empty )
{
System.out.printf("[%3d] microiteration rejected (empty node)\n", serverID);
continue;
}
// rotamer pack
AStarEnergyCalculator calculator = AStarEnergyCalculator.analyze(variableSequenceRotamerSpace, false);
RotamerIterator iterator = new RotamerIterator(variableSequenceRotamerSpace.rotamerSpace, calculator.rotamerSelfEnergies, calculator.rotamerInteractionEnergies, rotamersPerIteration, false);
List<RotamerIterator.Node> solutions = iterator.iterate();
if ( solutions.size() == 0 )
{
System.out.printf("[%3d] microiteration rejected (no A* solutions)\n", serverID);
continue;
}
// keep the other solutions, up to rotamersPerIteration
List<RotamerIterator.Node> otherSolutions = new ArrayList<>();
for (int solutionCount=1; solutionCount < Math.min(solutions.size(), rotamersPerIteration); solutionCount++)
otherSolutions.add(solutions.get(solutionCount));
for (RotamerIterator.Node node : otherSolutions)
{
Pair<Peptide,RotamerIterator.Node> pair = new Pair<>(newPeptide, node);
otherSolutionsToTry.add(pair);
}
// reconstitute the first pose
RotamerIterator.Node firstNode = solutions.get(0);
List<Rotamer> rotamers = firstNode.rotamers;
Peptide thisPeptide = Rotamer.reconstitute(newPeptide, rotamers);
thisPeptide = HydrogenBondMutator.mutate(thisPeptide);
// minimize with AMOEBA with analysis (approximate OMNISOL solvation)
Peptide minimizedPeptide = null;
try { minimizedPeptide = TinkerJob.minimize(thisPeptide, Forcefield.AMOEBA, 250, false, false, true, false, true); }
catch (Exception e) { e.printStackTrace(); continue; }
// check that the geometry is still catalytic
if ( !CatalystDesigner.isCatalytic(minimizedPeptide) )
continue;
// get the reference energy
double referenceEnergy = Interaction.getAMOEBAReferenceEnergy(minimizedPeptide);
// adjust total energy for reference energy
double referencedEnergy = minimizedPeptide.energyBreakdown.totalEnergy - referenceEnergy;
//System.out.printf("referenced energy is %.2f\n", referencedEnergy);
EnergyBreakdown energyBreakdown = new EnergyBreakdown(null, referencedEnergy, 0.0, referencedEnergy, null, Forcefield.AMOEBA);
minimizedPeptide = minimizedPeptide.setEnergyBreakdown(energyBreakdown);
bestPeptides.add(minimizedPeptide);
return minimizedPeptide;
}
// run randomly through the remaining
Collections.shuffle(otherSolutionsToTry);
for (int i=0; i < otherSolutionsToTry.size(); i++)
{
Pair<Peptide,RotamerIterator.Node> pair = otherSolutionsToTry.get(i);
Peptide newPeptide = pair.getFirst();
System.out.printf("Trying remaining pose %d of %d (%s).\n", i+1, otherSolutionsToTry.size(), newPeptide.name.split("@")[0]);
RotamerIterator.Node node = pair.getSecond();
List<Rotamer> rotamers = node.rotamers;
Peptide thisPeptide = Rotamer.reconstitute(newPeptide, rotamers);
thisPeptide = HydrogenBondMutator.mutate(thisPeptide);
// minimize with AMOEBA with analysis (approximate OMNISOL solvation)
Peptide minimizedPeptide = null;
try { minimizedPeptide = TinkerJob.minimize(thisPeptide, Forcefield.AMOEBA, 250, false, false, true, false, true); }
catch (Exception e) { e.printStackTrace(); continue; }
// check that the geometry is still catalytic
if ( !CatalystDesigner.isCatalytic(minimizedPeptide) )
continue;
// get the reference energy
double referenceEnergy = Interaction.getAMOEBAReferenceEnergy(minimizedPeptide);
// adjust total energy for reference energy
double referencedEnergy = minimizedPeptide.energyBreakdown.totalEnergy - referenceEnergy;
System.out.printf("referenced energy is %.2f\n", referencedEnergy);
EnergyBreakdown energyBreakdown = new EnergyBreakdown(null, referencedEnergy, 0.0, referencedEnergy, null, Forcefield.AMOEBA);
minimizedPeptide = minimizedPeptide.setEnergyBreakdown(energyBreakdown);
bestPeptides.add(minimizedPeptide);
return minimizedPeptide;
}
// we've run out of things to do, so throw an exception
throw new IllegalArgumentException("nothing left to do");
}
/** Writes the job to disk. */
public void checkpoint()
{
// if there is already a checkpoint, back it up
System.out.printf("[%3d] Serializing...", serverID);
File oldFile = new File(checkpointFilename);
if ( oldFile.exists() )
{
File backupFile = new File(checkpointFilename + ".bak");
try { org.apache.commons.io.FileUtils.copyFile(oldFile,backupFile); }
catch (IOException e) { System.out.printf("[%3d] Error backing up file: %s\n", serverID, checkpointFilename); }
//System.out.printf("backup made...");
}
try
{
FileOutputStream fileOut = new FileOutputStream(checkpointFilename);
ObjectOutputStream out = new ObjectOutputStream(fileOut);
out.writeObject(this);
out.close();
fileOut.close();
}
catch (Exception e)
{
e.printStackTrace();
}
}
@Override
public MonteCarloResult call()
{
// perform the Metropolis Monte Carlo
double currentAlpha = 0.0;
double firstEnergy = bestPeptides.getBestEnergy();
double lastBestEnergy = firstEnergy;
while ( currentIteration < maxIterations )
{
if ( currentIteration > 10 && currentIteration % 10 == 0 )
checkpoint();
// abort if kill file found
if ( new File("kill.txt").isFile() )
{
System.out.println("Kill file found. Aborting.");
break;
}
// make a mutation (minimization is done inside this)
Peptide candidate = mutate(currentPeptide);
// apply modified Metropolis criterion
boolean isAccepted = true;
if ( currentIteration > 0 ) // auto-accept first iteration
isAccepted = MonteCarloJob.acceptChange(currentPeptide, candidate, currentAlpha);
double thisEnergy = candidate.energyBreakdown.totalEnergy;
double bestEnergy = bestPeptides.getBestEnergy();
if ( isAccepted )
{
currentPeptide = candidate;
if ( bestEnergy < lastBestEnergy )
System.out.printf("[%3d] Iter %d of %d (***new best***, alpha = %.6f, E = %.2f, bestE = %.2f, initialE = %.2f)\n", serverID, currentIteration+1, maxIterations, currentAlpha, thisEnergy, bestEnergy, firstEnergy);
else
System.out.printf("[%3d] Iter %d of %d (accepted, alpha = %.6f, E = %.2f, bestE = %.2f, initialE = %.2f)\n", serverID, currentIteration+1, maxIterations, currentAlpha, thisEnergy, bestEnergy, firstEnergy);
}
else
System.out.printf("[%3d] Iter %d of %d (rejected, alpha = %.6f, E = %.2f, bestE = %.2f, initialE = %.2f)\n", serverID, currentIteration+1, maxIterations, currentAlpha, thisEnergy, bestEnergy, firstEnergy);
// update alpha
currentAlpha = currentAlpha + deltaAlpha;
lastBestEnergy = bestEnergy;
currentIteration++;
}
// save this result to disk if requested
if ( checkpointFilename != null )
checkpoint();
System.out.printf("[%3d] Finished. (bestE = %.2f)\n", serverID, bestPeptides.getBestEnergy());
// return result
return new MonteCarloResult(bestPeptides.getList());
}
public static VariableSequenceMonteCarloJob load(String filename)
{
if ( filename == null || filename.length() == 0 )
throw new NullPointerException("must specify a filename");
try
{
FileInputStream fileIn = new FileInputStream(filename);
ObjectInputStream in = new ObjectInputStream(fileIn);
VariableSequenceMonteCarloJob j = (VariableSequenceMonteCarloJob)in.readObject();
in.close();
fileIn.close();
return j;
}
catch (Exception e)
{
e.printStackTrace();
return null;
}
}
public static void main(String[] args)
{
Peptide peptide = Peptide.load("test_peptides/singlePeptide.chk");
VariableSequenceMonteCarloJob job = new VariableSequenceMonteCarloJob(peptide, 0.001, 1000, 100, 4, "test_peptides/test.chk");
job.call();
Peptide.writeGJFs(job.bestPeptides.getList(), "test_peptides/vsmcjob_", 2, 100);
}
}