/
PeptideProperties.java
616 lines (583 loc) · 28.2 KB
/
PeptideProperties.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
/*
* BioJava development code
*
* This code may be freely distributed and modified under the
* terms of the GNU Lesser General Public Licence. This should
* be distributed with the code. If you do not have a copy,
* see:
*
* http://www.gnu.org/copyleft/lesser.html
*
* Copyright for this code is held jointly by the individual
* authors. These should be listed in @author doc comments.
*
* For more information on the BioJava project and its aims,
* or to join the biojava-l mailing list, visit the home page
* at:
*
* http://www.biojava.org/
*
*/
package org.biojava.nbio.aaproperties;
import org.biojava.nbio.aaproperties.xml.AminoAcidCompositionTable;
import org.biojava.nbio.core.exceptions.CompoundNotFoundException;
import org.biojava.nbio.core.sequence.ProteinSequence;
import org.biojava.nbio.core.sequence.compound.AminoAcidCompound;
import org.biojava.nbio.core.sequence.compound.AminoAcidCompoundSet;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import jakarta.xml.bind.JAXBException;
import java.io.File;
import java.io.FileNotFoundException;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
/**
 * An adaptor class that makes it easy to compute protein properties directly from sequence strings.
 * At least one adaptor method is provided for each property available in IPeptideProperties.
 * <p>
 * Unless stated otherwise, every adaptor passes the input through {@code Utils.checkSequence},
 * wraps the result in a {@link ProteinSequence} and delegates to a fresh {@link PeptidePropertiesImpl}.
 * If the checked sequence is nevertheless rejected by {@link ProteinSequence}, the error is logged
 * and an {@link IllegalStateException} (carrying the original cause) is thrown.
 *
 * @author kohchuanhock
 * @version 2011.08.22
 * @since 3.0.2
 * @see IPeptideProperties
 * @see PeptidePropertiesImpl
 */
public class PeptideProperties {

    private static final Logger logger = LoggerFactory.getLogger(PeptideProperties.class);

    /**
     * Enumeration of the 20 standard amino acid codes
     */
    public enum SingleLetterAACode { W, C, M, H, Y, F, Q, N, I, R, D, P, T, K, E, V, S, G, A, L}

    /**
     * Contains the 20 standard AA codes in a set
     */
    public static Set<Character> standardAASet;

    /**
     * Initializes the standardAASet from the constants of SingleLetterAACode
     */
    static {
        standardAASet = Arrays.stream(SingleLetterAACode.values())
                .map(singleLetterAACode -> singleLetterAACode.toString().charAt(0))
                .collect(Collectors.toCollection(HashSet::new));
    }

    /**
     * Passes the sequence through {@code Utils.checkSequence(String)} and wraps the result in a
     * {@link ProteinSequence} built with the default compound set.
     *
     * @param sequence
     * 		a protein sequence consisting of non-ambiguous characters only
     * @return the protein sequence object handed to the IPeptideProperties implementation
     * @throws IllegalStateException
     * 		if the checked sequence is still rejected by ProteinSequence
     */
    private static ProteinSequence toProteinSequence(String sequence) {
        String checked = Utils.checkSequence(sequence);
        try {
            return new ProteinSequence(checked);
        } catch (CompoundNotFoundException e) {
            // the sequence was checked with Utils.checkSequence, this shouldn't happen
            throw invalidSequence(e);
        }
    }

    /**
     * Logs the unexpected compound failure and builds the exception to throw, so that callers fail
     * fast instead of handing a null sequence to the implementation.
     *
     * @param e
     * 		the failure reported while constructing the ProteinSequence
     * @return the exception to be thrown by the caller, with {@code e} as its cause
     */
    private static IllegalStateException invalidSequence(CompoundNotFoundException e) {
        logger.error("The protein sequence contains invalid characters ({}), this should not happen. This is most likely a bug in Utils.checkSequence()", e.getMessage());
        return new IllegalStateException(
                "The protein sequence contains invalid characters (" + e.getMessage() + ")", e);
    }

    /**
     * An adaptor method to return the molecular weight of sequence.
     * The sequence argument must be a protein sequence consisting of only non-ambiguous characters.
     * This method will sum the molecular weight of each amino acid in the
     * sequence. Molecular weights are based on <a href="http://web.expasy.org/findmod/findmod_masses.html">here</a>.
     *
     * @param sequence
     * 		a protein sequence consisting of non-ambiguous characters only
     * @return the total molecular weight of sequence + weight of water molecule
     */
    public static final double getMolecularWeight(String sequence){
        ProteinSequence pSequence = toProteinSequence(sequence);
        IPeptideProperties pp = new PeptidePropertiesImpl();
        return pp.getMolecularWeight(pSequence);
    }

    /**
     * An adaptor method to return the molecular weight of sequence.
     * The sequence argument must be a protein sequence consisting of only non-ambiguous characters.
     * This method will sum the molecular weight of each amino acid in the
     * sequence. Molecular weights are based on the input xml files.
     *
     * @param sequence
     * 		a protein sequence consisting of non-ambiguous characters only
     * @param elementMassFile
     * 		xml file that details the mass of each elements and isotopes
     * @param aminoAcidCompositionFile
     * 		xml file that details the composition of amino acids
     * @return the total molecular weight of sequence + weight of water molecule
     * @throws FileNotFoundException
     * 		thrown if either elementMassFile or aminoAcidCompositionFile are not found
     * @throws JAXBException
     * 		thrown if unable to properly parse either elementMassFile or aminoAcidCompositionFile
     */
    public static final double getMolecularWeight(String sequence, File elementMassFile, File aminoAcidCompositionFile)
            throws FileNotFoundException, JAXBException{
        ProteinSequence pSequence = toProteinSequence(sequence);
        IPeptideProperties pp = new PeptidePropertiesImpl();
        return pp.getMolecularWeight(pSequence, elementMassFile, aminoAcidCompositionFile);
    }

    /**
     * An adaptor method to return the molecular weight of sequence. The sequence argument must be a protein sequence consisting of only non-ambiguous characters.
     * This method will sum the molecular weight of each amino acid in the
     * sequence. Molecular weights are based on the input files. These input files must be XML using the defined schema.
     * Note that it assumes that ElementMass.xml file can be found in default location.
     *
     * @param sequence
     * 		a protein sequence consisting of non-ambiguous characters only
     * @param aminoAcidCompositionFile
     * 		xml file that details the composition of amino acids
     * @return the total molecular weight of sequence + weight of water molecule
     * @throws JAXBException
     * 		thrown if unable to properly parse either the element mass file or aminoAcidCompositionFile
     * @throws FileNotFoundException
     * 		thrown if either the element mass file or aminoAcidCompositionFile are not found
     */
    public static final double getMolecularWeight(String sequence, File aminoAcidCompositionFile) throws FileNotFoundException, JAXBException{
        ProteinSequence pSequence = toProteinSequence(sequence);
        IPeptideProperties pp = new PeptidePropertiesImpl();
        return pp.getMolecularWeight(pSequence, aminoAcidCompositionFile);
    }

    /**
     * An adaptor method that initializes the amino acid composition table based on the input xml file and returns the table for usage in future calls to
     * IPeptideProperties.getMolecularWeightBasedOnXML(ProteinSequence, AminoAcidCompositionTable).
     * Note that ElementMass.xml is assumed to be found in the default location.
     *
     * @param aminoAcidCompositionFile
     * 		xml file that details the composition of amino acids
     * @return the initialized amino acid composition table
     * @throws JAXBException
     * 		thrown if unable to properly parse either the element mass file or aminoAcidCompositionFile
     * @throws FileNotFoundException
     * 		thrown if either the element mass file or aminoAcidCompositionFile are not found
     */
    public static final AminoAcidCompositionTable obtainAminoAcidCompositionTable(File aminoAcidCompositionFile)
            throws JAXBException, FileNotFoundException{
        IPeptideProperties pp = new PeptidePropertiesImpl();
        return pp.obtainAminoAcidCompositionTable(aminoAcidCompositionFile);
    }

    /**
     * An adaptor method that initializes the amino acid composition table based on the input xml files and returns the table for usage in future calls to
     * IPeptideProperties.getMolecularWeightBasedOnXML(ProteinSequence, AminoAcidCompositionTable).
     *
     * @param elementMassFile
     * 		xml file that details the mass of each elements and isotopes
     * @param aminoAcidCompositionFile
     * 		xml file that details the composition of amino acids
     * @return the initialized amino acid composition table
     * @throws JAXBException
     * 		thrown if unable to properly parse either elementMassFile or aminoAcidCompositionFile
     * @throws FileNotFoundException
     * 		thrown if either elementMassFile or aminoAcidCompositionFile are not found
     */
    public static final AminoAcidCompositionTable obtainAminoAcidCompositionTable(File elementMassFile, File aminoAcidCompositionFile)
            throws JAXBException, FileNotFoundException{
        IPeptideProperties pp = new PeptidePropertiesImpl();
        return pp.obtainAminoAcidCompositionTable(elementMassFile, aminoAcidCompositionFile);
    }

    /**
     * An adaptor method that returns the molecular weight of sequence. The sequence argument must be a protein sequence consisting of only non-ambiguous characters.
     * This method will sum the molecular weight of each amino acid in the
     * sequence. Molecular weights are based on the AminoAcidCompositionTable.
     * The sequence is checked against the symbol set of the table and built with the table's compound set.
     *
     * @param sequence
     * 		a protein sequence consisting of non-ambiguous characters only
     * @param aminoAcidCompositionTable
     * 		an amino acid composition table obtained by calling obtainAminoAcidCompositionTable; must not be null
     * @return the total molecular weight of sequence + weight of water molecule
     */
    public static double getMolecularWeightBasedOnXML(String sequence, AminoAcidCompositionTable aminoAcidCompositionTable){
        String checked = Utils.checkSequence(sequence, aminoAcidCompositionTable.getSymbolSet());
        ProteinSequence pSequence;
        try {
            pSequence = new ProteinSequence(checked, aminoAcidCompositionTable.getAminoAcidCompoundSet());
        } catch (CompoundNotFoundException e) {
            // the sequence was checked with Utils.checkSequence, this shouldn't happen
            throw invalidSequence(e);
        }
        IPeptideProperties pp = new PeptidePropertiesImpl();
        return pp.getMolecularWeightBasedOnXML(pSequence, aminoAcidCompositionTable);
    }

    /**
     * An adaptor method to return the absorbance (optical density) of sequence. The sequence argument
     * must be a protein sequence consisting of only non-ambiguous characters.
     * The computation of absorbance (optical density) follows the
     * documentation in <a href="http://web.expasy.org/protparam/protparam-doc.html">here</a>.
     *
     * @param sequence
     * 		a protein sequence consisting of non-ambiguous characters only
     * @param assumeCysReduced
     * 		true if Cys are assumed to be reduced and false if Cys are assumed to form cystines
     * @return the absorbance (optical density) of sequence
     */
    public static final double getAbsorbance(String sequence, boolean assumeCysReduced){
        ProteinSequence pSequence = toProteinSequence(sequence);
        IPeptideProperties pp = new PeptidePropertiesImpl();
        return pp.getAbsorbance(pSequence, assumeCysReduced);
    }

    /**
     * An adaptor method to return the extinction coefficient of sequence. The sequence argument
     * must be a protein sequence consisting of only non-ambiguous characters.
     * The extinction coefficient indicates how much light a protein absorbs at
     * a certain wavelength. It is useful to have an estimation of this
     * coefficient for following a protein with a spectrophotometer when
     * purifying it. The computation of extinction coefficient follows the
     * documentation in <a href="http://web.expasy.org/protparam/protparam-doc.html">here</a>.
     *
     * @param sequence
     * 		a protein sequence consisting of non-ambiguous characters only
     * @param assumeCysReduced
     * 		true if Cys are assumed to be reduced and false if Cys are
     * 		assumed to form cystines
     * @return the extinction coefficient of sequence
     */
    public static final double getExtinctionCoefficient(String sequence, boolean assumeCysReduced) {
        ProteinSequence pSequence = toProteinSequence(sequence);
        IPeptideProperties pp = new PeptidePropertiesImpl();
        return pp.getExtinctionCoefficient(pSequence, assumeCysReduced);
    }

    /**
     * An adaptor method to return the instability index of sequence. The sequence argument must be
     * a protein sequence consisting of only non-ambiguous characters.
     * The instability index provides an estimate of the stability of your
     * protein in a test tube. The computation of instability index follows the
     * documentation in <a href="http://web.expasy.org/protparam/protparam-doc.html">here</a>.
     * A protein whose instability index is smaller than 40 is predicted as stable, a value above 40 predicts that the protein may be unstable.
     *
     * @param sequence
     * 		a protein sequence consisting of non-ambiguous characters only
     * @return the instability index of sequence
     */
    public static final double getInstabilityIndex(String sequence) {
        ProteinSequence pSequence = toProteinSequence(sequence);
        IPeptideProperties pp = new PeptidePropertiesImpl();
        return pp.getInstabilityIndex(pSequence);
    }

    /**
     * An adaptor method to return the aliphatic index of sequence. The sequence argument must be a
     * protein sequence consisting of only non-ambiguous characters.
     * The aliphatic index of a protein is defined as the relative volume
     * occupied by aliphatic side chains (alanine, valine, isoleucine, and
     * leucine). It may be regarded as a positive factor for the increase of
     * thermostability of globular proteins. The computation of aliphatic index
     * follows the documentation in <a href="http://web.expasy.org/protparam/protparam-doc.html">here</a>.
     * <p>
     * The misspelled method name is kept for backward compatibility.
     *
     * @param sequence
     * 		a protein sequence consisting of non-ambiguous characters only
     * @return the aliphatic index of sequence
     */
    public static final double getApliphaticIndex(String sequence) {
        ProteinSequence pSequence = toProteinSequence(sequence);
        IPeptideProperties pp = new PeptidePropertiesImpl();
        return pp.getApliphaticIndex(pSequence);
    }

    /**
     * An adaptor method to return the average hydropathy value of sequence. The sequence argument
     * must be a protein sequence consisting of only non-ambiguous characters.
     * The average value for a sequence is calculated as the sum of hydropathy
     * values of all the amino acids, divided by the number of residues in the
     * sequence. Hydropathy values are based on (Kyte, J. and Doolittle, R.F.
     * (1982) A simple method for displaying the hydropathic character of a
     * protein. J. Mol. Biol. 157, 105-132).
     *
     * @param sequence
     * 		a protein sequence consisting of non-ambiguous characters only
     * @return the average hydropathy value of sequence
     */
    public static final double getAvgHydropathy(String sequence) {
        ProteinSequence pSequence = toProteinSequence(sequence);
        IPeptideProperties pp = new PeptidePropertiesImpl();
        return pp.getAvgHydropathy(pSequence);
    }

    /**
     * An adaptor method to return the isoelectric point of sequence. The sequence argument must be
     * a protein sequence consisting of only non-ambiguous characters.
     * The isoelectric point is the pH at which the protein carries no net
     * electrical charge. The isoelectric point will be computed based on
     * approach stated in
     * <a href="http://www.innovagen.se/custom-peptide-synthesis/peptide-property-calculator/peptide-property-calculator-notes.asp#PI">here</a>
     *
     * pKa values used will be either
     * those used by Expasy which referenced "Electrophoresis 1994, 15, 529-539"
     * OR
     * A.Lehninger, Principles of Biochemistry, 4th Edition (2005), Chapter 3, page78, Table 3-1.
     *
     * @param sequence
     * 		a protein sequence consisting of non-ambiguous characters only
     * @param useExpasyValues
     * 		whether to use Expasy values (Default) or Innovagen values
     * @return the isoelectric point of sequence
     */
    public static final double getIsoelectricPoint(String sequence, boolean useExpasyValues) {
        ProteinSequence pSequence = toProteinSequence(sequence);
        IPeptideProperties pp = new PeptidePropertiesImpl();
        return pp.getIsoelectricPoint(pSequence, useExpasyValues);
    }

    /**
     * Convenience overload equivalent to {@code getIsoelectricPoint(sequence, true)},
     * i.e. the Expasy pKa values are used.
     *
     * @param sequence
     * 		a protein sequence consisting of non-ambiguous characters only
     * @return the isoelectric point of sequence
     */
    public static final double getIsoelectricPoint(String sequence){
        return getIsoelectricPoint(sequence, true);
    }

    /**
     * An adaptor method to return the net charge of sequence at the given pH. The sequence argument must be
     * a protein sequence consisting of only non-ambiguous characters.
     * The net charge will be computed using the approach stated in
     * <a href="http://www.innovagen.se/custom-peptide-synthesis/peptide-property-calculator/peptide-property-calculator-notes.asp#PI">here</a>
     *
     * pKa values used will be either
     * those used by Expasy which referenced "Electrophoresis 1994, 15, 529-539"
     * OR
     * A.Lehninger, Principles of Biochemistry, 4th Edition (2005), Chapter 3, page78, Table 3-1.
     *
     * @param sequence
     * 		a protein sequence consisting of non-ambiguous characters only
     * @param useExpasyValues
     * 		whether to use Expasy values (Default) or Innovagen values
     * @param pHPoint
     * 		the pH value to use for computation of the net charge. The overloads without this parameter use 7.
     * @return the net charge of sequence at given pHPoint
     */
    public static final double getNetCharge(String sequence, boolean useExpasyValues, double pHPoint){
        ProteinSequence pSequence = toProteinSequence(sequence);
        IPeptideProperties pp = new PeptidePropertiesImpl();
        return pp.getNetCharge(pSequence, useExpasyValues, pHPoint);
    }

    /**
     * Convenience overload equivalent to {@code getNetCharge(sequence, useExpasyValues, 7.0)}.
     *
     * @param sequence
     * 		a protein sequence consisting of non-ambiguous characters only
     * @param useExpasyValues
     * 		whether to use Expasy values (Default) or Innovagen values
     * @return the net charge of sequence at pH 7
     */
    public static final double getNetCharge(String sequence, boolean useExpasyValues) {
        return getNetCharge(sequence, useExpasyValues, 7.0);
    }

    /**
     * Convenience overload equivalent to {@code getNetCharge(sequence, true)},
     * i.e. Expasy pKa values at pH 7.
     *
     * @param sequence
     * 		a protein sequence consisting of non-ambiguous characters only
     * @return the net charge of sequence at pH 7
     */
    public static final double getNetCharge(String sequence){
        return getNetCharge(sequence, true);
    }

    /**
     * An adaptor method to return the composition of specified amino acid in the sequence. The
     * sequence argument must be a protein sequence consisting of only
     * non-ambiguous characters. The aminoAcidCode must be a non-ambiguous
     * character.
     * The composition of an amino acid is the total number of its occurrence,
     * divided by the total length of the sequence.
     *
     * @param sequence
     * 		a protein sequence consisting of non-ambiguous characters only
     * @param aminoAcidCode
     * 		the code of the amino acid to compute
     * @return the composition of specified amino acid in the sequence
     * @see SingleLetterAACode
     */
    public static final double getEnrichment(String sequence, SingleLetterAACode aminoAcidCode) {
        return getEnrichment(sequence, aminoAcidCode.toString());
    }

    /**
     * An adaptor method to return the composition of specified amino acid in the sequence. The
     * sequence argument must be a protein sequence consisting of only
     * non-ambiguous characters. The aminoAcidCode must be a non-ambiguous
     * character.
     * The composition of an amino acid is the total number of its occurrence,
     * divided by the total length of the sequence.
     *
     * @param sequence
     * 		a protein sequence consisting of non-ambiguous characters only
     * @param aminoAcidCode
     * 		the code of the amino acid to compute
     * @return the composition of specified amino acid in the sequence
     */
    public static final double getEnrichment(String sequence, char aminoAcidCode){
        // Convert explicitly: passing the char straight through would re-select this
        // very overload and recurse until the stack overflows.
        return getEnrichment(sequence, String.valueOf(aminoAcidCode));
    }

    /**
     * An adaptor method to return the composition of specified amino acid in the sequence. The
     * sequence argument must be a protein sequence consisting of only
     * non-ambiguous characters. The aminoAcidCode must be a non-ambiguous
     * character.
     * The composition of an amino acid is the total number of its occurrence,
     * divided by the total length of the sequence.
     *
     * @param sequence
     * 		a protein sequence consisting of non-ambiguous characters only
     * @param aminoAcidCode
     * 		the code of the amino acid to compute
     * @return the composition of specified amino acid in the sequence
     */
    public static final double getEnrichment(String sequence, String aminoAcidCode){
        ProteinSequence pSequence = toProteinSequence(sequence);
        IPeptideProperties pp = new PeptidePropertiesImpl();
        AminoAcidCompoundSet aaSet = new AminoAcidCompoundSet();
        return pp.getEnrichment(pSequence, aaSet.getCompoundForString(aminoAcidCode));
    }

    /**
     * An adaptor method to return the composition of the 20 standard amino acid in the sequence.
     * The sequence argument must be a protein sequence consisting of only
     * non-ambiguous characters.
     * The composition of an amino acid is the total number of its occurrence,
     * divided by the total length of the sequence.
     *
     * @param sequence
     * 		a protein sequence consisting of non-ambiguous characters only
     * @return the composition of the 20 standard amino acid in the sequence
     * @see AminoAcidCompound
     */
    public static final Map<AminoAcidCompound, Double> getAAComposition(String sequence) {
        ProteinSequence pSequence = toProteinSequence(sequence);
        IPeptideProperties pp = new PeptidePropertiesImpl();
        return pp.getAAComposition(pSequence);
    }

    /**
     * An adaptor method to return the composition of the 20 standard amino acid in the sequence,
     * keyed by the short name of each amino acid compound.
     * The sequence argument must be a protein sequence consisting of only
     * non-ambiguous characters.
     * The composition of an amino acid is the total number of its occurrence,
     * divided by the total length of the sequence.
     *
     * @param sequence
     * 		a protein sequence consisting of non-ambiguous characters only
     * @return the composition of the 20 standard amino acid in the sequence
     */
    public static final Map<String, Double> getAACompositionString(String sequence){
        Map<AminoAcidCompound, Double> aa2Composition = getAAComposition(sequence);
        return aa2Composition.entrySet().stream()
                .collect(Collectors.toMap(entry -> entry.getKey().getShortName(), Map.Entry::getValue));
    }

    /**
     * An adaptor method to return the composition of the 20 standard amino acid in the sequence,
     * keyed by the first character of the short name of each amino acid compound.
     * The sequence argument must be a protein sequence consisting of only
     * non-ambiguous characters.
     * The composition of an amino acid is the total number of its occurrence,
     * divided by the total length of the sequence.
     *
     * @param sequence
     * 		a protein sequence consisting of non-ambiguous characters only
     * @return the composition of the 20 standard amino acid in the sequence
     */
    public static final Map<Character, Double> getAACompositionChar(String sequence){
        Map<AminoAcidCompound, Double> aa2Composition = getAAComposition(sequence);
        Map<Character, Double> aaChar2Composition = new HashMap<>();
        for (Map.Entry<AminoAcidCompound, Double> entry : aa2Composition.entrySet()) {
            aaChar2Composition.put(entry.getKey().getShortName().charAt(0), entry.getValue());
        }
        return aaChar2Composition;
    }

    /**
     * Returns the array of charges of each amino acid in a protein. At pH=7, two are negative charged: aspartic acid (Asp, D) and glutamic acid (Glu, E) (acidic side chains),
     * and three are positive charged: lysine (Lys, K), arginine (Arg, R) and histidine (His, H) (basic side chains).
     * <p>
     * The sequence is used as given; it is not passed through Utils.checkSequence.
     *
     * @param sequence
     * 		a protein sequence consisting of non-ambiguous characters only
     * @return the array of charges of amino acids in the protein (1 if amino acid is positively charged, -1 if negatively charged, 0 if not charged)
     */
    public static final int[] getChargesOfAminoAcids(String sequence) {
        int[] charges = new int[sequence.length()];
        for (int i = 0; i < charges.length; i++) {
            // charAt avoids copying the whole sequence on every iteration
            charges[i] = AminoAcidProperties.getChargeOfAminoAcid(sequence.charAt(i));
        }
        return charges;
    }

    /**
     * Returns the array of polarity values of each amino acid in a protein sequence.
     * <p>
     * The sequence is used as given; it is not passed through Utils.checkSequence.
     *
     * @param sequence
     * 		a protein sequence consisting of non-ambiguous characters only
     * @return the array of polarity of amino acids in the protein (1 if amino acid is polar, 0 if not)
     */
    public static final int[] getPolarityOfAminoAcids(String sequence) {
        int[] polarity = new int[sequence.length()];
        for (int i = 0; i < polarity.length; i++) {
            // charAt avoids copying the whole sequence on every iteration
            polarity[i] = AminoAcidProperties.getPolarityOfAminoAcid(sequence.charAt(i));
        }
        return polarity;
    }

    /**
     * An adaptor method to return the aromaticity value of sequence. The sequence argument
     * must be a protein sequence consisting of only non-ambiguous characters.
     * <p>
     * Calculates the aromaticity value of a protein according to Lobry, 1994.
     * It is simply the relative frequency of Phe+Trp+Tyr.
     *
     * @param sequence a protein sequence consisting of non-ambiguous characters only
     * @return the aromaticity value of sequence
     */
    public static final double getAromaticity(String sequence) {
        ProteinSequence pSequence = toProteinSequence(sequence);
        IPeptideProperties pp = new PeptidePropertiesImpl();
        return pp.getAromaticity(pSequence);
    }
}