Skip to content

Commit

Permalink
More nullpdbid tests (#1056)
Browse files Browse the repository at this point in the history
* Fix CE-Symm bug

Fix NullPointer if the structure identifier was missing

* Test current handling of identifiers in structures

Currently `Structure` has 4 identifiers which may or may not be set
depending on the source, file format, and parsing method. These should
be revisited and harmonized in the future, but for now I just document
them in this test.

Summary of results:

| Format | With ID | Parse Method    | PdbId | Identifier | Name | StructureIdentifier |
|--------|---------|-----------------|-------|------------|------|---------------------|
| cif    | No      | fromInputStream | null  | ""         | ""   | null                |
| pdb    | No      | fromInputStream | null  | ""         | ""   | null                |
| cif    | No      | fromUrl         | null  | ""         | ""   | null                |
| cif    | No      | StructureIO     | null  | file:      | 5PTI | file:               |
| pdb    | No      | StructureIO     | null  | file:      | ""   | file:               |
| cif    | Yes     | fromInputStream | 5PTI  | ""         | ""   | null                |
| pdb    | Yes     | fromInputStream | 5PTI  | ""         | ""   | null                |
| cif    | Yes     | fromUrl         | 5PTI  | ""         | ""   | null                |
| cif    | Yes     | StructureIO     | 5PTI  | file:      | 5PTI | file:               |
| pdb    | Yes     | StructureIO     | 5PTI  | file:      | 5PTI | file:               |

- `getPdbId` reflects the ID parsed from the file
- Only StructureIO is setting the StructureIdentifier (by design, but
  indicates StructureIO should be used wherever possible)
- StructureIO does not set the name properly for PDB files. This should
  be fixed.

* Test NullPointerException when loading Alphafold models (#1019)

The core URLIdentifier bug was fixed in #1020, but this improves the
test.

The parser changes are mostly for logging, and are there to indicate that
null PdbId is an expected situation rather than an error.

* Truncate file

---------

Co-authored-by: Spencer Bliven <spencer.bliven@gmail.com>
  • Loading branch information
josemduarte and sbliven committed Jan 30, 2023
1 parent 5218e59 commit 043eb40
Show file tree
Hide file tree
Showing 8 changed files with 348 additions and 23 deletions.
@@ -0,0 +1,131 @@
package org.biojava.nbio.structure.test.io;

import static org.junit.Assume.assumeNotNull;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNull;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;

import org.biojava.nbio.structure.Structure;
import org.biojava.nbio.structure.StructureException;
import org.biojava.nbio.structure.StructureIO;
import org.biojava.nbio.structure.align.client.StructureName;
import org.biojava.nbio.structure.io.PDBFileParser;
import org.biojava.nbio.structure.io.cif.CifStructureConverter;
import org.junit.jupiter.api.Test;

/**
* Test that various identifiers in Structure are parsed consistently across
* file types.
*
* These tests are not intended to be prescriptive, but merely to document the
* current state and force changes to be committed to git.
*/
public class TestIdentifiersInStructure {
@Test
public void testCifIdentifiers() throws IOException, StructureException {
String noid = "/5pti_noid.cif";
String yesid = "/5pti.cif";

// No ID, from file
URL url = this.getClass().getResource(noid);
String file = url.getPath();
Structure s = CifStructureConverter.fromURL(url);
assertNull(s.getPdbId());
assertEquals("", s.getIdentifier());
assertEquals("", s.getName());
assertNull(s.getStructureIdentifier());

// No ID, from StructureIO
s = StructureIO.getStructure(file);
assertNull(s.getPdbId());
assertEquals(file, s.getIdentifier());
assertEquals("5PTI", s.getName());
StructureName sid = (StructureName) s.getStructureIdentifier();
assertEquals(file, sid.getIdentifier());

// No id, from stream
InputStream stream = this.getClass().getResourceAsStream(noid);
s = CifStructureConverter.fromInputStream(stream);
assertNull(s.getPdbId());
assertEquals("", s.getIdentifier());
assertEquals("", s.getName());
assertNull(s.getStructureIdentifier());

// With ID, from file
url = this.getClass().getResource(yesid);
file = url.getPath();
s = CifStructureConverter.fromURL(url);
assertEquals("5PTI", s.getPdbId().getId());
assertEquals("", s.getIdentifier());
assertEquals("", s.getName());
assertNull(s.getStructureIdentifier());

// With ID, from StructureIO
s = StructureIO.getStructure(file);
assertEquals("5PTI", s.getPdbId().getId());
assertEquals(file, s.getIdentifier());
assertEquals("5PTI", s.getName());
sid = (StructureName) s.getStructureIdentifier();
assertEquals(file, sid.getIdentifier());

// With id, from stream
stream = this.getClass().getResourceAsStream(yesid);
s = CifStructureConverter.fromInputStream(stream);
assertEquals("5PTI", s.getPdbId().getId());
assertEquals("", s.getIdentifier());
assertEquals("", s.getName());
assertNull(s.getStructureIdentifier());
}

@Test
public void testPDBIdentifiers() throws IOException, StructureException {
String yesid = "/5pti.pdb";
String noid = "/noid.pdb";

assumeNotNull(this.getClass().getResource(yesid));
assumeNotNull(this.getClass().getResource(noid));

PDBFileParser parser = new PDBFileParser();

// No id, from StructureIO
String file = this.getClass().getResource(noid).getPath();
// Structure s = parser.parsePDBFile(file);
Structure s = StructureIO.getStructure(file);
assertNull(s.getPdbId());
assertEquals(file, s.getIdentifier());
assertEquals("", s.getName()); // differs from CIF behavior
StructureName sid = (StructureName) s.getStructureIdentifier();
assertEquals(file, sid.getIdentifier());

// No id, from stream
InputStream stream = this.getClass().getResourceAsStream(noid);
s = parser.parsePDBFile(new BufferedReader(new InputStreamReader(stream)));
assertNull(s.getPdbId());
assertEquals("", s.getIdentifier());
assertEquals("", s.getName());
assertNull(s.getStructureIdentifier());

// With id, from StructureIO
file = this.getClass().getResource(yesid).getPath();
s = StructureIO.getStructure(file);
assertEquals("5PTI", s.getPdbId().toString());
assertEquals(file, s.getIdentifier());
assertEquals("5PTI", s.getName());
sid = (StructureName) s.getStructureIdentifier();
assertEquals(file, sid.getIdentifier());


// With id, from stream
stream = this.getClass().getResourceAsStream(yesid);
s = parser.parsePDBFile(new BufferedReader(new InputStreamReader(stream)));
assertEquals("5PTI", s.getPdbId().toString());
assertEquals("", s.getIdentifier());
assertEquals("", s.getName());
assertNull(s.getStructureIdentifier());
}
}
73 changes: 73 additions & 0 deletions biojava-integrationtest/src/test/resources/5pti_noid.cif
@@ -0,0 +1,73 @@
data_
#
loop_
_entity.id
_entity.type
_entity.src_method
_entity.pdbx_description
_entity.formula_weight
_entity.pdbx_number_of_molecules
_entity.details
1 polymer man 'TRYPSIN INHIBITOR' 6527.598 1 ?
#
loop_
_struct_asym.id
_struct_asym.pdbx_blank_PDB_chainid_flag
_struct_asym.pdbx_modified
_struct_asym.entity_id
_struct_asym.details
A N N 1 ?
#
loop_
_atom_site.group_PDB
_atom_site.id
_atom_site.type_symbol
_atom_site.label_atom_id
_atom_site.label_alt_id
_atom_site.label_comp_id
_atom_site.label_asym_id
_atom_site.label_entity_id
_atom_site.label_seq_id
_atom_site.pdbx_PDB_ins_code
_atom_site.Cartn_x
_atom_site.Cartn_y
_atom_site.Cartn_z
_atom_site.occupancy
_atom_site.B_iso_or_equiv
_atom_site.Cartn_x_esd
_atom_site.Cartn_y_esd
_atom_site.Cartn_z_esd
_atom_site.occupancy_esd
_atom_site.B_iso_or_equiv_esd
_atom_site.pdbx_formal_charge
_atom_site.auth_seq_id
_atom_site.auth_comp_id
_atom_site.auth_asym_id
_atom_site.auth_atom_id
_atom_site.pdbx_PDB_model_num
ATOM 1 N N . ARG A 1 1 ? 32.231 15.281 -13.143 1.00 28.28 ? ? ? ? ? ? 1 ARG A N 1
ATOM 2 C CA . ARG A 1 1 ? 32.184 14.697 -11.772 1.00 27.90 ? ? ? ? ? ? 1 ARG A CA 1
ATOM 3 C C . ARG A 1 1 ? 33.438 13.890 -11.387 1.00 24.90 ? ? ? ? ? ? 1 ARG A C 1
ATOM 4 O O . ARG A 1 1 ? 34.102 13.070 -12.066 1.00 24.44 ? ? ? ? ? ? 1 ARG A O 1
ATOM 5 C CB . ARG A 1 1 ? 30.797 14.065 -11.625 1.00 27.88 ? ? ? ? ? ? 1 ARG A CB 1
ATOM 6 C CG . ARG A 1 1 ? 30.976 12.589 -11.819 1.00 29.61 ? ? ? ? ? ? 1 ARG A CG 1
ATOM 7 C CD . ARG A 1 1 ? 29.608 12.016 -11.694 1.00 31.91 ? ? ? ? ? ? 1 ARG A CD 1
ATOM 8 N NE . ARG A 1 1 ? 28.942 12.335 -12.945 1.00 33.51 ? ? ? ? ? ? 1 ARG A NE 1
ATOM 9 C CZ . ARG A 1 1 ? 27.670 12.696 -13.050 1.00 34.29 ? ? ? ? ? ? 1 ARG A CZ 1
ATOM 10 N NH1 . ARG A 1 1 ? 26.901 12.777 -11.999 1.00 34.48 ? ? ? ? ? ? 1 ARG A NH1 1
ATOM 11 N NH2 . ARG A 1 1 ? 27.161 12.963 -14.255 1.00 35.44 ? ? ? ? ? ? 1 ARG A NH2 1
ATOM 12 D D1 . ARG A 1 1 ? 32.983 14.824 -13.703 1.00 27.71 ? ? ? ? ? ? 1 ARG A D1 1
ATOM 13 D D2 . ARG A 1 1 ? 31.275 15.112 -13.535 1.00 28.50 ? ? ? ? ? ? 1 ARG A D2 1
ATOM 14 D D3 . ARG A 1 1 ? 32.174 16.346 -13.050 1.00 28.23 ? ? ? ? ? ? 1 ARG A D3 1
ATOM 15 H HA . ARG A 1 1 ? 32.192 15.563 -11.115 1.00 26.97 ? ? ? ? ? ? 1 ARG A HA 1
ATOM 16 H HB2 . ARG A 1 1 ? 30.392 14.428 -10.697 1.00 28.71 ? ? ? ? ? ? 1 ARG A HB2 1
ATOM 17 H HB3 . ARG A 1 1 ? 30.182 14.438 -12.437 1.00 28.97 ? ? ? ? ? ? 1 ARG A HB3 1
ATOM 18 H HG2 . ARG A 1 1 ? 31.369 12.359 -12.800 1.00 29.44 ? ? ? ? ? ? 1 ARG A HG2 1
ATOM 19 H HG3 . ARG A 1 1 ? 31.591 12.143 -11.105 1.00 29.52 ? ? ? ? ? ? 1 ARG A HG3 1
ATOM 20 H HD2 . ARG A 1 1 ? 29.656 10.965 -11.482 1.00 31.41 ? ? ? ? ? ? 1 ARG A HD2 1
ATOM 21 H HD3 . ARG A 1 1 ? 29.218 12.381 -10.794 1.00 31.29 ? ? ? ? ? ? 1 ARG A HD3 1
ATOM 22 D DE . ARG A 1 1 ? 29.457 12.292 -13.843 1.00 34.62 ? ? ? ? ? ? 1 ARG A DE 1
ATOM 23 D DH11 . ARG A 1 1 ? 25.930 13.038 -12.118 1.00 34.80 ? ? ? ? ? ? 1 ARG A DH11 1
ATOM 24 D DH12 . ARG A 1 1 ? 27.324 12.549 -11.100 1.00 34.71 ? ? ? ? ? ? 1 ARG A DH12 1
ATOM 25 D DH21 . ARG A 1 1 ? 27.786 12.886 -15.076 1.00 33.93 ? ? ? ? ? ? 1 ARG A DH21 1
ATOM 26 D DH22 . ARG A 1 1 ? 26.154 13.214 -14.269 1.00 34.53 ? ? ? ? ? ? 1 ARG A DH22 1
9 changes: 9 additions & 0 deletions biojava-integrationtest/src/test/resources/5pti_noid.pdb
@@ -0,0 +1,9 @@
HEADER 01-JUL-21
ATOM 1 N MET A 1 19.682 12.062 34.184 1.00 39.14 N
ATOM 2 CA MET A 1 20.443 10.838 34.522 1.00 39.14 C
ATOM 3 C MET A 1 20.073 9.731 33.538 1.00 39.14 C
ATOM 4 CB MET A 1 20.138 10.405 35.966 1.00 39.14 C
ATOM 5 O MET A 1 19.030 9.110 33.696 1.00 39.14 O
ATOM 6 CG MET A 1 20.829 11.294 37.004 1.00 39.14 C
ATOM 7 SD MET A 1 20.292 10.920 38.687 1.00 39.14 S
ATOM 8 CE MET A 1 21.522 11.848 39.645 1.00 39.14 C
Expand Up @@ -371,13 +371,16 @@ private void pdb_HEADER_Handler(String line) {

logger.debug("Parsing entry " + pdbId);


PdbId pdbIdToSet;
try {
pdbIdToSet = new PdbId(pdbCode);
} catch (IllegalArgumentException e) {
logger.info("Malformed (or null) PDB ID {}. setting PdbId to null", pdbCode);
if(pdbCode.isBlank()) {
pdbIdToSet = null;
} else {
try {
pdbIdToSet = new PdbId(pdbCode);
} catch (IllegalArgumentException e) {
logger.warn("Malformed PDB ID {}. setting PdbId to null", pdbCode);
pdbIdToSet = null;
}
}
structure.setPdbId(pdbIdToSet);
pdbHeader.setPdbId(pdbIdToSet);
Expand Down Expand Up @@ -2719,7 +2722,7 @@ else if ( params.isParseSecStruc()) {
}

makeCompounds(compndLines, sourceLines);

handlePDBKeywords(keywordsLines);

triggerEndFileChecks();
Expand Down Expand Up @@ -2786,18 +2789,18 @@ private void makeCompounds(List<String> compoundList,
}

/**Parse KEYWODS record of the PDB file.<br>
* A keyword may be split over two lines. whether a keyword ends by the end
* of a line or it is aplit over two lines, a <code>space</code> is added
* between the 2 lines's contents, unless the first line ends in
* A keyword may be split over two lines. whether a keyword ends by the end
* of a line or it is aplit over two lines, a <code>space</code> is added
* between the 2 lines's contents, unless the first line ends in
* a '-' character.
* <pre>
* Record Format
* COLUMNS DATA TYPE FIELD DEFINITION
* COLUMNS DATA TYPE FIELD DEFINITION
* ---------------------------------------------------------------------------------
* 1 - 6 Record name "KEYWDS"
* 1 - 6 Record name "KEYWDS"
* 9 - 10 Continuation continuation Allows concatenation of records if necessary.
* 11 - 79 List keywds Comma-separated list of keywords relevant
* to the entry.
* to the entry.
* Example
* 1 2 3 4 5 6 7 8
* 12345678901234567890123456789012345678901234567890123456789012345678901234567890
Expand Down Expand Up @@ -2830,7 +2833,7 @@ private void handlePDBKeywords(List<String> lines) {
}
pdbHeader.setKeywords(lst);
}

/**
* Handles creation of all bonds. Looks at LINK records, SSBOND (Disulfide
* bonds), peptide bonds, and intra-residue bonds.
Expand Down
Expand Up @@ -645,11 +645,11 @@ public void consumeDatabasePDBRevRecord(DatabasePDBRevRecord databasePDBrevRecor
revRecords.add(new org.biojava.nbio.structure.DatabasePDBRevRecord(databasePDBrevRecord, i));
}
}

@Override
public void consumeEm3dReconstruction(Em3dReconstruction em3dReconstruction) {
this.em3dReconstruction = em3dReconstruction;

for (int rowIndex = 0; rowIndex < em3dReconstruction.getRowCount(); rowIndex++) { //can it have more than 1 value?
final FloatColumn resolution = em3dReconstruction.getResolution();
if (ValueKind.PRESENT.equals(resolution.getValueKind(rowIndex)))
Expand Down Expand Up @@ -895,12 +895,16 @@ public void consumeStruct(Struct struct) {
if (struct.isDefined() && struct.getEntryId().isDefined()) {
PdbId pdbId;
String pdbCode = struct.getEntryId().get(0);
try {
pdbId = new PdbId(pdbCode);
} catch (IllegalArgumentException e) {
logger.info("Malformed (or null) PDB ID {}. setting PdbId to null", pdbCode);
pdbId = null;
}
if(pdbCode.isBlank()){
pdbId = null;
} else {
try {
pdbId = new PdbId(pdbCode);
} catch (IllegalArgumentException e) {
logger.warn("Malformed PDB ID {}. setting PdbId to null", pdbCode);
pdbId = null;
}
}
pdbHeader.setPdbId(pdbId);
structure.setPdbId(pdbId);
}
Expand Down
Expand Up @@ -242,8 +242,10 @@ protected static CeSymmResult align(Atom[] atoms, CESymmParameters params)
optimalAFP = selfAlignments.get(0);
StructureIdentifier id = atoms[0].getGroup().getChain().getStructure()
.getStructureIdentifier();
optimalAFP.setName1(id.getIdentifier());
optimalAFP.setName2(id.getIdentifier());
if(id != null) {
optimalAFP.setName1(id.getIdentifier());
optimalAFP.setName2(id.getIdentifier());
}

// Store the optimal self-alignment
result.setSelfAlignment(optimalAFP);
Expand Down
Expand Up @@ -21,13 +21,18 @@
package org.biojava.nbio.structure.symmetry.internal;

import static org.junit.Assert.*;
import static org.junit.Assume.assumeNotNull;

import java.io.IOException;
import java.io.InputStream;
import java.net.URL;

import org.biojava.nbio.structure.Atom;
import org.biojava.nbio.structure.Structure;
import org.biojava.nbio.structure.StructureException;
import org.biojava.nbio.structure.StructureIO;
import org.biojava.nbio.structure.StructureTools;
import org.biojava.nbio.structure.io.PDBFileParser;
import org.biojava.nbio.structure.symmetry.internal.CeSymm;
import org.junit.Test;

Expand Down Expand Up @@ -58,4 +63,16 @@ public void testEasyCases() throws IOException, StructureException {
assertEquals(result.getNumRepeats(), orders[i]);
}
}

@Test
public void testAlphafold() throws IOException, StructureException {
URL url = this.getClass().getResource("/AF-A0A0R4IYF1-F1-model_v2.pdb");
assumeNotNull(url);
String file = url.getPath();
Structure s = StructureIO.getStructure(file);
assertNull(s.getPdbId());
Atom[] atoms = StructureTools.getRepresentativeAtomArray(s);
CeSymmResult result = CeSymm.analyze(atoms);
assertNotNull(result);
}
}

0 comments on commit 043eb40

Please sign in to comment.