Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* Fix CE-Symm bug Fix NullPointer if the structure identifier was missing * Test current handling of identifiers in structures Currently `Structure` has 4 identifiers which may or may not be set depending on the source, file format, and parsing method. These should be revisited and harmonized in the future, but for now I just document them in this test. Summary of results: | Format | With ID | Parse Method | PdbId | Identifier | Name | StructureIdentifier | |--------|---------|-----------------|-------|------------|------|---------------------| | cif | No | fromInputStream | null | "" | "" | null | | pdb | No | fromInputStream | null | "" | "" | null | | cif | No | fromUrl | null | "" | "" | null | | cif | No | StructureIO | null | file: | 5PTI | file: | | pdb | No | StructureIO | null | file: | "" | file: | | cif | Yes | fromInputStream | 5PTI | "" | "" | null | | pdb | Yes | fromInputStream | 5PTI | "" | "" | null | | cif | Yes | fromUrl | 5PTI | "" | "" | null | | cif | Yes | StructureIO | 5PTI | file: | 5PTI | file: | | pdb | Yes | StructureIO | 5PTI | file: | 5PTI | file: | - `getPdbId` reflects the ID parsed from the file - Only StructureIO is setting the StructureIdentifier (by design, but indicates StructureIO should be used wherever possible) - StructureIO does not set the name properly for PDB files. This should be fixed. * Test NullPointerException when loading Alphafold models (#1019) The core URLIdentifier bug was fixed in #1020, but this improves the test. The parser changes are mostly for logging, and are there to indicate that null PdbId is an expected situation rather than an error. * Truncate file --------- Co-authored-by: Spencer Bliven <spencer.bliven@gmail.com>
- Loading branch information
1 parent
5218e59
commit 043eb40
Showing
8 changed files
with
348 additions
and
23 deletions.
There are no files selected for viewing
131 changes: 131 additions & 0 deletions
131
...tiontest/src/test/java/org/biojava/nbio/structure/test/io/TestIdentifiersInStructure.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,131 @@ | ||
package org.biojava.nbio.structure.test.io; | ||
|
||
import static org.junit.Assume.assumeNotNull; | ||
import static org.junit.jupiter.api.Assertions.assertEquals; | ||
import static org.junit.jupiter.api.Assertions.assertNull; | ||
|
||
import java.io.BufferedReader; | ||
import java.io.IOException; | ||
import java.io.InputStream; | ||
import java.io.InputStreamReader; | ||
import java.net.URL; | ||
|
||
import org.biojava.nbio.structure.Structure; | ||
import org.biojava.nbio.structure.StructureException; | ||
import org.biojava.nbio.structure.StructureIO; | ||
import org.biojava.nbio.structure.align.client.StructureName; | ||
import org.biojava.nbio.structure.io.PDBFileParser; | ||
import org.biojava.nbio.structure.io.cif.CifStructureConverter; | ||
import org.junit.jupiter.api.Test; | ||
|
||
/** | ||
* Test that various identifiers in Structure are parsed consistently across | ||
* file types. | ||
* | ||
* These tests are not intended to be prescriptive, but merely to document the | ||
* current state and force changes to be committed to git. | ||
*/ | ||
public class TestIdentifiersInStructure { | ||
@Test | ||
public void testCifIdentifiers() throws IOException, StructureException { | ||
String noid = "/5pti_noid.cif"; | ||
String yesid = "/5pti.cif"; | ||
|
||
// No ID, from file | ||
URL url = this.getClass().getResource(noid); | ||
String file = url.getPath(); | ||
Structure s = CifStructureConverter.fromURL(url); | ||
assertNull(s.getPdbId()); | ||
assertEquals("", s.getIdentifier()); | ||
assertEquals("", s.getName()); | ||
assertNull(s.getStructureIdentifier()); | ||
|
||
// No ID, from StructureIO | ||
s = StructureIO.getStructure(file); | ||
assertNull(s.getPdbId()); | ||
assertEquals(file, s.getIdentifier()); | ||
assertEquals("5PTI", s.getName()); | ||
StructureName sid = (StructureName) s.getStructureIdentifier(); | ||
assertEquals(file, sid.getIdentifier()); | ||
|
||
// No id, from stream | ||
InputStream stream = this.getClass().getResourceAsStream(noid); | ||
s = CifStructureConverter.fromInputStream(stream); | ||
assertNull(s.getPdbId()); | ||
assertEquals("", s.getIdentifier()); | ||
assertEquals("", s.getName()); | ||
assertNull(s.getStructureIdentifier()); | ||
|
||
// With ID, from file | ||
url = this.getClass().getResource(yesid); | ||
file = url.getPath(); | ||
s = CifStructureConverter.fromURL(url); | ||
assertEquals("5PTI", s.getPdbId().getId()); | ||
assertEquals("", s.getIdentifier()); | ||
assertEquals("", s.getName()); | ||
assertNull(s.getStructureIdentifier()); | ||
|
||
// With ID, from StructureIO | ||
s = StructureIO.getStructure(file); | ||
assertEquals("5PTI", s.getPdbId().getId()); | ||
assertEquals(file, s.getIdentifier()); | ||
assertEquals("5PTI", s.getName()); | ||
sid = (StructureName) s.getStructureIdentifier(); | ||
assertEquals(file, sid.getIdentifier()); | ||
|
||
// With id, from stream | ||
stream = this.getClass().getResourceAsStream(yesid); | ||
s = CifStructureConverter.fromInputStream(stream); | ||
assertEquals("5PTI", s.getPdbId().getId()); | ||
assertEquals("", s.getIdentifier()); | ||
assertEquals("", s.getName()); | ||
assertNull(s.getStructureIdentifier()); | ||
} | ||
|
||
@Test | ||
public void testPDBIdentifiers() throws IOException, StructureException { | ||
String yesid = "/5pti.pdb"; | ||
String noid = "/noid.pdb"; | ||
|
||
assumeNotNull(this.getClass().getResource(yesid)); | ||
assumeNotNull(this.getClass().getResource(noid)); | ||
|
||
PDBFileParser parser = new PDBFileParser(); | ||
|
||
// No id, from StructureIO | ||
String file = this.getClass().getResource(noid).getPath(); | ||
// Structure s = parser.parsePDBFile(file); | ||
Structure s = StructureIO.getStructure(file); | ||
assertNull(s.getPdbId()); | ||
assertEquals(file, s.getIdentifier()); | ||
assertEquals("", s.getName()); // differs from CIF behavior | ||
StructureName sid = (StructureName) s.getStructureIdentifier(); | ||
assertEquals(file, sid.getIdentifier()); | ||
|
||
// No id, from stream | ||
InputStream stream = this.getClass().getResourceAsStream(noid); | ||
s = parser.parsePDBFile(new BufferedReader(new InputStreamReader(stream))); | ||
assertNull(s.getPdbId()); | ||
assertEquals("", s.getIdentifier()); | ||
assertEquals("", s.getName()); | ||
assertNull(s.getStructureIdentifier()); | ||
|
||
// With id, from StructureIO | ||
file = this.getClass().getResource(yesid).getPath(); | ||
s = StructureIO.getStructure(file); | ||
assertEquals("5PTI", s.getPdbId().toString()); | ||
assertEquals(file, s.getIdentifier()); | ||
assertEquals("5PTI", s.getName()); | ||
sid = (StructureName) s.getStructureIdentifier(); | ||
assertEquals(file, sid.getIdentifier()); | ||
|
||
|
||
// With id, from stream | ||
stream = this.getClass().getResourceAsStream(yesid); | ||
s = parser.parsePDBFile(new BufferedReader(new InputStreamReader(stream))); | ||
assertEquals("5PTI", s.getPdbId().toString()); | ||
assertEquals("", s.getIdentifier()); | ||
assertEquals("", s.getName()); | ||
assertNull(s.getStructureIdentifier()); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
data_ | ||
# | ||
loop_ | ||
_entity.id | ||
_entity.type | ||
_entity.src_method | ||
_entity.pdbx_description | ||
_entity.formula_weight | ||
_entity.pdbx_number_of_molecules | ||
_entity.details | ||
1 polymer man 'TRYPSIN INHIBITOR' 6527.598 1 ? | ||
# | ||
loop_ | ||
_struct_asym.id | ||
_struct_asym.pdbx_blank_PDB_chainid_flag | ||
_struct_asym.pdbx_modified | ||
_struct_asym.entity_id | ||
_struct_asym.details | ||
A N N 1 ? | ||
# | ||
loop_ | ||
_atom_site.group_PDB | ||
_atom_site.id | ||
_atom_site.type_symbol | ||
_atom_site.label_atom_id | ||
_atom_site.label_alt_id | ||
_atom_site.label_comp_id | ||
_atom_site.label_asym_id | ||
_atom_site.label_entity_id | ||
_atom_site.label_seq_id | ||
_atom_site.pdbx_PDB_ins_code | ||
_atom_site.Cartn_x | ||
_atom_site.Cartn_y | ||
_atom_site.Cartn_z | ||
_atom_site.occupancy | ||
_atom_site.B_iso_or_equiv | ||
_atom_site.Cartn_x_esd | ||
_atom_site.Cartn_y_esd | ||
_atom_site.Cartn_z_esd | ||
_atom_site.occupancy_esd | ||
_atom_site.B_iso_or_equiv_esd | ||
_atom_site.pdbx_formal_charge | ||
_atom_site.auth_seq_id | ||
_atom_site.auth_comp_id | ||
_atom_site.auth_asym_id | ||
_atom_site.auth_atom_id | ||
_atom_site.pdbx_PDB_model_num | ||
ATOM 1 N N . ARG A 1 1 ? 32.231 15.281 -13.143 1.00 28.28 ? ? ? ? ? ? 1 ARG A N 1 | ||
ATOM 2 C CA . ARG A 1 1 ? 32.184 14.697 -11.772 1.00 27.90 ? ? ? ? ? ? 1 ARG A CA 1 | ||
ATOM 3 C C . ARG A 1 1 ? 33.438 13.890 -11.387 1.00 24.90 ? ? ? ? ? ? 1 ARG A C 1 | ||
ATOM 4 O O . ARG A 1 1 ? 34.102 13.070 -12.066 1.00 24.44 ? ? ? ? ? ? 1 ARG A O 1 | ||
ATOM 5 C CB . ARG A 1 1 ? 30.797 14.065 -11.625 1.00 27.88 ? ? ? ? ? ? 1 ARG A CB 1 | ||
ATOM 6 C CG . ARG A 1 1 ? 30.976 12.589 -11.819 1.00 29.61 ? ? ? ? ? ? 1 ARG A CG 1 | ||
ATOM 7 C CD . ARG A 1 1 ? 29.608 12.016 -11.694 1.00 31.91 ? ? ? ? ? ? 1 ARG A CD 1 | ||
ATOM 8 N NE . ARG A 1 1 ? 28.942 12.335 -12.945 1.00 33.51 ? ? ? ? ? ? 1 ARG A NE 1 | ||
ATOM 9 C CZ . ARG A 1 1 ? 27.670 12.696 -13.050 1.00 34.29 ? ? ? ? ? ? 1 ARG A CZ 1 | ||
ATOM 10 N NH1 . ARG A 1 1 ? 26.901 12.777 -11.999 1.00 34.48 ? ? ? ? ? ? 1 ARG A NH1 1 | ||
ATOM 11 N NH2 . ARG A 1 1 ? 27.161 12.963 -14.255 1.00 35.44 ? ? ? ? ? ? 1 ARG A NH2 1 | ||
ATOM 12 D D1 . ARG A 1 1 ? 32.983 14.824 -13.703 1.00 27.71 ? ? ? ? ? ? 1 ARG A D1 1 | ||
ATOM 13 D D2 . ARG A 1 1 ? 31.275 15.112 -13.535 1.00 28.50 ? ? ? ? ? ? 1 ARG A D2 1 | ||
ATOM 14 D D3 . ARG A 1 1 ? 32.174 16.346 -13.050 1.00 28.23 ? ? ? ? ? ? 1 ARG A D3 1 | ||
ATOM 15 H HA . ARG A 1 1 ? 32.192 15.563 -11.115 1.00 26.97 ? ? ? ? ? ? 1 ARG A HA 1 | ||
ATOM 16 H HB2 . ARG A 1 1 ? 30.392 14.428 -10.697 1.00 28.71 ? ? ? ? ? ? 1 ARG A HB2 1 | ||
ATOM 17 H HB3 . ARG A 1 1 ? 30.182 14.438 -12.437 1.00 28.97 ? ? ? ? ? ? 1 ARG A HB3 1 | ||
ATOM 18 H HG2 . ARG A 1 1 ? 31.369 12.359 -12.800 1.00 29.44 ? ? ? ? ? ? 1 ARG A HG2 1 | ||
ATOM 19 H HG3 . ARG A 1 1 ? 31.591 12.143 -11.105 1.00 29.52 ? ? ? ? ? ? 1 ARG A HG3 1 | ||
ATOM 20 H HD2 . ARG A 1 1 ? 29.656 10.965 -11.482 1.00 31.41 ? ? ? ? ? ? 1 ARG A HD2 1 | ||
ATOM 21 H HD3 . ARG A 1 1 ? 29.218 12.381 -10.794 1.00 31.29 ? ? ? ? ? ? 1 ARG A HD3 1 | ||
ATOM 22 D DE . ARG A 1 1 ? 29.457 12.292 -13.843 1.00 34.62 ? ? ? ? ? ? 1 ARG A DE 1 | ||
ATOM 23 D DH11 . ARG A 1 1 ? 25.930 13.038 -12.118 1.00 34.80 ? ? ? ? ? ? 1 ARG A DH11 1 | ||
ATOM 24 D DH12 . ARG A 1 1 ? 27.324 12.549 -11.100 1.00 34.71 ? ? ? ? ? ? 1 ARG A DH12 1 | ||
ATOM 25 D DH21 . ARG A 1 1 ? 27.786 12.886 -15.076 1.00 33.93 ? ? ? ? ? ? 1 ARG A DH21 1 | ||
ATOM 26 D DH22 . ARG A 1 1 ? 26.154 13.214 -14.269 1.00 34.53 ? ? ? ? ? ? 1 ARG A DH22 1 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
HEADER 01-JUL-21 | ||
ATOM 1 N MET A 1 19.682 12.062 34.184 1.00 39.14 N | ||
ATOM 2 CA MET A 1 20.443 10.838 34.522 1.00 39.14 C | ||
ATOM 3 C MET A 1 20.073 9.731 33.538 1.00 39.14 C | ||
ATOM 4 CB MET A 1 20.138 10.405 35.966 1.00 39.14 C | ||
ATOM 5 O MET A 1 19.030 9.110 33.696 1.00 39.14 O | ||
ATOM 6 CG MET A 1 20.829 11.294 37.004 1.00 39.14 C | ||
ATOM 7 SD MET A 1 20.292 10.920 38.687 1.00 39.14 S | ||
ATOM 8 CE MET A 1 21.522 11.848 39.645 1.00 39.14 C |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.