Skip to content

Commit

Permalink
Merge pull request #1063 from josemduarte/more-cif-output
Browse files Browse the repository at this point in the history
Some more categories written out to CIF
  • Loading branch information
josemduarte committed Jun 1, 2023
2 parents 88b51de + 4d146e1 commit d09477e
Show file tree
Hide file tree
Showing 7 changed files with 142 additions and 21 deletions.
Expand Up @@ -59,6 +59,12 @@ public void test6ELW() throws IOException {
// a structure with insertion codes
testRoundTrip("6ELW");
}

/**
 * Round-trips PDB entry 4HHB, a structure with multiple poly entities.
 */
@Test
public void test4HHB() throws IOException {
    final String pdbId = "4HHB";
    testRoundTrip(pdbId);
}

private static void testRoundTrip(String pdbId) throws IOException {
URL url = new URL("https://files.rcsb.org/download/" + pdbId + ".cif");
Expand Down Expand Up @@ -121,6 +127,14 @@ private static void testRoundTrip(String pdbId) throws IOException {
// Test cell and symmetry
assertEquals(originalStruct.getCrystallographicInfo().getSpaceGroup(),
readStruct.getCrystallographicInfo().getSpaceGroup());

// entity
assertEquals(originalStruct.getEntityInfos().size(), readStruct.getEntityInfos().size());
for (int i=0; i<originalStruct.getEntityInfos().size(); i++) {
assertEquals(originalStruct.getEntityInfos().get(i).getMolId(), readStruct.getEntityInfos().get(i).getMolId());
assertEquals(originalStruct.getEntityInfos().get(i).getType(), readStruct.getEntityInfos().get(i).getType());
}

}

/**
Expand Down
Expand Up @@ -819,40 +819,35 @@ private boolean mapDNAChains(List<Group> seqResGroups, List<Group> atomRes,
*/
public static void storeUnAlignedSeqRes(Structure structure, List<Chain> seqResChains, boolean headerOnly) {

    if (headerOnly) {

        // In header-only mode skip ATOM records.
        // Here we store chains with SEQRES instead of AtomGroups: the groups held by each
        // SEQRES chain are moved into its SEQRES slot and its atom groups are emptied.
        List<Chain> atomChains = new ArrayList<>();
        for (Chain seqRes : seqResChains) {
            seqRes.setSeqResGroups(seqRes.getAtomGroups());
            seqRes.setAtomGroups(new ArrayList<>()); // clear out the atom groups.

            atomChains.add(seqRes);
        }
        // the SEQRES-only chains become the chains of the first model
        structure.setChains(0, atomChains);

    } else {

        for (int i = 0; i < structure.nrModels(); i++) {
            List<Chain> atomChains = structure.getModel(i);

            if (seqResChains.isEmpty()) {
                // in files without SEQRES, seqResChains object is empty: we replace by atomChains
                // resulting below in a trivial alignment and a copy of atom groups to seqres groups.
                // Note that the replacement is kept for the following models too, since the
                // list is then no longer empty (as long as this model has chains).
                seqResChains = atomChains;
            }

            for (Chain seqRes : seqResChains) {
                // We find a chain with AtomGroups matching the SEQRES chain
                // and set the SEQRES chain's groups as its SEQRES groups.
                // TODO no idea if new parameter useChainId should be false or true here, used true as a guess - JD 2016-05-09
                Chain atomRes = SeqRes2AtomAligner.getMatchingAtomRes(seqRes, atomChains, true);
                if (atomRes != null) {
                    atomRes.setSeqResGroups(seqRes.getAtomGroups());
                } else {
                    logger.warn("Could not find atom records for chain " + seqRes.getId());
                }
            }
        }
    }
}
Expand Down
@@ -1,12 +1,6 @@
package org.biojava.nbio.structure.io.cif;

import org.biojava.nbio.structure.Atom;
import org.biojava.nbio.structure.Chain;
import org.biojava.nbio.structure.Element;
import org.biojava.nbio.structure.EntityType;
import org.biojava.nbio.structure.Group;
import org.biojava.nbio.structure.GroupType;
import org.biojava.nbio.structure.Structure;
import org.biojava.nbio.structure.*;
import org.biojava.nbio.structure.xtal.CrystalCell;
import org.biojava.nbio.structure.xtal.SpaceGroup;
import org.rcsb.cif.CifBuilder;
Expand All @@ -23,8 +17,10 @@
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.function.Consumer;
import java.util.stream.Collector;
import java.util.stream.Collectors;

/**
* Convert a BioJava object to a CifFile.
Expand All @@ -41,9 +37,11 @@ protected CifFile getInternal(Structure structure, List<WrappedAtom> wrappedAtom
SpaceGroup spaceGroup = structure.getPDBHeader().getCrystallographicInfo().getSpaceGroup();
// atom_site
Category atomSite = wrappedAtoms.stream().collect(toAtomSite());
// entity information
List<EntityInfo> entityInfos = structure.getEntityInfos();

MmCifBlockBuilder blockBuilder = CifBuilder.enterFile(StandardSchemata.MMCIF)
.enterBlock(structure.getPDBCode());
.enterBlock(structure.getPdbId() == null? "" : structure.getPdbId().getId());

blockBuilder.enterStructKeywords().enterText()
.add(String.join(", ", structure.getPDBHeader().getKeywords()))
Expand Down Expand Up @@ -92,6 +90,49 @@ protected CifFile getInternal(Structure structure, List<WrappedAtom> wrappedAtom
.leaveCategory();
}

if (entityInfos != null) {

String[] entityIds = new String[entityInfos.size()];
String[] entityTypes = new String[entityInfos.size()];
String[] entityDescriptions = new String[entityInfos.size()];

for (int i=0; i<entityInfos.size(); i++) {
EntityInfo e = entityInfos.get(i);
entityIds[i] = Integer.toString(e.getMolId());
entityTypes[i] = e.getType().getEntityType();
entityDescriptions[i] = e.getDescription() == null? "?" : e.getDescription();
}

String[] polyEntityIds = entityInfos.stream().filter(e -> e.getType() == EntityType.POLYMER).map(e -> Integer.toString(e.getMolId())).toArray(String[]::new);
String[] polyEntitySeqs = entityInfos.stream().filter(e -> e.getType() == EntityType.POLYMER).map(e -> e.getChains().get(0).getSeqResSequence()).toArray(String[]::new);

blockBuilder.enterEntity()
.enterId()
.add(entityIds)
.leaveColumn()

.enterType()
.add(entityTypes)
.leaveColumn()

.enterPdbxDescription()
.add(entityDescriptions)
.leaveColumn()

.leaveCategory();

blockBuilder.enterEntityPoly()
.enterEntityId()
.add(polyEntityIds)
.leaveColumn()

.enterPdbxSeqOneLetterCodeCan()
.add(polyEntitySeqs)
.leaveColumn()

.leaveCategory();
}

return blockBuilder.leaveBlock().leaveFile();
}

Expand Down
Expand Up @@ -1612,7 +1612,12 @@ private void alignSeqRes() {
// fix SEQRES residue numbering for all models

for (int model = 0; model < structure.nrModels(); model++) {
List<Chain> atomList = structure.getModel(model);
List<Chain> atomList = structure.getPolyChains(model);

if (seqResChains.isEmpty()) {
// in files without _entity, seqResChains is empty: we replace it with atomList, which results below in a trivial alignment and a copy of the atom groups to the seqres groups
seqResChains = atomList;
}

for (Chain seqResChain : seqResChains){

Expand Down
Expand Up @@ -99,7 +99,8 @@ public void test1B8GnoSeqresPdb() throws IOException, StructureException {
//System.out.println("Chains from incomplete header file: ");
//checkChains(s);


// we should have seqres groups (testing getSeqResSequence() is equivalent)
assertFalse(s.getPolyChains().get(0).getSeqResSequence().isEmpty());

// trying without seqAlignSeqRes
params.setAlignSeqRes(false);
Expand Down Expand Up @@ -145,6 +146,9 @@ public void test3C5F() throws IOException, StructureException {
assertTrue(s.nrModels()>1);
assertNull(s.getPDBHeader().getExperimentalTechniques());

// we should have seqres groups (testing getSeqResSequence() is equivalent)
assertFalse(s.getPolyChains().get(0).getSeqResSequence().isEmpty());

}

@Test
Expand All @@ -165,6 +169,8 @@ public void test4B19() throws IOException, StructureException {
assertTrue(s.nrModels()>1);
assertNull(s.getPDBHeader().getExperimentalTechniques());

// we should have seqres groups (testing getSeqResSequence() is equivalent)
assertFalse(s.getPolyChains().get(0).getSeqResSequence().isEmpty());
}

@Test
Expand All @@ -187,6 +193,9 @@ public void test2M7Y() throws IOException {

// testing that on single chain pdb files we assign an entity type, issue #767
assertEquals(EntityType.POLYMER, s.getEntityById(1).getType());

// we should have seqres groups (testing getSeqResSequence() is equivalent)
assertFalse(s.getPolyChains().get(0).getSeqResSequence().isEmpty());
}

private void checkChains(Structure s) {
Expand Down Expand Up @@ -227,6 +236,8 @@ public void testPhenixCifFile() throws IOException {
assertEquals(1, counts[1]);
assertEquals(1, counts[2]);

// we should have seqres groups (testing getSeqResSequence() is equivalent)
assertFalse(s.getPolyChains().get(0).getSeqResSequence().isEmpty());

}

Expand Down Expand Up @@ -255,6 +266,8 @@ public void testPhenixPdbFile() throws IOException {
assertEquals(1, counts[1]);
assertEquals(1, counts[2]);

// we should have seqres groups (testing getSeqResSequence() is equivalent)
assertFalse(s.getPolyChains().get(0).getSeqResSequence().isEmpty());
}

@Test
Expand All @@ -275,6 +288,9 @@ public void testPhaserPdbFile() throws IOException {
assertEquals(2, s.getChains().size());

assertEquals(1, s.getEntityInfos().size());

// we should have seqres groups (testing getSeqResSequence() is equivalent)
assertFalse(s.getPolyChains().get(0).getSeqResSequence().isEmpty());
}


Expand Down Expand Up @@ -303,6 +319,9 @@ public void testRefmacPdbFile() throws IOException {
assertEquals(1, counts[1]);
assertEquals(1, counts[2]);

// we should have seqres groups (testing getSeqResSequence() is equivalent)
assertFalse(s.getPolyChains().get(0).getSeqResSequence().isEmpty());

}

/**
Expand All @@ -329,6 +348,9 @@ public void testIssue931() throws IOException {
assertSame(s.getEntityById(2), s.getPolyChains().get(4).getEntityInfo());
assertSame(s.getEntityById(2), s.getPolyChains().get(5).getEntityInfo());
assertSame(s.getEntityById(2), s.getPolyChains().get(6).getEntityInfo());

// we should have seqres groups (testing getSeqResSequence() is equivalent)
assertFalse(s.getPolyChains().get(0).getSeqResSequence().isEmpty());
}

/**
Expand Down
@@ -0,0 +1,44 @@
package org.biojava.nbio.structure.io.cif;

import org.biojava.nbio.structure.Structure;
import org.biojava.nbio.structure.io.FileParsingParameters;
import org.biojava.nbio.structure.io.PDBFileParser;
import org.junit.Test;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.util.zip.GZIPInputStream;

import static org.junit.Assert.*;

/**
 * Tests for the conversion of a parsed {@link Structure} to CIF text.
 */
public class CifFileSupplierImplTest {

    /**
     * Parses a gzipped PDB-format test resource, converts it to CIF text and checks that
     * the entity categories are written out, then reads the CIF text back and checks that
     * the entities (count, mol ids and types) survive the round trip.
     *
     * @throws IOException if the test resource cannot be read or the CIF text cannot be parsed
     */
    @Test
    public void shouldReadRawPdbOutputtingCifWithEntity() throws IOException {
        String resource = "/org/biojava/nbio/structure/io/4lup_phaser_output.pdb.gz";

        PDBFileParser pdbpars = new PDBFileParser();
        FileParsingParameters params = new FileParsingParameters();
        params.setAlignSeqRes(true);
        pdbpars.setFileParsingParameters(params);

        Structure s;
        try (InputStream rawStream = this.getClass().getResourceAsStream(resource)) {
            // fail with a clear message instead of an NPE inside GZIPInputStream
            assertNotNull("Test resource not found on classpath: " + resource, rawStream);
            try (InputStream inStream = new GZIPInputStream(rawStream)) {
                s = pdbpars.parsePDBFile(inStream);
            }
        }

        String cifText = CifStructureConverter.toText(s);
        assertTrue(cifText.contains("_entity.type"));
        assertTrue(cifText.contains("_entity_poly.pdbx_seq_one_letter_code_can"));
        // no value should have been written out as the literal string "null"
        assertFalse(cifText.contains("null"));
        assertTrue(cifText.contains("MSEQLTDQVLVERVQKGDQKAFNLLVVRYQHKVASLVSRYVPSGDVPDVVQEAFIKA"));

        // explicit charset so that the round trip does not depend on the platform default
        InputStream inputStream = new ByteArrayInputStream(cifText.getBytes(StandardCharsets.UTF_8));
        Structure readStruct = CifStructureConverter.fromInputStream(inputStream);

        assertEquals(s.getEntityInfos().size(), readStruct.getEntityInfos().size());
        for (int i = 0; i < s.getEntityInfos().size(); i++) {
            assertEquals(s.getEntityInfos().get(i).getMolId(), readStruct.getEntityInfos().get(i).getMolId());
            assertEquals(s.getEntityInfos().get(i).getType(), readStruct.getEntityInfos().get(i).getType());
        }

    }
}
2 changes: 1 addition & 1 deletion pom.xml
Expand Up @@ -44,7 +44,7 @@
<log4j.version>2.17.2</log4j.version>
<junit-jupiter.version>5.7.2</junit-jupiter.version>
<ciftools.artifact>ciftools-java</ciftools.artifact>
<ciftools.version>5.0.0</ciftools.version>
<ciftools.version>5.0.1</ciftools.version>
</properties>
<scm>
<connection>scm:git:git://github.com/biojava/biojava.git</connection>
Expand Down

0 comments on commit d09477e

Please sign in to comment.