Skip to content

Commit

Permalink
Fix IteratingSdfReader - Ensure extra empty lines don't affect the property reading of the SDF property fields.
Browse files Browse the repository at this point in the history
  • Loading branch information
johnmay authored and egonw committed Mar 27, 2023
1 parent c8849c9 commit 0f581a8
Show file tree
Hide file tree
Showing 3 changed files with 303 additions and 14 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -315,11 +315,13 @@ private void readDataBlockInto(IAtomContainer m) throws IOException {
if (str.startsWith(SDF_DATA_HEADER)) {
dataHeader = extractFieldName(str);
skipOtherFieldHeaderLines(str);
String data = extractFieldData(sb);
String data = extractFieldData(sb).trim();
if (dataHeader != null) {
logger.info("fieldName, data: ", dataHeader, ", ", data);
m.setProperty(dataHeader, data);
}
} else if (currentLine.isEmpty()) {
currentLine = input.readLine();
} else {
break;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,11 @@
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.HashMap;
import java.util.Map;
import java.util.Properties;

import org.hamcrest.CoreMatchers;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
import org.openscience.cdk.test.CDKTestCase;
Expand All @@ -42,6 +45,8 @@
import org.openscience.cdk.tools.ILoggingTool;
import org.openscience.cdk.tools.LoggingToolFactory;

import static org.junit.jupiter.api.Assertions.assertTrue;

/**
* Test case for reading MDL mol files using one test file.
*
Expand All @@ -63,7 +68,7 @@ void testSDF() throws Exception {
while (reader.hasNext()) {
Object object = reader.next();
Assertions.assertNotNull(object);
Assertions.assertTrue(object instanceof IAtomContainer);
assertTrue(object instanceof IAtomContainer);
molCount++;
Assertions.assertEquals(MDLV2000Format.getInstance(), reader.getFormat(), "Molecule # was not in MDL V2000 format: " + molCount);
}
Expand Down Expand Up @@ -91,7 +96,7 @@ public boolean ready() throws IOException {
while (reader.hasNext()) {
Object object = reader.next();
Assertions.assertNotNull(object);
Assertions.assertTrue(object instanceof IAtomContainer);
assertTrue(object instanceof IAtomContainer);
molCount++;
Assertions.assertEquals(MDLV2000Format.getInstance(), reader.getFormat(), "Molecule # was not in MDL V2000 format: " + molCount);
}
Expand All @@ -108,10 +113,10 @@ void testReadTitle() throws Exception {
IteratingSDFReader reader = new IteratingSDFReader(ins, DefaultChemObjectBuilder.getInstance());

//int molCount = 0;
Assertions.assertTrue(reader.hasNext());
assertTrue(reader.hasNext());
Object object = reader.next();
Assertions.assertNotNull(object);
Assertions.assertTrue(object instanceof IAtomContainer);
assertTrue(object instanceof IAtomContainer);
Assertions.assertEquals("2-methylbenzo-1,4-quinone", ((IAtomContainer) object).getTitle());
Assertions.assertEquals(MDLV2000Format.getInstance(), reader.getFormat());
reader.close();
Expand All @@ -125,10 +130,10 @@ void testReadDataItems() throws Exception {
IteratingSDFReader reader = new IteratingSDFReader(ins, DefaultChemObjectBuilder.getInstance());

//int molCount = 0;
Assertions.assertTrue(reader.hasNext());
assertTrue(reader.hasNext());
Object object = reader.next();
Assertions.assertNotNull(object);
Assertions.assertTrue(object instanceof IAtomContainer);
assertTrue(object instanceof IAtomContainer);
IAtomContainer m = (IAtomContainer) object;
Assertions.assertEquals("1", m.getProperty("E_NSC"));
Assertions.assertEquals("553-97-9", m.getProperty("E_CAS"));
Expand Down Expand Up @@ -159,7 +164,7 @@ void testOnMDLMolfile() throws Exception {
while (reader.hasNext()) {
Object object = reader.next();
Assertions.assertNotNull(object);
Assertions.assertTrue(object instanceof IAtomContainer);
assertTrue(object instanceof IAtomContainer);
molCount++;
}

Expand All @@ -178,7 +183,7 @@ void testOnSingleEntrySDFile() throws Exception {
while (reader.hasNext()) {
Object object = reader.next();
Assertions.assertNotNull(object);
Assertions.assertTrue(object instanceof IAtomContainer);
assertTrue(object instanceof IAtomContainer);
molCount++;
}

Expand All @@ -196,7 +201,7 @@ void testEmptyEntryIteratingReader() throws IOException {
while (reader.hasNext()) {
Object object = reader.next();
Assertions.assertNotNull(object);
Assertions.assertTrue(object instanceof IAtomContainer);
assertTrue(object instanceof IAtomContainer);
molCount++;

if (molCount == 2) {
Expand Down Expand Up @@ -228,10 +233,10 @@ void testZeroZCoordinates() throws Exception {
while (reader.hasNext()) {
Object object = reader.next();
Assertions.assertNotNull(object);
Assertions.assertTrue(object instanceof IAtomContainer);
assertTrue(object instanceof IAtomContainer);
molCount++;
boolean has3d = GeometryUtil.has3DCoordinates((IAtomContainer) object);
Assertions.assertTrue(has3d);
assertTrue(has3d);
}
Assertions.assertNotSame(0, molCount);
reader.close();
Expand All @@ -249,7 +254,7 @@ void testNo3DCoordsButForcedAs() throws IOException {
while (reader.hasNext()) {
Object object = reader.next();
Assertions.assertNotNull(object);
Assertions.assertTrue(object instanceof IAtomContainer);
assertTrue(object instanceof IAtomContainer);
molCount++;
mol = (IAtomContainer) object;
}
Expand All @@ -270,7 +275,7 @@ void testNo3DCoordsButForcedAs() throws IOException {
while (reader.hasNext()) {
Object object = reader.next();
Assertions.assertNotNull(object);
Assertions.assertTrue(object instanceof IAtomContainer);
assertTrue(object instanceof IAtomContainer);
molCount++;
mol = (IAtomContainer) object;
}
Expand Down Expand Up @@ -346,4 +351,26 @@ void testV3000MolfileFormat() throws IOException, CDKException {

}

/**
 * Regression test for SD file records carrying extra spaces, as produced by
 * the ChEMBL API: the {@code chembl_id} and {@code chembl_pref_name} data
 * fields of the first three records must still be read with their exact
 * values.
 *
 * @throws IOException if the test resource cannot be read or closed
 */
@Test
void testExtraSpaces() throws IOException {
    try (InputStream in = getClass().getResourceAsStream("chemblApiExamples.sdf")) {
        // Fail with a clear message rather than an NPE inside the reader
        // when the fixture is missing from the classpath.
        Assertions.assertNotNull(in, "test resource chemblApiExamples.sdf not found");
        IChemObjectBuilder builder = DefaultChemObjectBuilder.getInstance();
        // The reader is a resource of its own; close it even when an assertion fails.
        try (IteratingSDFReader reader = new IteratingSDFReader(in, builder)) {
            reader.setSkip(true); // skip over null entries and keep reading until EOF
            assertNextChemblEntry(reader, "CHEMBL564829", "MILCICLIB");
            assertNextChemblEntry(reader, "CHEMBL603469", "LESTAURTINIB");
            assertNextChemblEntry(reader, "CHEMBL1946170", "REGORAFENIB");
        }
    }
}

/**
 * Asserts that the reader has another record and that its ChEMBL data
 * fields hold the expected values.
 *
 * @param reader       reader positioned before the record to check
 * @param expectedId   expected value of the {@code chembl_id} property
 * @param expectedName expected value of the {@code chembl_pref_name} property
 */
private static void assertNextChemblEntry(IteratingSDFReader reader,
                                          String expectedId,
                                          String expectedName) {
    assertTrue(reader.hasNext());
    IAtomContainer mol = reader.next();
    // JUnit 5 order is (expected, actual); keeping it makes failure messages accurate.
    Assertions.assertEquals(expectedId, mol.<String>getProperty("chembl_id"));
    Assertions.assertEquals(expectedName, mol.<String>getProperty("chembl_pref_name"));
}

}

0 comments on commit 0f581a8

Please sign in to comment.