-
Notifications
You must be signed in to change notification settings - Fork 154
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #501 from cdk/patch/dfpattern
Patch/dfpattern
- Loading branch information
Showing
6 changed files
with
710 additions
and
16 deletions.
There are no files selected for viewing
185 changes: 185 additions & 0 deletions
185
base/isomorphism/src/main/java/org/openscience/cdk/isomorphism/DfPattern.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,185 @@ | ||
/* | ||
* Copyright (C) 2018 NextMove Software | ||
* | ||
* Contact: cdk-devel@lists.sourceforge.net | ||
* | ||
* This program is free software; you can redistribute it and/or modify it | ||
* under the terms of the GNU Lesser General Public License as published by | ||
* the Free Software Foundation; either version 2.1 of the License, or (at | ||
* your option) any later version. All we ask is that proper credit is given | ||
* for our work, which includes - but is not limited to - adding the above | ||
* copyright notice to the beginning of your source code files, and to any | ||
* copyright notice that you may distribute with programs based on this work. | ||
* | ||
* This program is distributed in the hope that it will be useful, but WITHOUT | ||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public | ||
* License for more details. | ||
* | ||
* You should have received a copy of the GNU Lesser General Public License | ||
* along with this program; if not, write to the Free Software | ||
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
*/ | ||
|
||
package org.openscience.cdk.isomorphism; | ||
|
||
import org.openscience.cdk.interfaces.IAtom; | ||
import org.openscience.cdk.interfaces.IAtomContainer; | ||
import org.openscience.cdk.isomorphism.matchers.Expr; | ||
import org.openscience.cdk.isomorphism.matchers.IQueryAtom; | ||
import org.openscience.cdk.isomorphism.matchers.IQueryAtomContainer; | ||
import org.openscience.cdk.isomorphism.matchers.QueryAtomContainer; | ||
|
||
import java.util.Collections; | ||
|
||
import static org.openscience.cdk.isomorphism.matchers.Expr.Type.*; | ||
|
||
/** | ||
* The depth-first (DF) backtracking sub-structure matching algorithm so named | ||
* because it matches the molecule in a depth-first manner (bond by bond). The | ||
* algorithm is a simple but elegant backtracking search iterating over the | ||
* bonds of a query. Like the popular VF2 the algorithm, it uses linear memory | ||
* but unlike VF2 bonded atoms are selected from the neighbor lists of already | ||
* mapped atoms. | ||
* <br><br> | ||
* In practice VF2 take O(N<sup>2</sup>) to match a linear chain against it's | ||
* self whilst this algorithm is O(N). | ||
* <br><br> | ||
* Usage: | ||
* <pre>{@code | ||
* DfPattern ptrn = DfPattern.findSubstructure(query); | ||
* | ||
* // has match? | ||
* if (ptrn.matches(mol)) { | ||
* | ||
* } | ||
* | ||
* // get lazy mapping iterator | ||
* Mappings mappings = ptrn.matchAll(mol); | ||
* for (int[] amap : mappings) { | ||
* | ||
* } | ||
* | ||
* // test if pattern matches at a given atom | ||
* for (IAtom atom : mol.atoms()) { | ||
* if (ptrn.matchesRoot(atom)) { | ||
* | ||
* } | ||
* } | ||
* }</pre> | ||
* <b>References</b> | ||
* <ul> | ||
* <li>{@cdk.cite Ray57}</li> | ||
* <li>{@cdk.cite Ullmann76}</li> | ||
* <li>{@cdk.cite Cordella04}</li> | ||
* <li>{@cdk.cite Jeliazkova18}</li> | ||
* </ul> | ||
* | ||
* @author John Mayfield | ||
* @see DfPattern | ||
* @see Mappings | ||
*/ | ||
public class DfPattern extends Pattern { | ||
|
||
private final IQueryAtomContainer query; | ||
private final DfState state; | ||
|
||
private DfPattern(IQueryAtomContainer query) { | ||
this.query = query; | ||
determineFilters(query); | ||
state = new DfState(query); | ||
} | ||
|
||
private static void checkCompatibleAPI(IAtom atom) { | ||
if (atom.getContainer() == null) { | ||
throw new IllegalArgumentException( | ||
"This API can only be used with the option " + | ||
"CdkUseLegacyAtomContainer=false (default). The atoms in " + | ||
"the molecule provided do not know about their parent " + | ||
"molecule" | ||
); | ||
} | ||
} | ||
|
||
/** | ||
* {@inheritDoc} | ||
*/ | ||
@Override | ||
public int[] match(IAtomContainer target) { | ||
return matchAll(target).first(); | ||
} | ||
|
||
/** | ||
* {@inheritDoc} | ||
*/ | ||
@Override | ||
public boolean matches(IAtomContainer target) { | ||
return matchAll(target).atLeast(1); | ||
} | ||
|
||
/** | ||
* {@inheritDoc} | ||
*/ | ||
@Override | ||
public Mappings matchAll(IAtomContainer mol) { | ||
if (mol.getAtomCount() < query.getAtomCount()) | ||
return new Mappings(query, mol, Collections.<int[]>emptySet()); | ||
if (mol.getAtomCount() > 0) | ||
checkCompatibleAPI(mol.getAtom(0)); | ||
DfState local = new DfState(state); | ||
local.setMol(mol); | ||
return filter(new Mappings(query, mol, local), query, mol); | ||
} | ||
|
||
/** | ||
* Match the pattern at the provided root. | ||
* | ||
* @param root the root atom of the molecule | ||
* @return mappings | ||
* @see Mappings | ||
*/ | ||
Mappings matchRoot(IAtom root) { | ||
checkCompatibleAPI(root); | ||
IAtomContainer mol = root.getContainer(); | ||
if (query.getAtomCount() > 0 && ((IQueryAtom) query.getAtom(0)).matches(root)) { | ||
DfState local = new DfState(state); | ||
local.setRoot(root); | ||
return filter(new Mappings(query, mol, local), query, mol); | ||
} else { | ||
return new Mappings(query, mol, Collections.<int[]>emptySet()); | ||
} | ||
} | ||
|
||
/** | ||
* Test whether the pattern matches at the provided atom. | ||
* | ||
* @param root the root atom of the molecule | ||
* @return the pattern matches | ||
*/ | ||
public boolean matchesRoot(IAtom root) { | ||
return matchRoot(root).atLeast(1); | ||
} | ||
|
||
/** | ||
* Create a pattern which can be used to find molecules which contain the | ||
* {@code query} structure. If a 'real' molecule is provided is is converted | ||
* with {@link QueryAtomContainer#create(IAtomContainer, Expr.Type...)} | ||
* matching elements, aromaticity status, and bond orders. | ||
* | ||
* @param query the substructure to find | ||
* @return a pattern for finding the {@code query} | ||
* @see QueryAtomContainer#create(IAtomContainer, Expr.Type...) | ||
*/ | ||
public static DfPattern findSubstructure(IAtomContainer query) { | ||
if (query instanceof IQueryAtomContainer) | ||
return new DfPattern((IQueryAtomContainer) query); | ||
else | ||
return new DfPattern(QueryAtomContainer.create(query, | ||
ALIPHATIC_ELEMENT, | ||
AROMATIC_ELEMENT, | ||
SINGLE_OR_AROMATIC, | ||
ALIPHATIC_ORDER, | ||
STEREOCHEMISTRY)); | ||
} | ||
|
||
} |
Oops, something went wrong.