Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add hashes to Q-Chem task documents #786

Open
wants to merge 12 commits into
base: main
Choose a base branch
from
49 changes: 49 additions & 0 deletions atomate/qchem/drones.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,14 @@
from monty.json import jsanitize
from pymatgen.apps.borg.hive import AbstractDrone
from pymatgen.core import Molecule
from pymatgen.core.periodic_table import Element
from pymatgen.io.babel import BabelMolAdaptor
from pymatgen.io.qchem.inputs import QCInput
from pymatgen.io.qchem.outputs import QCOutput
from pymatgen.analysis.graphs import MoleculeGraph
from pymatgen.analysis.local_env import OpenBabelNN, metal_edge_extender
from pymatgen.symmetry.analyzer import PointGroupAnalyzer
from pymatgen.util.graph_hashing import weisfeiler_lehman_graph_hash

from atomate import __version__ as atomate_version
from atomate.utils.utils import get_logger
Expand All @@ -31,6 +35,13 @@
logger = get_logger(__name__)


# Symbols (as strings) of every element with atomic number 1-118 that
# pymatgen's ``Element.is_metal`` reports as a metal.
METALS = {
    str(element)
    for element in map(Element.from_Z, range(1, 119))
    if element.is_metal
}


class QChemDrone(AbstractDrone):
"""
A QChem drone to parse QChem calculations and insert an organized, searchable entry into the database.
Expand All @@ -53,6 +64,9 @@ class QChemDrone(AbstractDrone):
"chemsys",
"pointgroup",
"formula_alphabetical",
"species_hash",
"coord_hash",
"species_hash_nometal"
},
"input": {"initial_molecule", "job_type"},
"output": {"initial_molecule", "job_type", "final_energy"},
Expand Down Expand Up @@ -346,6 +360,41 @@ def generate_doc(self, dir_name, qcinput_files, qcoutput_files, multirun):
smiles = pbmol.write("smi").split()[0]
d["smiles"] = smiles

# Add graph hashes
# This is primarily for emmet builders
if "optimized_molecule" in d["output"]:
hash_mol = d["output"]["optimized_molecule"]
else:
hash_mol = d["output"]["initial_molecule"]

hash_mg = MoleculeGraph.with_local_env_strategy(hash_mol, OpenBabelNN())
hash_mg = metal_edge_extender(hash_mg)
undir_mg = hash_mg.graph.to_undirected()

metal_inds = [i for i, e in enumerate(hash_mol.species) if str(e) in METALS]

to_delete = list()
for bond in hash_mg.graph.edges():
if bond[0] in metal_inds or bond[1] in metal_inds:
to_delete.append((bond[0], bond[1]))

mg_nometal = copy.deepcopy(hash_mg)
for b in to_delete:
mg_nometal.break_edge(b[0], b[1], allow_reverse=True)

d["coord_hash"] = weisfeiler_lehman_graph_hash(
undir_mg,
node_attr="coords"
)
d["species_hash"] = weisfeiler_lehman_graph_hash(
undir_mg,
node_attr="specie"
)
d["species_hash_nometal"] = weisfeiler_lehman_graph_hash(
mg_nometal.graph.to_undirected(),
node_attr="specie"
)

d["state"] = "successful" if d_calc_final["completion"] else "unsuccessful"
if "special_run_type" in d:
if d["special_run_type"] in [
Expand Down
33 changes: 33 additions & 0 deletions atomate/qchem/tests/test_drones.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,9 @@ def test_assimilate_opt(self):
self.assertEqual(doc["smiles"], "O1[C](O[Li])OC=C1")
self.assertEqual(doc["formula_pretty"], "LiH2(CO)3")
self.assertEqual(doc["formula_anonymous"], "AB2C3D3")
self.assertEqual(doc["species_hash"], "44fb6a8c8e99aed63f23e573e720018d")
self.assertEqual(doc["coord_hash"], "acbb408648e992fea44acb87e912fd5f")
self.assertEqual(doc["species_hash_nometal"], "48ba8b7456a39b5ee1f8d76772d9f4c8")
self.assertEqual(doc["chemsys"], "C-H-Li-O")
self.assertEqual(doc["pointgroup"], "Cs")
self.assertIn("custodian", doc)
Expand Down Expand Up @@ -77,6 +80,9 @@ def test_assimilate_pes_scan(self):
self.assertEqual(doc["smiles"], "[O]C(=O)[O]")
self.assertEqual(doc["formula_pretty"], "CO3")
self.assertEqual(doc["formula_anonymous"], "AB3")
self.assertEqual(doc["species_hash"], "75e7a4125709cb5a14d1ce2b84c3cdbd")
self.assertEqual(doc["coord_hash"], "65b93a5088773337b9372c4ce65aeb37")
self.assertEqual(doc["species_hash_nometal"], "75e7a4125709cb5a14d1ce2b84c3cdbd")
self.assertEqual(doc["chemsys"], "C-O")
self.assertEqual(doc["pointgroup"], "C2v")
self.assertIn("custodian", doc)
Expand Down Expand Up @@ -130,6 +136,9 @@ def test_assimilate_freq(self):
self.assertEqual(doc["smiles"], "O1[C](O[Li])OC=C1")
self.assertEqual(doc["formula_pretty"], "LiH2(CO)3")
self.assertEqual(doc["formula_anonymous"], "AB2C3D3")
self.assertEqual(doc["species_hash"], "44fb6a8c8e99aed63f23e573e720018d")
self.assertEqual(doc["coord_hash"], "acbb408648e992fea44acb87e912fd5f")
self.assertEqual(doc["species_hash_nometal"], "48ba8b7456a39b5ee1f8d76772d9f4c8")
self.assertEqual(doc["chemsys"], "C-H-Li-O")
self.assertEqual(doc["pointgroup"], "Cs")
self.assertIn("custodian", doc)
Expand Down Expand Up @@ -203,6 +212,9 @@ def test_assimilate_FF(self):
self.assertEqual(doc["smiles"], "O1[C](O[Li])OC=C1")
self.assertEqual(doc["formula_pretty"], "LiH2(CO)3")
self.assertEqual(doc["formula_anonymous"], "AB2C3D3")
self.assertEqual(doc["species_hash"], "44fb6a8c8e99aed63f23e573e720018d")
self.assertEqual(doc["coord_hash"], "acbb408648e992fea44acb87e912fd5f")
self.assertEqual(doc["species_hash_nometal"], "48ba8b7456a39b5ee1f8d76772d9f4c8")
self.assertEqual(doc["chemsys"], "C-H-Li-O")
self.assertEqual(doc["pointgroup"], "Cs")
self.assertIn("custodian", doc)
Expand Down Expand Up @@ -313,6 +325,9 @@ def test_assimilate_ffts(self):
self.assertEqual(doc["smiles"], "O(C(=O)[O])[Li].[CH2]COC(=O)O[Li]")
self.assertEqual(doc["formula_pretty"], "LiH2C2O3")
self.assertEqual(doc["formula_anonymous"], "AB2C2D3")
self.assertEqual(doc["species_hash"], "b58892da682cac0193cf85f25fe8c25b")
self.assertEqual(doc["coord_hash"], "ba40774a9d7a39f8354d0ca7efaff6d0")
self.assertEqual(doc["species_hash_nometal"], "d7ab8a26c0d207bad6bd1316a368c8d5")
self.assertEqual(doc["chemsys"], "C-H-Li-O")
self.assertEqual(doc["pointgroup"], "C1")
self.assertIn("custodian", doc)
Expand Down Expand Up @@ -396,6 +411,9 @@ def test_multirun(self):
self.assertEqual(doc["smiles"], "O1[C](O[Li])OC=C1")
self.assertEqual(doc["formula_pretty"], "LiH2(CO)3")
self.assertEqual(doc["formula_anonymous"], "AB2C3D3")
self.assertEqual(doc["species_hash"], "44fb6a8c8e99aed63f23e573e720018d")
self.assertEqual(doc["coord_hash"], "d11f6abeef573141250f38af1388ca0c")
self.assertEqual(doc["species_hash_nometal"], "48ba8b7456a39b5ee1f8d76772d9f4c8")
self.assertEqual(doc["chemsys"], "C-H-Li-O")
self.assertEqual(doc["pointgroup"], "C2")
self.assertIn("calcs_reversed", doc)
Expand Down Expand Up @@ -437,6 +455,9 @@ def test_assimilate_unstable_opt(self):
self.assertEqual(doc["cputime"], None)
self.assertEqual(doc["formula_pretty"], "CS2NO")
self.assertEqual(doc["formula_anonymous"], "ABCD2")
self.assertEqual(doc["species_hash"], "1559ce7584cf8c27f1c6044a6af76dd1")
self.assertEqual(doc["coord_hash"], "8698b987cdb70eed57bd0a7e77b7e00c")
self.assertEqual(doc["species_hash_nometal"], "1559ce7584cf8c27f1c6044a6af76dd1")
self.assertEqual(doc["chemsys"], "C-N-O-S")
self.assertEqual(doc["pointgroup"], "C1")
self.assertEqual(doc["orig"]["rem"], doc["calcs_reversed"][-1]["input"]["rem"])
Expand Down Expand Up @@ -471,6 +492,9 @@ def test_assimilate_opt_with_hidden_changes_from_handler(self):
self.assertEqual(doc["cputime"], 7471.17)
self.assertEqual(doc["formula_pretty"], "HC2O")
self.assertEqual(doc["formula_anonymous"], "ABC2")
self.assertEqual(doc["species_hash"], "6dc4aca792bcd6bd45bc5176f42f6aee")
self.assertEqual(doc["coord_hash"], "7cd547f71ddf74efcfb00743161d07f2")
self.assertEqual(doc["species_hash_nometal"], "6dc4aca792bcd6bd45bc5176f42f6aee")
self.assertEqual(doc["chemsys"], "C-H-O")
self.assertEqual(doc["pointgroup"], "C1")
self.assertEqual(doc["orig"]["rem"], doc["calcs_reversed"][-1]["input"]["rem"])
Expand Down Expand Up @@ -504,6 +528,9 @@ def test_assimilate_disconnected_opt(self):
self.assertEqual(doc["cputime"], 8825.76)
self.assertEqual(doc["formula_pretty"], "H2C2O3")
self.assertEqual(doc["formula_anonymous"], "A2B2C3")
self.assertEqual(doc["species_hash"], "c87c6b5a4bb8632cdb934e400a0237fb")
self.assertEqual(doc["coord_hash"], "6adeeb1d55585a35a6bcf9e1513218f2")
self.assertEqual(doc["species_hash_nometal"], "c87c6b5a4bb8632cdb934e400a0237fb")
self.assertEqual(doc["chemsys"], "C-H-O")
self.assertEqual(doc["pointgroup"], "C1")
self.assertEqual(doc["orig"]["rem"], doc["calcs_reversed"][-1]["input"]["rem"])
Expand All @@ -527,6 +554,9 @@ def test_assimilate_sp(self):
self.assertEqual(doc["smiles"], "[O]")
self.assertEqual(doc["formula_pretty"], "O2")
self.assertEqual(doc["formula_anonymous"], "A")
self.assertEqual(doc["species_hash"], "d6bfd1bfa289860fea9c71dd64fab914")
self.assertEqual(doc["coord_hash"], "d1e604b6971a7e2d889e3172456da7db")
self.assertEqual(doc["species_hash_nometal"], "d6bfd1bfa289860fea9c71dd64fab914")
self.assertEqual(doc["chemsys"], "O")
self.assertEqual(doc["pointgroup"], "Kh")
self.assertIn("custodian", doc)
Expand Down Expand Up @@ -572,6 +602,9 @@ def test_sp_with_orig(self):
self.assertEqual(doc["smiles"], "[O]")
self.assertEqual(doc["formula_pretty"], "O2")
self.assertEqual(doc["formula_anonymous"], "A")
self.assertEqual(doc["species_hash"], "d6bfd1bfa289860fea9c71dd64fab914")
self.assertEqual(doc["coord_hash"], "4b44e5b5c47ec269779254ae49ca0b51")
self.assertEqual(doc["species_hash_nometal"], "d6bfd1bfa289860fea9c71dd64fab914")
self.assertEqual(doc["chemsys"], "O")
self.assertEqual(doc["pointgroup"], "Kh")
self.assertIn("custodian", doc)
Expand Down