Skip to content

Commit

Permalink
Restore old crystal water parsing in v0.8.x (#229)
Browse files Browse the repository at this point in the history
* Add (currently failing) test for crystal water parsing for 0.8.x

* latest cython

* apply old parsing behavior to 0.8.x

* tweak tests in 0.8.x

* linting
  • Loading branch information
bjodah committed Apr 22, 2024
1 parent 578bfb1 commit c7a9c1d
Show file tree
Hide file tree
Showing 4 changed files with 102 additions and 59 deletions.
1 change: 1 addition & 0 deletions .woodpecker.yaml
Expand Up @@ -27,6 +27,7 @@ steps:
- export CPATH=$SUNDBASE/include:$CPATH
- export LIBRARY_PATH=$SUNDBASE/lib
- export LD_LIBRARY_PATH=$SUNDBASE/lib
- python3 -m pip install --cache-dir $CACHE_ROOT/pip_cache --user --upgrade-strategy=eager --upgrade cython
- python3 -m pip install --cache-dir $CACHE_ROOT/pip_cache --user -e .[all]
- python3 -c "import pycvodes; import pyodesys; import pygslodeiv2" # debug this CI config
- git fetch -tq
Expand Down
2 changes: 1 addition & 1 deletion README.rst
Expand Up @@ -385,7 +385,7 @@ If you make use of ChemPy in e.g. academic work you may cite the following peer-
Depending on what underlying solver you are using you should also cite the appropriate paper
(you can look at the list of references in the JOSS article). If you need to reference,
in addition to the paper, a specific point version of ChemPy (for e.g. reproducibility)
you can get per-version DOIs from the zendodo archive:
you can get per-version DOIs from the zenodo archive:

.. image:: https://zenodo.org/badge/8840/bjodah/chempy.svg
:target: https://zenodo.org/badge/latestdoi/8840/bjodah/chempy
Expand Down
40 changes: 33 additions & 7 deletions chempy/util/parsing.py
Expand Up @@ -5,8 +5,9 @@
from collections import defaultdict

import re
import warnings

from .pyutil import memoize
from .pyutil import memoize, ChemPyDeprecationWarning
from .periodic import symbols

parsing_library = "pyparsing" # info used for selective testing.
Expand Down Expand Up @@ -95,7 +96,7 @@ def _get_formula_parser():
| '{' formula '}'
| '[' formula ']' ) count prime charge?
formula :: term+
hydrate :: '.' count? formula
hydrate :: ( '.' | '\u00B7' | '*' ) count? formula
state :: '(' ( 's' | 'l' | 'g' | 'aq' | 'cr' ) ')'
compound :: count formula hydrate? state?
Expand All @@ -114,7 +115,7 @@ def _get_formula_parser():
| '{' formula '}'
| '[' formula ']' ) count prime charge?
formula :: term+
hydrate :: '..' count? formula
hydrate :: ( '..' | '\u00B7' | '*' ) count? formula
state :: '(' ( 's' | 'l' | 'g' | 'aq' | 'cr' ) ')'
compound :: count formula hydrate? state?
"""
Expand Down Expand Up @@ -334,7 +335,7 @@ def _parse_stoich(stoich):

_unicode_mapping = {k + "-": v + "-" for k, v in zip(_greek_letters, _greek_u)}
_unicode_mapping["."] = "⋅"
_unicode_infix_mapping = {"..": "·"}
_unicode_infix_mapping = {"..": "\u00b7"}

_html_mapping = {k + "-": "&" + k + ";-" for k in _greek_letters}
_html_mapping["."] = "⋅"
Expand Down Expand Up @@ -377,7 +378,7 @@ def formula_to_composition(
True
>>> formula_to_composition('.NHO-(aq)') == {0: -1, 1: 1, 7: 1, 8: 1}
True
>>> formula_to_composition('Na2CO3..7H2O') == {11: 2, 6: 1, 8: 10, 1: 14}
>>> formula_to_composition('Na2CO3*7H2O') == {11: 2, 6: 1, 8: 10, 1: 14}
True
"""
Expand All @@ -386,7 +387,19 @@ def formula_to_composition(

stoich_tok, chg_tok = _formula_to_parts(formula, prefixes, suffixes)[:2]
tot_comp = {}
parts = stoich_tok.split("..")
if ".." in stoich_tok:
parts = stoich_tok.split("..")
elif "\u00b7" in stoich_tok:
parts = stoich_tok.split('\u00b7')
elif '.' in stoich_tok:
warnings.warn(
("dot is ambiguous in chempy-0.8.x, prefer '*' or '\u00b7' for complexes."
" Dot will be interpreted as floating point in chempy-0.9+"),
ChemPyDeprecationWarning
)
parts = stoich_tok.split('.')
else:
parts = list(filter(len, internal_asterisk.split(stoich_tok)))

for idx, stoich in enumerate(parts):
if idx == 0:
Expand Down Expand Up @@ -523,6 +536,9 @@ def to_reaction(line, substance_keys, token, Cls, globals_=None, **kwargs):
)


# Matches "<left>*<right>": a run of non-whitespace, non-asterisk characters,
# a literal asterisk, then a run of ASCII letters/digits. Both sides are
# captured, so ``split`` returns the two operands (alongside empty strings
# that callers filter out).
# NOTE(review): ``sub`` with a plain replacement string replaces the whole
# match, i.e. it drops both captured operands; a backreference replacement
# would be needed to keep them -- confirm against the callers.
internal_asterisk = re.compile(r"([^\s\*]+)\*([a-zA-Z0-9]+)")


def _formula_to_format(
sub,
sup,
Expand All @@ -532,7 +548,17 @@ def _formula_to_format(
suffixes=("(s)", "(l)", "(g)", "(aq)"),
):
parts = _formula_to_parts(formula, prefixes.keys(), suffixes)
stoichs = parts[0].split("..")
parts0 = parts[0].replace("..", "\u00B7")
parts0 = internal_asterisk.sub("\u00B7", parts0)
if '.' in parts0:
warnings.warn(
("dot is ambiguous in chempy-0.8.x, prefer '*' or '\u00b7' for complexes."
" Dot will be interpreted as floating point in chempy-0.9+"),
ChemPyDeprecationWarning
)
parts0 = parts0.replace('.', "\u00B7")
stoichs = parts0.split("\u00B7")

string = ""
for idx, stoich in enumerate(stoichs):
if idx == 0:
Expand Down
118 changes: 67 additions & 51 deletions chempy/util/tests/test_parsing.py
Expand Up @@ -303,45 +303,46 @@ def test_formula_to_composition_bad_complexes(species):
formula_to_composition(species)


@pytest.mark.parametrize(
"species, composition",
[
(
"Ca2.832Fe0.6285Mg5.395(CO3)6",
{
6: 6,
8: 18,
12: 5.395,
20: 2.832,
26: 0.6285,
},
),
(
"Ca2.832Fe0.6285Mg5.395(CO3)6(s)",
{
6: 6,
8: 18,
12: 5.395,
20: 2.832,
26: 0.6285,
},
),
(
"Ca2.832Fe0.6285Mg5.395(CO3)6..8H2O(s)",
{
1: 16,
6: 6,
8: 26,
12: 5.395,
20: 2.832,
26: 0.6285,
},
),
],
)
@requires(parsing_library)
def test_formula_to_composition_fractional_subscripts(species, composition):
assert formula_to_composition(species) == composition
# This test is enabled in chempy-0.9+
# @pytest.mark.parametrize(
# "species, composition",
# [
# (
# "Ca2.832Fe0.6285Mg5.395(CO3)6",
# {
# 6: 6,
# 8: 18,
# 12: 5.395,
# 20: 2.832,
# 26: 0.6285,
# },
# ),
# (
# "Ca2.832Fe0.6285Mg5.395(CO3)6(s)",
# {
# 6: 6,
# 8: 18,
# 12: 5.395,
# 20: 2.832,
# 26: 0.6285,
# },
# ),
# (
# "Ca2.832Fe0.6285Mg5.395(CO3)6..8H2O(s)",
# {
# 1: 16,
# 6: 6,
# 8: 26,
# 12: 5.395,
# 20: 2.832,
# 26: 0.6285,
# },
# ),
# ],
# )
# @requires(parsing_library)
# def test_formula_to_composition_fractional_subscripts(species, composition):
# assert formula_to_composition(species) == composition


@pytest.mark.parametrize(
Expand Down Expand Up @@ -535,18 +536,19 @@ def test_to_reaction():
),
("[Fe(CN)6]-3", r"[Fe(CN)_{6}]^{3-}"),
("[Fe(CN)6]-3(aq)", r"[Fe(CN)_{6}]^{3-}(aq)"),
(
"Ca2.832Fe0.6285Mg5.395(CO3)6",
r"Ca_{2.832}Fe_{0.6285}Mg_{5.395}(CO_{3})_{6}",
),
(
"Ca2.832Fe0.6285Mg5.395(CO3)6(s)",
r"Ca_{2.832}Fe_{0.6285}Mg_{5.395}(CO_{3})_{6}(s)",
),
(
"Ca2.832Fe0.6285Mg5.395(CO3)6..8H2O(s)",
r"Ca_{2.832}Fe_{0.6285}Mg_{5.395}(CO_{3})_{6}\cdot 8H_{2}O(s)",
),
# This test is enabled in chempy-0.9+:
# (
# "Ca2.832Fe0.6285Mg5.395(CO3)6",
# r"Ca_{2.832}Fe_{0.6285}Mg_{5.395}(CO_{3})_{6}",
# ),
# (
# "Ca2.832Fe0.6285Mg5.395(CO3)6(s)",
# r"Ca_{2.832}Fe_{0.6285}Mg_{5.395}(CO_{3})_{6}(s)",
# ),
# (
# "Ca2.832Fe0.6285Mg5.395(CO3)6..8H2O(s)",
# r"Ca_{2.832}Fe_{0.6285}Mg_{5.395}(CO_{3})_{6}\cdot 8H_{2}O(s)",
# ),
],
)
@requires(parsing_library)
Expand Down Expand Up @@ -712,3 +714,17 @@ def test_formula_to_html(species, html):
def test_formula_to_html_caged(species, html):
"""Should produce HTML for cage species."""
assert formula_to_html(species) == html


def test_composition_dot_as_crystal_water_chempy08x():
    """Check the crystal water delimiters accepted by ChemPy v0.8.x.

    In ChemPy v0.8.x a dot signifies crystal water, but an asterisk ('*')
    or an interpunct (·) is also accepted (and preferred). From ChemPy
    v0.9.x onwards only the interpunct and the asterisk are interpreted as
    crystal water delimiters, while a dot is interpreted as the floating
    point delimiter in fractional stoichiometric coefficients.
    """
    expected = {30: 1, 7: 2, 8: 12, 1: 12}

    # Preferred delimiters: interpunct first, then asterisk.
    for delimiter in ("\u00B7", "*"):
        formula = "Zn(NO3)2" + delimiter + "6H2O"
        assert formula_to_composition(formula) == expected

    # The legacy dot still parses, but must emit a deprecation warning, see
    # https://docs.pytest.org/en/7.1.x/how-to/capture-warnings.html#ensuring-code-triggers-a-deprecation-warning
    with pytest.deprecated_call():
        assert formula_to_composition("Zn(NO3)2.6H2O") == expected

0 comments on commit c7a9c1d

Please sign in to comment.