From da3f3efef6cd4923bd339b5710d2cf2a4de59e88 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Ingvar=20Dahlgren?= Date: Tue, 23 Apr 2024 11:07:38 +0200 Subject: [PATCH 1/2] Asterisk is already used for excited state in photochemistry --- chempy/util/parsing.py | 14 +++++--------- chempy/util/tests/test_parsing.py | 2 +- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/chempy/util/parsing.py b/chempy/util/parsing.py index aca30723..12ac5a57 100644 --- a/chempy/util/parsing.py +++ b/chempy/util/parsing.py @@ -96,7 +96,7 @@ def _get_formula_parser(): | '{' formula '}' | '[' formula ']' ) count prime charge? formula :: term+ - hydrate :: ( '.' | '\u00B7' | '*' ) count? formula + hydrate :: ( '..' | '\u00B7' | '.' ) count? formula state :: '(' ( 's' | 'l' | 'g' | 'aq' | 'cr' ) ')' compound :: count formula hydrate? state? @@ -115,7 +115,7 @@ def _get_formula_parser(): | '{' formula '}' | '[' formula ']' ) count prime charge? formula :: term+ - hydrate :: ( '..' | '\u00B7' | '*' ) count? formula + hydrate :: ( '..' | '\u00B7' | '.' ) count? formula state :: '(' ( 's' | 'l' | 'g' | 'aq' | 'cr' ) ')' compound :: count formula hydrate? state? """ @@ -387,9 +387,7 @@ def formula_to_composition( stoich_tok, chg_tok = _formula_to_parts(formula, prefixes, suffixes)[:2] tot_comp = {} - if ".." in stoich_tok: - parts = stoich_tok.split("..") - elif "\u00b7" in stoich_tok: + if "\u00b7" in stoich_tok: parts = stoich_tok.split('\u00b7') elif '.' in stoich_tok: warnings.warn( @@ -399,7 +397,8 @@ def formula_to_composition( ) parts = stoich_tok.split('.') else: - parts = list(filter(len, internal_asterisk.split(stoich_tok))) + parts = stoich_tok.split("..") + for idx, stoich in enumerate(parts): if idx == 0: @@ -536,8 +535,6 @@ def to_reaction(line, substance_keys, token, Cls, globals_=None, **kwargs): ) -internal_asterisk = re.compile(r"([^\s\*]+)\*([a-zA-Z0-9]+)") - def _formula_to_format( sub, @@ -549,7 +546,6 @@ def _formula_to_format( ): parts = _formula_to_parts(formula, prefixes.keys(), suffixes) parts0 = parts[0].replace("..", "\u00B7") - parts0 = internal_asterisk.sub("\u00B7", parts0) if '.' in parts0: warnings.warn( ("dot is ambiguous in chempy-0.8.x, prefer '*' or '' for complexes." diff --git a/chempy/util/tests/test_parsing.py b/chempy/util/tests/test_parsing.py index d6418e67..98196e0f 100644 --- a/chempy/util/tests/test_parsing.py +++ b/chempy/util/tests/test_parsing.py @@ -724,7 +724,7 @@ def test_composition_dot_as_crystal_water_chempy08x(): as floating point delimiter in fractional stoichiometric coefficients.""" ref = {30: 1, 7: 2, 8: 12, 1: 12} assert formula_to_composition('Zn(NO3)2{}6H2O'.format('\u00B7')) == ref - assert formula_to_composition('Zn(NO3)2*6H2O') == ref + assert formula_to_composition('Zn(NO3)2..6H2O') == ref # https://docs.pytest.org/en/7.1.x/how-to/capture-warnings.html#ensuring-code-triggers-a-deprecation-warning with pytest.deprecated_call(): assert formula_to_composition('Zn(NO3)2.6H2O') == ref From 51557e4d998b64baba4bb730ddde6a6cfa1acc57 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Ingvar=20Dahlgren?= Date: Tue, 23 Apr 2024 11:19:23 +0200 Subject: [PATCH 2/2] tidy up --- chempy/util/parsing.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/chempy/util/parsing.py b/chempy/util/parsing.py index 12ac5a57..9df71e91 100644 --- a/chempy/util/parsing.py +++ b/chempy/util/parsing.py @@ -378,7 +378,7 @@ def formula_to_composition( True >>> formula_to_composition('.NHO-(aq)') == {0: -1, 1: 1, 7: 1, 8: 1} True - >>> formula_to_composition('Na2CO3*7H2O') == {11: 2, 6: 1, 8: 10, 1: 14} + >>> formula_to_composition('Na2CO3..7H2O') == {11: 2, 6: 1, 8: 10, 1: 14} True """ @@ -389,6 +389,8 @@ def formula_to_composition( tot_comp = {} if "\u00b7" in stoich_tok: parts = stoich_tok.split('\u00b7') + elif '..' in stoich_tok: + parts = stoich_tok.split("..") elif '.' in stoich_tok: warnings.warn( ("dot is ambiguous in chempy-0.8.x, prefer '*' or '\u00b7' for complexes." @@ -397,8 +399,7 @@ def formula_to_composition( ) parts = stoich_tok.split('.') else: - parts = stoich_tok.split("..") - + parts = [stoich_tok] for idx, stoich in enumerate(parts): if idx == 0: @@ -535,7 +536,6 @@ def to_reaction(line, substance_keys, token, Cls, globals_=None, **kwargs): ) - def _formula_to_format( sub, sup,