diff --git a/.coveragerc b/.coveragerc new file mode 100644 index 0000000..7af96b1 --- /dev/null +++ b/.coveragerc @@ -0,0 +1,2 @@ +[run] +omit = tests/*,setup.py,beamspy/__main__.py,beamspy/qt/form.py,beamspy/gui.py \ No newline at end of file diff --git a/.github/workflows/build-test.yml b/.github/workflows/build-test.yml new file mode 100644 index 0000000..857fe05 --- /dev/null +++ b/.github/workflows/build-test.yml @@ -0,0 +1,60 @@ +name: beamspy + +on: [push, pull_request] + +jobs: + build: + runs-on: ${{ matrix.os }} + + strategy: + matrix: + os: [ubuntu-latest, windows-latest, macos-latest] + python-version: [3.7, 3.8] + + env: + OS: ${{ matrix.os }} + PYTHON: ${{ matrix.python-version }} + + steps: + - uses: actions/checkout@v2 + + - name: Setup conda - Python ${{ matrix.python-version }} + uses: s-weigand/setup-conda@v1 + with: + update-conda: true + python-version: ${{ matrix.python-version }} + conda-channels: conda-forge, bioconda + + - name: Install dependencies + run: | + + python --version + conda env update --file environment.yml --name base + + - name: Lint with flake8 + run: | + + conda install flake8 + + # stop build if there are Python syntax errors or undefined names + flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics + + # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide + flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics + + - name: Test with pytest-cov + run: | + + python setup.py install + beamspy --help + + conda install pytest codecov pytest-cov -c conda-forge + pytest --cov ./ --cov-config=.coveragerc --cov-report=xml + + - name: Upload code coverage to codecov + uses: codecov/codecov-action@v1 + with: + flags: unittests + env_vars: OS,PYTHON + fail_ci_if_error: true + verbose: true diff --git a/.travis.yml b/.travis.yml index 0c91b56..7635162 100644 --- a/.travis.yml +++ b/.travis.yml @@ -6,9 +6,13 @@ python: services: - xvfb +before_install: + - python --version + - pip install -U pip + - pip install -U pytest + - pip install codecov pytest-cov + install: - - pip install coverage - - pip install green codecov - pip install . before_script: @@ -16,7 +20,8 @@ before_script: script: - beamspy --help - - green -vvv --run-coverage + - pytest --cov=beamspy tests/ after_script: - - python -m codecov + - codecov + diff --git a/MANIFEST.in b/MANIFEST.in index 7c3f5c2..bc21db5 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,7 +1,8 @@ include README.rst include LICENSE -include requirements*.txt -include examples/*.* +include requirements.txt +include examples/run.py +include examples/run.sh include tests/*.py include tests/test_data/*.* include tests/test_results/.gitignore diff --git a/README.rst b/README.rst index 26218bc..3ddd994 100644 --- a/README.rst +++ b/README.rst @@ -1,106 +1,113 @@ -BEAMSpy - Birmingham mEtabolite Annotation for Mass Spectrometry (Python package) -================================================================================== -|Version| |Py versions| |Git| |Bioconda| |Build Status (Travis)| |Build Status (AppVeyor)| |License| |RTD doc| |codecov| |mybinder| - -BEAMSpy (Birmingham mEtabolite Annotation for Mass Spectrometry) is a Python package that includes several automated and -seamless computational modules that are applied to putatively annotate metabolites detected in untargeted ultra (high) -performance liquid chromatography-mass spectrometry or untargeted direct infusion mass spectrometry metabolomic assays -in an automated process. All reported metabolites are annotated to level 3 of the Metabolomics Standards -Initiative (MSI) reporting standards (Metabolomics. 2007 Sep; 3(3): 211–221. `doi: 10.1007/s11306-007-0082-2 `_). - -The package is highly flexible to suit the diversity of sample types studied and mass spectrometers applied in -untargeted metabolomics studies. The user can use the standard reference files included in the package or can develop -their own reference files. - - -- `Documentation (Read the Docs) `_ -- `Bug reports `_ - - -.. _quick_installation: - -Quick installation -------------------- - -Conda_ -~~~~~~~ - -1. `Install Miniconda `_. -2. Run the following commands to install BEAMSpy. - -Windows-64, Linux-64 and OSx - -:: - - $ conda create -n beamspy beamspy -c conda-forge -c bioconda -c computational-metabolomics - $ activate beamspy - -Linux-64 and OSx - -:: - - $ conda create -n beamspy beamspy -c conda-forge -c bioconda -c computational-metabolomics - $ source activate beamspy - - -Usage ------------------------- - -Command line interface (CLI) -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -:: - - $ beamspy --help - -Graphical user interface (GUI) -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -:: - - $ beamspy start-gui - -Bug reports ------------------------- - -Please report any bugs that you find `here `_. -Or fork the repository on `GitHub `_ -and create a pull request (PR). We welcome all contributions, and we will help you to make the PR if you are new to `git `_. - -License ------------------------- - -Released under the GNU General Public License v3.0 (see `LICENSE file `_) - -.. |Build Status (Travis)| image:: https://img.shields.io/travis/computational-metabolomics/beamspy.svg?branch=dev&style=flat&maxAge=3600&label=Travis-CI - :target: https://travis-ci.com/computational-metabolomics/beamspy - -.. |Build Status (AppVeyor)| image:: https://img.shields.io/appveyor/ci/RJMW/beamspy.svg?style=flat&maxAge=3600&label=AppVeyor - :target: https://ci.appveyor.com/project/RJMW/beamspy - -.. |Py versions| image:: https://img.shields.io/pypi/pyversions/beamspy.svg?style=flat&maxAge=3600 - :target: https://pypi.python.org/pypi/beamspy/ - -.. |Version| image:: https://img.shields.io/pypi/v/beamspy.svg?style=flat&maxAge=3600 - :target: https://pypi.python.org/pypi/beamspy/ - -.. |Git| image:: https://img.shields.io/badge/repository-GitHub-blue.svg?style=flat&maxAge=3600 - :target: https://github.com/computational-metabolomics/beamspy - -.. |Bioconda| image:: https://img.shields.io/badge/install%20with-bioconda-brightgreen.svg?style=flat&maxAge=3600 - :target: http://bioconda.github.io/recipes/beamspy/README.html - -.. |License| image:: https://img.shields.io/badge/License-GPL%20v3-blue.svg - :target: https://www.gnu.org/licenses/gpl-3.0.html - -.. |RTD doc| image:: https://img.shields.io/badge/documentation-RTD-71B360.svg?style=flat&maxAge=3600 - :target: https://beamspy.readthedocs.io/en/latest/ - -.. |codecov| image:: https://codecov.io/gh/computational-metabolomics/beamspy/branch/master/graph/badge.svg - :target: https://codecov.io/gh/computational-metabolomics/beamspy - -.. |mybinder| image:: https://mybinder.org/badge_logo.svg - :target: https://mybinder.org/v2/gh/computational-metabolomics/beamspy/master?filepath=notebooks - -.. _pip: https://pip.pypa.io/ -.. _Conda: http://conda.pydata.org/docs/ +BEAMSpy - Birmingham mEtabolite Annotation for Mass Spectrometry (Python package) +================================================================================== +|Version| |Py versions| |Git| |Bioconda| |Build Status| |Build Status (AppVeyor)| |License| |RTD doc| |codecov| |mybinder| + +BEAMSpy (Birmingham mEtabolite Annotation for Mass Spectrometry) is a Python package that includes several automated and +seamless computational modules that are applied to putatively annotate metabolites detected in untargeted ultra (high) +performance liquid chromatography-mass spectrometry or untargeted direct infusion mass spectrometry metabolomic assays. +All reported metabolites are annotated to level 2 or 3 of the Metabolomics Standards +Initiative (MSI) reporting standards (Metabolomics. 2007 Sep; 3(3): 211–221. `doi: 10.1007/s11306-007-0082-2 `_). +The package is highly flexible to suit the diversity of sample types studied and mass spectrometers applied in +untargeted metabolomics studies. The user can use the standard reference files included in the package or can develop +their own reference files. + + +- `Documentation (Read the Docs) `_ +- `Bug reports `_ + + +Quick installation +------------------- + +Conda_ +~~~~~~~ + +1. Install `Miniconda `_. Follow the steps described `here `__. +2. Run the following commands to install BEAMSpy. + +Windows-64, Linux-64 and OSx + +:: + + $ conda create -n beamspy beamspy -c conda-forge -c bioconda -c computational-metabolomics + $ activate beamspy + +Linux-64 and OSx + +:: + + $ conda create -n beamspy beamspy -c conda-forge -c bioconda -c computational-metabolomics + $ source activate beamspy + + +Usage +------------------------ + +Command line interface (CLI) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +:: + + $ beamspy --help + +Graphical user interface (GUI) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +:: + + $ beamspy start-gui + + +Bug reports +------------------------ + +Please report any bugs that you find `here `__. +Or fork the repository on `GitHub `_ +and create a pull request (PR). We welcome all contributions, and we will help you to make the PR if you are new to `git `_. + + +Credits +------- + - `Team (University of Birmingham and EMBL-EBI) `__ + +**Code base** + - Ralf J. M. Weber (r.j.weber@bham.ac.uk) - `University of Birmingham (UK) `__ + + +License +------------------------ + +Released under the GNU General Public License v3.0 (see `LICENSE `_) + +.. |Build Status| image:: https://github.com/computational-metabolomics/beamspy/workflows/beamspy/badge.svg + :target: https://github.com/computational-metabolomics/beamspy/actions + +.. |Build Status (AppVeyor)| image:: https://img.shields.io/appveyor/ci/RJMW/beamspy.svg?style=flat&maxAge=3600&label=AppVeyor + :target: https://ci.appveyor.com/project/RJMW/beamspy + +.. |Py versions| image:: https://img.shields.io/pypi/pyversions/beamspy.svg?style=flat&maxAge=3600 + :target: https://pypi.python.org/pypi/beamspy/ + +.. |Version| image:: https://img.shields.io/pypi/v/beamspy.svg?style=flat&maxAge=3600 + :target: https://pypi.python.org/pypi/beamspy/ + +.. |Git| image:: https://img.shields.io/badge/repository-GitHub-blue.svg?style=flat&maxAge=3600 + :target: https://github.com/computational-metabolomics/beamspy + +.. |Bioconda| image:: https://img.shields.io/badge/install%20with-bioconda-brightgreen.svg?style=flat&maxAge=3600 + :target: http://bioconda.github.io/recipes/beamspy/README.html + +.. |License| image:: https://img.shields.io/badge/License-GPL%20v3-blue.svg + :target: https://www.gnu.org/licenses/gpl-3.0.html + +.. |RTD doc| image:: https://img.shields.io/badge/documentation-RTD-71B360.svg?style=flat&maxAge=3600 + :target: https://beamspy.readthedocs.io/en/latest/ + +.. |codecov| image:: https://codecov.io/gh/computational-metabolomics/beamspy/branch/master/graph/badge.svg + :target: https://codecov.io/gh/computational-metabolomics/beamspy + +.. |mybinder| image:: https://mybinder.org/badge_logo.svg + :target: https://mybinder.org/v2/gh/computational-metabolomics/beamspy/master?filepath=notebooks + +.. _pip: https://pip.pypa.io/ +.. _Conda: https://conda.io/en/latest/ diff --git a/appveyor.yml b/appveyor.yml index 4392d05..d3a17ee 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -6,7 +6,7 @@ build_script: - '%PYTHON%\python.exe -m pip install .' before_test: - - '%PYTHON%\python.exe -m pip install coverage green' + - '%PYTHON%\python.exe -m pip install pytest' test_script: - - '%PYTHON%\python.exe -m green -vvv --run-coverage' + - '%PYTHON%\python.exe -m pytest' diff --git a/beamspy/__init__.py b/beamspy/__init__.py index aa19d21..8084deb 100644 --- a/beamspy/__init__.py +++ b/beamspy/__init__.py @@ -1,4 +1,4 @@ __author__ = 'r.j.weber@bham.ac.uk' __credits__ = 'r.j.weber@bham.ac.uk' -__version__ = '1.0.0' +__version__ = '1.1.0' __license__ = 'GPLv3' \ No newline at end of file diff --git a/beamspy/__main__.py b/beamspy/__main__.py index 832a173..a1c9ed7 100644 --- a/beamspy/__main__.py +++ b/beamspy/__main__.py @@ -96,8 +96,8 @@ def main(): parser_app.add_argument('-a', '--adducts', action='store_true', required=False, help="Annotate adducts.") - parser_app.add_argument('-b', '--adducts-library', action='append', required=False, - default=[], help="List of adducts.") + parser_app.add_argument('-b', '--adducts-library', type=str, default=None, required=False, + help="List of adducts.") parser_app.add_argument('-e', '--isotopes', action='store_true', required=False, help="Annotate isotopes.") @@ -105,15 +105,15 @@ def main(): parser_app.add_argument('-f', '--isotopes-library', required=False, help="List of isotopes.") - parser_app.add_argument('-r', '--multiple-charged-ions', action='store_true', required=False, - help="Annotate multiple-charged ions.") - - parser_app.add_argument('-s', '--multiple-charged-ions-library', required=False, - help="List of multiple charged ions.") - parser_app.add_argument('-o', '--oligomers', action='store_true', required=False, help="Annotate oligomers.") + parser_app.add_argument('-n', '--neutral-losses', action='store_true', required=False, + help="Annotate neutral losses.") + + parser_app.add_argument('-s', '--neutral-losses-library', required=False, + help="List of neutral losses.") + parser_app.add_argument('-m', '--ion-mode', choices=["pos", "neg"], required=True, help="Ion mode of the libraries.") @@ -123,7 +123,7 @@ def main(): parser_app.add_argument('-u', '--max-monomer-units', default=2, type=int, required=False, help="Maximum number of monomer units.") - + ################################# # ANNOTATE MOLECULAR FORMULAE ################################# @@ -149,6 +149,12 @@ def main(): parser_amf.add_argument('-p', '--ppm', default=3.0, type=float, required=True, help="Mass tolerance in parts per million.") + parser_amf.add_argument('-e', '--skip-patterns', action="store_false", + help="Skip applying/using peak patterns (e.g. adduct and isotope patterns) to filter annotations.") + + parser_amf.add_argument('-r', '--skip-rules', action="store_false", + help="Skip heuritic rules to filter annotations.") + parser_amf.add_argument('-z', '--max-mz', type=float, required=False, default=500.0, help="Maximum m/z value to assign molecular formula(e).") @@ -166,7 +172,8 @@ def main(): parser_am.add_argument('-d', '--db', type=str, required=True, help="Sqlite database to write results.") - parser_am.add_argument('-c', '--db-compounds', type=str, required=False, help="Metabolite database (reference).") + parser_am.add_argument('-c', '--db-compounds', type=str, default="", required=False, + help="Metabolite database (reference).") parser_am.add_argument('-n', '--db-name', type=str, default="", required=True, help="Name compound / metabolite database (within --db-compounds).") @@ -180,6 +187,12 @@ def main(): parser_am.add_argument('-p', '--ppm', default=3.0, type=float, required=True, help="Mass tolerance in parts per million.") + parser_am.add_argument('-e', '--skip-patterns', action="store_false", + help="Skip applying/using peak patterns (e.g. adduct and isotope patterns) to filter annotations.") + + parser_am.add_argument('-r', '--rt', default=None, type=float, + help="Retention time tolerance in seconds.") + ################################# # SUMMARY RESULTS ################################# @@ -237,57 +250,44 @@ def main(): inp = in_out.read_peaklist(args.peaklist) if args.adducts: - if len(args.adducts_library) > 0 and args.adducts_library is not None: - for i, a in enumerate(args.adducts_library): - try: - lib = in_out.read_adducts(a, args.ion_mode) - except: - lib = in_out.read_mass_differences(a, args.ion_mode) - if i > 0: - add = True - else: - add = False - annotation.annotate_adducts(inp, db_out=args.db, ppm=args.ppm, lib=lib, add=add) + if args.adducts_library: + lib = in_out.read_adducts(args.adducts_library, args.ion_mode) else: path = 'data/adducts.txt' p = os.path.join(os.path.dirname(os.path.abspath(__file__)), path) lib = in_out.read_adducts(p, args.ion_mode) - annotation.annotate_adducts(inp, db_out=args.db, ppm=args.ppm, lib=lib, add=False) + annotation.annotate_adducts(inp, db_out=args.db, ppm=args.ppm, lib=lib, add=False) if args.isotopes: - if args.isotopes_library is not None: + if args.isotopes_library: lib = in_out.read_isotopes(args.isotopes_library, args.ion_mode) - annotation.annotate_isotopes(inp, db_out=args.db, ppm=args.ppm, lib=lib) else: path = 'data/isotopes.txt' p = os.path.join(os.path.dirname(os.path.abspath(__file__)), path) lib = in_out.read_isotopes(p, args.ion_mode) - annotation.annotate_isotopes(inp, db_out=args.db, ppm=args.ppm, lib=lib) - - if args.multiple_charged_ions: - if len(args.multiple_charged_ions_library) > 0 and args.multiple_charged_ions_library is not None: - for i, m in enumerate(args.multiple_charged_ions_library): - try: - lib = in_out.read_multiple_charged_ions(m, args.ion_mode) - except: - lib = in_out.read_mass_differences(m, args.ion_mode) - - if i > 0: - add = True - else: - add = False - - annotation.annotate_multiple_charged_ions(inp, db_out=args.db, ppm=args.ppm, lib=lib, add=add) + annotation.annotate_isotopes(inp, db_out=args.db, ppm=args.ppm, lib=lib) + + if args.neutral_losses: + if args.neutral_losses_library: + lib = in_out.read_neutral_losses(args.neutral_losses_library) else: - path = 'data/multiple_charged_ions.txt' + path = 'data/neutral_losses.txt' p = os.path.join(os.path.dirname(os.path.abspath(__file__)), path) - lib = in_out.read_multiple_charged_ions(p, args.ion_mode) - + lib = in_out.read_neutral_losses(p) + annotation.neutral_losses(inp, db_out=args.db, ppm=args.ppm, lib=lib) + if args.oligomers: - annotation.annotate_oligomers(inp, db_out=args.db, ppm=args.ppm, lib=lib) + if args.adducts_library: + lib = in_out.read_adducts(args.adducts_library, args.ion_mode) + else: + path = 'data/adducts.txt' + p = os.path.join(os.path.dirname(os.path.abspath(__file__)), path) + lib = in_out.read_adducts(p, args.ion_mode) + + annotation.annotate_oligomers(inp, db_out=args.db, ppm=args.ppm, lib=lib, maximum=args.max_monomer_units) if args.step == "annotate-mf": - + if args.intensity_matrix: df = in_out.combine_peaklist_matrix(args.peaklist, args.intensity_matrix) else: @@ -299,8 +299,8 @@ def main(): path = 'data/adducts.txt' p = os.path.join(os.path.dirname(os.path.abspath(__file__)), path) lib = in_out.read_adducts(p, args.ion_mode) - - annotation.annotate_molecular_formulae(df, ppm=args.ppm, lib_adducts=lib, db_out=args.db, db_in=args.db_mf, max_mz=args.max_mz) + annotation.annotate_molecular_formulae(df, ppm=args.ppm, lib_adducts=lib, db_out=args.db, db_in=args.db_mf, + patterns=args.skip_patterns, rules=args.skip_rules, max_mz=args.max_mz) if args.step == "annotate-compounds": @@ -315,8 +315,7 @@ def main(): path = 'data/adducts.txt' p = os.path.join(os.path.dirname(os.path.abspath(__file__)), path) lib = in_out.read_adducts(p, args.ion_mode) - - annotation.annotate_compounds(df, lib_adducts=lib, ppm=args.ppm, db_out=args.db, db_name=args.db_name, db_in="") + annotation.annotate_compounds(df, lib_adducts=lib, ppm=args.ppm, db_out=args.db, db_name=args.db_name, patterns=args.skip_patterns, db_in=args.db_compounds, rt_tol=args.rt) if args.step == "summary-results": @@ -336,10 +335,11 @@ def main(): from PySide2 import QtWidgets from beamspy.gui import BeamsApp app = QtWidgets.QApplication(sys.argv) - app.setStyle("Fusion") + # app.setStyle("Fusion") form = BeamsApp() form.show() sys.exit(app.exec_()) + if __name__ == "__main__": main() diff --git a/beamspy/annotation.py b/beamspy/annotation.py index 01be839..5c3a64c 100644 --- a/beamspy/annotation.py +++ b/beamspy/annotation.py @@ -2,13 +2,13 @@ # -*- coding: utf-8 -*- import os -import time import itertools import gzip import sqlite3 from collections import OrderedDict from urllib.parse import urlparse import requests +import difflib import pandas as pd import numpy as np import networkx as nx @@ -19,14 +19,42 @@ from beamspy.auxiliary import composition_to_string +_sql_create_table_isotopes_ = """ + CREATE TABLE isotopes ( + peak_id_a TEXT DEFAULT NULL, + peak_id_b TEXT DEFAULT NULL, + label_a TEXT DEFAULT NULL, + label_b TEXT DEFAULT NULL, + atoms REAL DEFAULT NULL, + exact_mass_diff REAL DEFAULT NULL, + ppm_error REAL DEFAULT NULL, + charge INT DEFAULT 1, + PRIMARY KEY (peak_id_a, peak_id_b, label_a, label_b)); + """ + +_sql_create_table_neutral_losses_ = """ + CREATE TABLE neutral_losses ( + peak_id_a TEXT DEFAULT NULL, + peak_id_b TEXT DEFAULT NULL, + label TEXT DEFAULT NULL, + exact_mass_diff REAL DEFAULT NULL, + ppm_error REAL DEFAULT NULL, + PRIMARY KEY (peak_id_a, peak_id_b, label)); + """ + + def calculate_mz_tolerance(mass, ppm): min_tol = mass - (mass * 0.000001 * ppm) max_tol = mass + (mass * 0.000001 * ppm) return min_tol, max_tol +def calculate_rt_tolerance(rt, rt_tol): + return rt - rt_tol, rt + rt_tol + + def calculate_ppm_error(mass, theo_mass): - return float(theo_mass - mass) / (theo_mass * 0.000001) + return float(mass - theo_mass) / (theo_mass * 0.000001) def _remove_elements_from_compositions(records, keep): @@ -56,14 +84,24 @@ def _prep_lib(lib): if isinstance(lib, OrderedDict): combs = list(itertools.combinations(lib, 2)) for pair in combs: - if isinstance(lib[pair[0]], float): - lib_pairs.append(OrderedDict([(pair[0], {"mass": lib[pair[0]], "charge": 1}), - (pair[1], {"mass": lib[pair[1]], "charge": 1})])) - else: + if lib[pair[0]]["charge"] == lib[pair[1]]["charge"]: + if lib[pair[0]]["mass"] < lib[pair[1]]["mass"]: + # print("yes", lib[pair[0]]["mass"], lib[pair[1]]["mass"], lib[pair[0]]["mass"] < lib[pair[1]]["mass"]) + lib_pairs.append(OrderedDict([(pair[0], {"mass": lib[pair[0]]["mass"], "charge": lib[pair[0]]["charge"]}), + (pair[1], {"mass": lib[pair[1]]["mass"], "charge": lib[pair[1]]["charge"]})])) + else: + lib_pairs.append(OrderedDict([(pair[1], {"mass": lib[pair[1]]["mass"], "charge": lib[pair[1]]["charge"]}), + (pair[0], {"mass": lib[pair[0]]["mass"], "charge": lib[pair[0]]["charge"]})])) + elif lib[pair[0]]["charge"] > lib[pair[1]]["charge"]: lib_pairs.append(OrderedDict([(pair[0], {"mass": lib[pair[0]]["mass"], "charge": lib[pair[0]]["charge"]}), (pair[1], {"mass": lib[pair[1]]["mass"], "charge": lib[pair[1]]["charge"]})])) - lib_pairs = sorted(lib_pairs, key=lambda pair: (list(pair.items())[0][1]["mass"] - list(pair.items())[1][1]["mass"]), reverse=True) + else: + lib_pairs.append(OrderedDict([(pair[1], {"mass": lib[pair[1]]["mass"], "charge": lib[pair[1]]["charge"]}), + (pair[0], {"mass": lib[pair[0]]["mass"], "charge": lib[pair[0]]["charge"]})])) + + #lib_pairs = sorted(lib_pairs, key=lambda pair: (list(pair.items())[0][1]["mass"] - list(pair.items())[1][1]["mass"]), reverse=True) return lib_pairs + elif isinstance(lib, list) and isinstance(lib[0], OrderedDict): if "mass_difference" in lib[0]: return sorted(lib, key=lambda d: d["mass_difference"], reverse=True) @@ -85,20 +123,30 @@ def _annotate_artifacts(peaklist, diff=0.02): yield i, j, mz_diff, ppm_error -def _check_tolerance(mz_x, mz_y, lib_pair, ppm): +def _check_tolerance(mz_x, mz_y, lib_pair, ppm, charge): min_tol_a, max_tol_a = calculate_mz_tolerance(mz_x, ppm) min_tol_b, max_tol_b = calculate_mz_tolerance(mz_y, ppm) if "mass_difference" in lib_pair.keys(): - # Need to fix the order, charge is one - min_tol_b = (min_tol_b - lib_pair["mass_difference"]) - max_tol_b = (max_tol_b - lib_pair["mass_difference"]) + min_tol_b = min_tol_b - (lib_pair["mass_difference"]) + max_tol_b = max_tol_b - (lib_pair["mass_difference"]) elif "mass" in list(lib_pair.items())[0][1]: - # Need to fix the order - min_tol_a = (min_tol_a - list(lib_pair.items())[0][1]["mass"]) * list(lib_pair.items())[0][1]["charge"] - max_tol_a = (max_tol_a - list(lib_pair.items())[0][1]["mass"]) * list(lib_pair.items())[0][1]["charge"] + charge_a = list(lib_pair.items())[0][1]["charge"] + charge_b = list(lib_pair.items())[1][1]["charge"] + mass_a = list(lib_pair.items())[0][1]["mass"] + mass_b = list(lib_pair.items())[1][1]["mass"] + + min_tol_a -= mass_a + max_tol_a -= mass_a + min_tol_b -= mass_b + max_tol_b -= mass_b + + if charge_a > 1: + min_tol_a += (charge_a - 1) * (mz_x - mass_a) + max_tol_a += (charge_a - 1) * (mz_x - mass_a) + if charge_b > 1: + min_tol_b += (charge_b - 1) * (mz_y - mass_b) + max_tol_b += (charge_b - 1) * (mz_y - mass_b) - min_tol_b = (min_tol_b - list(lib_pair.items())[1][1]["mass"]) * list(lib_pair.items())[1][1]["charge"] - max_tol_b = (max_tol_b - list(lib_pair.items())[1][1]["mass"]) * list(lib_pair.items())[1][1]["charge"] else: raise ValueError("Incorrect format: {}".format(lib_pair)) #if min_tol_b > min_tol_a and min_tol_b > max_tol_a: @@ -114,90 +162,114 @@ def _check_tolerance(mz_x, mz_y, lib_pair, ppm): return 0 -def _annotate_pairs_from_graph(G, ppm, lib_pairs): +def _annotate_pairs_from_graph(G, ppm, lib_pairs, charge): for e in G.edges(data=True): - #if G.nodes[e[0]]["mz"] < G.nodes[e[1]]["mz"]: - # mz_x = G.nodes[e[0]]["mz"] - # mz_y = G.nodes[e[1]]["mz"] - #else: + mz_x = G.nodes[e[0]]["mz"] mz_y = G.nodes[e[1]]["mz"] for lib_pair in lib_pairs: - ct = _check_tolerance(mz_x, mz_y, lib_pair, ppm) - if ct == 1 or ct == True: + + ct = _check_tolerance(mz_x, mz_y, lib_pair, ppm, charge) + + if ct: if "charge" in list(lib_pair.items())[0][1]: charge_a = list(lib_pair.items())[0][1]["charge"] charge_b = list(lib_pair.items())[1][1]["charge"] + elif charge: + charge_a = charge + charge_b = charge else: charge_a = 1 charge_b = 1 if "mass_difference" in lib_pair: + charge = lib_pair["charge"] ppm_error = calculate_ppm_error( mz_x, - mz_y - lib_pair["mass_difference"]) + mz_y - (lib_pair["mass_difference"])) + exact_mass_diff = lib_pair["mass_difference"] else: ppm_error = calculate_ppm_error( (mz_x - list(lib_pair.items())[0][1]["mass"]) * charge_a, (mz_y - list(lib_pair.items())[1][1]["mass"]) * charge_b) + exact_mass_diff = list(lib_pair.items())[1][1]["mass"] - list(lib_pair.items())[0][1]["mass"] yield OrderedDict([("peak_id_a", e[0]), ("peak_id_b", e[1]), ("label_a", list(lib_pair.keys())[0]), ("label_b", list(lib_pair.keys())[1]), ('charge_a', charge_a), ('charge_b', charge_b), + ('exact_mass_diff', exact_mass_diff), ('ppm_error', round(ppm_error, 2))]) -def _annotate_pairs_from_peaklist(peaklist, ppm, lib_pairs): - n = len(peaklist.iloc[:,1]) +def _annotate_pairs_from_peaklist(peaklist, ppm, lib_pairs, charge): + n = len(peaklist.iloc[:, 1]) for i in range(n): for j in range(i + 1, n): for lib_pair in lib_pairs: - ct = _check_tolerance(peaklist.iloc[i,1], peaklist.iloc[j,1], lib_pair, ppm) - if ct == 1: + ct = _check_tolerance(peaklist.iloc[i, 1], peaklist.iloc[j, 1], lib_pair, ppm, charge) + if ct: if "charge" in list(lib_pair.items())[0][1]: charge_a = list(lib_pair.items())[0][1]["charge"] charge_b = list(lib_pair.items())[1][1]["charge"] + elif charge: + charge_a = charge + charge_b = charge else: charge_a = 1 charge_b = 1 if "mass_difference" in lib_pair: ppm_error = calculate_ppm_error( - peaklist.iloc[i,1], - peaklist.iloc[j,1] - lib_pair["mass_difference"]) - + peaklist.iloc[i, 1], + peaklist.iloc[j, 1] - lib_pair["mass_difference"]) + exact_mass_diff = lib_pair["mass_difference"] + charge_a = lib_pair["charge"] + charge_b = lib_pair["charge"] else: ppm_error = calculate_ppm_error( - (peaklist.iloc[i,1] - list(lib_pair.items())[0][1]["mass"]) * list(lib_pair.items())[0][1]["charge"], - (peaklist.iloc[j,1] - list(lib_pair.items())[1][1]["mass"]) * list(lib_pair.items())[1][1]["charge"]) + (peaklist.iloc[i, 1] - list(lib_pair.items())[0][1]["mass"]) * charge_a, + (peaklist.iloc[j, 1] - list(lib_pair.items())[1][1]["mass"]) * charge_b) + exact_mass_diff = list(lib_pair.items())[1][1]["mass"] - list(lib_pair.items())[0][1]["mass"] yield OrderedDict([("peak_id_a", peaklist.iloc[i,0]), ("peak_id_b", peaklist.iloc[j,0]), ("label_a", list(lib_pair.keys())[0]), ("label_b", list(lib_pair.keys())[1]), ('charge_a', charge_a), ('charge_b', charge_b), - ('ppm_error', round(ppm_error,2))]) + ('exact_mass_diff', exact_mass_diff), + ('ppm_error', round(ppm_error, 2))]) class DbCompoundsMemory: - def __init__(self, filename): + def __init__(self, filename, lib_adducts=[]): self.filename = filename + self.lib_adducts = lib_adducts + + records = read_compounds(self.filename, lib_adducts=self.lib_adducts) + + if "retention_time" in list(records[0].keys()): + rt_column = "retention_time REAL DEFAULT NULL," + else: + rt_column = "" + self.conn = sqlite3.connect(":memory:") self.cursor = self.conn.cursor() + self.cursor.execute("""CREATE TABLE COMPOUNDS( - compound_id TEXT PRIMARY KEY NOT NULL, + compound_id TEXT PRIMARY KEY NOT NULL, compound_name TEXT, exact_mass REAL, + {} C INTEGER DEFAULT 0, H INTEGER DEFAULT 0, N INTEGER DEFAULT 0, @@ -205,10 +277,10 @@ def __init__(self, filename): P INTEGER DEFAULT 0, S INTEGER DEFAULT 0, CHNOPS INTEGER DEFAULT NULL, - molecular_formula TEXT DEFAULT NULL - );""") + molecular_formula TEXT DEFAULT NULL, + adduct TEXT DEFAULT NULL + );""".format(rt_column)) - records = read_compounds(self.filename) records = _remove_elements_from_compositions(records, keep=["C", "H", "N", "O", "P", "S"]) records = _flatten_composition(records) for record in records: @@ -220,11 +292,17 @@ def __init__(self, filename): self.cursor.execute("""CREATE INDEX IDX_EXACT_MASS ON COMPOUNDS (exact_mass);""") self.conn.commit() - def select_compounds(self, min_tol, max_tol): - col_names = ["compound_id", "compound_name", "exact_mass", "C", "H", "N", "O", "P", "S", "CHNOPS", "molecular_formula"] - self.cursor.execute("""SELECT {} FROM COMPOUNDS WHERE - exact_mass >= {} and exact_mass <= {} - """.format(",".join(map(str, col_names)), min_tol, max_tol)) + self.cursor.execute("select * from COMPOUNDS") + + def select_compounds(self, min_tol, max_tol, min_rt=None, max_rt=None): + col_names = ["compound_id", "compound_name", "exact_mass", "C", "H", "N", "O", "P", "S", "CHNOPS", "molecular_formula", "adduct"] + if min_rt: + col_names.insert(3, "retention_time") + sql_rt = " and retention_time >= {} and retention_time <= {}".format(min_rt, max_rt) + else: + sql_rt = "" + sql_str = """SELECT {} FROM COMPOUNDS WHERE exact_mass >= {} and exact_mass <= {}{}""".format(",".join(map(str, col_names)), min_tol, max_tol, sql_rt) + self.cursor.execute(sql_str) return [OrderedDict(zip(col_names, list(record))) for record in self.cursor.fetchall()] def close(self): @@ -285,6 +363,9 @@ def select_mf(self, min_tol, max_tol, rules): return [OrderedDict(zip(col_names, list(record))) for record in self.cursor.fetchall()] + def close(self): + self.conn.close() + def annotate_adducts(source, db_out, ppm, lib, add=False): @@ -295,10 +376,13 @@ def annotate_adducts(source, db_out, ppm, lib, add=False): cursor.execute("DROP TABLE IF EXISTS adduct_pairs") cursor.execute("""CREATE TABLE adduct_pairs ( - peak_id_a INTEGER DEFAULT NULL, - peak_id_b INTEGER DEFAULT NULL, + peak_id_a TEXT DEFAULT NULL, + peak_id_b TEXT DEFAULT NULL, label_a TEXT DEFAULT NULL, label_b TEXT DEFAULT NULL, + charge_a INTEGER DEFAULT NULL, + charge_b INTEGER DEFAULT NULL, + exact_mass_diff REAL DEFAULT NULL, ppm_error REAL DEFAULT NULL, PRIMARY KEY (peak_id_a, peak_id_b, label_a, label_b));""") @@ -309,16 +393,32 @@ def annotate_adducts(source, db_out, ppm, lib, add=False): if isinstance(source, list) and len(source) > 0 and isinstance(source[0], nx.classes.digraph.DiGraph): for i, graph in enumerate(source): - for assignment in _annotate_pairs_from_graph(graph, lib_pairs=lib_pairs, ppm=ppm): - cursor.execute("""INSERT OR REPLACE into adduct_pairs (peak_id_a, peak_id_b, label_a, label_b, ppm_error) - values (?,?,?,?,?)""", (str(assignment["peak_id_a"]), str(assignment["peak_id_b"]), - assignment["label_a"], assignment["label_b"], float(assignment["ppm_error"]))) + for assignment in _annotate_pairs_from_graph(graph, lib_pairs=lib_pairs, ppm=ppm, charge=None): + cursor.execute("""INSERT OR REPLACE into adduct_pairs (peak_id_a, peak_id_b, + label_a, label_b, + charge_a, charge_b, + exact_mass_diff, ppm_error) + values (?,?,?,?,?,?,?,?)""", (str(assignment["peak_id_a"]), str(assignment["peak_id_b"]), + assignment["label_a"], assignment["label_b"], + assignment["charge_a"], assignment["charge_b"], + float(assignment["exact_mass_diff"]), + float(assignment["ppm_error"]))) elif isinstance(source, pd.core.frame.DataFrame): - for assignment in _annotate_pairs_from_peaklist(source, lib_pairs=lib_pairs, ppm=ppm): - cursor.execute("""INSERT OR REPLACE into adduct_pairs (peak_id_a, peak_id_b, label_a, label_b, ppm_error) - values (?,?,?,?,?)""", (assignment["peak_id_a"], assignment["peak_id_b"], - assignment["label_a"], assignment["label_b"], assignment["ppm_error"])) + for assignment in _annotate_pairs_from_peaklist(source, lib_pairs=lib_pairs, ppm=ppm, charge=None): + cursor.execute("""INSERT OR REPLACE into adduct_pairs (peak_id_a, peak_id_b, + label_a, label_b, + charge_a, charge_b, + exact_mass_diff, ppm_error) + values (?,?,?,?,?,?,?,?)""", (assignment["peak_id_a"], assignment["peak_id_b"], + assignment["label_a"], assignment["label_b"], + assignment["charge_a"], assignment["charge_b"], + float(assignment["exact_mass_diff"]), + float(assignment["ppm_error"]))) + + cursor.execute("""CREATE INDEX IDX_peak_id_a ON adduct_pairs (peak_id_a);""") + cursor.execute("""CREATE INDEX IDX_peak_id_b ON adduct_pairs (peak_id_b);""") + conn.commit() conn.close() return @@ -331,14 +431,7 @@ def annotate_isotopes(source, db_out, ppm, lib): cursor.execute("DROP TABLE IF EXISTS isotopes") - cursor.execute("""CREATE TABLE isotopes ( - peak_id_a INTEGER DEFAULT NULL, - peak_id_b INTEGER DEFAULT NULL, - label_a TEXT DEFAULT NULL, - label_b TEXT DEFAULT NULL, - atoms REAL DEFAULT NULL, - ppm_error REAL DEFAULT NULL, - PRIMARY KEY (peak_id_a, peak_id_b, label_a, label_b));""") + cursor.execute(_sql_create_table_isotopes_) lib_pairs = _prep_lib(lib.lib) @@ -356,28 +449,39 @@ def annotate_isotopes(source, db_out, ppm, lib): peaklist = graph.nodes(data=True) - for assignment in _annotate_pairs_from_graph(graph, lib_pairs=lib_pairs, ppm=ppm): + for assignment in _annotate_pairs_from_graph(G=graph, lib_pairs=lib_pairs, ppm=ppm, charge=None): - y = abundances[assignment["label_a"]]['abundance'] * peaklist[assignment["peak_id_b"]]["intensity"] - x = abundances[assignment["label_b"]]['abundance'] * peaklist[assignment["peak_id_a"]]["intensity"] + if abundances[assignment["label_a"]]["abundance"] < abundances[assignment["label_b"]]["abundance"]: + # Lithium + y = abundances[assignment["label_a"]]['abundance'] * peaklist[assignment["peak_id_b"]]["intensity"] + x = 100.0 * peaklist[assignment["peak_id_a"]]["intensity"] + else: + y = 100.0 * peaklist[assignment["peak_id_b"]]["intensity"] + x = abundances[assignment["label_b"]]['abundance'] * peaklist[assignment["peak_id_a"]]["intensity"] if x == 0.0 or y == 0.0: atoms = None elif abundances[assignment["label_a"]]["abundance"] < abundances[assignment["label_b"]]["abundance"]: - atoms = x / y + atoms = x/y else: atoms = y/x - cursor.execute("""insert into isotopes (peak_id_a, peak_id_b, label_a, label_b, atoms, ppm_error) - values (?,?,?,?,?,?)""", (str(assignment["peak_id_a"]), str(assignment["peak_id_b"]), - assignment["label_a"], assignment["label_b"], float(atoms), float(assignment["ppm_error"]))) + cursor.execute("""insert into isotopes (peak_id_a, peak_id_b, label_a, label_b, + atoms, exact_mass_diff, ppm_error, charge) + values (?,?,?,?,?,?,?,?)""", (str(assignment["peak_id_a"]), str(assignment["peak_id_b"]), + assignment["label_a"], assignment["label_b"], float(atoms), + float(assignment["exact_mass_diff"]), float(assignment["ppm_error"]), assignment["charge_a"])) elif isinstance(source, pd.core.frame.DataFrame): + for assignment in _annotate_pairs_from_peaklist(peaklist=source, lib_pairs=lib_pairs, ppm=ppm, charge=None): - for assignment in _annotate_pairs_from_peaklist(source, lib_pairs=lib_pairs, ppm=ppm): - - y = abundances[assignment["label_a"]]["abundance"] * source.loc[source['name'] == assignment["peak_id_b"]]["intensity"].iloc[0] - x = abundances[assignment["label_b"]]["abundance"] * source.loc[source['name'] == assignment["peak_id_a"]]["intensity"].iloc[0] + if abundances[assignment["label_a"]]["abundance"] < abundances[assignment["label_b"]]["abundance"]: + # Lithium + y = abundances[assignment["label_a"]]["abundance"] * source.loc[source['name'] == assignment["peak_id_b"]]["intensity"].iloc[0] + x = 100.0 * source.loc[source['name'] == assignment["peak_id_a"]]["intensity"].iloc[0] + else: + y = 100.0 * source.loc[source['name'] == assignment["peak_id_b"]]["intensity"].iloc[0] + x = abundances[assignment["label_b"]]["abundance"] * source.loc[source['name'] == assignment["peak_id_a"]]["intensity"].iloc[0] if x == 0.0 or y == 0.0: atoms = None @@ -386,17 +490,21 @@ def annotate_isotopes(source, db_out, ppm, lib): else: atoms = y/x - cursor.execute("""insert into isotopes (peak_id_a, peak_id_b, label_a, label_b, atoms, ppm_error) - values (?,?,?,?,?,?)""", (assignment["peak_id_a"], assignment["peak_id_b"], - assignment["label_a"], assignment["label_b"], atoms, assignment["ppm_error"])) - conn.commit() + cursor.execute("""insert into isotopes (peak_id_a, peak_id_b, label_a, label_b, + atoms, exact_mass_diff, ppm_error, charge) + values (?,?,?,?,?,?,?,?)""", (assignment["peak_id_a"], assignment["peak_id_b"], + assignment["label_a"], assignment["label_b"], atoms, + float(assignment["exact_mass_diff"]), assignment["ppm_error"], assignment["charge_a"])) + + cursor.execute("""CREATE INDEX IDX_isotopes_peak_id_a ON isotopes (peak_id_a);""") + cursor.execute("""CREATE INDEX IDX_isotopes_peak_id_b ON isotopes (peak_id_b);""") conn.commit() conn.close() return -def annotate_oligomers(source, db_out, ppm, lib, maximum=2): +def annotate_oligomers(source, db_out, ppm, lib, maximum=3): conn = sqlite3.connect(db_out) cursor = conn.cursor() @@ -404,15 +512,17 @@ def annotate_oligomers(source, db_out, ppm, lib, maximum=2): cursor.execute("DROP TABLE IF EXISTS oligomers") cursor.execute("""CREATE TABLE oligomers ( - peak_id_a INTEGER DEFAULT NULL, - peak_id_b INTEGER DEFAULT NULL, + peak_id_a TEXT DEFAULT NULL, + peak_id_b TEXT DEFAULT NULL, mz_a REAL DEFAULT NULL, mz_b REAL DEFAULT NULL, label_a TEXT DEFAULT NULL, label_b TEXT DEFAULT NULL, + charge_a INTEGER DEFAULT NULL, + charge_b INTEGER DEFAULT NULL, mz_ratio REAL DEFAULT NULL, ppm_error REAL DEFAULT NULL, - PRIMARY KEY (peak_id_a, peak_id_b));""") + PRIMARY KEY (peak_id_a, peak_id_b, label_a, label_b));""") if isinstance(source, nx.classes.digraph.DiGraph): source = list(source.subgraph(c) for c in nx.weakly_connected_components(source)) @@ -425,7 +535,7 @@ def annotate_oligomers(source, db_out, ppm, lib, maximum=2): neighbors = list(graph.neighbors(n)) - for d in range(1, len(neighbors)+1): + for d in range(1, maximum): for nn in neighbors: @@ -436,70 +546,140 @@ def annotate_oligomers(source, db_out, ppm, lib, maximum=2): for adduct in lib.lib.keys(): - min_tol_a, max_tol_a = calculate_mz_tolerance(mz_x + ((mz_x - lib.lib[adduct]) * d), ppm) + if lib.lib[adduct]["charge"] > 1: + continue + + min_tol_a, max_tol_a = calculate_mz_tolerance(mz_x + ((mz_x - lib.lib[adduct]["mass"]) * d), ppm) min_tol_b, max_tol_b = calculate_mz_tolerance(mz_y, ppm) if (min_tol_b > max_tol_a and max_tol_b > max_tol_a):# or (min_tol_a < min_tol_b and max_tol_a < min_tol_b): #print(source.iloc[i][1], source.iloc[j][1], adduct) break - min_tol_a = min_tol_a - lib.lib[adduct] - max_tol_a = max_tol_a - lib.lib[adduct] + min_tol_a -= lib.lib[adduct]["mass"] + max_tol_a -= lib.lib[adduct]["mass"] - min_tol_b = min_tol_b - lib.lib[adduct] - max_tol_b = max_tol_b - lib.lib[adduct] + min_tol_b -= lib.lib[adduct]["mass"] + max_tol_b -= lib.lib[adduct]["mass"] if min_tol_a < max_tol_b and min_tol_b < max_tol_a: - a = (mz_x - lib.lib[adduct]) + (mz_x - lib.lib[adduct]) * d - b = mz_y - lib.lib[adduct] + a = (mz_x - lib.lib[adduct]["mass"]) + (mz_x - lib.lib[adduct]["mass"]) * d + b = mz_y - lib.lib[adduct]["mass"] - ratio = (mz_y - lib.lib[adduct]) / (mz_x - lib.lib[adduct]) - ppm_error =calculate_ppm_error(a, b) + ratio = (mz_y - lib.lib[adduct]["mass"]) / (mz_x - lib.lib[adduct]["mass"]) + ppm_error = calculate_ppm_error(a, b) if "M" in adduct: adduct_oligo = adduct.replace("M", "{}M".format(int(round(ratio)))) else: adduct_oligo = "{}{}".format(int(round(ratio)), adduct) - cursor.execute("""insert into oligomers (peak_id_a, peak_id_b, mz_a, mz_b, label_a, label_b, mz_ratio, ppm_error) - values (?,?,?,?,?,?,?,?)""", (n, nn, mz_x, mz_y, adduct, adduct_oligo, round(ratio, 2), round(ppm_error, 2))) - + cursor.execute("""insert into oligomers (peak_id_a, peak_id_b, mz_a, mz_b, label_a, label_b, charge_a, charge_b, mz_ratio, ppm_error) + values (?,?,?,?,?,?,?,?,?,?)""", (n, nn, mz_x, mz_y, adduct, adduct_oligo, + lib.lib[adduct]["charge"], lib.lib[adduct]["charge"], round(ratio, 2), round(ppm_error, 2))) elif isinstance(source, pd.core.frame.DataFrame): n = len(source.iloc[:,0]) for adduct in lib.lib.keys(): + + if lib.lib[adduct]["charge"] > 1: + continue + for i in range(n): for d in range(1, maximum): for j in range(i + 1, n): - min_tol_a, max_tol_a = calculate_mz_tolerance(source.iloc[i][1] + ((source.iloc[i][1] - lib.lib[adduct]) * d), ppm) + min_tol_a, max_tol_a = calculate_mz_tolerance(source.iloc[i][1] + ((source.iloc[i][1] - lib.lib[adduct]["mass"]) * d), ppm) min_tol_b, max_tol_b = calculate_mz_tolerance(source.iloc[j][1], ppm) if (min_tol_b > max_tol_a and max_tol_b > max_tol_a):# or (min_tol_a < min_tol_b and max_tol_a < min_tol_b): #print(source.iloc[i][1], source.iloc[j][1], adduct) break - min_tol_a = min_tol_a - lib.lib[adduct] - max_tol_a = max_tol_a - lib.lib[adduct] + min_tol_a -= lib.lib[adduct]["mass"] + max_tol_a -= lib.lib[adduct]["mass"] - min_tol_b = min_tol_b - lib.lib[adduct] - max_tol_b = max_tol_b - lib.lib[adduct] + min_tol_b -= lib.lib[adduct]["mass"] + max_tol_b -= lib.lib[adduct]["mass"] if min_tol_a < max_tol_b and min_tol_b < max_tol_a: - a = (source.iloc[i][1] - lib.lib[adduct]) + (source.iloc[i][1] - lib.lib[adduct]) * d - b = source.iloc[j][1] - lib.lib[adduct] + a = (source.iloc[i][1] - lib.lib[adduct]["mass"]) + (source.iloc[i][1] - lib.lib[adduct]["mass"]) * d + b = source.iloc[j][1] - lib.lib[adduct]["mass"] - ratio = (source.iloc[j][1] - lib.lib[adduct]) / (source.iloc[i][1] - lib.lib[adduct]) + ratio = (source.iloc[j][1] - lib.lib[adduct]["mass"]) / (source.iloc[i][1] - lib.lib[adduct]["mass"]) ppm_error = calculate_ppm_error(a, b) if "M" in adduct: adduct_oligo = adduct.replace("M", "{}M".format(int(round(ratio)))) else: adduct_oligo = "{}{}".format(int(round(ratio)), adduct) - cursor.execute("""insert into oligomers (peak_id_a, peak_id_b, mz_a, mz_b, label_a, label_b, mz_ratio, ppm_error) - values (?,?,?,?,?,?,?,?)""", (source.iloc[i][0], source.iloc[j][0], source.iloc[i][1], source.iloc[j][1], adduct, adduct_oligo, round(ratio, 2), round(ppm_error, 2))) + + cursor.execute("""insert into oligomers (peak_id_a, peak_id_b, mz_a, mz_b, label_a, label_b, charge_a, charge_b, mz_ratio, ppm_error) + values (?,?,?,?,?,?,?,?,?,?)""", (source.iloc[i][0], source.iloc[j][0], source.iloc[i][1], source.iloc[j][1], adduct, adduct_oligo, + lib.lib[adduct]["charge"], lib.lib[adduct]["charge"], round(ratio, 2), round(ppm_error, 2))) + + cursor.execute("""CREATE INDEX IDX_oligomers_peak_id_a ON oligomers (peak_id_a);""") + cursor.execute("""CREATE INDEX IDX_oligomers_peak_id_b ON oligomers (peak_id_b);""") + + conn.commit() + conn.close() + return + + +def annotate_neutral_losses(source, db_out, ppm, lib): + + conn = sqlite3.connect(db_out) + cursor = conn.cursor() + + cursor.execute("DROP TABLE IF EXISTS neutral_losses") + + cursor.execute(_sql_create_table_neutral_losses_) + + if isinstance(source, nx.classes.digraph.DiGraph): + source = list(source.subgraph(c) for c in nx.weakly_connected_components(source)) + + if isinstance(source, list) and len(source) > 0 and isinstance(source[0], nx.classes.digraph.DiGraph): + + for graph in source: + + for e in graph.edges(data=True): + + mz_x = graph.nodes[e[0]]["mz"] + mz_y = graph.nodes[e[1]]["mz"] + + for nl in lib.lib: + + ct = _check_tolerance(mz_x, mz_y, nl, ppm, charge=1) + if ct: + ppm_error = calculate_ppm_error( + mz_x, mz_y - nl["mass_difference"]) + + cursor.execute("""insert into neutral_losses (peak_id_a, peak_id_b, label, + exact_mass_diff, ppm_error) + values (?,?,?,?,?)""", (e[0], e[1], + nl["label"], nl["mass_difference"], ppm_error)) + + elif isinstance(source, pd.core.frame.DataFrame): + + n = len(source.iloc[:, 1]) + for i in range(n): + for j in range(i + 1, n): + for nl in lib.lib: + ct = _check_tolerance(source.iloc[i, 1], source.iloc[j, 1], nl, ppm, charge=1) + if ct: + ppm_error = calculate_ppm_error( + source.iloc[i, 1], + source.iloc[j, 1] - nl["mass_difference"]) + cursor.execute("""insert into neutral_losses (peak_id_a, peak_id_b, label, + exact_mass_diff, ppm_error) + values (?,?,?,?,?)""", (source.iloc[i, 0], source.iloc[j, 0], + nl["label"], nl["mass_difference"], ppm_error)) + + cursor.execute("""CREATE INDEX IDX_nls_peak_id_a ON neutral_losses (peak_id_a);""") + cursor.execute("""CREATE INDEX IDX_nls_peak_id_b ON neutral_losses (peak_id_b);""") + conn.commit() conn.close() return @@ -513,8 +693,8 @@ def annotate_artifacts(source, db_out, diff): cursor.execute("DROP TABLE IF EXISTS artifacts") cursor.execute("""CREATE TABLE artifacts ( - peak_id_a INTEGER DEFAULT NULL, - peak_id_b INTEGER DEFAULT NULL, + peak_id_a TEXT DEFAULT NULL, + peak_id_b TEXT DEFAULT NULL, mz_diff REAL DEFAULT NULL, ppm_error REAL DEFAULT NULL, PRIMARY KEY (peak_id_a, peak_id_b));""") @@ -538,47 +718,169 @@ def annotate_artifacts(source, db_out, diff): return -def annotate_multiple_charged_ions(source, db_out, ppm, lib, add=False): +def _select_unions_peak_patterns(cursor): - conn = sqlite3.connect(db_out) - cursor = conn.cursor() - - if not add: - cursor.execute("DROP TABLE IF EXISTS multiple_charged_ions") - - cursor.execute("""CREATE TABLE multiple_charged_ions ( - peak_id_a INTEGER DEFAULT NULL, - peak_id_b INTEGER DEFAULT NULL, - label_a TEXT DEFAULT NULL, - label_b TEXT DEFAULT NULL, - charge_a INTEGER DEFAULT NULL, - charge_b INTEGER DEFAULT NULL, - ppm_error REAL DEFAULT NULL, - PRIMARY KEY (peak_id_a, peak_id_b, label_a, label_b, charge_a, charge_b));""") + cursor.execute("SELECT name FROM sqlite_master WHERE type='table'") + tables = cursor.fetchall() - lib_pairs = _prep_lib(lib.lib) + # Prepare (empty) sqlite tables if not exist to make union queries more consistent + if ("isotopes",) not in tables: + cursor.execute(_sql_create_table_isotopes_.replace("CREATE", "CREATE TEMP")) + if ("neutral_losses",) not in tables: + cursor.execute(_sql_create_table_neutral_losses_.replace("CREATE", "CREATE TEMP")) - if isinstance(source, nx.classes.digraph.DiGraph): - source = list(source.subgraph(c) for c in nx.weakly_connected_components(source)) + records = [] + sql_unions = [] + excl_ids_pp = [] + for t in ["adduct_pairs", "oligomers"]: + if (t,) in tables: - if (isinstance(source, list) or isinstance(source, np.ndarray)) and isinstance(source[0], nx.classes.graph.Graph): - for graph in source: - for assignment in _annotate_pairs_from_graph(graph, lib_pairs=lib_pairs, ppm=ppm): - cursor.execute("""INSERT OR REPLACE into multiple_charged_ions (peak_id_a, peak_id_b, label_a, label_b, charge_a, charge_b, ppm_error) - values (?,?,?,?,?,?,?)""", (assignment["peak_id_a"], assignment["peak_id_b"], assignment["label_a"], assignment["label_b"], - assignment["charge_a"], assignment["charge_b"], assignment["ppm_error"])) + if t == "oligomers": + col_mz_ratio = "ap.mz_ratio AS mz_ratio" + else: + col_mz_ratio = "1 AS mz_ratio" + + sql_str = """ + SELECT ap.peak_id_a AS peak_id_a, + iso.peak_id_b AS peak_id_b, + nl_ap.peak_id_a AS peak_id_a_nl, + nl_ap.peak_id_b AS peak_id_aa_nl, + nl_iso.peak_id_a AS peak_id_b_nl, + nl_iso.peak_id_b AS peak_id_bb_nl, + ap.label_a AS adduct_label, + IFNULL(ap.peak_id_b = iso.peak_id_b, 0) AS flag, + iso.label_a AS iso_label_a, + iso.label_b AS iso_label_b, + ap.charge_a AS ap_charge, + iso.charge AS iso_charge, + 1 AS mz_ratio, + IFNULL(iso.exact_mass_diff, 0.0) AS iso_exact_mass_diff, + IFNULL(nl_ap.exact_mass_diff, 0.0) AS nl_exact_mass_diff, + nl_ap.label AS nl_label + FROM {} AS ap + LEFT JOIN isotopes AS iso + ON ap.peak_id_a = iso.peak_id_a AND ap.charge_a = iso.charge + LEFT JOIN neutral_losses AS nl_ap + ON (ap.peak_id_a = nl_ap.peak_id_a OR ap.peak_id_a = nl_ap.peak_id_b) + LEFT JOIN neutral_losses AS nl_iso + ON (iso.peak_id_b = nl_iso.peak_id_a OR iso.peak_id_b = nl_iso.peak_id_b) + UNION + SELECT ap.peak_id_b AS peak_id_a, + iso.peak_id_b AS peak_id_b, + nl_ap.peak_id_a AS peak_id_a_nl, + nl_ap.peak_id_b AS peak_id_aa_nl, + nl_iso.peak_id_a AS peak_id_b_nl, + nl_iso.peak_id_b AS peak_id_bb_nl, + ap.label_b AS adduct_label, + 0 AS flag, + iso.label_a AS iso_label_a, + iso.label_b AS iso_label_b, + ap.charge_b AS ap_charge, + iso.charge AS iso_charge, + {}, + IFNULL(iso.exact_mass_diff, 0) AS iso_exact_mass_diff, + IFNULL(nl_ap.exact_mass_diff, 0) AS nl_exact_mass_diff, + nl_ap.label AS nl_label + FROM {} AS ap + LEFT JOIN isotopes AS iso + ON ap.peak_id_b = iso.peak_id_a AND ap.charge_b = iso.charge + LEFT JOIN neutral_losses AS nl_ap + ON (ap.peak_id_b = nl_ap.peak_id_a OR ap.peak_id_b = nl_ap.peak_id_b) + LEFT JOIN neutral_losses AS nl_iso + ON (iso.peak_id_b = nl_iso.peak_id_a OR iso.peak_id_b = nl_iso.peak_id_b) + """.format(t, col_mz_ratio, t) + + sql_unions.append(sql_str) + excl_ids_pp.append("""SELECT peak_id_a FROM {} UNION SELECT peak_id_b FROM {}""".format(t, t)) + + cursor.execute(" UNION ".join(map(str, sql_unions))) + records.extend([dict(zip([c[0] for c in cursor.description], record)) for record in cursor.fetchall()]) + + if len(excl_ids_pp) > 0: + excl_ids_adducts_oligomers = " union ".join(map(str, excl_ids_pp)) + sql_excl = """AND iso.peak_id_a NOT IN ({}) + AND iso.peak_id_b NOT IN ({})""".format(excl_ids_adducts_oligomers, excl_ids_adducts_oligomers) + else: + sql_excl = "" + + # WHERE (nl_ap.label = nl_iso.label + # OR IFNULL(nl_ap.label, nl_iso.label) is NOT NULL + # OR IFNULL(nl_ap.label, nl_iso.label) is NULL) + + sql_str = """ + SELECT iso.peak_id_a AS peak_id_a, + iso.peak_id_b AS peak_id_b, + nl_ap.peak_id_a AS peak_id_a_nl, + nl_ap.peak_id_b AS peak_id_aa_nl, + nl_iso.peak_id_a AS peak_id_b_nl, + nl_iso.peak_id_b AS peak_id_bb_nl, + NULL AS adduct_label, + 0 AS flag, + iso.label_a AS iso_label_a, + iso.label_b AS iso_label_b, + NULL AS ap_charge, + iso.charge AS iso_charge, + 1 AS mz_ratio, + IFNULL(iso.exact_mass_diff, 0.0) AS iso_exact_mass_diff, + IFNULL(nl_ap.exact_mass_diff, 0.0) AS nl_exact_mass_diff, + nl_ap.label AS nl_label + FROM isotopes AS iso + LEFT JOIN neutral_losses AS nl_ap + ON (iso.peak_id_a = nl_ap.peak_id_a OR iso.peak_id_a = nl_ap.peak_id_b) + LEFT JOIN neutral_losses AS nl_iso + ON (iso.peak_id_b = nl_iso.peak_id_a OR iso.peak_id_b = nl_iso.peak_id_b) + + {}""".format(sql_excl) + + cursor.execute(sql_str) + records.extend([dict(zip([c[0] for c in cursor.description], record)) for record in cursor.fetchall()]) + + excl_ids_pp.append("""SELECT peak_id_a FROM isotopes UNION SELECT peak_id_b FROM isotopes""") + excl_ids = " UNION ".join(map(str, excl_ids_pp)) + sql_excl = """WHERE peak_id_a NOT IN ({}) + AND peak_id_a_nl NOT IN ({})""".format(excl_ids, excl_ids) + + sql_str = """ + SELECT peak_id_a, + NULL AS peak_id_b, + peak_id_b AS peak_id_a_nl, + NULL AS peak_id_b_nl, + NULL AS adduct_label, + 0 AS flag, + NULL AS iso_label_a, + NULL AS iso_label_b, + 1 AS ap_charge, + 1 AS iso_charge, + 1 AS mz_ratio, + 0.0 AS iso_exact_mass_diff, + exact_mass_diff AS nl_exact_mass_diff, + label AS nl_label + FROM neutral_losses + {} + UNION + SELECT peak_id_b AS peak_id_a, + NULL AS peak_id_b, + peak_id_a AS peak_id_a_nl, + NULL AS peak_id_b_nl, + NULL AS adduct_label, + 0 AS flag, + NULL AS iso_label_a, + NULL AS iso_label_b, + 1 AS ap_charge, + 1 AS iso_charge, + 1 AS mz_ratio, + 0.0 AS iso_exact_mass_diff, + exact_mass_diff AS nl_exact_mass_diff, + label AS nl_label + FROM neutral_losses + {}""".format(sql_excl, sql_excl) + cursor.execute(sql_str) + records.extend([dict(zip([c[0] for c in cursor.description], record)) for record in cursor.fetchall()]) - elif isinstance(source, pd.core.frame.DataFrame): - for assignment in _annotate_pairs_from_peaklist(source, lib_pairs=lib_pairs, ppm=ppm): - cursor.execute("""INSERT OR REPLACE into multiple_charged_ions (peak_id_a, peak_id_b, label_a, label_b, charge_a, charge_b, ppm_error) - values (?,?,?,?,?,?,?)""", (assignment["peak_id_a"], assignment["peak_id_b"], - assignment["label_a"], assignment["label_b"], assignment["charge_a"], assignment["charge_b"], assignment["ppm_error"])) - conn.commit() - conn.close() - return + return records -def annotate_molecular_formulae(peaklist, lib_adducts, ppm, db_out, db_in="http://mfdb.bham.ac.uk", rules=True, max_mz=None): +def annotate_molecular_formulae(peaklist, lib_adducts, ppm, db_out, db_in="https://mfdb.bham.ac.uk", patterns=True, rules=True, max_mz=None): conn = sqlite3.connect(db_out) cursor = conn.cursor() @@ -591,6 +893,8 @@ def annotate_molecular_formulae(peaklist, lib_adducts, ppm, db_out, db_in="http: exact_mass REAL DEFAULT NULL, ppm_error REAL DEFAULT NULL, adduct TEXT DEFAULT NULL, + isotope TEXT DEFAULT '', + neutral_loss TEXT DEFAULT '', C INTEGER DEFAULT 0, H INTEGER DEFAULT 0, N INTEGER DEFAULT 0, @@ -604,13 +908,15 @@ def annotate_molecular_formulae(peaklist, lib_adducts, ppm, db_out, db_in="http: lewis INTEGER DEFAULT NULL, senior INTEGER DEFAULT NULL, double_bond_equivalents REAL DEFAULT NULL, - primary key (id, mz, molecular_formula, adduct) + primary key (id, mz, molecular_formula, adduct, isotope, neutral_loss) );""") if os.path.isfile(db_in): conn_mem = DbMolecularFormulaeMemory(db_in) + source = "sqlite" max_mz = None else: + source = "api" url = '{}/api/formula/mass_range'.format(db_in) url_test = '{}/api/formula/mass?mass=180.06339&tol=0.0&tol_unit=ppm&rules=1'.format(db_in) o = urlparse(url) @@ -623,31 +929,29 @@ def annotate_molecular_formulae(peaklist, lib_adducts, ppm, db_out, db_in="http: path_nist_database = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'data', 'nist_database.txt') nist_database = nist_database_to_pyteomics(path_nist_database) - for i in range(len(peaklist.iloc[:, 0])): - mz = float(peaklist["mz"].iloc[i]) - name = str(peaklist["name"].iloc[i]) - - min_tol, max_tol = calculate_mz_tolerance(mz, ppm) + def _select_mfs(source, peak_id, mz, ppm, adducts, isotope, neutral_loss, n_oligo, exact_mass_diff=0.0, rules=True): - if max_mz is not None and mz > max_mz: # TODO - continue - - values = [] - for adduct in lib_adducts.lib: + min_mz, max_mz = calculate_mz_tolerance(mz, ppm) - if mz - lib_adducts.lib[adduct] > 0.5: + mf_records = [] + for adduct in adducts: - if "conn_mem" in locals(): - records = conn_mem.select_mf(min_tol - lib_adducts.lib[adduct], max_tol - lib_adducts.lib[adduct], rules) - else: - params = {"lower": min_tol - lib_adducts.lib[adduct], - "upper": max_tol - lib_adducts.lib[adduct], + if mz - lib_adducts.lib[adduct]["mass"] > 0.5: + if source == "api": + params = {"lower": (min_mz - lib_adducts.lib[adduct]["mass"] - exact_mass_diff) * + lib_adducts.lib[adduct]["charge"] / n_oligo, + "upper": (max_mz - lib_adducts.lib[adduct]["mass"] - exact_mass_diff) * + lib_adducts.lib[adduct]["charge"] / n_oligo, "rules": int(rules)} response = requests.get(url, params=params) records = response.json()["records"] - + else: + records = conn_mem.select_mf((min_mz - lib_adducts.lib[adduct]["mass"] - exact_mass_diff) * + lib_adducts.lib[adduct]["charge"] / n_oligo, + (max_mz - lib_adducts.lib[adduct]["mass"] - exact_mass_diff) * + lib_adducts.lib[adduct]["charge"] / n_oligo, rules=rules) for record in records: - record["id"] = name + record["id"] = peak_id if "CHNOPS" not in record: # MFdb API specific record["CHNOPS"] = True # MFdb API specific if "rules" in record: @@ -656,59 +960,222 @@ def annotate_molecular_formulae(peaklist, lib_adducts, ppm, db_out, db_in="http: if "atoms" in record: record.update(record["atoms"]) del record["atoms"] - record["exact_mass"] = record["exact_mass"] + lib_adducts.lib[adduct] + record["exact_mass"] = (record["exact_mass"] / lib_adducts.lib[adduct]["charge"] * n_oligo) + \ + (float(lib_adducts.lib[adduct]["mass"]) + exact_mass_diff) + record["adduct"] = adduct record["mz"] = mz record["ppm_error"] = calculate_ppm_error(mz, record["exact_mass"]) comp = OrderedDict([(item, record[item]) for item in record if item in nist_database.keys()]) record["molecular_formula"] = composition_to_string(comp) record["adduct"] = adduct + if isotope: + record["isotope"] = isotope + else: + record["isotope"] = "" + if neutral_loss: + record["neutral_loss"] = neutral_loss + else: + record["neutral_loss"] = "" records = _remove_elements_from_compositions(records, keep=["C", "H", "N", "O", "P", "S"]) - values.extend([list(record.values()) for record in records]) + mf_records.extend(records) + + return mf_records + + rows = _select_unions_peak_patterns(cursor) + names_to_skip = [] + + for row in rows: + + if row["peak_id_a"] in names_to_skip and row["peak_id_b"] in names_to_skip and not row["adduct_label"]: + continue + + records = [] + match = None + + if row["adduct_label"]: + if row["mz_ratio"] > 1: # oligomers + match = difflib.get_close_matches(row["adduct_label"], lib_adducts.lib.keys(), n=1) + adducts = [match[0]] + else: + adducts = [str(row["adduct_label"])] + else: + if row["iso_charge"]: + adducts = [] + for a in lib_adducts.lib.keys(): + if lib_adducts.lib[a]["charge"] == row["iso_charge"]: + adducts.append(a) + else: + adducts = lib_adducts.lib.keys() + + index_name = peaklist["name"].tolist().index(str(row["peak_id_a"])) + mz = peaklist["mz"].iloc[index_name] + + if max_mz is not None and mz > max_mz: + continue + + # (source, peak_id, mz, ppm, adducts, isotope, neutral_loss, n_oligo, exact_mass_diff=0.0, rules=True): + records_a = _select_mfs(source, row["peak_id_a"], mz, ppm, adducts, row["iso_label_a"], row["nl_label"], row["mz_ratio"], 0.0, rules=rules) + if row["nl_label"] is not None: # Neutral Loss + + if row["peak_id_a_nl"] is not None: + + if row["peak_id_a_nl"] == row["peak_id_a"]: + peak_id_nl = row["peak_id_aa_nl"] + nl_exact_mass_diff = -row["nl_exact_mass_diff"] + a_nl_exact_mass_diff = row["nl_exact_mass_diff"] + else: + peak_id_nl = row["peak_id_a_nl"] + nl_exact_mass_diff = row["nl_exact_mass_diff"] + a_nl_exact_mass_diff = -row["nl_exact_mass_diff"] + + records_nl = _select_mfs(source, row["peak_id_a"], mz, ppm, adducts, row["iso_label_a"], row["nl_label"], row["mz_ratio"], nl_exact_mass_diff, rules=rules) + records_a.extend(records_nl) # Neutral Loss + + index_name = peaklist["name"].tolist().index(str(peak_id_nl)) + mz = peaklist["mz"].iloc[index_name] + records_a_nl = _select_mfs(source, peak_id_nl, mz, ppm, adducts, row['iso_label_a'], row["nl_label"], row["mz_ratio"], 0.0, rules=rules) # Neutral Loss + records_nl = _select_mfs(source, peak_id_nl, mz, ppm, adducts, row['iso_label_a'], row["nl_label"], row["mz_ratio"], a_nl_exact_mass_diff, rules=rules)# Neutral Loss + records_a_nl.extend(records_nl) # Neutral Loss + + for record_a in reversed(records_a): # list changes during iteration + for record_a_nl in records_a_nl: + if record_a["molecular_formula"] == record_a_nl["molecular_formula"]: + records_a.append(record_a_nl) + + names_to_skip.append(peak_id_nl) + + if row["peak_id_b"]: + + index_name = peaklist["name"].tolist().index(str(row["peak_id_b"])) + mz = peaklist["mz"].iloc[index_name] + + # if row["flag"]: # different adducts - label_a and label_b? + # exact_mass_diff = 0.0 # adduct == isotope e.g. K / (41K) and [M+K]+ / [M+(41K)]+ + # else: + exact_mass_diff = float(row["iso_exact_mass_diff"]) + + records_b = _select_mfs(source, row["peak_id_b"], mz, ppm, adducts, row["iso_label_b"], row["nl_label"], row["mz_ratio"], exact_mass_diff, rules=rules) + + if row["nl_label"] is not None: # Neutral Loss + + if row["peak_id_b_nl"] is not None: + + if row["peak_id_b_nl"] == row["peak_id_b"]: + peak_id_nl = row["peak_id_bb_nl"] + nl_exact_mass_diff = -(row["nl_exact_mass_diff"] - exact_mass_diff) + b_nl_exact_mass_diff = exact_mass_diff + row["nl_exact_mass_diff"] + else: + peak_id_nl = row["peak_id_b_nl"] + nl_exact_mass_diff = (row["nl_exact_mass_diff"] + exact_mass_diff) + b_nl_exact_mass_diff = -row["nl_exact_mass_diff"] + exact_mass_diff + + records_nl = _select_mfs(source, row["peak_id_b"], mz, ppm, adducts, row["iso_label_b"], row["nl_label"], row["mz_ratio"], nl_exact_mass_diff, rules=rules) + records_b.extend(records_nl) # Neutral Loss + + index_name = peaklist["name"].tolist().index(str(peak_id_nl)) + mz = peaklist["mz"].iloc[index_name] + records_b_nl = _select_mfs(source, peak_id_nl, mz, ppm, adducts, row['iso_label_b'], row["nl_label"], row["mz_ratio"], exact_mass_diff, rules=rules) # Neutral Loss + records_nl = _select_mfs(source, peak_id_nl, mz, ppm, adducts, row['iso_label_b'], row["nl_label"], row["mz_ratio"], b_nl_exact_mass_diff, rules=rules) # Neutral Loss + records_b_nl.extend(records_nl) # Neutral Loss + + for record_b in reversed(records_b): + for record_b_nl in records_b_nl: + if record_b["molecular_formula"] == record_b_nl["molecular_formula"]: + records_b.append(record_b_nl) + + names_to_skip.append(peak_id_nl) + + for record_a in records_a: + for record_b in records_b: + if record_a["molecular_formula"] == record_b["molecular_formula"]: + if record_a not in records: + records.append(record_a) + if record_b not in records: + records.append(record_b) + + names_to_skip.append(row["peak_id_a"]) + names_to_skip.append(row["peak_id_b"]) + else: + names_to_skip.append(row["peak_id_a"]) + records.extend(records_a) + + if len(records) > 0: + + if match: + for record in records: + record["adduct"] = row["adduct_label"] + + sql_str = """INSERT OR IGNORE INTO molecular_formulae ({}) VALUES (:{}) + """.format(",".join(map(str, records[0].keys())), ", :".join(map(str, records[0].keys()))) + cursor.executemany(sql_str, records) + conn.commit() + + cursor.execute("select id, molecular_formula from molecular_formulae") + mfs_subset = cursor.fetchall() + + for i in range(len(peaklist.iloc[:, 0])): + + mz = float(peaklist["mz"].iloc[i]) + name = str(peaklist["name"].iloc[i]) + + if name in names_to_skip and patterns: + continue + + if max_mz is not None and mz > max_mz: + continue + + records = _select_mfs(source, name, mz, ppm, lib_adducts.lib.keys(), None, None, 1.0, 0.0, rules=rules) + + records_filt = [record for record in records if (record["id"], record["molecular_formula"]) not in mfs_subset] + if len(records_filt) > 0: + sql_str = """INSERT INTO molecular_formulae ({}) VALUES (:{}) + """.format(",".join(map(str, records_filt[0].keys())), ", :".join(map(str, records_filt[0].keys()))) + cursor.executemany(sql_str, records_filt) + + if source != "api": + conn_mem.close() - time.sleep(0.02) - if len(values) > 0: - cursor.executemany("""insert into molecular_formulae ({}) values (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?) - """.format(",".join(map(str, list(record.keys())))), values) conn.commit() conn.close() return -def annotate_compounds(peaklist, lib_adducts, ppm, db_out, db_name, db_in=""): +def annotate_compounds(peaklist, lib_adducts, ppm, db_out, db_name, patterns=True, db_in="", rt_tol=None): if db_in is None or db_in == "": + conn_cpds = None path_dbs = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'data', 'databases') - conn_local = None for db_local in os.listdir(path_dbs): if db_name == db_local.replace(".sql.gz", ""): with gzip.GzipFile(os.path.join(path_dbs, db_local), mode='rb') as db_dump: - conn_local = sqlite3.connect(":memory:") - cursor_local = conn_local.cursor() - cursor_local.executescript(db_dump.read().decode('utf-8')) - conn_local.commit() + conn_cpds = sqlite3.connect(":memory:") + cursor_cpds = conn_cpds.cursor() + cursor_cpds.executescript(db_dump.read().decode('utf-8')) + conn_cpds.commit() - cursor_local.execute("CREATE INDEX idx_exact_mass ON {} (exact_mass)".format(db_name.replace(".sql.gz", ""))) + cursor_cpds.execute("CREATE INDEX idx_exact_mass ON {} (exact_mass)".format(db_name.replace(".sql.gz", ""))) - cursor_local.execute("SELECT name FROM sqlite_master WHERE type='table'") - if (db_name.replace(".sql.gz", ""), ) not in cursor_local.fetchall(): + cursor_cpds.execute("SELECT name FROM sqlite_master WHERE type='table'") + if (db_name.replace(".sql.gz", ""), ) not in cursor_cpds.fetchall(): raise ValueError("Database {} not available".format(db_name)) break - if conn_local is None: + if conn_cpds is None: raise ValueError("Database {} not available".format(db_name)) elif os.path.isfile(db_in): with open(db_in, 'rb') as fd: if fd.read(100)[:16].decode() == 'SQLite format 3\x00': - conn_local = sqlite3.connect(db_in) - cursor_local = conn_local.cursor() - cursor_local.execute("SELECT name FROM sqlite_master WHERE type='table'") - if not (db_name, ) in cursor_local.fetchall(): + conn_cpds = sqlite3.connect(db_in) + cursor_cpds = conn_cpds.cursor() + cursor_cpds.execute("SELECT name FROM sqlite_master WHERE type='table'") + if not (db_name, ) in cursor_cpds.fetchall(): raise ValueError("Database {} not available".format(db_name)) else: - conn_mem = DbCompoundsMemory(db_in) + cursor_cpds = DbCompoundsMemory(db_in, lib_adducts=lib_adducts) + else: raise IOError("[Errno 2] No such file or directory: {}".format(db_in)) @@ -717,11 +1184,14 @@ def annotate_compounds(peaklist, lib_adducts, ppm, db_out, db_name, db_in=""): cursor.execute("DROP TABLE IF EXISTS compounds_{}".format(db_name)) cursor.execute("""CREATE TABLE compounds_{} ( - id TEXT DEFAULT NULL, + id TEXT NOT NULL, mz REAL DEFAULT NULL, - exact_mass REAL DEFAULT NULL, + exact_mass REAL NOT NULL, ppm_error REAL DEFAULT NULL, - adduct TEXT DEFAULT NULL, + rt_diff REAL DEFAULT NULL, + adduct TEXT NOT NULL, + isotope TEXT DEFAULT '', + neutral_loss TEXT DEFAULT '', C INTEGER DEFAULT 0, H INTEGER DEFAULT 0, N INTEGER DEFAULT 0, @@ -730,38 +1200,229 @@ def annotate_compounds(peaklist, lib_adducts, ppm, db_out, db_name, db_in=""): S INTEGER DEFAULT 0, CHNOPS INTEGER DEFAULT NULL, molecular_formula TEXT DEFAULT NULL, - compound_id TEXT DEFAULT NULL, + compound_id TEXT NOT NULL, compound_name TEXT DEFAULT NULL, - primary key (id, compound_id, adduct) + primary key (id, compound_id, adduct, isotope, neutral_loss) );""".format(db_name)) + conn.commit() + + def _select_compounds(db_cursor, peak_id, mz, ppm, adducts, isotope, neutral_loss, min_rt=None, max_rt=None, n_oligo=1, exact_mass_isotope=0.0): + + cpd_records = [] + + min_mz, max_mz = calculate_mz_tolerance(mz, ppm) + + if min_rt and max_rt: + cpd_records = db_cursor.select_compounds(min_mz, max_mz, min_rt=min_rt, max_rt=max_rt) + else: + for adduct in adducts: + + if min_mz - lib_adducts.lib[adduct]["mass"] < 0.5: + continue + + if isinstance(db_cursor, sqlite3.Cursor): + col_names = ["compound_id", "C", "H", "N", "O", "P", "S", "CHNOPS", + "molecular_formula", "compound_name", "exact_mass"] + db_cursor.execute("""SELECT id, C, H, N, O, P, S, CHNOPS, + molecular_formula, name, exact_mass + from {} where exact_mass >= {} and exact_mass <= {} + """.format(db_name, + (min_mz - lib_adducts.lib[adduct]["mass"] - exact_mass_isotope) * + lib_adducts.lib[adduct]["charge"] / n_oligo, + (max_mz - lib_adducts.lib[adduct]["mass"] - exact_mass_isotope) * + lib_adducts.lib[adduct]["charge"] / n_oligo)) + cpd_records_subset = [OrderedDict(zip(col_names, list(record))) for record in db_cursor.fetchall()] + + else: + if min_rt and max_rt: + cpd_records_subset = db_cursor.select_compounds(min_mz - lib_adducts.lib[adduct]["mass"] - exact_mass_isotope, + max_mz - lib_adducts.lib[adduct]["mass"] - exact_mass_isotope, + min_rt=min_rt, max_rt=max_rt) + else: + cpd_records_subset = db_cursor.select_compounds((min_mz - lib_adducts.lib[adduct]["mass"] - exact_mass_isotope) * + lib_adducts.lib[adduct]["charge"] / n_oligo, + (max_mz - lib_adducts.lib[adduct]["mass"] - exact_mass_isotope) * + lib_adducts.lib[adduct]["charge"] / n_oligo) + for record in cpd_records_subset: + record["exact_mass"] = (record["exact_mass"] / lib_adducts.lib[adduct]["charge"] * n_oligo) + \ + (float(lib_adducts.lib[adduct]["mass"]) + exact_mass_isotope) + record["adduct"] = adduct + + cpd_records.extend(cpd_records_subset) + + for record in cpd_records: + record["id"] = peak_id + record["mz"] = mz + record["ppm_error"] = calculate_ppm_error(mz, record["exact_mass"]) + if isotope: + record["isotope"] = isotope + else: + record["isotope"] = "" # NULL/None is not used here as isotope is part of the primary key + if neutral_loss: + record["neutral_loss"] = neutral_loss + else: + record["neutral_loss"] = "" # NULL/None is not used here as isotope is part of the primary key + if "retention_time" in record: + record["rt_diff"] = rt - float(record["retention_time"]) + del record["retention_time"] + + return cpd_records + + rows = _select_unions_peak_patterns(cursor) + + names_to_skip = [] + for row in rows: + + if row["peak_id_a"] in names_to_skip and row["peak_id_b"] in names_to_skip and not row["adduct_label"]: + continue + + records = [] + match = None + + if row["adduct_label"]: + if row["mz_ratio"] > 1: # oligomers + match = difflib.get_close_matches(row["adduct_label"], lib_adducts.lib.keys(), n=1) + adducts = [match[0]] + else: + adducts = [str(row["adduct_label"])] + else: + if row["iso_charge"]: + adducts = [] + for a in lib_adducts.lib.keys(): + if int(lib_adducts.lib[a]["charge"]) == int(row["iso_charge"]): + adducts.append(a) + else: + adducts = lib_adducts.lib.keys() + + index_name = peaklist["name"].tolist().index(str(row["peak_id_a"])) + mz = peaklist["mz"].iloc[index_name] + + records_a = _select_compounds(cursor_cpds, row["peak_id_a"], mz, ppm, adducts, row['iso_label_a'], row["nl_label"], None, None, row["mz_ratio"], 0.0) + if row["nl_label"] is not None: # Neutral Loss + + if row["peak_id_a_nl"] is not None: + + if row["peak_id_a_nl"] == row["peak_id_a"]: + peak_id_nl = row["peak_id_aa_nl"] + nl_exact_mass_diff = -row["nl_exact_mass_diff"] + a_nl_exact_mass_diff = row["nl_exact_mass_diff"] + else: + peak_id_nl = row["peak_id_a_nl"] + nl_exact_mass_diff = row["nl_exact_mass_diff"] + a_nl_exact_mass_diff = -row["nl_exact_mass_diff"] + + records_nl = _select_compounds(cursor_cpds, row["peak_id_a"], mz, ppm, adducts, row['iso_label_a'], row["nl_label"], None, None, row["mz_ratio"], nl_exact_mass_diff) # Neutral Loss + records_a.extend(records_nl) # Neutral Loss + + index_name = peaklist["name"].tolist().index(str(peak_id_nl)) + mz = peaklist["mz"].iloc[index_name] + records_a_nl = _select_compounds(cursor_cpds, peak_id_nl, mz, ppm, adducts, row['iso_label_a'], row["nl_label"], None, None, row["mz_ratio"], 0.0) # Neutral Loss + records_nl = _select_compounds(cursor_cpds, peak_id_nl, mz, ppm, adducts, row['iso_label_a'], row["nl_label"], None, None, row["mz_ratio"], a_nl_exact_mass_diff) # Neutral Loss + records_a_nl.extend(records_nl) # Neutral Loss + + for record_a in reversed(records_a): # list changes during iteration + for record_a_nl in records_a_nl: + if record_a["compound_id"] == record_a_nl["compound_id"]: + records_a.append(record_a_nl) + + names_to_skip.append(peak_id_nl) + + if row["peak_id_b"]: + + index_name = peaklist["name"].tolist().index(str(row["peak_id_b"])) + mz = peaklist["mz"].iloc[index_name] + + #if row["flag"]: # different adducts - label_a and label_b? + # exact_mass_diff = 0.0 # adduct == isotope e.g. K / (41K) and [M+K]+ / [M+(41K)]+ + #else: + exact_mass_diff = float(row["iso_exact_mass_diff"]) + + records_b = _select_compounds(cursor_cpds, row["peak_id_b"], mz, ppm, adducts, row['iso_label_b'], row["nl_label"], None, None, row["mz_ratio"], exact_mass_diff) + if row["nl_label"] is not None: # Neutral Loss + + if row["peak_id_b_nl"] is not None: + + if row["peak_id_b_nl"] == row["peak_id_b"]: + peak_id_nl = row["peak_id_bb_nl"] + nl_exact_mass_diff = -(row["nl_exact_mass_diff"] - exact_mass_diff) + b_nl_exact_mass_diff = exact_mass_diff + row["nl_exact_mass_diff"] + else: + peak_id_nl = row["peak_id_b_nl"] + nl_exact_mass_diff = (row["nl_exact_mass_diff"] + exact_mass_diff) + b_nl_exact_mass_diff = -row["nl_exact_mass_diff"] + exact_mass_diff + + records_nl = _select_compounds(cursor_cpds, row["peak_id_b"], mz, ppm, adducts, row['iso_label_b'], row["nl_label"], None, None, row["mz_ratio"], nl_exact_mass_diff) # Neutral Loss + records_b.extend(records_nl) # Neutral Loss + + index_name = peaklist["name"].tolist().index(str(peak_id_nl)) + mz = peaklist["mz"].iloc[index_name] + + records_b_nl = _select_compounds(cursor_cpds, peak_id_nl, mz, ppm, adducts, row['iso_label_b'], row["nl_label"], None, None, row["mz_ratio"], exact_mass_diff) # Neutral Loss + records_nl = _select_compounds(cursor_cpds, peak_id_nl, mz, ppm, adducts, row['iso_label_b'], row["nl_label"], None, None, row["mz_ratio"], b_nl_exact_mass_diff) # Neutral Loss + records_b_nl.extend(records_nl) # Neutral Loss + for record_b in reversed(records_b): # list changes during iteration + for record_b_nl in records_b_nl: + if record_b["compound_id"] == record_b_nl["compound_id"]: + records_b.append(record_b_nl) + + names_to_skip.append(peak_id_nl) + + for record_a in records_a: + for record_b in records_b: + if record_a["compound_id"] == record_b["compound_id"]: + if record_a not in records: + records.append(record_a) + if record_b not in records: + records.append(record_b) + + names_to_skip.append(row["peak_id_b"]) + else: + names_to_skip.append(row["peak_id_a"]) + records.extend(records_a) + + if len(records) > 0: + if match: + for record in records: + record["adduct"] = row["adduct_label"] + + sql_str = """INSERT OR IGNORE INTO compounds_{} ({}) VALUES (:{}) + """.format(db_name, ",".join(map(str, records[0].keys())), ", :".join(map(str, records[0].keys()))) + cursor.executemany(sql_str, records) + conn.commit() + + cursor.execute("select id, compound_id from compounds_{}".format(db_name)) + cpds_subset = cursor.fetchall() for i in range(len(peaklist.iloc[:, 0])): + mz = float(peaklist["mz"].iloc[i]) name = str(peaklist["name"].iloc[i]) - min_tol, max_tol = calculate_mz_tolerance(mz, ppm) - - for adduct in lib_adducts.lib: + rt = float(peaklist["rt"].iloc[i]) - if mz - lib_adducts.lib[adduct] > 0.5: + records = [] + if rt_tol: + min_rt, max_rt = calculate_rt_tolerance(rt, rt_tol) + else: + min_rt, max_rt = None, None - if "conn_mem" in locals(): - records = conn_mem.select_compounds(min_tol - lib_adducts.lib[adduct], max_tol - lib_adducts.lib[adduct]) - elif "conn_local" in locals(): - col_names = ["compound_id", "C", "H", "N", "O", "P", "S", "CHNOPS", "molecular_formula", "compound_name", "exact_mass"] - cursor_local.execute("""SELECT id, C, H, N, O, P, S, CHNOPS, - molecular_formula, name, exact_mass - from {} where exact_mass >= {} and exact_mass <= {} - """.format(db_name, min_tol - lib_adducts.lib[adduct], max_tol - lib_adducts.lib[adduct])) - records = [OrderedDict(zip(col_names, list(record))) for record in cursor_local.fetchall()] + if min_rt and max_rt: + records = _select_compounds(cursor_cpds, name, mz, ppm, None, None, None, min_rt, max_rt, 1.0, 0.0) + else: + if name in names_to_skip and patterns: + continue + records = _select_compounds(cursor_cpds, name, mz, ppm, lib_adducts.lib.keys(), None, None, None, None, 1.0, 0.0) + + records_filt = [record for record in records if (record["id"], record["compound_id"]) not in cpds_subset] + if len(records_filt) > 0: + sql_str = """INSERT INTO compounds_{} ({}) VALUES (:{}) + """.format(db_name, ",".join(map(str, records_filt[0].keys())), ", :".join(map(str, records_filt[0].keys()))) + cursor.executemany(sql_str, records_filt) + + if isinstance(cursor_cpds, sqlite3.Cursor): + conn_cpds.close() + else: + cursor_cpds.close() - for record in records: - record["id"] = name - record["exact_mass"] = record["exact_mass"] + float(lib_adducts.lib[adduct]) - record["mz"] = mz - record["ppm_error"] = calculate_ppm_error(mz, record["exact_mass"]) - record["adduct"] = adduct - cursor.execute("""insert into compounds_{} ({}) values (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?) - """.format(db_name, ",".join(map(str, list(record.keys())))), list(record.values())) conn.commit() conn.close() return @@ -904,13 +1565,13 @@ def annotate_drug_products(peaklist, db_out, list_smiles, lib_adducts, ppm, phas min_tol, max_tol = calculate_mz_tolerance(mz, ppm) for adduct in lib_adducts.lib: - if mz - lib_adducts.lib[adduct] > 0.5: + if mz - lib_adducts.lib[adduct]["mass"] > 0.5: - records = conn_mem.select(min_tol - lib_adducts.lib[adduct], max_tol - lib_adducts.lib[adduct]) + records = conn_mem.select(min_tol - lib_adducts.lib[adduct]["mass"], max_tol - lib_adducts.lib[adduct]["mass"]) for record in records: record["id"] = name - record["exact_mass"] = record["exact_mass"] + float(lib_adducts.lib[adduct]) + record["exact_mass"] = record["exact_mass"] + float(lib_adducts.lib[adduct]["mass"]) record["mz"] = mz record["ppm_error"] = calculate_ppm_error(mz, record["exact_mass"]) record["adduct"] = adduct @@ -928,23 +1589,28 @@ def summary(df, db, single_row=False, single_column=False, convert_rt=None, ndig cursor.execute("DROP TABLE IF EXISTS peaklist") df[["name", "mz", "rt", "intensity"]].to_sql("peaklist", conn, index=False) + cursor.execute("CREATE INDEX idx_name on peaklist (name)") cursor.execute("SELECT name FROM sqlite_master WHERE type='table'") tables = cursor.fetchall() - tables_amo = ["adduct_pairs", "multiple_charged_ions", "oligomers", "isotopes"] + tables_pp = ["adduct_pairs", "oligomers", "isotopes", "neutral_losses"] # TODO: make more efficient tables_to_union = [] for tn in tables: - if tn[0] in tables_amo: + if tn[0] in tables_pp: # TODO - add addtional tables (neutral losses) tables_to_union.append(str(tn[0])) - if len(tables_to_union) > 0 and ("groups",) in tables: + flag_groups = ("groups",) in tables + + if len(tables_to_union) > 0 and flag_groups: if len(tables_to_union) > 1: query = "select peak_id_a, peak_id_b from " query += " union select peak_id_a, peak_id_b from ".join(map(str, tables_to_union)) elif len(tables_to_union) == 1: query = "select peak_id_a, peak_id_b from {}".format(tables_to_union[0]) + else: + query = "" cursor.execute(query) records = [(str(record[0]), str(record[1])) for record in cursor.fetchall()] @@ -959,7 +1625,7 @@ def summary(df, db, single_row=False, single_column=False, convert_rt=None, ndig for n in g.nodes(): to_add.append([i+1, n, g.degree(n), g.number_of_nodes(), g.number_of_edges()]) - cursor.execute("""CREATE TEMPORARY TABLE sub_groups ( + cursor.execute("""CREATE TEMP TABLE sub_groups ( sub_group_id INTEGER DEFAULT NULL, peak_id INTEGER DEFAULT NULL, degree INTEGER DEFAULT NULL, @@ -967,85 +1633,94 @@ def summary(df, db, single_row=False, single_column=False, convert_rt=None, ndig n_edges INTEGER DEFAULT NULL, PRIMARY KEY (sub_group_id, peak_id));""") - cursor.executemany("""insert into sub_groups (sub_group_id, peak_id, degree, n_nodes, n_edges) - values (?,?,?,?,?)""", to_add) + cursor.executemany("""INSERT INTO sub_groups (sub_group_id, peak_id, degree, n_nodes, n_edges) + VALUES (?,?,?,?,?)""", to_add) - columns_groupings = """peak_id, group_id, degree_cor, sub_group_id, degree, n_nodes, n_edges""" + columns_groupings = ["peak_id", "group_id", "degree_cor", "sub_group_id", "degree", "n_nodes", "n_edges"] - query_groupings = """select distinct gr.peak_id as peak_id, gr.group_id as group_id, degree_cor, - sub_groups.sub_group_id as sub_group_id, sub_groups.degree as degree, - sub_groups.n_nodes as n_nodes, sub_groups.n_edges as n_edges - from (select group_id, peak_id_a as peak_id, degree_a as degree_cor from groups - union - select group_id, peak_id_b as peak_id, degree_b as degree_cor from groups) AS gr + query_groupings = """SELECT DISTINCT gr.peak_id AS peak_id, gr.group_id AS group_id, degree_cor, + sub_groups.sub_group_id AS sub_group_id, sub_groups.degree AS degree, + sub_groups.n_nodes AS n_nodes, sub_groups.n_edges AS n_edges + FROM (SELECT group_id, peak_id_a AS peak_id, degree_a AS degree_cor FROM groups + UNION + SELECT group_id, peak_id_b AS peak_id, degree_b AS degree_cor FROM groups) AS gr LEFT JOIN sub_groups ON gr.peak_id = sub_groups.peak_id""" else: query_groupings = "" - columns_groupings = "" + columns_groupings = [] - flag_amo = len([tl for tl in tables_amo if (tl,) in tables]) > 0 + columns_adducts_oligo, columns_isotopes, columns_nls = [], [], [] + query_adducts_oligo, query_isotopes, query_nls = "", "", "" + + # flag_pp = len([tl for tl in tables_pp if (tl,) in tables]) > 0 + flag_adducts_oligo = ("adduct_pairs",) in tables or ("oligomers",) in tables flag_isotopes = ("isotopes",) in tables + flag_neutral_losses = ("neutral_losses",) in tables - if flag_amo: + if flag_adducts_oligo: sub_queries = [] - for tl in tables_amo: + for tl in tables_pp: if (tl,) in tables: if tl == "adduct_pairs": - sub_queries.append("""select peak_id_a as peak_id_amo, label_a as label, 1 as charge, 1 as oligomer from adduct_pairs - union - select peak_id_b as peak_id_amo, label_b as label, 1 as charge, 1 as oligomer from adduct_pairs""") - elif tl == "multiple_charged_ions": - sub_queries.append("""select peak_id_a as peak_id_amo, label_a as label, charge_a as charge, 1 as oligomer from multiple_charged_ions + sub_queries.append("""select peak_id_a AS peak_id_pp, label_a AS label, charge_a AS charge, 1 AS oligomer from adduct_pairs union - select peak_id_b as peak_id_amo, label_b as label, charge_b as charge, 1 as oligomer from multiple_charged_ions""") + select peak_id_b AS peak_id_pp, label_b AS label, charge_b AS charge, 1 AS oligomer from adduct_pairs""") elif tl == "oligomers": - sub_queries.append("""select peak_id_a as peak_id_amo, label_a as label, 1 as charge, 1 as oligomer from oligomers + sub_queries.append("""select peak_id_a AS peak_id_pp, label_a AS label, charge_a AS charge, 1 AS oligomer from oligomers union - select peak_id_b as peak_id_amo, label_b as label, 1 as charge, round(mz_ratio) as oligomer from oligomers""") - columns_amo = ", label, charge, oligomer" - query_amo = " union ".join(map(str, sub_queries)) + select peak_id_b AS peak_id_pp, label_b AS label, charge_b AS charge, cast(round(mz_ratio) AS integer) AS oligomer from oligomers""") + columns_adducts_oligo = ["label", "charge", "oligomer"] + query_adducts_oligo = " union ".join(map(str, sub_queries)) if flag_isotopes: - columns_isotopes = ", isotope_labels_a, isotope_ids, isotope_labels_b, atoms" - query_isotopes = """SELECT peak_id_a, group_concat(label_a) as isotope_labels_a, - group_concat(peak_id_b, ",") as isotope_ids, - group_concat(label_b) as isotope_labels_b, group_concat(round(atoms,1), ",") as atoms - from (select peak_id_a, label_a, peak_id_b, label_b, atoms, ppm_error from isotopes + columns_isotopes = ["isotope_labels_a", "isotope_ids", "isotope_labels_b", "isotope_charges", "atoms"] + query_isotopes = """SELECT peak_id_a as isotope_peak_id_a, group_concat(label_a) AS isotope_labels_a, + group_concat(peak_id_b, ",") AS isotope_ids, + group_concat(label_b) AS isotope_labels_b, group_concat(charge) AS isotope_charges, + group_concat(round(atoms,1), ",") AS atoms + from (select peak_id_a, label_a, peak_id_b, label_b, charge, atoms, ppm_error from isotopes union - select peak_id_b as peak_id_a, label_b as label_a, - peak_id_a as peak_id_b, label_a as label_b, atoms, ppm_error - from isotopes - ) group by peak_id_a""" + select peak_id_b AS peak_id_a, label_b AS label_a, + peak_id_a AS peak_id_b, label_a AS label_b, charge, atoms, ppm_error + from isotopes + ) group by isotope_peak_id_a""" + + if flag_neutral_losses: + columns_nls = ["nl_labels", "nl_ids"] + query_nls = """SELECT peak_id_a as nl_peak_id_a, group_concat(label) AS nl_labels, + group_concat(peak_id_b, ",") AS nl_ids + from (select peak_id_a, label, peak_id_b, ppm_error + from neutral_losses + union + select peak_id_b as peak_id_a, label, peak_id_a AS peak_id_b, ppm_error + from neutral_losses + ) group by nl_peak_id_a""" cursor.execute("DROP TABLE IF EXISTS peak_labels") - if flag_amo and flag_isotopes: - query = "CREATE TABLE peak_labels as " - if query_groupings != "": - query += "SELECT {}{}{} from """.format(columns_groupings, columns_amo, columns_isotopes) - query += "({}) LEFT JOIN ({}) ON peak_id = peak_id_amo LEFT JOIN ({}) ON peak_id = peak_id_a".format(query_groupings, query_amo, query_isotopes) - else: - query += "SELECT peaklist.name as peak_id{}{} from ".format(columns_amo, columns_isotopes) - query += "peaklist LEFT JOIN ({}) ON peaklist.name = peak_id LEFT JOIN ({}) ON peaklist.name = peak_id_a".format(query_amo, query_isotopes) - query = query.replace("peak_id_amo", "peak_id") - cursor.execute(query) - elif flag_isotopes and not flag_amo: - query = "CREATE TABLE peak_labels as " - if query_groupings != "": - query += "select {}{} from ".format(columns_groupings, columns_isotopes) - query += "({}) LEFT JOIN ({}) ON peak_id = peak_id_a".format(query_groupings, query_isotopes) - else: - query += query_isotopes - cursor.execute(query) - elif not flag_isotopes and flag_amo: - query = "CREATE TABLE peak_labels as " - if query_groupings != "": - query += """select {}{} from """.format(columns_groupings, columns_amo) - query += """({}) LEFT JOIN ({}) ON peak_id = peak_id_amo""".format(query_groupings, query_amo) + sql_str_index = "CREATE INDEX idx_peak_id ON peak_labels (peak_id)" + + if flag_adducts_oligo or flag_isotopes or flag_neutral_losses: + columns = ", ".join(map(str, columns_groupings + columns_adducts_oligo + columns_isotopes + columns_nls)) + query = "CREATE TABLE peak_labels AS " + if query_groupings: + query += "SELECT {} FROM ({})".format(columns, query_groupings) else: - query += query_amo.replace("peak_id_amo", "peak_id") + query += "SELECT peaklist.name AS peak_id, {} FROM peaklist ".format(columns) + + if flag_adducts_oligo: + query += " LEFT JOIN ({}) ON peak_id = peak_id_pp ".format(query_adducts_oligo) + if flag_isotopes: + query += " LEFT JOIN ({}) ON peak_id = isotope_peak_id_a ".format(query_isotopes) + if flag_neutral_losses: + query += " LEFT JOIN ({}) ON peak_id = nl_peak_id_a ".format(query_nls) + cursor.execute(query) - if flag_amo: + cursor.execute(sql_str_index) + + if flag_adducts_oligo: + + # Add dummy row for features to make joint statement possible where label and adduct do not match. cursor.execute('PRAGMA table_info("peak_labels")') columns = cursor.fetchall() @@ -1057,9 +1732,8 @@ def summary(df, db, single_row=False, single_column=False, convert_rt=None, ndig else: columns_to_select.append(cn[1]) - query = "INSERT INTO peak_labels" - query += " SELECT {} FROM peak_labels where label is not NULL".format(", ".join(map(str, columns_to_select))) - + query = """INSERT INTO peak_labels + SELECT {} FROM peak_labels where label is not NULL""".format(", ".join(map(str, columns_to_select))) cursor.execute(query) conn.commit() @@ -1068,62 +1742,69 @@ def summary(df, db, single_row=False, single_column=False, convert_rt=None, ndig flag_mf = ("molecular_formulae",) in tables flag_cpd = len(cpd_tables) > 0 - columns = ["exact_mass", "ppm_error", "adduct", "C", "H", "N", "O", "P", "S", "molecular_formula"] + columns = ["exact_mass", "ppm_error", "rt_diff", "adduct", "C", "H", "N", "O", "P", "S", "molecular_formula"] + + cpd_t_flags = [] + for i in range(0, len(cpd_tables)): + b = [0] * len(cpd_tables) + b[i] = 1 + cpd_t_flags.append(["{} AS {}".format(flag[0], flag[1]) for flag in list(zip(b, cpd_tables))]) if len(cpd_tables) > 1: - unions_cpd_sub_query = "LEFT JOIN (select * from " - unions_cpd_sub_query += " union select * from ".join(map(str, cpd_tables)) - unions_cpd_sub_query += ") as ct " + unions_cpd_sub_query = "LEFT JOIN (select *, {} from {}".format(", ".join(map(str, cpd_t_flags[0])), cpd_tables[0]) + for i, cpd_t in enumerate(cpd_tables[1:], start=1): + unions_cpd_sub_query += " union select *, {} from {}".format(", ".join(map(str, cpd_t_flags[i])), cpd_t) + unions_cpd_sub_query += ") AS ct " elif len(cpd_tables) == 1: - unions_cpd_sub_query = "LEFT JOIN (select * from {}) as ct".format(cpd_tables[0]) + unions_cpd_sub_query = "LEFT JOIN (select *, 1 AS {} from {}) AS ct".format(cpd_tables[0], cpd_tables[0]) else: unions_cpd_sub_query = "" if flag_mf and flag_cpd: - unions_cpd_query = "CREATE TEMP TABLE compounds AS select * from " - unions_cpd_query += " union select * from ".join(map(str, cpd_tables)) + # unions_cpd_query = "CREATE TEMP TABLE compounds AS select * from " + # unions_cpd_query += " union select * from ".join(map(str, cpd_tables)) + + unions_cpd_query = "CREATE TEMP TABLE compounds AS select *, {} from {}".format(", ".join(map(str, cpd_t_flags[0])), cpd_tables[0]) + for i, cpd_t in enumerate(cpd_tables[1:], start=1): + unions_cpd_query += " union select *, {} from {}".format(", ".join(map(str, cpd_t_flags[i])), cpd_t) cursor.execute(unions_cpd_query) unions_cpd_sub_query = "" query = """CREATE TEMP TABLE mf_cd as - SELECT mf.id, mf.exact_mass, mf.ppm_error, mf.adduct, mf.C, mf.H, mf.N, mf.O, mf.P, mf.S, - mf.molecular_formula, cpds.compound_name, cpds.compound_id - FROM molecular_formulae as mf - LEFT JOIN compounds as cpds + SELECT mf.id, mf.exact_mass, mf.ppm_error, cpds.rt_diff, mf.adduct, + mf.C, mf.H, mf.N, mf.O, mf.P, mf.S, + mf.molecular_formula, cpds.compound_name, cpds.compound_id, NULL AS compound_count, {} + FROM molecular_formulae AS mf + LEFT JOIN compounds AS cpds ON mf.molecular_formula = cpds.molecular_formula AND mf.adduct = cpds.adduct UNION - SELECT cpds.id, cpds.exact_mass, cpds.ppm_error, cpds.adduct, cpds.C, cpds.H, cpds.N, cpds.O, cpds.P, cpds.S, - cpds.molecular_formula, cpds.compound_name, cpds.compound_id - FROM compounds as cpds - LEFT JOIN molecular_formulae as mf + SELECT cpds.id, cpds.exact_mass, cpds.ppm_error, cpds.rt_diff, cpds.adduct, cpds.C, + cpds.H, cpds.N, cpds.O, cpds.P, cpds.S, + cpds.molecular_formula, cpds.compound_name, cpds.compound_id, NULL AS compound_count, {} + FROM compounds AS cpds + LEFT JOIN molecular_formulae AS mf ON mf.molecular_formula = cpds.molecular_formula AND mf.adduct = cpds.adduct - WHERE mf.molecular_formula IS NULL""" + WHERE mf.molecular_formula IS NULL + """.format(", ".join(map(str, ["cpds." + ct for ct in cpd_tables])), + ", ".join(map(str, ["cpds." + ct for ct in cpd_tables]))) cursor.execute(query) - - # mf_cpc_columns = "".join(map(str, [", mf.{} as {}".format(c, c) for c in columns])) - # mf_cpc_columns += ", ct.compound_name as compound_name, ct.compound_id as compound_id" - # unions_cpd_sub_query += " ON mf.molecular_formula = ct.molecular_formula AND mf.adduct = ct.adduct" - # if flag_amo: - # union_mf_sub_query = "LEFT JOIN molecular_formulae AS mf ON (peaklist.name = mf.id and peak_labels.label = mf.adduct)" - # union_mf_sub_query += " OR (peaklist.name = mf.id AND peak_labels.label is NULL and not exists (select 1 from peak_labels where peak_id = mf.id and label = mf.adduct))" - # else: - # union_mf_sub_query = "LEFT JOIN molecular_formulae AS mf ON peaklist.name = mf.id" - - mf_cpc_columns = "".join(map(str, [", mf_cd.{} as {}".format(c, c) for c in columns])) - mf_cpc_columns += ", mf_cd.compound_name as compound_name, mf_cd.compound_id as compound_id" - if flag_amo: + mf_cpc_columns = "".join(map(str, [", mf_cd.{} AS {}".format(c, c) for c in columns])) + mf_cpc_columns += ", mf_cd.compound_name AS compound_name, mf_cd.compound_id AS compound_id, NULL AS compound_count, " + mf_cpc_columns += ", ".join(map(str, cpd_tables)) + if flag_adducts_oligo: union_mf_sub_query = "LEFT JOIN mf_cd ON (peaklist.name = mf_cd.id and peak_labels.label = mf_cd.adduct)" union_mf_sub_query += " OR (peaklist.name = mf_cd.id AND peak_labels.label is NULL and not exists (select 1 from peak_labels where peak_id = mf_cd.id and label = mf_cd.adduct))" else: union_mf_sub_query = "LEFT JOIN mf_cd ON peaklist.name = mf_cd.id" elif not flag_mf and flag_cpd: - mf_cpc_columns = "".join(map(str,[", ct.{} as {}".format(c, c) for c in columns])) - mf_cpc_columns += ", compound_name as compound_name, compound_id as compound_id" - if flag_amo: + mf_cpc_columns = "".join(map(str,[", ct.{} AS {}".format(c, c) for c in columns])) + mf_cpc_columns += ", compound_name AS compound_name, compound_id AS compound_id, NULL AS compound_count, " + mf_cpc_columns += ", ".join(map(str, cpd_tables)) + if flag_adducts_oligo: unions_cpd_sub_query += " ON (peaklist.name = ct.id AND peak_labels.label = adduct)" unions_cpd_sub_query += " OR (peaklist.name = ct.id AND peak_labels.label is NULL and not exists (select 1 from peak_labels where peak_id = ct.id and label = ct.adduct))" else: @@ -1131,8 +1812,8 @@ def summary(df, db, single_row=False, single_column=False, convert_rt=None, ndig union_mf_sub_query = "" elif flag_mf and not flag_cpd: - mf_cpc_columns = "".join(map(str, [", mf.{} as {}".format(c, c) for c in columns])) - if flag_amo: + mf_cpc_columns = "".join(map(str, [", mf.{} AS {}".format(c, c) for c in columns if c != "rt_diff"])) + if flag_adducts_oligo: union_mf_sub_query = "LEFT JOIN molecular_formulae AS mf" union_mf_sub_query += " ON (peaklist.name = mf.id AND peak_labels.label = mf.adduct)" union_mf_sub_query += " OR (peaklist.name = mf.id AND peak_labels.label is NULL and not exists (select 1 from peak_labels where peak_id = mf.id and label = mf.adduct))" @@ -1162,69 +1843,184 @@ def summary(df, db, single_row=False, single_column=False, convert_rt=None, ndig pl_columns = "" join_peak_labels = "" - query = """CREATE TABLE summary AS SELECT - peaklist.name, peaklist.mz, peaklist.rt, peaklist.intensity{}{} - FROM peaklist - {} - {} - {} - """.format(pl_columns, mf_cpc_columns, join_peak_labels, union_mf_sub_query, unions_cpd_sub_query) - # ORDER BY peaklist.rt, peaklist.mz + sql_str_order = "ORDER BY peaklist.rowid" + if ".label," in pl_columns: + sql_str_order += ", label is NULL, label" + if "isotope" in pl_columns: + sql_str_order += ", isotope_labels_a is NULL, isotope_labels_a" + if "ppm_error" in mf_cpc_columns: + sql_str_order += ", abs(ppm_error) is NULL, abs(ppm_error)" + if "compound_name" in mf_cpc_columns: + sql_str_order += ", compound_name is NULL, compound_name" + + query = """ + CREATE TABLE summary AS SELECT distinct + peaklist.name, peaklist.mz, peaklist.rt, peaklist.intensity{}{} + FROM peaklist + {} + {} + {} + {} + """.format(pl_columns, mf_cpc_columns, join_peak_labels, union_mf_sub_query, unions_cpd_sub_query, sql_str_order) cursor.execute("DROP TABLE IF EXISTS summary") + # print(query) cursor.execute(query) conn.commit() - columns_to_select = [] - if ("groups",) in tables: - columns_to_select.append("group_id, degree_cor, sub_group_id, degree, n_nodes, n_edges") - if ("adduct_pairs",) in tables or ("oligomers",) in tables or ("multiple_charged_ions",) in tables: - columns_to_select.append("""(select group_concat(label || '::' || charge || '::' || oligomer, '||') - from (select distinct label, charge, oligomer from summary as s where summary.name = s.name) - ) as label_charge_oligomer""") - if ("isotopes",) in tables: - columns_to_select.append("isotope_labels_a, isotope_ids, isotope_labels_b, atoms") + # build where statement to remove dummy rows from the summary table # TODO: refactor code block + cursor.execute('PRAGMA table_info("summary")') + columns_summary = cursor.fetchall() + + where_str = "" + for cn in columns_summary: + if cn[1] in ["label", "isotope_labels_a", "neutral_loss_labels_a", "adduct"]: + where_str += " AND {} is NULL".format(cn[1]) + + query_d = """ + SELECT name + FROM summary + GROUP BY name + HAVING COUNT(name) > 1 + """ + cursor.execute(query_d) + r = cursor.fetchall() + + query_d = """ + SELECT name + FROM summary + WHERE name IS NOT NULL{} + """.format(where_str) + cursor.execute(query_d) + rr = cursor.fetchall() + + query_d = """ + DELETE FROM summary + WHERE name IN ("{}"){} + """.format('","'.join(map(str, [name[0] for name in set(r) & set(rr)])), where_str) + cursor.execute(query_d) + conn.commit() + + if columns_groupings and flag_cpd: + if "sub_group_id" in columns_groupings: + grt = "sub_group_id" + else: + grt = "group_id" + + query = """ + UPDATE summary + SET compound_count = + (SELECT scs.c FROM + (SELECT {}, compound_id, COUNT(DISTINCT name) AS c FROM summary AS s + GROUP BY {}, compound_id) AS scs + WHERE scs.compound_id = summary.compound_id + AND (scs.{} = summary.{} and scs.{} IS NOT NULL AND summary.{} IS NOT NULL) + ) + """.format(grt, grt, grt, grt, grt, grt) + + cursor.execute(query) + conn.commit() + query = """ + UPDATE summary + SET compound_count = 1 + WHERE compound_id is NOT NULL AND {} IS NULL + """.format(grt) + + cursor.execute(query) + conn.commit() + elif flag_cpd: + query = """ + UPDATE summary + SET compound_count = + (SELECT COUNT(DISTINCT name) FROM summary AS s + WHERE s.compound_id = summary.compound_id + AND summary.compound_id IS NOT NULL + ) where summary.compound_id IS NOT NULL + """ + cursor.execute(query) + conn.commit() if single_row: + columns_to_select = [] + if ("groups",) in tables: + columns_to_select.extend(["group_id", "degree_cor", "sub_group_id", "degree", "n_nodes", "n_edges"]) + if ("adduct_pairs",) in tables or ("oligomers",) in tables: + columns_to_select.append("""(select group_concat(label || '::' || charge || '::' || oligomer, '||') + from (select distinct label, charge, oligomer from summary AS s where summary.name = s.name) + ) AS label_charge_oligomer""") + if ("isotopes",) in tables: + columns_to_select.extend(["isotope_labels_a", "isotope_ids", "isotope_labels_b", "isotope_charges", "atoms"]) + if ("neutral_losses",) in tables: + columns_to_select.extend(["nl_labels", "nl_ids"]) + if flag_cpd: if single_column: - columns_to_select.append(""" - group_concat( - molecular_formula || '::' || adduct || '::' || ifnull(compound_name, "None") || '::' || ifnull(compound_id, "None") || '::' || exact_mass || '::' || round(ppm_error, 2) , - '||' - ) as annotation - """) + for cpd_t in cpd_tables: + + cursor.execute("SELECT COUNT(*) FROM {} WHERE rt_diff is not NULL".format(cpd_t)) + if int(cursor.fetchone()[0]) > 0: + rt_col = """|| '::' || ifnull(round(rt_diff, 2), "None")""" + else: + rt_col = "" + + columns_to_select.append(""" + group_concat( + CASE WHEN {} = 1 THEN + molecular_formula || '::' || + adduct || '::' || + compound_name || '::' || + compound_id || '::' || + compound_count || '::' || + exact_mass || '::' || + round(ppm_error, 2) + {} + ELSE NULL END + , '||' + ) AS {} + """.format(cpd_t, rt_col, cpd_t)) else: + cursor.execute("SELECT COUNT(*) FROM summary WHERE rt_diff is not NULL") + if int(cursor.fetchone()[0]) > 0: + rt_col = """, group_concat(ifnull(round(rt_diff, 2), "None"), '||') AS rt_diff""" + else: + rt_col = "" + columns_to_select.append(""" - group_concat(molecular_formula, '||') as molecular_formula, - group_concat(adduct, '||') as adduct, - group_concat(ifnull(compound_name, "None"), '||') as compound_name, - group_concat(ifnull(compound_id, "None"), '||') as compound_id, - group_concat(exact_mass, '||') as exact_mass, - group_concat(round(ppm_error, 2), '||') as ppm_error - """) + group_concat(molecular_formula, '||') AS molecular_formula, + group_concat(adduct, '||') AS adduct, + group_concat(compound_name, '||') AS compound_name, + group_concat(compound_id, '||') AS compound_id, + group_concat(compound_count, '||') AS compound_count, + group_concat(exact_mass, '||') AS exact_mass, + group_concat(round(ppm_error, 2), '||') AS ppm_error + {} + """.format(rt_col)) elif flag_mf: if single_column: columns_to_select.append(""" - group_concat( - molecular_formula || '::' || adduct || '::' || exact_mass || '::' || round(ppm_error, 2) , - '||' - ) as annotation - """) + group_concat( + molecular_formula || '::' || + adduct || '::' || + exact_mass || '::' || + round(ppm_error, 2) , + '||' + ) AS annotation + """) else: columns_to_select.append(""" - group_concat(molecular_formula, '||') as molecular_formula, - group_concat(adduct, '||') as adduct, - group_concat(exact_mass, '||') as exact_mass, - group_concat(round(ppm_error, 2), '||') as ppm_error - """) - - query = """SELECT DISTINCT name, mz, rt, intensity, {} - from summary - GROUP BY NAME - ORDER BY rowid - """.format(", ".join(map(str, columns_to_select))) + group_concat(molecular_formula, '||') AS molecular_formula, + group_concat(adduct, '||') AS adduct, + group_concat(exact_mass, '||') AS exact_mass, + group_concat(round(ppm_error, 2), '||') AS ppm_error + """) + + query = """ + SELECT DISTINCT name, mz, rt, intensity, {} + from summary + GROUP BY NAME + ORDER BY rowid + """.format(", ".join(map(str, columns_to_select))) df_out = pd.read_sql(query, conn) df_out.columns = [name.replace("peaklist.", "").replace("peak_labels.", "") for name in list(df_out.columns.values)] @@ -1234,7 +2030,8 @@ def summary(df, db, single_row=False, single_column=False, convert_rt=None, ndig df_out["compound_id"] = df_out["compound_id"].replace({"None": ""}) df_out["compound_name"] = df_out["compound_name"].replace({"None": ""}) else: - df_out["annotation"] = df_out["annotation"].replace({"None": ""}) + for cpd_t in cpd_tables: + df_out[cpd_t] = df_out[cpd_t].replace({"None": ""}) else: df_out = pd.read_sql("select * from summary", conn) df_out.columns = [name.replace("peaklist.", "").replace("peak_labels.", "") for name in list(df_out.columns.values)] @@ -1253,5 +2050,14 @@ def summary(df, db, single_row=False, single_column=False, convert_rt=None, ndig elif ndigits_mz is not None: raise ValueError("Provide integer or None for ndigits_mz") + # Workaround for Pandas casting INT fo Float when Nan is present + for c in df_out.columns: + columns = ["charge", "oligomer", "group_id", "degree_cor", "sub_group_id", "degree", "n_nodes", "n_edges", "C", "H", "N", "O", "P", "S"] + columns.extend(cpd_tables) # include compound tables + + if not single_row and not single_column: + if c in columns: + df_out[c] = df_out[c].astype('Int64') + conn.close() return df_out diff --git a/beamspy/data/adducts.txt b/beamspy/data/adducts.txt index 1e82339..e665554 100644 --- a/beamspy/data/adducts.txt +++ b/beamspy/data/adducts.txt @@ -1 +1 @@ -label exact_mass ion_mode [M+H]+ 1.007276 pos [M+Na]+ 22.989221 pos [M+K]+ 38.963158 pos [M-H]- -1.007276 neg [M+Cl]- 34.969401 neg [M+Na-2H]- 20.974668 neg [M+K-2H]- 36.948605 neg [M+Hac-H]- 59.013853 neg \ No newline at end of file +label exact_mass charge ion_mode [M+H]+ 1.007276 1 pos [M+Na]+ 22.989221 1 pos [M+K]+ 38.963158 1 pos [M-H]- -1.007276 1 neg [M+Cl]- 34.969401 1 neg [M+Na-2H]- 20.974668 1 neg [M+K-2H]- 36.948605 1 neg [M+Hac-H]- 59.013853 1 neg \ No newline at end of file diff --git a/beamspy/data/adducts_differences.txt b/beamspy/data/adducts_differences.txt deleted file mode 100644 index 9f2715b..0000000 --- a/beamspy/data/adducts_differences.txt +++ /dev/null @@ -1 +0,0 @@ -label_x label_y mass_difference charge_x charge_y ion_mode [M+H]+ [M+Na]+ 21.981945 1 1 pos \ No newline at end of file diff --git a/beamspy/data/databases/chebi_3star_rel169_v1.sql.gz b/beamspy/data/databases/chebi_3star_rel169_v1.sql.gz deleted file mode 100644 index 6d6b081..0000000 Binary files a/beamspy/data/databases/chebi_3star_rel169_v1.sql.gz and /dev/null differ diff --git a/beamspy/data/databases/chebi_complete_3star_rel195_v1.sql.gz b/beamspy/data/databases/chebi_complete_3star_rel195_v1.sql.gz new file mode 100644 index 0000000..2566890 Binary files /dev/null and b/beamspy/data/databases/chebi_complete_3star_rel195_v1.sql.gz differ diff --git a/beamspy/data/databases/chebi_complete_rel195_v1.sql.gz b/beamspy/data/databases/chebi_complete_rel195_v1.sql.gz new file mode 100644 index 0000000..8576792 Binary files /dev/null and b/beamspy/data/databases/chebi_complete_rel195_v1.sql.gz differ diff --git a/beamspy/data/databases/chebi_full_rel169_v1.sql.gz b/beamspy/data/databases/chebi_full_rel169_v1.sql.gz deleted file mode 100644 index 5a54dec..0000000 Binary files a/beamspy/data/databases/chebi_full_rel169_v1.sql.gz and /dev/null differ diff --git a/beamspy/data/databases/databases.txt b/beamspy/data/databases/databases.txt index 0cda62e..9db5d09 100644 --- a/beamspy/data/databases/databases.txt +++ b/beamspy/data/databases/databases.txt @@ -1,25 +1,23 @@ database organism_or_subset category source_url description source_filename source_format source_version released_on license login_required beams_db_version database_name id molecular_formula inchi inchi_key pubchem_id smiles name -BioCyc Chlamydomonas_reinhardtii ChlamyCyc ftp://ftp.plantcyc.org/Pathways/Data_dumps/PMN13_July2018/chlamycyc_compounds.20180702 PlantCyc - ChlamyCyc (Chlamydomonas reinhardtii) | 2018-07-02 | v1 ChlamyCyc_compounds.20180702 tab 2018-07-02 2018-07-02 - 0 v1 biocyc_chlamycyc_20180702_v1 Compound_id Chemical_formula Smiles Compound_common_name -CHEBI full ftp://ftp.ebi.ac.uk/pub/databases/chebi/archive/rel169/SDF/ChEBI_complete.sdf.gz CHEBI - Full database | rel169 | v1 ChEBI_complete.sdf sdf rel169 2018-10-01 https://creativecommons.org/licenses/by/4.0/ 0 v1 chebi_full_rel169_v1 ChEBI ID Formulae InChI InChIKey PubChem Database Links SMILES ChEBI Name -CHEBI full ftp://ftp.ebi.ac.uk/pub/databases/chebi/archive/rel169/SDF/ChEBI_complete_3star.sdf.gz CHEBI - 3stars records | rel169 | v1 ChEBI_complete_3star.sdf sdf rel169 2018-10-01 https://creativecommons.org/licenses/by/4.0/ 0 v1 chebi_3star_rel169_v1 ChEBI ID Formulae InChI InChIKey PubChem Database Links SMILES ChEBI Name -DrugBank full https://www.drugbank.ca/releases/5-1-1/downloads/all-full-database/drugbank_all_full_database.xml.zip DrugBank | v5.1.1 | v1 full database.xml xml v5.1.1 2018-07-03 https://www.drugbank.ca/about 1 v1 drugbank_full_v5_1_1_v1 drugbank-id Molecular Formula InChI InChIKey PubChem Compound SMILES name -HMDB Human urine http://www.hmdb.ca/system/downloads/current/urine_metabolites.zip HMDB - The Urine Metabolome Database | v4.0 | v1 urine_metabolites.xml xml v4.0 2018-07-08 http://www.hmdb.ca/downloads 0 v1 hmdb_urine_v4_0_v1 accession chemical_formula inchi inchi_key pubchem_compound_id smiles name -HMDB Human serum http://www.hmdb.ca/system/downloads/current/serum_metabolites.zip HMDB - The Serum Metabolome Database | v4.0 | v1 serum_metabolites.xml xml v4.0 2018-07-08 http://www.hmdb.ca/downloads 0 v1 hmdb_serum_v4_0_v1 accession chemical_formula inchi inchi_key pubchem_compound_id smiles name -HMDB Human csf http://www.hmdb.ca/system/downloads/current/csf_metabolites.zip HMDB - The cerebrospinal fluid (CSF) Metabolome Database | v4.0 | v1 csf_metabolites.xml xml v4.0 2018-07-08 http://www.hmdb.ca/downloads 0 v1 hmdb_csf_v4_0_v1 accession chemical_formula inchi inchi_key pubchem_compound_id smiles name -HMDB Human saliva http://www.hmdb.ca/system/downloads/current/saliva_metabolites.zip HMDB - The Saliva Metabolome Database | v4.0 | v1 saliva_metabolites.xml xml v4.0 2018-07-08 http://www.hmdb.ca/downloads 0 v1 hmdb_saliva_v4_0_v1 accession chemical_formula inchi inchi_key pubchem_compound_id smiles name -HMDB Human faces http://www.hmdb.ca/system/downloads/current/feces_metabolites.zip HMDB - The Fecal Metabolome Database | v4.0 | v1 feces_metabolites.xml xml v4.0 2018-07-08 http://www.hmdb.ca/downloads 0 v1 hmdb_feces_v4_0_v1 accession chemical_formula inchi inchi_key pubchem_compound_id smiles name -HMDB Human sweat http://www.hmdb.ca/system/downloads/current/sweat_metabolites.zip HMDB - The Sweat Metabolome Database | v4.0 | v1 sweat_metabolites.xml xml v4.0 2018-07-08 http://www.hmdb.ca/downloads 0 v1 hmdb_sweat_v4_0_v1 accession chemical_formula inchi inchi_key pubchem_compound_id smiles name -HMDB Human full http://www.hmdb.ca/system/downloads/current/hmdb_metabolites.zip HMDB - The Human Metabolome Database | v4.0 | v1 hmdb_metabolites.xml xml v4.0 2018-07-08 http://www.hmdb.ca/downloads 0 v1 hmdb_full_v4_0_v1 accession chemical_formula inchi inchi_key pubchem_compound_id smiles name -KEGG Daphnia_pulex dpx https://www.kegg.jp/kegg/rest/keggapi.html KEGG - Daphnia pulex (dpx) | 2018-11-01 | v1 API API 2018-11-01 2018-11-01 - 0 v1 kegg_dpx_20181101_v1 entry formula name -KEGG Human hsa https://www.kegg.jp/kegg/rest/keggapi.html KEGG - Human (hsa) | 2018-11-01 | v1 API API 2018-11-01 | v1 2018-11-01 - 0 v1 kegg_hsa_20181101_v1 entry formula name -KEGG full https://www.kegg.jp/kegg/rest/keggapi.html KEGG - Full database | 2018-11-01 | v1 API API 2018-11-01 | v1 2018-11-01 - 0 v1 kegg_full_20181101_v1 entry formula name -LIPID_MAPS FattyAcyls https://www.lipidmaps.org/resources/downloads/LMSDFDownload12Dec17.zip LIPID_MAPS - FattyAcyls | 2018-12-17 | v1 LMSDFDownload12Dec17/LMSDFDownload12Dec17FinalFA.sdf sdf 2018-12-17 2018-12-17 - 0 v1 lipidmaps_fattyacyls_20181217_v1 LM_ID FORMULA INCHI INCHI_KEY PUBCHEM_CID SMILES COMMON_NAME -LIPID_MAPS Glycerolipids https://www.lipidmaps.org/resources/downloads/LMSDFDownload12Dec17.zip LIPID_MAPS - Glycerolipids | 2018-12-17 | v1 LMSDFDownload12Dec17/LMSDFDownload12Dec17FinalGL.sdf sdf 2018-12-17 2018-12-17 - 0 v1 lipidmaps_glycerolipids_20181217_v1 LM_ID FORMULA INCHI INCHI_KEY PUBCHEM_CID SMILES COMMON_NAME -LIPID_MAPS Glycerophospholipids https://www.lipidmaps.org/resources/downloads/LMSDFDownload12Dec17.zip LIPID_MAPS - Glycerophospholipids | 2018-12-17 | v1 LMSDFDownload12Dec17/LMSDFDownload12Dec17FinalGP.sdf sdf 2018-12-17 2018-12-17 - 0 v1 lipidmaps_slycerophospholipids_20181217_v1 LM_ID FORMULA INCHI INCHI_KEY PUBCHEM_CID SMILES COMMON_NAME -LIPID_MAPS Polyketides https://www.lipidmaps.org/resources/downloads/LMSDFDownload12Dec17.zip LIPID_MAPS - Polyketides | 2018-12-17 | v1 LMSDFDownload12Dec17/LMSDFDownload12Dec17FinalPK.sdf sdf 2018-12-17 2018-12-17 - 0 v1 lipidmaps_solyketides_20181217_v1 LM_ID FORMULA INCHI INCHI_KEY PUBCHEM_CID SMILES COMMON_NAME -LIPID_MAPS PrenolLipids https://www.lipidmaps.org/resources/downloads/LMSDFDownload12Dec17.zip LIPID_MAPS - PrenolLipids | 2018-12-17 | v1 LMSDFDownload12Dec17/LMSDFDownload12Dec17FinalPR.sdf sdf 2018-12-17 2018-12-17 - 0 v1 lipidmaps_srenollipids_20181217_v1 LM_ID FORMULA INCHI INCHI_KEY PUBCHEM_CID SMILES COMMON_NAME -LIPID_MAPS Sacccharolipids https://www.lipidmaps.org/resources/downloads/LMSDFDownload12Dec17.zip LIPID_MAPS - Sacccharolipids | 2018-12-17 | v1 LMSDFDownload12Dec17/LMSDFDownload12Dec17FinalSL.sdf sdf 2018-12-17 2018-12-17 - 0 v1 lipidmaps_sacccharolipids_20181217_v1 LM_ID FORMULA INCHI INCHI_KEY PUBCHEM_CID SMILES COMMON_NAME -LIPID_MAPS Sphingolipids https://www.lipidmaps.org/resources/downloads/LMSDFDownload12Dec17.zip LIPID_MAPS - Sphingolipids | 2018-12-17 | v1 LMSDFDownload12Dec17/LMSDFDownload12Dec17FinalSP.sdf sdf 2018-12-17 2018-12-17 - 0 v1 lipidmaps_sphingolipids_20181217_v1 LM_ID FORMULA INCHI INCHI_KEY PUBCHEM_CID SMILES COMMON_NAME -LIPID_MAPS SterolLipids https://www.lipidmaps.org/resources/downloads/LMSDFDownload12Dec17.zip LIPID_MAPS - SterolLipids | 2018-12-17 | v1 LMSDFDownload12Dec17/LMSDFDownload12Dec17FinalST.sdf sdf 2018-12-17 2018-12-17 - 0 v1 lipidmaps_sterollipids_20181217_v1 LM_ID FORMULA INCHI INCHI_KEY PUBCHEM_CID SMILES COMMON_NAME -LIPID_MAPS full https://www.lipidmaps.org/resources/downloads/LMSDFDownload12Dec17.zip LIPID_MAPS - Full database | 2018-12-17 | v1 LMSDFDownload12Dec17/LMSDFDownload12Dec17FinalAll.sdf sdf 2018-12-17 2018-12-17 - 0 v1 lipidmaps_full_20181217_v1 LM_ID FORMULA INCHI INCHI_KEY PUBCHEM_CID SMILES COMMON_NAME -YMDB Saccharomyces_cerevisiae full http://www.ymdb.ca/system/downloads/current/ymdb.sdf.zip Yeast Metabolome Database (YMDB) | v2.0 | v1 ymdb.sdf sdf v2.0 2017-01-01 http://www.ymdb.ca/downloads 0 v1 ymdb_full_v2_0_v1 DATABASE_ID FORMULA INCHI_IDENTIFIER INCHI_KEY SMILES GENERIC_NAME +BioCyc Chlamydomonas_reinhardtii ChlamyCyc ftp.dpb.carnegiescience.edu//Pathways/Data_dumps/PMN13_July2018/compounds/chlamycyc_compounds.20180702 PlantCyc - ChlamyCyc (Chlamydomonas reinhardtii) | 2018-07-02 | v1 ChlamyCyc_compounds.20180702 tab 2018-07-02 2018-07-02 - 0 v1 biocyc_chlamycyc_20180702_v1 Compound_id Chemical_formula Smiles Compound_common_name +CHEBI complete ftp://ftp.ebi.ac.uk/pub/databases/chebi/archive/rel195/SDF/ChEBI_complete.sdf.gz CHEBI - Complete database | rel195 | v1 ChEBI_complete.sdf sdf rel195 2021-01-01 https://creativecommons.org/licenses/by/4.0/ 0 v1 chebi_complete_rel195_v1 ChEBI ID Formulae InChI InChIKey PubChem Database Links SMILES ChEBI Name +CHEBI complete_3star ftp://ftp.ebi.ac.uk/pub/databases/chebi/archive/rel195/SDF/ChEBI_complete_3star.sdf.gz CHEBI - Complete 3star records | rel195 | v1 ChEBI_complete_3star.sdf sdf rel195 2021-01-01 https://creativecommons.org/licenses/by/4.0/ 0 v1 chebi_complete_3star_rel195_v1 ChEBI ID Formulae InChI InChIKey PubChem Database Links SMILES ChEBI Name +HMDB Human urine http://www.hmdb.ca/system/downloads/current/urine_metabolites.zip HMDB - The Urine Metabolome Database | v4.0 2020-09-10 | v1 urine_metabolites.xml xml v4.0 2020-09-10 http://www.hmdb.ca/downloads 0 v1 hmdb_urine_v4_0_20200910_v1 accession chemical_formula inchi inchikey pubchem_compound_id smiles name +HMDB Human serum http://www.hmdb.ca/system/downloads/current/serum_metabolites.zip HMDB - The Serum Metabolome Database | v4.0 2020-09-10 | v1 serum_metabolites.xml xml v4.0 2020-09-10 http://www.hmdb.ca/downloads 0 v1 hmdb_serum_v4_0_20200910_v1 accession chemical_formula inchi inchikey pubchem_compound_id smiles name +HMDB Human csf http://www.hmdb.ca/system/downloads/current/csf_metabolites.zip HMDB - The cerebrospinal fluid (CSF) Metabolome Database | v4.0 2020-09-10 | v1 csf_metabolites.xml xml v4.0 2020-09-10 http://www.hmdb.ca/downloads 0 v1 hmdb_csf_v4_0_20200910_v1 accession chemical_formula inchi inchikey pubchem_compound_id smiles name +HMDB Human saliva http://www.hmdb.ca/system/downloads/current/saliva_metabolites.zip HMDB - The Saliva Metabolome Database | v4.0 2020-09-10 | v1 saliva_metabolites.xml xml v4.0 2020-09-10 http://www.hmdb.ca/downloads 0 v1 hmdb_saliva_v4_0_20200910_v1 accession chemical_formula inchi inchikey pubchem_compound_id smiles name +HMDB Human faces http://www.hmdb.ca/system/downloads/current/feces_metabolites.zip HMDB - The Fecal Metabolome Database | v4.0 2020-09-10 | v1 feces_metabolites.xml xml v4.0 2020-09-10 http://www.hmdb.ca/downloads 0 v1 hmdb_feces_v4_0_20200910_v1 accession chemical_formula inchi inchikey pubchem_compound_id smiles name +HMDB Human sweat http://www.hmdb.ca/system/downloads/current/sweat_metabolites.zip HMDB - The Sweat Metabolome Database | v4.0 2020-09-10 | v1 sweat_metabolites.xml xml v4.0 2020-09-10 http://www.hmdb.ca/downloads 0 v1 hmdb_sweat_v4_0_20200910_v1 accession chemical_formula inchi inchikey pubchem_compound_id smiles name +HMDB Human full http://www.hmdb.ca/system/downloads/current/hmdb_metabolites.zip HMDB - The Human Metabolome Database | v4.0 2020-09-09 | v1 hmdb_metabolites.xml xml v4.0 2020-09-09 http://www.hmdb.ca/downloads 0 v1 hmdb_full_v4_0_20200909_v1 accession chemical_formula inchi inchikey pubchem_compound_id smiles name +KEGG Daphnia_pulex dpx https://www.kegg.jp/kegg/rest/keggapi.html KEGG - Daphnia pulex (dpx) | 2021-01-11 | v1 API API 2021-01-11 | v1 2021-01-11 - 0 v1 kegg_dpx_20210111_v1 entry formula name +KEGG Human hsa https://www.kegg.jp/kegg/rest/keggapi.html KEGG - Human (hsa) | 2021-01-11 | v1 API API 2021-01-11 | v1 2021-01-11 - 0 v1 kegg_hsa_20210111_v1 entry formula name +KEGG full https://www.kegg.jp/kegg/rest/keggapi.html KEGG - Full database | 2021-01-11 | v1 API API 2021-01-11 | v1 2021-01-11 - 0 v1 kegg_full_20210111_v1 entry formula name +LIPID_MAPS Fatty Acyls [FA] https://www.lipidmaps.org/files/?file=LMSD&ext=sdf.zip LIPID_MAPS - Fatty Acyls [FA] | 2020-10-01 | v1 structures.sdf sdf 2020-10-01 2020-10-01 - 0 v1 lipidmaps_fattyacyls_20201001_v1 LM_ID FORMULA INCHI INCHI_KEY PUBCHEM_CID SMILES NAME +LIPID_MAPS Glycerolipids [GL] https://www.lipidmaps.org/files/?file=LMSD&ext=sdf.zip LIPID_MAPS - Glycerolipids [GL] | 2020-10-01 | v1 structures.sdf sdf 2020-10-01 2020-10-01 - 0 v1 lipidmaps_glycerolipids_20201001_v1 LM_ID FORMULA INCHI INCHI_KEY PUBCHEM_CID SMILES NAME +LIPID_MAPS Glycerophospholipids [GP] https://www.lipidmaps.org/files/?file=LMSD&ext=sdf.zip LIPID_MAPS - Glycerophospholipids [GP] | 2020-10-01 | v1 structures.sdf sdf 2020-10-01 2020-10-01 - 0 v1 lipidmaps_slycerophospholipids_20201001_v1 LM_ID FORMULA INCHI INCHI_KEY PUBCHEM_CID SMILES NAME +LIPID_MAPS Polyketides [PK] https://www.lipidmaps.org/files/?file=LMSD&ext=sdf.zip LIPID_MAPS - Polyketides [PK] | 2020-10-01 | v1 structures.sdf sdf 2020-10-01 2020-10-01 - 0 v1 lipidmaps_solyketides_20201001_v1 LM_ID FORMULA INCHI INCHI_KEY PUBCHEM_CID SMILES NAME +LIPID_MAPS Prenol Lipids [PR] https://www.lipidmaps.org/files/?file=LMSD&ext=sdf.zip LIPID_MAPS - Prenol Lipids [PR] | 2020-10-01 | v1 structures.sdf sdf 2020-10-01 2020-10-01 - 0 v1 lipidmaps_srenollipids_20201001_v1 LM_ID FORMULA INCHI INCHI_KEY PUBCHEM_CID SMILES NAME +LIPID_MAPS Saccharolipids [SL] https://www.lipidmaps.org/files/?file=LMSD&ext=sdf.zip LIPID_MAPS - Sacccharolipids [SL]| 2020-10-01 | v1 structures.sdf sdf 2020-10-01 2020-10-01 - 0 v1 lipidmaps_sacccharolipids_20201001_v1 LM_ID FORMULA INCHI INCHI_KEY PUBCHEM_CID SMILES NAME +LIPID_MAPS Sphingolipids [SP] https://www.lipidmaps.org/files/?file=LMSD&ext=sdf.zip LIPID_MAPS - Sphingolipids [SP] | 2020-10-01 | v1 structures.sdf sdf 2020-10-01 2020-10-01 - 0 v1 lipidmaps_sphingolipids_20201001_v1 LM_ID FORMULA INCHI INCHI_KEY PUBCHEM_CID SMILES NAME +LIPID_MAPS Sterol Lipids [ST] https://www.lipidmaps.org/files/?file=LMSD&ext=sdf.zip LIPID_MAPS - Sterol Lipids [ST] | 2020-10-01 | v1 structures.sdf sdf 2020-10-01 2020-10-01 - 0 v1 lipidmaps_sterollipids_20201001_v1 LM_ID FORMULA INCHI INCHI_KEY PUBCHEM_CID SMILES NAME +LIPID_MAPS full https://www.lipidmaps.org/files/?file=LMSD&ext=sdf.zip LIPID_MAPS - Full database | 2020-10-01 | v1 structures.sdf sdf 2020-10-01 2020-10-01 - 0 v1 lipidmaps_full_20201001_v1 LM_ID FORMULA INCHI INCHI_KEY PUBCHEM_CID SMILES NAME diff --git a/beamspy/data/databases/hmdb_csf_v4_0_20200910_v1.sql.gz b/beamspy/data/databases/hmdb_csf_v4_0_20200910_v1.sql.gz new file mode 100644 index 0000000..75277b9 Binary files /dev/null and b/beamspy/data/databases/hmdb_csf_v4_0_20200910_v1.sql.gz differ diff --git a/beamspy/data/databases/hmdb_csf_v4_0_v1.sql.gz b/beamspy/data/databases/hmdb_csf_v4_0_v1.sql.gz deleted file mode 100644 index 07fb5ca..0000000 Binary files a/beamspy/data/databases/hmdb_csf_v4_0_v1.sql.gz and /dev/null differ diff --git a/beamspy/data/databases/hmdb_feces_v4_0_20200910_v1.sql.gz b/beamspy/data/databases/hmdb_feces_v4_0_20200910_v1.sql.gz new file mode 100644 index 0000000..cfecc68 Binary files /dev/null and b/beamspy/data/databases/hmdb_feces_v4_0_20200910_v1.sql.gz differ diff --git a/beamspy/data/databases/hmdb_feces_v4_0_v1.sql.gz b/beamspy/data/databases/hmdb_feces_v4_0_v1.sql.gz deleted file mode 100644 index 301fefa..0000000 Binary files a/beamspy/data/databases/hmdb_feces_v4_0_v1.sql.gz and /dev/null differ diff --git a/beamspy/data/databases/hmdb_full_v4_0_20200909_v1.sql.gz b/beamspy/data/databases/hmdb_full_v4_0_20200909_v1.sql.gz new file mode 100644 index 0000000..e39d1e8 Binary files /dev/null and b/beamspy/data/databases/hmdb_full_v4_0_20200909_v1.sql.gz differ diff --git a/beamspy/data/databases/hmdb_full_v4_0_v1.sql.gz b/beamspy/data/databases/hmdb_full_v4_0_v1.sql.gz deleted file mode 100644 index 656f118..0000000 Binary files a/beamspy/data/databases/hmdb_full_v4_0_v1.sql.gz and /dev/null differ diff --git a/beamspy/data/databases/hmdb_saliva_v4_0_20200910_v1.sql.gz b/beamspy/data/databases/hmdb_saliva_v4_0_20200910_v1.sql.gz new file mode 100644 index 0000000..301cebc Binary files /dev/null and b/beamspy/data/databases/hmdb_saliva_v4_0_20200910_v1.sql.gz differ diff --git a/beamspy/data/databases/hmdb_saliva_v4_0_v1.sql.gz b/beamspy/data/databases/hmdb_saliva_v4_0_v1.sql.gz deleted file mode 100644 index 3db4670..0000000 Binary files a/beamspy/data/databases/hmdb_saliva_v4_0_v1.sql.gz and /dev/null differ diff --git a/beamspy/data/databases/hmdb_serum_v4_0_20200910_v1.sql.gz b/beamspy/data/databases/hmdb_serum_v4_0_20200910_v1.sql.gz new file mode 100644 index 0000000..397053f Binary files /dev/null and b/beamspy/data/databases/hmdb_serum_v4_0_20200910_v1.sql.gz differ diff --git a/beamspy/data/databases/hmdb_serum_v4_0_v1.sql.gz b/beamspy/data/databases/hmdb_serum_v4_0_v1.sql.gz deleted file mode 100644 index 8508415..0000000 Binary files a/beamspy/data/databases/hmdb_serum_v4_0_v1.sql.gz and /dev/null differ diff --git a/beamspy/data/databases/hmdb_sweat_v4_0_20200910_v1.sql.gz b/beamspy/data/databases/hmdb_sweat_v4_0_20200910_v1.sql.gz new file mode 100644 index 0000000..4c2fd83 Binary files /dev/null and b/beamspy/data/databases/hmdb_sweat_v4_0_20200910_v1.sql.gz differ diff --git a/beamspy/data/databases/hmdb_sweat_v4_0_v1.sql.gz b/beamspy/data/databases/hmdb_sweat_v4_0_v1.sql.gz deleted file mode 100644 index 90b3827..0000000 Binary files a/beamspy/data/databases/hmdb_sweat_v4_0_v1.sql.gz and /dev/null differ diff --git a/beamspy/data/databases/hmdb_urine_v4_0_20200910_v1.sql.gz b/beamspy/data/databases/hmdb_urine_v4_0_20200910_v1.sql.gz new file mode 100644 index 0000000..06e74d4 Binary files /dev/null and b/beamspy/data/databases/hmdb_urine_v4_0_20200910_v1.sql.gz differ diff --git a/beamspy/data/databases/hmdb_urine_v4_0_v1.sql.gz b/beamspy/data/databases/hmdb_urine_v4_0_v1.sql.gz deleted file mode 100644 index a823854..0000000 Binary files a/beamspy/data/databases/hmdb_urine_v4_0_v1.sql.gz and /dev/null differ diff --git a/beamspy/data/databases/kegg_dpx_20181101_v1.sql.gz b/beamspy/data/databases/kegg_dpx_20181101_v1.sql.gz deleted file mode 100644 index 3f939fd..0000000 Binary files a/beamspy/data/databases/kegg_dpx_20181101_v1.sql.gz and /dev/null differ diff --git a/beamspy/data/databases/kegg_dpx_20210111_v1.sql.gz b/beamspy/data/databases/kegg_dpx_20210111_v1.sql.gz new file mode 100644 index 0000000..02baa79 Binary files /dev/null and b/beamspy/data/databases/kegg_dpx_20210111_v1.sql.gz differ diff --git a/beamspy/data/databases/kegg_full_20181101_v1.sql.gz b/beamspy/data/databases/kegg_full_20181101_v1.sql.gz deleted file mode 100644 index 529aded..0000000 Binary files a/beamspy/data/databases/kegg_full_20181101_v1.sql.gz and /dev/null differ diff --git a/beamspy/data/databases/kegg_full_20210111_v1.sql.gz b/beamspy/data/databases/kegg_full_20210111_v1.sql.gz new file mode 100644 index 0000000..104be9d Binary files /dev/null and b/beamspy/data/databases/kegg_full_20210111_v1.sql.gz differ diff --git a/beamspy/data/databases/kegg_hsa_20181101_v1.sql.gz b/beamspy/data/databases/kegg_hsa_20181101_v1.sql.gz deleted file mode 100644 index 20998bd..0000000 Binary files a/beamspy/data/databases/kegg_hsa_20181101_v1.sql.gz and /dev/null differ diff --git a/beamspy/data/databases/kegg_hsa_20210111_v1.sql.gz b/beamspy/data/databases/kegg_hsa_20210111_v1.sql.gz new file mode 100644 index 0000000..634d95f Binary files /dev/null and b/beamspy/data/databases/kegg_hsa_20210111_v1.sql.gz differ diff --git a/beamspy/data/databases/lipidmaps_fattyacyls_20181217_v1.sql.gz b/beamspy/data/databases/lipidmaps_fattyacyls_20181217_v1.sql.gz deleted file mode 100644 index 4708a4a..0000000 Binary files a/beamspy/data/databases/lipidmaps_fattyacyls_20181217_v1.sql.gz and /dev/null differ diff --git a/beamspy/data/databases/lipidmaps_fattyacyls_20201001_v1.sql.gz b/beamspy/data/databases/lipidmaps_fattyacyls_20201001_v1.sql.gz new file mode 100644 index 0000000..4c6f882 Binary files /dev/null and b/beamspy/data/databases/lipidmaps_fattyacyls_20201001_v1.sql.gz differ diff --git a/beamspy/data/databases/lipidmaps_full_20181217_v1.sql.gz b/beamspy/data/databases/lipidmaps_full_20181217_v1.sql.gz deleted file mode 100644 index 3b11760..0000000 Binary files a/beamspy/data/databases/lipidmaps_full_20181217_v1.sql.gz and /dev/null differ diff --git a/beamspy/data/databases/lipidmaps_full_20201001_v1.sql.gz b/beamspy/data/databases/lipidmaps_full_20201001_v1.sql.gz new file mode 100644 index 0000000..e59c368 Binary files /dev/null and b/beamspy/data/databases/lipidmaps_full_20201001_v1.sql.gz differ diff --git a/beamspy/data/databases/lipidmaps_glycerolipids_20181217_v1.sql.gz b/beamspy/data/databases/lipidmaps_glycerolipids_20181217_v1.sql.gz deleted file mode 100644 index aa06a6d..0000000 Binary files a/beamspy/data/databases/lipidmaps_glycerolipids_20181217_v1.sql.gz and /dev/null differ diff --git a/beamspy/data/databases/lipidmaps_glycerolipids_20201001_v1.sql.gz b/beamspy/data/databases/lipidmaps_glycerolipids_20201001_v1.sql.gz new file mode 100644 index 0000000..a5b9c6c Binary files /dev/null and b/beamspy/data/databases/lipidmaps_glycerolipids_20201001_v1.sql.gz differ diff --git a/beamspy/data/databases/lipidmaps_sacccharolipids_20181217_v1.sql.gz b/beamspy/data/databases/lipidmaps_sacccharolipids_20181217_v1.sql.gz deleted file mode 100644 index 7af6efa..0000000 Binary files a/beamspy/data/databases/lipidmaps_sacccharolipids_20181217_v1.sql.gz and /dev/null differ diff --git a/beamspy/data/databases/lipidmaps_sacccharolipids_20201001_v1.sql.gz b/beamspy/data/databases/lipidmaps_sacccharolipids_20201001_v1.sql.gz new file mode 100644 index 0000000..00094dd Binary files /dev/null and b/beamspy/data/databases/lipidmaps_sacccharolipids_20201001_v1.sql.gz differ diff --git a/beamspy/data/databases/lipidmaps_slycerophospholipids_20181217_v1.sql.gz b/beamspy/data/databases/lipidmaps_slycerophospholipids_20181217_v1.sql.gz deleted file mode 100644 index a90fbe7..0000000 Binary files a/beamspy/data/databases/lipidmaps_slycerophospholipids_20181217_v1.sql.gz and /dev/null differ diff --git a/beamspy/data/databases/lipidmaps_slycerophospholipids_20201001_v1.sql.gz b/beamspy/data/databases/lipidmaps_slycerophospholipids_20201001_v1.sql.gz new file mode 100644 index 0000000..538518e Binary files /dev/null and b/beamspy/data/databases/lipidmaps_slycerophospholipids_20201001_v1.sql.gz differ diff --git a/beamspy/data/databases/lipidmaps_solyketides_20181217_v1.sql.gz b/beamspy/data/databases/lipidmaps_solyketides_20181217_v1.sql.gz deleted file mode 100644 index fe844c2..0000000 Binary files a/beamspy/data/databases/lipidmaps_solyketides_20181217_v1.sql.gz and /dev/null differ diff --git a/beamspy/data/databases/lipidmaps_solyketides_20201001_v1.sql.gz b/beamspy/data/databases/lipidmaps_solyketides_20201001_v1.sql.gz new file mode 100644 index 0000000..6de3e46 Binary files /dev/null and b/beamspy/data/databases/lipidmaps_solyketides_20201001_v1.sql.gz differ diff --git a/beamspy/data/databases/lipidmaps_sphingolipids_20181217_v1.sql.gz b/beamspy/data/databases/lipidmaps_sphingolipids_20181217_v1.sql.gz deleted file mode 100644 index ad217f5..0000000 Binary files a/beamspy/data/databases/lipidmaps_sphingolipids_20181217_v1.sql.gz and /dev/null differ diff --git a/beamspy/data/databases/lipidmaps_sphingolipids_20201001_v1.sql.gz b/beamspy/data/databases/lipidmaps_sphingolipids_20201001_v1.sql.gz new file mode 100644 index 0000000..4537459 Binary files /dev/null and b/beamspy/data/databases/lipidmaps_sphingolipids_20201001_v1.sql.gz differ diff --git a/beamspy/data/databases/lipidmaps_srenollipids_20181217_v1.sql.gz b/beamspy/data/databases/lipidmaps_srenollipids_20181217_v1.sql.gz deleted file mode 100644 index 86a307a..0000000 Binary files a/beamspy/data/databases/lipidmaps_srenollipids_20181217_v1.sql.gz and /dev/null differ diff --git a/beamspy/data/databases/lipidmaps_srenollipids_20201001_v1.sql.gz b/beamspy/data/databases/lipidmaps_srenollipids_20201001_v1.sql.gz new file mode 100644 index 0000000..8996efe Binary files /dev/null and b/beamspy/data/databases/lipidmaps_srenollipids_20201001_v1.sql.gz differ diff --git a/beamspy/data/databases/lipidmaps_sterollipids_20181217_v1.sql.gz b/beamspy/data/databases/lipidmaps_sterollipids_20181217_v1.sql.gz deleted file mode 100644 index 77c68e0..0000000 Binary files a/beamspy/data/databases/lipidmaps_sterollipids_20181217_v1.sql.gz and /dev/null differ diff --git a/beamspy/data/databases/lipidmaps_sterollipids_20201001_v1.sql.gz b/beamspy/data/databases/lipidmaps_sterollipids_20201001_v1.sql.gz new file mode 100644 index 0000000..e355492 Binary files /dev/null and b/beamspy/data/databases/lipidmaps_sterollipids_20201001_v1.sql.gz differ diff --git a/beamspy/data/databases/ymdb_full_v2_0_v1.sql.gz b/beamspy/data/databases/ymdb_full_v2_0_v1.sql.gz deleted file mode 100644 index 4a6bfe4..0000000 Binary files a/beamspy/data/databases/ymdb_full_v2_0_v1.sql.gz and /dev/null differ diff --git a/beamspy/data/isotopes.txt b/beamspy/data/isotopes.txt index 9a3ffe1..e2b1ce8 100644 --- a/beamspy/data/isotopes.txt +++ b/beamspy/data/isotopes.txt @@ -1 +1 @@ -label_x label_y mass_difference abundance_x abundance_y ion_mode C (13C) 1.003355 100 1.07 both S (34S) 1.995796 100 4.21 both K (41K) 1.998119 100 6.73 pos Cl (37Cl) 1.997050 100 24.23 neg \ No newline at end of file +label_x label_y mass_difference abundance_x abundance_y ion_mode charge C (13C) 1.003355 98.93 1.07 both 1 C (13C) 0.5016775 98.93 1.07 both 2 S (34S) 1.995796 94.99 4.25 both 1 K (41K) 1.998119 93.25 6.73 pos 1 Cl (37Cl) 1.99705 75.76 24.24 neg 1 \ No newline at end of file diff --git a/beamspy/data/multiple_charged_differences.txt b/beamspy/data/multiple_charged_differences.txt deleted file mode 100644 index 2eaa7f9..0000000 --- a/beamspy/data/multiple_charged_differences.txt +++ /dev/null @@ -1,6 +0,0 @@ -label_x label_y mass_difference charge_x charge_y ion_mode -[(C12)]1+ [(C13)]2+ 0.501678 1 2 both -[(C12)]1+ [(C13)]3+ 0.334452 1 3 both -[(C12)]1+ [(C13)]4+ 0.250839 1 4 both -[(C12)]1+ [(C13)]5+ 0.200671 1 5 both -[(C12)]1+ [(C13)]6+ 0.167226 1 6 both diff --git a/beamspy/data/multiple_charged_ions.txt b/beamspy/data/multiple_charged_ions.txt index 31a77b2..ea19073 100644 --- a/beamspy/data/multiple_charged_ions.txt +++ b/beamspy/data/multiple_charged_ions.txt @@ -1,6 +1 @@ -label exact_mass charge ion_mode -[M+H]+ 1.007276 1 pos -[M+Na]+ 22.989221 1 pos -[M/2+2H]2+ 1.007276 2 pos -[M/2+H+Na]2+ 11.998249 2 pos -[M/3+3H]3+ 1.007276 3 pos +label exact_mass charge ion_mode [M+H]+ 1.007276 1 pos [M+Na]+ 22.989221 1 pos [M+2H]2+ 1.007276 2 pos [M+H+Na]2+ 11.9982485 2 pos \ No newline at end of file diff --git a/beamspy/data/neutral_losses.txt b/beamspy/data/neutral_losses.txt new file mode 100644 index 0000000..172a446 --- /dev/null +++ b/beamspy/data/neutral_losses.txt @@ -0,0 +1 @@ +label mass_difference H2O 18.010565 CO 27.994915 \ No newline at end of file diff --git a/beamspy/gui.py b/beamspy/gui.py index 38858d5..5636473 100644 --- a/beamspy/gui.py +++ b/beamspy/gui.py @@ -10,9 +10,10 @@ from beamspy import plots from PySide2 import QtCore, QtGui, QtWidgets from beamspy.qt import form - +from beamspy import __version__ from collections import OrderedDict from multiprocessing import cpu_count +from os.path import expanduser class BeamsApp(QtWidgets.QMainWindow, form.Ui_MainWindow): @@ -22,6 +23,10 @@ def __init__(self, *args, **kwargs): self.pushButton_cancel.clicked.connect(QtCore.QCoreApplication.instance().quit) + self.path_wd = expanduser("~") + + self.pushButton_wd.clicked.connect(partial(self.open_directory, self.lineEdit_wd)) + self.pushButton_peaklist.clicked.connect(partial(self.open_file, self.lineEdit_peaklist)) self.pushButton_peak_matrix.clicked.connect(partial(self.open_file, self.lineEdit_intensity_matrix)) @@ -33,7 +38,7 @@ def __init__(self, *args, **kwargs): self.lineEdit_adduct_library)) self.pushButton_adduct_library.clicked.connect(partial(self.open_file, self.lineEdit_adduct_library)) self.pushButton_isotopes.clicked.connect(partial(self.open_file, self.lineEdit_isotopes)) - self.pushButton_multiple_charged.clicked.connect(partial(self.open_file, self.lineEdit_multiple_charged)) + self.pushButton_neutral_losses.clicked.connect(partial(self.open_file, self.lineEdit_neutral_losses)) self.checkBox_filename_reference.clicked.connect(self.source_compounds) self.pushButton_filename_reference.clicked.connect(partial(self.open_file, self.lineEdit_filename_reference)) @@ -50,7 +55,7 @@ def __init__(self, *args, **kwargs): self.comboBox_source_mf.activated.connect(self.source_mf) self.checkBox_adduct_library.clicked.connect(self.source_peak_patterns) self.checkBox_isotopes.clicked.connect(self.source_peak_patterns) - self.checkBox_multiple_charged.clicked.connect(self.source_peak_patterns) + self.checkBox_neutral_losses.clicked.connect(self.source_peak_patterns) self.checkBox_oligomers.clicked.connect(self.source_peak_patterns) self.checkBox_mz_digits.clicked.connect(self.create_summary) @@ -63,10 +68,18 @@ def __init__(self, *args, **kwargs): self.pushButton_start.clicked.connect(self.run) # When the button is pressed + def open_directory(self, field): + d = QtWidgets.QFileDialog.getExistingDirectory(None, 'Select a folder', self.path_wd) + if d: + if str(d) == "": + QtWidgets.QMessageBox.critical(None, "Select a folder", "No folder selected", QtWidgets.QMessageBox.Ok) + else: + field.setText(d) + self.path_wd = d + return def open_file(self, field, field_extra=None): - - d = QtWidgets.QFileDialog.getOpenFileName(self, 'Select File', "") + d = QtWidgets.QFileDialog.getOpenFileName(self, 'Select File', self.path_wd) if d: if str(d[0]) == "": QtWidgets.QMessageBox.critical(None, "Select File", "No file selected", QtWidgets.QMessageBox.Ok) @@ -77,7 +90,7 @@ def open_file(self, field, field_extra=None): return def save_file(self, field, filename): - d = QtWidgets.QFileDialog.getSaveFileName(self, 'Save File', filename) + d = QtWidgets.QFileDialog.getSaveFileName(self, 'Save File', os.path.join(self.path_wd, filename)) if d: if str(d[0]) == "": QtWidgets.QMessageBox.critical(None, "Save File", "Provide a valid filename", QtWidgets.QMessageBox.Ok) @@ -107,6 +120,7 @@ def source_mf(self): self.label_max_mz.setEnabled(False) self.spinBox_max_mz.setEnabled(False) self.checkBox_heuristic_rules.setEnabled(False) + self.checkBox_mf_pp_rules.setEnabled(True) else: self.label_filename_mf.setEnabled(False) self.lineEdit_filename_mf.setEnabled(False) @@ -114,6 +128,8 @@ def source_mf(self): self.label_max_mz.setEnabled(True) self.spinBox_max_mz.setEnabled(True) self.checkBox_heuristic_rules.setEnabled(True) + self.checkBox_mf_pp_rules.setEnabled(True) + def source_peak_patterns(self): if not self.checkBox_adduct_library.isChecked(): @@ -128,12 +144,12 @@ def source_peak_patterns(self): else: self.lineEdit_isotopes.setEnabled(True) self.pushButton_isotopes.setEnabled(True) - if not self.checkBox_multiple_charged.isChecked(): - self.pushButton_multiple_charged.setEnabled(False) - self.lineEdit_multiple_charged.setEnabled(False) + if not self.checkBox_neutral_losses.isChecked(): + self.lineEdit_neutral_losses.setEnabled(False) + self.pushButton_neutral_losses.setEnabled(False) else: - self.pushButton_multiple_charged.setEnabled(True) - self.lineEdit_multiple_charged.setEnabled(True) + self.lineEdit_neutral_losses.setEnabled(True) + self.pushButton_neutral_losses.setEnabled(True) if not self.checkBox_oligomers.isChecked(): self.spinBox_max_monomer_units.setEnabled(False) self.label_max_monomer_units.setEnabled(False) @@ -145,16 +161,18 @@ def source_compounds(self): if self.checkBox_filename_reference.isChecked(): self.listWidget_databases.setEnabled(False) # self.listWidget_categories.setEnabled(False) - self.label_databases.setEnabled(False) + # self.label_databases.setEnabled(False) self.pushButton_filename_reference.setEnabled(True) self.lineEdit_filename_reference.setEnabled(True) + self.checkBox_cpds_pp_rules.setEnabled(True) else: - self.label_databases.setEnabled(True) + # self.label_databases.setEnabled(True) self.listWidget_databases.setEnabled(True) # self.label_categories.setEnabled(False) # self.listWidget_categories.setEnabled(False) self.pushButton_filename_reference.setEnabled(False) self.lineEdit_filename_reference.setEnabled(False) + self.checkBox_cpds_pp_rules.setEnabled(True) def group_features(self): if not self.checkBox_group_features.isChecked(): @@ -191,19 +209,23 @@ def annotate_peak_patterns(self): self.pushButton_adduct_library.setEnabled(False) self.checkBox_adduct_library.setEnabled(False) self.checkBox_isotopes.setEnabled(False) - self.pushButton_multiple_charged.setEnabled(False) - self.lineEdit_multiple_charged.setEnabled(False) self.lineEdit_isotopes.setEnabled(False) self.pushButton_isotopes.setEnabled(False) - self.checkBox_multiple_charged.setEnabled(False) + self.checkBox_neutral_losses.setEnabled(False) + self.lineEdit_neutral_losses.setEnabled(False) + self.pushButton_neutral_losses.setEnabled(False) self.checkBox_oligomers.setEnabled(False) self.label_max_monomer_units.setEnabled(False) self.spinBox_max_monomer_units.setEnabled(False) + self.doubleSpinBox_pp_ppm_error.setEnabled(False) + self.label_pp_ppm_tolerance.setEnabled(False) else: self.checkBox_adduct_library.setEnabled(True) self.checkBox_isotopes.setEnabled(True) - self.checkBox_multiple_charged.setEnabled(True) + self.checkBox_neutral_losses.setEnabled(True) self.checkBox_oligomers.setEnabled(True) + self.doubleSpinBox_pp_ppm_error.setEnabled(True) + self.label_pp_ppm_tolerance.setEnabled(True) self.source_peak_patterns() self.source_graph_file() @@ -217,9 +239,15 @@ def annotate_molecular_formulae(self): self.label_max_mz.setEnabled(False) self.spinBox_max_mz.setEnabled(False) self.checkBox_heuristic_rules.setEnabled(False) + self.label_mf_ppm_tolerance.setEnabled(False) + self.doubleSpinBox_mf_ppm_error.setEnabled(False) + self.checkBox_mf_pp_rules.setEnabled(False) else: self.comboBox_source_mf.setEnabled(True) self.label_source_mf.setEnabled(True) + self.label_mf_ppm_tolerance.setEnabled(True) + self.doubleSpinBox_mf_ppm_error.setEnabled(True) + self.checkBox_mf_pp_rules.setEnabled(True) self.source_mf() return @@ -227,12 +255,18 @@ def annotate_compounds(self): if not self.checkBox_annotate_compounds.isChecked(): self.listWidget_databases.setEnabled(False) # self.listWidget_categories.setEnabled(False) - self.label_databases.setEnabled(False) + # self.label_databases.setEnabled(False) self.checkBox_filename_reference.setEnabled(False) self.pushButton_filename_reference.setEnabled(False) self.lineEdit_filename_reference.setEnabled(False) + self.label_cpds_ppm_tolerance.setEnabled(False) + self.doubleSpinBox_cpds_ppm_error.setEnabled(False) + self.checkBox_cpds_pp_rules.setEnabled(False) else: + self.label_cpds_ppm_tolerance.setEnabled(True) + self.doubleSpinBox_cpds_ppm_error.setEnabled(True) self.checkBox_filename_reference.setEnabled(True) + self.checkBox_cpds_pp_rules.setEnabled(True) self.source_compounds() def create_summary(self): @@ -245,7 +279,7 @@ def create_summary(self): self.comboBox_annotations_format.setEnabled(False) self.pushButton_summary_filename.setEnabled(False) self.comboBox_separator.setEnabled(False) - self.comboBox_convert_rt.setEnabled(False) + self.comboBox_convert_rt.setDisabled(True) self.spinBox_mz_digits.setEnabled(False) else: self.label_summary_filename.setEnabled(True) @@ -258,9 +292,9 @@ def create_summary(self): self.comboBox_separator.setEnabled(True) if self.checkBox_convert_rt.isChecked(): - self.comboBox_convert_rt.setEnabled(True) + self.comboBox_convert_rt.setDisabled(False) else: - self.comboBox_convert_rt.setEnabled(False) + self.comboBox_convert_rt.setDisabled(True) if self.checkBox_mz_digits.isChecked(): self.spinBox_mz_digits.setEnabled(True) else: @@ -370,17 +404,20 @@ def run(self): annotation.annotate_isotopes(inp, db_out=self.lineEdit_sql_database.text(), ppm=self.doubleSpinBox_pp_ppm_error.value(), lib=lib) print("Done") - if self.checkBox_multiple_charged.isChecked(): - print("Multiple charged ions...."), - if self.lineEdit_multiple_charged.text() == "Use default": - path = 'data/multiple_charged_ions.txt' + if self.checkBox_neutral_losses.isChecked(): + print("Neutral losses...."), + if self.lineEdit_neutral_losses.text() == "Use default": + path = 'data/neutral_losses.txt' p = os.path.join(os.path.dirname(os.path.abspath(__file__)), path) - lib = in_out.read_multiple_charged_ions(p, lib_ion_mode[self.comboBox_ion_mode.currentText()]) - elif os.path.isfile(self.lineEdit_multiple_charged.text()): - lib = in_out.read_multiple_charged_ions(self.lineEdit_multiple_charged.text(), lib_ion_mode[self.comboBox_ion_mode.currentText()]) + lib = in_out.read_neutral_losses(p) + + elif os.path.isfile(self.lineEdit_neutral_losses.text()): + lib = in_out.read_neutral_losses(self.lineEdit_neutral_losses.text()) else: - raise IOError("Provide a valid filename for multiple charged ions or 'Use default'") - annotation.annotate_multiple_charged_ions(inp, db_out=self.lineEdit_sql_database.text(), ppm=self.doubleSpinBox_pp_ppm_error.value(), lib=lib) + raise IOError("Provide a valid filename for neutral losses or 'Use default'") + print("") + print(lib) + annotation.annotate_neutral_losses(inp, db_out=self.lineEdit_sql_database.text(), ppm=self.doubleSpinBox_pp_ppm_error.value(), lib=lib) print("Done") if self.checkBox_oligomers.isChecked(): @@ -421,10 +458,12 @@ def run(self): rules = None max_mz = None else: - db_in = "http://mfdb.bham.ac.uk" + db_in = "https://mfdb.bham.ac.uk" rules = self.checkBox_heuristic_rules.isChecked() max_mz = self.spinBox_max_mz.value() + use_peak_patterns = self.checkBox_mf_pp_rules.isChecked() + print("") print(lib) annotation.annotate_molecular_formulae(df, @@ -432,6 +471,7 @@ def run(self): ppm=self.doubleSpinBox_mf_ppm_error.value(), db_out=self.lineEdit_sql_database.text(), db_in=db_in, + patterns=use_peak_patterns, rules=rules, max_mz=max_mz) print("Done") @@ -445,10 +485,7 @@ def run(self): p = os.path.join(os.path.dirname(os.path.abspath(__file__)), path) lib = in_out.read_adducts(p, lib_ion_mode[self.comboBox_ion_mode.currentText()]) elif os.path.isfile(self.lineEdit_default_adduct_library.text()): - try: - lib = in_out.read_adducts(self.lineEdit_default_adduct_library.text(), lib_ion_mode[self.comboBox_ion_mode.currentText()]) - except: - lib = in_out.read_mass_differences(self.lineEdit_default_adduct_library.text(), lib_ion_mode[self.comboBox_ion_mode.currentText()]) + lib = in_out.read_adducts(self.lineEdit_default_adduct_library.text(), lib_ion_mode[self.comboBox_ion_mode.currentText()]) else: raise IOError("Provide a valid filename for adducts") @@ -456,13 +493,16 @@ def run(self): print("") print(lib) annotation.annotate_compounds(df, lib_adducts=lib, ppm=self.doubleSpinBox_cpds_ppm_error.value(), - db_out=self.lineEdit_sql_database.text(), db_name=None, db_in=self.lineEdit_filename_reference.text()) + db_out=self.lineEdit_sql_database.text(), db_name=None, + patterns=self.checkBox_cpds_pp_rules.isChecked(), + db_in=self.lineEdit_filename_reference.text()) else: for db_name in self.listWidget_databases.selectedItems(): annotation.annotate_compounds(df, lib_adducts=lib, ppm=self.doubleSpinBox_cpds_ppm_error.value(), - db_out=self.lineEdit_sql_database.text(), db_name=self.db_names[db_name.text()]) + db_out=self.lineEdit_sql_database.text(), db_name=self.db_names[db_name.text()], + patterns=self.checkBox_cpds_pp_rules.isChecked()) print("Done") - print + print("") if self.checkBox_create_summary.isChecked(): print("Creating summary...."), diff --git a/beamspy/in_out.py b/beamspy/in_out.py index b7b5359..d386775 100644 --- a/beamspy/in_out.py +++ b/beamspy/in_out.py @@ -23,11 +23,11 @@ def read_adducts(filename, ion_mode, separator="\t"): adducts.remove("*") for index, row in df.iterrows(): if "ion_mode" not in row: - adducts.add(row["label"], row["exact_mass"]) + adducts.add(row["label"], row["exact_mass"], row["charge"]) elif (row["ion_mode"] == "pos" or row["ion_mode"] == "both") and ion_mode == "pos": - adducts.add(row["label"], row["exact_mass"]) + adducts.add(row["label"], row["exact_mass"], row["charge"]) elif (row["ion_mode"] == "neg" or row["ion_mode"] == "both") and ion_mode == "neg": - adducts.add(row["label"], row["exact_mass"]) + adducts.add(row["label"], row["exact_mass"], row["charge"]) return adducts @@ -37,11 +37,14 @@ def read_isotopes(filename, ion_mode, separator="\t"): isotopes.remove("*") for index, row in df.iterrows(): if "ion_mode" not in row: - isotopes.add(row["label_x"], row["label_y"], row["abundance_x"], row["abundance_y"], row["mass_difference"]) + isotopes.add(row["label_x"], row["label_y"], row["abundance_x"], row["abundance_y"], + row["mass_difference"], row["charge"]) elif (row["ion_mode"] == "pos" or row["ion_mode"] == "both") and ion_mode == "pos": - isotopes.add(row["label_x"], row["label_y"], row["abundance_x"], row["abundance_y"], row["mass_difference"]) + isotopes.add(row["label_x"], row["label_y"], row["abundance_x"], row["abundance_y"], + row["mass_difference"], row["charge"]) elif (row["ion_mode"] == "neg" or row["ion_mode"] == "both") and ion_mode == "neg": - isotopes.add(row["label_x"], row["label_y"], row["abundance_x"], row["abundance_y"], row["mass_difference"]) + isotopes.add(row["label_x"], row["label_y"], row["abundance_x"], row["abundance_y"], + row["mass_difference"], row["charge"]) return isotopes @@ -74,7 +77,7 @@ def read_molecular_formulae(filename, separator="\t", calculate=True, filename_a return records -def read_compounds(filename, separator="\t", calculate=True, filename_atoms=""): +def read_compounds(filename, separator="\t", calculate=True, lib_adducts=[], filename_atoms=""): if calculate: path_nist_database = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'data', 'nist_database.txt') @@ -93,10 +96,21 @@ def read_compounds(filename, separator="\t", calculate=True, filename_atoms=""): record["exact_mass"] = round(pyteomics_mass.calculate_mass(formula=str(str(row.molecular_formula)), mass_data=nist_database),6) else: record["exact_mass"] = float(row.exact_mass) + record["compound_id"] = row.compound_id record["compound_name"] = row.compound_name comp = pyteomics_mass.Composition(str(row.molecular_formula)) record["molecular_formula"] = composition_to_string(comp) + + if "retention_time" in df.columns: + record["retention_time"] = row.retention_time + elif "rt" in df.columns: + record["retention_time"] = row.rt + if "adduct" in df.columns: + record["adduct"] = row.adduct + if lib_adducts and calculate: + record["exact_mass"] += lib_adducts.lib[row.adduct]["mass"] + records.append(record) else: Warning("{} Skipped".format(row)) @@ -104,20 +118,6 @@ def read_compounds(filename, separator="\t", calculate=True, filename_atoms=""): return records -def read_multiple_charged_ions(filename, ion_mode, separator="\t"): - df = read_csv(filename, sep=separator, float_precision="round_trip") - multiple_charges = libraries.MultipleChargedIons() - multiple_charges.remove("*") - for index, row in df.iterrows(): - if "ion_mode" not in row: - multiple_charges.add(row["label"], row["exact_mass"], row["charge"]) - elif (row["ion_mode"] == "pos" or row["ion_mode"] == "both") and ion_mode == "pos": - multiple_charges.add(row["label"], row["exact_mass"], row["charge"]) - elif (row["ion_mode"] == "neg" or row["ion_mode"] == "both") and ion_mode == "neg": - multiple_charges.add(row["label"], row["exact_mass"], row["charge"]) - return multiple_charges - - def read_mass_differences(filename, ion_mode, separator="\t"): df = read_csv(filename, sep=separator, float_precision="round_trip") mass_differences = libraries.MassDifferences() @@ -127,7 +127,7 @@ def read_mass_differences(filename, ion_mode, separator="\t"): charge_y = row["charge_y"] else: charge_x = 1 - charge_y = 2 + charge_y = 1 if "ion_mode" not in row: mass_differences.add(row["label_x"], row["label_y"], row["mass_difference"], charge_x, charge_y) elif (row["ion_mode"] == "pos" or row["ion_mode"] == "both") and ion_mode == "pos": @@ -137,6 +137,14 @@ def read_mass_differences(filename, ion_mode, separator="\t"): return mass_differences +def read_neutral_losses(filename, separator="\t"): + df = read_csv(filename, sep=separator, float_precision="round_trip") + nls = libraries.NeutralLosses() + for index, row in df.iterrows(): + nls.add(row["label"], row["mass_difference"]) + return nls + + def read_xset_matrix(fn_matrix, first_sample, separator="\t", mapping={"mz": "mz", "rt": "rt", "name": "name"}, samples_in_columns=True): if "mz" not in mapping and "rt" not in mapping and "name" not in mapping: raise ValueError("Incorrect column mapping: provide column names for mz, and name") @@ -193,7 +201,6 @@ def combine_peaklist_matrix(fn_peaklist, fn_matrix, separator="\t", median_inten return pd.merge(df_peaklist, df_matrix, how='left', left_on=merge_on, right_on=merge_on) - def read_peaklist(fn_peaklist, separator="\t", mapping={"name": "name", "mz": "mz", "rt": "rt", "intensity": "intensity"}): @@ -223,7 +230,10 @@ def read_peaklist(fn_peaklist, separator="\t", df_peaklist = df_peaklist[[mapping["mz"], mapping["rt"], mapping["intensity"]]] df_peaklist.columns = ["mz", "rt", "intensity"] - names = "M" + df_peaklist["mz"].round().astype(int).astype(str).str.cat(df_peaklist["rt"].round().astype(int).astype(str), sep="T") + uids = df_peaklist["mz"].round().astype(int).astype(str).str.cat(df_peaklist["rt"].round().astype(int).astype(str), sep="T") + ms = pd.Series(['M'] * len(uids)) + names = ms.str.cat(uids, sep='') + for n in names.copy(): idxs = names.index[names == n].tolist() if len(idxs) > 1: diff --git a/beamspy/libraries.py b/beamspy/libraries.py index 4aa291b..6a197dc 100644 --- a/beamspy/libraries.py +++ b/beamspy/libraries.py @@ -16,9 +16,9 @@ def __init__(self, ion_mode=None, e=0.0005486): elif ion_mode is None: self.lib = OrderedDict() - def add(self, name, mass): - self.lib[name] = mass - self.lib = OrderedDict(sorted(self.lib.items(), key=lambda x: x[1])) + def add(self, name, mass, charge): + self.lib[name] = OrderedDict([("mass", float(mass)), ("charge", int(charge))]) + self.lib = OrderedDict(sorted(self.lib.items(), key=lambda x: x[1]['mass'])) def remove(self, name): if name == "*": @@ -32,7 +32,7 @@ def remove(self, name): def __str__(self): out = "Adducts in library\n" out += "-----------------\n" - out += "name\texact_mass\tion_mode\n" + out += "name\texact_mass\n" for key in self.lib: out += "%s\t%s\n" % (key, self.lib[key]) return out @@ -43,21 +43,28 @@ class Isotopes: def __init__(self, ion_mode=None): self.ion_mode = ion_mode - self.lib = [OrderedDict([("C", {"abundance": 100.0}), ("(13C)", {"abundance": 1.1}), ("mass_difference", 1.003355)]), - OrderedDict([("S", {"abundance": 100.0}), ("(34S)", {"abundance": 4.21}), ("mass_difference", 1.995796)])] + self.lib = [OrderedDict([("C", {"abundance": 100.0}), ("(13C)", {"abundance": 1.1}), + ("mass_difference", 1.003355), + ("charge", 1)]), + OrderedDict([("S", {"abundance": 100.0}), ("(34S)", {"abundance": 4.21}), + ("mass_difference", 1.995796), + ("charge", 1)])] if self.ion_mode == "pos": - self.lib.append(OrderedDict([("K", {"abundance": 100.0}), ("(41K)", {"abundance": 6.73}), ("mass_difference", 1.998117)])) + self.lib.append(OrderedDict([("K", {"abundance": 100.0}), ("(41K)", {"abundance": 6.73}), + ("mass_difference", 1.998117), ("charge", 1)])) #self.lib.append(OrderedDict([("(6Li)", {"abundance": 7.42}), ("Li", {"abundance": 1.0}), ("mass_difference", 1.000882)])) elif self.ion_mode == "neg": - self.lib.append(OrderedDict([("Cl", {"abundance": 100.0}), ("(37Cl)", {"abundance": 24.23}), ("mass_difference", 1.997050)])) + self.lib.append(OrderedDict([("Cl", {"abundance": 100.0}), ("(37Cl)", {"abundance": 24.23}), + ("mass_difference", 1.997050), ("charge", 1)])) self.lib = sorted(self.lib, key=lambda k: k['mass_difference']) - def add(self, label_x, label_y, mx_abundance, my_abundance, mass_difference): + def add(self, label_x, label_y, mx_abundance, my_abundance, mass_difference, charge): self.lib.append(OrderedDict([(label_x, {"abundance": float(mx_abundance)}), (label_y, {"abundance": float(my_abundance)}), - ("mass_difference", mass_difference)])) + ("mass_difference", float(mass_difference)), + ("charge", int(charge))])) self.lib = sorted(self.lib, key=lambda k: k['mass_difference']) def remove(self, label_x="*", label_y="*"): @@ -73,46 +80,42 @@ def remove(self, label_x="*", label_y="*"): def __str__(self): out = "Isotopes in library:\n" out += "--------------------------------------------\n" - out += "label_x\tlabel_y\tmass_difference\tabundance_x\tabundance_y\n" + out += "label_x\tlabel_y\tmass_difference\tcharge\tabundance_x\tabundance_y\n" for item in self.lib: label_x = list(item.items())[0][0] label_y = list(item.items())[1][0] - out += "{}\t{}\t{}\t{}\t{}\n".format(label_x, label_y, + out += "{}\t{}\t{}\t{}\t{}\t{}\n".format(label_x, label_y, item["mass_difference"], + item["charge"], item[label_x]["abundance"], item[label_y]["abundance"]) return out -class MultipleChargedIons: - def __init__(self, ion_mode=None, e=0.0005486): +class NeutralLosses: - self.e = e - if ion_mode == "pos": - self.lib = OrderedDict() - elif ion_mode == "neg": - self.lib = OrderedDict() - elif ion_mode is None: - self.lib = OrderedDict() + def __init__(self): + self.lib = [] - def add(self, name, mass, charge): - self.lib[name] = OrderedDict([("mass", float(mass)), ("charge", int(charge))]) - self.lib = OrderedDict(sorted(self.lib.items(), key=lambda x: x[1]['mass'])) + def add(self, label, mass_difference): + self.lib.append(OrderedDict([("label", label), ("mass_difference", mass_difference)])) + self.lib = sorted(self.lib, key=lambda k: k['mass_difference']) - def remove(self, name): - if name == "*": - self.lib = OrderedDict() + def remove(self, label="*"): + if label == "*": + self.lib = [] else: - if name in self.lib: - del self.lib[name] + for item in self.lib: + if label in self.lib: + self.lib.remove(item) else: print("Entry not in library") def __str__(self): - out = "Multiple charge ions in library\n" - out += "-------------------------------\n" - out += "name\tmass\tcharge\n" - for key in self.lib: - out += "{}\t{}\t{}\n".format(key, self.lib[key]["mass"], self.lib[key]["charge"]) + out = "Neutral losses in library:\n" + out += "--------------------------------------------\n" + out += "label\tmass_difference\n" + for d in self.lib: + out += "{}\t{}\n".format(d["label"], d["mass_difference"]) return out diff --git a/beamspy/plots.py b/beamspy/plots.py index d4a833a..96d6f95 100644 --- a/beamspy/plots.py +++ b/beamspy/plots.py @@ -114,7 +114,7 @@ def plot_annotations(column_ppm_error, column_adducts, df): ppm_errors = df[column_ppm_error].dropna() - sns.boxplot(ppm_errors, ax=ax_box) + sns.boxplot(x=ppm_errors, ax=ax_box) bin_size = 0.1 bins = np.arange(np.floor(ppm_errors.min()) - bin_size, np.ceil(ppm_errors.max()) + bin_size, bin_size).round(3) @@ -137,7 +137,7 @@ def plot_annotations(column_ppm_error, column_adducts, df): ax_hist.set_title("mean={}; std={}".format(round(mean, 2), round(std, 2))) ax_hist.set(xlabel="Ppm error", ylabel="Frequency") - sns.countplot(df[column_adducts].dropna(), ax=ax_count) + sns.countplot(x=df[column_adducts].dropna(), ax=ax_count) ax_count.set(xlabel="Adduct", ylabel="Frequency") plt.setp(ax_box.get_xticklabels(), visible=False) diff --git a/beamspy/qt/form.py b/beamspy/qt/form.py index 0f504f1..bfe7e0f 100644 --- a/beamspy/qt/form.py +++ b/beamspy/qt/form.py @@ -3,8 +3,8 @@ # Form implementation generated from reading ui file 'form.ui', # licensing of 'form.ui' applies. # -# Created: Fri Aug 9 17:01:50 2019 -# by: pyside2-uic running on PySide2 5.9.0~a1 +# Created: Thu Dec 3 21:58:30 2020 +# by: pyside2-uic running on PySide2 5.13.1 # # WARNING! All changes made in this file will be lost! @@ -14,49 +14,144 @@ class Ui_MainWindow(object): def setupUi(self, MainWindow): MainWindow.setObjectName("MainWindow") MainWindow.setEnabled(True) - MainWindow.resize(795, 830) + MainWindow.resize(974, 775) MainWindow.setAnimated(False) self.centralwidget = QtWidgets.QWidget(MainWindow) self.centralwidget.setObjectName("centralwidget") - self.groupBox_general = QtWidgets.QGroupBox(self.centralwidget) - self.groupBox_general.setGeometry(QtCore.QRect(20, 8, 761, 131)) + self.scrollArea = QtWidgets.QScrollArea(self.centralwidget) + self.scrollArea.setGeometry(QtCore.QRect(6, 5, 961, 731)) + self.scrollArea.setWidgetResizable(True) + self.scrollArea.setObjectName("scrollArea") + self.scrollAreaWidgetContents = QtWidgets.QWidget() + self.scrollAreaWidgetContents.setGeometry(QtCore.QRect(0, 0, 959, 729)) + self.scrollAreaWidgetContents.setObjectName("scrollAreaWidgetContents") + self.verticalLayout_2 = QtWidgets.QVBoxLayout(self.scrollAreaWidgetContents) + self.verticalLayout_2.setObjectName("verticalLayout_2") + self.groupBox_general = QtWidgets.QGroupBox(self.scrollAreaWidgetContents) self.groupBox_general.setTitle("") self.groupBox_general.setObjectName("groupBox_general") + self.gridLayout_3 = QtWidgets.QGridLayout(self.groupBox_general) + self.gridLayout_3.setContentsMargins(10, 5, 10, 5) + self.gridLayout_3.setVerticalSpacing(5) + self.gridLayout_3.setObjectName("gridLayout_3") + self.pushButton_peaklist = QtWidgets.QPushButton(self.groupBox_general) + self.pushButton_peaklist.setEnabled(True) + self.pushButton_peaklist.setMinimumSize(QtCore.QSize(95, 23)) + self.pushButton_peaklist.setMaximumSize(QtCore.QSize(95, 16777215)) + self.pushButton_peaklist.setObjectName("pushButton_peaklist") + self.gridLayout_3.addWidget(self.pushButton_peaklist, 5, 2, 1, 1) + self.pushButton_graph = QtWidgets.QPushButton(self.groupBox_general) + self.pushButton_graph.setEnabled(True) + self.pushButton_graph.setMinimumSize(QtCore.QSize(95, 23)) + self.pushButton_graph.setMaximumSize(QtCore.QSize(95, 16777215)) + self.pushButton_graph.setObjectName("pushButton_graph") + self.gridLayout_3.addWidget(self.pushButton_graph, 5, 6, 1, 1) + self.pushButton_default_adduct_library = QtWidgets.QPushButton(self.groupBox_general) + self.pushButton_default_adduct_library.setEnabled(True) + self.pushButton_default_adduct_library.setMinimumSize(QtCore.QSize(95, 23)) + self.pushButton_default_adduct_library.setMaximumSize(QtCore.QSize(95, 16777215)) + self.pushButton_default_adduct_library.setObjectName("pushButton_default_adduct_library") + self.gridLayout_3.addWidget(self.pushButton_default_adduct_library, 7, 6, 1, 1) self.lineEdit_intensity_matrix = QtWidgets.QLineEdit(self.groupBox_general) - self.lineEdit_intensity_matrix.setGeometry(QtCore.QRect(120, 60, 161, 20)) + self.lineEdit_intensity_matrix.setMinimumSize(QtCore.QSize(150, 0)) + self.lineEdit_intensity_matrix.setMaximumSize(QtCore.QSize(150, 16777215)) self.lineEdit_intensity_matrix.setText("") self.lineEdit_intensity_matrix.setReadOnly(True) self.lineEdit_intensity_matrix.setObjectName("lineEdit_intensity_matrix") + self.gridLayout_3.addWidget(self.lineEdit_intensity_matrix, 7, 1, 1, 1) + self.lineEdit_graph = QtWidgets.QLineEdit(self.groupBox_general) + self.lineEdit_graph.setEnabled(True) + self.lineEdit_graph.setMinimumSize(QtCore.QSize(150, 0)) + self.lineEdit_graph.setMaximumSize(QtCore.QSize(150, 16777215)) + self.lineEdit_graph.setText("") + self.lineEdit_graph.setReadOnly(True) + self.lineEdit_graph.setObjectName("lineEdit_graph") + self.gridLayout_3.addWidget(self.lineEdit_graph, 5, 5, 1, 1) + self.label_graph = QtWidgets.QLabel(self.groupBox_general) + self.label_graph.setEnabled(True) + self.label_graph.setObjectName("label_graph") + self.gridLayout_3.addWidget(self.label_graph, 5, 3, 1, 1) + self.lineEdit_wd = QtWidgets.QLineEdit(self.groupBox_general) + self.lineEdit_wd.setEnabled(True) + self.lineEdit_wd.setMinimumSize(QtCore.QSize(150, 0)) + self.lineEdit_wd.setMaximumSize(QtCore.QSize(150, 16777215)) + self.lineEdit_wd.setText("") + self.lineEdit_wd.setReadOnly(True) + self.lineEdit_wd.setObjectName("lineEdit_wd") + self.gridLayout_3.addWidget(self.lineEdit_wd, 4, 1, 1, 1) + self.pushButton_sql_database = QtWidgets.QPushButton(self.groupBox_general) + self.pushButton_sql_database.setEnabled(True) + self.pushButton_sql_database.setMinimumSize(QtCore.QSize(95, 23)) + self.pushButton_sql_database.setMaximumSize(QtCore.QSize(95, 16777215)) + self.pushButton_sql_database.setObjectName("pushButton_sql_database") + self.gridLayout_3.addWidget(self.pushButton_sql_database, 4, 6, 1, 1) + self.lineEdit_sql_database = QtWidgets.QLineEdit(self.groupBox_general) + self.lineEdit_sql_database.setEnabled(True) + self.lineEdit_sql_database.setMinimumSize(QtCore.QSize(150, 0)) + self.lineEdit_sql_database.setMaximumSize(QtCore.QSize(150, 16777215)) + self.lineEdit_sql_database.setText("") + self.lineEdit_sql_database.setReadOnly(True) + self.lineEdit_sql_database.setObjectName("lineEdit_sql_database") + self.gridLayout_3.addWidget(self.lineEdit_sql_database, 4, 5, 1, 1) self.lineEdit_peaklist = QtWidgets.QLineEdit(self.groupBox_general) self.lineEdit_peaklist.setEnabled(True) - self.lineEdit_peaklist.setGeometry(QtCore.QRect(120, 30, 161, 20)) + self.lineEdit_peaklist.setMinimumSize(QtCore.QSize(150, 0)) + self.lineEdit_peaklist.setMaximumSize(QtCore.QSize(150, 16777215)) self.lineEdit_peaklist.setText("") self.lineEdit_peaklist.setReadOnly(True) self.lineEdit_peaklist.setObjectName("lineEdit_peaklist") - self.pushButton_peaklist = QtWidgets.QPushButton(self.groupBox_general) - self.pushButton_peaklist.setEnabled(True) - self.pushButton_peaklist.setGeometry(QtCore.QRect(290, 28, 71, 23)) - self.pushButton_peaklist.setMinimumSize(QtCore.QSize(63, 23)) - self.pushButton_peaklist.setMaximumSize(QtCore.QSize(81, 16777215)) - self.pushButton_peaklist.setObjectName("pushButton_peaklist") - self.label_peaklist = QtWidgets.QLabel(self.groupBox_general) - self.label_peaklist.setEnabled(True) - self.label_peaklist.setGeometry(QtCore.QRect(12, 30, 81, 16)) - self.label_peaklist.setObjectName("label_peaklist") + self.gridLayout_3.addWidget(self.lineEdit_peaklist, 5, 1, 1, 1) + self.pushButton_wd = QtWidgets.QPushButton(self.groupBox_general) + self.pushButton_wd.setEnabled(True) + self.pushButton_wd.setMinimumSize(QtCore.QSize(95, 23)) + self.pushButton_wd.setMaximumSize(QtCore.QSize(95, 16777215)) + self.pushButton_wd.setObjectName("pushButton_wd") + self.gridLayout_3.addWidget(self.pushButton_wd, 4, 2, 1, 1) + self.label_default_adduct_library = QtWidgets.QLabel(self.groupBox_general) + self.label_default_adduct_library.setEnabled(True) + self.label_default_adduct_library.setObjectName("label_default_adduct_library") + self.gridLayout_3.addWidget(self.label_default_adduct_library, 7, 3, 1, 1) self.label_intensity_matrix = QtWidgets.QLabel(self.groupBox_general) - self.label_intensity_matrix.setGeometry(QtCore.QRect(12, 60, 111, 16)) self.label_intensity_matrix.setObjectName("label_intensity_matrix") + self.gridLayout_3.addWidget(self.label_intensity_matrix, 7, 0, 1, 1) + self.lineEdit_default_adduct_library = QtWidgets.QLineEdit(self.groupBox_general) + self.lineEdit_default_adduct_library.setEnabled(True) + self.lineEdit_default_adduct_library.setMinimumSize(QtCore.QSize(150, 0)) + self.lineEdit_default_adduct_library.setMaximumSize(QtCore.QSize(150, 16777215)) + self.lineEdit_default_adduct_library.setReadOnly(True) + self.lineEdit_default_adduct_library.setObjectName("lineEdit_default_adduct_library") + self.gridLayout_3.addWidget(self.lineEdit_default_adduct_library, 7, 5, 1, 1) + self.pushButton_peak_matrix = QtWidgets.QPushButton(self.groupBox_general) + self.pushButton_peak_matrix.setEnabled(True) + self.pushButton_peak_matrix.setMinimumSize(QtCore.QSize(95, 23)) + self.pushButton_peak_matrix.setMaximumSize(QtCore.QSize(95, 16777215)) + self.pushButton_peak_matrix.setObjectName("pushButton_peak_matrix") + self.gridLayout_3.addWidget(self.pushButton_peak_matrix, 7, 2, 1, 1) + self.label_ion_mode = QtWidgets.QLabel(self.groupBox_general) + self.label_ion_mode.setMaximumSize(QtCore.QSize(75, 16777215)) + self.label_ion_mode.setObjectName("label_ion_mode") + self.gridLayout_3.addWidget(self.label_ion_mode, 7, 7, 1, 1) self.comboBox_ion_mode = QtWidgets.QComboBox(self.groupBox_general) - self.comboBox_ion_mode.setGeometry(QtCore.QRect(500, 58, 111, 22)) + self.comboBox_ion_mode.setMinimumSize(QtCore.QSize(100, 0)) + self.comboBox_ion_mode.setMaximumSize(QtCore.QSize(100, 16777215)) self.comboBox_ion_mode.setObjectName("comboBox_ion_mode") self.comboBox_ion_mode.addItem("") self.comboBox_ion_mode.addItem("") - self.label_ion_mode = QtWidgets.QLabel(self.groupBox_general) - self.label_ion_mode.setGeometry(QtCore.QRect(400, 60, 71, 16)) - self.label_ion_mode.setObjectName("label_ion_mode") + self.gridLayout_3.addWidget(self.comboBox_ion_mode, 7, 8, 1, 1) + self.label_sql_database = QtWidgets.QLabel(self.groupBox_general) + self.label_sql_database.setEnabled(True) + self.label_sql_database.setObjectName("label_sql_database") + self.gridLayout_3.addWidget(self.label_sql_database, 4, 3, 1, 1) + self.label_peaklist = QtWidgets.QLabel(self.groupBox_general) + self.label_peaklist.setEnabled(True) + self.label_peaklist.setObjectName("label_peaklist") + self.gridLayout_3.addWidget(self.label_peaklist, 5, 0, 1, 1) + self.label_wd = QtWidgets.QLabel(self.groupBox_general) + self.label_wd.setEnabled(True) + self.label_wd.setObjectName("label_wd") + self.gridLayout_3.addWidget(self.label_wd, 4, 0, 1, 1) self.label_data_files = QtWidgets.QLabel(self.groupBox_general) self.label_data_files.setEnabled(True) - self.label_data_files.setGeometry(QtCore.QRect(10, 0, 291, 16)) font = QtGui.QFont() font.setFamily(".SF NS Text") font.setPointSize(13) @@ -64,69 +159,41 @@ def setupUi(self, MainWindow): font.setBold(True) self.label_data_files.setFont(font) self.label_data_files.setObjectName("label_data_files") - self.lineEdit_default_adduct_library = QtWidgets.QLineEdit(self.groupBox_general) - self.lineEdit_default_adduct_library.setEnabled(True) - self.lineEdit_default_adduct_library.setGeometry(QtCore.QRect(500, 88, 111, 20)) - self.lineEdit_default_adduct_library.setReadOnly(True) - self.lineEdit_default_adduct_library.setObjectName("lineEdit_default_adduct_library") - self.pushButton_default_adduct_library = QtWidgets.QPushButton(self.groupBox_general) - self.pushButton_default_adduct_library.setEnabled(True) - self.pushButton_default_adduct_library.setGeometry(QtCore.QRect(620, 86, 71, 23)) - self.pushButton_default_adduct_library.setMinimumSize(QtCore.QSize(63, 23)) - self.pushButton_default_adduct_library.setObjectName("pushButton_default_adduct_library") - self.label_sql_database = QtWidgets.QLabel(self.groupBox_general) - self.label_sql_database.setEnabled(True) - self.label_sql_database.setGeometry(QtCore.QRect(12, 92, 121, 16)) - self.label_sql_database.setObjectName("label_sql_database") - self.lineEdit_sql_database = QtWidgets.QLineEdit(self.groupBox_general) - self.lineEdit_sql_database.setEnabled(True) - self.lineEdit_sql_database.setGeometry(QtCore.QRect(120, 92, 161, 20)) - self.lineEdit_sql_database.setText("") - self.lineEdit_sql_database.setReadOnly(True) - self.lineEdit_sql_database.setObjectName("lineEdit_sql_database") - self.pushButton_sql_database = QtWidgets.QPushButton(self.groupBox_general) - self.pushButton_sql_database.setEnabled(True) - self.pushButton_sql_database.setGeometry(QtCore.QRect(290, 90, 71, 23)) - self.pushButton_sql_database.setMinimumSize(QtCore.QSize(63, 23)) - self.pushButton_sql_database.setMaximumSize(QtCore.QSize(81, 16777215)) - self.pushButton_sql_database.setObjectName("pushButton_sql_database") - self.lineEdit_graph = QtWidgets.QLineEdit(self.groupBox_general) - self.lineEdit_graph.setEnabled(True) - self.lineEdit_graph.setGeometry(QtCore.QRect(498, 30, 161, 20)) - self.lineEdit_graph.setText("") - self.lineEdit_graph.setReadOnly(True) - self.lineEdit_graph.setObjectName("lineEdit_graph") - self.pushButton_graph = QtWidgets.QPushButton(self.groupBox_general) - self.pushButton_graph.setEnabled(True) - self.pushButton_graph.setGeometry(QtCore.QRect(668, 28, 71, 23)) - self.pushButton_graph.setMinimumSize(QtCore.QSize(63, 23)) - self.pushButton_graph.setMaximumSize(QtCore.QSize(81, 16777215)) - self.pushButton_graph.setObjectName("pushButton_graph") - self.label_graph = QtWidgets.QLabel(self.groupBox_general) - self.label_graph.setEnabled(True) - self.label_graph.setGeometry(QtCore.QRect(400, 30, 111, 16)) - self.label_graph.setObjectName("label_graph") - self.pushButton_peak_matrix = QtWidgets.QPushButton(self.groupBox_general) - self.pushButton_peak_matrix.setEnabled(True) - self.pushButton_peak_matrix.setGeometry(QtCore.QRect(290, 56, 71, 23)) - self.pushButton_peak_matrix.setMinimumSize(QtCore.QSize(63, 23)) - self.pushButton_peak_matrix.setMaximumSize(QtCore.QSize(81, 16777215)) - self.pushButton_peak_matrix.setObjectName("pushButton_peak_matrix") - self.label_default_adduct_library = QtWidgets.QLabel(self.groupBox_general) - self.label_default_adduct_library.setEnabled(True) - self.label_default_adduct_library.setGeometry(QtCore.QRect(400, 90, 141, 16)) - self.label_default_adduct_library.setObjectName("label_default_adduct_library") - self.groupBox_group_features = QtWidgets.QGroupBox(self.centralwidget) - self.groupBox_group_features.setGeometry(QtCore.QRect(20, 150, 761, 91)) + self.gridLayout_3.addWidget(self.label_data_files, 3, 0, 1, 2) + self.verticalLayout_2.addWidget(self.groupBox_general) + self.groupBox_group_features = QtWidgets.QGroupBox(self.scrollAreaWidgetContents) self.groupBox_group_features.setTitle("") self.groupBox_group_features.setObjectName("groupBox_group_features") - self.label_max_rt = QtWidgets.QLabel(self.groupBox_group_features) - self.label_max_rt.setEnabled(True) - self.label_max_rt.setGeometry(QtCore.QRect(10, 33, 191, 16)) - self.label_max_rt.setObjectName("label_max_rt") + self.gridLayout_4 = QtWidgets.QGridLayout(self.groupBox_group_features) + self.gridLayout_4.setContentsMargins(10, 5, 10, 5) + self.gridLayout_4.setVerticalSpacing(5) + self.gridLayout_4.setObjectName("gridLayout_4") + self.doubleSpinBox_block = QtWidgets.QDoubleSpinBox(self.groupBox_group_features) + self.doubleSpinBox_block.setDecimals(0) + self.doubleSpinBox_block.setMaximum(100000000.0) + self.doubleSpinBox_block.setSingleStep(1000.0) + self.doubleSpinBox_block.setProperty("value", 5000.0) + self.doubleSpinBox_block.setObjectName("doubleSpinBox_block") + self.gridLayout_4.addWidget(self.doubleSpinBox_block, 9, 5, 1, 1) + self.doubleSpinBox_ncpus = QtWidgets.QDoubleSpinBox(self.groupBox_group_features) + self.doubleSpinBox_ncpus.setDecimals(0) + self.doubleSpinBox_ncpus.setMaximum(10000.0) + self.doubleSpinBox_ncpus.setSingleStep(1.0) + self.doubleSpinBox_ncpus.setProperty("value", 1.0) + self.doubleSpinBox_ncpus.setObjectName("doubleSpinBox_ncpus") + self.gridLayout_4.addWidget(self.doubleSpinBox_ncpus, 3, 5, 1, 1) + self.label_grouping_method = QtWidgets.QLabel(self.groupBox_group_features) + self.label_grouping_method.setEnabled(True) + self.label_grouping_method.setObjectName("label_grouping_method") + self.gridLayout_4.addWidget(self.label_grouping_method, 3, 2, 1, 1) + self.doubleSpinBox_coefficent = QtWidgets.QDoubleSpinBox(self.groupBox_group_features) + self.doubleSpinBox_coefficent.setMaximum(1.0) + self.doubleSpinBox_coefficent.setSingleStep(0.1) + self.doubleSpinBox_coefficent.setProperty("value", 0.7) + self.doubleSpinBox_coefficent.setObjectName("doubleSpinBox_coefficent") + self.gridLayout_4.addWidget(self.doubleSpinBox_coefficent, 9, 1, 1, 1) self.checkBox_group_features = QtWidgets.QCheckBox(self.groupBox_group_features) self.checkBox_group_features.setEnabled(True) - self.checkBox_group_features.setGeometry(QtCore.QRect(10, -1, 231, 17)) font = QtGui.QFont() font.setFamily(".SF NS Text") font.setPointSize(13) @@ -135,80 +202,88 @@ def setupUi(self, MainWindow): self.checkBox_group_features.setFont(font) self.checkBox_group_features.setChecked(True) self.checkBox_group_features.setObjectName("checkBox_group_features") + self.gridLayout_4.addWidget(self.checkBox_group_features, 0, 0, 1, 2) + self.label_tool_p_value = QtWidgets.QLabel(self.groupBox_group_features) + self.label_tool_p_value.setEnabled(True) + self.label_tool_p_value.setObjectName("label_tool_p_value") + self.gridLayout_4.addWidget(self.label_tool_p_value, 9, 2, 1, 1) + self.label_max_rt = QtWidgets.QLabel(self.groupBox_group_features) + self.label_max_rt.setEnabled(True) + self.label_max_rt.setObjectName("label_max_rt") + self.gridLayout_4.addWidget(self.label_max_rt, 3, 0, 1, 1) self.doubleSpinBox_max_rt = QtWidgets.QDoubleSpinBox(self.groupBox_group_features) - self.doubleSpinBox_max_rt.setGeometry(QtCore.QRect(190, 30, 81, 22)) self.doubleSpinBox_max_rt.setMaximum(9999999.0) self.doubleSpinBox_max_rt.setProperty("value", 5.0) self.doubleSpinBox_max_rt.setObjectName("doubleSpinBox_max_rt") - self.comboBox_grouping_method = QtWidgets.QComboBox(self.groupBox_group_features) - self.comboBox_grouping_method.setGeometry(QtCore.QRect(410, 31, 151, 22)) - self.comboBox_grouping_method.setObjectName("comboBox_grouping_method") - self.comboBox_grouping_method.addItem("") - self.comboBox_grouping_method.addItem("") + self.gridLayout_4.addWidget(self.doubleSpinBox_max_rt, 3, 1, 1, 1) self.doubleSpinBox_p_value = QtWidgets.QDoubleSpinBox(self.groupBox_group_features) - self.doubleSpinBox_p_value.setGeometry(QtCore.QRect(410, 57, 151, 22)) self.doubleSpinBox_p_value.setDecimals(10) self.doubleSpinBox_p_value.setMaximum(1.0) self.doubleSpinBox_p_value.setSingleStep(0.01) self.doubleSpinBox_p_value.setProperty("value", 0.01) self.doubleSpinBox_p_value.setObjectName("doubleSpinBox_p_value") - self.label_tool_p_value = QtWidgets.QLabel(self.groupBox_group_features) - self.label_tool_p_value.setEnabled(True) - self.label_tool_p_value.setGeometry(QtCore.QRect(303, 60, 121, 16)) - self.label_tool_p_value.setObjectName("label_tool_p_value") - self.label_grouping_method = QtWidgets.QLabel(self.groupBox_group_features) - self.label_grouping_method.setEnabled(True) - self.label_grouping_method.setGeometry(QtCore.QRect(303, 33, 121, 16)) - self.label_grouping_method.setObjectName("label_grouping_method") - self.doubleSpinBox_coefficent = QtWidgets.QDoubleSpinBox(self.groupBox_group_features) - self.doubleSpinBox_coefficent.setGeometry(QtCore.QRect(190, 57, 81, 22)) - self.doubleSpinBox_coefficent.setMaximum(1.0) - self.doubleSpinBox_coefficent.setSingleStep(0.1) - self.doubleSpinBox_coefficent.setProperty("value", 0.7) - self.doubleSpinBox_coefficent.setObjectName("doubleSpinBox_coefficent") + self.gridLayout_4.addWidget(self.doubleSpinBox_p_value, 9, 3, 1, 1) self.label_tool_coefficient = QtWidgets.QLabel(self.groupBox_group_features) self.label_tool_coefficient.setEnabled(True) - self.label_tool_coefficient.setGeometry(QtCore.QRect(10, 60, 181, 16)) self.label_tool_coefficient.setObjectName("label_tool_coefficient") - self.doubleSpinBox_block = QtWidgets.QDoubleSpinBox(self.groupBox_group_features) - self.doubleSpinBox_block.setGeometry(QtCore.QRect(666, 57, 81, 22)) - self.doubleSpinBox_block.setDecimals(0) - self.doubleSpinBox_block.setMaximum(100000000.0) - self.doubleSpinBox_block.setSingleStep(1000.0) - self.doubleSpinBox_block.setProperty("value", 5000.0) - self.doubleSpinBox_block.setObjectName("doubleSpinBox_block") - self.doubleSpinBox_ncpus = QtWidgets.QDoubleSpinBox(self.groupBox_group_features) - self.doubleSpinBox_ncpus.setGeometry(QtCore.QRect(666, 30, 81, 22)) - self.doubleSpinBox_ncpus.setDecimals(0) - self.doubleSpinBox_ncpus.setMaximum(10000.0) - self.doubleSpinBox_ncpus.setSingleStep(1.0) - self.doubleSpinBox_ncpus.setProperty("value", 1.0) - self.doubleSpinBox_ncpus.setObjectName("doubleSpinBox_ncpus") + self.gridLayout_4.addWidget(self.label_tool_coefficient, 9, 0, 1, 1) + self.comboBox_grouping_method = QtWidgets.QComboBox(self.groupBox_group_features) + self.comboBox_grouping_method.setObjectName("comboBox_grouping_method") + self.comboBox_grouping_method.addItem("") + self.comboBox_grouping_method.addItem("") + self.gridLayout_4.addWidget(self.comboBox_grouping_method, 3, 3, 1, 1) self.label_grouping_ncpus = QtWidgets.QLabel(self.groupBox_group_features) self.label_grouping_ncpus.setEnabled(True) - self.label_grouping_ncpus.setGeometry(QtCore.QRect(596, 33, 61, 16)) self.label_grouping_ncpus.setObjectName("label_grouping_ncpus") + self.gridLayout_4.addWidget(self.label_grouping_ncpus, 3, 4, 1, 1) self.label_grouping_block = QtWidgets.QLabel(self.groupBox_group_features) self.label_grouping_block.setEnabled(True) - self.label_grouping_block.setGeometry(QtCore.QRect(596, 59, 61, 16)) self.label_grouping_block.setObjectName("label_grouping_block") - self.groupBox_annotate_peak_patterns = QtWidgets.QGroupBox(self.centralwidget) - self.groupBox_annotate_peak_patterns.setGeometry(QtCore.QRect(20, 250, 761, 121)) + self.gridLayout_4.addWidget(self.label_grouping_block, 9, 4, 1, 1) + self.verticalLayout_2.addWidget(self.groupBox_group_features) + self.groupBox_annotate_peak_patterns = QtWidgets.QGroupBox(self.scrollAreaWidgetContents) self.groupBox_annotate_peak_patterns.setTitle("") self.groupBox_annotate_peak_patterns.setObjectName("groupBox_annotate_peak_patterns") - self.lineEdit_adduct_library = QtWidgets.QLineEdit(self.groupBox_annotate_peak_patterns) - self.lineEdit_adduct_library.setEnabled(True) - self.lineEdit_adduct_library.setGeometry(QtCore.QRect(10, 58, 91, 20)) - self.lineEdit_adduct_library.setReadOnly(True) - self.lineEdit_adduct_library.setObjectName("lineEdit_adduct_library") - self.pushButton_adduct_library = QtWidgets.QPushButton(self.groupBox_annotate_peak_patterns) - self.pushButton_adduct_library.setEnabled(True) - self.pushButton_adduct_library.setGeometry(QtCore.QRect(110, 56, 71, 23)) - self.pushButton_adduct_library.setMinimumSize(QtCore.QSize(63, 23)) - self.pushButton_adduct_library.setObjectName("pushButton_adduct_library") + self.gridLayout = QtWidgets.QGridLayout(self.groupBox_annotate_peak_patterns) + self.gridLayout.setContentsMargins(10, 5, 10, 5) + self.gridLayout.setVerticalSpacing(5) + self.gridLayout.setObjectName("gridLayout") + self.pushButton_isotopes = QtWidgets.QPushButton(self.groupBox_annotate_peak_patterns) + self.pushButton_isotopes.setEnabled(True) + self.pushButton_isotopes.setMinimumSize(QtCore.QSize(95, 23)) + self.pushButton_isotopes.setMaximumSize(QtCore.QSize(95, 16777215)) + self.pushButton_isotopes.setObjectName("pushButton_isotopes") + self.gridLayout.addWidget(self.pushButton_isotopes, 6, 4, 1, 1) + self.label_max_monomer_units = QtWidgets.QLabel(self.groupBox_annotate_peak_patterns) + self.label_max_monomer_units.setEnabled(True) + self.label_max_monomer_units.setMinimumSize(QtCore.QSize(110, 0)) + self.label_max_monomer_units.setObjectName("label_max_monomer_units") + self.gridLayout.addWidget(self.label_max_monomer_units, 6, 7, 1, 1) + self.checkBox_neutral_losses = QtWidgets.QCheckBox(self.groupBox_annotate_peak_patterns) + self.checkBox_neutral_losses.setEnabled(True) + self.checkBox_neutral_losses.setChecked(True) + self.checkBox_neutral_losses.setObjectName("checkBox_neutral_losses") + self.gridLayout.addWidget(self.checkBox_neutral_losses, 4, 5, 1, 1) + self.pushButton_neutral_losses = QtWidgets.QPushButton(self.groupBox_annotate_peak_patterns) + self.pushButton_neutral_losses.setEnabled(True) + self.pushButton_neutral_losses.setMinimumSize(QtCore.QSize(63, 23)) + self.pushButton_neutral_losses.setMaximumSize(QtCore.QSize(95, 16777215)) + self.pushButton_neutral_losses.setObjectName("pushButton_neutral_losses") + self.gridLayout.addWidget(self.pushButton_neutral_losses, 6, 6, 1, 1) + self.checkBox_isotopes = QtWidgets.QCheckBox(self.groupBox_annotate_peak_patterns) + self.checkBox_isotopes.setEnabled(True) + self.checkBox_isotopes.setChecked(True) + self.checkBox_isotopes.setObjectName("checkBox_isotopes") + self.gridLayout.addWidget(self.checkBox_isotopes, 4, 3, 1, 1) + self.lineEdit_isotopes = QtWidgets.QLineEdit(self.groupBox_annotate_peak_patterns) + self.lineEdit_isotopes.setEnabled(True) + self.lineEdit_isotopes.setMinimumSize(QtCore.QSize(120, 0)) + self.lineEdit_isotopes.setMaximumSize(QtCore.QSize(120, 16777215)) + self.lineEdit_isotopes.setReadOnly(True) + self.lineEdit_isotopes.setObjectName("lineEdit_isotopes") + self.gridLayout.addWidget(self.lineEdit_isotopes, 6, 3, 1, 1) self.checkBox_annotate_peak_patterns = QtWidgets.QCheckBox(self.groupBox_annotate_peak_patterns) self.checkBox_annotate_peak_patterns.setEnabled(True) - self.checkBox_annotate_peak_patterns.setGeometry(QtCore.QRect(10, -1, 271, 17)) font = QtGui.QFont() font.setFamily(".SF NS Text") font.setPointSize(13) @@ -217,214 +292,231 @@ def setupUi(self, MainWindow): self.checkBox_annotate_peak_patterns.setFont(font) self.checkBox_annotate_peak_patterns.setChecked(True) self.checkBox_annotate_peak_patterns.setObjectName("checkBox_annotate_peak_patterns") + self.gridLayout.addWidget(self.checkBox_annotate_peak_patterns, 0, 0, 1, 2) self.checkBox_adduct_library = QtWidgets.QCheckBox(self.groupBox_annotate_peak_patterns) self.checkBox_adduct_library.setEnabled(True) - self.checkBox_adduct_library.setGeometry(QtCore.QRect(10, 33, 181, 17)) self.checkBox_adduct_library.setChecked(True) self.checkBox_adduct_library.setObjectName("checkBox_adduct_library") - self.checkBox_isotopes = QtWidgets.QCheckBox(self.groupBox_annotate_peak_patterns) - self.checkBox_isotopes.setEnabled(True) - self.checkBox_isotopes.setGeometry(QtCore.QRect(200, 33, 171, 17)) - self.checkBox_isotopes.setChecked(True) - self.checkBox_isotopes.setObjectName("checkBox_isotopes") - self.pushButton_multiple_charged = QtWidgets.QPushButton(self.groupBox_annotate_peak_patterns) - self.pushButton_multiple_charged.setEnabled(True) - self.pushButton_multiple_charged.setGeometry(QtCore.QRect(490, 56, 71, 23)) - self.pushButton_multiple_charged.setMinimumSize(QtCore.QSize(63, 23)) - self.pushButton_multiple_charged.setObjectName("pushButton_multiple_charged") - self.lineEdit_multiple_charged = QtWidgets.QLineEdit(self.groupBox_annotate_peak_patterns) - self.lineEdit_multiple_charged.setEnabled(True) - self.lineEdit_multiple_charged.setGeometry(QtCore.QRect(390, 58, 91, 20)) - self.lineEdit_multiple_charged.setReadOnly(True) - self.lineEdit_multiple_charged.setObjectName("lineEdit_multiple_charged") - self.lineEdit_isotopes = QtWidgets.QLineEdit(self.groupBox_annotate_peak_patterns) - self.lineEdit_isotopes.setEnabled(True) - self.lineEdit_isotopes.setGeometry(QtCore.QRect(200, 58, 91, 20)) - self.lineEdit_isotopes.setReadOnly(True) - self.lineEdit_isotopes.setObjectName("lineEdit_isotopes") - self.pushButton_isotopes = QtWidgets.QPushButton(self.groupBox_annotate_peak_patterns) - self.pushButton_isotopes.setEnabled(True) - self.pushButton_isotopes.setGeometry(QtCore.QRect(300, 56, 71, 23)) - self.pushButton_isotopes.setMinimumSize(QtCore.QSize(63, 23)) - self.pushButton_isotopes.setObjectName("pushButton_isotopes") - self.checkBox_multiple_charged = QtWidgets.QCheckBox(self.groupBox_annotate_peak_patterns) - self.checkBox_multiple_charged.setEnabled(True) - self.checkBox_multiple_charged.setGeometry(QtCore.QRect(390, 33, 171, 17)) - self.checkBox_multiple_charged.setChecked(True) - self.checkBox_multiple_charged.setObjectName("checkBox_multiple_charged") + self.gridLayout.addWidget(self.checkBox_adduct_library, 4, 0, 1, 1) self.checkBox_oligomers = QtWidgets.QCheckBox(self.groupBox_annotate_peak_patterns) self.checkBox_oligomers.setEnabled(True) - self.checkBox_oligomers.setGeometry(QtCore.QRect(580, 33, 171, 17)) - self.checkBox_oligomers.setChecked(True) + self.checkBox_oligomers.setChecked(False) self.checkBox_oligomers.setObjectName("checkBox_oligomers") + self.gridLayout.addWidget(self.checkBox_oligomers, 4, 7, 1, 1) + self.label_pp_ppm_tolerance = QtWidgets.QLabel(self.groupBox_annotate_peak_patterns) + self.label_pp_ppm_tolerance.setEnabled(True) + self.label_pp_ppm_tolerance.setObjectName("label_pp_ppm_tolerance") + self.gridLayout.addWidget(self.label_pp_ppm_tolerance, 10, 0, 1, 2) + self.lineEdit_neutral_losses = QtWidgets.QLineEdit(self.groupBox_annotate_peak_patterns) + self.lineEdit_neutral_losses.setEnabled(True) + self.lineEdit_neutral_losses.setMinimumSize(QtCore.QSize(120, 0)) + self.lineEdit_neutral_losses.setMaximumSize(QtCore.QSize(120, 16777215)) + self.lineEdit_neutral_losses.setReadOnly(True) + self.lineEdit_neutral_losses.setObjectName("lineEdit_neutral_losses") + self.gridLayout.addWidget(self.lineEdit_neutral_losses, 6, 5, 1, 1) self.spinBox_max_monomer_units = QtWidgets.QSpinBox(self.groupBox_annotate_peak_patterns) - self.spinBox_max_monomer_units.setGeometry(QtCore.QRect(670, 58, 51, 22)) self.spinBox_max_monomer_units.setMinimum(2) self.spinBox_max_monomer_units.setMaximum(1000000) self.spinBox_max_monomer_units.setProperty("value", 2) self.spinBox_max_monomer_units.setDisplayIntegerBase(10) self.spinBox_max_monomer_units.setObjectName("spinBox_max_monomer_units") - self.label_max_monomer_units = QtWidgets.QLabel(self.groupBox_annotate_peak_patterns) - self.label_max_monomer_units.setEnabled(True) - self.label_max_monomer_units.setGeometry(QtCore.QRect(580, 60, 81, 16)) - self.label_max_monomer_units.setObjectName("label_max_monomer_units") - self.label_pp_ppm_tolerance = QtWidgets.QLabel(self.groupBox_annotate_peak_patterns) - self.label_pp_ppm_tolerance.setEnabled(True) - self.label_pp_ppm_tolerance.setGeometry(QtCore.QRect(10, 90, 141, 16)) - self.label_pp_ppm_tolerance.setObjectName("label_pp_ppm_tolerance") + self.gridLayout.addWidget(self.spinBox_max_monomer_units, 6, 8, 1, 1) + self.lineEdit_adduct_library = QtWidgets.QLineEdit(self.groupBox_annotate_peak_patterns) + self.lineEdit_adduct_library.setEnabled(True) + self.lineEdit_adduct_library.setMinimumSize(QtCore.QSize(120, 0)) + self.lineEdit_adduct_library.setMaximumSize(QtCore.QSize(120, 16777215)) + self.lineEdit_adduct_library.setReadOnly(True) + self.lineEdit_adduct_library.setObjectName("lineEdit_adduct_library") + self.gridLayout.addWidget(self.lineEdit_adduct_library, 6, 0, 1, 1) + self.pushButton_adduct_library = QtWidgets.QPushButton(self.groupBox_annotate_peak_patterns) + self.pushButton_adduct_library.setEnabled(True) + self.pushButton_adduct_library.setMinimumSize(QtCore.QSize(95, 23)) + self.pushButton_adduct_library.setMaximumSize(QtCore.QSize(95, 16777215)) + self.pushButton_adduct_library.setObjectName("pushButton_adduct_library") + self.gridLayout.addWidget(self.pushButton_adduct_library, 6, 1, 1, 2) self.doubleSpinBox_pp_ppm_error = QtWidgets.QDoubleSpinBox(self.groupBox_annotate_peak_patterns) - self.doubleSpinBox_pp_ppm_error.setGeometry(QtCore.QRect(140, 88, 111, 22)) self.doubleSpinBox_pp_ppm_error.setMaximum(100000.0) self.doubleSpinBox_pp_ppm_error.setProperty("value", 5.0) self.doubleSpinBox_pp_ppm_error.setObjectName("doubleSpinBox_pp_ppm_error") - self.pushButton_start = QtWidgets.QPushButton(self.centralwidget) - self.pushButton_start.setGeometry(QtCore.QRect(640, 764, 71, 23)) - self.pushButton_start.setObjectName("pushButton_start") - self.pushButton_cancel = QtWidgets.QPushButton(self.centralwidget) - self.pushButton_cancel.setGeometry(QtCore.QRect(711, 764, 71, 23)) - self.pushButton_cancel.setObjectName("pushButton_cancel") - self.groupBox_annotate_molecular_formulae = QtWidgets.QGroupBox(self.centralwidget) - self.groupBox_annotate_molecular_formulae.setGeometry(QtCore.QRect(20, 383, 761, 101)) + self.gridLayout.addWidget(self.doubleSpinBox_pp_ppm_error, 10, 3, 1, 1) + self.verticalLayout_2.addWidget(self.groupBox_annotate_peak_patterns) + self.groupBox_annotate_molecular_formulae = QtWidgets.QGroupBox(self.scrollAreaWidgetContents) self.groupBox_annotate_molecular_formulae.setTitle("") self.groupBox_annotate_molecular_formulae.setObjectName("groupBox_annotate_molecular_formulae") - self.label_filename_mf = QtWidgets.QLabel(self.groupBox_annotate_molecular_formulae) - self.label_filename_mf.setEnabled(False) - self.label_filename_mf.setGeometry(QtCore.QRect(10, 65, 181, 16)) - self.label_filename_mf.setObjectName("label_filename_mf") + self.gridLayout_7 = QtWidgets.QGridLayout(self.groupBox_annotate_molecular_formulae) + self.gridLayout_7.setContentsMargins(10, 5, 10, 5) + self.gridLayout_7.setVerticalSpacing(5) + self.gridLayout_7.setObjectName("gridLayout_7") self.lineEdit_filename_mf = QtWidgets.QLineEdit(self.groupBox_annotate_molecular_formulae) self.lineEdit_filename_mf.setEnabled(False) - self.lineEdit_filename_mf.setGeometry(QtCore.QRect(120, 62, 161, 20)) self.lineEdit_filename_mf.setText("") self.lineEdit_filename_mf.setReadOnly(True) self.lineEdit_filename_mf.setObjectName("lineEdit_filename_mf") - self.checkBox_annotate_molecular_formulae = QtWidgets.QCheckBox(self.groupBox_annotate_molecular_formulae) - self.checkBox_annotate_molecular_formulae.setEnabled(True) - self.checkBox_annotate_molecular_formulae.setGeometry(QtCore.QRect(10, -1, 281, 17)) - font = QtGui.QFont() - font.setFamily(".SF NS Text") - font.setPointSize(13) - font.setWeight(75) - font.setBold(True) - self.checkBox_annotate_molecular_formulae.setFont(font) - self.checkBox_annotate_molecular_formulae.setChecked(True) - self.checkBox_annotate_molecular_formulae.setObjectName("checkBox_annotate_molecular_formulae") - self.comboBox_source_mf = QtWidgets.QComboBox(self.groupBox_annotate_molecular_formulae) - self.comboBox_source_mf.setGeometry(QtCore.QRect(120, 30, 251, 22)) - self.comboBox_source_mf.setObjectName("comboBox_source_mf") - self.comboBox_source_mf.addItem("") - self.comboBox_source_mf.addItem("") - self.label_source_mf = QtWidgets.QLabel(self.groupBox_annotate_molecular_formulae) - self.label_source_mf.setEnabled(True) - self.label_source_mf.setGeometry(QtCore.QRect(10, 33, 101, 16)) - self.label_source_mf.setObjectName("label_source_mf") + self.gridLayout_7.addWidget(self.lineEdit_filename_mf, 2, 1, 1, 1) + self.doubleSpinBox_mf_ppm_error = QtWidgets.QDoubleSpinBox(self.groupBox_annotate_molecular_formulae) + self.doubleSpinBox_mf_ppm_error.setEnabled(False) + self.doubleSpinBox_mf_ppm_error.setMaximum(100000.0) + self.doubleSpinBox_mf_ppm_error.setProperty("value", 5.0) + self.doubleSpinBox_mf_ppm_error.setObjectName("doubleSpinBox_mf_ppm_error") + self.gridLayout_7.addWidget(self.doubleSpinBox_mf_ppm_error, 2, 4, 1, 1) self.pushButton_filename_mf = QtWidgets.QPushButton(self.groupBox_annotate_molecular_formulae) self.pushButton_filename_mf.setEnabled(False) - self.pushButton_filename_mf.setGeometry(QtCore.QRect(290, 60, 71, 23)) self.pushButton_filename_mf.setMinimumSize(QtCore.QSize(63, 23)) self.pushButton_filename_mf.setFlat(False) self.pushButton_filename_mf.setObjectName("pushButton_filename_mf") + self.gridLayout_7.addWidget(self.pushButton_filename_mf, 2, 2, 1, 1) self.label_max_mz = QtWidgets.QLabel(self.groupBox_annotate_molecular_formulae) - self.label_max_mz.setEnabled(True) - self.label_max_mz.setGeometry(QtCore.QRect(390, 30, 121, 16)) + self.label_max_mz.setEnabled(False) self.label_max_mz.setObjectName("label_max_mz") + self.gridLayout_7.addWidget(self.label_max_mz, 1, 3, 1, 1) + self.checkBox_mf_pp_rules = QtWidgets.QCheckBox(self.groupBox_annotate_molecular_formulae) + self.checkBox_mf_pp_rules.setEnabled(False) + self.checkBox_mf_pp_rules.setChecked(True) + self.checkBox_mf_pp_rules.setObjectName("checkBox_mf_pp_rules") + self.gridLayout_7.addWidget(self.checkBox_mf_pp_rules, 1, 5, 1, 1) self.spinBox_max_mz = QtWidgets.QSpinBox(self.groupBox_annotate_molecular_formulae) - self.spinBox_max_mz.setGeometry(QtCore.QRect(490, 27, 111, 22)) + self.spinBox_max_mz.setEnabled(False) self.spinBox_max_mz.setMaximum(1000000) self.spinBox_max_mz.setProperty("value", 500) self.spinBox_max_mz.setDisplayIntegerBase(10) self.spinBox_max_mz.setObjectName("spinBox_max_mz") + self.gridLayout_7.addWidget(self.spinBox_max_mz, 1, 4, 1, 1) + self.label_mf_ppm_tolerance = QtWidgets.QLabel(self.groupBox_annotate_molecular_formulae) + self.label_mf_ppm_tolerance.setEnabled(False) + self.label_mf_ppm_tolerance.setObjectName("label_mf_ppm_tolerance") + self.gridLayout_7.addWidget(self.label_mf_ppm_tolerance, 2, 3, 1, 1) + self.comboBox_source_mf = QtWidgets.QComboBox(self.groupBox_annotate_molecular_formulae) + self.comboBox_source_mf.setEnabled(False) + self.comboBox_source_mf.setObjectName("comboBox_source_mf") + self.comboBox_source_mf.addItem("") + self.comboBox_source_mf.addItem("") + self.gridLayout_7.addWidget(self.comboBox_source_mf, 1, 1, 1, 1) + self.label_filename_mf = QtWidgets.QLabel(self.groupBox_annotate_molecular_formulae) + self.label_filename_mf.setEnabled(False) + self.label_filename_mf.setObjectName("label_filename_mf") + self.gridLayout_7.addWidget(self.label_filename_mf, 2, 0, 1, 1) self.checkBox_heuristic_rules = QtWidgets.QCheckBox(self.groupBox_annotate_molecular_formulae) - self.checkBox_heuristic_rules.setEnabled(True) - self.checkBox_heuristic_rules.setGeometry(QtCore.QRect(630, 30, 171, 17)) + self.checkBox_heuristic_rules.setEnabled(False) self.checkBox_heuristic_rules.setChecked(True) self.checkBox_heuristic_rules.setObjectName("checkBox_heuristic_rules") - self.label_cpds_ppm_tolerance_2 = QtWidgets.QLabel(self.groupBox_annotate_molecular_formulae) - self.label_cpds_ppm_tolerance_2.setEnabled(True) - self.label_cpds_ppm_tolerance_2.setGeometry(QtCore.QRect(390, 65, 141, 16)) - self.label_cpds_ppm_tolerance_2.setObjectName("label_cpds_ppm_tolerance_2") - self.doubleSpinBox_cpds_ppm_error = QtWidgets.QDoubleSpinBox(self.groupBox_annotate_molecular_formulae) - self.doubleSpinBox_cpds_ppm_error.setGeometry(QtCore.QRect(520, 64, 111, 22)) - self.doubleSpinBox_cpds_ppm_error.setMaximum(100000.0) - self.doubleSpinBox_cpds_ppm_error.setProperty("value", 5.0) - self.doubleSpinBox_cpds_ppm_error.setObjectName("doubleSpinBox_cpds_ppm_error") - self.groupBox_annotate_compounds = QtWidgets.QGroupBox(self.centralwidget) - self.groupBox_annotate_compounds.setGeometry(QtCore.QRect(20, 492, 761, 151)) - self.groupBox_annotate_compounds.setTitle("") - self.groupBox_annotate_compounds.setObjectName("groupBox_annotate_compounds") - self.checkBox_annotate_compounds = QtWidgets.QCheckBox(self.groupBox_annotate_compounds) - self.checkBox_annotate_compounds.setEnabled(True) - self.checkBox_annotate_compounds.setGeometry(QtCore.QRect(10, -2, 351, 17)) + self.gridLayout_7.addWidget(self.checkBox_heuristic_rules, 2, 5, 1, 1) + self.label_source_mf = QtWidgets.QLabel(self.groupBox_annotate_molecular_formulae) + self.label_source_mf.setEnabled(False) + self.label_source_mf.setObjectName("label_source_mf") + self.gridLayout_7.addWidget(self.label_source_mf, 1, 0, 1, 1) + self.checkBox_annotate_molecular_formulae = QtWidgets.QCheckBox(self.groupBox_annotate_molecular_formulae) + self.checkBox_annotate_molecular_formulae.setEnabled(True) font = QtGui.QFont() font.setFamily(".SF NS Text") font.setPointSize(13) font.setWeight(75) font.setBold(True) - self.checkBox_annotate_compounds.setFont(font) - self.checkBox_annotate_compounds.setChecked(True) - self.checkBox_annotate_compounds.setObjectName("checkBox_annotate_compounds") + self.checkBox_annotate_molecular_formulae.setFont(font) + self.checkBox_annotate_molecular_formulae.setCheckable(True) + self.checkBox_annotate_molecular_formulae.setChecked(False) + self.checkBox_annotate_molecular_formulae.setObjectName("checkBox_annotate_molecular_formulae") + self.gridLayout_7.addWidget(self.checkBox_annotate_molecular_formulae, 0, 0, 1, 2) + self.verticalLayout_2.addWidget(self.groupBox_annotate_molecular_formulae) + self.groupBox_annotate_compounds = QtWidgets.QGroupBox(self.scrollAreaWidgetContents) + self.groupBox_annotate_compounds.setTitle("") + self.groupBox_annotate_compounds.setObjectName("groupBox_annotate_compounds") + self.gridLayout_6 = QtWidgets.QGridLayout(self.groupBox_annotate_compounds) + self.gridLayout_6.setContentsMargins(10, 5, 10, 5) + self.gridLayout_6.setVerticalSpacing(5) + self.gridLayout_6.setObjectName("gridLayout_6") + self.pushButton_filename_reference = QtWidgets.QPushButton(self.groupBox_annotate_compounds) + self.pushButton_filename_reference.setEnabled(False) + self.pushButton_filename_reference.setMinimumSize(QtCore.QSize(63, 23)) + self.pushButton_filename_reference.setObjectName("pushButton_filename_reference") + self.gridLayout_6.addWidget(self.pushButton_filename_reference, 2, 2, 1, 1) self.lineEdit_filename_reference = QtWidgets.QLineEdit(self.groupBox_annotate_compounds) self.lineEdit_filename_reference.setEnabled(False) - self.lineEdit_filename_reference.setGeometry(QtCore.QRect(480, 70, 161, 20)) self.lineEdit_filename_reference.setText("") self.lineEdit_filename_reference.setReadOnly(True) self.lineEdit_filename_reference.setObjectName("lineEdit_filename_reference") - self.pushButton_filename_reference = QtWidgets.QPushButton(self.groupBox_annotate_compounds) - self.pushButton_filename_reference.setEnabled(False) - self.pushButton_filename_reference.setGeometry(QtCore.QRect(650, 69, 71, 23)) - self.pushButton_filename_reference.setMinimumSize(QtCore.QSize(63, 23)) - self.pushButton_filename_reference.setObjectName("pushButton_filename_reference") + self.gridLayout_6.addWidget(self.lineEdit_filename_reference, 2, 1, 1, 1) + self.checkBox_cpds_pp_rules = QtWidgets.QCheckBox(self.groupBox_annotate_compounds) + self.checkBox_cpds_pp_rules.setEnabled(True) + self.checkBox_cpds_pp_rules.setChecked(True) + self.checkBox_cpds_pp_rules.setObjectName("checkBox_cpds_pp_rules") + self.gridLayout_6.addWidget(self.checkBox_cpds_pp_rules, 3, 3, 1, 1) self.checkBox_filename_reference = QtWidgets.QCheckBox(self.groupBox_annotate_compounds) self.checkBox_filename_reference.setEnabled(True) - self.checkBox_filename_reference.setGeometry(QtCore.QRect(480, 50, 251, 17)) self.checkBox_filename_reference.setChecked(False) self.checkBox_filename_reference.setObjectName("checkBox_filename_reference") + self.gridLayout_6.addWidget(self.checkBox_filename_reference, 1, 1, 1, 1) self.listWidget_databases = QtWidgets.QListWidget(self.groupBox_annotate_compounds) - self.listWidget_databases.setGeometry(QtCore.QRect(10, 50, 451, 91)) + self.listWidget_databases.setMinimumSize(QtCore.QSize(510, 0)) + self.listWidget_databases.setMaximumSize(QtCore.QSize(16777215, 95)) self.listWidget_databases.setObjectName("listWidget_databases") - self.label_databases = QtWidgets.QLabel(self.groupBox_annotate_compounds) - self.label_databases.setGeometry(QtCore.QRect(10, 30, 71, 16)) - self.label_databases.setObjectName("label_databases") - self.label_mf_ppm_tolerance = QtWidgets.QLabel(self.groupBox_annotate_compounds) - self.label_mf_ppm_tolerance.setEnabled(True) - self.label_mf_ppm_tolerance.setGeometry(QtCore.QRect(480, 110, 141, 16)) - self.label_mf_ppm_tolerance.setObjectName("label_mf_ppm_tolerance") - self.doubleSpinBox_mf_ppm_error = QtWidgets.QDoubleSpinBox(self.groupBox_annotate_compounds) - self.doubleSpinBox_mf_ppm_error.setGeometry(QtCore.QRect(610, 108, 111, 22)) - self.doubleSpinBox_mf_ppm_error.setMaximum(100000.0) - self.doubleSpinBox_mf_ppm_error.setProperty("value", 5.0) - self.doubleSpinBox_mf_ppm_error.setObjectName("doubleSpinBox_mf_ppm_error") - self.groupBox_create_summary = QtWidgets.QGroupBox(self.centralwidget) - self.groupBox_create_summary.setGeometry(QtCore.QRect(20, 657, 761, 101)) + self.gridLayout_6.addWidget(self.listWidget_databases, 1, 0, 4, 1) + self.doubleSpinBox_cpds_ppm_error = QtWidgets.QDoubleSpinBox(self.groupBox_annotate_compounds) + self.doubleSpinBox_cpds_ppm_error.setMaximum(100000.0) + self.doubleSpinBox_cpds_ppm_error.setProperty("value", 5.0) + self.doubleSpinBox_cpds_ppm_error.setObjectName("doubleSpinBox_cpds_ppm_error") + self.gridLayout_6.addWidget(self.doubleSpinBox_cpds_ppm_error, 3, 2, 1, 1) + self.label_cpds_ppm_tolerance = QtWidgets.QLabel(self.groupBox_annotate_compounds) + self.label_cpds_ppm_tolerance.setEnabled(True) + self.label_cpds_ppm_tolerance.setObjectName("label_cpds_ppm_tolerance") + self.gridLayout_6.addWidget(self.label_cpds_ppm_tolerance, 3, 1, 1, 1) + self.checkBox_annotate_compounds = QtWidgets.QCheckBox(self.groupBox_annotate_compounds) + self.checkBox_annotate_compounds.setEnabled(True) + font = QtGui.QFont() + font.setFamily(".SF NS Text") + font.setPointSize(13) + font.setWeight(75) + font.setBold(True) + self.checkBox_annotate_compounds.setFont(font) + self.checkBox_annotate_compounds.setChecked(True) + self.checkBox_annotate_compounds.setObjectName("checkBox_annotate_compounds") + self.gridLayout_6.addWidget(self.checkBox_annotate_compounds, 0, 0, 1, 1) + self.verticalLayout_2.addWidget(self.groupBox_annotate_compounds) + self.groupBox_create_summary = QtWidgets.QGroupBox(self.scrollAreaWidgetContents) self.groupBox_create_summary.setTitle("") self.groupBox_create_summary.setObjectName("groupBox_create_summary") - self.lineEdit_summary_filename = QtWidgets.QLineEdit(self.groupBox_create_summary) - self.lineEdit_summary_filename.setGeometry(QtCore.QRect(110, 31, 161, 20)) - self.lineEdit_summary_filename.setText("") - self.lineEdit_summary_filename.setReadOnly(True) - self.lineEdit_summary_filename.setObjectName("lineEdit_summary_filename") + self.gridLayout_2 = QtWidgets.QGridLayout(self.groupBox_create_summary) + self.gridLayout_2.setContentsMargins(10, 5, 10, 5) + self.gridLayout_2.setVerticalSpacing(5) + self.gridLayout_2.setObjectName("gridLayout_2") self.pushButton_summary_filename = QtWidgets.QPushButton(self.groupBox_create_summary) - self.pushButton_summary_filename.setGeometry(QtCore.QRect(280, 30, 71, 23)) self.pushButton_summary_filename.setMinimumSize(QtCore.QSize(63, 23)) self.pushButton_summary_filename.setObjectName("pushButton_summary_filename") - self.label_summary_filename = QtWidgets.QLabel(self.groupBox_create_summary) - self.label_summary_filename.setEnabled(True) - self.label_summary_filename.setGeometry(QtCore.QRect(12, 33, 111, 16)) - self.label_summary_filename.setObjectName("label_summary_filename") + self.gridLayout_2.addWidget(self.pushButton_summary_filename, 1, 2, 1, 1) self.spinBox_mz_digits = QtWidgets.QSpinBox(self.groupBox_create_summary) self.spinBox_mz_digits.setEnabled(False) - self.spinBox_mz_digits.setGeometry(QtCore.QRect(590, 61, 41, 22)) self.spinBox_mz_digits.setMaximum(1000000) self.spinBox_mz_digits.setProperty("value", 5) self.spinBox_mz_digits.setDisplayIntegerBase(10) self.spinBox_mz_digits.setObjectName("spinBox_mz_digits") - self.checkBox_mz_digits = QtWidgets.QCheckBox(self.groupBox_create_summary) - self.checkBox_mz_digits.setEnabled(True) - self.checkBox_mz_digits.setGeometry(QtCore.QRect(440, 62, 121, 20)) - self.checkBox_mz_digits.setChecked(False) - self.checkBox_mz_digits.setObjectName("checkBox_mz_digits") + self.gridLayout_2.addWidget(self.spinBox_mz_digits, 3, 5, 1, 1) + self.checkBox_convert_rt = QtWidgets.QCheckBox(self.groupBox_create_summary) + self.checkBox_convert_rt.setEnabled(True) + self.checkBox_convert_rt.setChecked(False) + self.checkBox_convert_rt.setObjectName("checkBox_convert_rt") + self.gridLayout_2.addWidget(self.checkBox_convert_rt, 3, 6, 1, 1) + self.comboBox_annotations_format = QtWidgets.QComboBox(self.groupBox_create_summary) + self.comboBox_annotations_format.setObjectName("comboBox_annotations_format") + self.comboBox_annotations_format.addItem("") + self.comboBox_annotations_format.addItem("") + self.comboBox_annotations_format.addItem("") + self.gridLayout_2.addWidget(self.comboBox_annotations_format, 3, 1, 1, 1) + self.label_separator = QtWidgets.QLabel(self.groupBox_create_summary) + self.label_separator.setEnabled(True) + self.label_separator.setObjectName("label_separator") + self.gridLayout_2.addWidget(self.label_separator, 1, 3, 1, 1) + self.lineEdit_summary_filename = QtWidgets.QLineEdit(self.groupBox_create_summary) + self.lineEdit_summary_filename.setText("") + self.lineEdit_summary_filename.setReadOnly(True) + self.lineEdit_summary_filename.setObjectName("lineEdit_summary_filename") + self.gridLayout_2.addWidget(self.lineEdit_summary_filename, 1, 1, 1, 1) + self.comboBox_convert_rt = QtWidgets.QComboBox(self.groupBox_create_summary) + self.comboBox_convert_rt.setEnabled(False) + self.comboBox_convert_rt.setObjectName("comboBox_convert_rt") + self.comboBox_convert_rt.addItem("") + self.comboBox_convert_rt.addItem("") + self.gridLayout_2.addWidget(self.comboBox_convert_rt, 3, 7, 1, 1) self.checkBox_create_summary = QtWidgets.QCheckBox(self.groupBox_create_summary) self.checkBox_create_summary.setEnabled(True) - self.checkBox_create_summary.setGeometry(QtCore.QRect(10, -1, 261, 17)) font = QtGui.QFont() font.setFamily(".SF NS Text") font.setPointSize(13) @@ -433,127 +525,115 @@ def setupUi(self, MainWindow): self.checkBox_create_summary.setFont(font) self.checkBox_create_summary.setChecked(True) self.checkBox_create_summary.setObjectName("checkBox_create_summary") - self.comboBox_annotations_format = QtWidgets.QComboBox(self.groupBox_create_summary) - self.comboBox_annotations_format.setGeometry(QtCore.QRect(460, 30, 281, 22)) - self.comboBox_annotations_format.setObjectName("comboBox_annotations_format") - self.comboBox_annotations_format.addItem("") - self.comboBox_annotations_format.addItem("") - self.comboBox_annotations_format.addItem("") - self.label_annotations_format = QtWidgets.QLabel(self.groupBox_create_summary) - self.label_annotations_format.setEnabled(True) - self.label_annotations_format.setGeometry(QtCore.QRect(372, 33, 111, 16)) - self.label_annotations_format.setObjectName("label_annotations_format") - self.comboBox_convert_rt = QtWidgets.QComboBox(self.centralwidget) - self.comboBox_convert_rt.setEnabled(False) - self.comboBox_convert_rt.setGeometry(QtCore.QRect(300, 720, 121, 22)) - self.comboBox_convert_rt.setObjectName("comboBox_convert_rt") - self.comboBox_convert_rt.addItem("") - self.comboBox_convert_rt.addItem("") - self.label_separator = QtWidgets.QLabel(self.centralwidget) - self.label_separator.setEnabled(True) - self.label_separator.setGeometry(QtCore.QRect(32, 720, 101, 16)) - self.label_separator.setObjectName("label_separator") - self.comboBox_separator = QtWidgets.QComboBox(self.centralwidget) - self.comboBox_separator.setGeometry(QtCore.QRect(100, 718, 71, 22)) + self.gridLayout_2.addWidget(self.checkBox_create_summary, 0, 0, 1, 2) + self.checkBox_mz_digits = QtWidgets.QCheckBox(self.groupBox_create_summary) + self.checkBox_mz_digits.setEnabled(True) + self.checkBox_mz_digits.setChecked(False) + self.checkBox_mz_digits.setObjectName("checkBox_mz_digits") + self.gridLayout_2.addWidget(self.checkBox_mz_digits, 3, 2, 1, 2) + self.comboBox_separator = QtWidgets.QComboBox(self.groupBox_create_summary) self.comboBox_separator.setObjectName("comboBox_separator") self.comboBox_separator.addItem("") self.comboBox_separator.addItem("") - self.checkBox_convert_rt = QtWidgets.QCheckBox(self.centralwidget) - self.checkBox_convert_rt.setEnabled(True) - self.checkBox_convert_rt.setGeometry(QtCore.QRect(210, 720, 91, 20)) - self.checkBox_convert_rt.setChecked(False) - self.checkBox_convert_rt.setObjectName("checkBox_convert_rt") + self.gridLayout_2.addWidget(self.comboBox_separator, 1, 5, 1, 1) + self.label_summary_filename = QtWidgets.QLabel(self.groupBox_create_summary) + self.label_summary_filename.setEnabled(True) + self.label_summary_filename.setObjectName("label_summary_filename") + self.gridLayout_2.addWidget(self.label_summary_filename, 1, 0, 1, 1) + self.label_annotations_format = QtWidgets.QLabel(self.groupBox_create_summary) + self.label_annotations_format.setEnabled(True) + self.label_annotations_format.setObjectName("label_annotations_format") + self.gridLayout_2.addWidget(self.label_annotations_format, 3, 0, 1, 1) + self.verticalLayout_2.addWidget(self.groupBox_create_summary) + self.scrollArea.setWidget(self.scrollAreaWidgetContents) + self.pushButton_start = QtWidgets.QPushButton(self.centralwidget) + self.pushButton_start.setGeometry(QtCore.QRect(860, 740, 100, 32)) + self.pushButton_start.setObjectName("pushButton_start") + self.pushButton_cancel = QtWidgets.QPushButton(self.centralwidget) + self.pushButton_cancel.setGeometry(QtCore.QRect(760, 740, 100, 32)) + self.pushButton_cancel.setMaximumSize(QtCore.QSize(16777204, 16777215)) + self.pushButton_cancel.setObjectName("pushButton_cancel") MainWindow.setCentralWidget(self.centralwidget) - self.menubar = QtWidgets.QMenuBar(MainWindow) - self.menubar.setGeometry(QtCore.QRect(0, 0, 795, 22)) - self.menubar.setObjectName("menubar") - self.menuAdd_example_data = QtWidgets.QMenu(self.menubar) - self.menuAdd_example_data.setObjectName("menuAdd_example_data") - MainWindow.setMenuBar(self.menubar) - self.statusbar = QtWidgets.QStatusBar(MainWindow) - self.statusbar.setObjectName("statusbar") - MainWindow.setStatusBar(self.statusbar) self.actionExampleData = QtWidgets.QAction(MainWindow) self.actionExampleData.setObjectName("actionExampleData") self.actionAbout = QtWidgets.QAction(MainWindow) self.actionAbout.setObjectName("actionAbout") - self.menuAdd_example_data.addAction(self.actionExampleData) - self.menuAdd_example_data.addAction(self.actionAbout) - self.menubar.addAction(self.menuAdd_example_data.menuAction()) self.retranslateUi(MainWindow) QtCore.QMetaObject.connectSlotsByName(MainWindow) def retranslateUi(self, MainWindow): - MainWindow.setWindowTitle(QtWidgets.QApplication.translate("MainWindow", "BEAMSpy - Birmingham mEtabolite Annotation for Mass Spectrometry", None, -1)) + MainWindow.setWindowTitle(QtWidgets.QApplication.translate("MainWindow", "BEAMSpy - Birmingham mEtabolite Annotation for Mass Spectrometry (Python package)", None, -1)) self.pushButton_peaklist.setText(QtWidgets.QApplication.translate("MainWindow", "Browse...", None, -1)) - self.label_peaklist.setText(QtWidgets.QApplication.translate("MainWindow", "Peaklist:", None, -1)) + self.pushButton_graph.setText(QtWidgets.QApplication.translate("MainWindow", "Save as...", None, -1)) + self.pushButton_default_adduct_library.setText(QtWidgets.QApplication.translate("MainWindow", "Browse...", None, -1)) + self.label_graph.setText(QtWidgets.QApplication.translate("MainWindow", "Graph:", None, -1)) + self.pushButton_sql_database.setText(QtWidgets.QApplication.translate("MainWindow", "Save as...", None, -1)) + self.pushButton_wd.setText(QtWidgets.QApplication.translate("MainWindow", "Browse...", None, -1)) + self.label_default_adduct_library.setText(QtWidgets.QApplication.translate("MainWindow", "Adduct library:", None, -1)) self.label_intensity_matrix.setText(QtWidgets.QApplication.translate("MainWindow", "Intensity matrix:", None, -1)) + self.lineEdit_default_adduct_library.setText(QtWidgets.QApplication.translate("MainWindow", "Use default", None, -1)) + self.pushButton_peak_matrix.setText(QtWidgets.QApplication.translate("MainWindow", "Browse...", None, -1)) + self.label_ion_mode.setText(QtWidgets.QApplication.translate("MainWindow", "Ion mode:", None, -1)) self.comboBox_ion_mode.setItemText(0, QtWidgets.QApplication.translate("MainWindow", "Positive", None, -1)) self.comboBox_ion_mode.setItemText(1, QtWidgets.QApplication.translate("MainWindow", "Negative", None, -1)) - self.label_ion_mode.setText(QtWidgets.QApplication.translate("MainWindow", "Ion mode:", None, -1)) - self.label_data_files.setText(QtWidgets.QApplication.translate("MainWindow", "Data Files & General Settings", None, -1)) - self.lineEdit_default_adduct_library.setText(QtWidgets.QApplication.translate("MainWindow", "Use default", None, -1)) - self.pushButton_default_adduct_library.setText(QtWidgets.QApplication.translate("MainWindow", "Browse...", None, -1)) self.label_sql_database.setText(QtWidgets.QApplication.translate("MainWindow", "Database:", None, -1)) - self.pushButton_sql_database.setText(QtWidgets.QApplication.translate("MainWindow", "Save as...", None, -1)) - self.pushButton_graph.setText(QtWidgets.QApplication.translate("MainWindow", "Save as...", None, -1)) - self.label_graph.setText(QtWidgets.QApplication.translate("MainWindow", "Graph:", None, -1)) - self.pushButton_peak_matrix.setText(QtWidgets.QApplication.translate("MainWindow", "Browse...", None, -1)) - self.label_default_adduct_library.setText(QtWidgets.QApplication.translate("MainWindow", "Adduct library:", None, -1)) - self.label_max_rt.setText(QtWidgets.QApplication.translate("MainWindow", "Maximum RT difference (sec):", None, -1)) + self.label_peaklist.setText(QtWidgets.QApplication.translate("MainWindow", "Peaklist:", None, -1)) + self.label_wd.setText(QtWidgets.QApplication.translate("MainWindow", "Working directory:", None, -1)) + self.label_data_files.setText(QtWidgets.QApplication.translate("MainWindow", "Data Files & General Settings", None, -1)) + self.label_grouping_method.setText(QtWidgets.QApplication.translate("MainWindow", "Grouping method:", None, -1)) self.checkBox_group_features.setText(QtWidgets.QApplication.translate("MainWindow", "Group Features", None, -1)) - self.comboBox_grouping_method.setItemText(0, QtWidgets.QApplication.translate("MainWindow", "Pearson correlation", None, -1)) - self.comboBox_grouping_method.setItemText(1, QtWidgets.QApplication.translate("MainWindow", "Spearman-rank correlation", None, -1)) self.label_tool_p_value.setText(QtWidgets.QApplication.translate("MainWindow", "P-value threshold:", None, -1)) - self.label_grouping_method.setText(QtWidgets.QApplication.translate("MainWindow", "Grouping method:", None, -1)) + self.label_max_rt.setText(QtWidgets.QApplication.translate("MainWindow", "Maximum RT difference (sec):", None, -1)) self.label_tool_coefficient.setText(QtWidgets.QApplication.translate("MainWindow", "Coefficent threshold:", None, -1)) + self.comboBox_grouping_method.setItemText(0, QtWidgets.QApplication.translate("MainWindow", "Pearson correlation", None, -1)) + self.comboBox_grouping_method.setItemText(1, QtWidgets.QApplication.translate("MainWindow", "Spearman-rank correlation", None, -1)) self.label_grouping_ncpus.setText(QtWidgets.QApplication.translate("MainWindow", "cpus:", None, -1)) self.label_grouping_block.setText(QtWidgets.QApplication.translate("MainWindow", "Block size:", None, -1)) - self.lineEdit_adduct_library.setText(QtWidgets.QApplication.translate("MainWindow", "Use default", None, -1)) - self.pushButton_adduct_library.setText(QtWidgets.QApplication.translate("MainWindow", "Browse...", None, -1)) - self.checkBox_annotate_peak_patterns.setText(QtWidgets.QApplication.translate("MainWindow", "Annotate Peak Patterns", None, -1)) - self.checkBox_adduct_library.setText(QtWidgets.QApplication.translate("MainWindow", "Adducts", None, -1)) + self.pushButton_isotopes.setText(QtWidgets.QApplication.translate("MainWindow", "Browse...", None, -1)) + self.label_max_monomer_units.setText(QtWidgets.QApplication.translate("MainWindow", "Monomer Units:", None, -1)) + self.checkBox_neutral_losses.setText(QtWidgets.QApplication.translate("MainWindow", "Neutral losses", None, -1)) + self.pushButton_neutral_losses.setText(QtWidgets.QApplication.translate("MainWindow", "Browse...", None, -1)) self.checkBox_isotopes.setText(QtWidgets.QApplication.translate("MainWindow", "Isotopes", None, -1)) - self.pushButton_multiple_charged.setText(QtWidgets.QApplication.translate("MainWindow", "Browse...", None, -1)) - self.lineEdit_multiple_charged.setText(QtWidgets.QApplication.translate("MainWindow", "Use default", None, -1)) self.lineEdit_isotopes.setText(QtWidgets.QApplication.translate("MainWindow", "Use default", None, -1)) - self.pushButton_isotopes.setText(QtWidgets.QApplication.translate("MainWindow", "Browse...", None, -1)) - self.checkBox_multiple_charged.setText(QtWidgets.QApplication.translate("MainWindow", "Multiple charged ions", None, -1)) + self.checkBox_annotate_peak_patterns.setText(QtWidgets.QApplication.translate("MainWindow", "Annotate Peak Patterns", None, -1)) + self.checkBox_adduct_library.setText(QtWidgets.QApplication.translate("MainWindow", "Adducts", None, -1)) self.checkBox_oligomers.setText(QtWidgets.QApplication.translate("MainWindow", "Oligomers", None, -1)) - self.label_max_monomer_units.setText(QtWidgets.QApplication.translate("MainWindow", "Monomer Units:", None, -1)) self.label_pp_ppm_tolerance.setText(QtWidgets.QApplication.translate("MainWindow", "Mass tolerance (ppm):", None, -1)) - self.pushButton_start.setText(QtWidgets.QApplication.translate("MainWindow", "Start", None, -1)) - self.pushButton_cancel.setText(QtWidgets.QApplication.translate("MainWindow", "Cancel", None, -1)) - self.label_filename_mf.setText(QtWidgets.QApplication.translate("MainWindow", "Reference file:", None, -1)) - self.checkBox_annotate_molecular_formulae.setText(QtWidgets.QApplication.translate("MainWindow", "Annotate Molecular Formulae", None, -1)) - self.comboBox_source_mf.setItemText(0, QtWidgets.QApplication.translate("MainWindow", "http://mfdb.bham.ac.uk", None, -1)) - self.comboBox_source_mf.setItemText(1, QtWidgets.QApplication.translate("MainWindow", "Tab-delimited text file", None, -1)) - self.label_source_mf.setText(QtWidgets.QApplication.translate("MainWindow", "Source:", None, -1)) + self.lineEdit_neutral_losses.setText(QtWidgets.QApplication.translate("MainWindow", "Use default", None, -1)) + self.lineEdit_adduct_library.setText(QtWidgets.QApplication.translate("MainWindow", "Use default", None, -1)) + self.pushButton_adduct_library.setText(QtWidgets.QApplication.translate("MainWindow", "Browse...", None, -1)) self.pushButton_filename_mf.setText(QtWidgets.QApplication.translate("MainWindow", "Browse...", None, -1)) self.label_max_mz.setText(QtWidgets.QApplication.translate("MainWindow", "Maximum m/z:", None, -1)) + self.checkBox_mf_pp_rules.setText(QtWidgets.QApplication.translate("MainWindow", "use peak patterns", None, -1)) + self.label_mf_ppm_tolerance.setText(QtWidgets.QApplication.translate("MainWindow", "Mass tolerance (ppm):", None, -1)) + self.comboBox_source_mf.setItemText(0, QtWidgets.QApplication.translate("MainWindow", "https://mfdb.bham.ac.uk", None, -1)) + self.comboBox_source_mf.setItemText(1, QtWidgets.QApplication.translate("MainWindow", "Tab-delimited text file", None, -1)) + self.label_filename_mf.setText(QtWidgets.QApplication.translate("MainWindow", "Reference file:", None, -1)) self.checkBox_heuristic_rules.setText(QtWidgets.QApplication.translate("MainWindow", "Heuristic rules", None, -1)) - self.label_cpds_ppm_tolerance_2.setText(QtWidgets.QApplication.translate("MainWindow", "Mass tolerance (ppm):", None, -1)) - self.checkBox_annotate_compounds.setText(QtWidgets.QApplication.translate("MainWindow", "Annotate Compounds / Metabolites", None, -1)) + self.label_source_mf.setText(QtWidgets.QApplication.translate("MainWindow", "Source:", None, -1)) + self.checkBox_annotate_molecular_formulae.setText(QtWidgets.QApplication.translate("MainWindow", "Annotate Molecular Formulae", None, -1)) self.pushButton_filename_reference.setText(QtWidgets.QApplication.translate("MainWindow", "Browse...", None, -1)) + self.checkBox_cpds_pp_rules.setText(QtWidgets.QApplication.translate("MainWindow", "use peak patterns", None, -1)) self.checkBox_filename_reference.setText(QtWidgets.QApplication.translate("MainWindow", "Reference file", None, -1)) - self.label_databases.setText(QtWidgets.QApplication.translate("MainWindow", "Databases", None, -1)) - self.label_mf_ppm_tolerance.setText(QtWidgets.QApplication.translate("MainWindow", "Mass tolerance (ppm):", None, -1)) + self.label_cpds_ppm_tolerance.setText(QtWidgets.QApplication.translate("MainWindow", "Mass tolerance (ppm):", None, -1)) + self.checkBox_annotate_compounds.setText(QtWidgets.QApplication.translate("MainWindow", "Annotate Compounds / Metabolites", None, -1)) self.pushButton_summary_filename.setText(QtWidgets.QApplication.translate("MainWindow", "Save as...", None, -1)) - self.label_summary_filename.setText(QtWidgets.QApplication.translate("MainWindow", "Summary:", None, -1)) - self.checkBox_mz_digits.setText(QtWidgets.QApplication.translate("MainWindow", "Number of digits m/z:", None, -1)) - self.checkBox_create_summary.setText(QtWidgets.QApplication.translate("MainWindow", "Create summary", None, -1)) + self.checkBox_convert_rt.setText(QtWidgets.QApplication.translate("MainWindow", "Convert RT:", None, -1)) self.comboBox_annotations_format.setItemText(0, QtWidgets.QApplication.translate("MainWindow", "Multiple rows for each feature", None, -1)) self.comboBox_annotations_format.setItemText(1, QtWidgets.QApplication.translate("MainWindow", "Single row for each feature and separate columns", None, -1)) self.comboBox_annotations_format.setItemText(2, QtWidgets.QApplication.translate("MainWindow", "Single row for each feature and merged columns", None, -1)) - self.label_annotations_format.setText(QtWidgets.QApplication.translate("MainWindow", "Annotations:", None, -1)) + self.label_separator.setText(QtWidgets.QApplication.translate("MainWindow", "Separator:", None, -1)) self.comboBox_convert_rt.setItemText(0, QtWidgets.QApplication.translate("MainWindow", "Minutes", None, -1)) self.comboBox_convert_rt.setItemText(1, QtWidgets.QApplication.translate("MainWindow", "Seconds", None, -1)) - self.label_separator.setText(QtWidgets.QApplication.translate("MainWindow", "Separator:", None, -1)) + self.checkBox_create_summary.setText(QtWidgets.QApplication.translate("MainWindow", "Create summary", None, -1)) + self.checkBox_mz_digits.setText(QtWidgets.QApplication.translate("MainWindow", "Number of digits m/z:", None, -1)) self.comboBox_separator.setItemText(0, QtWidgets.QApplication.translate("MainWindow", "tab", None, -1)) self.comboBox_separator.setItemText(1, QtWidgets.QApplication.translate("MainWindow", "comma", None, -1)) - self.checkBox_convert_rt.setText(QtWidgets.QApplication.translate("MainWindow", "Convert RT:", None, -1)) - self.menuAdd_example_data.setTitle(QtWidgets.QApplication.translate("MainWindow", "Help", None, -1)) + self.label_summary_filename.setText(QtWidgets.QApplication.translate("MainWindow", "Summary:", None, -1)) + self.label_annotations_format.setText(QtWidgets.QApplication.translate("MainWindow", "Annotations:", None, -1)) + self.pushButton_start.setText(QtWidgets.QApplication.translate("MainWindow", "Start", None, -1)) + self.pushButton_cancel.setText(QtWidgets.QApplication.translate("MainWindow", "Cancel", None, -1)) self.actionExampleData.setText(QtWidgets.QApplication.translate("MainWindow", "Add example data", None, -1)) self.actionAbout.setText(QtWidgets.QApplication.translate("MainWindow", "About", None, -1)) diff --git a/beamspy/qt/form.ui b/beamspy/qt/form.ui index a4f70fe..1ce4377 100644 --- a/beamspy/qt/form.ui +++ b/beamspy/qt/form.ui @@ -9,8 +9,8 @@ 0 0 - 795 - 830 + 974 + 775 @@ -20,1631 +20,1482 @@ false - + - 20 - 8 - 761 - 131 + 6 + 5 + 961 + 731 - - - - - - - 120 - 60 - 161 - 20 - - - - - - - true - - - - - true - - - - 120 - 30 - 161 - 20 - - - - - - - true - - - - - true - - - - 290 - 28 - 71 - 23 - - - - - 63 - 23 - - - - - 81 - 16777215 - - - - Browse... - - - - - true - - - - 12 - 30 - 81 - 16 - - - - Peaklist: - - - - - - 12 - 60 - 111 - 16 - - - - Intensity matrix: - - - - - - 500 - 58 - 111 - 22 - - - - - Positive - - - - - Negative - - - - - - - 400 - 60 - 71 - 16 - - - - Ion mode: - - - - - true - - - - 10 - 0 - 291 - 16 - - - - - .SF NS Text - 13 - 75 - true - - - - Data Files & General Settings - - - - - true - - - - 500 - 88 - 111 - 20 - - - - Use default - - - true - - - - - true - - - - 620 - 86 - 71 - 23 - - - - - 63 - 23 - - - - Browse... - - - - - true - - - - 12 - 92 - 121 - 16 - - - - Database: - - - - - true - - - - 120 - 92 - 161 - 20 - - - - - - - true - - - - - true - - - - 290 - 90 - 71 - 23 - - - - - 63 - 23 - - - - - 81 - 16777215 - - - - Save as... - - - - - true - - - - 498 - 30 - 161 - 20 - - - - - - - true - - - - - true - - - - 668 - 28 - 71 - 23 - - - - - 63 - 23 - - - - - 81 - 16777215 - - - - Save as... - - - - - true - - - - 400 - 30 - 111 - 16 - - - - Graph: - - - - - true - - - - 290 - 56 - 71 - 23 - - - - - 63 - 23 - - - - - 81 - 16777215 - - - - Browse... - - - - - true - - - - 400 - 90 - 141 - 16 - - - - Adduct library: - - - - - - - 20 - 150 - 761 - 91 - - - - - - - - true - - - - 10 - 33 - 191 - 16 - - - - Maximum RT difference (sec): - - - - - true - - - - 10 - -1 - 231 - 17 - - - - - .SF NS Text - 13 - 75 - true - - - - Group Features - - - true - - - - - - 190 - 30 - 81 - 22 - - - - 9999999.000000000000000 - - - 5.000000000000000 - - - - - - 410 - 31 - 151 - 22 - - - - - Pearson correlation - - - - - Spearman-rank correlation - - - - - - - 410 - 57 - 151 - 22 - - - - 10 - - - 1.000000000000000 - - - 0.010000000000000 - - - 0.010000000000000 - - - - - true - - - - 303 - 60 - 121 - 16 - - - - P-value threshold: - - - - - true - - - - 303 - 33 - 121 - 16 - - - - Grouping method: - - - - - - 190 - 57 - 81 - 22 - - - - 1.000000000000000 - - - 0.100000000000000 - - - 0.700000000000000 - - - - - true - - - - 10 - 60 - 181 - 16 - - - - Coefficent threshold: - - - - - - 666 - 57 - 81 - 22 - - - - 0 - - - 100000000.000000000000000 - - - 1000.000000000000000 - - - 5000.000000000000000 - - - - - - 666 - 30 - 81 - 22 - - - - 0 - - - 10000.000000000000000 - - - 1.000000000000000 - - - 1.000000000000000 - - - - - true - - - - 596 - 33 - 61 - 16 - - - - cpus: - - - - - true - - - - 596 - 59 - 61 - 16 - - - - Block size: - - - - - - - 20 - 250 - 761 - 121 - - - - - - - - true - - - - 10 - 58 - 91 - 20 - - - - Use default - - - true - - - - - true - - - - 110 - 56 - 71 - 23 - - - - - 63 - 23 - - - - Browse... - - - - - true - - - - 10 - -1 - 271 - 17 - - - - - .SF NS Text - 13 - 75 - true - - - - Annotate Peak Patterns - - - true - - - - - true - - - - 10 - 33 - 181 - 17 - - - - Adducts - - - true - - - - - true - - - - 200 - 33 - 171 - 17 - - - - Isotopes - - - true - - - - - true - - - - 490 - 56 - 71 - 23 - - - - - 63 - 23 - - - - Browse... - - - - - true - - - - 390 - 58 - 91 - 20 - - - - Use default - - - true - - - - - true - - - - 200 - 58 - 91 - 20 - - - - Use default - - - true - - - - - true - - - - 300 - 56 - 71 - 23 - - - - - 63 - 23 - - - - Browse... - - - - - true - - - - 390 - 33 - 171 - 17 - - - - Multiple charged ions - - - true - - - - - true - - - - 580 - 33 - 171 - 17 - - - - Oligomers - - - true - - - - - - 670 - 58 - 51 - 22 - - - - 2 - - - 1000000 - - - 2 - - - 10 - - - - - true - - - - 580 - 60 - 81 - 16 - - - - Monomer Units: - - - - - true - - - - 10 - 90 - 141 - 16 - - - - Mass tolerance (ppm): - - - - - - 140 - 88 - 111 - 22 - - - - 100000.000000000000000 - - - 5.000000000000000 - - - - - - - 640 - 764 - 71 - 23 - - - - Start - - - - - - 711 - 764 - 71 - 23 - - - - Cancel - - - - - - 20 - 383 - 761 - 101 - - - - - - - - false - - - - 10 - 65 - 181 - 16 - - - - Reference file: - - - - - false - - - - 120 - 62 - 161 - 20 - - - - - - - true - - - - - true - - - - 10 - -1 - 281 - 17 - - - - - .SF NS Text - 13 - 75 - true - - - - Annotate Molecular Formulae - - - true - - - - - - 120 - 30 - 251 - 22 - - - - - http://mfdb.bham.ac.uk - - - - - Tab-delimited text file - - - - - - true - - - - 10 - 33 - 101 - 16 - - - - Source: - - - - - false - - - - 290 - 60 - 71 - 23 - - - - - 63 - 23 - - - - Browse... - - - false - - - - - true - - - - 390 - 30 - 121 - 16 - - - - Maximum m/z: - - - - - - 490 - 27 - 111 - 22 - - - - 1000000 - - - 500 - - - 10 - - - - - true - - - - 630 - 30 - 171 - 17 - - - - Heuristic rules - - - true - - - - - true - - - - 390 - 65 - 141 - 16 - - - - Mass tolerance (ppm): - - - - - - 520 - 64 - 111 - 22 - - - - 100000.000000000000000 - - - 5.000000000000000 - - - - - - - 20 - 492 - 761 - 151 - - - - - - - - true - - - - 10 - -2 - 351 - 17 - - - - - .SF NS Text - 13 - 75 - true - - - - Annotate Compounds / Metabolites - - - true - - - - - false - - - - 480 - 70 - 161 - 20 - - - - - - - true - - - - - false - - - - 650 - 69 - 71 - 23 - - - - - 63 - 23 - - - - Browse... - - - - - true - - - - 480 - 50 - 251 - 17 - - - - Reference file - - - false - - - - - - 10 - 50 - 451 - 91 - - - - - - - 10 - 30 - 71 - 16 - - - - Databases - - - - - true - - - - 480 - 110 - 141 - 16 - - - - Mass tolerance (ppm): - - - - - - 610 - 108 - 111 - 22 - - - - 100000.000000000000000 - - - 5.000000000000000 - - - - - - - 20 - 657 - 761 - 101 - - - - + + true - - - - 110 - 31 - 161 - 20 - - - - - - - true - - - - - - 280 - 30 - 71 - 23 - - - - - 63 - 23 - - - - Save as... - - - - - true - - - - 12 - 33 - 111 - 16 - - - - Summary: - - - - - false - - - - 590 - 61 - 41 - 22 - - - - 1000000 - - - 5 - - - 10 - - - - - true - - - - 440 - 62 - 121 - 20 - - - - Number of digits m/z: - - - false - - - - - true - - - - 10 - -1 - 261 - 17 - - - - - .SF NS Text - 13 - 75 - true - - - - Create summary - - - true - - - - - - 460 - 30 - 281 - 22 - - - - - Multiple rows for each feature - - - - - Single row for each feature and separate columns - - - - - Single row for each feature and merged columns - - - - - - true - + - 372 - 33 - 111 - 16 - - - - Annotations: - + 0 + 0 + 959 + 729 + + + + + + + + + + + 10 + + + 5 + + + 10 + + + 5 + + + 5 + + + + + true + + + + 95 + 23 + + + + + 95 + 16777215 + + + + Browse... + + + + + + + true + + + + 95 + 23 + + + + + 95 + 16777215 + + + + Save as... + + + + + + + true + + + + 95 + 23 + + + + + 95 + 16777215 + + + + Browse... + + + + + + + + 150 + 0 + + + + + 150 + 16777215 + + + + + + + true + + + + + + + true + + + + 150 + 0 + + + + + 150 + 16777215 + + + + + + + true + + + + + + + true + + + Graph: + + + + + + + true + + + + 150 + 0 + + + + + 150 + 16777215 + + + + + + + true + + + + + + + true + + + + 95 + 23 + + + + + 95 + 16777215 + + + + Save as... + + + + + + + true + + + + 150 + 0 + + + + + 150 + 16777215 + + + + + + + true + + + + + + + true + + + + 150 + 0 + + + + + 150 + 16777215 + + + + + + + true + + + + + + + true + + + + 95 + 23 + + + + + 95 + 16777215 + + + + Browse... + + + + + + + true + + + Adduct library: + + + + + + + Intensity matrix: + + + + + + + true + + + + 150 + 0 + + + + + 150 + 16777215 + + + + Use default + + + true + + + + + + + true + + + + 95 + 23 + + + + + 95 + 16777215 + + + + Browse... + + + + + + + + 75 + 16777215 + + + + Ion mode: + + + + + + + + 100 + 0 + + + + + 100 + 16777215 + + + + + Positive + + + + + Negative + + + + + + + + true + + + Database: + + + + + + + true + + + Peaklist: + + + + + + + true + + + Working directory: + + + + + + + true + + + + .SF NS Text + 13 + 75 + true + + + + Data Files & General Settings + + + + + + + + + + + + + + 10 + + + 5 + + + 10 + + + 5 + + + -1 + + + 5 + + + + + 0 + + + 100000000.000000000000000 + + + 1000.000000000000000 + + + 5000.000000000000000 + + + + + + + 0 + + + 10000.000000000000000 + + + 1.000000000000000 + + + 1.000000000000000 + + + + + + + true + + + Grouping method: + + + + + + + 1.000000000000000 + + + 0.100000000000000 + + + 0.700000000000000 + + + + + + + true + + + + .SF NS Text + 13 + 75 + true + + + + Group Features + + + true + + + + + + + true + + + P-value threshold: + + + + + + + true + + + Maximum RT difference (sec): + + + + + + + 9999999.000000000000000 + + + 5.000000000000000 + + + + + + + 10 + + + 1.000000000000000 + + + 0.010000000000000 + + + 0.010000000000000 + + + + + + + true + + + Coefficent threshold: + + + + + + + + Pearson correlation + + + + + Spearman-rank correlation + + + + + + + + true + + + cpus: + + + + + + + true + + + Block size: + + + + + + + + + + + + + + 10 + + + 5 + + + 10 + + + 5 + + + 5 + + + + + true + + + + 95 + 23 + + + + + 95 + 16777215 + + + + Browse... + + + + + + + true + + + + 110 + 0 + + + + Monomer Units: + + + + + + + true + + + Neutral losses + + + true + + + + + + + true + + + + 63 + 23 + + + + + 95 + 16777215 + + + + Browse... + + + + + + + true + + + Isotopes + + + true + + + + + + + true + + + + 120 + 0 + + + + + 120 + 16777215 + + + + Use default + + + true + + + + + + + true + + + + .SF NS Text + 13 + 75 + true + + + + Annotate Peak Patterns + + + true + + + + + + + true + + + Adducts + + + true + + + + + + + true + + + Oligomers + + + false + + + + + + + true + + + Mass tolerance (ppm): + + + + + + + true + + + + 120 + 0 + + + + + 120 + 16777215 + + + + Use default + + + true + + + + + + + 2 + + + 1000000 + + + 2 + + + 10 + + + + + + + true + + + + 120 + 0 + + + + + 120 + 16777215 + + + + Use default + + + true + + + + + + + true + + + + 95 + 23 + + + + + 95 + 16777215 + + + + Browse... + + + + + + + 100000.000000000000000 + + + 5.000000000000000 + + + + + + + + + + + + + + 10 + + + 5 + + + 10 + + + 5 + + + 5 + + + + + false + + + + + + true + + + + + + + false + + + 100000.000000000000000 + + + 5.000000000000000 + + + + + + + false + + + + 63 + 23 + + + + Browse... + + + false + + + + + + + false + + + Maximum m/z: + + + + + + + false + + + use peak patterns + + + true + + + + + + + false + + + 1000000 + + + 500 + + + 10 + + + + + + + false + + + Mass tolerance (ppm): + + + + + + + false + + + + https://mfdb.bham.ac.uk + + + + + Tab-delimited text file + + + + + + + + false + + + Reference file: + + + + + + + false + + + Heuristic rules + + + true + + + + + + + false + + + Source: + + + + + + + true + + + + .SF NS Text + 13 + 75 + true + + + + Annotate Molecular Formulae + + + true + + + false + + + + + + + + + + + + + + 10 + + + 5 + + + 10 + + + 5 + + + 5 + + + + + false + + + + 63 + 23 + + + + Browse... + + + + + + + false + + + + + + true + + + + + + + true + + + use peak patterns + + + true + + + + + + + true + + + Reference file + + + false + + + + + + + + 510 + 0 + + + + + 16777215 + 95 + + + + + + + + 100000.000000000000000 + + + 5.000000000000000 + + + + + + + true + + + Mass tolerance (ppm): + + + + + + + true + + + + .SF NS Text + 13 + 75 + true + + + + Annotate Compounds / Metabolites + + + true + + + + + + + + + + + + + + 10 + + + 5 + + + 10 + + + 5 + + + 5 + + + + + + 63 + 23 + + + + Save as... + + + + + + + false + + + 1000000 + + + 5 + + + 10 + + + + + + + true + + + Convert RT: + + + false + + + + + + + + Multiple rows for each feature + + + + + Single row for each feature and separate columns + + + + + Single row for each feature and merged columns + + + + + + + + true + + + Separator: + + + + + + + + + + true + + + + + + + false + + + + Minutes + + + + + Seconds + + + + + + + + true + + + + .SF NS Text + 13 + 75 + true + + + + Create summary + + + true + + + + + + + true + + + Number of digits m/z: + + + false + + + + + + + + tab + + + + + comma + + + + + + + + true + + + Summary: + + + + + + + true + + + Annotations: + + + + + + + - - - false - - - - 300 - 720 - 121 - 22 - - - - - Minutes - - - - - Seconds - - - - - - true - + - 32 - 720 - 101 - 16 + 860 + 740 + 100 + 32 - Separator: + Start - + - 100 - 718 - 71 - 22 + 760 + 740 + 100 + 32 - - - tab - - - - - comma - - - - - - true - - - - 210 - 720 - 91 - 20 - + + + 16777204 + 16777215 + - Convert RT: - - - false - - - groupBox_annotate_peak_patterns - groupBox_general - groupBox_group_features - pushButton_start - pushButton_cancel - groupBox_annotate_molecular_formulae - groupBox_annotate_compounds - groupBox_create_summary - comboBox_convert_rt - label_separator - comboBox_separator - checkBox_convert_rt - - - - - 0 - 0 - 795 - 22 - - - - - Help + Cancel - - - - Add example data diff --git a/environment.yml b/environment.yml new file mode 100644 index 0000000..77b3f17 --- /dev/null +++ b/environment.yml @@ -0,0 +1,17 @@ +name: beamspy +channels: + - conda-forge + - bioconda +dependencies: + - numpy + - scipy + - requests + - networkx<=2.5 + - pandas + - matplotlib + - seaborn + - pyteomics<=4.4.1 + - biopython<=1.78 + - pyside2=5.13.1 + - tqdm + diff --git a/examples/run.bat b/examples/run.bat deleted file mode 100644 index 669a585..0000000 --- a/examples/run.bat +++ /dev/null @@ -1,46 +0,0 @@ -beamspy group-features^ - --peaklist "C:\beams\tests\test_data\variableMetadata.txt"^ - --intensity-matrix "C:\beams\tests\test_data\dataMatrix.txt"^ - --gml "C:\beams\tests\test_results\graph.gml"^ - --db "C:\beams\tests\test_results\results.sqlite"^ - --max-rt-diff 5.0^ - --method pearson^ - --coeff-threshold 0.7^ - --pvalue-threshold 0.01 - -beamspy annotate-peak-patterns^ - --peaklist "C:\beams\tests\test_data\variableMetadata.txt"^ - --intensity-matrix "C:\beams\tests\test_data\dataMatrix.txt"^ - --gml "C:\beams\tests\test_results\graph.gml"^ - --db "C:\beams\tests\test_results\results.sqlite"^ - --adducts^ - --adducts-library "C:\beams\beams\data\adducts.txt"^ - --isotopes^ - --isotopes-library "C:\beams\beams\data\isotopes.txt"^ - --ion-mode pos^ - --ppm 5.0 - -beamspy annotate-mf^ - --peaklist "C:\beams\tests\test_data\variableMetadata.txt"^ - --intensity-matrix "C:\beams\tests\test_data\dataMatrix.txt"^ - --db "C:\beams\tests\test_results\results.sqlite"^ - --adducts-library "C:\beams\beams\data\adducts.txt"^ - --ion-mode pos^ - --ppm 3.0^ - --max-mz 700.0 - -beamspy annotate-compounds^ - --peaklist "C:\beams\tests\test_data\variableMetadata.txt"^ - --intensity-matrix "C:\beams\tests\test_data\dataMatrix.txt"^ - --db "C:\beams\tests\test_results\results.sqlite"^ - --db-name hmdb_full_v4_0_v1^ - --adducts-library "C:\beams\beams\data\adducts.txt"^ - --ion-mode pos^ - --ppm 3.0 - -beamspy summary-results^ - --peaklist "C:\beams\tests\test_data\variableMetadata.txt"^ - --intensity-matrix "C:\beams\tests\test_data\dataMatrix.txt"^ - --db "C:\beams\tests\test_results\results.sqlite"^ - --output "C:\beams\tests\test_results\summary.txt"^ - --sep tab diff --git a/examples/run.py b/examples/run.py index cc9982d..4e5c9ca 100644 --- a/examples/run.py +++ b/examples/run.py @@ -8,7 +8,6 @@ from beamspy.annotation import annotate_adducts from beamspy.annotation import annotate_isotopes from beamspy.annotation import annotate_oligomers -from beamspy.annotation import annotate_multiple_charged_ions from beamspy.annotation import annotate_compounds from beamspy.annotation import annotate_molecular_formulae from beamspy.annotation import summary @@ -18,14 +17,13 @@ def main(): path = "../tests/test_data/" - fn_peaklist = os.path.join(path, "variableMetadata.txt") - fn_matrix = os.path.join(path, "dataMatrix.txt") + fn_peaklist = os.path.join(path, "peaklist_lcms_pos_theoretical.txt") + fn_matrix = os.path.join(path, "dataMatrix_lcms_theoretical.txt") df = in_out.combine_peaklist_matrix(fn_peaklist, fn_matrix) ion_mode = "pos" - - db_out = "results_{}.sqlite".format(ion_mode) + db_out = "results.sqlite".format(ion_mode) graphs = group_features(df, db_out, max_rt_diff=5.0, coeff_thres=0.7, pvalue_thres=0.01, method="pearson") @@ -35,8 +33,6 @@ def main(): path = "../beamspy/data" lib_isotopes = in_out.read_isotopes(os.path.join(path, "isotopes.txt"), ion_mode) lib_adducts = in_out.read_adducts(os.path.join(path, "adducts.txt"), ion_mode) - lib_multiple_charged_ions = in_out.read_multiple_charged_ions(os.path.join(path, "multiple_charged_ions.txt"), ion_mode) - lib_mass_differences = in_out.read_mass_differences(os.path.join(path, "multiple_charged_differences.txt"), ion_mode) print(lib_isotopes) print(lib_adducts) @@ -45,17 +41,15 @@ def main(): annotate_adducts(graphs, db_out, ppm, lib_adducts) annotate_isotopes(graphs, db_out, ppm, lib_isotopes) - annotate_oligomers(graphs, db_out, ppm, lib_adducts) - annotate_multiple_charged_ions(graphs, db_out, ppm, lib_multiple_charged_ions) # annotate_molecular_formulae(df, lib_adducts, ppm, db_out) - annotate_compounds(df, lib_adducts, ppm, db_out, "lipidmaps_full_20181217_v1") + annotate_compounds(df, lib_adducts, ppm, db_out, "hmdb_full_v4_0_20200909_v1") df_out = summary(df, db_out) - fn_out = "summary_{}.txt".format(ion_mode) + fn_out = "summary.txt" df_out.to_csv(fn_out, sep="\t", index=False, encoding="utf-8") - pdf_out = "report_{}.pdf".format(ion_mode) + pdf_out = "report.pdf" plots.report(db=db_out, pdf_out=pdf_out, column_corr="r_value", column_pvalue="p_value", column_ppm_error="ppm_error", column_adducts="adduct") diff --git a/examples/run.sh b/examples/run.sh index d816d39..45bfcf1 100755 --- a/examples/run.sh +++ b/examples/run.sh @@ -1,20 +1,20 @@ #!/bin/bash beamspy group-features \ ---peaklist ../tests/test_data/variableMetadata.txt \ ---intensity-matrix ../tests/test_data/dataMatrix.txt \ ---gml ../tests/test_results/graph.gml \ ---db ../tests/test_results/results.sqlite \ +--peaklist ../tests/test_data/peaklist_lcms_pos_theoretical.txt \ +--intensity-matrix ../tests/test_data/dataMatrix_lcms_theoretical.txt \ +--gml graph.gml \ +--db results.sqlite \ --max-rt-diff 5.0 \ --method pearson \ --coeff-threshold 0.7 \ --pvalue-threshold 0.01 beamspy annotate-peak-patterns \ ---peaklist ../tests/test_data/variableMetadata.txt \ ---intensity-matrix ../tests/test_data/dataMatrix.txt \ ---gml ../tests/test_results/graph.gml \ ---db ../tests/test_results/results.sqlite \ +--peaklist ../tests/test_data/peaklist_lcms_pos_theoretical.txt \ +--intensity-matrix ../tests/test_data/dataMatrix_lcms_theoretical.txt \ +--gml graph.gml \ +--db results.sqlite \ --adducts \ --adducts-library ../beamspy/data/adducts.txt \ --isotopes \ @@ -22,26 +22,18 @@ beamspy annotate-peak-patterns \ --ion-mode pos \ --ppm 5.0 -beamspy annotate-mf \ ---peaklist ../tests/test_data/variableMetadata.txt \ ---intensity-matrix ../tests/test_data/dataMatrix.txt \ ---db ../tests/test_results/results.sqlite \ ---adducts-library ../beamspy/data/adducts.txt \ ---ion-mode pos \ ---ppm 3.0 - beamspy annotate-compounds \ ---peaklist ../tests/test_data/variableMetadata.txt \ ---intensity-matrix ../tests/test_data/dataMatrix.txt \ ---db ../tests/test_results/results.sqlite \ ---db-name hmdb_full_v4_0_v1 \ +--peaklist ../tests/test_data/peaklist_lcms_pos_theoretical.txt \ +--intensity-matrix ../tests/test_data/dataMatrix_lcms_theoretical.txt \ +--db results.sqlite \ +--db-name hmdb_full_v4_0_20200909_v1 \ --adducts-library ../beamspy/data/adducts.txt \ --ion-mode pos \ --ppm 3.0 beamspy summary-results \ ---peaklist ../tests/test_data/variableMetadata.txt \ ---intensity-matrix ../tests/test_data/dataMatrix.txt \ ---db ../tests/test_results/results.sqlite \ ---output ../tests/test_results/summary.txt \ +--peaklist ../tests/test_data/peaklist_lcms_pos_theoretical.txt \ +--intensity-matrix ../tests/test_data/dataMatrix_lcms_theoretical.txt \ +--db results.sqlite \ +--output summary.txt \ --sep tab diff --git a/requirements.txt b/requirements.txt index 8aeb57c..3e5b242 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,11 +1,11 @@ -numpy==1.17.1 -scipy==1.3.1 -requests==2.22.0 -networkx==2.4 -pandas==0.25.0 -matplotlib==3.1.1 -seaborn==0.9.0 -pyteomics==4.1.2 -biopython==1.74 +numpy +scipy +requests +networkx<=2.5 +pandas +matplotlib +seaborn +pyteomics<=4.4.1 +biopython<=1.78 pyside2==5.13.1 tqdm diff --git a/setup.py b/setup.py index e8de525..eea973e 100644 --- a/setup.py +++ b/setup.py @@ -11,8 +11,9 @@ def main(): setuptools.setup(name="beamspy", version=beamspy.__version__, - description="Python package to annotate LC-MS and DIMS data", + description="Putative annotation of metabolites for mass spectrometry-based metabolomics datasets.", long_description=open("README.rst").read(), + long_description_content_type="text/x-rst", author="Ralf Weber", author_email="r.j.weber@bham.ac.uk", url="https://github.com/computational-metabolomics/beamspy", @@ -27,6 +28,7 @@ def main(): classifiers=[ "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", "Topic :: Scientific/Engineering :: Bio-Informatics", "Topic :: Scientific/Engineering :: Chemistry", "Topic :: Utilities", @@ -42,4 +44,4 @@ def main(): if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/tests/test_annotation.py b/tests/test_annotation.py index bdba1d3..c95b8c1 100644 --- a/tests/test_annotation.py +++ b/tests/test_annotation.py @@ -22,7 +22,7 @@ def setUp(self): self.lib_isotopes = read_isotopes(os.path.join(self.path, "beamspy", "data", "isotopes.txt"), "pos") self.lib_adducts = read_adducts(os.path.join(self.path, "beamspy", "data", "adducts.txt"), "pos") - self.lib_multiple_charged_ions = read_multiple_charged_ions(os.path.join(self.path, "beamspy", "data", "multiple_charged_ions.txt"), "pos") + # self.lib_multiple_charged_ions = read_multiple_charged_ions(os.path.join(self.path, "beamspy", "data", "multiple_charged_ions.txt"), "pos") # lib_mass_differences = read_mass_differences(os.path.join(self.path, "beamspy", "data", "multiple_charged_differences.txt"), "pos") self.db_results = "results_annotation.sqlite" @@ -31,56 +31,197 @@ def setUp(self): self.ppm = 2.0 - #def tearDown(self): - # os.remove(to_test_results("hmdb_full_v4_0_v1.sqlite")) + self.db_name = "hmdb_full_v4_0_20200909_v1" + + def test_dbs(self): + + path_dbs = os.path.join(self.path, "beamspy", "data", "databases") + df_dbs = pd.read_csv(os.path.join(path_dbs, "databases.txt"), sep="\t") + d = {} + for index, row in df_dbs.iterrows(): + + with gzip.GzipFile(os.path.join(path_dbs, row["database_name"] + ".sql.gz"), mode='rb') as db_dump: + + conn = sqlite3.connect(":memory:") + cursor = conn.cursor() + cursor.executescript(db_dump.read().decode('utf-8')) + conn.commit() + + cursor.execute("SELECT COUNT(*) FROM {}".format(row["database_name"])) + d[row["database_name"]] = int(cursor.fetchone()[0]) + + self.assertEqual(d["biocyc_chlamycyc_20180702_v1"], 1125) + self.assertEqual(d["chebi_complete_rel195_v1"], 124527) + self.assertEqual(d["chebi_complete_3star_rel195_v1"], 49820) + self.assertEqual(d["hmdb_urine_v4_0_20200910_v1"], 4364) + self.assertEqual(d["hmdb_serum_v4_0_20200910_v1"], 25411) + self.assertEqual(d["hmdb_csf_v4_0_20200910_v1"], 445) + self.assertEqual(d["hmdb_saliva_v4_0_20200910_v1"], 1245) + self.assertEqual(d["hmdb_feces_v4_0_20200910_v1"], 6810) + self.assertEqual(d["hmdb_sweat_v4_0_20200910_v1"], 91) + self.assertEqual(d["hmdb_full_v4_0_20200909_v1"], 114222) + self.assertEqual(d["kegg_dpx_20210111_v1"], 2641) + self.assertEqual(d["kegg_hsa_20210111_v1"], 3047) + self.assertEqual(d["kegg_full_20210111_v1"], 18749) + self.assertEqual(d["lipidmaps_fattyacyls_20201001_v1"], 9823) + self.assertEqual(d["lipidmaps_glycerolipids_20201001_v1"], 7642) + self.assertEqual(d["lipidmaps_slycerophospholipids_20201001_v1"], 9915) + self.assertEqual(d["lipidmaps_solyketides_20201001_v1"], 6951) + self.assertEqual(d["lipidmaps_srenollipids_20201001_v1"], 1420) + self.assertEqual(d["lipidmaps_sacccharolipids_20201001_v1"], 1326) + self.assertEqual(d["lipidmaps_sphingolipids_20201001_v1"], 4402) + self.assertEqual(d["lipidmaps_sterollipids_20201001_v1"], 2949) + self.assertEqual(d["lipidmaps_full_20201001_v1"], 44428) + + def test_neutral_losses(self): + + df_nls = combine_peaklist_matrix(to_test_data("peaklist_lcms_pos_theoretical_nls.txt"), + to_test_data("dataMatrix_lcms_theoretical_nls.txt")) + + db_nls = "results_annotation_nls.sqlite" + + lib_nls = read_neutral_losses(os.path.join(self.path, "beamspy", "data", "neutral_losses.txt")) + annotate_isotopes(df_nls, to_test_results(db_nls), self.ppm, self.lib_isotopes) + annotate_neutral_losses(df_nls, to_test_results(db_nls), self.ppm, lib_nls) + annotate_adducts(df_nls, to_test_results(db_nls), self.ppm, self.lib_adducts) + + self.assertSequenceEqual(sqlite_records(to_test_results(db_nls), "neutral_losses"), + sqlite_records(to_test_data(db_nls), "neutral_losses")) + + path_hmdb_sql_gz = os.path.join(os.getcwd(), "beamspy", "data", "databases", self.db_name + ".sql.gz") + path_hmdb_sqlite = to_test_results("{}.sqlite".format(self.db_name)) + + if os.path.isfile(path_hmdb_sqlite): + os.remove(path_hmdb_sqlite) + + with gzip.GzipFile(path_hmdb_sql_gz, mode='rb') as db_dump: + conn = sqlite3.connect(path_hmdb_sqlite) + cursor = conn.cursor() + cursor.executescript(db_dump.read().decode('utf-8')) + conn.commit() + conn.close() + + annotate_compounds(df_nls, self.lib_adducts, self.ppm, to_test_results(db_nls), self.db_name, + patterns=True, db_in=path_hmdb_sqlite) + + #l_01 = sorted(sqlite_records(to_test_data(db_nls), "compounds_{}".format(self.db_name)), key=lambda x: (x[0], x[-1])) + #l_02 = sorted(sqlite_records(to_test_results(db_nls), "compounds_{}".format(self.db_name)), key=lambda x: (x[0], x[-1])) + #self.assertSequenceEqual(l_01, l_02) + + self.assertSequenceEqual(sqlite_records(to_test_results(db_nls), "compounds_{}".format(self.db_name)), + sqlite_records(to_test_data(db_nls), "compounds_{}".format(self.db_name))) + self.assertEqual(sqlite_count(to_test_results(db_nls), "compounds_{}".format(self.db_name)), 26) + + annotate_molecular_formulae(df_nls, self.lib_adducts, self.ppm, to_test_results(db_nls), + patterns=True, rules=True) + + # l_01 = sorted(sqlite_records(to_test_data(db_nls), "molecular_formulae"), key=lambda x: (x[0], x[-1])) + # l_02 = sorted(sqlite_records(to_test_results(db_nls), "molecular_formulae"), key=lambda x: (x[0], x[-1])) + # self.assertSequenceEqual(l_01, l_02) + + self.assertSequenceEqual(sqlite_records(to_test_results(db_nls), "molecular_formulae"), + sqlite_records(to_test_data(db_nls), "molecular_formulae")) + self.assertEqual(sqlite_count(to_test_results(db_nls), "molecular_formulae"), 15) + + def test_annotate_multiple_charged_adducts(self): + df = combine_peaklist_matrix(to_test_data("peaklist_lcms_pos_theoretical_mc_o.txt"), + to_test_data("dataMatrix_lcms_theoretical_mc_o.txt")) + + lib_adducts = read_adducts(os.path.join(self.path, "beamspy", "data", "multiple_charged_ions.txt"), "pos") + db_mc = "results_annotation_mc_o.sqlite" + + annotate_adducts(df, to_test_results(db_mc), self.ppm, lib_adducts) + self.assertSequenceEqual(sqlite_records(to_test_results(db_mc), "adduct_pairs"), + sqlite_records(to_test_data(db_mc), "adduct_pairs")) + + annotate_isotopes(df, to_test_results(db_mc), self.ppm, self.lib_isotopes) + self.assertSequenceEqual(sqlite_records(to_test_results(db_mc), "isotopes"), + sqlite_records(to_test_data(db_mc), "isotopes")) + self.assertEqual(sqlite_count(to_test_results(db_mc), "isotopes"), 2) + + annotate_oligomers(df, to_test_results(db_mc), self.ppm, lib_adducts) + self.assertSequenceEqual(sqlite_records(to_test_results(db_mc), "oligomers"), + sqlite_records(to_test_data(db_mc), "oligomers")) + self.assertEqual(sqlite_count(to_test_results(db_mc), "oligomers"), 4) + + path_hmdb_sql_gz = os.path.join(os.getcwd(), "beamspy", "data", "databases", self.db_name + ".sql.gz") + path_hmdb_sqlite = to_test_results("{}.sqlite".format(self.db_name)) + + if os.path.isfile(path_hmdb_sqlite): + os.remove(path_hmdb_sqlite) + + with gzip.GzipFile(path_hmdb_sql_gz, mode='rb') as db_dump: + conn = sqlite3.connect(path_hmdb_sqlite) + cursor = conn.cursor() + cursor.executescript(db_dump.read().decode('utf-8')) + conn.commit() + conn.close() + + annotate_compounds(df, lib_adducts, self.ppm, to_test_results(db_mc), self.db_name, + patterns=True, db_in=path_hmdb_sqlite) + + # l_01 = sorted(sqlite_records(to_test_data(db_mc), "compounds_{}".format(self.db_name)), key = lambda x: x[0]) + # l_02 = sorted(sqlite_records(to_test_results(db_mc), "compounds_{}".format(self.db_name)), key = lambda x: x[0]) + # self.assertSequenceEqual(l_01, l_02) + + self.assertSequenceEqual(sqlite_records(to_test_results(db_mc), "compounds_{}".format(self.db_name)), + sqlite_records(to_test_data(db_mc), "compounds_{}".format(self.db_name))) + self.assertEqual(sqlite_count(to_test_results(db_mc), "compounds_{}".format(self.db_name)), 40) + + annotate_molecular_formulae(df, lib_adducts, self.ppm, to_test_results(db_mc), + patterns=True, rules=True) + + # l_01 = sorted(sqlite_records(to_test_data(db_mc), "molecular_formulae"), key = lambda x: x[0]) + # l_02 = sorted(sqlite_records(to_test_results(db_mc), "molecular_formulae"), key = lambda x: x[0]) + # self.assertSequenceEqual(l_01, l_02) + + self.assertSequenceEqual(sqlite_records(to_test_results(db_mc), "molecular_formulae"), + sqlite_records(to_test_data(db_mc), "molecular_formulae")) + self.assertEqual(sqlite_count(to_test_results(db_mc), "molecular_formulae"), 2187) def test_annotate_adducts(self): annotate_adducts(self.df, to_test_results(self.db_results), self.ppm, self.lib_adducts) - self.assertEqual(sqlite_records(to_test_results(self.db_results), "adduct_pairs"), sqlite_records(to_test_data(self.db_results), "adduct_pairs")) + self.assertSequenceEqual(sqlite_records(to_test_results(self.db_results), "adduct_pairs"), + sqlite_records(to_test_data(self.db_results), "adduct_pairs")) annotate_adducts(self.graph, to_test_results(self.db_results_graph), self.ppm, self.lib_adducts) - self.assertEqual(sqlite_records(to_test_results(self.db_results_graph), "adduct_pairs"), sqlite_records(to_test_data(self.db_results_graph), "adduct_pairs")) + self.assertSequenceEqual(sqlite_records(to_test_results(self.db_results_graph), "adduct_pairs"), + sqlite_records(to_test_data(self.db_results_graph), "adduct_pairs")) def test_annotate_isotopes(self): annotate_isotopes(self.df, to_test_results(self.db_results), self.ppm, self.lib_isotopes) - self.assertEqual(sqlite_records(to_test_results(self.db_results), "isotopes"), sqlite_records(to_test_data(self.db_results), "isotopes")) + self.assertSequenceEqual(sqlite_records(to_test_results(self.db_results), "isotopes"), + sqlite_records(to_test_data(self.db_results), "isotopes")) self.assertEqual(sqlite_count(to_test_results(self.db_results), "isotopes"), 1) annotate_isotopes(self.graph, to_test_results(self.db_results_graph), self.ppm, self.lib_isotopes) - self.assertEqual(sqlite_records(to_test_results(self.db_results_graph), "isotopes"), sqlite_records(to_test_data(self.db_results_graph), "isotopes")) + self.assertSequenceEqual(sqlite_records(to_test_results(self.db_results_graph), "isotopes"), + sqlite_records(to_test_data(self.db_results_graph), "isotopes")) self.assertEqual(sqlite_count(to_test_results(self.db_results_graph), "isotopes"), 1) def test_annotate_oligomers(self): - annotate_oligomers(self.df, to_test_results(self.db_results), self.ppm, self.lib_adducts, maximum=5) - self.assertEqual(sqlite_records(to_test_results(self.db_results), "oligomers"), - sqlite_records(to_test_data(self.db_results), "oligomers")) + annotate_oligomers(self.df, to_test_results(self.db_results), self.ppm, self.lib_adducts) + self.assertSequenceEqual(sqlite_records(to_test_results(self.db_results), "oligomers"), + sqlite_records(to_test_data(self.db_results), "oligomers")) self.assertEqual(sqlite_count(to_test_results(self.db_results), "oligomers"), 2) annotate_oligomers(self.graph, to_test_results(self.db_results_graph), self.ppm, self.lib_adducts) - self.assertEqual(sqlite_records(to_test_results(self.db_results_graph), "oligomers"), - sqlite_records(to_test_data(self.db_results_graph), "oligomers")) + self.assertSequenceEqual(sqlite_records(to_test_results(self.db_results_graph), "oligomers"), + sqlite_records(to_test_data(self.db_results_graph), "oligomers")) self.assertEqual(sqlite_count(to_test_results(self.db_results_graph), "oligomers"), 2) - # def test_annotate_drug_products(self): - # df = pd.DataFrame({"name": pd.Series(["M152T100", "M188T100", "M310T200", "M348T200"]), - # "mz": pd.Series([152.0706054, 188.0682004, 310.1413254, 348.0972084], dtype=np.float64), - # "rt": pd.Series([100.0, 100, 0, 200.0, 200.0], dtype=np.float64), - # "intensity": pd.Series([1234.45, 2345.67, 3456.78, 4567.89], dtype=np.float64)}, - # columns=["name", "mz", "rt", "intensity"], - # index=range(0, 4)) - # smiles = ["CC(=O)NC1=CC=C(C=C1)O", "CNCCC(OC1=CC=C(C=C1)C(F)(F)F)C1=CC=CC=C1"] - # annotate_drug_products(df, to_test_results(self.db_results), smiles, self.lib_adducts, self.ppm, - # phase1_cycles=1, phase2_cycles=1) - # self.assertEqual(sqlite_records(to_test_results(self.db_results), "drug_products"), - # sqlite_records(to_test_data(self.db_results), "drug_products")) - # self.assertEqual(sqlite_count(to_test_results(self.db_results), "drug_products"), 4) - def test_annotate_compounds(self): - db_name = "hmdb_full_v4_0_v1" + path_hmdb_sql_gz = os.path.join(os.getcwd(), "beamspy", "data", "databases", self.db_name + ".sql.gz") + path_hmdb_sqlite = to_test_results("{}.sqlite".format(self.db_name)) + + annotate_adducts(self.df, to_test_results(self.db_results), self.ppm, self.lib_adducts) + annotate_isotopes(self.df, to_test_results(self.db_results), self.ppm, self.lib_isotopes) + annotate_oligomers(self.df, to_test_results(self.db_results), self.ppm, self.lib_adducts) - path_hmdb_sql_gz = os.path.join(os.getcwd(), "beamspy", "data", "databases", db_name + ".sql.gz") - path_hmdb_sqlite = to_test_results("{}.sqlite".format(db_name)) + annotate_adducts(self.df, to_test_results(self.db_results_graph), self.ppm, self.lib_adducts) + annotate_isotopes(self.df, to_test_results(self.db_results_graph), self.ppm, self.lib_isotopes) + annotate_oligomers(self.df, to_test_results(self.db_results_graph), self.ppm, self.lib_adducts) if os.path.isfile(path_hmdb_sqlite): os.remove(path_hmdb_sqlite) @@ -92,34 +233,99 @@ def test_annotate_compounds(self): conn.commit() conn.close() - # sqlite profile provided - annotate_compounds(self.df, self.lib_adducts, self.ppm, to_test_results(self.db_results), db_name, path_hmdb_sqlite) - self.assertEqual(sqlite_records(to_test_results(self.db_results), "compounds_{}".format(db_name)), - sqlite_records(to_test_data(self.db_results), "compounds_{}".format(db_name))) - self.assertEqual(sqlite_count(to_test_results(self.db_results), "compounds_{}".format(db_name)), 57) + # sqlite file provided + annotate_compounds(self.df, self.lib_adducts, self.ppm, to_test_results(self.db_results), self.db_name, + patterns=True, db_in=path_hmdb_sqlite) + + # l_01 = sorted(sqlite_records(to_test_data(self.db_results), "compounds_{}".format(self.db_name)), key = lambda x: x[0]) + # l_02 = sorted(sqlite_records(to_test_results(self.db_results), "compounds_{}".format(self.db_name)), key = lambda x: x[0]) + # self.assertSequenceEqual(l_01, l_02) + + self.assertSequenceEqual(sqlite_records(to_test_results(self.db_results), "compounds_{}".format(self.db_name)), + sqlite_records(to_test_data(self.db_results), "compounds_{}".format(self.db_name))) + self.assertEqual(sqlite_count(to_test_results(self.db_results), "compounds_{}".format(self.db_name)), 50) # internal sqlite databases - annotate_compounds(self.df, self.lib_adducts, self.ppm, to_test_results(self.db_results), db_name) - self.assertEqual(sqlite_records(to_test_results(self.db_results), "compounds_{}".format(db_name)), - sqlite_records(to_test_data(self.db_results), "compounds_{}".format(db_name))) - self.assertEqual(sqlite_count(to_test_results(self.db_results), "compounds_{}".format(db_name)), 57) + annotate_compounds(self.df, self.lib_adducts, self.ppm, to_test_results(self.db_results), self.db_name, + patterns=True, db_in="") + + # l_01 = sorted(sqlite_records(to_test_data(self.db_results), "compounds_{}".format(self.db_name)), key = lambda x: x[0]) + # l_02 = sorted(sqlite_records(to_test_results(self.db_results), "compounds_{}".format(self.db_name)), key = lambda x: x[0]) + # self.assertSequenceEqual(l_01, l_02) + + self.assertSequenceEqual(sqlite_records(to_test_results(self.db_results), "compounds_{}".format(self.db_name)), + sqlite_records(to_test_data(self.db_results), "compounds_{}".format(self.db_name))) + self.assertEqual(sqlite_count(to_test_results(self.db_results), "compounds_{}".format(self.db_name)), 50) + + # internal sqlite databases, including grouping + annotate_compounds(self.df, self.lib_adducts, self.ppm, to_test_results(self.db_results_graph), self.db_name, + patterns=True, db_in="") + + # l_01 = sorted(sqlite_records(to_test_data(self.db_results_graph), "compounds_{}".format(self.db_name)), key = lambda x: x[0]) + # l_02 = sorted(sqlite_records(to_test_results(self.db_results_graph), "compounds_{}".format(self.db_name)), key = lambda x: x[0]) + # self.assertSequenceEqual(l_01, l_02) + self.assertSequenceEqual(sqlite_records(to_test_results(self.db_results_graph), "compounds_{}".format(self.db_name)), + sqlite_records(to_test_data(self.db_results_graph), "compounds_{}".format(self.db_name))) + self.assertEqual(sqlite_count(to_test_results(self.db_results_graph), "compounds_{}".format(self.db_name)), 50) + + # internal sqlite databases (excl. patterns) + db_results_excl_patterns = self.db_results.replace(".sqlite", "_excl_pattern.sqlite") + annotate_adducts(self.df, to_test_results(db_results_excl_patterns), self.ppm, self.lib_adducts) + annotate_isotopes(self.df, to_test_results(db_results_excl_patterns), self.ppm, self.lib_isotopes) + + annotate_compounds(self.df, self.lib_adducts, self.ppm, to_test_results(db_results_excl_patterns), self.db_name, + patterns=False, db_in="") + self.assertEqual(sqlite_count(to_test_results(db_results_excl_patterns), "compounds_{}".format(self.db_name)), 56) + + # text file provided path_db_txt = os.path.join(os.getcwd(), "beamspy", "data", "db_compounds.txt") - annotate_compounds(self.df, self.lib_adducts, self.ppm, to_test_results(self.db_results), "test", path_db_txt) - #self.assertEqual(sqlite_records(to_test_results(self.db_results), "compounds_{}".format(db_name)), sqlite_records(to_test_data(self.db_results), "compounds_{}".format(db_name))) - self.assertEqual(sqlite_count(to_test_results(self.db_results), "compounds_{}".format(db_name)), 57) + db_name = "test" + annotate_compounds(self.df, self.lib_adducts, self.ppm, to_test_results(self.db_results), db_name, + patterns=True, db_in=path_db_txt) + self.assertSequenceEqual(sqlite_records(to_test_results(self.db_results), "compounds_{}".format(db_name)), + sqlite_records(to_test_data(self.db_results), "compounds_{}".format(db_name))) + self.assertEqual(sqlite_count(to_test_results(self.db_results), "compounds_{}".format(db_name)), 66) + + path_db_txt = to_test_results("db_compounds_rt.txt") + db_name = "test_rt" + with open(path_db_txt, "w") as out: + out.write("compound_id\tmolecular_formula\tcompound_name\tadduct\tretention_time\n") + out.write("HMDB0000263\tC3H5O6P\tPhosphoenolpyruvic acid\t[M+H]+\t118.0\n") + + annotate_compounds(self.df, self.lib_adducts, 100.0, to_test_results(self.db_results), db_name, patterns=True, + db_in=path_db_txt, rt_tol=5.0) + self.assertEqual(sqlite_count(to_test_results(self.db_results), "compounds_{}".format(db_name)), 1) def test_annotate_molecular_formulae(self): + fn_mf = os.path.join(self.path, "beamspy", "data", "db_mf.txt") - annotate_molecular_formulae(self.df, self.lib_adducts, self.ppm, to_test_results(self.db_results), fn_mf) - self.assertEqual(sqlite_records(to_test_results(self.db_results), "molecular_formulae"), sqlite_records(to_test_data(self.db_results), "molecular_formulae")) - self.assertEqual(sqlite_count(to_test_results(self.db_results), "molecular_formulae"), 16) + annotate_molecular_formulae(self.df, self.lib_adducts, self.ppm, to_test_results(self.db_results), + fn_mf, patterns=True) + + self.assertSequenceEqual(sqlite_records(to_test_results(self.db_results), "molecular_formulae"), + sqlite_records(to_test_data(self.db_results), "molecular_formulae")) + self.assertEqual(sqlite_count(to_test_results(self.db_results), "molecular_formulae"), 15) db_mfdb_results = "results_mfdb.sqlite" - annotate_molecular_formulae(self.df, self.lib_adducts, self.ppm, to_test_results(db_mfdb_results)) - self.assertEqual(sqlite_records(to_test_results(db_mfdb_results), "molecular_formulae"), sqlite_records(to_test_data(db_mfdb_results), "molecular_formulae")) + annotate_molecular_formulae(self.df, self.lib_adducts, self.ppm, to_test_results(db_mfdb_results), + patterns=False, rules=True) + self.assertSequenceEqual(sqlite_records(to_test_results(db_mfdb_results), "molecular_formulae"), + sqlite_records(to_test_data(db_mfdb_results), "molecular_formulae")) self.assertEqual(sqlite_count(to_test_results(db_mfdb_results), "molecular_formulae"), 586) + db_mfdb_results = "results_mfdb_excl_hrules.sqlite" + annotate_adducts(self.df, to_test_results(db_mfdb_results), self.ppm, self.lib_adducts) + annotate_isotopes(self.df, to_test_results(db_mfdb_results), self.ppm, self.lib_isotopes) + + annotate_molecular_formulae(self.df, self.lib_adducts, self.ppm, to_test_results(db_mfdb_results), + patterns=True, rules=False) + records = sqlite_records(to_test_results(db_mfdb_results), "molecular_formulae") + self.assertEqual(len(records), 3869) + self.assertSequenceEqual(records[897], + ('M493T192', 493.063765, 493.06376, 0.010140676303965665, '[M+Na]+', '(13C)', '', + 14, 23, 4, 8, 2, 1, 1, 'C14H23N4O8P2S', 1, 1, 0, 1, 5.0)) + def test_summary(self): def _assert(summary_test_data, summary_result): @@ -129,27 +335,55 @@ def _assert(summary_test_data, summary_result): lines_test_data = test_data.read().splitlines() for i in range(len(lines_results)): self.assertEqual(lines_results[i], lines_test_data[i]) - self.assertEqual(sqlite_records(to_test_results(self.db_results), "summary"), sqlite_records(to_test_data(self.db_results), "summary")) fn_summary = "summary_mr_mc.txt" - df_summary = summary(self.df, to_test_results(self.db_results), single_row=False, single_column=False, convert_rt=None, ndigits_mz=None) + df_summary = summary(self.df, to_test_results(self.db_results), single_row=False, single_column=False, + convert_rt=None, ndigits_mz=None) + + # l_01 = sorted(sqlite_records(to_test_data(self.db_results), "summary"), key = lambda x: x[0]) + # l_02 = sorted(sqlite_records(to_test_results(self.db_results), "summary"), key = lambda x: x[0]) + + self.assertSequenceEqual(sqlite_records(to_test_results(self.db_results), "summary"), + sqlite_records(to_test_data(self.db_results), "summary")) + df_summary.to_csv(to_test_results(fn_summary), sep="\t", index=False) + self.assertSequenceEqual(df_summary.shape, (117, 29)) _assert(to_test_data(fn_summary), to_test_results(fn_summary)) fn_summary = "summary_sr_mc.txt" - df_summary = summary(self.df, to_test_results(self.db_results), single_row=True, single_column=False, convert_rt=None, ndigits_mz=None) + df_summary = summary(self.df, to_test_results(self.db_results), single_row=True, single_column=False, + convert_rt=None, ndigits_mz=None) df_summary.to_csv(to_test_results(fn_summary), sep="\t", index=False) + self.assertSequenceEqual(df_summary.shape, (17, 18)) _assert(to_test_data(fn_summary), to_test_results(fn_summary)) fn_summary = "summary_sr_sc.txt" - df_summary = summary(self.df, to_test_results(self.db_results), single_row=True, single_column=True, convert_rt=None, ndigits_mz=None) + df_summary = summary(self.df, to_test_results(self.db_results), single_row=True, single_column=True, + convert_rt=None, ndigits_mz=None) df_summary.to_csv(to_test_results(fn_summary), sep="\t", index=False) + self.assertSequenceEqual(df_summary.shape, (17, 13)) _assert(to_test_data(fn_summary), to_test_results(fn_summary)) fn_summary = "summary_mr_mc_graphs.txt" - df_summary = summary(self.df, to_test_results(self.db_results_graph), single_row=False, single_column=False, convert_rt=None, ndigits_mz=None) + df_summary = summary(self.df, to_test_results(self.db_results_graph), single_row=False, single_column=False, + convert_rt=None, ndigits_mz=None) + self.assertSequenceEqual(sqlite_records(to_test_results(self.db_results_graph), "summary"), + sqlite_records(to_test_data(self.db_results_graph), "summary")) df_summary.to_csv(to_test_results(fn_summary), sep="\t", index=False) + self.assertSequenceEqual(df_summary.shape, ((50, 33))) _assert(to_test_data(fn_summary), to_test_results(fn_summary)) + fn_summary = "summary_mr_mc_nls.txt" + db_nls = "results_annotation_nls.sqlite" + df_nls = combine_peaklist_matrix(to_test_data("peaklist_lcms_pos_theoretical_nls.txt"), + to_test_data("dataMatrix_lcms_theoretical_nls.txt")) + df_summary = summary(df_nls, to_test_results(db_nls), single_row=False, single_column=False, + convert_rt=None, ndigits_mz=None) + df_summary.to_csv(to_test_results(fn_summary), sep="\t", index=False) + + self.assertSequenceEqual(df_summary.shape, ((31, 29))) + _assert(to_test_data(fn_summary), to_test_results(fn_summary)) + + if __name__ == '__main__': unittest.main() \ No newline at end of file diff --git a/tests/test_data/dataMatrix_lcms_theoretical_mc_o.txt b/tests/test_data/dataMatrix_lcms_theoretical_mc_o.txt new file mode 100644 index 0000000..9d7e203 --- /dev/null +++ b/tests/test_data/dataMatrix_lcms_theoretical_mc_o.txt @@ -0,0 +1 @@ +name sample01 sample02 sample03 sample04 sample05 sample06 sample07 sample08 sample09 sample10 sample11 sample12 sample13 sample14 sample15 sample16 sample17 sample18 sample19 sample20 sample21 sample22 sample23 sample24 sample25 sample26 sample27 sample28 sample29 sample30 sample31 sample32 sample33 sample34 sample35 sample36 sample37 sample38 sample39 sample40 sample41 sample42 sample43 sample44 sample45 sample46 sample47 sample48 sample49 sample50 M127T60 1348.35 1468.5 1348.35 1441.8 1361.7 1428.45 1428.45 1468.5 1455.15 1388.4 1468.5 1468.5 1388.4 1375.05 1428.45 1348.35 1428.45 1428.45 1441.8 1428.45 1375.05 1428.45 1415.1 1468.5 1361.7 1348.35 1388.4 1415.1 1415.1 1361.7 1428.45 1401.75 1428.45 1441.8 1375.05 1468.5 1455.15 1455.15 1388.4 1348.35 1428.45 1468.5 1375.05 1375.05 1415.1 1401.75 1361.7 1361.7 1468.5 1455.15 M135T70 2606.04 2581.91 2654.3 2606.04 2654.3 2485.39 2509.52 2606.04 2606.04 2654.3 2654.3 2630.17 2557.78 2630.17 2437.13 2606.04 2630.17 2581.91 2606.04 2533.65 2581.91 2581.91 2557.78 2485.39 2485.39 2581.91 2630.17 2509.52 2606.04 2606.04 2461.26 2533.65 2485.39 2485.39 2557.78 2437.13 2630.17 2509.52 2606.04 2533.65 2533.65 2437.13 2630.17 2654.3 2630.17 2485.39 2485.39 2606.04 2461.26 2557.78 M139T80 3796.58 3907.16 3944.02 3944.02 3870.3 3796.58 3833.44 3907.16 4017.74 3980.88 3907.16 3944.02 3944.02 3796.58 3980.88 3759.72 3870.3 3759.72 3944.02 3870.3 4054.6 4017.74 3796.58 3833.44 4017.74 3833.44 3759.72 3870.3 4054.6 3907.16 3833.44 3980.88 3722.86 3944.02 3870.3 3944.02 3759.72 3907.16 3796.58 3944.02 3944.02 3980.88 3759.72 3722.86 3833.44 3796.58 3907.16 3796.58 3796.58 3833.44 M147T90 2268.06 2400.18 2268.06 2290.08 2246.04 2312.1 2422.2 2268.06 2246.04 2422.2 2422.2 2224.02 2290.08 2378.16 2400.18 2268.06 2356.14 2290.08 2246.04 2422.2 2422.2 2378.16 2378.16 2290.08 2400.18 2400.18 2334.12 2400.18 2246.04 2312.1 2422.2 2224.02 2400.18 2246.04 2356.14 2268.06 2334.12 2400.18 2290.08 2268.06 2400.18 2400.18 2268.06 2400.18 2400.18 2312.1 2268.06 2334.12 2334.12 2400.18 M155T100 2296.9 2430.7 2274.6 2319.2 2252.3 2296.9 2252.3 2453 2453 2252.3 2453 2274.6 2408.4 2319.2 2363.8 2430.7 2408.4 2252.3 2341.5 2408.4 2453 2319.2 2386.1 2408.4 2341.5 2363.8 2341.5 2296.9 2319.2 2408.4 2386.1 2363.8 2319.2 2252.3 2252.3 2341.5 2453 2274.6 2363.8 2341.5 2341.5 2252.3 2252.3 2453 2319.2 2430.7 2319.2 2430.7 2319.2 2430.7 M157T110 1363.74 1457.33 1377.11 1363.74 1417.22 1350.37 1443.96 1417.22 1457.33 1457.33 1430.59 1443.96 1377.11 1443.96 1377.11 1470.7 1403.85 1430.59 1443.96 1363.74 1417.22 1457.33 1470.7 1363.74 1363.74 1457.33 1417.22 1377.11 1403.85 1443.96 1417.22 1377.11 1417.22 1350.37 1443.96 1377.11 1350.37 1350.37 1403.85 1417.22 1377.11 1470.7 1470.7 1350.37 1350.37 1443.96 1443.96 1430.59 1377.11 1363.74 M169T120 515 515 525 550 510 505 520 515 525 545 515 525 525 530 540 520 520 510 525 520 540 510 550 520 510 520 505 525 550 530 510 530 510 525 505 510 530 525 510 550 525 545 545 515 520 520 550 520 540 505 M337T121 1030 1030 1050 1100 1020 1010 1040 1030 1050 1090 1030 1050 1050 1060 1080 1040 1040 1020 1050 1040 1080 1020 1100 1040 1020 1040 1010 1050 1100 1060 1020 1060 1020 1050 1010 1020 1060 1050 1020 1100 1050 1090 1090 1030 1040 1040 1100 1040 1080 1010 M505T122 386.25 386.25 393.75 412.5 382.5 378.75 390 386.25 393.75 408.75 386.25 393.75 393.75 397.5 405 390 390 382.5 393.75 390 405 382.5 412.5 390 382.5 390 378.75 393.75 412.5 397.5 382.5 397.5 382.5 393.75 378.75 382.5 397.5 393.75 382.5 412.5 393.75 408.75 408.75 386.25 390 390 412.5 390 405 378.75 M197T150 5160.61 5160.61 4919.46 4967.69 4919.46 5112.38 5208.84 5112.38 5160.61 5160.61 5015.92 5257.07 5112.38 5208.84 5064.15 4871.23 5305.3 5112.38 5160.61 5160.61 5112.38 4871.23 4919.46 5160.61 5015.92 5257.07 5015.92 4967.69 4967.69 5305.3 4967.69 5160.61 5208.84 4967.69 5112.38 5305.3 4967.69 5208.84 5015.92 5160.61 5208.84 4919.46 5257.07 4967.69 4871.23 5160.61 5112.38 4967.69 5257.07 5305.3 M213T160 974.65 1013.25 984.3 1061.5 1061.5 974.65 1032.55 1032.55 1051.85 1042.2 1022.9 1061.5 1003.6 984.3 1051.85 1003.6 1042.2 1013.25 984.3 1061.5 1061.5 993.95 1003.6 1042.2 1003.6 1051.85 984.3 1013.25 1051.85 984.3 1042.2 1032.55 974.65 1051.85 1013.25 984.3 993.95 1003.6 1042.2 974.65 1003.6 984.3 1032.55 1032.55 1051.85 993.95 1061.5 1042.2 984.3 1042.2 M214T181 1144.5 1155.4 1199 1122.7 1155.4 1133.6 1122.7 1188.1 1111.8 1144.5 1133.6 1133.6 1166.3 1166.3 1144.5 1166.3 1122.7 1166.3 1155.4 1177.2 1155.4 1144.5 1199 1177.2 1100.9 1133.6 1177.2 1144.5 1177.2 1177.2 1122.7 1100.9 1177.2 1177.2 1166.3 1111.8 1177.2 1199 1144.5 1122.7 1100.9 1111.8 1122.7 1188.1 1144.5 1111.8 1144.5 1199 1177.2 1155.4 M214T182 1144.5 1155.4 1199 1122.7 1155.4 1133.6 1122.7 1188.1 1111.8 1144.5 1133.6 1133.6 1166.3 1166.3 1144.5 1166.3 1122.7 1166.3 1155.4 1177.2 1155.4 1144.5 1199 1177.2 1100.9 1133.6 1177.2 1144.5 1177.2 1177.2 1122.7 1100.9 1177.2 1177.2 1166.3 1111.8 1177.2 1199 1144.5 1122.7 1100.9 1111.8 1122.7 1188.1 1144.5 1111.8 1144.5 1199 1177.2 1155.4 M215T170 3859.8 3786.28 3970.08 3823.04 4006.84 3823.04 3970.08 4006.84 3859.8 3896.56 3749.52 3749.52 3786.28 3970.08 3823.04 3859.8 3970.08 3933.32 3970.08 3823.04 3970.08 3896.56 3823.04 3712.76 4006.84 3712.76 3970.08 3823.04 4006.84 3896.56 3823.04 3896.56 3970.08 3786.28 3712.76 3712.76 3970.08 3859.8 3933.32 3786.28 3786.28 3786.28 3970.08 3712.76 3749.52 3712.76 4006.84 4043.6 3859.8 3933.32 M225T182 1144.5 1155.4 1199 1122.7 1155.4 1133.6 1122.7 1188.1 1111.8 1144.5 1133.6 1133.6 1166.3 1166.3 1144.5 1166.3 1122.7 1166.3 1155.4 1177.2 1155.4 1144.5 1199 1177.2 1100.9 1133.6 1177.2 1144.5 1177.2 1177.2 1122.7 1100.9 1177.2 1177.2 1166.3 1111.8 1177.2 1199 1144.5 1122.7 1100.9 1111.8 1122.7 1188.1 1144.5 1111.8 1144.5 1199 1177.2 1155.4 M231T174 1929.9 1893.14 1985.04 1911.52 2003.42 1911.52 1985.04 2003.42 1929.9 1948.28 1874.76 1874.76 1893.14 1985.04 1911.52 1929.9 1985.04 1966.66 1985.04 1911.52 1985.04 1948.28 1911.52 1856.38 2003.42 1856.38 1985.04 1911.52 2003.42 1948.28 1911.52 1948.28 1985.04 1893.14 1856.38 1856.38 1985.04 1929.9 1966.66 1893.14 1893.14 1893.14 1985.04 1856.38 1874.76 1856.38 2003.42 2021.8 1929.9 1966.66 M426T180 1144.5 1155.4 1199 1122.7 1155.4 1133.6 1122.7 1188.1 1111.8 1144.5 1133.6 1133.6 1166.3 1166.3 1144.5 1166.3 1122.7 1166.3 1155.4 1177.2 1155.4 1144.5 1199 1177.2 1100.9 1133.6 1177.2 1144.5 1177.2 1177.2 1122.7 1100.9 1177.2 1177.2 1166.3 1111.8 1177.2 1199 1144.5 1122.7 1100.9 1111.8 1122.7 1188.1 1144.5 1111.8 1144.5 1199 1177.2 1155.4 M492T190 1080 1100 1090 1060 1100 1070 1090 1030 1080 1050 1070 1060 1040 1100 1080 1100 1100 1090 1020 1060 1050 1080 1020 1060 1100 1040 1040 1030 1020 1030 1060 1010 1100 1030 1090 1090 1060 1050 1060 1060 1090 1060 1080 1080 1080 1030 1070 1040 1060 1060 M493T192 166.41 169.49 167.95 163.33 169.49 164.87 167.95 158.71 166.41 161.79 164.87 163.33 160.25 169.49 166.41 169.49 169.49 167.95 157.16 163.33 161.79 166.41 157.16 163.33 169.49 160.25 160.25 158.71 157.16 158.71 163.33 155.62 169.49 158.71 167.95 167.95 163.33 161.79 163.33 163.33 167.95 163.33 166.41 166.41 166.41 158.71 164.87 160.25 163.33 163.33 M550T200 4766.3 4506.32 4419.66 4549.65 4722.97 4376.33 4462.99 4419.66 4549.65 4636.31 4376.33 4679.64 4722.97 4376.33 4549.65 4679.64 4376.33 4722.97 4679.64 4462.99 4766.3 4766.3 4766.3 4419.66 4506.32 4636.31 4419.66 4766.3 4419.66 4592.98 4506.32 4766.3 4462.99 4419.66 4549.65 4766.3 4679.64 4419.66 4766.3 4549.65 4592.98 4376.33 4419.66 4419.66 4592.98 4549.65 4766.3 4636.31 4636.31 4462.99 \ No newline at end of file diff --git a/tests/test_data/dataMatrix_lcms_theoretical_nls.txt b/tests/test_data/dataMatrix_lcms_theoretical_nls.txt new file mode 100644 index 0000000..17f92ec --- /dev/null +++ b/tests/test_data/dataMatrix_lcms_theoretical_nls.txt @@ -0,0 +1 @@ +name sample01 sample02 sample03 sample04 sample05 sample06 sample07 sample08 sample09 sample10 sample11 sample12 sample13 sample14 sample15 sample16 sample17 sample18 sample19 sample20 sample21 sample22 sample23 sample24 sample25 sample26 sample27 sample28 sample29 sample30 sample31 sample32 sample33 sample34 sample35 sample36 sample37 sample38 sample39 sample40 sample41 sample42 sample43 sample44 sample45 sample46 sample47 sample48 sample49 sample50 M117T80 3451.436364 3551.963636 3585.472727 3585.472727 3518.454545 3451.436364 3484.945455 3551.963636 3652.490909 3618.981818 3551.963636 3585.472727 3585.472727 3451.436364 3618.981818 3417.927273 3518.454545 3417.927273 3585.472727 3518.454545 3686 3652.490909 3451.436364 3484.945455 3652.490909 3484.945455 3417.927273 3518.454545 3686 3551.963636 3484.945455 3618.981818 3384.418182 3585.472727 3518.454545 3585.472727 3417.927273 3551.963636 3451.436364 3585.472727 3585.472727 3618.981818 3417.927273 3384.418182 3484.945455 3451.436364 3551.963636 3451.436364 3451.436364 3484.945455 M121T80 3416.922 3516.444 3549.618 3549.618 3483.27 3416.922 3450.096 3516.444 3615.966 3582.792 3516.444 3549.618 3549.618 3416.922 3582.792 3383.748 3483.27 3383.748 3549.618 3483.27 3649.14 3615.966 3416.922 3450.096 3615.966 3450.096 3383.748 3483.27 3649.14 3516.444 3450.096 3582.792 3350.574 3549.618 3483.27 3549.618 3383.748 3516.444 3416.922 3549.618 3549.618 3582.792 3383.748 3350.574 3450.096 3416.922 3516.444 3416.922 3416.922 3450.096 M122T80 136.67688 140.65776 141.98472 141.98472 139.3308 136.67688 138.00384 140.65776 144.63864 143.31168 140.65776 141.98472 141.98472 136.67688 143.31168 135.34992 139.3308 135.34992 141.98472 139.3308 145.9656 144.63864 136.67688 138.00384 144.63864 138.00384 135.34992 139.3308 145.9656 140.65776 138.00384 143.31168 134.02296 141.98472 139.3308 141.98472 135.34992 140.65776 136.67688 141.98472 141.98472 143.31168 135.34992 134.02296 138.00384 136.67688 140.65776 136.67688 136.67688 138.00384 M139T80 3796.58 3907.16 3944.02 3944.02 3870.3 3796.58 3833.44 3907.16 4017.74 3980.88 3907.16 3944.02 3944.02 3796.58 3980.88 3759.72 3870.3 3759.72 3944.02 3870.3 4054.6 4017.74 3796.58 3833.44 4017.74 3833.44 3759.72 3870.3 4054.6 3907.16 3833.44 3980.88 3722.86 3944.02 3870.3 3944.02 3759.72 3907.16 3796.58 3944.02 3944.02 3980.88 3759.72 3722.86 3833.44 3796.58 3907.16 3796.58 3796.58 3833.44 M140T80 151.8632 156.2864 157.7608 157.7608 154.812 151.8632 153.3376 156.2864 160.7096 159.2352 156.2864 157.7608 157.7608 151.8632 159.2352 150.3888 154.812 150.3888 157.7608 154.812 162.184 160.7096 151.8632 153.3376 160.7096 153.3376 150.3888 154.812 162.184 156.2864 153.3376 159.2352 148.9144 157.7608 154.812 157.7608 150.3888 156.2864 151.8632 157.7608 157.7608 159.2352 150.3888 148.9144 153.3376 151.8632 156.2864 151.8632 151.8632 153.3376 M157T80 2657.606 2735.012 2760.814 2760.814 2709.21 2657.606 2683.408 2735.012 2812.418 2786.616 2735.012 2760.814 2760.814 2657.606 2786.616 2631.804 2709.21 2631.804 2760.814 2709.21 2838.22 2812.418 2657.606 2683.408 2812.418 2683.408 2631.804 2709.21 2838.22 2735.012 2683.408 2786.616 2606.002 2760.814 2709.21 2760.814 2631.804 2735.012 2657.606 2760.814 2760.814 2786.616 2631.804 2606.002 2683.408 2657.606 2735.012 2657.606 2657.606 2683.408 M158T80 106.30424 109.40048 110.43256 110.43256 108.3684 106.30424 107.33632 109.40048 112.49672 111.46464 109.40048 110.43256 110.43256 106.30424 111.46464 105.27216 108.3684 105.27216 110.43256 108.3684 113.5288 112.49672 106.30424 107.33632 112.49672 107.33632 105.27216 108.3684 113.5288 109.40048 107.33632 111.46464 104.24008 110.43256 108.3684 110.43256 105.27216 109.40048 106.30424 110.43256 110.43256 111.46464 105.27216 104.24008 107.33632 106.30424 109.40048 106.30424 106.30424 107.33632 \ No newline at end of file diff --git a/tests/test_data/peaklist_lcms_pos_theoretical_mc_o.txt b/tests/test_data/peaklist_lcms_pos_theoretical_mc_o.txt new file mode 100644 index 0000000..bca2df1 --- /dev/null +++ b/tests/test_data/peaklist_lcms_pos_theoretical_mc_o.txt @@ -0,0 +1 @@ +name mz rt intensity M127T60 126.979204 60 1421.78 M135T70 135.028801 70 2581.91 M139T80 139.000181 80 3870.30 M147T90 147.028801 90 2334.12 M155T100 154.995096 100 2341.50 M157T110 156.989769 110 1417.22 M169T120 168.989654 120 520.00 M337T121 336.972032 121 1040.00 M505T122 504.954410 122.5 390.00 M197T150 197.005661 150 5112.38 M213T160 213.000576 160 1018.08 M214T181 213.529763 181 530.00 M214T182 214.031441 182 81.50 M215T170 215.016226 170 3859.8 M225T182 224.520736 182 300.00 M231T174 230.990164 173.5 1929.90 M426T180 426.052250 180 1149.95 M492T190 492.060410 190 1060.00 M493T192 493.063765 192.5 163.33 M550T200 550.065890 200 4549.65 \ No newline at end of file diff --git a/tests/test_data/peaklist_lcms_pos_theoretical_nls.txt b/tests/test_data/peaklist_lcms_pos_theoretical_nls.txt new file mode 100644 index 0000000..17a3246 --- /dev/null +++ b/tests/test_data/peaklist_lcms_pos_theoretical_nls.txt @@ -0,0 +1 @@ +name mz rt intensity M117T80 117.018236 80 1000 M121T80 120.989616 80 100 M122T80 121.992971 80 101 M139T80 139.000181 80 3870.3 M140T80 140.003536 80 387.03 M157T80 157.010746 80 2715.92 M158T80 158.014101 80 271.592 \ No newline at end of file diff --git a/tests/test_data/results_annotation.sqlite b/tests/test_data/results_annotation.sqlite index e61814b..96972c6 100644 Binary files a/tests/test_data/results_annotation.sqlite and b/tests/test_data/results_annotation.sqlite differ diff --git a/tests/test_data/results_annotation_excl_pattern.sqlite b/tests/test_data/results_annotation_excl_pattern.sqlite new file mode 100644 index 0000000..93bbdc5 Binary files /dev/null and b/tests/test_data/results_annotation_excl_pattern.sqlite differ diff --git a/tests/test_data/results_annotation_graph.sqlite b/tests/test_data/results_annotation_graph.sqlite index 057a996..e0a9d88 100644 Binary files a/tests/test_data/results_annotation_graph.sqlite and b/tests/test_data/results_annotation_graph.sqlite differ diff --git a/tests/test_data/results_annotation_mc_o.sqlite b/tests/test_data/results_annotation_mc_o.sqlite new file mode 100644 index 0000000..f554ec6 Binary files /dev/null and b/tests/test_data/results_annotation_mc_o.sqlite differ diff --git a/tests/test_data/results_annotation_nls.sqlite b/tests/test_data/results_annotation_nls.sqlite new file mode 100644 index 0000000..de10e33 Binary files /dev/null and b/tests/test_data/results_annotation_nls.sqlite differ diff --git a/tests/test_data/results_mfdb.sqlite b/tests/test_data/results_mfdb.sqlite index 1e5e221..4b1ecbf 100644 Binary files a/tests/test_data/results_mfdb.sqlite and b/tests/test_data/results_mfdb.sqlite differ diff --git a/tests/test_data/results_mfdb_excl_hrules.sqlite b/tests/test_data/results_mfdb_excl_hrules.sqlite new file mode 100644 index 0000000..0b1013b Binary files /dev/null and b/tests/test_data/results_mfdb_excl_hrules.sqlite differ diff --git a/tests/test_data/summary_mr_mc.txt b/tests/test_data/summary_mr_mc.txt index 7482a4d..885353a 100644 --- a/tests/test_data/summary_mr_mc.txt +++ b/tests/test_data/summary_mr_mc.txt @@ -1,135 +1,118 @@ -name mz rt intensity label charge oligomer isotope_labels_a isotope_ids isotope_labels_b atoms exact_mass ppm_error adduct C H N O P S molecular_formula compound_name compound_id -M127T60 126.979204 60.0 1421.775 126.97908600000001 -0.9292868904907359 [M+H]+ 1.0 3.0 0.0 5.0 1.0 0.0 CH3O5P Formyl phosphate 1969 -M127T60 126.979204 60.0 1421.775 126.97908600000001 -0.9292868904907359 [M+H]+ 1.0 3.0 0.0 5.0 1.0 0.0 CH3O5P Foscarnet HMDB0014670 -M127T60 126.979204 60.0 1421.775 126.97908600000001 -0.9292868904907359 [M+H]+ 1.0 3.0 0.0 5.0 1.0 0.0 CH3O5P Phosphonoformate 4022 -M127T60 126.979204 60.0 1421.775 126.97920199999999 -0.01575061094777821 [M+K]+ 3.0 4.0 0.0 3.0 0.0 0.0 C3H4O3 2-hydroxyacrylic Acid HMDB0062676 -M127T60 126.979204 60.0 1421.775 126.97920199999999 -0.01575061094777821 [M+K]+ 3.0 4.0 0.0 3.0 0.0 0.0 C3H4O3 3-Hydroxypropenoate 8947 -M127T60 126.979204 60.0 1421.775 126.97920199999999 -0.01575061094777821 [M+K]+ 3.0 4.0 0.0 3.0 0.0 0.0 C3H4O3 3-Oxopropanoate 721 -M127T60 126.979204 60.0 1421.775 126.97920199999999 -0.01575061094777821 [M+K]+ 3.0 4.0 0.0 3.0 0.0 0.0 C3H4O3 Glucosereductone HMDB0040261 -M127T60 126.979204 60.0 1421.775 126.97920199999999 -0.01575061094777821 [M+K]+ 3.0 4.0 0.0 3.0 0.0 0.0 C3H4O3 Malonic semialdehyde HMDB0011111 -M127T60 126.979204 60.0 1421.775 126.97920199999999 -0.01575061094777821 [M+K]+ 3.0 4.0 0.0 3.0 0.0 0.0 C3H4O3 Pyruvate 578 -M127T60 126.979204 60.0 1421.775 126.97920199999999 -0.01575061094777821 [M+K]+ 3.0 4.0 0.0 3.0 0.0 0.0 C3H4O3 Pyruvic acid HMDB0000243 -M135T70 135.028801 70.0 2581.91 135.028799 -0.01481165506737926 [M+H]+ 4.0 6.0 0.0 5.0 0.0 0.0 C4H6O5 3-Dehydro-L-threonate 2300 -M135T70 135.028801 70.0 2581.91 135.028799 -0.01481165506737926 [M+H]+ 4.0 6.0 0.0 5.0 0.0 0.0 C4H6O5 D-Malic acid HMDB0031518 -M135T70 135.028801 70.0 2581.91 135.028799 -0.01481165506737926 [M+H]+ 4.0 6.0 0.0 5.0 0.0 0.0 C4H6O5 L-Malic acid HMDB0000156 -M135T70 135.028801 70.0 2581.91 135.028799 -0.01481165506737926 [M+H]+ 4.0 6.0 0.0 5.0 0.0 0.0 C4H6O5 Malate 1018 -M135T70 135.028801 70.0 2581.91 135.028799 -0.01481165506737926 [M+H]+ 4.0 6.0 0.0 5.0 0.0 0.0 C4H6O5 Malic acid HMDB0000744 -M135T70 135.028801 70.0 2581.91 135.028799 -0.01481165506737926 [M+H]+ 4.0 6.0 0.0 5.0 0.0 0.0 C4H6O5 Velcorin HMDB0032872 -M139T80 139.000181 80.0 3870.3 139.00018 -0.007194235269876936 [M+Na]+ 4.0 4.0 0.0 4.0 0.0 0.0 C4H4O4 Formylpyruvate 1832 -M139T80 139.000181 80.0 3870.3 139.00018 -0.007194235269876936 [M+Na]+ 4.0 4.0 0.0 4.0 0.0 0.0 C4H4O4 Fumarate 652 -M139T80 139.000181 80.0 3870.3 139.00018 -0.007194235269876936 [M+Na]+ 4.0 4.0 0.0 4.0 0.0 0.0 C4H4O4 Fumaric acid HMDB0000134 -M139T80 139.000181 80.0 3870.3 139.00018 -0.007194235269876936 [M+Na]+ 4.0 4.0 0.0 4.0 0.0 0.0 C4H4O4 Maleic acid 1414 -M139T80 139.000181 80.0 3870.3 139.00018 -0.007194235269876936 [M+Na]+ 4.0 4.0 0.0 4.0 0.0 0.0 C4H4O4 Maleic acid HMDB0000176 -M147T90 147.028801 90.0 2334.12 147.028799 -0.013602777201155575 [M+H]+ 5.0 6.0 0.0 5.0 0.0 0.0 C5H6O5 2-Oxoglutarate 582 -M147T90 147.028801 90.0 2334.12 147.028799 -0.013602777201155575 [M+H]+ 5.0 6.0 0.0 5.0 0.0 0.0 C5H6O5 3-Oxoglutaric acid HMDB0013701 -M147T90 147.028801 90.0 2334.12 147.028799 -0.013602777201155575 [M+H]+ 5.0 6.0 0.0 5.0 0.0 0.0 C5H6O5 D-erythro-Ascorbate 239 -M147T90 147.028801 90.0 2334.12 147.028799 -0.013602777201155575 [M+H]+ 5.0 6.0 0.0 5.0 0.0 0.0 C5H6O5 Dehydro-D-arabinono-1,4-lactone 3919 -M147T90 147.028801 90.0 2334.12 147.028799 -0.013602777201155575 [M+H]+ 5.0 6.0 0.0 5.0 0.0 0.0 C5H6O5 Methyloxaloacetate 3778 -M147T90 147.028801 90.0 2334.12 147.028799 -0.013602777201155575 [M+H]+ 5.0 6.0 0.0 5.0 0.0 0.0 C5H6O5 Oxaloacetate 4-methyl ester 2636 -M147T90 147.028801 90.0 2334.12 147.028799 -0.013602777201155575 [M+H]+ 5.0 6.0 0.0 5.0 0.0 0.0 C5H6O5 Oxoglutaric acid HMDB0000208 -M155T100 154.995096 100.0 2341.5 154.995094 -0.012903634194708677 [M+Na]+ 4.0 4.0 0.0 5.0 0.0 0.0 C4H4O5 2-Hydroxyethylenedicarboxylate 2770 -M155T100 154.995096 100.0 2341.5 154.995094 -0.012903634194708677 [M+Na]+ 4.0 4.0 0.0 5.0 0.0 0.0 C4H4O5 Oxalacetic acid HMDB0000223 -M155T100 154.995096 100.0 2341.5 154.995094 -0.012903634194708677 [M+Na]+ 4.0 4.0 0.0 5.0 0.0 0.0 C4H4O5 Oxaloacetate 589 -M155T100 154.995096 100.0 2341.5 154.995094 -0.012903634194708677 [M+Na]+ 4.0 4.0 0.0 5.0 0.0 0.0 C4H4O5 enol-oxaloacetate 19221 -M155T100 154.995096 100.0 2341.5 154.995094 -0.012903634194708677 [M+Na]+ 4.0 4.0 0.0 5.0 0.0 0.0 C4H4O5 trans-2,3-Epoxysuccinate 2551 -M157T110 156.989769 110.0 1417.22 156.98965099999998 -0.7516419029093673 [M+H]+ 2.0 5.0 0.0 6.0 1.0 0.0 C2H5O6P 2-Phosphoglycolate 1183 -M157T110 156.989769 110.0 1417.22 156.98965099999998 -0.7516419029093673 [M+H]+ 2.0 5.0 0.0 6.0 1.0 0.0 C2H5O6P Phosphoglycolic acid HMDB0000816 -M157T110 156.989769 110.0 1417.22 156.989767 -0.012739683822516187 [M+K]+ 4.0 6.0 0.0 4.0 0.0 0.0 C4H6O4 3-methoxy-3-oxopropanoic acid HMDB0130020 -M157T110 156.989769 110.0 1417.22 156.989767 -0.012739683822516187 [M+K]+ 4.0 6.0 0.0 4.0 0.0 0.0 C4H6O4 4-Hydroxy-2-oxobutanoic acid HMDB0031204 -M157T110 156.989769 110.0 1417.22 156.989767 -0.012739683822516187 [M+K]+ 4.0 6.0 0.0 4.0 0.0 0.0 C4H6O4 D,L-malic semialdehyde 18220 -M157T110 156.989769 110.0 1417.22 156.989767 -0.012739683822516187 [M+K]+ 4.0 6.0 0.0 4.0 0.0 0.0 C4H6O4 Erythrono-1,4-lactone 11996 -M157T110 156.989769 110.0 1417.22 156.989767 -0.012739683822516187 [M+K]+ 4.0 6.0 0.0 4.0 0.0 0.0 C4H6O4 Erythrono-1,4-lactone HMDB0000349 -M157T110 156.989769 110.0 1417.22 156.989767 -0.012739683822516187 [M+K]+ 4.0 6.0 0.0 4.0 0.0 0.0 C4H6O4 Methyl oxalate 7890 -M157T110 156.989769 110.0 1417.22 156.989767 -0.012739683822516187 [M+K]+ 4.0 6.0 0.0 4.0 0.0 0.0 C4H6O4 Methylmalonate 1845 -M157T110 156.989769 110.0 1417.22 156.989767 -0.012739683822516187 [M+K]+ 4.0 6.0 0.0 4.0 0.0 0.0 C4H6O4 Methylmalonic acid HMDB0000202 -M157T110 156.989769 110.0 1417.22 156.989767 -0.012739683822516187 [M+K]+ 4.0 6.0 0.0 4.0 0.0 0.0 C4H6O4 Succinate 592 -M157T110 156.989769 110.0 1417.22 156.989767 -0.012739683822516187 [M+K]+ 4.0 6.0 0.0 4.0 0.0 0.0 C4H6O4 Succinic acid HMDB0000254 -M157T110 156.989769 110.0 1417.22 156.989767 -0.012739683822516187 [M+K]+ 4.0 6.0 0.0 4.0 0.0 0.0 C4H6O4 Threonolactone HMDB0000940 -M157T110 156.989769 110.0 1417.22 156.989767 -0.012739683822516187 [M+K]+ 4.0 6.0 0.0 4.0 0.0 0.0 C4H6O4 xi-3-Hydroxy-2-oxobutanoic acid HMDB0039324 -M169T120 168.989654 120.0 520.0 168.989767 0.6686795419921463 [M+K]+ 5.0 6.0 0.0 4.0 0.0 0.0 C5H6O4 (E)-Glutaconate 1867 -M169T120 168.989654 120.0 520.0 168.989767 0.6686795419921463 [M+K]+ 5.0 6.0 0.0 4.0 0.0 0.0 C5H6O4 2,5-Dioxopentanoate 851 -M169T120 168.989654 120.0 520.0 168.989767 0.6686795419921463 [M+K]+ 5.0 6.0 0.0 4.0 0.0 0.0 C5H6O4 2,5-Dioxopentanoate HMDB0060365 -M169T120 168.989654 120.0 520.0 168.989767 0.6686795419921463 [M+K]+ 5.0 6.0 0.0 4.0 0.0 0.0 C5H6O4 2-Hydroxyglutaric acid lactone HMDB0059743 -M169T120 168.989654 120.0 520.0 168.989767 0.6686795419921463 [M+K]+ 5.0 6.0 0.0 4.0 0.0 0.0 C5H6O4 2-Methylmaleate 1876 -M169T120 168.989654 120.0 520.0 168.989767 0.6686795419921463 [M+K]+ 5.0 6.0 0.0 4.0 0.0 0.0 C5H6O4 2-Pentendioate HMDB0061185 -M169T120 168.989654 120.0 520.0 168.989767 0.6686795419921463 [M+K]+ 5.0 6.0 0.0 4.0 0.0 0.0 C5H6O4 4,5-Dioxopentanoate 2163 -M169T120 168.989654 120.0 520.0 168.989767 0.6686795419921463 [M+K]+ 5.0 6.0 0.0 4.0 0.0 0.0 C5H6O4 Acetylpyruvate 1822 -M169T120 168.989654 120.0 520.0 168.989767 0.6686795419921463 [M+K]+ 5.0 6.0 0.0 4.0 0.0 0.0 C5H6O4 Citraconic acid HMDB0000634 -M169T120 168.989654 120.0 520.0 168.989767 0.6686795419921463 [M+K]+ 5.0 6.0 0.0 4.0 0.0 0.0 C5H6O4 Gamma-delta-Dioxovaleric acid HMDB0013233 -M169T120 168.989654 120.0 520.0 168.989767 0.6686795419921463 [M+K]+ 5.0 6.0 0.0 4.0 0.0 0.0 C5H6O4 Glutaconic acid HMDB0000620 -M169T120 168.989654 120.0 520.0 168.989767 0.6686795419921463 [M+K]+ 5.0 6.0 0.0 4.0 0.0 0.0 C5H6O4 Itaconate 887 -M169T120 168.989654 120.0 520.0 168.989767 0.6686795419921463 [M+K]+ 5.0 6.0 0.0 4.0 0.0 0.0 C5H6O4 Itaconic acid HMDB0002092 -M169T120 168.989654 120.0 520.0 168.989767 0.6686795419921463 [M+K]+ 5.0 6.0 0.0 4.0 0.0 0.0 C5H6O4 Mesaconate 1600 -M169T120 168.989654 120.0 520.0 168.989767 0.6686795419921463 [M+K]+ 5.0 6.0 0.0 4.0 0.0 0.0 C5H6O4 Mesaconic acid HMDB0000749 -M169T120 168.989654 120.0 520.0 168.989767 0.6686795419921463 [M+K]+ 5.0 6.0 0.0 4.0 0.0 0.0 C5H6O4 Methyl hydrogen fumarate HMDB0033809 -M169T120 168.989654 120.0 520.0 168.989767 0.6686795419921463 [M+K]+ 5.0 6.0 0.0 4.0 0.0 0.0 C5H6O4 glutaconate 19243 -M169T120 168.989654 120.0 520.0 [M+H]+ 1.0 1.0 168.98965099999998 -0.017752566521647164 [M+H]+ 3.0 5.0 0.0 6.0 1.0 0.0 C3H5O6P 3-Phosphonopyruvate 2162 -M169T120 168.989654 120.0 520.0 [M+H]+ 1.0 1.0 168.98965099999998 -0.017752566521647164 [M+H]+ 3.0 5.0 0.0 6.0 1.0 0.0 C3H5O6P Phosphoenolpyruvate 614 -M169T120 168.989654 120.0 520.0 [M+H]+ 1.0 1.0 168.98965099999998 -0.017752566521647164 [M+H]+ 3.0 5.0 0.0 6.0 1.0 0.0 C3H5O6P Phosphoenolpyruvic acid HMDB0000263 -M337T121 336.972032 121.0 1040.0 -M337T121 336.972032 121.0 1040.0 [2M+H]+ 1.0 2.0 -M505T122 504.95441 122.5 390.0 -M505T122 504.95441 122.5 390.0 [3M+H]+ 1.0 3.0 -M197T150 197.005661 150.0 5112.38 197.00565899999998 -0.010151992757589745 [M+Na]+ 6.0 6.0 0.0 6.0 0.0 0.0 C6H6O6 Aconitate Ion 38282 -M197T150 197.005661 150.0 5112.38 197.00565899999998 -0.010151992757589745 [M+Na]+ 6.0 6.0 0.0 6.0 0.0 0.0 C6H6O6 Dehydroascorbic acid 12423 -M197T150 197.005661 150.0 5112.38 197.00565899999998 -0.010151992757589745 [M+Na]+ 6.0 6.0 0.0 6.0 0.0 0.0 C6H6O6 Dehydroascorbic acid HMDB0001264 -M197T150 197.005661 150.0 5112.38 197.00565899999998 -0.010151992757589745 [M+Na]+ 6.0 6.0 0.0 6.0 0.0 0.0 C6H6O6 Dehydroascorbide(1-) HMDB0062706 -M197T150 197.005661 150.0 5112.38 197.00565899999998 -0.010151992757589745 [M+Na]+ 6.0 6.0 0.0 6.0 0.0 0.0 C6H6O6 cis-Aconitate 843 -M197T150 197.005661 150.0 5112.38 197.00565899999998 -0.010151992757589745 [M+Na]+ 6.0 6.0 0.0 6.0 0.0 0.0 C6H6O6 cis-Aconitic acid HMDB0000072 -M197T150 197.005661 150.0 5112.38 197.00565899999998 -0.010151992757589745 [M+Na]+ 6.0 6.0 0.0 6.0 0.0 0.0 C6H6O6 trans-Aconitate 1934 -M197T150 197.005661 150.0 5112.38 197.00565899999998 -0.010151992757589745 [M+Na]+ 6.0 6.0 0.0 6.0 0.0 0.0 C6H6O6 trans-Aconitic acid HMDB0000958 -M213T160 213.000576 160.0 1018.075 213.00057400000003 -0.009389645900807648 [M+Na]+ 6.0 6.0 0.0 7.0 0.0 0.0 C6H6O7 4-Hydroxy-Aconitate Ion 41990 -M213T160 213.000576 160.0 1018.075 213.00057400000003 -0.009389645900807648 [M+Na]+ 6.0 6.0 0.0 7.0 0.0 0.0 C6H6O7 Oxalosuccinate 3402 -M213T160 213.000576 160.0 1018.075 213.00057400000003 -0.009389645900807648 [M+Na]+ 6.0 6.0 0.0 7.0 0.0 0.0 C6H6O7 Oxalosuccinic acid HMDB0003974 -M215T170 215.016226 170.0 3859.8 -M215T170 215.016226 170.0 3859.8 [M+Na]+ 1.0 1.0 215.01622400000002 -0.009301623520878015 [M+Na]+ 6.0 8.0 0.0 7.0 0.0 0.0 C6H8O7 (1R,2R)-Isocitric acid HMDB0033717 -M215T170 215.016226 170.0 3859.8 [M+Na]+ 1.0 1.0 215.01622400000002 -0.009301623520878015 [M+Na]+ 6.0 8.0 0.0 7.0 0.0 0.0 C6H8O7 (1R,2S)-1-Hydroxypropane-1,2,3-tricarboxylate 863 -M215T170 215.016226 170.0 3859.8 [M+Na]+ 1.0 1.0 215.01622400000002 -0.009301623520878015 [M+Na]+ 6.0 8.0 0.0 7.0 0.0 0.0 C6H8O7 (1S,2S)-1-Hydroxypropane-1,2,3-tricarboxylate 3090 -M215T170 215.016226 170.0 3859.8 [M+Na]+ 1.0 1.0 215.01622400000002 -0.009301623520878015 [M+Na]+ 6.0 8.0 0.0 7.0 0.0 0.0 C6H8O7 (4R,5S)-4,5,6-Trihydroxy-2,3-dioxohexanoate 3064 -M215T170 215.016226 170.0 3859.8 [M+Na]+ 1.0 1.0 215.01622400000002 -0.009301623520878015 [M+Na]+ 6.0 8.0 0.0 7.0 0.0 0.0 C6H8O7 2,3-Dihydroxy-5-Oxo-Hexanedioate 37494 -M215T170 215.016226 170.0 3859.8 [M+Na]+ 1.0 1.0 215.01622400000002 -0.009301623520878015 [M+Na]+ 6.0 8.0 0.0 7.0 0.0 0.0 C6H8O7 2,3-Diketo-L-gulonate HMDB0006511 -M215T170 215.016226 170.0 3859.8 [M+Na]+ 1.0 1.0 215.01622400000002 -0.009301623520878015 [M+Na]+ 6.0 8.0 0.0 7.0 0.0 0.0 C6H8O7 2,3-Dioxo-L-gulonate 13506 -M215T170 215.016226 170.0 3859.8 [M+Na]+ 1.0 1.0 215.01622400000002 -0.009301623520878015 [M+Na]+ 6.0 8.0 0.0 7.0 0.0 0.0 C6H8O7 2,3-diketo-L-gulonate 20293 -M215T170 215.016226 170.0 3859.8 [M+Na]+ 1.0 1.0 215.01622400000002 -0.009301623520878015 [M+Na]+ 6.0 8.0 0.0 7.0 0.0 0.0 C6H8O7 2,3-diketogulonate HMDB0062803 -M215T170 215.016226 170.0 3859.8 [M+Na]+ 1.0 1.0 215.01622400000002 -0.009301623520878015 [M+Na]+ 6.0 8.0 0.0 7.0 0.0 0.0 C6H8O7 2,5-Didehydro-D-gluconate 2148 -M215T170 215.016226 170.0 3859.8 [M+Na]+ 1.0 1.0 215.01622400000002 -0.009301623520878015 [M+Na]+ 6.0 8.0 0.0 7.0 0.0 0.0 C6H8O7 2-Dehydro-3-deoxy-D-glucarate 2740 -M215T170 215.016226 170.0 3859.8 [M+Na]+ 1.0 1.0 215.01622400000002 -0.009301623520878015 [M+Na]+ 6.0 8.0 0.0 7.0 0.0 0.0 C6H8O7 4,5-Dehydro-D-Glucuronic Acid 36800 -M215T170 215.016226 170.0 3859.8 [M+Na]+ 1.0 1.0 215.01622400000002 -0.009301623520878015 [M+Na]+ 6.0 8.0 0.0 7.0 0.0 0.0 C6H8O7 5-Dehydro-4-deoxy-D-glucarate 1000 -M215T170 215.016226 170.0 3859.8 [M+Na]+ 1.0 1.0 215.01622400000002 -0.009301623520878015 [M+Na]+ 6.0 8.0 0.0 7.0 0.0 0.0 C6H8O7 5-keto-4-deoxy-D-glucarate 20308 -M215T170 215.016226 170.0 3859.8 [M+Na]+ 1.0 1.0 215.01622400000002 -0.009301623520878015 [M+Na]+ 6.0 8.0 0.0 7.0 0.0 0.0 C6H8O7 Carboxymethyloxysuccinate 2580 -M215T170 215.016226 170.0 3859.8 [M+Na]+ 1.0 1.0 215.01622400000002 -0.009301623520878015 [M+Na]+ 6.0 8.0 0.0 7.0 0.0 0.0 C6H8O7 Citrate 675 -M215T170 215.016226 170.0 3859.8 [M+Na]+ 1.0 1.0 215.01622400000002 -0.009301623520878015 [M+Na]+ 6.0 8.0 0.0 7.0 0.0 0.0 C6H8O7 Citric acid HMDB0000094 -M215T170 215.016226 170.0 3859.8 [M+Na]+ 1.0 1.0 215.01622400000002 -0.009301623520878015 [M+Na]+ 6.0 8.0 0.0 7.0 0.0 0.0 C6H8O7 D-Glucaro-1,4-lactone HMDB0041862 -M215T170 215.016226 170.0 3859.8 [M+Na]+ 1.0 1.0 215.01622400000002 -0.009301623520878015 [M+Na]+ 6.0 8.0 0.0 7.0 0.0 0.0 C6H8O7 D-threo-Isocitric acid HMDB0001874 -M215T170 215.016226 170.0 3859.8 [M+Na]+ 1.0 1.0 215.01622400000002 -0.009301623520878015 [M+Na]+ 6.0 8.0 0.0 7.0 0.0 0.0 C6H8O7 Diketogulonic acid HMDB0005971 -M215T170 215.016226 170.0 3859.8 [M+Na]+ 1.0 1.0 215.01622400000002 -0.009301623520878015 [M+Na]+ 6.0 8.0 0.0 7.0 0.0 0.0 C6H8O7 Isocitrate 781 -M215T170 215.016226 170.0 3859.8 [M+Na]+ 1.0 1.0 215.01622400000002 -0.009301623520878015 [M+Na]+ 6.0 8.0 0.0 7.0 0.0 0.0 C6H8O7 Isocitric acid HMDB0000193 -M231T174 230.990164 173.5 1929.9 230.990009 -0.6710246935683117 [M+Na]+ 9.0 4.0 0.0 6.0 0.0 0.0 C9H4O6 Stipitatonate 1792 -M231T174 230.990164 173.5 1929.9 [M+K]+ 1.0 1.0 230.990161 -0.012987566134584096 [M+K]+ 6.0 8.0 0.0 7.0 0.0 0.0 C6H8O7 (1R,2R)-Isocitric acid HMDB0033717 -M231T174 230.990164 173.5 1929.9 [M+K]+ 1.0 1.0 230.990161 -0.012987566134584096 [M+K]+ 6.0 8.0 0.0 7.0 0.0 0.0 C6H8O7 (1R,2S)-1-Hydroxypropane-1,2,3-tricarboxylate 863 -M231T174 230.990164 173.5 1929.9 [M+K]+ 1.0 1.0 230.990161 -0.012987566134584096 [M+K]+ 6.0 8.0 0.0 7.0 0.0 0.0 C6H8O7 (1S,2S)-1-Hydroxypropane-1,2,3-tricarboxylate 3090 -M231T174 230.990164 173.5 1929.9 [M+K]+ 1.0 1.0 230.990161 -0.012987566134584096 [M+K]+ 6.0 8.0 0.0 7.0 0.0 0.0 C6H8O7 (4R,5S)-4,5,6-Trihydroxy-2,3-dioxohexanoate 3064 -M231T174 230.990164 173.5 1929.9 [M+K]+ 1.0 1.0 230.990161 -0.012987566134584096 [M+K]+ 6.0 8.0 0.0 7.0 0.0 0.0 C6H8O7 2,3-Dihydroxy-5-Oxo-Hexanedioate 37494 -M231T174 230.990164 173.5 1929.9 [M+K]+ 1.0 1.0 230.990161 -0.012987566134584096 [M+K]+ 6.0 8.0 0.0 7.0 0.0 0.0 C6H8O7 2,3-Diketo-L-gulonate HMDB0006511 -M231T174 230.990164 173.5 1929.9 [M+K]+ 1.0 1.0 230.990161 -0.012987566134584096 [M+K]+ 6.0 8.0 0.0 7.0 0.0 0.0 C6H8O7 2,3-Dioxo-L-gulonate 13506 -M231T174 230.990164 173.5 1929.9 [M+K]+ 1.0 1.0 230.990161 -0.012987566134584096 [M+K]+ 6.0 8.0 0.0 7.0 0.0 0.0 C6H8O7 2,3-diketo-L-gulonate 20293 -M231T174 230.990164 173.5 1929.9 [M+K]+ 1.0 1.0 230.990161 -0.012987566134584096 [M+K]+ 6.0 8.0 0.0 7.0 0.0 0.0 C6H8O7 2,3-diketogulonate HMDB0062803 -M231T174 230.990164 173.5 1929.9 [M+K]+ 1.0 1.0 230.990161 -0.012987566134584096 [M+K]+ 6.0 8.0 0.0 7.0 0.0 0.0 C6H8O7 2,5-Didehydro-D-gluconate 2148 -M231T174 230.990164 173.5 1929.9 [M+K]+ 1.0 1.0 230.990161 -0.012987566134584096 [M+K]+ 6.0 8.0 0.0 7.0 0.0 0.0 C6H8O7 2-Dehydro-3-deoxy-D-glucarate 2740 -M231T174 230.990164 173.5 1929.9 [M+K]+ 1.0 1.0 230.990161 -0.012987566134584096 [M+K]+ 6.0 8.0 0.0 7.0 0.0 0.0 C6H8O7 4,5-Dehydro-D-Glucuronic Acid 36800 -M231T174 230.990164 173.5 1929.9 [M+K]+ 1.0 1.0 230.990161 -0.012987566134584096 [M+K]+ 6.0 8.0 0.0 7.0 0.0 0.0 C6H8O7 5-Dehydro-4-deoxy-D-glucarate 1000 -M231T174 230.990164 173.5 1929.9 [M+K]+ 1.0 1.0 230.990161 -0.012987566134584096 [M+K]+ 6.0 8.0 0.0 7.0 0.0 0.0 C6H8O7 5-keto-4-deoxy-D-glucarate 20308 -M231T174 230.990164 173.5 1929.9 [M+K]+ 1.0 1.0 230.990161 -0.012987566134584096 [M+K]+ 6.0 8.0 0.0 7.0 0.0 0.0 C6H8O7 Carboxymethyloxysuccinate 2580 -M231T174 230.990164 173.5 1929.9 [M+K]+ 1.0 1.0 230.990161 -0.012987566134584096 [M+K]+ 6.0 8.0 0.0 7.0 0.0 0.0 C6H8O7 Citrate 675 -M231T174 230.990164 173.5 1929.9 [M+K]+ 1.0 1.0 230.990161 -0.012987566134584096 [M+K]+ 6.0 8.0 0.0 7.0 0.0 0.0 C6H8O7 Citric acid HMDB0000094 -M231T174 230.990164 173.5 1929.9 [M+K]+ 1.0 1.0 230.990161 -0.012987566134584096 [M+K]+ 6.0 8.0 0.0 7.0 0.0 0.0 C6H8O7 D-Glucaro-1,4-lactone HMDB0041862 -M231T174 230.990164 173.5 1929.9 [M+K]+ 1.0 1.0 230.990161 -0.012987566134584096 [M+K]+ 6.0 8.0 0.0 7.0 0.0 0.0 C6H8O7 D-threo-Isocitric acid HMDB0001874 -M231T174 230.990164 173.5 1929.9 [M+K]+ 1.0 1.0 230.990161 -0.012987566134584096 [M+K]+ 6.0 8.0 0.0 7.0 0.0 0.0 C6H8O7 Diketogulonic acid HMDB0005971 -M231T174 230.990164 173.5 1929.9 [M+K]+ 1.0 1.0 230.990161 -0.012987566134584096 [M+K]+ 6.0 8.0 0.0 7.0 0.0 0.0 C6H8O7 Isocitrate 781 -M231T174 230.990164 173.5 1929.9 [M+K]+ 1.0 1.0 230.990161 -0.012987566134584096 [M+K]+ 6.0 8.0 0.0 7.0 0.0 0.0 C6H8O7 Isocitric acid HMDB0000193 -M426T180 426.05225 180.0 1149.95 426.05224499999997 -0.011735650035642068 [M+H]+ 12.0 19.0 4.0 7.0 2.0 1.0 C12H19N4O7P2S Thiamine pyrophosphate HMDB0001372 -M426T180 426.05225 180.0 1149.95 426.05231399999997 0.15021629468746972 [M+H]+ 14.0 19.0 1.0 10.0 0.0 2.0 C14H19NO10S2 Glucosinalbin HMDB0038401 -M426T180 426.05225 180.0 1149.95 426.05231399999997 0.15021629468746972 [M+H]+ 14.0 19.0 1.0 10.0 0.0 2.0 C14H19NO10S2 Sinalbin 40568 -M492T190 492.06041 190.0 1060.0 C M493T192 (13C) 14.4 492.060405 -0.010161354046311069 [M+Na]+ 14.0 23.0 4.0 8.0 2.0 1.0 C14H23N4O8P2S 2-(a-Hydroxyethyl)thiamine diphosphate HMDB0003904 -M493T192 493.063765 192.5 163.33 (13C) M492T190 C 14.4 493.064388 1.2635266613874616 [M+K]+ 22.0 18.0 2.0 9.0 0.0 0.0 C22H18N2O9 Kinamycin D 9223 -M550T200 550.06589 200.0 4549.65 550.0658840000001 -0.01090778404131062 [M+Na]+ 16.0 25.0 4.0 10.0 2.0 1.0 C16H25N4O10P2S 3-Carboxy-1-hydroxypropylthiamine diphosphate HMDB0006744 +name mz rt intensity label charge oligomer isotope_labels_a isotope_ids isotope_labels_b isotope_charges atoms exact_mass ppm_error rt_diff adduct C H N O P S molecular_formula compound_name compound_id compound_count compounds_hmdb_full_v4_0_20200909_v1 compounds_test compounds_test_rt +M127T60 126.979204 60.0 1421.775 126.97920199999999 0.01575061094777821 [M+K]+ 3 4 0 3 0 0 C3H4O3 2-hydroxyacrylic Acid HMDB0062676 1 1 0 0 +M127T60 126.979204 60.0 1421.775 126.97920199999999 0.01575061094777821 [M+K]+ 3 4 0 3 0 0 C3H4O3 3-Hydroxypropenoate 8947 1 0 1 0 +M127T60 126.979204 60.0 1421.775 126.97920199999999 0.01575061094777821 [M+K]+ 3 4 0 3 0 0 C3H4O3 3-Oxopropanoate 721 1 0 1 0 +M127T60 126.979204 60.0 1421.775 126.97920199999999 0.01575061094777821 [M+K]+ 3 4 0 3 0 0 C3H4O3 Glucosereductone HMDB0040261 1 1 0 0 +M127T60 126.979204 60.0 1421.775 126.97920199999999 0.01575061094777821 [M+K]+ 3 4 0 3 0 0 C3H4O3 Malonic semialdehyde HMDB0011111 1 1 0 0 +M127T60 126.979204 60.0 1421.775 126.97920199999999 0.01575061094777821 [M+K]+ 3 4 0 3 0 0 C3H4O3 Pyruvate 578 1 0 1 0 +M127T60 126.979204 60.0 1421.775 126.97920199999999 0.01575061094777821 [M+K]+ 3 4 0 3 0 0 C3H4O3 Pyruvic acid HMDB0000243 1 1 0 0 +M127T60 126.979204 60.0 1421.775 126.97908600000001 0.9292868904907359 [M+H]+ 1 3 0 5 1 0 CH3O5P Formyl phosphate 1969 1 0 1 0 +M127T60 126.979204 60.0 1421.775 126.97908600000001 0.9292868904907359 [M+H]+ 1 3 0 5 1 0 CH3O5P Foscarnet HMDB0014670 1 1 0 0 +M127T60 126.979204 60.0 1421.775 126.97908600000001 0.9292868904907359 [M+H]+ 1 3 0 5 1 0 CH3O5P Phosphonoformate 4022 1 0 1 0 +M135T70 135.028801 70.0 2581.91 135.028799 0.01481165506737926 [M+H]+ 4 6 0 5 0 0 C4H6O5 3-Dehydro-L-threonate 2300 1 0 1 0 +M135T70 135.028801 70.0 2581.91 135.028799 0.01481165506737926 [M+H]+ 4 6 0 5 0 0 C4H6O5 D-Malic acid HMDB0031518 1 1 0 0 +M135T70 135.028801 70.0 2581.91 135.028799 0.01481165506737926 [M+H]+ 4 6 0 5 0 0 C4H6O5 Malate 1018 1 0 1 0 +M135T70 135.028801 70.0 2581.91 135.028799 0.01481165506737926 [M+H]+ 4 6 0 5 0 0 C4H6O5 Malic acid HMDB0000156 1 1 0 0 +M135T70 135.028801 70.0 2581.91 135.028799 0.01481165506737926 [M+H]+ 4 6 0 5 0 0 C4H6O5 Velcorin HMDB0032872 1 1 0 0 +M139T80 139.000181 80.0 3870.3 139.00018 0.007194235269876936 [M+Na]+ 4 4 0 4 0 0 C4H4O4 Formylpyruvate 1832 1 0 1 0 +M139T80 139.000181 80.0 3870.3 139.00018 0.007194235269876936 [M+Na]+ 4 4 0 4 0 0 C4H4O4 Fumarate 652 1 0 1 0 +M139T80 139.000181 80.0 3870.3 139.00018 0.007194235269876936 [M+Na]+ 4 4 0 4 0 0 C4H4O4 Fumaric acid HMDB0000134 1 1 0 0 +M139T80 139.000181 80.0 3870.3 139.00018 0.007194235269876936 [M+Na]+ 4 4 0 4 0 0 C4H4O4 Maleic acid 1414 1 0 1 0 +M139T80 139.000181 80.0 3870.3 139.00018 0.007194235269876936 [M+Na]+ 4 4 0 4 0 0 C4H4O4 Maleic acid HMDB0000176 1 1 0 0 +M147T90 147.028801 90.0 2334.12 147.028799 0.013602777201155575 [M+H]+ 5 6 0 5 0 0 C5H6O5 2-Oxoglutarate 582 1 0 1 0 +M147T90 147.028801 90.0 2334.12 147.028799 0.013602777201155575 [M+H]+ 5 6 0 5 0 0 C5H6O5 3-Oxoglutaric acid HMDB0013701 1 1 0 0 +M147T90 147.028801 90.0 2334.12 147.028799 0.013602777201155575 [M+H]+ 5 6 0 5 0 0 C5H6O5 D-erythro-Ascorbate 239 1 0 1 0 +M147T90 147.028801 90.0 2334.12 147.028799 0.013602777201155575 [M+H]+ 5 6 0 5 0 0 C5H6O5 Dehydro-D-arabinono-1,4-lactone 3919 1 0 1 0 +M147T90 147.028801 90.0 2334.12 147.028799 0.013602777201155575 [M+H]+ 5 6 0 5 0 0 C5H6O5 Methyloxaloacetate 3778 1 0 1 0 +M147T90 147.028801 90.0 2334.12 147.028799 0.013602777201155575 [M+H]+ 5 6 0 5 0 0 C5H6O5 Oxaloacetate 4-methyl ester 2636 1 0 1 0 +M147T90 147.028801 90.0 2334.12 147.028799 0.013602777201155575 [M+H]+ 5 6 0 5 0 0 C5H6O5 Oxoglutaric acid HMDB0000208 1 1 0 0 +M155T100 154.995096 100.0 2341.5 154.995094 0.012903634194708677 [M+Na]+ 4 4 0 5 0 0 C4H4O5 2-Hydroxyethylenedicarboxylate 2770 1 0 1 0 +M155T100 154.995096 100.0 2341.5 154.995094 0.012903634194708677 [M+Na]+ 4 4 0 5 0 0 C4H4O5 Oxalacetic acid HMDB0000223 1 1 0 0 +M155T100 154.995096 100.0 2341.5 154.995094 0.012903634194708677 [M+Na]+ 4 4 0 5 0 0 C4H4O5 Oxaloacetate 589 1 0 1 0 +M155T100 154.995096 100.0 2341.5 154.995094 0.012903634194708677 [M+Na]+ 4 4 0 5 0 0 C4H4O5 enol-oxaloacetate 19221 1 0 1 0 +M155T100 154.995096 100.0 2341.5 154.995094 0.012903634194708677 [M+Na]+ 4 4 0 5 0 0 C4H4O5 trans-2,3-Epoxysuccinate 2551 1 0 1 0 +M157T110 156.989769 110.0 1417.22 156.989767 0.012739683822516187 [M+K]+ 4 6 0 4 0 0 C4H6O4 3-methoxy-3-oxopropanoic acid HMDB0130020 1 1 0 0 +M157T110 156.989769 110.0 1417.22 156.989767 0.012739683822516187 [M+K]+ 4 6 0 4 0 0 C4H6O4 4-Hydroxy-2-oxobutanoic acid HMDB0031204 1 1 0 0 +M157T110 156.989769 110.0 1417.22 156.989767 0.012739683822516187 [M+K]+ 4 6 0 4 0 0 C4H6O4 D,L-malic semialdehyde 18220 1 0 1 0 +M157T110 156.989769 110.0 1417.22 156.989767 0.012739683822516187 [M+K]+ 4 6 0 4 0 0 C4H6O4 Erythrono-1,4-lactone 11996 1 0 1 0 +M157T110 156.989769 110.0 1417.22 156.989767 0.012739683822516187 [M+K]+ 4 6 0 4 0 0 C4H6O4 Erythrono-1,4-lactone HMDB0000349 1 1 0 0 +M157T110 156.989769 110.0 1417.22 156.989767 0.012739683822516187 [M+K]+ 4 6 0 4 0 0 C4H6O4 Methyl oxalate 7890 1 0 1 0 +M157T110 156.989769 110.0 1417.22 156.989767 0.012739683822516187 [M+K]+ 4 6 0 4 0 0 C4H6O4 Methylmalonate 1845 1 0 1 0 +M157T110 156.989769 110.0 1417.22 156.989767 0.012739683822516187 [M+K]+ 4 6 0 4 0 0 C4H6O4 Methylmalonic acid HMDB0000202 1 1 0 0 +M157T110 156.989769 110.0 1417.22 156.989767 0.012739683822516187 [M+K]+ 4 6 0 4 0 0 C4H6O4 Succinate 592 1 0 1 0 +M157T110 156.989769 110.0 1417.22 156.989767 0.012739683822516187 [M+K]+ 4 6 0 4 0 0 C4H6O4 Succinic acid HMDB0000254 1 1 0 0 +M157T110 156.989769 110.0 1417.22 156.989767 0.012739683822516187 [M+K]+ 4 6 0 4 0 0 C4H6O4 Threonolactone HMDB0000940 1 1 0 0 +M157T110 156.989769 110.0 1417.22 156.989767 0.012739683822516187 [M+K]+ 4 6 0 4 0 0 C4H6O4 xi-3-Hydroxy-2-oxobutanoic acid HMDB0039324 1 1 0 0 +M157T110 156.989769 110.0 1417.22 156.98965099999998 0.7516419029093673 [M+H]+ 2 5 0 6 1 0 C2H5O6P 2-Phosphoglycolate 1183 1 0 1 0 +M157T110 156.989769 110.0 1417.22 156.98965099999998 0.7516419029093673 [M+H]+ 2 5 0 6 1 0 C2H5O6P Phosphoglycolic acid HMDB0000816 1 1 0 0 +M169T120 168.989654 120.0 520.0 [M+H]+ 1 1 168.98965099999998 0.017752566521647164 [M+H]+ 3 5 0 6 1 0 C3H5O6P 3-Phosphonopyruvate 2162 3 0 1 0 +M169T120 168.989654 120.0 520.0 [M+H]+ 1 1 168.98965099999998 0.017752566521647164 [M+H]+ 3 5 0 6 1 0 C3H5O6P Phosphoenolpyruvate 614 3 0 1 0 +M169T120 168.989654 120.0 520.0 [M+H]+ 1 1 168.98965099999998 0.017752566521647164 [M+H]+ 3 5 0 6 1 0 C3H5O6P Phosphoenolpyruvic acid HMDB0000263 3 1 0 0 +M169T120 168.989654 120.0 520.0 [M+H]+ 1 1 168.98965099999998 0.017752566521647164 2.0 [M+H]+ 3 5 0 6 1 0 C3H5O6P Phosphoenolpyruvic acid HMDB0000263 3 0 0 1 +M337T121 336.972032 121.0 1040.0 [2M+H]+ 1 2 336.97202599999997 0.017805632452394953 [2M+H]+ 3 5 0 6 1 0 C3H5O6P 3-Phosphonopyruvate 2162 3 0 1 0 +M337T121 336.972032 121.0 1040.0 [2M+H]+ 1 2 336.97202599999997 0.017805632452394953 [2M+H]+ 3 5 0 6 1 0 C3H5O6P Phosphoenolpyruvate 614 3 0 1 0 +M337T121 336.972032 121.0 1040.0 [2M+H]+ 1 2 336.97202599999997 0.017805632452394953 [2M+H]+ 3 5 0 6 1 0 C3H5O6P Phosphoenolpyruvic acid HMDB0000263 3 1 0 0 +M505T122 504.95441 122.5 390.0 [3M+H]+ 1 3 504.95440099999996 0.017823391609811126 [3M+H]+ 3 5 0 6 1 0 C3H5O6P 3-Phosphonopyruvate 2162 3 0 1 0 +M505T122 504.95441 122.5 390.0 [3M+H]+ 1 3 504.95440099999996 0.017823391609811126 [3M+H]+ 3 5 0 6 1 0 C3H5O6P Phosphoenolpyruvate 614 3 0 1 0 +M505T122 504.95441 122.5 390.0 [3M+H]+ 1 3 504.95440099999996 0.017823391609811126 [3M+H]+ 3 5 0 6 1 0 C3H5O6P Phosphoenolpyruvic acid HMDB0000263 3 1 0 0 +M197T150 197.005661 150.0 5112.38 197.00565899999998 0.010151992757589745 [M+Na]+ 6 6 0 6 0 0 C6H6O6 Aconitate Ion 38282 1 0 1 0 +M197T150 197.005661 150.0 5112.38 197.00565899999998 0.010151992757589745 [M+Na]+ 6 6 0 6 0 0 C6H6O6 Dehydroascorbic acid 12423 1 0 1 0 +M197T150 197.005661 150.0 5112.38 197.00565899999998 0.010151992757589745 [M+Na]+ 6 6 0 6 0 0 C6H6O6 Dehydroascorbic acid HMDB0001264 1 1 0 0 +M197T150 197.005661 150.0 5112.38 197.00565899999998 0.010151992757589745 [M+Na]+ 6 6 0 6 0 0 C6H6O6 Dehydroascorbide(1-) HMDB0062706 1 1 0 0 +M197T150 197.005661 150.0 5112.38 197.00565899999998 0.010151992757589745 [M+Na]+ 6 6 0 6 0 0 C6H6O6 cis-Aconitate 843 1 0 1 0 +M197T150 197.005661 150.0 5112.38 197.00565899999998 0.010151992757589745 [M+Na]+ 6 6 0 6 0 0 C6H6O6 cis-Aconitic acid HMDB0000072 1 1 0 0 +M197T150 197.005661 150.0 5112.38 197.00565899999998 0.010151992757589745 [M+Na]+ 6 6 0 6 0 0 C6H6O6 trans-Aconitate 1934 1 0 1 0 +M197T150 197.005661 150.0 5112.38 197.00565899999998 0.010151992757589745 [M+Na]+ 6 6 0 6 0 0 C6H6O6 trans-Aconitic acid HMDB0000958 1 1 0 0 +M213T160 213.000576 160.0 1018.075 213.00057400000003 0.009389645900807648 [M+Na]+ 6 6 0 7 0 0 C6H6O7 4-Hydroxy-Aconitate Ion 41990 1 0 1 0 +M213T160 213.000576 160.0 1018.075 213.00057400000003 0.009389645900807648 [M+Na]+ 6 6 0 7 0 0 C6H6O7 Oxalosuccinate 3402 1 0 1 0 +M213T160 213.000576 160.0 1018.075 213.00057400000003 0.009389645900807648 [M+Na]+ 6 6 0 7 0 0 C6H6O7 Oxalosuccinic acid HMDB0003974 1 1 0 0 +M215T170 215.016226 170.0 3859.8 [M+Na]+ 1 1 215.01622400000002 0.009301623520878015 [M+Na]+ 6 8 0 7 0 0 C6H8O7 (1R,2R)-Isocitric acid HMDB0033717 2 1 0 0 +M215T170 215.016226 170.0 3859.8 [M+Na]+ 1 1 215.01622400000002 0.009301623520878015 [M+Na]+ 6 8 0 7 0 0 C6H8O7 (1R,2S)-1-Hydroxypropane-1,2,3-tricarboxylate 863 2 0 1 0 +M215T170 215.016226 170.0 3859.8 [M+Na]+ 1 1 215.01622400000002 0.009301623520878015 [M+Na]+ 6 8 0 7 0 0 C6H8O7 (1S,2S)-1-Hydroxypropane-1,2,3-tricarboxylate 3090 2 0 1 0 +M215T170 215.016226 170.0 3859.8 [M+Na]+ 1 1 215.01622400000002 0.009301623520878015 [M+Na]+ 6 8 0 7 0 0 C6H8O7 (4R,5S)-4,5,6-Trihydroxy-2,3-dioxohexanoate 3064 2 0 1 0 +M215T170 215.016226 170.0 3859.8 [M+Na]+ 1 1 215.01622400000002 0.009301623520878015 [M+Na]+ 6 8 0 7 0 0 C6H8O7 2,3-Dihydroxy-5-Oxo-Hexanedioate 37494 2 0 1 0 +M215T170 215.016226 170.0 3859.8 [M+Na]+ 1 1 215.01622400000002 0.009301623520878015 [M+Na]+ 6 8 0 7 0 0 C6H8O7 2,3-Diketo-L-gulonate HMDB0006511 2 1 0 0 +M215T170 215.016226 170.0 3859.8 [M+Na]+ 1 1 215.01622400000002 0.009301623520878015 [M+Na]+ 6 8 0 7 0 0 C6H8O7 2,3-Dioxo-L-gulonate 13506 2 0 1 0 +M215T170 215.016226 170.0 3859.8 [M+Na]+ 1 1 215.01622400000002 0.009301623520878015 [M+Na]+ 6 8 0 7 0 0 C6H8O7 2,3-diketo-L-gulonate 20293 2 0 1 0 +M215T170 215.016226 170.0 3859.8 [M+Na]+ 1 1 215.01622400000002 0.009301623520878015 [M+Na]+ 6 8 0 7 0 0 C6H8O7 2,3-diketogulonate HMDB0062803 2 1 0 0 +M215T170 215.016226 170.0 3859.8 [M+Na]+ 1 1 215.01622400000002 0.009301623520878015 [M+Na]+ 6 8 0 7 0 0 C6H8O7 2,5-Didehydro-D-gluconate 2148 2 0 1 0 +M215T170 215.016226 170.0 3859.8 [M+Na]+ 1 1 215.01622400000002 0.009301623520878015 [M+Na]+ 6 8 0 7 0 0 C6H8O7 2-Dehydro-3-deoxy-D-glucarate 2740 2 0 1 0 +M215T170 215.016226 170.0 3859.8 [M+Na]+ 1 1 215.01622400000002 0.009301623520878015 [M+Na]+ 6 8 0 7 0 0 C6H8O7 4,5-Dehydro-D-Glucuronic Acid 36800 2 0 1 0 +M215T170 215.016226 170.0 3859.8 [M+Na]+ 1 1 215.01622400000002 0.009301623520878015 [M+Na]+ 6 8 0 7 0 0 C6H8O7 5-Dehydro-4-deoxy-D-glucarate 1000 2 0 1 0 +M215T170 215.016226 170.0 3859.8 [M+Na]+ 1 1 215.01622400000002 0.009301623520878015 [M+Na]+ 6 8 0 7 0 0 C6H8O7 5-keto-4-deoxy-D-glucarate 20308 2 0 1 0 +M215T170 215.016226 170.0 3859.8 [M+Na]+ 1 1 215.01622400000002 0.009301623520878015 [M+Na]+ 6 8 0 7 0 0 C6H8O7 Carboxymethyloxysuccinate 2580 2 0 1 0 +M215T170 215.016226 170.0 3859.8 [M+Na]+ 1 1 215.01622400000002 0.009301623520878015 [M+Na]+ 6 8 0 7 0 0 C6H8O7 Citrate 675 2 0 1 0 +M215T170 215.016226 170.0 3859.8 [M+Na]+ 1 1 215.01622400000002 0.009301623520878015 [M+Na]+ 6 8 0 7 0 0 C6H8O7 Citric acid HMDB0000094 2 1 0 0 +M215T170 215.016226 170.0 3859.8 [M+Na]+ 1 1 215.01622400000002 0.009301623520878015 [M+Na]+ 6 8 0 7 0 0 C6H8O7 D-Glucaro-1,4-lactone HMDB0041862 2 1 0 0 +M215T170 215.016226 170.0 3859.8 [M+Na]+ 1 1 215.01622400000002 0.009301623520878015 [M+Na]+ 6 8 0 7 0 0 C6H8O7 D-threo-Isocitric acid HMDB0001874 2 1 0 0 +M215T170 215.016226 170.0 3859.8 [M+Na]+ 1 1 215.01622400000002 0.009301623520878015 [M+Na]+ 6 8 0 7 0 0 C6H8O7 Diketogulonic acid HMDB0005971 2 1 0 0 +M215T170 215.016226 170.0 3859.8 [M+Na]+ 1 1 215.01622400000002 0.009301623520878015 [M+Na]+ 6 8 0 7 0 0 C6H8O7 Isocitrate 781 2 0 1 0 +M215T170 215.016226 170.0 3859.8 [M+Na]+ 1 1 215.01622400000002 0.009301623520878015 [M+Na]+ 6 8 0 7 0 0 C6H8O7 Isocitric acid HMDB0000193 2 1 0 0 +M231T174 230.990164 173.5 1929.9 [M+K]+ 1 1 230.990161 0.012987566134584096 [M+K]+ 6 8 0 7 0 0 C6H8O7 (1R,2R)-Isocitric acid HMDB0033717 2 1 0 0 +M231T174 230.990164 173.5 1929.9 [M+K]+ 1 1 230.990161 0.012987566134584096 [M+K]+ 6 8 0 7 0 0 C6H8O7 (1R,2S)-1-Hydroxypropane-1,2,3-tricarboxylate 863 2 0 1 0 +M231T174 230.990164 173.5 1929.9 [M+K]+ 1 1 230.990161 0.012987566134584096 [M+K]+ 6 8 0 7 0 0 C6H8O7 (1S,2S)-1-Hydroxypropane-1,2,3-tricarboxylate 3090 2 0 1 0 +M231T174 230.990164 173.5 1929.9 [M+K]+ 1 1 230.990161 0.012987566134584096 [M+K]+ 6 8 0 7 0 0 C6H8O7 (4R,5S)-4,5,6-Trihydroxy-2,3-dioxohexanoate 3064 2 0 1 0 +M231T174 230.990164 173.5 1929.9 [M+K]+ 1 1 230.990161 0.012987566134584096 [M+K]+ 6 8 0 7 0 0 C6H8O7 2,3-Dihydroxy-5-Oxo-Hexanedioate 37494 2 0 1 0 +M231T174 230.990164 173.5 1929.9 [M+K]+ 1 1 230.990161 0.012987566134584096 [M+K]+ 6 8 0 7 0 0 C6H8O7 2,3-Diketo-L-gulonate HMDB0006511 2 1 0 0 +M231T174 230.990164 173.5 1929.9 [M+K]+ 1 1 230.990161 0.012987566134584096 [M+K]+ 6 8 0 7 0 0 C6H8O7 2,3-Dioxo-L-gulonate 13506 2 0 1 0 +M231T174 230.990164 173.5 1929.9 [M+K]+ 1 1 230.990161 0.012987566134584096 [M+K]+ 6 8 0 7 0 0 C6H8O7 2,3-diketo-L-gulonate 20293 2 0 1 0 +M231T174 230.990164 173.5 1929.9 [M+K]+ 1 1 230.990161 0.012987566134584096 [M+K]+ 6 8 0 7 0 0 C6H8O7 2,3-diketogulonate HMDB0062803 2 1 0 0 +M231T174 230.990164 173.5 1929.9 [M+K]+ 1 1 230.990161 0.012987566134584096 [M+K]+ 6 8 0 7 0 0 C6H8O7 2,5-Didehydro-D-gluconate 2148 2 0 1 0 +M231T174 230.990164 173.5 1929.9 [M+K]+ 1 1 230.990161 0.012987566134584096 [M+K]+ 6 8 0 7 0 0 C6H8O7 2-Dehydro-3-deoxy-D-glucarate 2740 2 0 1 0 +M231T174 230.990164 173.5 1929.9 [M+K]+ 1 1 230.990161 0.012987566134584096 [M+K]+ 6 8 0 7 0 0 C6H8O7 4,5-Dehydro-D-Glucuronic Acid 36800 2 0 1 0 +M231T174 230.990164 173.5 1929.9 [M+K]+ 1 1 230.990161 0.012987566134584096 [M+K]+ 6 8 0 7 0 0 C6H8O7 5-Dehydro-4-deoxy-D-glucarate 1000 2 0 1 0 +M231T174 230.990164 173.5 1929.9 [M+K]+ 1 1 230.990161 0.012987566134584096 [M+K]+ 6 8 0 7 0 0 C6H8O7 5-keto-4-deoxy-D-glucarate 20308 2 0 1 0 +M231T174 230.990164 173.5 1929.9 [M+K]+ 1 1 230.990161 0.012987566134584096 [M+K]+ 6 8 0 7 0 0 C6H8O7 Carboxymethyloxysuccinate 2580 2 0 1 0 +M231T174 230.990164 173.5 1929.9 [M+K]+ 1 1 230.990161 0.012987566134584096 [M+K]+ 6 8 0 7 0 0 C6H8O7 Citrate 675 2 0 1 0 +M231T174 230.990164 173.5 1929.9 [M+K]+ 1 1 230.990161 0.012987566134584096 [M+K]+ 6 8 0 7 0 0 C6H8O7 Citric acid HMDB0000094 2 1 0 0 +M231T174 230.990164 173.5 1929.9 [M+K]+ 1 1 230.990161 0.012987566134584096 [M+K]+ 6 8 0 7 0 0 C6H8O7 D-Glucaro-1,4-lactone HMDB0041862 2 1 0 0 +M231T174 230.990164 173.5 1929.9 [M+K]+ 1 1 230.990161 0.012987566134584096 [M+K]+ 6 8 0 7 0 0 C6H8O7 D-threo-Isocitric acid HMDB0001874 2 1 0 0 +M231T174 230.990164 173.5 1929.9 [M+K]+ 1 1 230.990161 0.012987566134584096 [M+K]+ 6 8 0 7 0 0 C6H8O7 Diketogulonic acid HMDB0005971 2 1 0 0 +M231T174 230.990164 173.5 1929.9 [M+K]+ 1 1 230.990161 0.012987566134584096 [M+K]+ 6 8 0 7 0 0 C6H8O7 Isocitrate 781 2 0 1 0 +M231T174 230.990164 173.5 1929.9 [M+K]+ 1 1 230.990161 0.012987566134584096 [M+K]+ 6 8 0 7 0 0 C6H8O7 Isocitric acid HMDB0000193 2 1 0 0 +M426T180 426.05225 180.0 1149.95 426.05224499999997 0.011735650035642068 [M+H]+ 12 19 4 7 2 1 C12H19N4O7P2S Thiamine pyrophosphate HMDB0001372 1 1 0 0 +M426T180 426.05225 180.0 1149.95 426.05231399999997 -0.15021629468746972 [M+H]+ 14 19 1 10 0 2 C14H19NO10S2 Glucosinalbin HMDB0038401 1 1 0 0 +M426T180 426.05225 180.0 1149.95 426.05231399999997 -0.15021629468746972 [M+H]+ 14 19 1 10 0 2 C14H19NO10S2 Sinalbin 40568 1 0 1 0 +M492T190 492.06041 190.0 1060.0 C M493T192 (13C) 1 14.4 492.060405 0.010161354046311069 [M+Na]+ 14 23 4 8 2 1 C14H23N4O8P2S 2-(a-Hydroxyethyl)thiamine diphosphate HMDB0003904 2 1 0 0 +M493T192 493.063765 192.5 163.33 (13C) M492T190 C 1 14.4 493.06376 0.010140676303965665 [M+Na]+ 14 23 4 8 2 1 C14H23N4O8P2S 2-(a-Hydroxyethyl)thiamine diphosphate HMDB0003904 2 1 0 0 +M550T200 550.06589 200.0 4549.65 550.0658840000001 0.01090778404131062 [M+Na]+ 16 25 4 10 2 1 C16H25N4O10P2S 3-Carboxy-1-hydroxypropylthiamine diphosphate HMDB0006744 1 1 0 0 diff --git a/tests/test_data/summary_mr_mc_graphs.txt b/tests/test_data/summary_mr_mc_graphs.txt index d4fa71f..444e614 100644 --- a/tests/test_data/summary_mr_mc_graphs.txt +++ b/tests/test_data/summary_mr_mc_graphs.txt @@ -1,23 +1,51 @@ -name mz rt intensity group_id degree_cor sub_group_id degree n_nodes n_edges label charge oligomer isotope_labels_a isotope_ids isotope_labels_b atoms -M127T60 126.979204 60.0 1421.775 -M135T70 135.028801 70.0 2581.91 -M139T80 139.000181 80.0 3870.3 -M147T90 147.028801 90.0 2334.12 -M155T100 154.995096 100.0 2341.5 -M157T110 156.989769 110.0 1417.22 -M169T120 168.989654 120.0 520.0 1.0 2.0 1.0 2.0 3.0 2.0 -M169T120 168.989654 120.0 520.0 1.0 2.0 1.0 2.0 3.0 2.0 [M+H]+ 1.0 1.0 -M337T121 336.972032 121.0 1040.0 1.0 2.0 1.0 1.0 3.0 2.0 -M337T121 336.972032 121.0 1040.0 1.0 2.0 1.0 1.0 3.0 2.0 [2M+H]+ 1.0 2.0 -M505T122 504.95441 122.5 390.0 1.0 2.0 1.0 1.0 3.0 2.0 -M505T122 504.95441 122.5 390.0 1.0 2.0 1.0 1.0 3.0 2.0 [3M+H]+ 1.0 3.0 -M197T150 197.005661 150.0 5112.38 -M213T160 213.000576 160.0 1018.075 -M215T170 215.016226 170.0 3859.8 2.0 1.0 2.0 1.0 2.0 1.0 -M215T170 215.016226 170.0 3859.8 2.0 1.0 2.0 1.0 2.0 1.0 [M+Na]+ 1.0 1.0 -M231T174 230.990164 173.5 1929.9 2.0 1.0 2.0 1.0 2.0 1.0 -M231T174 230.990164 173.5 1929.9 2.0 1.0 2.0 1.0 2.0 1.0 [M+K]+ 1.0 1.0 -M426T180 426.05225 180.0 1149.95 -M492T190 492.06041 190.0 1060.0 3.0 1.0 3.0 1.0 2.0 1.0 C M493T192 (13C) 14.4 -M493T192 493.063765 192.5 163.33 3.0 1.0 3.0 1.0 2.0 1.0 (13C) M492T190 C 14.4 -M550T200 550.06589 200.0 4549.65 +name mz rt intensity group_id degree_cor sub_group_id degree n_nodes n_edges label charge oligomer isotope_labels_a isotope_ids isotope_labels_b isotope_charges atoms exact_mass ppm_error rt_diff adduct C H N O P S molecular_formula compound_name compound_id compound_count compounds_hmdb_full_v4_0_20200909_v1 +M127T60 126.979204 60.0 1421.775 126.97920199999999 0.01575061094777821 [M+K]+ 3 4 0 3 0 0 C3H4O3 2-hydroxyacrylic Acid HMDB0062676 1 1 +M127T60 126.979204 60.0 1421.775 126.97920199999999 0.01575061094777821 [M+K]+ 3 4 0 3 0 0 C3H4O3 Glucosereductone HMDB0040261 1 1 +M127T60 126.979204 60.0 1421.775 126.97920199999999 0.01575061094777821 [M+K]+ 3 4 0 3 0 0 C3H4O3 Malonic semialdehyde HMDB0011111 1 1 +M127T60 126.979204 60.0 1421.775 126.97920199999999 0.01575061094777821 [M+K]+ 3 4 0 3 0 0 C3H4O3 Pyruvic acid HMDB0000243 1 1 +M127T60 126.979204 60.0 1421.775 126.97908600000001 0.9292868904907359 [M+H]+ 1 3 0 5 1 0 CH3O5P Foscarnet HMDB0014670 1 1 +M135T70 135.028801 70.0 2581.91 135.028799 0.01481165506737926 [M+H]+ 4 6 0 5 0 0 C4H6O5 D-Malic acid HMDB0031518 1 1 +M135T70 135.028801 70.0 2581.91 135.028799 0.01481165506737926 [M+H]+ 4 6 0 5 0 0 C4H6O5 Malic acid HMDB0000156 1 1 +M135T70 135.028801 70.0 2581.91 135.028799 0.01481165506737926 [M+H]+ 4 6 0 5 0 0 C4H6O5 Velcorin HMDB0032872 1 1 +M139T80 139.000181 80.0 3870.3 139.00018 0.007194235269876936 [M+Na]+ 4 4 0 4 0 0 C4H4O4 Fumaric acid HMDB0000134 1 1 +M139T80 139.000181 80.0 3870.3 139.00018 0.007194235269876936 [M+Na]+ 4 4 0 4 0 0 C4H4O4 Maleic acid HMDB0000176 1 1 +M147T90 147.028801 90.0 2334.12 147.028799 0.013602777201155575 [M+H]+ 5 6 0 5 0 0 C5H6O5 3-Oxoglutaric acid HMDB0013701 1 1 +M147T90 147.028801 90.0 2334.12 147.028799 0.013602777201155575 [M+H]+ 5 6 0 5 0 0 C5H6O5 Oxoglutaric acid HMDB0000208 1 1 +M155T100 154.995096 100.0 2341.5 154.995094 0.012903634194708677 [M+Na]+ 4 4 0 5 0 0 C4H4O5 Oxalacetic acid HMDB0000223 1 1 +M157T110 156.989769 110.0 1417.22 156.989767 0.012739683822516187 [M+K]+ 4 6 0 4 0 0 C4H6O4 3-methoxy-3-oxopropanoic acid HMDB0130020 1 1 +M157T110 156.989769 110.0 1417.22 156.989767 0.012739683822516187 [M+K]+ 4 6 0 4 0 0 C4H6O4 4-Hydroxy-2-oxobutanoic acid HMDB0031204 1 1 +M157T110 156.989769 110.0 1417.22 156.989767 0.012739683822516187 [M+K]+ 4 6 0 4 0 0 C4H6O4 Erythrono-1,4-lactone HMDB0000349 1 1 +M157T110 156.989769 110.0 1417.22 156.989767 0.012739683822516187 [M+K]+ 4 6 0 4 0 0 C4H6O4 Methylmalonic acid HMDB0000202 1 1 +M157T110 156.989769 110.0 1417.22 156.989767 0.012739683822516187 [M+K]+ 4 6 0 4 0 0 C4H6O4 Succinic acid HMDB0000254 1 1 +M157T110 156.989769 110.0 1417.22 156.989767 0.012739683822516187 [M+K]+ 4 6 0 4 0 0 C4H6O4 Threonolactone HMDB0000940 1 1 +M157T110 156.989769 110.0 1417.22 156.989767 0.012739683822516187 [M+K]+ 4 6 0 4 0 0 C4H6O4 xi-3-Hydroxy-2-oxobutanoic acid HMDB0039324 1 1 +M157T110 156.989769 110.0 1417.22 156.98965099999998 0.7516419029093673 [M+H]+ 2 5 0 6 1 0 C2H5O6P Phosphoglycolic acid HMDB0000816 1 1 +M169T120 168.989654 120.0 520.0 1 2 1 2 3 2 [M+H]+ 1 1 168.98965099999998 0.017752566521647164 [M+H]+ 3 5 0 6 1 0 C3H5O6P Phosphoenolpyruvic acid HMDB0000263 3 1 +M337T121 336.972032 121.0 1040.0 1 2 1 1 3 2 [2M+H]+ 1 2 336.97202599999997 0.017805632452394953 [2M+H]+ 3 5 0 6 1 0 C3H5O6P Phosphoenolpyruvic acid HMDB0000263 3 1 +M505T122 504.95441 122.5 390.0 1 2 1 1 3 2 [3M+H]+ 1 3 504.95440099999996 0.017823391609811126 [3M+H]+ 3 5 0 6 1 0 C3H5O6P Phosphoenolpyruvic acid HMDB0000263 3 1 +M197T150 197.005661 150.0 5112.38 197.00565899999998 0.010151992757589745 [M+Na]+ 6 6 0 6 0 0 C6H6O6 Dehydroascorbic acid HMDB0001264 1 1 +M197T150 197.005661 150.0 5112.38 197.00565899999998 0.010151992757589745 [M+Na]+ 6 6 0 6 0 0 C6H6O6 Dehydroascorbide(1-) HMDB0062706 1 1 +M197T150 197.005661 150.0 5112.38 197.00565899999998 0.010151992757589745 [M+Na]+ 6 6 0 6 0 0 C6H6O6 cis-Aconitic acid HMDB0000072 1 1 +M197T150 197.005661 150.0 5112.38 197.00565899999998 0.010151992757589745 [M+Na]+ 6 6 0 6 0 0 C6H6O6 trans-Aconitic acid HMDB0000958 1 1 +M213T160 213.000576 160.0 1018.075 213.00057400000003 0.009389645900807648 [M+Na]+ 6 6 0 7 0 0 C6H6O7 Oxalosuccinic acid HMDB0003974 1 1 +M215T170 215.016226 170.0 3859.8 2 1 2 1 2 1 [M+Na]+ 1 1 215.01622400000002 0.009301623520878015 [M+Na]+ 6 8 0 7 0 0 C6H8O7 (1R,2R)-Isocitric acid HMDB0033717 2 1 +M215T170 215.016226 170.0 3859.8 2 1 2 1 2 1 [M+Na]+ 1 1 215.01622400000002 0.009301623520878015 [M+Na]+ 6 8 0 7 0 0 C6H8O7 2,3-Diketo-L-gulonate HMDB0006511 2 1 +M215T170 215.016226 170.0 3859.8 2 1 2 1 2 1 [M+Na]+ 1 1 215.01622400000002 0.009301623520878015 [M+Na]+ 6 8 0 7 0 0 C6H8O7 2,3-diketogulonate HMDB0062803 2 1 +M215T170 215.016226 170.0 3859.8 2 1 2 1 2 1 [M+Na]+ 1 1 215.01622400000002 0.009301623520878015 [M+Na]+ 6 8 0 7 0 0 C6H8O7 Citric acid HMDB0000094 2 1 +M215T170 215.016226 170.0 3859.8 2 1 2 1 2 1 [M+Na]+ 1 1 215.01622400000002 0.009301623520878015 [M+Na]+ 6 8 0 7 0 0 C6H8O7 D-Glucaro-1,4-lactone HMDB0041862 2 1 +M215T170 215.016226 170.0 3859.8 2 1 2 1 2 1 [M+Na]+ 1 1 215.01622400000002 0.009301623520878015 [M+Na]+ 6 8 0 7 0 0 C6H8O7 D-threo-Isocitric acid HMDB0001874 2 1 +M215T170 215.016226 170.0 3859.8 2 1 2 1 2 1 [M+Na]+ 1 1 215.01622400000002 0.009301623520878015 [M+Na]+ 6 8 0 7 0 0 C6H8O7 Diketogulonic acid HMDB0005971 2 1 +M215T170 215.016226 170.0 3859.8 2 1 2 1 2 1 [M+Na]+ 1 1 215.01622400000002 0.009301623520878015 [M+Na]+ 6 8 0 7 0 0 C6H8O7 Isocitric acid HMDB0000193 2 1 +M231T174 230.990164 173.5 1929.9 2 1 2 1 2 1 [M+K]+ 1 1 230.990161 0.012987566134584096 [M+K]+ 6 8 0 7 0 0 C6H8O7 (1R,2R)-Isocitric acid HMDB0033717 2 1 +M231T174 230.990164 173.5 1929.9 2 1 2 1 2 1 [M+K]+ 1 1 230.990161 0.012987566134584096 [M+K]+ 6 8 0 7 0 0 C6H8O7 2,3-Diketo-L-gulonate HMDB0006511 2 1 +M231T174 230.990164 173.5 1929.9 2 1 2 1 2 1 [M+K]+ 1 1 230.990161 0.012987566134584096 [M+K]+ 6 8 0 7 0 0 C6H8O7 2,3-diketogulonate HMDB0062803 2 1 +M231T174 230.990164 173.5 1929.9 2 1 2 1 2 1 [M+K]+ 1 1 230.990161 0.012987566134584096 [M+K]+ 6 8 0 7 0 0 C6H8O7 Citric acid HMDB0000094 2 1 +M231T174 230.990164 173.5 1929.9 2 1 2 1 2 1 [M+K]+ 1 1 230.990161 0.012987566134584096 [M+K]+ 6 8 0 7 0 0 C6H8O7 D-Glucaro-1,4-lactone HMDB0041862 2 1 +M231T174 230.990164 173.5 1929.9 2 1 2 1 2 1 [M+K]+ 1 1 230.990161 0.012987566134584096 [M+K]+ 6 8 0 7 0 0 C6H8O7 D-threo-Isocitric acid HMDB0001874 2 1 +M231T174 230.990164 173.5 1929.9 2 1 2 1 2 1 [M+K]+ 1 1 230.990161 0.012987566134584096 [M+K]+ 6 8 0 7 0 0 C6H8O7 Diketogulonic acid HMDB0005971 2 1 +M231T174 230.990164 173.5 1929.9 2 1 2 1 2 1 [M+K]+ 1 1 230.990161 0.012987566134584096 [M+K]+ 6 8 0 7 0 0 C6H8O7 Isocitric acid HMDB0000193 2 1 +M426T180 426.05225 180.0 1149.95 426.05224499999997 0.011735650035642068 [M+H]+ 12 19 4 7 2 1 C12H19N4O7P2S Thiamine pyrophosphate HMDB0001372 1 1 +M426T180 426.05225 180.0 1149.95 426.05231399999997 -0.15021629468746972 [M+H]+ 14 19 1 10 0 2 C14H19NO10S2 Glucosinalbin HMDB0038401 1 1 +M492T190 492.06041 190.0 1060.0 3 1 3 1 2 1 C M493T192 (13C) 1 14.4 492.060405 0.010161354046311069 [M+Na]+ 14 23 4 8 2 1 C14H23N4O8P2S 2-(a-Hydroxyethyl)thiamine diphosphate HMDB0003904 2 1 +M493T192 493.063765 192.5 163.33 3 1 3 1 2 1 (13C) M492T190 C 1 14.4 493.06376 0.010140676303965665 [M+Na]+ 14 23 4 8 2 1 C14H23N4O8P2S 2-(a-Hydroxyethyl)thiamine diphosphate HMDB0003904 2 1 +M550T200 550.06589 200.0 4549.65 550.0658840000001 0.01090778404131062 [M+Na]+ 16 25 4 10 2 1 C16H25N4O10P2S 3-Carboxy-1-hydroxypropylthiamine diphosphate HMDB0006744 1 1 diff --git a/tests/test_data/summary_mr_mc_nls.txt b/tests/test_data/summary_mr_mc_nls.txt new file mode 100644 index 0000000..1adb7de --- /dev/null +++ b/tests/test_data/summary_mr_mc_nls.txt @@ -0,0 +1,32 @@ +name mz rt intensity label charge oligomer isotope_labels_a isotope_ids isotope_labels_b isotope_charges atoms nl_labels nl_ids exact_mass ppm_error rt_diff adduct C H N O P S molecular_formula compound_name compound_id compound_count compounds_hmdb_full_v4_0_20200909_v1 +M117T80 117.018236 80 3518.454545 [M+H]+ 1 1 117.018235 0.008545676641552855 [M+H]+ 4 4 0 4 0 0 C4H4O4 Fumaric acid HMDB0000134 7.0 1 +M117T80 117.018236 80 3518.454545 [M+H]+ 1 1 117.018235 0.008545676641552855 [M+H]+ 4 4 0 4 0 0 C4H4O4 Maleic acid HMDB0000176 7.0 1 +M121T80 120.989616 80 3483.27 C M122T80 (13C) 1 3.7 H2O M139T80 120.989615 0.008265172159405936 [M+Na]+ 4 4 0 4 0 0 C4H4O4 Fumaric acid HMDB0000134 7.0 1 +M121T80 120.989616 80 3483.27 C M122T80 (13C) 1 3.7 H2O M139T80 120.989615 0.008265172159405936 [M+Na]+ 4 4 0 4 0 0 C4H4O4 Maleic acid HMDB0000176 7.0 1 +M121T80 120.989616 80 3483.27 C M122T80 (13C) 1 3.7 H2O M139T80 120.989615 0.008265172159405936 [M+Na]+ 4 2 0 3 0 0 C4H2O3 +M122T80 121.992971 80 139.3308 (13C) M121T80 C 1 3.7 H2O M140T80 121.99297 0.008197193637266498 [M+Na]+ 4 4 0 4 0 0 C4H4O4 Fumaric acid HMDB0000134 7.0 1 +M122T80 121.992971 80 139.3308 (13C) M121T80 C 1 3.7 H2O M140T80 121.99297 0.008197193637266498 [M+Na]+ 4 4 0 4 0 0 C4H4O4 Maleic acid HMDB0000176 7.0 1 +M122T80 121.992971 80 139.3308 (13C) M121T80 C 1 3.7 H2O M140T80 121.99297 0.008197193637266498 [M+Na]+ 4 2 0 3 0 0 C4H2O3 +M139T80 139.000181 80 3870.3 [M+Na]+ 1 1 C M140T80 (13C) 1 3.7 H2O,H2O M121T80,M157T80 139.00018 0.007194235269876936 [M+Na]+ 4 4 0 4 0 0 C4H4O4 Fumaric acid HMDB0000134 7.0 1 +M139T80 139.000181 80 3870.3 [M+Na]+ 1 1 C M140T80 (13C) 1 3.7 H2O,H2O M121T80,M157T80 139.00018 0.007194235269876936 [M+Na]+ 4 4 0 4 0 0 C4H4O4 Maleic acid HMDB0000176 7.0 1 +M139T80 139.000181 80 3870.3 [M+Na]+ 1 1 C M140T80 (13C) 1 3.7 H2O,H2O M121T80,M157T80 139.00018 0.007194235269876936 [M+Na]+ 4 2 0 3 0 0 C4H2O3 +M139T80 139.000181 80 3870.3 [M+Na]+ 1 1 C M140T80 (13C) 1 3.7 H2O,H2O M121T80,M157T80 139.000179 0.014388470643267915 [M+Na]+ 4 6 0 5 0 0 C4H6O5 D-Malic acid HMDB0031518 4.0 1 +M139T80 139.000181 80 3870.3 [M+Na]+ 1 1 C M140T80 (13C) 1 3.7 H2O,H2O M121T80,M157T80 139.000179 0.014388470643267915 [M+Na]+ 4 6 0 5 0 0 C4H6O5 Malic acid HMDB0000156 4.0 1 +M139T80 139.000181 80 3870.3 [M+Na]+ 1 1 C M140T80 (13C) 1 3.7 H2O,H2O M121T80,M157T80 139.000179 0.014388470643267915 [M+Na]+ 4 6 0 5 0 0 C4H6O5 Velcorin HMDB0032872 4.0 1 +M139T80 139.000181 80 3870.3 C M140T80 (13C) 1 3.7 H2O,H2O M121T80,M157T80 +M140T80 140.003536 80 154.812 (13C) M139T80 C 1 3.7 H2O,H2O M122T80,M158T80 140.003535 0.007142676772234664 [M+Na]+ 4 4 0 4 0 0 C4H4O4 Fumaric acid HMDB0000134 7.0 1 +M140T80 140.003536 80 154.812 (13C) M139T80 C 1 3.7 H2O,H2O M122T80,M158T80 140.003535 0.007142676772234664 [M+Na]+ 4 4 0 4 0 0 C4H4O4 Maleic acid HMDB0000176 7.0 1 +M140T80 140.003536 80 154.812 (13C) M139T80 C 1 3.7 H2O,H2O M122T80,M158T80 140.003535 0.007142676772234664 [M+Na]+ 4 2 0 3 0 0 C4H2O3 +M140T80 140.003536 80 154.812 (13C) M139T80 C 1 3.7 H2O,H2O M122T80,M158T80 140.003534 0.014285353646504993 [M+Na]+ 4 6 0 5 0 0 C4H6O5 D-Malic acid HMDB0031518 4.0 1 +M140T80 140.003536 80 154.812 (13C) M139T80 C 1 3.7 H2O,H2O M122T80,M158T80 140.003534 0.014285353646504993 [M+Na]+ 4 6 0 5 0 0 C4H6O5 Malic acid HMDB0000156 4.0 1 +M140T80 140.003536 80 154.812 (13C) M139T80 C 1 3.7 H2O,H2O M122T80,M158T80 140.003534 0.014285353646504993 [M+Na]+ 4 6 0 5 0 0 C4H6O5 Velcorin HMDB0032872 4.0 1 +M157T80 157.010746 80 2709.21 C M158T80 (13C) 1 3.7 H2O M139T80 157.010745 0.006368991026040621 [M+Na]+ 4 4 0 4 0 0 C4H4O4 Fumaric acid HMDB0000134 7.0 1 +M157T80 157.010746 80 2709.21 C M158T80 (13C) 1 3.7 H2O M139T80 157.010745 0.006368991026040621 [M+Na]+ 4 4 0 4 0 0 C4H4O4 Maleic acid HMDB0000176 7.0 1 +M157T80 157.010746 80 2709.21 C M158T80 (13C) 1 3.7 H2O M139T80 157.010744 0.012737981952191725 [M+Na]+ 4 6 0 5 0 0 C4H6O5 D-Malic acid HMDB0031518 4.0 1 +M157T80 157.010746 80 2709.21 C M158T80 (13C) 1 3.7 H2O M139T80 157.010744 0.012737981952191725 [M+Na]+ 4 6 0 5 0 0 C4H6O5 Malic acid HMDB0000156 4.0 1 +M157T80 157.010746 80 2709.21 C M158T80 (13C) 1 3.7 H2O M139T80 157.010744 0.012737981952191725 [M+Na]+ 4 6 0 5 0 0 C4H6O5 Velcorin HMDB0032872 4.0 1 +M158T80 158.014101 80 108.3684 (13C) M157T80 C 1 3.7 H2O M140T80 158.01409999999998 0.006328549325009302 [M+Na]+ 4 4 0 4 0 0 C4H4O4 Fumaric acid HMDB0000134 7.0 1 +M158T80 158.014101 80 108.3684 (13C) M157T80 C 1 3.7 H2O M140T80 158.01409999999998 0.006328549325009302 [M+Na]+ 4 4 0 4 0 0 C4H4O4 Maleic acid HMDB0000176 7.0 1 +M158T80 158.014101 80 108.3684 (13C) M157T80 C 1 3.7 H2O M140T80 158.014099 0.012657098550251489 [M+Na]+ 4 6 0 5 0 0 C4H6O5 D-Malic acid HMDB0031518 4.0 1 +M158T80 158.014101 80 108.3684 (13C) M157T80 C 1 3.7 H2O M140T80 158.014099 0.012657098550251489 [M+Na]+ 4 6 0 5 0 0 C4H6O5 Malic acid HMDB0000156 4.0 1 +M158T80 158.014101 80 108.3684 (13C) M157T80 C 1 3.7 H2O M140T80 158.014099 0.012657098550251489 [M+Na]+ 4 6 0 5 0 0 C4H6O5 Velcorin HMDB0032872 4.0 1 diff --git a/tests/test_data/summary_sr_mc.txt b/tests/test_data/summary_sr_mc.txt index 5bee80c..bcf4871 100644 --- a/tests/test_data/summary_sr_mc.txt +++ b/tests/test_data/summary_sr_mc.txt @@ -1,18 +1,18 @@ -name mz rt intensity label_charge_oligomer isotope_labels_a isotope_ids isotope_labels_b atoms molecular_formula adduct compound_name compound_id exact_mass ppm_error -M127T60 126.979204 60.0 1421.775 CH3O5P||CH3O5P||CH3O5P||C3H4O3||C3H4O3||C3H4O3||C3H4O3||C3H4O3||C3H4O3||C3H4O3 [M+H]+||[M+H]+||[M+H]+||[M+K]+||[M+K]+||[M+K]+||[M+K]+||[M+K]+||[M+K]+||[M+K]+ Formyl phosphate||Foscarnet||Phosphonoformate||2-hydroxyacrylic Acid||3-Hydroxypropenoate||3-Oxopropanoate||Glucosereductone||Malonic semialdehyde||Pyruvate||Pyruvic acid 1969||HMDB0014670||4022||HMDB0062676||8947||721||HMDB0040261||HMDB0011111||578||HMDB0000243 126.979086||126.979086||126.979086||126.979202||126.979202||126.979202||126.979202||126.979202||126.979202||126.979202 -0.93||-0.93||-0.93||-0.02||-0.02||-0.02||-0.02||-0.02||-0.02||-0.02 -M135T70 135.028801 70.0 2581.91 C4H6O5||C4H6O5||C4H6O5||C4H6O5||C4H6O5||C4H6O5 [M+H]+||[M+H]+||[M+H]+||[M+H]+||[M+H]+||[M+H]+ 3-Dehydro-L-threonate||D-Malic acid||L-Malic acid||Malate||Malic acid||Velcorin 2300||HMDB0031518||HMDB0000156||1018||HMDB0000744||HMDB0032872 135.028799||135.028799||135.028799||135.028799||135.028799||135.028799 -0.01||-0.01||-0.01||-0.01||-0.01||-0.01 -M139T80 139.000181 80.0 3870.3 C4H4O4||C4H4O4||C4H4O4||C4H4O4||C4H4O4 [M+Na]+||[M+Na]+||[M+Na]+||[M+Na]+||[M+Na]+ Formylpyruvate||Fumarate||Fumaric acid||Maleic acid||Maleic acid 1832||652||HMDB0000134||1414||HMDB0000176 139.00018||139.00018||139.00018||139.00018||139.00018 -0.01||-0.01||-0.01||-0.01||-0.01 -M147T90 147.028801 90.0 2334.12 C5H6O5||C5H6O5||C5H6O5||C5H6O5||C5H6O5||C5H6O5||C5H6O5 [M+H]+||[M+H]+||[M+H]+||[M+H]+||[M+H]+||[M+H]+||[M+H]+ 2-Oxoglutarate||3-Oxoglutaric acid||D-erythro-Ascorbate||Dehydro-D-arabinono-1,4-lactone||Methyloxaloacetate||Oxaloacetate 4-methyl ester||Oxoglutaric acid 582||HMDB0013701||239||3919||3778||2636||HMDB0000208 147.028799||147.028799||147.028799||147.028799||147.028799||147.028799||147.028799 -0.01||-0.01||-0.01||-0.01||-0.01||-0.01||-0.01 -M155T100 154.995096 100.0 2341.5 C4H4O5||C4H4O5||C4H4O5||C4H4O5||C4H4O5 [M+Na]+||[M+Na]+||[M+Na]+||[M+Na]+||[M+Na]+ 2-Hydroxyethylenedicarboxylate||Oxalacetic acid||Oxaloacetate||enol-oxaloacetate||trans-2,3-Epoxysuccinate 2770||HMDB0000223||589||19221||2551 154.995094||154.995094||154.995094||154.995094||154.995094 -0.01||-0.01||-0.01||-0.01||-0.01 -M157T110 156.989769 110.0 1417.22 C2H5O6P||C2H5O6P||C4H6O4||C4H6O4||C4H6O4||C4H6O4||C4H6O4||C4H6O4||C4H6O4||C4H6O4||C4H6O4||C4H6O4||C4H6O4||C4H6O4 [M+H]+||[M+H]+||[M+K]+||[M+K]+||[M+K]+||[M+K]+||[M+K]+||[M+K]+||[M+K]+||[M+K]+||[M+K]+||[M+K]+||[M+K]+||[M+K]+ 2-Phosphoglycolate||Phosphoglycolic acid||3-methoxy-3-oxopropanoic acid||4-Hydroxy-2-oxobutanoic acid||D,L-malic semialdehyde||Erythrono-1,4-lactone||Erythrono-1,4-lactone||Methyl oxalate||Methylmalonate||Methylmalonic acid||Succinate||Succinic acid||Threonolactone||xi-3-Hydroxy-2-oxobutanoic acid 1183||HMDB0000816||HMDB0130020||HMDB0031204||18220||11996||HMDB0000349||7890||1845||HMDB0000202||592||HMDB0000254||HMDB0000940||HMDB0039324 156.989651||156.989651||156.989767||156.989767||156.989767||156.989767||156.989767||156.989767||156.989767||156.989767||156.989767||156.989767||156.989767||156.989767 -0.75||-0.75||-0.01||-0.01||-0.01||-0.01||-0.01||-0.01||-0.01||-0.01||-0.01||-0.01||-0.01||-0.01 -M169T120 168.989654 120.0 520.0 [M+H]+::1::1 C5H6O4||C5H6O4||C5H6O4||C5H6O4||C5H6O4||C5H6O4||C5H6O4||C5H6O4||C5H6O4||C5H6O4||C5H6O4||C5H6O4||C5H6O4||C5H6O4||C5H6O4||C5H6O4||C5H6O4||C3H5O6P||C3H5O6P||C3H5O6P [M+K]+||[M+K]+||[M+K]+||[M+K]+||[M+K]+||[M+K]+||[M+K]+||[M+K]+||[M+K]+||[M+K]+||[M+K]+||[M+K]+||[M+K]+||[M+K]+||[M+K]+||[M+K]+||[M+K]+||[M+H]+||[M+H]+||[M+H]+ (E)-Glutaconate||2,5-Dioxopentanoate||2,5-Dioxopentanoate||2-Hydroxyglutaric acid lactone||2-Methylmaleate||2-Pentendioate||4,5-Dioxopentanoate||Acetylpyruvate||Citraconic acid||Gamma-delta-Dioxovaleric acid||Glutaconic acid||Itaconate||Itaconic acid||Mesaconate||Mesaconic acid||Methyl hydrogen fumarate||glutaconate||3-Phosphonopyruvate||Phosphoenolpyruvate||Phosphoenolpyruvic acid 1867||851||HMDB0060365||HMDB0059743||1876||HMDB0061185||2163||1822||HMDB0000634||HMDB0013233||HMDB0000620||887||HMDB0002092||1600||HMDB0000749||HMDB0033809||19243||2162||614||HMDB0000263 168.989767||168.989767||168.989767||168.989767||168.989767||168.989767||168.989767||168.989767||168.989767||168.989767||168.989767||168.989767||168.989767||168.989767||168.989767||168.989767||168.989767||168.989651||168.989651||168.989651 0.67||0.67||0.67||0.67||0.67||0.67||0.67||0.67||0.67||0.67||0.67||0.67||0.67||0.67||0.67||0.67||0.67||-0.02||-0.02||-0.02 -M337T121 336.972032 121.0 1040.0 [2M+H]+::1::2.0 None||None None||None -M505T122 504.95441 122.5 390.0 [3M+H]+::1::3.0 None||None None||None -M197T150 197.005661 150.0 5112.38 C6H6O6||C6H6O6||C6H6O6||C6H6O6||C6H6O6||C6H6O6||C6H6O6||C6H6O6 [M+Na]+||[M+Na]+||[M+Na]+||[M+Na]+||[M+Na]+||[M+Na]+||[M+Na]+||[M+Na]+ Aconitate Ion||Dehydroascorbic acid||Dehydroascorbic acid||Dehydroascorbide(1-)||cis-Aconitate||cis-Aconitic acid||trans-Aconitate||trans-Aconitic acid 38282||12423||HMDB0001264||HMDB0062706||843||HMDB0000072||1934||HMDB0000958 197.005659||197.005659||197.005659||197.005659||197.005659||197.005659||197.005659||197.005659 -0.01||-0.01||-0.01||-0.01||-0.01||-0.01||-0.01||-0.01 -M213T160 213.000576 160.0 1018.075 C6H6O7||C6H6O7||C6H6O7 [M+Na]+||[M+Na]+||[M+Na]+ 4-Hydroxy-Aconitate Ion||Oxalosuccinate||Oxalosuccinic acid 41990||3402||HMDB0003974 213.000574||213.000574||213.000574 -0.01||-0.01||-0.01 -M215T170 215.016226 170.0 3859.8 [M+Na]+::1::1 C6H8O7||C6H8O7||C6H8O7||C6H8O7||C6H8O7||C6H8O7||C6H8O7||C6H8O7||C6H8O7||C6H8O7||C6H8O7||C6H8O7||C6H8O7||C6H8O7||C6H8O7||C6H8O7||C6H8O7||C6H8O7||C6H8O7||C6H8O7||C6H8O7||C6H8O7 [M+Na]+||[M+Na]+||[M+Na]+||[M+Na]+||[M+Na]+||[M+Na]+||[M+Na]+||[M+Na]+||[M+Na]+||[M+Na]+||[M+Na]+||[M+Na]+||[M+Na]+||[M+Na]+||[M+Na]+||[M+Na]+||[M+Na]+||[M+Na]+||[M+Na]+||[M+Na]+||[M+Na]+||[M+Na]+ None||(1R,2R)-Isocitric acid||(1R,2S)-1-Hydroxypropane-1,2,3-tricarboxylate||(1S,2S)-1-Hydroxypropane-1,2,3-tricarboxylate||(4R,5S)-4,5,6-Trihydroxy-2,3-dioxohexanoate||2,3-Dihydroxy-5-Oxo-Hexanedioate||2,3-Diketo-L-gulonate||2,3-Dioxo-L-gulonate||2,3-diketo-L-gulonate||2,3-diketogulonate||2,5-Didehydro-D-gluconate||2-Dehydro-3-deoxy-D-glucarate||4,5-Dehydro-D-Glucuronic Acid||5-Dehydro-4-deoxy-D-glucarate||5-keto-4-deoxy-D-glucarate||Carboxymethyloxysuccinate||Citrate||Citric acid||D-Glucaro-1,4-lactone||D-threo-Isocitric acid||Diketogulonic acid||Isocitrate||Isocitric acid None||HMDB0033717||863||3090||3064||37494||HMDB0006511||13506||20293||HMDB0062803||2148||2740||36800||1000||20308||2580||675||HMDB0000094||HMDB0041862||HMDB0001874||HMDB0005971||781||HMDB0000193 215.016224||215.016224||215.016224||215.016224||215.016224||215.016224||215.016224||215.016224||215.016224||215.016224||215.016224||215.016224||215.016224||215.016224||215.016224||215.016224||215.016224||215.016224||215.016224||215.016224||215.016224||215.016224 -0.01||-0.01||-0.01||-0.01||-0.01||-0.01||-0.01||-0.01||-0.01||-0.01||-0.01||-0.01||-0.01||-0.01||-0.01||-0.01||-0.01||-0.01||-0.01||-0.01||-0.01||-0.01 -M231T174 230.990164 173.5 1929.9 [M+K]+::1::1 C9H4O6||C6H8O7||C6H8O7||C6H8O7||C6H8O7||C6H8O7||C6H8O7||C6H8O7||C6H8O7||C6H8O7||C6H8O7||C6H8O7||C6H8O7||C6H8O7||C6H8O7||C6H8O7||C6H8O7||C6H8O7||C6H8O7||C6H8O7||C6H8O7||C6H8O7||C6H8O7 [M+Na]+||[M+K]+||[M+K]+||[M+K]+||[M+K]+||[M+K]+||[M+K]+||[M+K]+||[M+K]+||[M+K]+||[M+K]+||[M+K]+||[M+K]+||[M+K]+||[M+K]+||[M+K]+||[M+K]+||[M+K]+||[M+K]+||[M+K]+||[M+K]+||[M+K]+||[M+K]+ Stipitatonate||(1R,2R)-Isocitric acid||(1R,2S)-1-Hydroxypropane-1,2,3-tricarboxylate||(1S,2S)-1-Hydroxypropane-1,2,3-tricarboxylate||(4R,5S)-4,5,6-Trihydroxy-2,3-dioxohexanoate||2,3-Dihydroxy-5-Oxo-Hexanedioate||2,3-Diketo-L-gulonate||2,3-Dioxo-L-gulonate||2,3-diketo-L-gulonate||2,3-diketogulonate||2,5-Didehydro-D-gluconate||2-Dehydro-3-deoxy-D-glucarate||4,5-Dehydro-D-Glucuronic Acid||5-Dehydro-4-deoxy-D-glucarate||5-keto-4-deoxy-D-glucarate||Carboxymethyloxysuccinate||Citrate||Citric acid||D-Glucaro-1,4-lactone||D-threo-Isocitric acid||Diketogulonic acid||Isocitrate||Isocitric acid 1792||HMDB0033717||863||3090||3064||37494||HMDB0006511||13506||20293||HMDB0062803||2148||2740||36800||1000||20308||2580||675||HMDB0000094||HMDB0041862||HMDB0001874||HMDB0005971||781||HMDB0000193 230.990009||230.990161||230.990161||230.990161||230.990161||230.990161||230.990161||230.990161||230.990161||230.990161||230.990161||230.990161||230.990161||230.990161||230.990161||230.990161||230.990161||230.990161||230.990161||230.990161||230.990161||230.990161||230.990161 -0.67||-0.01||-0.01||-0.01||-0.01||-0.01||-0.01||-0.01||-0.01||-0.01||-0.01||-0.01||-0.01||-0.01||-0.01||-0.01||-0.01||-0.01||-0.01||-0.01||-0.01||-0.01||-0.01 -M426T180 426.05225 180.0 1149.95 C12H19N4O7P2S||C14H19NO10S2||C14H19NO10S2 [M+H]+||[M+H]+||[M+H]+ Thiamine pyrophosphate||Glucosinalbin||Sinalbin HMDB0001372||HMDB0038401||40568 426.052245||426.052314||426.052314 -0.01||0.15||0.15 -M492T190 492.06041 190.0 1060.0 C M493T192 (13C) 14.4 C14H23N4O8P2S [M+Na]+ 2-(a-Hydroxyethyl)thiamine diphosphate HMDB0003904 492.060405 -0.01 -M493T192 493.063765 192.5 163.33 (13C) M492T190 C 14.4 C22H18N2O9 [M+K]+ Kinamycin D 9223 493.064388 1.26 -M550T200 550.06589 200.0 4549.65 C16H25N4O10P2S [M+Na]+ 3-Carboxy-1-hydroxypropylthiamine diphosphate HMDB0006744 550.065884 -0.01 +name mz rt intensity label_charge_oligomer isotope_labels_a isotope_ids isotope_labels_b isotope_charges atoms molecular_formula adduct compound_name compound_id compound_count exact_mass ppm_error rt_diff +M127T60 126.979204 60.0 1421.775 C3H4O3||C3H4O3||C3H4O3||C3H4O3||C3H4O3||C3H4O3||C3H4O3||CH3O5P||CH3O5P||CH3O5P [M+K]+||[M+K]+||[M+K]+||[M+K]+||[M+K]+||[M+K]+||[M+K]+||[M+H]+||[M+H]+||[M+H]+ 2-hydroxyacrylic Acid||3-Hydroxypropenoate||3-Oxopropanoate||Glucosereductone||Malonic semialdehyde||Pyruvate||Pyruvic acid||Formyl phosphate||Foscarnet||Phosphonoformate HMDB0062676||8947||721||HMDB0040261||HMDB0011111||578||HMDB0000243||1969||HMDB0014670||4022 1||1||1||1||1||1||1||1||1||1 126.979202||126.979202||126.979202||126.979202||126.979202||126.979202||126.979202||126.979086||126.979086||126.979086 0.02||0.02||0.02||0.02||0.02||0.02||0.02||0.93||0.93||0.93 None||None||None||None||None||None||None||None||None||None +M135T70 135.028801 70.0 2581.91 C4H6O5||C4H6O5||C4H6O5||C4H6O5||C4H6O5 [M+H]+||[M+H]+||[M+H]+||[M+H]+||[M+H]+ 3-Dehydro-L-threonate||D-Malic acid||Malate||Malic acid||Velcorin 2300||HMDB0031518||1018||HMDB0000156||HMDB0032872 1||1||1||1||1 135.028799||135.028799||135.028799||135.028799||135.028799 0.01||0.01||0.01||0.01||0.01 None||None||None||None||None +M139T80 139.000181 80.0 3870.3 C4H4O4||C4H4O4||C4H4O4||C4H4O4||C4H4O4 [M+Na]+||[M+Na]+||[M+Na]+||[M+Na]+||[M+Na]+ Formylpyruvate||Fumarate||Fumaric acid||Maleic acid||Maleic acid 1832||652||HMDB0000134||1414||HMDB0000176 1||1||1||1||1 139.00018||139.00018||139.00018||139.00018||139.00018 0.01||0.01||0.01||0.01||0.01 None||None||None||None||None +M147T90 147.028801 90.0 2334.12 C5H6O5||C5H6O5||C5H6O5||C5H6O5||C5H6O5||C5H6O5||C5H6O5 [M+H]+||[M+H]+||[M+H]+||[M+H]+||[M+H]+||[M+H]+||[M+H]+ 2-Oxoglutarate||3-Oxoglutaric acid||D-erythro-Ascorbate||Dehydro-D-arabinono-1,4-lactone||Methyloxaloacetate||Oxaloacetate 4-methyl ester||Oxoglutaric acid 582||HMDB0013701||239||3919||3778||2636||HMDB0000208 1||1||1||1||1||1||1 147.028799||147.028799||147.028799||147.028799||147.028799||147.028799||147.028799 0.01||0.01||0.01||0.01||0.01||0.01||0.01 None||None||None||None||None||None||None +M155T100 154.995096 100.0 2341.5 C4H4O5||C4H4O5||C4H4O5||C4H4O5||C4H4O5 [M+Na]+||[M+Na]+||[M+Na]+||[M+Na]+||[M+Na]+ 2-Hydroxyethylenedicarboxylate||Oxalacetic acid||Oxaloacetate||enol-oxaloacetate||trans-2,3-Epoxysuccinate 2770||HMDB0000223||589||19221||2551 1||1||1||1||1 154.995094||154.995094||154.995094||154.995094||154.995094 0.01||0.01||0.01||0.01||0.01 None||None||None||None||None +M157T110 156.989769 110.0 1417.22 C4H6O4||C4H6O4||C4H6O4||C4H6O4||C4H6O4||C4H6O4||C4H6O4||C4H6O4||C4H6O4||C4H6O4||C4H6O4||C4H6O4||C2H5O6P||C2H5O6P [M+K]+||[M+K]+||[M+K]+||[M+K]+||[M+K]+||[M+K]+||[M+K]+||[M+K]+||[M+K]+||[M+K]+||[M+K]+||[M+K]+||[M+H]+||[M+H]+ 3-methoxy-3-oxopropanoic acid||4-Hydroxy-2-oxobutanoic acid||D,L-malic semialdehyde||Erythrono-1,4-lactone||Erythrono-1,4-lactone||Methyl oxalate||Methylmalonate||Methylmalonic acid||Succinate||Succinic acid||Threonolactone||xi-3-Hydroxy-2-oxobutanoic acid||2-Phosphoglycolate||Phosphoglycolic acid HMDB0130020||HMDB0031204||18220||11996||HMDB0000349||7890||1845||HMDB0000202||592||HMDB0000254||HMDB0000940||HMDB0039324||1183||HMDB0000816 1||1||1||1||1||1||1||1||1||1||1||1||1||1 156.989767||156.989767||156.989767||156.989767||156.989767||156.989767||156.989767||156.989767||156.989767||156.989767||156.989767||156.989767||156.989651||156.989651 0.01||0.01||0.01||0.01||0.01||0.01||0.01||0.01||0.01||0.01||0.01||0.01||0.75||0.75 None||None||None||None||None||None||None||None||None||None||None||None||None||None +M169T120 168.989654 120.0 520.0 [M+H]+::1::1 C3H5O6P||C3H5O6P||C3H5O6P||C3H5O6P [M+H]+||[M+H]+||[M+H]+||[M+H]+ 3-Phosphonopyruvate||Phosphoenolpyruvate||Phosphoenolpyruvic acid||Phosphoenolpyruvic acid 2162||614||HMDB0000263||HMDB0000263 3||3||3||3 168.989651||168.989651||168.989651||168.989651 0.02||0.02||0.02||0.02 None||None||None||2.0 +M337T121 336.972032 121.0 1040.0 [2M+H]+::1::2 C3H5O6P||C3H5O6P||C3H5O6P [2M+H]+||[2M+H]+||[2M+H]+ 3-Phosphonopyruvate||Phosphoenolpyruvate||Phosphoenolpyruvic acid 2162||614||HMDB0000263 3||3||3 336.972026||336.972026||336.972026 0.02||0.02||0.02 None||None||None +M505T122 504.95441 122.5 390.0 [3M+H]+::1::3 C3H5O6P||C3H5O6P||C3H5O6P [3M+H]+||[3M+H]+||[3M+H]+ 3-Phosphonopyruvate||Phosphoenolpyruvate||Phosphoenolpyruvic acid 2162||614||HMDB0000263 3||3||3 504.954401||504.954401||504.954401 0.02||0.02||0.02 None||None||None +M197T150 197.005661 150.0 5112.38 C6H6O6||C6H6O6||C6H6O6||C6H6O6||C6H6O6||C6H6O6||C6H6O6||C6H6O6 [M+Na]+||[M+Na]+||[M+Na]+||[M+Na]+||[M+Na]+||[M+Na]+||[M+Na]+||[M+Na]+ Aconitate Ion||Dehydroascorbic acid||Dehydroascorbic acid||Dehydroascorbide(1-)||cis-Aconitate||cis-Aconitic acid||trans-Aconitate||trans-Aconitic acid 38282||12423||HMDB0001264||HMDB0062706||843||HMDB0000072||1934||HMDB0000958 1||1||1||1||1||1||1||1 197.005659||197.005659||197.005659||197.005659||197.005659||197.005659||197.005659||197.005659 0.01||0.01||0.01||0.01||0.01||0.01||0.01||0.01 None||None||None||None||None||None||None||None +M213T160 213.000576 160.0 1018.075 C6H6O7||C6H6O7||C6H6O7 [M+Na]+||[M+Na]+||[M+Na]+ 4-Hydroxy-Aconitate Ion||Oxalosuccinate||Oxalosuccinic acid 41990||3402||HMDB0003974 1||1||1 213.000574||213.000574||213.000574 0.01||0.01||0.01 None||None||None +M215T170 215.016226 170.0 3859.8 [M+Na]+::1::1 C6H8O7||C6H8O7||C6H8O7||C6H8O7||C6H8O7||C6H8O7||C6H8O7||C6H8O7||C6H8O7||C6H8O7||C6H8O7||C6H8O7||C6H8O7||C6H8O7||C6H8O7||C6H8O7||C6H8O7||C6H8O7||C6H8O7||C6H8O7||C6H8O7||C6H8O7 [M+Na]+||[M+Na]+||[M+Na]+||[M+Na]+||[M+Na]+||[M+Na]+||[M+Na]+||[M+Na]+||[M+Na]+||[M+Na]+||[M+Na]+||[M+Na]+||[M+Na]+||[M+Na]+||[M+Na]+||[M+Na]+||[M+Na]+||[M+Na]+||[M+Na]+||[M+Na]+||[M+Na]+||[M+Na]+ (1R,2R)-Isocitric acid||(1R,2S)-1-Hydroxypropane-1,2,3-tricarboxylate||(1S,2S)-1-Hydroxypropane-1,2,3-tricarboxylate||(4R,5S)-4,5,6-Trihydroxy-2,3-dioxohexanoate||2,3-Dihydroxy-5-Oxo-Hexanedioate||2,3-Diketo-L-gulonate||2,3-Dioxo-L-gulonate||2,3-diketo-L-gulonate||2,3-diketogulonate||2,5-Didehydro-D-gluconate||2-Dehydro-3-deoxy-D-glucarate||4,5-Dehydro-D-Glucuronic Acid||5-Dehydro-4-deoxy-D-glucarate||5-keto-4-deoxy-D-glucarate||Carboxymethyloxysuccinate||Citrate||Citric acid||D-Glucaro-1,4-lactone||D-threo-Isocitric acid||Diketogulonic acid||Isocitrate||Isocitric acid HMDB0033717||863||3090||3064||37494||HMDB0006511||13506||20293||HMDB0062803||2148||2740||36800||1000||20308||2580||675||HMDB0000094||HMDB0041862||HMDB0001874||HMDB0005971||781||HMDB0000193 2||2||2||2||2||2||2||2||2||2||2||2||2||2||2||2||2||2||2||2||2||2 215.016224||215.016224||215.016224||215.016224||215.016224||215.016224||215.016224||215.016224||215.016224||215.016224||215.016224||215.016224||215.016224||215.016224||215.016224||215.016224||215.016224||215.016224||215.016224||215.016224||215.016224||215.016224 0.01||0.01||0.01||0.01||0.01||0.01||0.01||0.01||0.01||0.01||0.01||0.01||0.01||0.01||0.01||0.01||0.01||0.01||0.01||0.01||0.01||0.01 None||None||None||None||None||None||None||None||None||None||None||None||None||None||None||None||None||None||None||None||None||None +M231T174 230.990164 173.5 1929.9 [M+K]+::1::1 C6H8O7||C6H8O7||C6H8O7||C6H8O7||C6H8O7||C6H8O7||C6H8O7||C6H8O7||C6H8O7||C6H8O7||C6H8O7||C6H8O7||C6H8O7||C6H8O7||C6H8O7||C6H8O7||C6H8O7||C6H8O7||C6H8O7||C6H8O7||C6H8O7||C6H8O7 [M+K]+||[M+K]+||[M+K]+||[M+K]+||[M+K]+||[M+K]+||[M+K]+||[M+K]+||[M+K]+||[M+K]+||[M+K]+||[M+K]+||[M+K]+||[M+K]+||[M+K]+||[M+K]+||[M+K]+||[M+K]+||[M+K]+||[M+K]+||[M+K]+||[M+K]+ (1R,2R)-Isocitric acid||(1R,2S)-1-Hydroxypropane-1,2,3-tricarboxylate||(1S,2S)-1-Hydroxypropane-1,2,3-tricarboxylate||(4R,5S)-4,5,6-Trihydroxy-2,3-dioxohexanoate||2,3-Dihydroxy-5-Oxo-Hexanedioate||2,3-Diketo-L-gulonate||2,3-Dioxo-L-gulonate||2,3-diketo-L-gulonate||2,3-diketogulonate||2,5-Didehydro-D-gluconate||2-Dehydro-3-deoxy-D-glucarate||4,5-Dehydro-D-Glucuronic Acid||5-Dehydro-4-deoxy-D-glucarate||5-keto-4-deoxy-D-glucarate||Carboxymethyloxysuccinate||Citrate||Citric acid||D-Glucaro-1,4-lactone||D-threo-Isocitric acid||Diketogulonic acid||Isocitrate||Isocitric acid HMDB0033717||863||3090||3064||37494||HMDB0006511||13506||20293||HMDB0062803||2148||2740||36800||1000||20308||2580||675||HMDB0000094||HMDB0041862||HMDB0001874||HMDB0005971||781||HMDB0000193 2||2||2||2||2||2||2||2||2||2||2||2||2||2||2||2||2||2||2||2||2||2 230.990161||230.990161||230.990161||230.990161||230.990161||230.990161||230.990161||230.990161||230.990161||230.990161||230.990161||230.990161||230.990161||230.990161||230.990161||230.990161||230.990161||230.990161||230.990161||230.990161||230.990161||230.990161 0.01||0.01||0.01||0.01||0.01||0.01||0.01||0.01||0.01||0.01||0.01||0.01||0.01||0.01||0.01||0.01||0.01||0.01||0.01||0.01||0.01||0.01 None||None||None||None||None||None||None||None||None||None||None||None||None||None||None||None||None||None||None||None||None||None +M426T180 426.05225 180.0 1149.95 C12H19N4O7P2S||C14H19NO10S2||C14H19NO10S2 [M+H]+||[M+H]+||[M+H]+ Thiamine pyrophosphate||Glucosinalbin||Sinalbin HMDB0001372||HMDB0038401||40568 1||1||1 426.052245||426.052314||426.052314 0.01||-0.15||-0.15 None||None||None +M492T190 492.06041 190.0 1060.0 C M493T192 (13C) 1 14.4 C14H23N4O8P2S [M+Na]+ 2-(a-Hydroxyethyl)thiamine diphosphate HMDB0003904 2 492.060405 0.01 None +M493T192 493.063765 192.5 163.33 (13C) M492T190 C 1 14.4 C14H23N4O8P2S [M+Na]+ 2-(a-Hydroxyethyl)thiamine diphosphate HMDB0003904 2 493.06376 0.01 None +M550T200 550.06589 200.0 4549.65 C16H25N4O10P2S [M+Na]+ 3-Carboxy-1-hydroxypropylthiamine diphosphate HMDB0006744 1 550.065884 0.01 None diff --git a/tests/test_data/summary_sr_sc.txt b/tests/test_data/summary_sr_sc.txt index 56a87c9..c6ffc77 100644 --- a/tests/test_data/summary_sr_sc.txt +++ b/tests/test_data/summary_sr_sc.txt @@ -1,18 +1,18 @@ -name mz rt intensity label_charge_oligomer isotope_labels_a isotope_ids isotope_labels_b atoms annotation -M127T60 126.979204 60.0 1421.775 CH3O5P::[M+H]+::Formyl phosphate::1969::126.979086::-0.93||CH3O5P::[M+H]+::Foscarnet::HMDB0014670::126.979086::-0.93||CH3O5P::[M+H]+::Phosphonoformate::4022::126.979086::-0.93||C3H4O3::[M+K]+::2-hydroxyacrylic Acid::HMDB0062676::126.979202::-0.02||C3H4O3::[M+K]+::3-Hydroxypropenoate::8947::126.979202::-0.02||C3H4O3::[M+K]+::3-Oxopropanoate::721::126.979202::-0.02||C3H4O3::[M+K]+::Glucosereductone::HMDB0040261::126.979202::-0.02||C3H4O3::[M+K]+::Malonic semialdehyde::HMDB0011111::126.979202::-0.02||C3H4O3::[M+K]+::Pyruvate::578::126.979202::-0.02||C3H4O3::[M+K]+::Pyruvic acid::HMDB0000243::126.979202::-0.02 -M135T70 135.028801 70.0 2581.91 C4H6O5::[M+H]+::3-Dehydro-L-threonate::2300::135.028799::-0.01||C4H6O5::[M+H]+::D-Malic acid::HMDB0031518::135.028799::-0.01||C4H6O5::[M+H]+::L-Malic acid::HMDB0000156::135.028799::-0.01||C4H6O5::[M+H]+::Malate::1018::135.028799::-0.01||C4H6O5::[M+H]+::Malic acid::HMDB0000744::135.028799::-0.01||C4H6O5::[M+H]+::Velcorin::HMDB0032872::135.028799::-0.01 -M139T80 139.000181 80.0 3870.3 C4H4O4::[M+Na]+::Formylpyruvate::1832::139.00018::-0.01||C4H4O4::[M+Na]+::Fumarate::652::139.00018::-0.01||C4H4O4::[M+Na]+::Fumaric acid::HMDB0000134::139.00018::-0.01||C4H4O4::[M+Na]+::Maleic acid::1414::139.00018::-0.01||C4H4O4::[M+Na]+::Maleic acid::HMDB0000176::139.00018::-0.01 -M147T90 147.028801 90.0 2334.12 C5H6O5::[M+H]+::2-Oxoglutarate::582::147.028799::-0.01||C5H6O5::[M+H]+::3-Oxoglutaric acid::HMDB0013701::147.028799::-0.01||C5H6O5::[M+H]+::D-erythro-Ascorbate::239::147.028799::-0.01||C5H6O5::[M+H]+::Dehydro-D-arabinono-1,4-lactone::3919::147.028799::-0.01||C5H6O5::[M+H]+::Methyloxaloacetate::3778::147.028799::-0.01||C5H6O5::[M+H]+::Oxaloacetate 4-methyl ester::2636::147.028799::-0.01||C5H6O5::[M+H]+::Oxoglutaric acid::HMDB0000208::147.028799::-0.01 -M155T100 154.995096 100.0 2341.5 C4H4O5::[M+Na]+::2-Hydroxyethylenedicarboxylate::2770::154.995094::-0.01||C4H4O5::[M+Na]+::Oxalacetic acid::HMDB0000223::154.995094::-0.01||C4H4O5::[M+Na]+::Oxaloacetate::589::154.995094::-0.01||C4H4O5::[M+Na]+::enol-oxaloacetate::19221::154.995094::-0.01||C4H4O5::[M+Na]+::trans-2,3-Epoxysuccinate::2551::154.995094::-0.01 -M157T110 156.989769 110.0 1417.22 C2H5O6P::[M+H]+::2-Phosphoglycolate::1183::156.989651::-0.75||C2H5O6P::[M+H]+::Phosphoglycolic acid::HMDB0000816::156.989651::-0.75||C4H6O4::[M+K]+::3-methoxy-3-oxopropanoic acid::HMDB0130020::156.989767::-0.01||C4H6O4::[M+K]+::4-Hydroxy-2-oxobutanoic acid::HMDB0031204::156.989767::-0.01||C4H6O4::[M+K]+::D,L-malic semialdehyde::18220::156.989767::-0.01||C4H6O4::[M+K]+::Erythrono-1,4-lactone::11996::156.989767::-0.01||C4H6O4::[M+K]+::Erythrono-1,4-lactone::HMDB0000349::156.989767::-0.01||C4H6O4::[M+K]+::Methyl oxalate::7890::156.989767::-0.01||C4H6O4::[M+K]+::Methylmalonate::1845::156.989767::-0.01||C4H6O4::[M+K]+::Methylmalonic acid::HMDB0000202::156.989767::-0.01||C4H6O4::[M+K]+::Succinate::592::156.989767::-0.01||C4H6O4::[M+K]+::Succinic acid::HMDB0000254::156.989767::-0.01||C4H6O4::[M+K]+::Threonolactone::HMDB0000940::156.989767::-0.01||C4H6O4::[M+K]+::xi-3-Hydroxy-2-oxobutanoic acid::HMDB0039324::156.989767::-0.01 -M169T120 168.989654 120.0 520.0 [M+H]+::1::1 C5H6O4::[M+K]+::(E)-Glutaconate::1867::168.989767::0.67||C5H6O4::[M+K]+::2,5-Dioxopentanoate::851::168.989767::0.67||C5H6O4::[M+K]+::2,5-Dioxopentanoate::HMDB0060365::168.989767::0.67||C5H6O4::[M+K]+::2-Hydroxyglutaric acid lactone::HMDB0059743::168.989767::0.67||C5H6O4::[M+K]+::2-Methylmaleate::1876::168.989767::0.67||C5H6O4::[M+K]+::2-Pentendioate::HMDB0061185::168.989767::0.67||C5H6O4::[M+K]+::4,5-Dioxopentanoate::2163::168.989767::0.67||C5H6O4::[M+K]+::Acetylpyruvate::1822::168.989767::0.67||C5H6O4::[M+K]+::Citraconic acid::HMDB0000634::168.989767::0.67||C5H6O4::[M+K]+::Gamma-delta-Dioxovaleric acid::HMDB0013233::168.989767::0.67||C5H6O4::[M+K]+::Glutaconic acid::HMDB0000620::168.989767::0.67||C5H6O4::[M+K]+::Itaconate::887::168.989767::0.67||C5H6O4::[M+K]+::Itaconic acid::HMDB0002092::168.989767::0.67||C5H6O4::[M+K]+::Mesaconate::1600::168.989767::0.67||C5H6O4::[M+K]+::Mesaconic acid::HMDB0000749::168.989767::0.67||C5H6O4::[M+K]+::Methyl hydrogen fumarate::HMDB0033809::168.989767::0.67||C5H6O4::[M+K]+::glutaconate::19243::168.989767::0.67||C3H5O6P::[M+H]+::3-Phosphonopyruvate::2162::168.989651::-0.02||C3H5O6P::[M+H]+::Phosphoenolpyruvate::614::168.989651::-0.02||C3H5O6P::[M+H]+::Phosphoenolpyruvic acid::HMDB0000263::168.989651::-0.02 -M337T121 336.972032 121.0 1040.0 [2M+H]+::1::2.0 -M505T122 504.95441 122.5 390.0 [3M+H]+::1::3.0 -M197T150 197.005661 150.0 5112.38 C6H6O6::[M+Na]+::Aconitate Ion::38282::197.005659::-0.01||C6H6O6::[M+Na]+::Dehydroascorbic acid::12423::197.005659::-0.01||C6H6O6::[M+Na]+::Dehydroascorbic acid::HMDB0001264::197.005659::-0.01||C6H6O6::[M+Na]+::Dehydroascorbide(1-)::HMDB0062706::197.005659::-0.01||C6H6O6::[M+Na]+::cis-Aconitate::843::197.005659::-0.01||C6H6O6::[M+Na]+::cis-Aconitic acid::HMDB0000072::197.005659::-0.01||C6H6O6::[M+Na]+::trans-Aconitate::1934::197.005659::-0.01||C6H6O6::[M+Na]+::trans-Aconitic acid::HMDB0000958::197.005659::-0.01 -M213T160 213.000576 160.0 1018.075 C6H6O7::[M+Na]+::4-Hydroxy-Aconitate Ion::41990::213.000574::-0.01||C6H6O7::[M+Na]+::Oxalosuccinate::3402::213.000574::-0.01||C6H6O7::[M+Na]+::Oxalosuccinic acid::HMDB0003974::213.000574::-0.01 -M215T170 215.016226 170.0 3859.8 [M+Na]+::1::1 C6H8O7::[M+Na]+::(1R,2R)-Isocitric acid::HMDB0033717::215.016224::-0.01||C6H8O7::[M+Na]+::(1R,2S)-1-Hydroxypropane-1,2,3-tricarboxylate::863::215.016224::-0.01||C6H8O7::[M+Na]+::(1S,2S)-1-Hydroxypropane-1,2,3-tricarboxylate::3090::215.016224::-0.01||C6H8O7::[M+Na]+::(4R,5S)-4,5,6-Trihydroxy-2,3-dioxohexanoate::3064::215.016224::-0.01||C6H8O7::[M+Na]+::2,3-Dihydroxy-5-Oxo-Hexanedioate::37494::215.016224::-0.01||C6H8O7::[M+Na]+::2,3-Diketo-L-gulonate::HMDB0006511::215.016224::-0.01||C6H8O7::[M+Na]+::2,3-Dioxo-L-gulonate::13506::215.016224::-0.01||C6H8O7::[M+Na]+::2,3-diketo-L-gulonate::20293::215.016224::-0.01||C6H8O7::[M+Na]+::2,3-diketogulonate::HMDB0062803::215.016224::-0.01||C6H8O7::[M+Na]+::2,5-Didehydro-D-gluconate::2148::215.016224::-0.01||C6H8O7::[M+Na]+::2-Dehydro-3-deoxy-D-glucarate::2740::215.016224::-0.01||C6H8O7::[M+Na]+::4,5-Dehydro-D-Glucuronic Acid::36800::215.016224::-0.01||C6H8O7::[M+Na]+::5-Dehydro-4-deoxy-D-glucarate::1000::215.016224::-0.01||C6H8O7::[M+Na]+::5-keto-4-deoxy-D-glucarate::20308::215.016224::-0.01||C6H8O7::[M+Na]+::Carboxymethyloxysuccinate::2580::215.016224::-0.01||C6H8O7::[M+Na]+::Citrate::675::215.016224::-0.01||C6H8O7::[M+Na]+::Citric acid::HMDB0000094::215.016224::-0.01||C6H8O7::[M+Na]+::D-Glucaro-1,4-lactone::HMDB0041862::215.016224::-0.01||C6H8O7::[M+Na]+::D-threo-Isocitric acid::HMDB0001874::215.016224::-0.01||C6H8O7::[M+Na]+::Diketogulonic acid::HMDB0005971::215.016224::-0.01||C6H8O7::[M+Na]+::Isocitrate::781::215.016224::-0.01||C6H8O7::[M+Na]+::Isocitric acid::HMDB0000193::215.016224::-0.01 -M231T174 230.990164 173.5 1929.9 [M+K]+::1::1 C9H4O6::[M+Na]+::Stipitatonate::1792::230.990009::-0.67||C6H8O7::[M+K]+::(1R,2R)-Isocitric acid::HMDB0033717::230.990161::-0.01||C6H8O7::[M+K]+::(1R,2S)-1-Hydroxypropane-1,2,3-tricarboxylate::863::230.990161::-0.01||C6H8O7::[M+K]+::(1S,2S)-1-Hydroxypropane-1,2,3-tricarboxylate::3090::230.990161::-0.01||C6H8O7::[M+K]+::(4R,5S)-4,5,6-Trihydroxy-2,3-dioxohexanoate::3064::230.990161::-0.01||C6H8O7::[M+K]+::2,3-Dihydroxy-5-Oxo-Hexanedioate::37494::230.990161::-0.01||C6H8O7::[M+K]+::2,3-Diketo-L-gulonate::HMDB0006511::230.990161::-0.01||C6H8O7::[M+K]+::2,3-Dioxo-L-gulonate::13506::230.990161::-0.01||C6H8O7::[M+K]+::2,3-diketo-L-gulonate::20293::230.990161::-0.01||C6H8O7::[M+K]+::2,3-diketogulonate::HMDB0062803::230.990161::-0.01||C6H8O7::[M+K]+::2,5-Didehydro-D-gluconate::2148::230.990161::-0.01||C6H8O7::[M+K]+::2-Dehydro-3-deoxy-D-glucarate::2740::230.990161::-0.01||C6H8O7::[M+K]+::4,5-Dehydro-D-Glucuronic Acid::36800::230.990161::-0.01||C6H8O7::[M+K]+::5-Dehydro-4-deoxy-D-glucarate::1000::230.990161::-0.01||C6H8O7::[M+K]+::5-keto-4-deoxy-D-glucarate::20308::230.990161::-0.01||C6H8O7::[M+K]+::Carboxymethyloxysuccinate::2580::230.990161::-0.01||C6H8O7::[M+K]+::Citrate::675::230.990161::-0.01||C6H8O7::[M+K]+::Citric acid::HMDB0000094::230.990161::-0.01||C6H8O7::[M+K]+::D-Glucaro-1,4-lactone::HMDB0041862::230.990161::-0.01||C6H8O7::[M+K]+::D-threo-Isocitric acid::HMDB0001874::230.990161::-0.01||C6H8O7::[M+K]+::Diketogulonic acid::HMDB0005971::230.990161::-0.01||C6H8O7::[M+K]+::Isocitrate::781::230.990161::-0.01||C6H8O7::[M+K]+::Isocitric acid::HMDB0000193::230.990161::-0.01 -M426T180 426.05225 180.0 1149.95 C12H19N4O7P2S::[M+H]+::Thiamine pyrophosphate::HMDB0001372::426.052245::-0.01||C14H19NO10S2::[M+H]+::Glucosinalbin::HMDB0038401::426.052314::0.15||C14H19NO10S2::[M+H]+::Sinalbin::40568::426.052314::0.15 -M492T190 492.06041 190.0 1060.0 C M493T192 (13C) 14.4 C14H23N4O8P2S::[M+Na]+::2-(a-Hydroxyethyl)thiamine diphosphate::HMDB0003904::492.060405::-0.01 -M493T192 493.063765 192.5 163.33 (13C) M492T190 C 14.4 C22H18N2O9::[M+K]+::Kinamycin D::9223::493.064388::1.26 -M550T200 550.06589 200.0 4549.65 C16H25N4O10P2S::[M+Na]+::3-Carboxy-1-hydroxypropylthiamine diphosphate::HMDB0006744::550.065884::-0.01 +name mz rt intensity label_charge_oligomer isotope_labels_a isotope_ids isotope_labels_b isotope_charges atoms compounds_hmdb_full_v4_0_20200909_v1 compounds_test compounds_test_rt +M127T60 126.979204 60.0 1421.775 C3H4O3::[M+K]+::2-hydroxyacrylic Acid::HMDB0062676::1::126.979202::0.02||C3H4O3::[M+K]+::Glucosereductone::HMDB0040261::1::126.979202::0.02||C3H4O3::[M+K]+::Malonic semialdehyde::HMDB0011111::1::126.979202::0.02||C3H4O3::[M+K]+::Pyruvic acid::HMDB0000243::1::126.979202::0.02||CH3O5P::[M+H]+::Foscarnet::HMDB0014670::1::126.979086::0.93 C3H4O3::[M+K]+::3-Hydroxypropenoate::8947::1::126.979202::0.02||C3H4O3::[M+K]+::3-Oxopropanoate::721::1::126.979202::0.02||C3H4O3::[M+K]+::Pyruvate::578::1::126.979202::0.02||CH3O5P::[M+H]+::Formyl phosphate::1969::1::126.979086::0.93||CH3O5P::[M+H]+::Phosphonoformate::4022::1::126.979086::0.93 +M135T70 135.028801 70.0 2581.91 C4H6O5::[M+H]+::D-Malic acid::HMDB0031518::1::135.028799::0.01||C4H6O5::[M+H]+::Malic acid::HMDB0000156::1::135.028799::0.01||C4H6O5::[M+H]+::Velcorin::HMDB0032872::1::135.028799::0.01 C4H6O5::[M+H]+::3-Dehydro-L-threonate::2300::1::135.028799::0.01||C4H6O5::[M+H]+::Malate::1018::1::135.028799::0.01 +M139T80 139.000181 80.0 3870.3 C4H4O4::[M+Na]+::Fumaric acid::HMDB0000134::1::139.00018::0.01||C4H4O4::[M+Na]+::Maleic acid::HMDB0000176::1::139.00018::0.01 C4H4O4::[M+Na]+::Formylpyruvate::1832::1::139.00018::0.01||C4H4O4::[M+Na]+::Fumarate::652::1::139.00018::0.01||C4H4O4::[M+Na]+::Maleic acid::1414::1::139.00018::0.01 +M147T90 147.028801 90.0 2334.12 C5H6O5::[M+H]+::3-Oxoglutaric acid::HMDB0013701::1::147.028799::0.01||C5H6O5::[M+H]+::Oxoglutaric acid::HMDB0000208::1::147.028799::0.01 C5H6O5::[M+H]+::2-Oxoglutarate::582::1::147.028799::0.01||C5H6O5::[M+H]+::D-erythro-Ascorbate::239::1::147.028799::0.01||C5H6O5::[M+H]+::Dehydro-D-arabinono-1,4-lactone::3919::1::147.028799::0.01||C5H6O5::[M+H]+::Methyloxaloacetate::3778::1::147.028799::0.01||C5H6O5::[M+H]+::Oxaloacetate 4-methyl ester::2636::1::147.028799::0.01 +M155T100 154.995096 100.0 2341.5 C4H4O5::[M+Na]+::Oxalacetic acid::HMDB0000223::1::154.995094::0.01 C4H4O5::[M+Na]+::2-Hydroxyethylenedicarboxylate::2770::1::154.995094::0.01||C4H4O5::[M+Na]+::Oxaloacetate::589::1::154.995094::0.01||C4H4O5::[M+Na]+::enol-oxaloacetate::19221::1::154.995094::0.01||C4H4O5::[M+Na]+::trans-2,3-Epoxysuccinate::2551::1::154.995094::0.01 +M157T110 156.989769 110.0 1417.22 C4H6O4::[M+K]+::3-methoxy-3-oxopropanoic acid::HMDB0130020::1::156.989767::0.01||C4H6O4::[M+K]+::4-Hydroxy-2-oxobutanoic acid::HMDB0031204::1::156.989767::0.01||C4H6O4::[M+K]+::Erythrono-1,4-lactone::HMDB0000349::1::156.989767::0.01||C4H6O4::[M+K]+::Methylmalonic acid::HMDB0000202::1::156.989767::0.01||C4H6O4::[M+K]+::Succinic acid::HMDB0000254::1::156.989767::0.01||C4H6O4::[M+K]+::Threonolactone::HMDB0000940::1::156.989767::0.01||C4H6O4::[M+K]+::xi-3-Hydroxy-2-oxobutanoic acid::HMDB0039324::1::156.989767::0.01||C2H5O6P::[M+H]+::Phosphoglycolic acid::HMDB0000816::1::156.989651::0.75 C4H6O4::[M+K]+::D,L-malic semialdehyde::18220::1::156.989767::0.01||C4H6O4::[M+K]+::Erythrono-1,4-lactone::11996::1::156.989767::0.01||C4H6O4::[M+K]+::Methyl oxalate::7890::1::156.989767::0.01||C4H6O4::[M+K]+::Methylmalonate::1845::1::156.989767::0.01||C4H6O4::[M+K]+::Succinate::592::1::156.989767::0.01||C2H5O6P::[M+H]+::2-Phosphoglycolate::1183::1::156.989651::0.75 +M169T120 168.989654 120.0 520.0 [M+H]+::1::1 C3H5O6P::[M+H]+::Phosphoenolpyruvic acid::HMDB0000263::3::168.989651::0.02 C3H5O6P::[M+H]+::3-Phosphonopyruvate::2162::3::168.989651::0.02||C3H5O6P::[M+H]+::Phosphoenolpyruvate::614::3::168.989651::0.02 C3H5O6P::[M+H]+::Phosphoenolpyruvic acid::HMDB0000263::3::168.989651::0.02::2.0 +M337T121 336.972032 121.0 1040.0 [2M+H]+::1::2 C3H5O6P::[2M+H]+::Phosphoenolpyruvic acid::HMDB0000263::3::336.972026::0.02 C3H5O6P::[2M+H]+::3-Phosphonopyruvate::2162::3::336.972026::0.02||C3H5O6P::[2M+H]+::Phosphoenolpyruvate::614::3::336.972026::0.02 +M505T122 504.95441 122.5 390.0 [3M+H]+::1::3 C3H5O6P::[3M+H]+::Phosphoenolpyruvic acid::HMDB0000263::3::504.954401::0.02 C3H5O6P::[3M+H]+::3-Phosphonopyruvate::2162::3::504.954401::0.02||C3H5O6P::[3M+H]+::Phosphoenolpyruvate::614::3::504.954401::0.02 +M197T150 197.005661 150.0 5112.38 C6H6O6::[M+Na]+::Dehydroascorbic acid::HMDB0001264::1::197.005659::0.01||C6H6O6::[M+Na]+::Dehydroascorbide(1-)::HMDB0062706::1::197.005659::0.01||C6H6O6::[M+Na]+::cis-Aconitic acid::HMDB0000072::1::197.005659::0.01||C6H6O6::[M+Na]+::trans-Aconitic acid::HMDB0000958::1::197.005659::0.01 C6H6O6::[M+Na]+::Aconitate Ion::38282::1::197.005659::0.01||C6H6O6::[M+Na]+::Dehydroascorbic acid::12423::1::197.005659::0.01||C6H6O6::[M+Na]+::cis-Aconitate::843::1::197.005659::0.01||C6H6O6::[M+Na]+::trans-Aconitate::1934::1::197.005659::0.01 +M213T160 213.000576 160.0 1018.075 C6H6O7::[M+Na]+::Oxalosuccinic acid::HMDB0003974::1::213.000574::0.01 C6H6O7::[M+Na]+::4-Hydroxy-Aconitate Ion::41990::1::213.000574::0.01||C6H6O7::[M+Na]+::Oxalosuccinate::3402::1::213.000574::0.01 +M215T170 215.016226 170.0 3859.8 [M+Na]+::1::1 C6H8O7::[M+Na]+::(1R,2R)-Isocitric acid::HMDB0033717::2::215.016224::0.01||C6H8O7::[M+Na]+::2,3-Diketo-L-gulonate::HMDB0006511::2::215.016224::0.01||C6H8O7::[M+Na]+::2,3-diketogulonate::HMDB0062803::2::215.016224::0.01||C6H8O7::[M+Na]+::Citric acid::HMDB0000094::2::215.016224::0.01||C6H8O7::[M+Na]+::D-Glucaro-1,4-lactone::HMDB0041862::2::215.016224::0.01||C6H8O7::[M+Na]+::D-threo-Isocitric acid::HMDB0001874::2::215.016224::0.01||C6H8O7::[M+Na]+::Diketogulonic acid::HMDB0005971::2::215.016224::0.01||C6H8O7::[M+Na]+::Isocitric acid::HMDB0000193::2::215.016224::0.01 C6H8O7::[M+Na]+::(1R,2S)-1-Hydroxypropane-1,2,3-tricarboxylate::863::2::215.016224::0.01||C6H8O7::[M+Na]+::(1S,2S)-1-Hydroxypropane-1,2,3-tricarboxylate::3090::2::215.016224::0.01||C6H8O7::[M+Na]+::(4R,5S)-4,5,6-Trihydroxy-2,3-dioxohexanoate::3064::2::215.016224::0.01||C6H8O7::[M+Na]+::2,3-Dihydroxy-5-Oxo-Hexanedioate::37494::2::215.016224::0.01||C6H8O7::[M+Na]+::2,3-Dioxo-L-gulonate::13506::2::215.016224::0.01||C6H8O7::[M+Na]+::2,3-diketo-L-gulonate::20293::2::215.016224::0.01||C6H8O7::[M+Na]+::2,5-Didehydro-D-gluconate::2148::2::215.016224::0.01||C6H8O7::[M+Na]+::2-Dehydro-3-deoxy-D-glucarate::2740::2::215.016224::0.01||C6H8O7::[M+Na]+::4,5-Dehydro-D-Glucuronic Acid::36800::2::215.016224::0.01||C6H8O7::[M+Na]+::5-Dehydro-4-deoxy-D-glucarate::1000::2::215.016224::0.01||C6H8O7::[M+Na]+::5-keto-4-deoxy-D-glucarate::20308::2::215.016224::0.01||C6H8O7::[M+Na]+::Carboxymethyloxysuccinate::2580::2::215.016224::0.01||C6H8O7::[M+Na]+::Citrate::675::2::215.016224::0.01||C6H8O7::[M+Na]+::Isocitrate::781::2::215.016224::0.01 +M231T174 230.990164 173.5 1929.9 [M+K]+::1::1 C6H8O7::[M+K]+::(1R,2R)-Isocitric acid::HMDB0033717::2::230.990161::0.01||C6H8O7::[M+K]+::2,3-Diketo-L-gulonate::HMDB0006511::2::230.990161::0.01||C6H8O7::[M+K]+::2,3-diketogulonate::HMDB0062803::2::230.990161::0.01||C6H8O7::[M+K]+::Citric acid::HMDB0000094::2::230.990161::0.01||C6H8O7::[M+K]+::D-Glucaro-1,4-lactone::HMDB0041862::2::230.990161::0.01||C6H8O7::[M+K]+::D-threo-Isocitric acid::HMDB0001874::2::230.990161::0.01||C6H8O7::[M+K]+::Diketogulonic acid::HMDB0005971::2::230.990161::0.01||C6H8O7::[M+K]+::Isocitric acid::HMDB0000193::2::230.990161::0.01 C6H8O7::[M+K]+::(1R,2S)-1-Hydroxypropane-1,2,3-tricarboxylate::863::2::230.990161::0.01||C6H8O7::[M+K]+::(1S,2S)-1-Hydroxypropane-1,2,3-tricarboxylate::3090::2::230.990161::0.01||C6H8O7::[M+K]+::(4R,5S)-4,5,6-Trihydroxy-2,3-dioxohexanoate::3064::2::230.990161::0.01||C6H8O7::[M+K]+::2,3-Dihydroxy-5-Oxo-Hexanedioate::37494::2::230.990161::0.01||C6H8O7::[M+K]+::2,3-Dioxo-L-gulonate::13506::2::230.990161::0.01||C6H8O7::[M+K]+::2,3-diketo-L-gulonate::20293::2::230.990161::0.01||C6H8O7::[M+K]+::2,5-Didehydro-D-gluconate::2148::2::230.990161::0.01||C6H8O7::[M+K]+::2-Dehydro-3-deoxy-D-glucarate::2740::2::230.990161::0.01||C6H8O7::[M+K]+::4,5-Dehydro-D-Glucuronic Acid::36800::2::230.990161::0.01||C6H8O7::[M+K]+::5-Dehydro-4-deoxy-D-glucarate::1000::2::230.990161::0.01||C6H8O7::[M+K]+::5-keto-4-deoxy-D-glucarate::20308::2::230.990161::0.01||C6H8O7::[M+K]+::Carboxymethyloxysuccinate::2580::2::230.990161::0.01||C6H8O7::[M+K]+::Citrate::675::2::230.990161::0.01||C6H8O7::[M+K]+::Isocitrate::781::2::230.990161::0.01 +M426T180 426.05225 180.0 1149.95 C12H19N4O7P2S::[M+H]+::Thiamine pyrophosphate::HMDB0001372::1::426.052245::0.01||C14H19NO10S2::[M+H]+::Glucosinalbin::HMDB0038401::1::426.052314::-0.15 C14H19NO10S2::[M+H]+::Sinalbin::40568::1::426.052314::-0.15 +M492T190 492.06041 190.0 1060.0 C M493T192 (13C) 1 14.4 C14H23N4O8P2S::[M+Na]+::2-(a-Hydroxyethyl)thiamine diphosphate::HMDB0003904::2::492.060405::0.01 +M493T192 493.063765 192.5 163.33 (13C) M492T190 C 1 14.4 C14H23N4O8P2S::[M+Na]+::2-(a-Hydroxyethyl)thiamine diphosphate::HMDB0003904::2::493.06376::0.01 +M550T200 550.06589 200.0 4549.65 C16H25N4O10P2S::[M+Na]+::3-Carboxy-1-hydroxypropylthiamine diphosphate::HMDB0006744::1::550.065884::0.01 diff --git a/tests/test_grouping.py b/tests/test_grouping.py index 0c9d1fb..8d7ef53 100644 --- a/tests/test_grouping.py +++ b/tests/test_grouping.py @@ -22,7 +22,7 @@ def test_group_features(self): records_comp = sqlite_records(to_test_data(fn_sql), "groups") for i in range(len(records)): self.assertEqual(records[i][0:6], records_comp[i][0:6]) - np.testing.assert_allclose(records[i][6:], records_comp[i][6:], rtol=1e-8) + np.testing.assert_almost_equal(records[i][6:], records_comp[i][6:]) fn_sql = "results_pearson_all.sqlite" db_out = to_test_results(fn_sql) diff --git a/tests/test_in_out.py b/tests/test_in_out.py index f1d0d9b..957e80a 100644 --- a/tests/test_in_out.py +++ b/tests/test_in_out.py @@ -144,38 +144,43 @@ def test_read_compounds(self): def test_read_adducts(self): adducts_lib = os.path.join(self.path, "beamspy", "data", "adducts.txt") records_pos = read_adducts(adducts_lib, "pos") - records_pos_comp = [('[M+H]+', 1.007276), ('[M+Na]+', 22.989221), - ('[M+K]+', 38.963158)] + records_pos_comp = OrderedDict([('[M+H]+', OrderedDict([('mass', 1.007276), ('charge', 1)])), + ('[M+Na]+', OrderedDict([('mass', 22.989221), ('charge', 1)])), + ('[M+K]+', OrderedDict([('mass', 38.963158), ('charge', 1)]))]) self.assertEqual(records_pos.lib, OrderedDict(records_pos_comp)) records_neg = read_adducts(adducts_lib, "neg") - records_neg_comp = [('[M-H]-', -1.007276), ('[M+Na-2H]-', 20.974668), - ('[M+Cl]-', 34.969401), ('[M+K-2H]-', 36.948605), - ('[M+Hac-H]-', 59.013853)] + records_neg_comp = OrderedDict([('[M-H]-', OrderedDict([('mass', -1.007276), ('charge', 1)])), + ('[M+Na-2H]-', OrderedDict([('mass', 20.974668), ('charge', 1)])), + ('[M+Cl]-', OrderedDict([('mass', 34.969401), ('charge', 1)])), + ('[M+K-2H]-', OrderedDict([('mass', 36.948605), ('charge', 1)])), + ('[M+Hac-H]-', OrderedDict([('mass', 59.013853), ('charge', 1)]))]) self.assertEqual(records_neg.lib, OrderedDict(records_neg_comp)) def test_read_isotopes(self): isotopes_lib = os.path.join(self.path, "beamspy", "data", "isotopes.txt") records_pos = read_isotopes(isotopes_lib, "pos") - records_pos_comp = [OrderedDict([('C', {'abundance': 100.0}), ('(13C)', {'abundance': 1.07}), ('mass_difference', 1.003355)]), - OrderedDict([('S', {'abundance': 100.0}), ('(34S)', {'abundance': 4.21}), ('mass_difference', 1.995796)]), - OrderedDict([('K', {'abundance': 100.0}), ('(41K)', {'abundance': 6.73}), ('mass_difference', 1.998119)])] + records_pos_comp = [OrderedDict([('C', {'abundance': 98.93}), ('(13C)', {'abundance': 1.07}), ('mass_difference', 0.5016775), ('charge', 2)]), + OrderedDict([('C', {'abundance': 98.93}), ('(13C)', {'abundance': 1.07}), ('mass_difference', 1.003355), ('charge', 1)]), + OrderedDict([('S', {'abundance': 94.99}), ('(34S)', {'abundance': 4.25}), ('mass_difference', 1.995796), ('charge', 1)]), + OrderedDict([('K', {'abundance': 93.25}), ('(41K)', {'abundance': 6.73}), ('mass_difference', 1.998119), ('charge', 1)])] self.assertEqual(records_pos.lib, records_pos_comp) records_neg = read_isotopes(isotopes_lib, "neg") - records_neg_comp = [OrderedDict([('C', {'abundance': 100.0}), ('(13C)', {'abundance': 1.07}), ('mass_difference', 1.003355)]), - OrderedDict([('S', {'abundance': 100.0}), ('(34S)', {'abundance': 4.21}), ('mass_difference', 1.995796)]), - OrderedDict([('Cl', {'abundance': 100.0}), ('(37Cl)', {'abundance': 24.23}), ('mass_difference', 1.997050)])] + records_neg_comp = [OrderedDict([('C', {'abundance': 98.93}), ('(13C)', {'abundance': 1.07}), ('mass_difference', 0.5016775), ('charge', 2)]), + OrderedDict([('C', {'abundance': 98.93}), ('(13C)', {'abundance': 1.07}), ('mass_difference', 1.003355), ('charge', 1)]), + OrderedDict([('S', {'abundance': 94.99}), ('(34S)', {'abundance': 4.25}), ('mass_difference', 1.995796), ('charge', 1)]), + OrderedDict([('Cl', {'abundance': 75.76}), ('(37Cl)', {'abundance': 24.24}), ('mass_difference', 1.997050), ('charge', 1)])] self.assertEqual(records_neg.lib, records_neg_comp) - def test_read_mass_differences(self): - differences_lib = os.path.join(self.path, "beamspy", "data", "adducts_differences.txt") - records = read_mass_differences(differences_lib, ion_mode="pos") - self.assertEqual(records.lib, [OrderedDict([('[M+H]+', {'charge': 1.0}), - ('[M+Na]+', {'charge': 1.0}), - ('mass_difference', 21.981945)])]) - records = read_mass_differences(differences_lib, ion_mode="neg") - self.assertEqual(records.lib, []) - records = read_mass_differences(differences_lib, ion_mode="both") - self.assertEqual(records.lib, []) + # def test_read_mass_differences(self): + # differences_lib = os.path.join(self.path, "beamspy", "data", "adducts_differences.txt") + # records = read_mass_differences(differences_lib, ion_mode="pos") + # self.assertEqual(records.lib, [OrderedDict([('[M+H]+', {'charge': 1.0}), + # ('[M+Na]+', {'charge': 1.0}), + # ('mass_difference', 21.981945)])]) + # records = read_mass_differences(differences_lib, ion_mode="neg") + # self.assertEqual(records.lib, []) + # records = read_mass_differences(differences_lib, ion_mode="both") + # self.assertEqual(records.lib, []) if __name__ == '__main__': diff --git a/tests/test_libraries.py b/tests/test_libraries.py index 862a2b6..59bb5fc 100644 --- a/tests/test_libraries.py +++ b/tests/test_libraries.py @@ -20,25 +20,19 @@ def test_read_isotopes(self): def test_read_adducts(self): lib_adducts = read_adducts(os.path.join(self.path, "beamspy", "data", "adducts.txt"), "pos") self.assertTrue("in library" in lib_adducts.__str__()) - lib_adducts.add("test", 10) - self.assertEqual(lib_adducts.lib["test"], 10) + lib_adducts.add("test", 100.0, 1) + self.assertEqual(lib_adducts.lib["test"]["mass"], 100.0) + self.assertEqual(lib_adducts.lib["test"]["charge"], 1) lib_adducts.remove("*") self.assertEqual(lib_adducts.lib, OrderedDict()) - def test_multiple_charged_ions(self): - lib_multiple_charged_ions = read_multiple_charged_ions(os.path.join(self.path, "beamspy", "data", "multiple_charged_ions.txt"), "pos") - self.assertTrue("in library" in lib_multiple_charged_ions.__str__()) - - lib_multiple_charged_ions.remove("*") - self.assertEqual(lib_multiple_charged_ions.lib, OrderedDict()) - - def test_mass_differences(self): - lib_differences = read_mass_differences(os.path.join(self.path, "beamspy", "data", "adducts_differences.txt"), "pos") - self.assertTrue("in library" in lib_differences.__str__()) - - lib_differences.remove("*", "*") - self.assertEqual(lib_differences.lib, []) + # def test_mass_differences(self): + # lib_differences = read_mass_differences(os.path.join(self.path, "beamspy", "data", "adducts_differences.txt"), "pos") + # self.assertTrue("in library" in lib_differences.__str__()) + # + # lib_differences.remove("*", "*") + # self.assertEqual(lib_differences.lib, []) def test_nist_database_to_pyteomics(self): nist_database = nist_database_to_pyteomics(os.path.join(self.path, "beamspy", "data", "nist_database.txt")) diff --git a/tests/test_statistics.py b/tests/test_statistics.py index 39afed2..db4b512 100644 --- a/tests/test_statistics.py +++ b/tests/test_statistics.py @@ -22,7 +22,7 @@ def test_correlation_coefficients(self): "r_value": [np.float64(1.0), np.float64(1.0), np.float64(1.0), np.float64(1.0), np.float64(1.0)], "p_value": [np.float64(0.0), np.float64(0.0), np.float64(0.0), np.float64(0.0), np.float64(5.85415087865495e-157)]}, columns=["name_a", "name_b", "r_value", "p_value"]) df_coeffs = correlation_coefficients(self.df, max_rt_diff=5.0, coeff_thres=0.7, pvalue_thres=1.0, method="pearson", block=5000, ncpus=None) - pd.testing.assert_frame_equal(df_coeffs, df_coeffs_comp, check_exact=True) + pd.testing.assert_frame_equal(df_coeffs, df_coeffs_comp) df_coeffs_comp = pd.DataFrame({"name_a": ["M169T120", "M169T120", "M337T121", "M215T170", "M492T190"], "name_b": ["M337T121", "M505T122", "M505T122", "M231T174", "M493T192"], @@ -30,7 +30,7 @@ def test_correlation_coefficients(self): "p_value": [np.float64(0.0), np.float64(0.0), np.float64(0.0), np.float64(0.0), np.float64(0.0)]}, columns=["name_a", "name_b", "r_value", "p_value"]) df_coeffs = correlation_coefficients(self.df, max_rt_diff=5.0, coeff_thres=0.7, pvalue_thres=1.0, method="spearman", block=5000, ncpus=None) - pd.testing.assert_frame_equal(df_coeffs, df_coeffs_comp, check_exact=True) + pd.testing.assert_frame_equal(df_coeffs, df_coeffs_comp) df_coeffs = correlation_coefficients(self.df, max_rt_diff=50000.0, coeff_thres=0.0, pvalue_thres=1.0, method="pearson", block=5000, ncpus=None) self.assertEqual(df_coeffs.shape, (136, 4))