From 66c74c16fe744582fed730cde843fc2b6e9f061f Mon Sep 17 00:00:00 2001 From: Ralf Weber Date: Fri, 12 May 2023 13:55:53 +0100 Subject: [PATCH] Release v1.2.0 (#18) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Add "positive" flag for correlation analysis and update pandas data fra… (#15) * Add check box to GUI for "filtering" positive correlations * Update CLI for neutral losses * Update GitHub actions and configs (#17) * Update README.rst (Minor) --------- Co-authored-by: Wanchang Lin --- .github/workflows/build-test.yml | 13 +- .travis.yml | 27 - README.rst | 5 +- appveyor.yml | 12 - beamspy/__init__.py | 4 +- beamspy/__main__.py | 7 +- beamspy/annotation.py | 3 +- beamspy/data/adducts.txt | 10 +- beamspy/data/isotopes.txt | 7 +- beamspy/data/multiple_charged_ions.txt | 6 +- beamspy/data/neutral_losses.txt | 4 +- beamspy/grouping.py | 4 +- beamspy/gui.py | 3 + beamspy/in_out.py | 4 +- beamspy/qt/form.py | 1450 +++++++++++++----------- beamspy/qt/form.ui | 136 ++- beamspy/statistics.py | 20 +- codecov.yml | 8 + environment.yml | 23 +- examples/run.py | 3 +- examples/run.sh | 3 +- notebooks/examples.ipynb | 1037 ++++++++++++++--- requirements.txt | 4 +- setup.py | 5 +- tests/test_annotation.py | 2 +- tests/test_grouping.py | 8 +- tests/test_statistics.py | 8 +- 27 files changed, 1890 insertions(+), 926 deletions(-) delete mode 100644 .travis.yml delete mode 100644 appveyor.yml create mode 100644 codecov.yml diff --git a/.github/workflows/build-test.yml b/.github/workflows/build-test.yml index 857fe05..1faaff7 100644 --- a/.github/workflows/build-test.yml +++ b/.github/workflows/build-test.yml @@ -9,7 +9,7 @@ jobs: strategy: matrix: os: [ubuntu-latest, windows-latest, macos-latest] - python-version: [3.7, 3.8] + python-version: ['3.8', '3.9', '3.10'] env: OS: ${{ matrix.os }} @@ -29,7 +29,7 @@ jobs: run: | python --version - conda env update --file environment.yml --name base + conda env update --file environment.yml --name __setup_conda - name: Lint with flake8 run: | @@ -44,15 +44,16 @@ jobs: - name: Test with pytest-cov run: | - - python setup.py install - beamspy --help - conda install pytest codecov pytest-cov -c conda-forge + + python -m pip install --no-deps -e . + beamspy --help + pytest --cov ./ --cov-config=.coveragerc --cov-report=xml - name: Upload code coverage to codecov uses: codecov/codecov-action@v1 + if: matrix.os == 'ubuntu-latest' with: flags: unittests env_vars: OS,PYTHON diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 7635162..0000000 --- a/.travis.yml +++ /dev/null @@ -1,27 +0,0 @@ -dist: xenial -language: python -python: - - "3.7" - -services: - - xvfb - -before_install: - - python --version - - pip install -U pip - - pip install -U pytest - - pip install codecov pytest-cov - -install: - - pip install . - -before_script: - - "export DISPLAY=:99.0" - -script: - - beamspy --help - - pytest --cov=beamspy tests/ - -after_script: - - codecov - diff --git a/README.rst b/README.rst index 3ddd994..920118c 100644 --- a/README.rst +++ b/README.rst @@ -1,6 +1,6 @@ BEAMSpy - Birmingham mEtabolite Annotation for Mass Spectrometry (Python package) ================================================================================== -|Version| |Py versions| |Git| |Bioconda| |Build Status| |Build Status (AppVeyor)| |License| |RTD doc| |codecov| |mybinder| +|Version| |Py versions| |Git| |Bioconda| |Build Status| |License| |RTD doc| |codecov| |mybinder| BEAMSpy (Birmingham mEtabolite Annotation for Mass Spectrometry) is a Python package that includes several automated and seamless computational modules that are applied to putatively annotate metabolites detected in untargeted ultra (high) @@ -82,9 +82,6 @@ Released under the GNU General Public License v3.0 (see `LICENSE 5 - -1 + 6 5 - - - - 0 - + + - 100000000.000000000000000 - - - 1000.000000000000000 + 9999999.000000000000000 - 5000.000000000000000 + 5.000000000000000 + + + + + + + true + + + cpus: + + + + + + + true + + + Block size: + + + + + + + true + + + Maximum RT difference (sec): + + + + + + + true + + + Coefficent threshold: @@ -515,17 +549,7 @@ - - - - true - - - Grouping method: - - - - + 1.000000000000000 @@ -559,59 +583,48 @@ - - - - true - - - P-value threshold: - - - - - - - true - - - Maximum RT difference (sec): + + + + 10 - - - - - 9999999.000000000000000 + 1.000000000000000 + + + 0.010000000000000 - 5.000000000000000 + 0.010000000000000 - - + + - 10 + 0 - 1.000000000000000 + 100000000.000000000000000 - 0.010000000000000 + 1000.000000000000000 - 0.010000000000000 + 5000.000000000000000 - - + + true - Coefficent threshold: + Grouping method: + + + -1 @@ -629,23 +642,26 @@ - - + + true - cpus: + P-value threshold: - - + + true - Block size: + Positive Correlation + + + true diff --git a/beamspy/statistics.py b/beamspy/statistics.py index fca33c4..401e625 100644 --- a/beamspy/statistics.py +++ b/beamspy/statistics.py @@ -52,7 +52,7 @@ def _spearmanr(pairs): return temp -def correlation_coefficients(df, max_rt_diff=5.0, coeff_thres=0.7, pvalue_thres=0.05, method="pearson", block=5000, ncpus=None): +def correlation_coefficients(df, max_rt_diff=5.0, coeff_thres=0.7, pvalue_thres=0.05, method="pearson", positive=True, block=5000, ncpus=None): if ["name", "mz", "rt"] == list(df.columns.values[0:3]): ncols = 4 df = df.sort_values(['rt', 'mz']).reset_index(drop=True) @@ -103,10 +103,13 @@ def correlation_coefficients(df, max_rt_diff=5.0, coeff_thres=0.7, pvalue_thres= if len(pairs) == block * ncpus: #print("Calculating correlations for {} pairs (subset)".format(len(pairs))) coeffs = _cc_pp_(pairs, method, ncpus) + coe = [] for k in range(len(coeffs)): if abs(coeffs[k][0]) > coeff_thres and (abs(coeffs[k][1]) < pvalue_thres or pvalue_thres is None): s = pd.Series([peaks[k][0], peaks[k][1], round(coeffs[k][0], 2), coeffs[k][1]], index=column_names) - df_coeffs = df_coeffs.append(s, ignore_index=True) # pandas append + coe.append(s) + tmp = pd.DataFrame(coe, columns=column_names) + df_coeffs = pd.concat([df_coeffs, tmp], ignore_index=True) pairs, peaks = [], [] else: break @@ -114,10 +117,18 @@ def correlation_coefficients(df, max_rt_diff=5.0, coeff_thres=0.7, pvalue_thres= if len(pairs) > 0: #print("Calculating correlations for {} pairs (subset)".format(len(pairs))) coeffs = _cc_pp_(pairs, method, ncpus) + coe = [] for k in range(len(coeffs)): if abs(coeffs[k][0]) > coeff_thres and (abs(coeffs[k][1]) < pvalue_thres or pvalue_thres is None): s = pd.Series([peaks[k][0], peaks[k][1], round(coeffs[k][0], 2), coeffs[k][1]], index=column_names) - df_coeffs = df_coeffs.append(s, ignore_index=True) + coe.append(s) + tmp = pd.DataFrame(coe, columns=column_names) + df_coeffs = pd.concat([df_coeffs, tmp], ignore_index=True) + + # filter + if positive: + df_coeffs = df_coeffs[df_coeffs['r_value'] > 0].reset_index(drop=True) + return df_coeffs @@ -125,7 +136,8 @@ def correlation_graphs(df_coeffs, df): df_coeffs = df_coeffs.merge(df[["name", "mz", "intensity", "rt"]], how='left', left_on=['name_a'], right_on=['name']) df_coeffs = df_coeffs.merge(df[["name", "mz", "intensity", "rt"]], how='left', left_on=['name_b'], right_on=['name']) from decimal import Decimal - graphs = nx.OrderedDiGraph() + # graphs = nx.OrderedDiGraph() # networkx version < 3.0 + graphs = nx.DiGraph() for index, row in df_coeffs.iterrows(): graphs.add_node(str(row["name_a"]), mz=row["mz_x"], intensity=row["intensity_x"], rt=row["rt_x"]) graphs.add_node(str(row["name_b"]), mz=row["mz_y"], intensity=row["intensity_y"], rt=row["rt_y"]) diff --git a/codecov.yml b/codecov.yml new file mode 100644 index 0000000..66445ee --- /dev/null +++ b/codecov.yml @@ -0,0 +1,8 @@ +coverage: + status: + patch: + default: + target: 80% + project: + default: + threshold: 5% diff --git a/environment.yml b/environment.yml index 77b3f17..5157e41 100644 --- a/environment.yml +++ b/environment.yml @@ -3,15 +3,14 @@ channels: - conda-forge - bioconda dependencies: - - numpy - - scipy - - requests - - networkx<=2.5 - - pandas - - matplotlib - - seaborn - - pyteomics<=4.4.1 - - biopython<=1.78 - - pyside2=5.13.1 - - tqdm - + - numpy + - scipy + - requests + - networkx<=2.5 + - pandas<=1.5.3 + - matplotlib + - seaborn + - pyteomics<=4.4.1 + - biopython<=1.78 + - pyside2 + - tqdm diff --git a/examples/run.py b/examples/run.py index 4e5c9ca..3a2448d 100644 --- a/examples/run.py +++ b/examples/run.py @@ -25,7 +25,8 @@ def main(): ion_mode = "pos" db_out = "results.sqlite".format(ion_mode) - graphs = group_features(df, db_out, max_rt_diff=5.0, coeff_thres=0.7, pvalue_thres=0.01, method="pearson") + # graphs = group_features(df, db_out, max_rt_diff=5.0, coeff_thres=0.7, pvalue_thres=0.01, method="pearson") + graphs = group_features(df, db_out, max_rt_diff=5.0, coeff_thres=0.7, pvalue_thres=0.01, method="pearson", positive=False) nx.write_gml(graphs, "graphs.gml") # graphs = nx.read_gml("graphs.gml") diff --git a/examples/run.sh b/examples/run.sh index 45bfcf1..4764f91 100755 --- a/examples/run.sh +++ b/examples/run.sh @@ -8,7 +8,8 @@ beamspy group-features \ --max-rt-diff 5.0 \ --method pearson \ --coeff-threshold 0.7 \ ---pvalue-threshold 0.01 +--pvalue-threshold 0.01 \ +--positive beamspy annotate-peak-patterns \ --peaklist ../tests/test_data/peaklist_lcms_pos_theoretical.txt \ diff --git a/notebooks/examples.ipynb b/notebooks/examples.ipynb index e5d7da3..b73e220 100644 --- a/notebooks/examples.ipynb +++ b/notebooks/examples.ipynb @@ -27,81 +27,88 @@ { "cell_type": "code", "execution_count": 2, + "metadata": { + "collapsed": false, + "pycharm": { + "is_executing": false, + "name": "#%%\n" + } + }, "outputs": [ { "name": "stderr", + "output_type": "stream", "text": [ - "\r 0%| | 0/400 [00:00=3.7", + python_requires=">=3.8", test_suite="tests.suite", install_requires=install_requires, include_package_data=True, classifiers=[ "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", "Topic :: Scientific/Engineering :: Bio-Informatics", "Topic :: Scientific/Engineering :: Chemistry", "Topic :: Utilities", diff --git a/tests/test_annotation.py b/tests/test_annotation.py index c95b8c1..d2a9f0e 100644 --- a/tests/test_annotation.py +++ b/tests/test_annotation.py @@ -27,7 +27,7 @@ def setUp(self): self.db_results = "results_annotation.sqlite" self.db_results_graph = "results_annotation_graph.sqlite" - self.graph = group_features(self.df, to_test_results(self.db_results_graph), max_rt_diff=5.0, coeff_thres=0.7, pvalue_thres=1.0, method="pearson", block=5000, ncpus=None) + self.graph = group_features(self.df, to_test_results(self.db_results_graph), max_rt_diff=5.0, coeff_thres=0.7, pvalue_thres=1.0, method="pearson", positive=False, block=5000, ncpus=None) self.ppm = 2.0 diff --git a/tests/test_grouping.py b/tests/test_grouping.py index 8d7ef53..e35a35f 100644 --- a/tests/test_grouping.py +++ b/tests/test_grouping.py @@ -16,7 +16,7 @@ def setUp(self): def test_group_features(self): fn_sql = "results_pearson.sqlite" db_out = to_test_results(fn_sql) - group_features(self.df, db_out, max_rt_diff=5.0, coeff_thres=0.7, pvalue_thres=1.0, method="pearson", block=5000, ncpus=None) + group_features(self.df, db_out, max_rt_diff=5.0, coeff_thres=0.7, pvalue_thres=1.0, method="pearson", positive=False, block=5000, ncpus=None) records = sqlite_records(to_test_results(fn_sql), "groups") records_comp = sqlite_records(to_test_data(fn_sql), "groups") @@ -26,7 +26,7 @@ def test_group_features(self): fn_sql = "results_pearson_all.sqlite" db_out = to_test_results(fn_sql) - group_features(self.df, db_out, max_rt_diff=200.0, coeff_thres=0.0, pvalue_thres=1.0, method="pearson", block=5000, ncpus=None) + group_features(self.df, db_out, max_rt_diff=200.0, coeff_thres=0.0, pvalue_thres=1.0, method="pearson", positive=False, block=5000, ncpus=None) records = sqlite_records(to_test_results(fn_sql), "groups") records_comp = sqlite_records(to_test_data(fn_sql), "groups") @@ -36,7 +36,7 @@ def test_group_features(self): fn_sql = "results_pearson_all.sqlite" db_out = to_test_results(fn_sql) - group_features(self.df, db_out, max_rt_diff=200.0, coeff_thres=0.0, pvalue_thres=1.0, method="pearson", block=20, ncpus=1) + group_features(self.df, db_out, max_rt_diff=200.0, coeff_thres=0.0, pvalue_thres=1.0, method="pearson", positive=False, block=20, ncpus=1) records = sqlite_records(to_test_results(fn_sql), "groups") records_comp = sqlite_records(to_test_data(fn_sql), "groups") @@ -46,7 +46,7 @@ def test_group_features(self): fn_sql = "results_spearman.sqlite" db_out = to_test_results(fn_sql) - group_features(self.df, db_out, max_rt_diff=5.0, coeff_thres=0.7, pvalue_thres=1.0, method="spearman", block=5000, ncpus=None) + group_features(self.df, db_out, max_rt_diff=5.0, coeff_thres=0.7, pvalue_thres=1.0, method="spearman", positive=False, block=5000, ncpus=None) records = sqlite_records(to_test_results(fn_sql), "groups") records_comp = sqlite_records(to_test_data(fn_sql), "groups") diff --git a/tests/test_statistics.py b/tests/test_statistics.py index db4b512..2509151 100644 --- a/tests/test_statistics.py +++ b/tests/test_statistics.py @@ -21,7 +21,7 @@ def test_correlation_coefficients(self): "name_b": ["M337T121", "M505T122", "M505T122", "M231T174", "M493T192"], "r_value": [np.float64(1.0), np.float64(1.0), np.float64(1.0), np.float64(1.0), np.float64(1.0)], "p_value": [np.float64(0.0), np.float64(0.0), np.float64(0.0), np.float64(0.0), np.float64(5.85415087865495e-157)]}, columns=["name_a", "name_b", "r_value", "p_value"]) - df_coeffs = correlation_coefficients(self.df, max_rt_diff=5.0, coeff_thres=0.7, pvalue_thres=1.0, method="pearson", block=5000, ncpus=None) + df_coeffs = correlation_coefficients(self.df, max_rt_diff=5.0, coeff_thres=0.7, pvalue_thres=1.0, method="pearson", positive=False, block=5000, ncpus=None) pd.testing.assert_frame_equal(df_coeffs, df_coeffs_comp) df_coeffs_comp = pd.DataFrame({"name_a": ["M169T120", "M169T120", "M337T121", "M215T170", "M492T190"], @@ -29,14 +29,14 @@ def test_correlation_coefficients(self): "r_value": [np.float64(1.0), np.float64(1.0), np.float64(1.0), np.float64(1.0), np.float64(1.0)], "p_value": [np.float64(0.0), np.float64(0.0), np.float64(0.0), np.float64(0.0), np.float64(0.0)]}, columns=["name_a", "name_b", "r_value", "p_value"]) - df_coeffs = correlation_coefficients(self.df, max_rt_diff=5.0, coeff_thres=0.7, pvalue_thres=1.0, method="spearman", block=5000, ncpus=None) + df_coeffs = correlation_coefficients(self.df, max_rt_diff=5.0, coeff_thres=0.7, pvalue_thres=1.0, method="spearman", positive=False, block=5000, ncpus=None) pd.testing.assert_frame_equal(df_coeffs, df_coeffs_comp) - df_coeffs = correlation_coefficients(self.df, max_rt_diff=50000.0, coeff_thres=0.0, pvalue_thres=1.0, method="pearson", block=5000, ncpus=None) + df_coeffs = correlation_coefficients(self.df, max_rt_diff=50000.0, coeff_thres=0.0, pvalue_thres=1.0, method="pearson", positive=False, block=5000, ncpus=None) self.assertEqual(df_coeffs.shape, (136, 4)) def test_correlation_graphs(self): - df_coeffs = correlation_coefficients(self.df, max_rt_diff=5.0, coeff_thres=0.7, pvalue_thres=1.0, method="pearson", block=5000, ncpus=None) + df_coeffs = correlation_coefficients(self.df, max_rt_diff=5.0, coeff_thres=0.7, pvalue_thres=1.0, method="pearson", positive=False, block=5000, ncpus=None) graph = correlation_graphs(df_coeffs, self.df) n0 = list(graph.nodes(data=True))[0]