Commit 7a61f13

Merge pull request #806 from NeuroVault/readd_gene_expression

re-add gene expression routes and views.

rwblair committed Aug 11, 2023
2 parents e3dc3c7 + e6f9106
Showing 21 changed files with 24,896 additions and 107 deletions.
3 changes: 1 addition & 2 deletions .github/workflows/build_and_test.yml
@@ -85,9 +85,8 @@ jobs:
-
name: Migrate Database
run: |
until docker compose exec -T \
sleep 10 && until docker compose exec -T \
postgres pg_isready -U postgres; do sleep 1; done
docker compose exec -T django python manage.py migrate
-
name: Backend Tests
2 changes: 2 additions & 0 deletions .gitignore
@@ -76,3 +76,5 @@ local_data/
.envs/

*.swp

ahba_data/store_max1_reduced.h5
3 changes: 3 additions & 0 deletions ahba_data/README.md
@@ -0,0 +1,3 @@
This directory contains files used by `neurovault/apps/statmaps/ahba.py`, which is called by the two gene expression views in the statmaps app.

A file called `store_max1_reduced.h5` is not tracked by git due to its size. It is generated by `scripts/preparing_AHBA_data.py`, which has additional dependencies beyond those of the rest of the project. `store_max1_reduced.h5` can also be extracted from the neurovault/ahba image on Docker Hub.
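
For orientation, a minimal sketch of inspecting the store once it is in place. The path matches what `neurovault/apps/statmaps/ahba.py` uses inside the container; the per-donor DataFrame layout is an assumption inferred from how that module reads the store.

import pandas as pd

# Path used by neurovault/apps/statmaps/ahba.py inside the container.
store_file = "/code/ahba_data/store_max1_reduced.h5"

# Each key is one AHBA donor; each value is assumed to be a
# probes-by-voxel-index DataFrame, mirroring how ahba.py iterates the store.
with pd.HDFStore(store_file, "r") as store:
    for donor_id in store.keys():
        expression_data = store.get(donor_id.replace(".", "_"))
        print(donor_id, expression_data.shape)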
Binary file added ahba_data/Richiardi_Data_File_S2.xlsx
Binary file not shown.
3,703 changes: 3,703 additions & 0 deletions ahba_data/corrected_mni_coordinates.csv

Large diffs are not rendered by default.

20,788 changes: 20,788 additions & 0 deletions ahba_data/probe_info_max1.csv

Large diffs are not rendered by default.

File renamed without changes.
169 changes: 98 additions & 71 deletions compose/django/requirements.txt
@@ -1,104 +1,131 @@
amqp==5.1.1
asgiref==3.5.2
async-timeout==4.0.2
billiard==3.6.4.0
cachetools==5.2.0
celery==5.2.7
asgiref==3.7.2
billiard==4.1.0
blosc2==2.0.0
cachetools==5.3.1
celery==5.3.1
certifi==2023.7.22
cffi==1.15.1
charset-normalizer==2.1.0
citeproc-py==0.6.0
click==8.1.3
charset-normalizer==3.2.0
click==8.1.6
click-didyoumean==0.3.0
click-plugins==1.1.1
click-repl==0.2.0
click-repl==0.3.0
cmake==3.27.1
cognitiveatlas==0.1.9
cryptography==37.0.4
contourpy==1.1.0
cryptography==41.0.3
cycler==0.11.0
Cython==0.29.30
Cython==0.29.36
defusedxml==0.7.1
Deprecated==1.2.13
Django==4.0.10
Deprecated==1.2.14
Django==4.2.4
django-braces==1.15.0
django-celery-results==2.4.0
django-cleanup==6.0.0
django-cors-headers==3.13.0
django-crispy-forms==1.14.0
django-cleanup==8.0.0
django-cors-headers==4.2.0
django-crispy-forms==2.0
django-datatables-view==1.20.0
django-file-resubmit==0.5.2
django-filter==22.1
django-guardian==2.4.0
django-oauth-toolkit==2.1.0
django-oauth-toolkit==2.3.0
django-polymorphic==3.1.0
django-sendfile==0.3.11
django-taggit==3.0.0
djangorestframework==3.13.1
duecredit==0.9.1
fonttools==4.34.4
frozendict==2.3.2
django-taggit==4.0.0
djangorestframework==3.14.0
filelock==3.12.2
fonttools==4.42.0
frozendict==2.3.8
future==0.18.3
fuzzywuzzy==0.18.0
h5py==3.7.0
gunicorn==21.2.0
h5py==3.9.0
html5lib==1.1
idna==3.3
imageio==2.19.3
indexed-gzip==1.6.13
idna==3.4
imageio==2.31.1
isodate==0.6.1
joblib==1.1.1
jwcrypto==1.4
kiwisolver==1.4.3
kombu==5.2.4
lxml==4.9.1
matplotlib==3.4.3
mpmath==1.2.1
networkx==2.8.4
nibabel==3.2.2
Jinja2==3.1.2
joblib==1.3.2
jwcrypto==1.5.0
kiwisolver==1.4.4
kombu==5.3.1
lit==16.0.6
llvmlite==0.40.1
lxml==4.9.3
MarkupSafe==2.1.3
matplotlib==3.7.2
mpmath==1.3.0
msgpack==1.0.5
networkx==3.1
nibabel==4.0.2
nidmfsl==2.2.0
nidmresults==2.1.0
nilearn==0.9.1
NiMARE==0.0.11
numexpr==2.8.3
numpy==1.23.1
nilearn==0.10.1
NiMARE==0.1.1
numba==0.57.1
numexpr==2.8.5
numpy==1.24.4
nvidia-cublas-cu11==11.10.3.66
nvidia-cuda-cupti-cu11==11.7.101
nvidia-cuda-nvrtc-cu11==11.7.99
nvidia-cuda-runtime-cu11==11.7.99
nvidia-cudnn-cu11==8.5.0.96
nvidia-cufft-cu11==10.9.0.58
nvidia-curand-cu11==10.2.10.91
nvidia-cusolver-cu11==11.4.0.1
nvidia-cusparse-cu11==11.7.4.91
nvidia-nccl-cu11==2.14.3
nvidia-nvtx-cu11==11.7.91
oauthlib==3.2.2
packaging==21.3
pandas==1.4.3
patsy==0.5.2
Pillow==9.3.0
prompt-toolkit==3.0.30
packaging==23.1
pandas==2.0.3
patsy==0.5.3
Pillow==10.0.0
plotly==5.15.0
prompt-toolkit==3.0.39
prov==2.0.0
psycopg==3.0.15
psycopg2==2.9.3
pycortex==1.2.5
psycopg==3.1.10
py-cpuinfo==9.0.0
pycortex==1.2.7
pycparser==2.21
PyJWT==2.4.0
PyJWT==2.8.0
PyLD==2.0.3
PyMARE==0.0.3
PyMARE==0.0.4rc2
pyparsing==3.0.9
python-dateutil==2.8.2
python3-openid==3.2.0
pytz==2022.1
rdflib==6.1.1
pytz==2023.3
PyYAML==6.0.1
rdflib==7.0.0
rdflib-jsonld==0.6.2
redis==4.5.4
requests==2.28.1
redis==4.6.0
requests==2.31.0
requests-oauthlib==1.3.1
scikit-learn==1.1.1
scipy==1.8.1
Shapely==1.8.2
scikit-learn==1.3.0
scipy==1.11.1
shapely==2.0.1
six==1.16.0
social-auth-app-django==5.0.0
social-auth-core==4.3.0
social-auth-app-django==5.2.0
social-auth-core==4.4.2
sparse==0.14.0
sqlparse==0.4.4
statsmodels==0.13.2
sympy==1.10.1
threadpoolctl==3.1.0
tornado==6.2
tqdm==4.64.0
typing-extensions==4.3.0
urllib3==1.26.10
statsmodels==0.14.0
sympy==1.12
tables==3.8.0
tenacity==8.2.2
threadpoolctl==3.2.0
torch==2.0.1
tornado==6.3.2
tqdm==4.66.1
triton==2.0.0
typing_extensions==4.7.1
tzdata==2023.3
urllib3==2.0.4
vine==5.0.0
wcwidth==0.2.5
wcwidth==0.2.6
webencodings==0.5.1
wget==3.2
wrapt==1.14.1
zipstream==1.1.4
gunicorn==20.1.0
wrapt==1.15.0
django-celery-results
zipstream
django-filter
crispy-bootstrap4
4 changes: 3 additions & 1 deletion compose/django/upgrade_pip_calls.txt
@@ -1,4 +1,4 @@
pip install scipy celery[redis] requests lxml django-oauth-toolkit pandas nibabel matplotlib django-taggit nilearn django-datatables-view statsmodels django-cleanup django-polymorphic djangorestframework
pip install scipy celery[redis] requests lxml django-oauth-toolkit pandas nibabel matplotlib django-taggit nilearn django-datatables-view statsmodels django-cleanup django-polymorphic djangorestframework django-celery-results django-filter
pip install psycopg
pip install nidmresults nidmfsl cognitiveatlas
pip install social-app-django
@@ -16,3 +16,5 @@ pip install django-braces
pip install corsheaders
pip install django-cors-headers
pip install gunicorn
pip install tables

72 changes: 72 additions & 0 deletions neurovault/apps/statmaps/ahba.py
@@ -0,0 +1,72 @@

# coding: utf-8

import numpy as np
import pandas as pd
from scipy.stats import ttest_1samp, zscore
from statsmodels.stats.multitest import multipletests


def calculate_gene_expression_similarity(reduced_stat_map_data, mask="full"):
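    """Regress AHBA probe expression against a reduced stat map, per donor.

    For each donor in the HDF5 store, voxel-wise expression and stat map
    values are z-scored and a least-squares slope is fit per probe. Slopes
    are then t-tested against zero across donors and FDR-corrected.
    `mask` restricts the comparison to "cortex" or "subcortex" voxels, or
    uses all voxels ("full", the default).
    """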
store_file = "/code/ahba_data/store_max1_reduced.h5"
subcortex_mask = "/code/ahba_data/subcortex_mask.npy"

results_dfs = []
with pd.HDFStore(store_file, 'r') as store:
for donor_id in store.keys():
print("Loading expression data (%s)" % donor_id)
expression_data = store.get(donor_id.replace(".", "_"))

print("Getting statmap values (%s)" % donor_id)
nifti_values = reduced_stat_map_data[expression_data.columns.astype(int)]

print("Removing missing values (%s)" % donor_id)
na_mask = np.isnan(nifti_values)
if mask == "subcortex":
na_mask = np.logical_or(na_mask,
np.isnan(np.load(subcortex_mask)[expression_data.columns.astype(int)]))
elif mask == "cortex":
na_mask = np.logical_or(na_mask, np.logical_not(np.isnan(
np.load(subcortex_mask)[expression_data.columns.astype(int)])))
else:
assert mask == "full"

nifti_values = np.array(nifti_values)[np.logical_not(na_mask)]
expression_data.drop(expression_data.columns[na_mask].astype(int), axis=1, inplace=True)

print("z scoring (%s)" % donor_id)
expression_data = pd.DataFrame(zscore(expression_data, axis=1), columns=expression_data.columns.astype(int),
index=expression_data.index)
nifti_values = zscore(nifti_values)

print("Calculating linear regressions (%s)" % donor_id)
regression_results = np.linalg.lstsq(np.c_[nifti_values, np.ones_like(nifti_values)], expression_data.T)
results_df = pd.DataFrame({"slope": regression_results[0][0]}, index=expression_data.index)

results_df.columns = pd.MultiIndex.from_tuples([(donor_id[1:], c,) for c in results_df.columns],
names=['donor_id', 'parameter'])

results_dfs.append(results_df)

print("Concatenating results")
results_df = pd.concat(results_dfs, axis=1)
del results_dfs

t, p = ttest_1samp(results_df, 0.0, axis=1)
group_results_df = pd.DataFrame({"t": t, "p": p}, columns=['t', 'p'], index=expression_data.index)
_, group_results_df["p (FDR corrected)"], _, _ = multipletests(group_results_df.p, method='fdr_bh')
group_results_df["variance explained (mean)"] = (results_df.xs('slope', axis=1, level=1) ** 2 * 100).mean(axis=1)
group_results_df["variance explained (std)"] = (results_df.xs('slope', axis=1, level=1) ** 2 * 100).std(axis=1)
del results_df
probe_info = pd.read_csv("/code/ahba_data/probe_info_max1.csv", index_col=0).drop(['chromosome', "gene_id"], axis=1)
group_results_df = group_results_df.join(probe_info)
group_results_df = group_results_df[["gene_symbol", "entrez_id.1", "gene_name","t", "p", "p (FDR corrected)",
"variance explained (mean)", "variance explained (std)"]]

return group_results_df
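
For context, a hedged usage sketch: the input is assumed to be the flattened, masked 4mm vector that `save_resampled_transformation_single` in tasks.py saves, and the file path below is hypothetical.

import numpy as np
from neurovault.apps.statmaps.ahba import calculate_gene_expression_similarity

# Hypothetical path to a vector produced by save_resampled_transformation_single.
reduced_data = np.load("/tmp/reduced_map.npy")

# mask may be "full", "cortex", or "subcortex", matching the branches above.
results = calculate_gene_expression_similarity(reduced_data, mask="full")
print(results.sort_values("p (FDR corrected)").head())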
4 changes: 2 additions & 2 deletions neurovault/apps/statmaps/forms.py
@@ -618,7 +618,7 @@ def clean_and_validate(self, cleaned_data):
nii = nb.load(ribbon_projection_file[:-3])
affine = nii.affine
affine[0, 3] -= 1
nb.Nifti1Image(nii.get_data(), affine).to_filename(
nb.Nifti1Image(np.asanyarray(nii.dataobj), affine).to_filename(
ribbon_projection_file
)

@@ -706,7 +706,7 @@ def clean_and_validate(self, cleaned_data):
):
# convert pseudo 4D to 3D
if squeezable_dimensions < len(nii.shape):
new_data = np.squeeze(nii.get_data())
new_data = np.squeeze(np.asanyarray(nii.dataobj))
nii = nb.Nifti1Image(new_data, nii.affine, nii.header)

# Papaya does not handle float64, but by converting
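The `get_data()` replacements above track the nibabel bump in requirements.txt (3.2.2 to 4.0.2); the nibabel 4 series drops `Image.get_data()`. A minimal sketch of the equivalent reads, assuming an arbitrary NIfTI file:

import numpy as np
import nibabel as nb

nii = nb.load("example.nii.gz")  # hypothetical file

# Old (removed in nibabel 4): data = nii.get_data()
data = np.asanyarray(nii.dataobj)  # preserves the on-disk dtype
fdata = nii.get_fdata()            # always casts to float64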
Binary file not shown.
24 changes: 18 additions & 6 deletions neurovault/apps/statmaps/tasks.py
@@ -5,14 +5,15 @@
from django.http import Http404
from django.shortcuts import get_object_or_404

"""
""" Old imports
from pybraincompare.compare.maths import calculate_correlation, calculate_pairwise_correlation
from pybraincompare.compare.mrutils import resample_images_ref, make_binary_deletion_mask, make_binary_deletion_vector
from pybraincompare.mr.datasets import get_data_directory
from pybraincompare.mr.transformation import make_resampled_transformation_vector
"""

import nilearn
from nilearn.image import resample_img

nilearn.EXPAND_PATH_WILDCARDS = False
from celery import shared_task, Celery
@@ -206,7 +207,7 @@ def generate_surface_image(image_pk):
and img.data_origin == "volume"
):
img_vol = nib.load(img.file.path)
data_vol = img_vol.get_data()
data_vol = numpy.asanyarray(img_vol.dataobj)
if data_vol.ndim > 3:
data_vol = data_vol[:, :, :, 0] # number of time points
this_path = os.path.abspath(os.path.dirname(__file__))
@@ -287,10 +288,21 @@ def save_resampled_transformation_single(pk1, resample_dim=[4, 4, 4]):
from neurovault.apps.statmaps.models import Image
from six import BytesIO
import numpy as np
from nilearn.maskers import NiftiMasker

img = get_object_or_404(Image, pk=pk1)
nii_obj = nib.load(img.file.path) # standard_mask=True is default
image_vector = make_resampled_transformation_vector(nii_obj, resample_dim)
nii_obj = nib.load(img.file.path)
mask_path = f'{settings.BASE_DIR}/apps/statmaps/static/anatomical/MNI152_T1_4mm_brain_mask.nii.gz'
mask = nib.load(mask_path)
image_resampled = resample_img(
nii_obj,
target_affine=mask.affine,
target_shape=mask.shape
)
masker = NiftiMasker(mask_img=mask)
masker.fit()
image_masked = masker.transform(image_resampled)
image_vector = image_masked.flatten()

f = BytesIO()
np.save(f, image_vector)
@@ -402,10 +414,10 @@ def save_voxelwise_pearson_similarity_resample(pk1, pk2,resample_dim=[4,4,4]):
resample_dim=resample_dim)
# resample_images_ref will "squeeze" images, but we should keep error here for now
for image_nii, image_obj in zip(images_resamp, [image1, image2]):
if len(numpy.squeeze(image_nii.get_data()).shape) != 3:
if len(numpy.squeeze(np.asanyarray(image_nii.dataobj)).shape) != 3:
raise Exception("Image %s (id=%d) has incorrect number of dimensions %s"%(image_obj.name,
image_obj.id,
str(image_nii.get_data().shape)))
str(np.asanyarray(image_nii.dataobj).shape)))
# Calculate correlation only on voxels that are in both maps (not zero, and not nan)
image1_res = images_resamp[0]
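
As the comment above notes, the Pearson similarity is computed only over voxels usable in both maps (not zero, and not NaN). A standalone sketch of that masking step, assuming two already-resampled 3D arrays:

import numpy as np
from scipy.stats import pearsonr

def masked_pearson(data1, data2):
    # Keep voxels that are nonzero and non-NaN in both maps.
    ok = (data1 != 0) & (data2 != 0) & ~np.isnan(data1) & ~np.isnan(data2)
    return pearsonr(data1[ok], data2[ok])[0]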
