Commit 7a61f13

Merge pull request #806 from NeuroVault/readd_gene_expression

re-add gene expression routes and views.

rwblair committed Aug 11, 2023
2 parents e3dc3c7 + e6f9106
Showing 21 changed files with 24,896 additions and 107 deletions.
3 changes: 1 addition & 2 deletions .github/workflows/build_and_test.yml
@@ -85,9 +85,8 @@ jobs:
-
name: Migrate Database
run: |
until docker compose exec -T \
sleep 10 && until docker compose exec -T \
postgres pg_isready -U postgres; do sleep 1; done
docker compose exec -T django python manage.py migrate
-
name: Backend Tests
2 changes: 2 additions & 0 deletions .gitignore
@@ -76,3 +76,5 @@ local_data/
.envs/

*.swp

ahba_data/store_max1_reduced.h5
3 changes: 3 additions & 0 deletions ahba_data/README.md
@@ -0,0 +1,3 @@
This directory contains files used by `neurovault/apps/statmaps/ahba.py`, which is called by the two gene expression views in the statmaps app.

A file called `store_max1_reduced.h5` is not tracked by git due to its size. It is generated by `scripts/preparing_AHBA_data.py`, which has additional dependencies beyond those of the rest of the project. `store_max1_reduced.h5` can also be extracted from the neurovault/ahba image on Docker Hub.
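
For orientation, a minimal sketch of inspecting the store once it is in place. The path matches what `neurovault/apps/statmaps/ahba.py` uses inside the container; the per-donor DataFrame layout is an assumption inferred from how that module reads the store.

import pandas as pd

# Path used by neurovault/apps/statmaps/ahba.py inside the container.
store_file = "/code/ahba_data/store_max1_reduced.h5"

# Each key is one AHBA donor; each value is assumed to be a
# probes-by-voxel-index DataFrame, mirroring how ahba.py iterates the store.
with pd.HDFStore(store_file, "r") as store:
    for donor_id in store.keys():
        expression_data = store.get(donor_id.replace(".", "_"))
        print(donor_id, expression_data.shape)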
Binary file added ahba_data/Richiardi_Data_File_S2.xlsx
Binary file not shown.
3,703 changes: 3,703 additions & 0 deletions ahba_data/corrected_mni_coordinates.csv

Large diffs are not rendered by default.

20,788 changes: 20,788 additions & 0 deletions ahba_data/probe_info_max1.csv

Large diffs are not rendered by default.

File renamed without changes.
169 changes: 98 additions & 71 deletions compose/django/requirements.txt
@@ -1,104 +1,131 @@
amqp==5.1.1
asgiref==3.5.2
async-timeout==4.0.2
billiard==3.6.4.0
cachetools==5.2.0
celery==5.2.7
asgiref==3.7.2
billiard==4.1.0
blosc2==2.0.0
cachetools==5.3.1
celery==5.3.1
certifi==2023.7.22
cffi==1.15.1
charset-normalizer==2.1.0
citeproc-py==0.6.0
click==8.1.3
charset-normalizer==3.2.0
click==8.1.6
click-didyoumean==0.3.0
click-plugins==1.1.1
click-repl==0.2.0
click-repl==0.3.0
cmake==3.27.1
cognitiveatlas==0.1.9
cryptography==37.0.4
contourpy==1.1.0
cryptography==41.0.3
cycler==0.11.0
Cython==0.29.30
Cython==0.29.36
defusedxml==0.7.1
Deprecated==1.2.13
Django==4.0.10
Deprecated==1.2.14
Django==4.2.4
django-braces==1.15.0
django-celery-results==2.4.0
django-cleanup==6.0.0
django-cors-headers==3.13.0
django-crispy-forms==1.14.0
django-cleanup==8.0.0
django-cors-headers==4.2.0
django-crispy-forms==2.0
django-datatables-view==1.20.0
django-file-resubmit==0.5.2
django-filter==22.1
django-guardian==2.4.0
django-oauth-toolkit==2.1.0
django-oauth-toolkit==2.3.0
django-polymorphic==3.1.0
django-sendfile==0.3.11
django-taggit==3.0.0
djangorestframework==3.13.1
duecredit==0.9.1
fonttools==4.34.4
frozendict==2.3.2
django-taggit==4.0.0
djangorestframework==3.14.0
filelock==3.12.2
fonttools==4.42.0
frozendict==2.3.8
future==0.18.3
fuzzywuzzy==0.18.0
h5py==3.7.0
gunicorn==21.2.0
h5py==3.9.0
html5lib==1.1
idna==3.3
imageio==2.19.3
indexed-gzip==1.6.13
idna==3.4
imageio==2.31.1
isodate==0.6.1
joblib==1.1.1
jwcrypto==1.4
kiwisolver==1.4.3
kombu==5.2.4
lxml==4.9.1
matplotlib==3.4.3
mpmath==1.2.1
networkx==2.8.4
nibabel==3.2.2
Jinja2==3.1.2
joblib==1.3.2
jwcrypto==1.5.0
kiwisolver==1.4.4
kombu==5.3.1
lit==16.0.6
llvmlite==0.40.1
lxml==4.9.3
MarkupSafe==2.1.3
matplotlib==3.7.2
mpmath==1.3.0
msgpack==1.0.5
networkx==3.1
nibabel==4.0.2
nidmfsl==2.2.0
nidmresults==2.1.0
nilearn==0.9.1
NiMARE==0.0.11
numexpr==2.8.3
numpy==1.23.1
nilearn==0.10.1
NiMARE==0.1.1
numba==0.57.1
numexpr==2.8.5
numpy==1.24.4
nvidia-cublas-cu11==11.10.3.66
nvidia-cuda-cupti-cu11==11.7.101
nvidia-cuda-nvrtc-cu11==11.7.99
nvidia-cuda-runtime-cu11==11.7.99
nvidia-cudnn-cu11==8.5.0.96
nvidia-cufft-cu11==10.9.0.58
nvidia-curand-cu11==10.2.10.91
nvidia-cusolver-cu11==11.4.0.1
nvidia-cusparse-cu11==11.7.4.91
nvidia-nccl-cu11==2.14.3
nvidia-nvtx-cu11==11.7.91
oauthlib==3.2.2
packaging==21.3
pandas==1.4.3
patsy==0.5.2
Pillow==9.3.0
prompt-toolkit==3.0.30
packaging==23.1
pandas==2.0.3
patsy==0.5.3
Pillow==10.0.0
plotly==5.15.0
prompt-toolkit==3.0.39
prov==2.0.0
psycopg==3.0.15
psycopg2==2.9.3
pycortex==1.2.5
psycopg==3.1.10
py-cpuinfo==9.0.0
pycortex==1.2.7
pycparser==2.21
PyJWT==2.4.0
PyJWT==2.8.0
PyLD==2.0.3
PyMARE==0.0.3
PyMARE==0.0.4rc2
pyparsing==3.0.9
python-dateutil==2.8.2
python3-openid==3.2.0
pytz==2022.1
rdflib==6.1.1
pytz==2023.3
PyYAML==6.0.1
rdflib==7.0.0
rdflib-jsonld==0.6.2
redis==4.5.4
requests==2.28.1
redis==4.6.0
requests==2.31.0
requests-oauthlib==1.3.1
scikit-learn==1.1.1
scipy==1.8.1
Shapely==1.8.2
scikit-learn==1.3.0
scipy==1.11.1
shapely==2.0.1
six==1.16.0
social-auth-app-django==5.0.0
social-auth-core==4.3.0
social-auth-app-django==5.2.0
social-auth-core==4.4.2
sparse==0.14.0
sqlparse==0.4.4
statsmodels==0.13.2
sympy==1.10.1
threadpoolctl==3.1.0
tornado==6.2
tqdm==4.64.0
typing-extensions==4.3.0
urllib3==1.26.10
statsmodels==0.14.0
sympy==1.12
tables==3.8.0
tenacity==8.2.2
threadpoolctl==3.2.0
torch==2.0.1
tornado==6.3.2
tqdm==4.66.1
triton==2.0.0
typing_extensions==4.7.1
tzdata==2023.3
urllib3==2.0.4
vine==5.0.0
wcwidth==0.2.5
wcwidth==0.2.6
webencodings==0.5.1
wget==3.2
wrapt==1.14.1
zipstream==1.1.4
gunicorn==20.1.0
wrapt==1.15.0
django-celery-results
zipstream
django-filter
crispy-bootstrap4
4 changes: 3 additions & 1 deletion compose/django/upgrade_pip_calls.txt
@@ -1,4 +1,4 @@
pip install scipy celery[redis] requests lxml django-oauth-toolkit pandas nibabel matplotlib django-taggit nilearn django-datatables-view statsmodels django-cleanup django-polymorphic djangorestframework
pip install scipy celery[redis] requests lxml django-oauth-toolkit pandas nibabel matplotlib django-taggit nilearn django-datatables-view statsmodels django-cleanup django-polymorphic djangorestframework django-celery-results django-filter
pip install psycopg
pip install nidmresults nidmfsl cognitiveatlas
pip install social-app-django
@@ -16,3 +16,5 @@ pip install django-braces
pip install corsheaders
pip install django-cors-headers
pip install gunicorn
pip install tables

72 changes: 72 additions & 0 deletions neurovault/apps/statmaps/ahba.py
@@ -0,0 +1,72 @@

# coding: utf-8

import numpy as np
import pandas as pd
from scipy.stats import ttest_1samp, zscore
from statsmodels.stats.multitest import multipletests


def calculate_gene_expression_similarity(reduced_stat_map_data, mask="full"):
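    """Regress AHBA probe expression against a reduced stat map, per donor.

    For each donor in the HDF5 store, voxel-wise expression and stat map
    values are z-scored and a least-squares slope is fit per probe. Slopes
    are then t-tested against zero across donors and FDR-corrected.
    `mask` restricts the comparison to "cortex" or "subcortex" voxels, or
    uses all voxels ("full", the default).
    """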
store_file = "/code/ahba_data/store_max1_reduced.h5"
subcortex_mask = "/code/ahba_data/subcortex_mask.npy"

results_dfs = []
with pd.HDFStore(store_file, 'r') as store:
for donor_id in store.keys():
print("Loading expression data (%s)" % donor_id)
expression_data = store.get(donor_id.replace(".", "_"))

print("Getting statmap values (%s)" % donor_id)
nifti_values = reduced_stat_map_data[expression_data.columns.astype(int)]

print("Removing missing values (%s)" % donor_id)
na_mask = np.isnan(nifti_values)
if mask == "subcortex":
na_mask = np.logical_or(na_mask,
np.isnan(np.load(subcortex_mask)[expression_data.columns.astype(int)]))
elif mask == "cortex":
na_mask = np.logical_or(na_mask, np.logical_not(np.isnan(
np.load(subcortex_mask)[expression_data.columns.astype(int)])))
else:
assert mask == "full"

nifti_values = np.array(nifti_values)[np.logical_not(na_mask)]
expression_data.drop(expression_data.columns[na_mask].astype(int), axis=1, inplace=True)

print("z scoring (%s)" % donor_id)
expression_data = pd.DataFrame(zscore(expression_data, axis=1), columns=expression_data.columns.astype(int),
index=expression_data.index)
nifti_values = zscore(nifti_values)

print("Calculating linear regressions (%s)" % donor_id)
regression_results = np.linalg.lstsq(np.c_[nifti_values, np.ones_like(nifti_values)], expression_data.T)
results_df = pd.DataFrame({"slope": regression_results[0][0]}, index=expression_data.index)

results_df.columns = pd.MultiIndex.from_tuples([(donor_id[1:], c,) for c in results_df.columns],
names=['donor_id', 'parameter'])

results_dfs.append(results_df)

print("Concatenating results")
results_df = pd.concat(results_dfs, axis=1)
del results_dfs

t, p = ttest_1samp(results_df, 0.0, axis=1)
group_results_df = pd.DataFrame({"t": t, "p": p}, columns=['t', 'p'], index=expression_data.index)
_, group_results_df["p (FDR corrected)"], _, _ = multipletests(group_results_df.p, method='fdr_bh')
group_results_df["variance explained (mean)"] = (results_df.xs('slope', axis=1, level=1) ** 2 * 100).mean(axis=1)
group_results_df["variance explained (std)"] = (results_df.xs('slope', axis=1, level=1) ** 2 * 100).std(axis=1)
del results_df
probe_info = pd.read_csv("/code/ahba_data/probe_info_max1.csv", index_col=0).drop(['chromosome', "gene_id"], axis=1)
group_results_df = group_results_df.join(probe_info)
group_results_df = group_results_df[["gene_symbol", "entrez_id.1", "gene_name","t", "p", "p (FDR corrected)",
"variance explained (mean)", "variance explained (std)"]]

return group_results_df
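
For context, a hedged usage sketch: the input is assumed to be the flattened, masked 4mm vector that `save_resampled_transformation_single` in tasks.py saves, and the file path below is hypothetical.

import numpy as np
from neurovault.apps.statmaps.ahba import calculate_gene_expression_similarity

# Hypothetical path to a vector produced by save_resampled_transformation_single.
reduced_data = np.load("/tmp/reduced_map.npy")

# mask may be "full", "cortex", or "subcortex", matching the branches above.
results = calculate_gene_expression_similarity(reduced_data, mask="full")
print(results.sort_values("p (FDR corrected)").head())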
4 changes: 2 additions & 2 deletions neurovault/apps/statmaps/forms.py
@@ -618,7 +618,7 @@ def clean_and_validate(self, cleaned_data):
nii = nb.load(ribbon_projection_file[:-3])
affine = nii.affine
affine[0, 3] -= 1
nb.Nifti1Image(nii.get_data(), affine).to_filename(
nb.Nifti1Image(np.asanyarray(nii.dataobj), affine).to_filename(
ribbon_projection_file
)

@@ -706,7 +706,7 @@ def clean_and_validate(self, cleaned_data):
):
# convert pseudo 4D to 3D
if squeezable_dimensions < len(nii.shape):
new_data = np.squeeze(nii.get_data())
new_data = np.squeeze(np.asanyarray(nii.dataobj))
nii = nb.Nifti1Image(new_data, nii.affine, nii.header)

# Papaya does not handle float64, but by converting
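The `get_data()` replacements above track the nibabel bump in requirements.txt (3.2.2 to 4.0.2); the nibabel 4 series drops `Image.get_data()`. A minimal sketch of the equivalent reads, assuming an arbitrary NIfTI file:

import numpy as np
import nibabel as nb

nii = nb.load("example.nii.gz")  # hypothetical file

# Old (removed in nibabel 4): data = nii.get_data()
data = np.asanyarray(nii.dataobj)  # preserves the on-disk dtype
fdata = nii.get_fdata()            # always casts to float64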
Binary file not shown.
24 changes: 18 additions & 6 deletions neurovault/apps/statmaps/tasks.py
@@ -5,14 +5,15 @@
from django.http import Http404
from django.shortcuts import get_object_or_404

"""
""" Old imports
from pybraincompare.compare.maths import calculate_correlation, calculate_pairwise_correlation
from pybraincompare.compare.mrutils import resample_images_ref, make_binary_deletion_mask, make_binary_deletion_vector
from pybraincompare.mr.datasets import get_data_directory
from pybraincompare.mr.transformation import make_resampled_transformation_vector
"""

import nilearn
from nilearn.image import resample_img

nilearn.EXPAND_PATH_WILDCARDS = False
from celery import shared_task, Celery
@@ -206,7 +207,7 @@ def generate_surface_image(image_pk):
and img.data_origin == "volume"
):
img_vol = nib.load(img.file.path)
data_vol = img_vol.get_data()
data_vol = numpy.asanyarray(img_vol.dataobj)
if data_vol.ndim > 3:
data_vol = data_vol[:, :, :, 0] # number of time points
this_path = os.path.abspath(os.path.dirname(__file__))
@@ -287,10 +288,21 @@ def save_resampled_transformation_single(pk1, resample_dim=[4, 4, 4]):
from neurovault.apps.statmaps.models import Image
from six import BytesIO
import numpy as np
from nilearn.maskers import NiftiMasker

img = get_object_or_404(Image, pk=pk1)
nii_obj = nib.load(img.file.path) # standard_mask=True is default
image_vector = make_resampled_transformation_vector(nii_obj, resample_dim)
nii_obj = nib.load(img.file.path)
mask_path = f'{settings.BASE_DIR}/apps/statmaps/static/anatomical/MNI152_T1_4mm_brain_mask.nii.gz'
mask = nib.load(mask_path)
image_resampled = resample_img(
nii_obj,
target_affine=mask.affine,
target_shape=mask.shape
)
masker = NiftiMasker(mask_img=mask)
masker.fit()
image_masked = masker.transform(image_resampled)
image_vector = image_masked.flatten()

f = BytesIO()
np.save(f, image_vector)
@@ -402,10 +414,10 @@ def save_voxelwise_pearson_similarity_resample(pk1, pk2,resample_dim=[4,4,4]):
resample_dim=resample_dim)
# resample_images_ref will "squeeze" images, but we should keep error here for now
for image_nii, image_obj in zip(images_resamp, [image1, image2]):
if len(numpy.squeeze(image_nii.get_data()).shape) != 3:
if len(numpy.squeeze(np.asanyarray(image_nii.dataobj)).shape) != 3:
raise Exception("Image %s (id=%d) has incorrect number of dimensions %s"%(image_obj.name,
image_obj.id,
str(image_nii.get_data().shape)))
str(np.asanyarray(image_nii.dataobj).shape)))
# Calculate correlation only on voxels that are in both maps (not zero, and not nan)
image1_res = images_resamp[0]
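
As the comment above notes, the Pearson similarity is computed only over voxels usable in both maps (not zero, and not NaN). A standalone sketch of that masking step, assuming two already-resampled 3D arrays:

import numpy as np
from scipy.stats import pearsonr

def masked_pearson(data1, data2):
    # Keep voxels that are nonzero and non-NaN in both maps.
    ok = (data1 != 0) & (data2 != 0) & ~np.isnan(data1) & ~np.isnan(data2)
    return pearsonr(data1[ok], data2[ok])[0]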
