Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #806 from NeuroVault/readd_gene_expression
re-add gene expression routes and views.
- Loading branch information
Showing
21 changed files
with
24,896 additions
and
107 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -76,3 +76,5 @@ local_data/ | |
.envs/ | ||
|
||
*.swp | ||
|
||
ahba_data/store_max1_reduced.h5 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
This directory contains files used by `neurovault/apps/statmaps/ahba.py`, which is called by the two gene expression views in the statmaps app. | ||
|
||
A file called `store_max1_reduced.h5` is not tracked by git due to its size. This file is generated by `scripts/preparing_AHBA_data.py`, which has dependencies in addition to those of the rest of the project. `store_max1_reduced.h5` can also be extracted from the neurovault/ahba image on dockerhub. |
Binary file not shown.
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,104 +1,131 @@ | ||
amqp==5.1.1 | ||
asgiref==3.5.2 | ||
async-timeout==4.0.2 | ||
billiard==3.6.4.0 | ||
cachetools==5.2.0 | ||
celery==5.2.7 | ||
asgiref==3.7.2 | ||
billiard==4.1.0 | ||
blosc2==2.0.0 | ||
cachetools==5.3.1 | ||
celery==5.3.1 | ||
certifi==2023.7.22 | ||
cffi==1.15.1 | ||
charset-normalizer==2.1.0 | ||
citeproc-py==0.6.0 | ||
click==8.1.3 | ||
charset-normalizer==3.2.0 | ||
click==8.1.6 | ||
click-didyoumean==0.3.0 | ||
click-plugins==1.1.1 | ||
click-repl==0.2.0 | ||
click-repl==0.3.0 | ||
cmake==3.27.1 | ||
cognitiveatlas==0.1.9 | ||
cryptography==37.0.4 | ||
contourpy==1.1.0 | ||
cryptography==41.0.3 | ||
cycler==0.11.0 | ||
Cython==0.29.30 | ||
Cython==0.29.36 | ||
defusedxml==0.7.1 | ||
Deprecated==1.2.13 | ||
Django==4.0.10 | ||
Deprecated==1.2.14 | ||
Django==4.2.4 | ||
django-braces==1.15.0 | ||
django-celery-results==2.4.0 | ||
django-cleanup==6.0.0 | ||
django-cors-headers==3.13.0 | ||
django-crispy-forms==1.14.0 | ||
django-cleanup==8.0.0 | ||
django-cors-headers==4.2.0 | ||
django-crispy-forms==2.0 | ||
django-datatables-view==1.20.0 | ||
django-file-resubmit==0.5.2 | ||
django-filter==22.1 | ||
django-guardian==2.4.0 | ||
django-oauth-toolkit==2.1.0 | ||
django-oauth-toolkit==2.3.0 | ||
django-polymorphic==3.1.0 | ||
django-sendfile==0.3.11 | ||
django-taggit==3.0.0 | ||
djangorestframework==3.13.1 | ||
duecredit==0.9.1 | ||
fonttools==4.34.4 | ||
frozendict==2.3.2 | ||
django-taggit==4.0.0 | ||
djangorestframework==3.14.0 | ||
filelock==3.12.2 | ||
fonttools==4.42.0 | ||
frozendict==2.3.8 | ||
future==0.18.3 | ||
fuzzywuzzy==0.18.0 | ||
h5py==3.7.0 | ||
gunicorn==21.2.0 | ||
h5py==3.9.0 | ||
html5lib==1.1 | ||
idna==3.3 | ||
imageio==2.19.3 | ||
indexed-gzip==1.6.13 | ||
idna==3.4 | ||
imageio==2.31.1 | ||
isodate==0.6.1 | ||
joblib==1.1.1 | ||
jwcrypto==1.4 | ||
kiwisolver==1.4.3 | ||
kombu==5.2.4 | ||
lxml==4.9.1 | ||
matplotlib==3.4.3 | ||
mpmath==1.2.1 | ||
networkx==2.8.4 | ||
nibabel==3.2.2 | ||
Jinja2==3.1.2 | ||
joblib==1.3.2 | ||
jwcrypto==1.5.0 | ||
kiwisolver==1.4.4 | ||
kombu==5.3.1 | ||
lit==16.0.6 | ||
llvmlite==0.40.1 | ||
lxml==4.9.3 | ||
MarkupSafe==2.1.3 | ||
matplotlib==3.7.2 | ||
mpmath==1.3.0 | ||
msgpack==1.0.5 | ||
networkx==3.1 | ||
nibabel==4.0.2 | ||
nidmfsl==2.2.0 | ||
nidmresults==2.1.0 | ||
nilearn==0.9.1 | ||
NiMARE==0.0.11 | ||
numexpr==2.8.3 | ||
numpy==1.23.1 | ||
nilearn==0.10.1 | ||
NiMARE==0.1.1 | ||
numba==0.57.1 | ||
numexpr==2.8.5 | ||
numpy==1.24.4 | ||
nvidia-cublas-cu11==11.10.3.66 | ||
nvidia-cuda-cupti-cu11==11.7.101 | ||
nvidia-cuda-nvrtc-cu11==11.7.99 | ||
nvidia-cuda-runtime-cu11==11.7.99 | ||
nvidia-cudnn-cu11==8.5.0.96 | ||
nvidia-cufft-cu11==10.9.0.58 | ||
nvidia-curand-cu11==10.2.10.91 | ||
nvidia-cusolver-cu11==11.4.0.1 | ||
nvidia-cusparse-cu11==11.7.4.91 | ||
nvidia-nccl-cu11==2.14.3 | ||
nvidia-nvtx-cu11==11.7.91 | ||
oauthlib==3.2.2 | ||
packaging==21.3 | ||
pandas==1.4.3 | ||
patsy==0.5.2 | ||
Pillow==9.3.0 | ||
prompt-toolkit==3.0.30 | ||
packaging==23.1 | ||
pandas==2.0.3 | ||
patsy==0.5.3 | ||
Pillow==10.0.0 | ||
plotly==5.15.0 | ||
prompt-toolkit==3.0.39 | ||
prov==2.0.0 | ||
psycopg==3.0.15 | ||
psycopg2==2.9.3 | ||
pycortex==1.2.5 | ||
psycopg==3.1.10 | ||
py-cpuinfo==9.0.0 | ||
pycortex==1.2.7 | ||
pycparser==2.21 | ||
PyJWT==2.4.0 | ||
PyJWT==2.8.0 | ||
PyLD==2.0.3 | ||
PyMARE==0.0.3 | ||
PyMARE==0.0.4rc2 | ||
pyparsing==3.0.9 | ||
python-dateutil==2.8.2 | ||
python3-openid==3.2.0 | ||
pytz==2022.1 | ||
rdflib==6.1.1 | ||
pytz==2023.3 | ||
PyYAML==6.0.1 | ||
rdflib==7.0.0 | ||
rdflib-jsonld==0.6.2 | ||
redis==4.5.4 | ||
requests==2.28.1 | ||
redis==4.6.0 | ||
requests==2.31.0 | ||
requests-oauthlib==1.3.1 | ||
scikit-learn==1.1.1 | ||
scipy==1.8.1 | ||
Shapely==1.8.2 | ||
scikit-learn==1.3.0 | ||
scipy==1.11.1 | ||
shapely==2.0.1 | ||
six==1.16.0 | ||
social-auth-app-django==5.0.0 | ||
social-auth-core==4.3.0 | ||
social-auth-app-django==5.2.0 | ||
social-auth-core==4.4.2 | ||
sparse==0.14.0 | ||
sqlparse==0.4.4 | ||
statsmodels==0.13.2 | ||
sympy==1.10.1 | ||
threadpoolctl==3.1.0 | ||
tornado==6.2 | ||
tqdm==4.64.0 | ||
typing-extensions==4.3.0 | ||
urllib3==1.26.10 | ||
statsmodels==0.14.0 | ||
sympy==1.12 | ||
tables==3.8.0 | ||
tenacity==8.2.2 | ||
threadpoolctl==3.2.0 | ||
torch==2.0.1 | ||
tornado==6.3.2 | ||
tqdm==4.66.1 | ||
triton==2.0.0 | ||
typing_extensions==4.7.1 | ||
tzdata==2023.3 | ||
urllib3==2.0.4 | ||
vine==5.0.0 | ||
wcwidth==0.2.5 | ||
wcwidth==0.2.6 | ||
webencodings==0.5.1 | ||
wget==3.2 | ||
wrapt==1.14.1 | ||
zipstream==1.1.4 | ||
gunicorn==20.1.0 | ||
wrapt==1.15.0 | ||
django-celery-results | ||
zipstream | ||
django-filter | ||
crispy-bootstrap4 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,72 @@ | ||
|
||
# coding: utf-8 | ||
|
||
# In[33]: | ||
|
||
from glob import glob | ||
import os | ||
import pandas as pd | ||
import numpy as np | ||
import nibabel as nb | ||
import numpy.linalg as npl | ||
from scipy.stats.stats import pearsonr, ttest_1samp, percentileofscore, linregress, zscore | ||
from statsmodels.sandbox.stats.multicomp import multipletests | ||
|
||
|
||
def calculate_gene_expression_similarity(reduced_stat_map_data, mask="full",
                                         data_dir="/code/ahba_data"):
    """Relate a statistical map to per-donor gene expression data.

    For every donor table in ``store_max1_reduced.h5`` the statistical-map
    values at the table's sample locations are regressed against each
    expression row, and the per-donor slopes are then tested against zero
    across donors.

    Parameters
    ----------
    reduced_stat_map_data
        Container indexed with the integer-cast column labels of each
        donor table (presumably a 1-D array of map values -- confirm
        against the calling views).  NaN entries are treated as missing.
    mask : {"full", "cortex", "subcortex"}
        Which sample locations to use.  ``subcortex_mask.npy`` is only
        read for "cortex"/"subcortex": locations whose mask value is NaN
        are excluded for "subcortex", and all other locations are
        excluded for "cortex".
    data_dir : str
        Directory holding ``store_max1_reduced.h5``,
        ``subcortex_mask.npy`` and ``probe_info_max1.csv``.

    Returns
    -------
    pandas.DataFrame
        One row per expression-table row, with the columns
        ``gene_symbol``, ``entrez_id.1``, ``gene_name``, ``t``, ``p``,
        ``p (FDR corrected)``, ``variance explained (mean)`` and
        ``variance explained (std)``.

    Raises
    ------
    ValueError
        If ``mask`` is not one of the supported values.
    """
    # Validate up front with a real exception: an ``assert`` is stripped
    # under ``python -O`` and would only fire after the store was opened.
    if mask not in ("full", "cortex", "subcortex"):
        raise ValueError(
            "mask must be 'full', 'cortex' or 'subcortex', got %r" % (mask,))

    store_file = os.path.join(data_dir, "store_max1_reduced.h5")
    subcortex_mask = os.path.join(data_dir, "subcortex_mask.npy")
    probe_info_file = os.path.join(data_dir, "probe_info_max1.csv")

    # The mask file does not change between donors, so read it only once.
    outside_subcortex = None
    if mask != "full":
        outside_subcortex = np.isnan(np.load(subcortex_mask))

    results_dfs = []
    with pd.HDFStore(store_file, 'r') as store:
        for donor_id in store.keys():
            print("Loading expression data (%s)" % donor_id)
            expression_data = store.get(donor_id.replace(".", "_"))
            sample_idx = expression_data.columns.astype(int)

            print("Getting statmap values (%s)" % donor_id)
            nifti_values = np.asarray(reduced_stat_map_data[sample_idx])

            print("Removing missing values (%s)" % donor_id)
            na_mask = np.isnan(nifti_values)
            if mask == "subcortex":
                na_mask = np.logical_or(na_mask, outside_subcortex[sample_idx])
            elif mask == "cortex":
                na_mask = np.logical_or(
                    na_mask, np.logical_not(outside_subcortex[sample_idx]))
            keep = np.logical_not(na_mask)

            # Filter both sides positionally so that map values and
            # expression columns stay aligned whatever the label dtype.
            nifti_values = nifti_values[keep]
            expression_data = expression_data.loc[:, keep]

            print("z scoring (%s)" % donor_id)
            expression_data = pd.DataFrame(
                zscore(expression_data, axis=1),
                columns=expression_data.columns.astype(int),
                index=expression_data.index)
            nifti_values = zscore(nifti_values)

            print("Calculating linear regressions (%s)" % donor_id)
            # Design matrix: map values plus an intercept column.  Row 0
            # of the solution holds the slope for every expression row.
            design = np.c_[nifti_values, np.ones_like(nifti_values)]
            # rcond=None selects numpy's current default cut-off
            # explicitly and avoids the FutureWarning on older releases.
            coefficients = np.linalg.lstsq(
                design, expression_data.T, rcond=None)[0]
            results_df = pd.DataFrame({"slope": coefficients[0]},
                                      index=expression_data.index)

            # donor_id[1:] drops the first character of the store key
            # (HDFStore keys are prefixed with "/").
            results_df.columns = pd.MultiIndex.from_tuples(
                [(donor_id[1:], c) for c in results_df.columns],
                names=['donor_id', 'parameter'])

            results_dfs.append(results_df)

    print("Concatenating results")
    results_df = pd.concat(results_dfs, axis=1)
    del results_dfs

    # One-sample t-test of the per-donor slopes against zero, per row.
    t, p = ttest_1samp(results_df, 0.0, axis=1)
    # Index by the concatenated results rather than by whichever donor
    # table happened to be processed last.
    group_results_df = pd.DataFrame({"t": t, "p": p}, columns=['t', 'p'],
                                    index=results_df.index)
    _, group_results_df["p (FDR corrected)"], _, _ = multipletests(
        group_results_df.p, method='fdr_bh')

    # Both regression variables were z-scored, so the squared slope
    # times 100 is reported as the percentage of variance explained.
    variance_explained = results_df.xs('slope', axis=1, level=1) ** 2 * 100
    group_results_df["variance explained (mean)"] = variance_explained.mean(axis=1)
    group_results_df["variance explained (std)"] = variance_explained.std(axis=1)
    del results_df, variance_explained

    probe_info = pd.read_csv(probe_info_file, index_col=0).drop(
        ['chromosome', "gene_id"], axis=1)
    group_results_df = group_results_df.join(probe_info)
    group_results_df = group_results_df[
        ["gene_symbol", "entrez_id.1", "gene_name", "t", "p",
         "p (FDR corrected)", "variance explained (mean)",
         "variance explained (std)"]]

    return group_results_df
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Binary file added
BIN
+5.21 KB
neurovault/apps/statmaps/static/anatomical/MNI152_T1_4mm_brain_mask.nii.gz
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.