Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Restore gene decoding #779

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/build_and_test.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
name: Building and Testing Workflow
on: [workflow_dispatch,push]
on: [workflow_dispatch, push]

concurrency:
group: ${{ github.head_ref || github.run_id }}
Expand Down
8 changes: 8 additions & 0 deletions neurovault/apps/main/templates/cite.html
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,14 @@ <h3>How to cite NeuroVault in papers.</h3>
a web-based repository for collecting and sharing
unthresholded statistical maps of the brain.</a> Front.
Neuroinform. 9:8. doi: 10.3389/fninf.2015.00008</i></p>
<p>If you used the gene decoding function implemented in NeuroVault
please cite:</p>
<p><i>Gorgolewski KJ, Fox AS, Chang L, Schäfer A, Arélin K, Burmann
I, Sacher J, Margulies DS (2014) <a
href="https://f1000research.com/posters/1097120">Tight
fitting genes: finding relations between statistical maps
and gene expression patterns.</a> F1000Posters, 5:1607.
doi: 10.7490/f1000research.1097120.1</i></p>
<p>If you downloaded surface data (.gii files) please cite:</p>
<p><i>Wu J, Ngo GH, Greve DN, Li J, He T, Fischl B, Eickhoff SB,
Yeo BTT (2018) <a href="https://doi.org/10.1002/hbm.24213">Accurate
Expand Down
72 changes: 72 additions & 0 deletions neurovault/apps/statmaps/ahba.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@

# coding: utf-8

# In[33]:

from glob import glob
import os
import pandas as pd
import numpy as np
import nibabel as nb
import numpy.linalg as npl
from scipy.stats.stats import pearsonr, ttest_1samp, percentileofscore, linregress, zscore
from statsmodels.sandbox.stats.multicomp import multipletests


def calculate_gene_expression_similarity(reduced_stat_map_data, mask="full"):
store_file = "/ahba_data/store_max1_reduced.h5"
subcortex_mask = "/ahba_data/subcortex_mask.npy"

results_dfs = []
with pd.HDFStore(store_file, 'r') as store:
for donor_id in list(store.keys()):
print("Loading expression data (%s)" % donor_id)
expression_data = store.get(donor_id.replace(".", "_"))

print("Getting statmap values (%s)" % donor_id)
nifti_values = reduced_stat_map_data[expression_data.columns]

print("Removing missing values (%s)" % donor_id)
na_mask = np.isnan(nifti_values)
if mask == "subcortex":
na_mask = np.logical_or(na_mask,
np.isnan(np.load(subcortex_mask)[expression_data.columns]))
elif mask == "cortex":
na_mask = np.logical_or(na_mask, np.logical_not(np.isnan(
np.load(subcortex_mask)[expression_data.columns])))
else:
assert mask == "full"

nifti_values = np.array(nifti_values)[np.logical_not(na_mask)]
expression_data.drop(expression_data.columns[na_mask], axis=1, inplace=True)

print("z scoring (%s)" % donor_id)
expression_data = pd.DataFrame(zscore(expression_data, axis=1), columns=expression_data.columns,
index=expression_data.index)
nifti_values = zscore(nifti_values)

print("Calculating linear regressions (%s)" % donor_id)
regression_results = np.linalg.lstsq(np.c_[nifti_values, np.ones_like(nifti_values)], expression_data.T)
results_df = pd.DataFrame({"slope": regression_results[0][0]}, index=expression_data.index)

results_df.columns = pd.MultiIndex.from_tuples([(donor_id[1:], c,) for c in results_df.columns],
names=['donor_id', 'parameter'])

results_dfs.append(results_df)

print("Concatenating results")
results_df = pd.concat(results_dfs, axis=1)
del results_dfs

t, p = ttest_1samp(results_df, 0.0, axis=1)
group_results_df = pd.DataFrame({"t": t, "p": p}, columns=['t', 'p'], index=expression_data.index)
_, group_results_df["p (FDR corrected)"], _, _ = multipletests(group_results_df.p, method='fdr_bh')
group_results_df["variance explained (mean)"] = (results_df.xs('slope', axis=1, level=1) ** 2 * 100).mean(axis=1)
group_results_df["variance explained (std)"] = (results_df.xs('slope', axis=1, level=1) ** 2 * 100).std(axis=1)
del results_df
probe_info = pd.read_csv("/ahba_data/probe_info_max1.csv", index_col=0).drop(['chromosome', "gene_id"], axis=1)
group_results_df = group_results_df.join(probe_info)
group_results_df = group_results_df[["gene_symbol", "entrez_id.1", "gene_name","t", "p", "p (FDR corrected)",
"variance explained (mean)", "variance explained (std)"]]

return group_results_df
132 changes: 132 additions & 0 deletions neurovault/apps/statmaps/templates/statmaps/gene_expression.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
{% extends "base.html" %}

{% block head %}
<title>{% block title %}Gene expression decoding: ({ image.name
}}{% endblock %}</title>
<script src="//cdn.datatables.net/plug-ins/1.10.11/sorting/natural.js"
type="text/javascript"></script>
<script>
$(document).ready(function () {
var prevSearch;
var saveState = true;
var captured = /q=([^&]+)/.exec(window.location.href);
var result = captured ? captured[1] : '';
var base_url = captured != '' ? window.location.href.split("?")[0] : window.location.href;
$('#genes-table').dataTable({
"bJQueryUI": true,
iDisplayLength: 25,
"ajax": "{% url 'statmaps:gene_expression_json' pk=image.pk %}?mask={{ mask }}",
"order": [[4, "asc"]],
"columnDefs": [
{
"render": function (data, type, row) {
return data + " <a href='http://www.ncbi.nlm.nih.gov/gene/" + row[1] + "'>[NCBI]</a> <a href='http://neurosynth.org/genes/" + data + "'>[Neurosynth]</a>";
},
"targets": [0]
},
{
"render": function (data, type, row) {
return data.toFixed(6);
},
"searchable": false,
"targets": [4, 5]
},
{
"render": function (data, type, row) {
return data.toFixed(2);
},
"searchable": false,
"targets": [3, 6, 7]
},
{"visible": false, "targets": [1]}
],
"oSearch": {"sSearch": result},
"fnDrawCallback": function (oSettings) {
var curSearch = oSettings.oPreviousSearch.sSearch;
history.replaceState({query: curSearch}, "title", base_url + "?q=" + curSearch + "&mask={{ mask }}");
$("#full_button").attr("href", base_url + "?q=" + curSearch + "&mask=full");
$("#cortex_button").attr("href", base_url + "?q=" + curSearch + "&mask=cortex");
$("#subcortex_button").attr("href", base_url + "?q=" + curSearch + "&mask=subcortex");

if ("{{ mask }}" == "full") {
$("#full_button").removeClass("btn-secondary");
$("#full_button").addClass("btn-primary");
}
if ("{{ mask }}" == "cortex") {
$("#cortex_button").removeClass("btn-secondary");
$("#cortex_button").addClass("btn-primary");
}
if ("{{ mask }}" == "subcortex") {
$("#subcortex_button").removeClass("btn-secondary");
$("#subcortex_button").addClass("btn-primary");
}
}
});
});

</script>

{% endblock %}

{% block content %}
<div class="row">
<div class="col">
<h2>Gene expression decoding of the <a
href="{{ image.get_absolute_url }}">{{ image.name }}</a>
statistical map</h2>
<p>Limit decoding to :
<a class="btn btn-secondary btn-sm" type="button"
id="full_button">Full brain</a>
<a class="btn btn-secondary btn-sm" type="button"
id="subcortex_button">Subcortex</a>
<a class="btn btn-secondary btn-sm" type="button"
id="cortex_button">Cortex</a>
</p>
<div class="table-responsive-md">
<table id="genes-table"
class="table table-condensed table-striped table-sm table-hover">
<thead>
<tr>
<th scope="row">Symbol</th>
<th scope="row">entrez_id</th>
<th scope="row">Name</th>
<th scope="row">t</th>
<th scope="row">p</th>
<th scope="row">p&nbsp;(corr)</th>
<th scope="row">Variance explained [%]</th>
<th scope="row">± [%]</th>
</tr>
</thead>
</table>
</div>
</div>
</div>
<div class="row">
<div class="col">
<p><strong>About</strong></p>
<p>This map has been compared with gene expression data obtained
from <a href="http://human.brain-map.org/">Allen Human Brain
Atlas</a>. For every
gene and each of the six brains donated to the Allen Brain
Institute a linear model has been fitted to see
how similar they are to the evaluated map. A one sample t test
has been used see how consistent the relation
between the gene expression and evaluated map values are across
the six donated brains. To account for
the number of tested genes False Discovery Rate correction has
been applied.</p>
<p>Decoding can be performed on the full brain or alternatively to
subcortical or cortical areas. <a
href="http://neurovault.org/images/39877/">This
mask</a> is used to
limit the datapoints for the subcortical analysis variant and
its inverse in the cortical case.</p>
<p>Please cite: <a
href="https://f1000research.com/posters/1097120">Gorgolewski
KJ, Fox AS, Chang L et al. Tight fitting genes: finding
relations between statistical maps and gene expression
patterns. F1000Posters 2014, 5:1607 (poster)</a></p>
</div>
</div>

{% endblock %}
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,9 @@ <h2>{{ image.name }}</h2>
<a class="dropdown-item" href="{% url 'statmaps:find_similar' image.id %}" data-toggle="tooltip" title="Find maps with similar patters.">
Similar maps search
</a>
<a class="dropdown-item" href="{% url 'statmaps:gene_expression' image.id %}" data-toggle="tooltip" title="Find genes with similar expression patters.">
Gene expression decoding
</a>
{% else %}
<a class="dropdown-item disabled" tabindex="-1" role="button" aria-disabled="true" href="#" data-toggle="tooltip" title="Find maps with similar patters. This function is only enabled for public group level unthresholded statistical maps.">
Similar maps search
Expand Down
134 changes: 134 additions & 0 deletions neurovault/apps/statmaps/tests/test_gene_decoding.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
from neurovault.apps.statmaps.tasks import save_resampled_transformation_single
from neurovault.apps.statmaps.tests.utils import (clearDB, save_statmap_form)
from neurovault.api.tests.utils import _setup_test_cognitive_atlas
from neurovault.apps.statmaps.models import (Collection)
from django.contrib.auth.models import User
from django.test import TestCase, Client
import pandas as pd
import os.path
import json


class TestGeneDecoding(TestCase):
_map = None

@classmethod
def setUpClass(cls):
cls.test_path = os.path.abspath(os.path.dirname(__file__))
cls.user, _ = User.objects.get_or_create(username='neurovault')
cls.client = Client()
cls.client.login(username=cls.user)
cls.Collection1 = Collection(name='Collection1', owner=cls.user)
cls.Collection1.save()

_setup_test_cognitive_atlas()

nii_path = os.path.join(
cls.test_path, cls._map)
map = save_statmap_form(
image_path=nii_path, collection=cls.Collection1)
save_resampled_transformation_single(map.pk)
response = json.loads(cls.client.get("/images/%d/gene_expression/json?mask=full" % map.pk, follow=True).content)
cls.df = pd.DataFrame(response["data"], columns=response["columns"])

@classmethod
def tearDownClass(cls):
clearDB()
cls.user.delete()

def _assess_gene(self, gene_name, field='t'):
value = self.df.loc[self.df['gene_symbol'] == gene_name][field]

self.assertEqual(len(value), 1)

value = list(value)[0]

self.assertGreaterEqual(value, 0.0)

def _assess_gene_relation(self, gene_name1, gene_name2, field='variance explained (mean)'):
value1 = self.df.loc[self.df['gene_symbol'] == gene_name1][field]
self.assertEqual(len(value1), 1)

value2 = self.df.loc[self.df['gene_symbol'] == gene_name2][field]
self.assertEqual(len(value2), 1)

value1 = list(value1)[0]
value2 = list(value2)[0]

self.assertGreater(value1, value2)


class TestWAY1(TestGeneDecoding):
_map = 'test_data/gene_validation/WAY_HC36_mean.nii.gz'

def test_positive_HTR1A(self):
self._assess_gene("HTR1A")


# class TestCUM1(TestGeneDecoding):
# _map = 'test_data/gene_validation/CUMl_BP_MNI.nii.gz'
#
# def test_HTR1A_greater_DRD2(self):
# self._assess_gene_relation("HTR1A", "DRD2")
#
#
# class TestFDOPA(TestGeneDecoding):
# _map = 'test_data/gene_validation/18FDOPA.nii.gz'
#
# def test_positive_DDC(self):
# self._assess_gene("DDC")
#
#
# class TestMWC(TestGeneDecoding):
# _map = 'test_data/gene_validation/MNI152_WaterContent_figureAlignedForPaper_resliceForSTOLTUSanalysis.nii.gz'
#
# def test_MBP_greater_DDC(self):
# self._assess_gene_relation("MBP", "DDC")
#
# def test_MOG_greater_DDC(self):
# self._assess_gene_relation("MOG", "DDC")
#
# def test_MOBP_greater_DDC(self):
# self._assess_gene_relation("MOBP", "DDC")
#
#
# class TestRACLOPRIDE(TestGeneDecoding):
# _map = 'test_data/gene_validation/RACLOPRIDE_TEMPLATE_inMNI_181_217_181.nii.gz'
#
# def test_positive_DRD2(self):
# self._assess_gene("DRD2")
#
#
# class TestFP_CIT(TestGeneDecoding):
# _map = 'test_data/gene_validation/123I-FP-CIT.nii.gz'
#
# def test_positive_SLC6A3_greater_HTR1A(self):
# self._assess_gene_relation("SLC6A3", "HTR1A")
#
#
# class TestDASB(TestGeneDecoding):
# _map = 'test_data/gene_validation/DASB_HC30_mean.nii.gz'
#
# def test_positive_SLC6A4(self):
# self._assess_gene("SLC6A4")
#
#
# class TestWAY2(TestGeneDecoding):
# _map = 'test_data/gene_validation/WAY_VT_MNI.nii.gz'
#
# def test_positive_HTR1A(self):
# self._assess_gene("HTR1A")
#
#
# #class TestP943(TestGeneDecoding):
# # _map = 'test_data/gene_validation/P943_HC22_mean.nii.gz'
# #
# # def test_positive_HTR1B(self):
# # self._assess_gene("HTR1B")
#
#
# class TestALTANSERIN(TestGeneDecoding):
# _map = 'test_data/gene_validation/ALT_HC19_mean.nii.gz'
#
# def test_positive_HTR1B(self):
# self._assess_gene("HTR2A")