Skip to content

Commit

Permalink
readd geneexpression route and view. Add in the small files needed fr…
Browse files Browse the repository at this point in the history
…om old ahba data.
  • Loading branch information
rwblair committed Aug 10, 2023
1 parent e3dc3c7 commit bfc4557
Show file tree
Hide file tree
Showing 11 changed files with 24,754 additions and 8 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Expand Up @@ -76,3 +76,5 @@ local_data/
.envs/

*.swp

ahba_data/store_max1_reduced.h5
3 changes: 3 additions & 0 deletions ahba_data/README.md
@@ -0,0 +1,3 @@
This directory contains files used by `neurovault/apps/statmaps/ahba.py` which is called by the two gene expression views in in the statmaps app.

A file called `store_max1_reduced.h5` is not tracked by git due to its size. This file is generated by `scripts/preparing_AHBA_data.py` and has additional dependencies from the rest of the project. `store_max1_reduced` can also be extracted from the neurovault/ahba image on dockerhub.
Binary file added ahba_data/Richiardi_Data_File_S2.xlsx
Binary file not shown.
3,703 changes: 3,703 additions & 0 deletions ahba_data/corrected_mni_coordinates.csv

Large diffs are not rendered by default.

20,788 changes: 20,788 additions & 0 deletions ahba_data/probe_info_max1.csv

Large diffs are not rendered by default.

File renamed without changes.
72 changes: 72 additions & 0 deletions neurovault/apps/statmaps/ahba.py
@@ -0,0 +1,72 @@

# coding: utf-8

# In[33]:

from glob import glob
import os
import pandas as pd
import numpy as np
import nibabel as nb
import numpy.linalg as npl
from scipy.stats.stats import pearsonr, ttest_1samp, percentileofscore, linregress, zscore
from statsmodels.sandbox.stats.multicomp import multipletests


def calculate_gene_expression_similarity(reduced_stat_map_data, mask="full"):
store_file = "/ahba_data/store_max1_reduced.h5"
subcortex_mask = "/ahba_data/subcortex_mask.npy"

results_dfs = []
with pd.HDFStore(store_file, 'r') as store:
for donor_id in store.keys():
print "Loading expression data (%s)" % donor_id
expression_data = store.get(donor_id.replace(".", "_"))

print "Getting statmap values (%s)" % donor_id
nifti_values = reduced_stat_map_data[expression_data.columns]

print "Removing missing values (%s)" % donor_id
na_mask = np.isnan(nifti_values)
if mask == "subcortex":
na_mask = np.logical_or(na_mask,
np.isnan(np.load(subcortex_mask)[expression_data.columns]))
elif mask == "cortex":
na_mask = np.logical_or(na_mask, np.logical_not(np.isnan(
np.load(subcortex_mask)[expression_data.columns])))
else:
assert mask == "full"

nifti_values = np.array(nifti_values)[np.logical_not(na_mask)]
expression_data.drop(expression_data.columns[na_mask], axis=1, inplace=True)

print "z scoring (%s)" % donor_id
expression_data = pd.DataFrame(zscore(expression_data, axis=1), columns=expression_data.columns,
index=expression_data.index)
nifti_values = zscore(nifti_values)

print "Calculating linear regressions (%s)" % donor_id
regression_results = np.linalg.lstsq(np.c_[nifti_values, np.ones_like(nifti_values)], expression_data.T)
results_df = pd.DataFrame({"slope": regression_results[0][0]}, index=expression_data.index)

results_df.columns = pd.MultiIndex.from_tuples([(donor_id[1:], c,) for c in results_df.columns],
names=['donor_id', 'parameter'])

results_dfs.append(results_df)

print "Concatenating results"
results_df = pd.concat(results_dfs, axis=1)
del results_dfs

t, p = ttest_1samp(results_df, 0.0, axis=1)
group_results_df = pd.DataFrame({"t": t, "p": p}, columns=['t', 'p'], index=expression_data.index)
_, group_results_df["p (FDR corrected)"], _, _ = multipletests(group_results_df.p, method='fdr_bh')
group_results_df["variance explained (mean)"] = (results_df.xs('slope', axis=1, level=1) ** 2 * 100).mean(axis=1)
group_results_df["variance explained (std)"] = (results_df.xs('slope', axis=1, level=1) ** 2 * 100).std(axis=1)
del results_df
probe_info = pd.read_csv("/ahba_data/probe_info_max1.csv", index_col=0).drop(['chromosome', "gene_id"], axis=1)
group_results_df = group_results_df.join(probe_info)
group_results_df = group_results_df[["gene_symbol", "entrez_id.1", "gene_name","t", "p", "p (FDR corrected)",
"variance explained (mean)", "variance explained (std)"]]

return group_results_df
132 changes: 132 additions & 0 deletions neurovault/apps/statmaps/templates/statmaps/gene_expression.html
@@ -0,0 +1,132 @@
{% extends "base.html" %}

{% block head %}
<title>{% block title %}Gene expression decoding: ({ image.name
}}{% endblock %}</title>
<script src="//cdn.datatables.net/plug-ins/1.10.11/sorting/natural.js"
type="text/javascript"></script>
<script>
$(document).ready(function () {
var prevSearch;
var saveState = true;
var captured = /q=([^&]+)/.exec(window.location.href);
var result = captured ? captured[1] : '';
var base_url = captured != '' ? window.location.href.split("?")[0] : window.location.href;
$('#genes-table').dataTable({
"bJQueryUI": true,
iDisplayLength: 25,
"ajax": "{% url 'statmaps:gene_expression_json' pk=image.pk %}?mask={{ mask }}",
"order": [[4, "asc"]],
"columnDefs": [
{
"render": function (data, type, row) {
return data + " <a href='http://www.ncbi.nlm.nih.gov/gene/" + row[1] + "'>[NCBI]</a> <a href='http://neurosynth.org/genes/" + data + "'>[Neurosynth]</a>";
},
"targets": [0]
},
{
"render": function (data, type, row) {
return data.toFixed(6);
},
"searchable": false,
"targets": [4, 5]
},
{
"render": function (data, type, row) {
return data.toFixed(2);
},
"searchable": false,
"targets": [3, 6, 7]
},
{"visible": false, "targets": [1]}
],
"oSearch": {"sSearch": result},
"fnDrawCallback": function (oSettings) {
var curSearch = oSettings.oPreviousSearch.sSearch;
history.replaceState({query: curSearch}, "title", base_url + "?q=" + curSearch + "&mask={{ mask }}");
$("#full_button").attr("href", base_url + "?q=" + curSearch + "&mask=full");
$("#cortex_button").attr("href", base_url + "?q=" + curSearch + "&mask=cortex");
$("#subcortex_button").attr("href", base_url + "?q=" + curSearch + "&mask=subcortex");

if ("{{ mask }}" == "full") {
$("#full_button").removeClass("btn-secondary");
$("#full_button").addClass("btn-primary");
}
if ("{{ mask }}" == "cortex") {
$("#cortex_button").removeClass("btn-secondary");
$("#cortex_button").addClass("btn-primary");
}
if ("{{ mask }}" == "subcortex") {
$("#subcortex_button").removeClass("btn-secondary");
$("#subcortex_button").addClass("btn-primary");
}
}
});
});

</script>

{% endblock %}

{% block content %}
<div class="row">
<div class="col">
<h2>Gene expression decoding of the <a
href="{{ image.get_absolute_url }}">{{ image.name }}</a>
statistical map</h2>
<p>Limit decoding to :
<a class="btn btn-secondary btn-sm" type="button"
id="full_button">Full brain</a>
<a class="btn btn-secondary btn-sm" type="button"
id="subcortex_button">Subcortex</a>
<a class="btn btn-secondary btn-sm" type="button"
id="cortex_button">Cortex</a>
</p>
<div class="table-responsive-md">
<table id="genes-table"
class="table table-condensed table-striped table-sm table-hover">
<thead>
<tr>
<th scope="row">Symbol</th>
<th scope="row">entrez_id</th>
<th scope="row">Name</th>
<th scope="row">t</th>
<th scope="row">p</th>
<th scope="row">p&nbsp;(corr)</th>
<th scope="row">Variance explained [%]</th>
<th scope="row">± [%]</th>
</tr>
</thead>
</table>
</div>
</div>
</div>
<div class="row">
<div class="col">
<p><strong>About</strong></p>
<p>This map has been compared with gene expression data obtained
from <a href="http://human.brain-map.org/">Allen Human Brain
Atlas</a>. For every
gene and each of the six brains donated to the Allen Brain
Institute a linear model has been fitted to see
how similar they are to the evaluated map. A one sample t test
has been used see how consistent the relation
between the gene expression and evaluated map values are across
the six donated brains. To account for
the number of tested genes False Discovery Rate correction has
been applied.</p>
<p>Decoding can be performed on the full brain or alternatively to
subcortical or cortical areas. <a
href="http://neurovault.org/images/39877/">This
mask</a> is used to
limit the datapoints for the subcortical analysis variant and
its inverse in the cortical case.</p>
<p>Please cite: <a
href="https://f1000research.com/posters/1097120">Gorgolewski
KJ, Fox AS, Chang L et al. Tight fitting genes: finding
relations between statistical maps and gene expression
patterns. F1000Posters 2014, 5:1607 (poster)</a></p>
</div>
</div>

{% endblock %}
6 changes: 6 additions & 0 deletions neurovault/apps/statmaps/urls.py
Expand Up @@ -346,4 +346,10 @@
),
re_path(r"^tasks/(?P<cog_atlas_id>[A-Za-z0-9].*)$", view_task, name="view_task"),
re_path(r"^tasks$", view_task, name="view_task"),
url(r'^images/(?P<pk>\d+)/gene_expression$',
gene_expression,
name='gene_expression'),
url(r'^images/(?P<pk>\d+)/gene_expression/json$',
gene_expression_json,
name='gene_expression_json'),
]
38 changes: 38 additions & 0 deletions neurovault/apps/statmaps/views.py
Expand Up @@ -49,6 +49,7 @@

import neurovault
from neurovault import settings
from neurovault.apps.statmaps.ahba import calculate_gene_expression_similarity
from neurovault.apps.statmaps.forms import (
CollectionForm,
UploadFileForm,
Expand Down Expand Up @@ -109,6 +110,7 @@
from . import image_metadata



def owner_or_contrib(request, collection):
"""owner_or_contrib determines if user is an owner or contributor to a collection
:param collection: statmaps.models.Collection
Expand Down Expand Up @@ -1897,3 +1899,39 @@ def serve_surface_archive(request, pk, collection_cid=None):
response = StreamingHttpResponse(zf, content_type="application/zip")
response["Content-Disposition"] = "attachment; filename=%s" % zip_filename
return response


def gene_expression(request, pk, collection_cid=None):
'''view_image returns main view to see an image and associated meta data. If the image is in a collection with a DOI and has a generated thumbnail, it is a contender for image comparison, and a find similar button is exposed.
:param pk: statmaps.models.Image.pk the primary key of the image
:param collection_cid: statmaps.models.Collection.pk the primary key of the collection. Default None
'''
image = get_image(pk, collection_cid, request)
if image.is_thresholded:
raise Http404
api_cid = pk
if image.collection.private:
api_cid = '%s-%s' % (image.collection.private_token,pk)
context = {
'image': image,
'api_cid': api_cid,
'mask': request.GET.get('mask', 'full')
}
template = 'statmaps/gene_expression.html'
return render(request, template, context)

def gene_expression_json(request, pk, collection_cid=None):
image = get_image(pk, collection_cid, request)
if image.is_thresholded:
raise Http404

if not image.reduced_representation or not os.path.exists(image.reduced_representation.path):
image = save_resampled_transformation_single(image.id)

map_data = np.load(image.reduced_representation.file)

mask = request.GET.get('mask', None)
expression_results = calculate_gene_expression_similarity(map_data, mask)
dict = expression_results.to_dict("split")
del dict["index"]
return JSONResponse(dict)
18 changes: 10 additions & 8 deletions scripts/preparing_AHBA_data.py
Expand Up @@ -33,7 +33,7 @@
"http://human.brain-map.org/api/v2/well_known_file_download/178236545"]

donor_ids = [""]
download_dir = "/ahba_data"
download_dir = "./ahba_data"
os.makedirs(download_dir)

for i, url in enumerate(urls):
Expand All @@ -54,22 +54,24 @@

for coord_mni in samples[['corrected_mni_x', 'corrected_mni_y', 'corrected_mni_z']].values:
sample_counts = np.zeros(mni.shape, dtype=int)
coord_data = [int(round(i)) for i in nb.affines.apply_affine(npl.inv(mni.get_affine()), coord_mni)]
coord_data = [int(round(i)) for i in nb.affines.apply_affine(npl.inv(mni.affine), coord_mni)]
sample_counts[coord_data[0],
coord_data[1],
coord_data[2]] = 1
out_vector = sample_counts[mni.get_data()!=0]
out_vector = sample_counts[np.asanyarray(mni.dataobj)!=0]
idx = out_vector.argmax()
if idx == (out_vector == 1.0).sum() == 0:
idx = np.nan
reduced_coord.append(idx)

samples["reduced_coordinate"] = reduced_coord

#Downloading gene selections list
urllib.request.urlretrieve(
"http://science.sciencemag.org/highwire/filestream/631209/field_highwire_adjunct_files/2/Richiardi_Data_File_S2.xlsx",
os.path.join(download_dir, "Richiardi_Data_File_S2.xlsx"))
# old URL now 404s:
# http://science.sciencemag.org/highwire/filestream/631209/field_highwire_adjunct_files/2/Richiardi_Data_File_S2.xlsx
if not os.path.exists(os.path.join(download_dir, "Richiardi_Data_File_S2.xlsx")):
urllib.request.urlretrieve(
"https://github.com/amadeuskanaan/GluIRON/raw/master/AHBA/Richiardi_Data_File_S2.xlsx",
os.path.join(download_dir, "Richiardi_Data_File_S2.xlsx"))

donors = ["H0351.2001", "H0351.2002", "H0351.1009", "H0351.1012", "H0351.1015", "H0351.1016"]

Expand All @@ -92,7 +94,7 @@

df.columns = list(sample_annot_df["reduced_coordinate"])
# removing out side of the brain coordinates
df.drop(sample_annot_df.index[sample_annot_df.reduced_coordinate.isnull()], axis=1, inplace=True)
df.drop(sample_annot_df.index[sample_annot_df.reduced_coordinate.isnull()], axis=1, inplace=True, errors='ignore')
# averaging measurements from similar locations
df = df.groupby(level=0, axis=1).mean()

Expand Down

0 comments on commit bfc4557

Please sign in to comment.