readd geneexpression route and view. Add in the small files needed fr…

…om old ahba data.
NeuroVault · Aug 10, 2023 · bfc4557 · bfc4557
1 parent e3dc3c7
commit bfc4557
Show file tree

Hide file tree

Showing 11 changed files with 24,754 additions and 8 deletions.
diff --git a/.gitignore b/.gitignore
@@ -76,3 +76,5 @@ local_data/
 .envs/
 
 *.swp
+
+ahba_data/store_max1_reduced.h5
diff --git a/ahba_data/README.md b/ahba_data/README.md
@@ -0,0 +1,3 @@
+This directory contains files used by `neurovault/apps/statmaps/ahba.py` which is called by the two gene expression views in in the statmaps app.
+
+A file called `store_max1_reduced.h5` is not tracked by git due to its size. This file is generated by `scripts/preparing_AHBA_data.py` and has additional dependencies from the rest of the project. `store_max1_reduced` can also be extracted from the neurovault/ahba image on dockerhub. 
diff --git a/ahba_data/Richiardi_Data_File_S2.xlsx b/ahba_data/Richiardi_Data_File_S2.xlsx
diff --git a/ahba_data/corrected_mni_coordinates.csv b/ahba_data/corrected_mni_coordinates.csv
diff --git a/ahba_data/probe_info_max1.csv b/ahba_data/probe_info_max1.csv
diff --git a/scripts/subcortex_mask.npy → ahba_data/subcortex_mask.npy b/scripts/subcortex_mask.npy → ahba_data/subcortex_mask.npy
diff --git a/neurovault/apps/statmaps/ahba.py b/neurovault/apps/statmaps/ahba.py
@@ -0,0 +1,72 @@
+
+# coding: utf-8
+
+# In[33]:
+
+from glob import glob
+import os
+import pandas as pd
+import numpy as np
+import nibabel as nb
+import numpy.linalg as npl
+from scipy.stats.stats import pearsonr, ttest_1samp, percentileofscore, linregress, zscore
+from statsmodels.sandbox.stats.multicomp import multipletests
+
+
+def calculate_gene_expression_similarity(reduced_stat_map_data, mask="full"):
+    store_file = "/ahba_data/store_max1_reduced.h5"
+    subcortex_mask = "/ahba_data/subcortex_mask.npy"
+
+    results_dfs = []
+    with pd.HDFStore(store_file, 'r') as store:
+        for donor_id in store.keys():
+            print "Loading expression data (%s)" % donor_id
+            expression_data = store.get(donor_id.replace(".", "_"))
+
+            print "Getting statmap values (%s)" % donor_id
+            nifti_values = reduced_stat_map_data[expression_data.columns]
+
+            print "Removing missing values (%s)" % donor_id
+            na_mask = np.isnan(nifti_values)
+            if mask == "subcortex":
+                na_mask = np.logical_or(na_mask,
+                    np.isnan(np.load(subcortex_mask)[expression_data.columns]))
+            elif mask == "cortex":
+                na_mask = np.logical_or(na_mask, np.logical_not(np.isnan(
+                    np.load(subcortex_mask)[expression_data.columns])))
+            else:
+                assert mask == "full"
+
+            nifti_values = np.array(nifti_values)[np.logical_not(na_mask)]
+            expression_data.drop(expression_data.columns[na_mask], axis=1, inplace=True)
+
+            print "z scoring (%s)" % donor_id
+            expression_data = pd.DataFrame(zscore(expression_data, axis=1), columns=expression_data.columns,
+                                           index=expression_data.index)
+            nifti_values = zscore(nifti_values)
+
+            print "Calculating linear regressions (%s)" % donor_id
+            regression_results = np.linalg.lstsq(np.c_[nifti_values, np.ones_like(nifti_values)], expression_data.T)
+            results_df = pd.DataFrame({"slope": regression_results[0][0]}, index=expression_data.index)
+
+            results_df.columns = pd.MultiIndex.from_tuples([(donor_id[1:], c,) for c in results_df.columns],
+                                                           names=['donor_id', 'parameter'])
+
+            results_dfs.append(results_df)
+
+        print "Concatenating results"
+        results_df = pd.concat(results_dfs, axis=1)
+        del results_dfs
+
+    t, p = ttest_1samp(results_df, 0.0, axis=1)
+    group_results_df = pd.DataFrame({"t": t, "p": p}, columns=['t', 'p'], index=expression_data.index)
+    _, group_results_df["p (FDR corrected)"], _, _ = multipletests(group_results_df.p, method='fdr_bh')
+    group_results_df["variance explained (mean)"] = (results_df.xs('slope', axis=1, level=1) ** 2 * 100).mean(axis=1)
+    group_results_df["variance explained (std)"] = (results_df.xs('slope', axis=1, level=1) ** 2 * 100).std(axis=1)
+    del results_df
+    probe_info = pd.read_csv("/ahba_data/probe_info_max1.csv", index_col=0).drop(['chromosome', "gene_id"], axis=1)
+    group_results_df = group_results_df.join(probe_info)
+    group_results_df = group_results_df[["gene_symbol", "entrez_id.1", "gene_name","t", "p", "p (FDR corrected)",
+                                         "variance explained (mean)", "variance explained (std)"]]
+
+    return group_results_df
diff --git a/neurovault/apps/statmaps/templates/statmaps/gene_expression.html b/neurovault/apps/statmaps/templates/statmaps/gene_expression.html
@@ -0,0 +1,132 @@
+{% extends "base.html" %}
+
+{% block head %}
+    <title>{% block title %}Gene expression decoding: ({ image.name
+        }}{% endblock %}</title>
+    <script src="//cdn.datatables.net/plug-ins/1.10.11/sorting/natural.js"
+            type="text/javascript"></script>
+    <script>
+        $(document).ready(function () {
+            var prevSearch;
+            var saveState = true;
+            var captured = /q=([^&]+)/.exec(window.location.href);
+            var result = captured ? captured[1] : '';
+            var base_url = captured != '' ? window.location.href.split("?")[0] : window.location.href;
+            $('#genes-table').dataTable({
+                "bJQueryUI": true,
+                iDisplayLength: 25,
+                "ajax": "{% url 'statmaps:gene_expression_json' pk=image.pk %}?mask={{ mask }}",
+                "order": [[4, "asc"]],
+                "columnDefs": [
+                    {
+                        "render": function (data, type, row) {
+                            return data + " <a href='http://www.ncbi.nlm.nih.gov/gene/" + row[1] + "'>[NCBI]</a> <a href='http://neurosynth.org/genes/" + data + "'>[Neurosynth]</a>";
+                        },
+                        "targets": [0]
+                    },
+                    {
+                        "render": function (data, type, row) {
+                            return data.toFixed(6);
+                        },
+                        "searchable": false,
+                        "targets": [4, 5]
+                    },
+                    {
+                        "render": function (data, type, row) {
+                            return data.toFixed(2);
+                        },
+                        "searchable": false,
+                        "targets": [3, 6, 7]
+                    },
+                    {"visible": false, "targets": [1]}
+                ],
+                "oSearch": {"sSearch": result},
+                "fnDrawCallback": function (oSettings) {
+                    var curSearch = oSettings.oPreviousSearch.sSearch;
+                    history.replaceState({query: curSearch}, "title", base_url + "?q=" + curSearch + "&mask={{ mask }}");
+                    $("#full_button").attr("href", base_url + "?q=" + curSearch + "&mask=full");
+                    $("#cortex_button").attr("href", base_url + "?q=" + curSearch + "&mask=cortex");
+                    $("#subcortex_button").attr("href", base_url + "?q=" + curSearch + "&mask=subcortex");
+
+                    if ("{{ mask }}" == "full") {
+                        $("#full_button").removeClass("btn-secondary");
+                        $("#full_button").addClass("btn-primary");
+                    }
+                    if ("{{ mask }}" == "cortex") {
+                        $("#cortex_button").removeClass("btn-secondary");
+                        $("#cortex_button").addClass("btn-primary");
+                    }
+                    if ("{{ mask }}" == "subcortex") {
+                        $("#subcortex_button").removeClass("btn-secondary");
+                        $("#subcortex_button").addClass("btn-primary");
+                    }
+                }
+            });
+        });
+
+    </script>
+
+{% endblock %}
+
+{% block content %}
+    <div class="row">
+        <div class="col">
+            <h2>Gene expression decoding of the <a
+                    href="{{ image.get_absolute_url }}">{{ image.name }}</a>
+                statistical map</h2>
+            <p>Limit decoding to :
+                <a class="btn btn-secondary btn-sm" type="button"
+                   id="full_button">Full brain</a>
+                <a class="btn btn-secondary btn-sm" type="button"
+                   id="subcortex_button">Subcortex</a>
+                <a class="btn btn-secondary btn-sm" type="button"
+                   id="cortex_button">Cortex</a>
+            </p>
+            <div class="table-responsive-md">
+                <table id="genes-table"
+                       class="table table-condensed table-striped table-sm table-hover">
+                    <thead>
+                    <tr>
+                        <th scope="row">Symbol</th>
+                        <th scope="row">entrez_id</th>
+                        <th scope="row">Name</th>
+                        <th scope="row">t</th>
+                        <th scope="row">p</th>
+                        <th scope="row">p&nbsp;(corr)</th>
+                        <th scope="row">Variance explained [%]</th>
+                        <th scope="row">± [%]</th>
+                    </tr>
+                    </thead>
+                </table>
+            </div>
+        </div>
+    </div>
+    <div class="row">
+        <div class="col">
+            <p><strong>About</strong></p>
+            <p>This map has been compared with gene expression data obtained
+                from <a href="http://human.brain-map.org/">Allen Human Brain
+                    Atlas</a>. For every
+                gene and each of the six brains donated to the Allen Brain
+                Institute a linear model has been fitted to see
+                how similar they are to the evaluated map. A one sample t test
+                has been used see how consistent the relation
+                between the gene expression and evaluated map values are across
+                the six donated brains. To account for
+                the number of tested genes False Discovery Rate correction has
+                been applied.</p>
+            <p>Decoding can be performed on the full brain or alternatively to
+                subcortical or cortical areas. <a
+                        href="http://neurovault.org/images/39877/">This
+                    mask</a> is used to
+                limit the datapoints for the subcortical analysis variant and
+                its inverse in the cortical case.</p>
+            <p>Please cite: <a
+                    href="https://f1000research.com/posters/1097120">Gorgolewski
+                KJ, Fox AS, Chang L et al. Tight fitting genes: finding
+                relations between statistical maps and gene expression
+                patterns. F1000Posters 2014, 5:1607 (poster)</a></p>
+        </div>
+    </div>
+
+{% endblock %}
diff --git a/neurovault/apps/statmaps/urls.py b/neurovault/apps/statmaps/urls.py
@@ -346,4 +346,10 @@
     ),
     re_path(r"^tasks/(?P<cog_atlas_id>[A-Za-z0-9].*)$", view_task, name="view_task"),
     re_path(r"^tasks$", view_task, name="view_task"),
+        url(r'^images/(?P<pk>\d+)/gene_expression$',
+        gene_expression,
+        name='gene_expression'),
+    url(r'^images/(?P<pk>\d+)/gene_expression/json$',
+        gene_expression_json,
+        name='gene_expression_json'),
 ]
diff --git a/neurovault/apps/statmaps/views.py b/neurovault/apps/statmaps/views.py
@@ -49,6 +49,7 @@
 
 import neurovault
 from neurovault import settings
+from neurovault.apps.statmaps.ahba import calculate_gene_expression_similarity
 from neurovault.apps.statmaps.forms import (
     CollectionForm,
     UploadFileForm,
@@ -109,6 +110,7 @@
 from . import image_metadata
 
 
+
 def owner_or_contrib(request, collection):
     """owner_or_contrib determines if user is an owner or contributor to a collection
     :param collection: statmaps.models.Collection
@@ -1897,3 +1899,39 @@ def serve_surface_archive(request, pk, collection_cid=None):
     response = StreamingHttpResponse(zf, content_type="application/zip")
     response["Content-Disposition"] = "attachment; filename=%s" % zip_filename
     return response
+
+
+def gene_expression(request, pk, collection_cid=None):
+    '''view_image returns main view to see an image and associated meta data. If the image is in a collection with a DOI and has a generated thumbnail, it is a contender for image comparison, and a find similar button is exposed.
+    :param pk: statmaps.models.Image.pk the primary key of the image
+    :param collection_cid: statmaps.models.Collection.pk the primary key of the collection. Default None
+    '''
+    image = get_image(pk, collection_cid, request)
+    if image.is_thresholded:
+        raise Http404
+    api_cid = pk
+    if image.collection.private:
+        api_cid = '%s-%s' % (image.collection.private_token,pk)
+    context = {
+        'image': image,
+        'api_cid': api_cid,
+        'mask': request.GET.get('mask', 'full')
+    }
+    template = 'statmaps/gene_expression.html'
+    return render(request, template, context)
+
+def gene_expression_json(request, pk, collection_cid=None):
+    image = get_image(pk, collection_cid, request)
+    if image.is_thresholded:
+        raise Http404
+
+    if not image.reduced_representation or not os.path.exists(image.reduced_representation.path):
+        image = save_resampled_transformation_single(image.id)
+
+    map_data = np.load(image.reduced_representation.file)
+
+    mask = request.GET.get('mask', None)
+    expression_results = calculate_gene_expression_similarity(map_data, mask)
+    dict = expression_results.to_dict("split")
+    del dict["index"]
+    return JSONResponse(dict)
diff --git a/scripts/preparing_AHBA_data.py b/scripts/preparing_AHBA_data.py
@@ -33,7 +33,7 @@
         "http://human.brain-map.org/api/v2/well_known_file_download/178236545"]
 
 donor_ids = [""]
-download_dir = "/ahba_data"
+download_dir = "./ahba_data"
 os.makedirs(download_dir)
 
 for i, url in enumerate(urls):
@@ -54,22 +54,24 @@
 
 for coord_mni in samples[['corrected_mni_x', 'corrected_mni_y', 'corrected_mni_z']].values:
     sample_counts = np.zeros(mni.shape, dtype=int)
-    coord_data = [int(round(i)) for i in nb.affines.apply_affine(npl.inv(mni.get_affine()), coord_mni)]
+    coord_data = [int(round(i)) for i in nb.affines.apply_affine(npl.inv(mni.affine), coord_mni)]
     sample_counts[coord_data[0],
                   coord_data[1],
                   coord_data[2]] = 1
-    out_vector = sample_counts[mni.get_data()!=0]
+    out_vector = sample_counts[np.asanyarray(mni.dataobj)!=0]
     idx = out_vector.argmax()
     if idx == (out_vector == 1.0).sum() == 0:
         idx = np.nan
     reduced_coord.append(idx)
 
 samples["reduced_coordinate"] = reduced_coord
-
 #Downloading gene selections list
-urllib.request.urlretrieve(
-    "http://science.sciencemag.org/highwire/filestream/631209/field_highwire_adjunct_files/2/Richiardi_Data_File_S2.xlsx",
-    os.path.join(download_dir, "Richiardi_Data_File_S2.xlsx"))
+# old URL now 404s:
+# http://science.sciencemag.org/highwire/filestream/631209/field_highwire_adjunct_files/2/Richiardi_Data_File_S2.xlsx
+if not os.path.exists(os.path.join(download_dir, "Richiardi_Data_File_S2.xlsx")):
+    urllib.request.urlretrieve(
+        "https://github.com/amadeuskanaan/GluIRON/raw/master/AHBA/Richiardi_Data_File_S2.xlsx",
+        os.path.join(download_dir, "Richiardi_Data_File_S2.xlsx"))
 
 donors = ["H0351.2001", "H0351.2002", "H0351.1009", "H0351.1012", "H0351.1015", "H0351.1016"]
 
@@ -92,7 +94,7 @@
 
         df.columns = list(sample_annot_df["reduced_coordinate"])
         # removing out side of the brain coordinates
-        df.drop(sample_annot_df.index[sample_annot_df.reduced_coordinate.isnull()], axis=1, inplace=True)
+        df.drop(sample_annot_df.index[sample_annot_df.reduced_coordinate.isnull()], axis=1, inplace=True, errors='ignore')
         # averaging measurements from similar locations
         df = df.groupby(level=0, axis=1).mean()