/
RunPythonMethods.py
97 lines (78 loc) · 2.46 KB
/
RunPythonMethods.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
## Imports
def warn(*args, **kwargs):
pass
import warnings
warnings.warn = warn
import os
import sys
sys.stderr = open('tmp.err.log', 'w')
sys.stdout = open('tmp.out.log', 'w')
import scvi
import scanpy as sc
import anndata as ad
import scanorama
from scipy import sparse
from sklearn.preprocessing import LabelEncoder
import numpy as np
import pandas as pd
## Import and setup data
adata = ad.read_h5ad("tmp.h5ad")
adata.layers["raw"] = sparse.csr_matrix(adata.raw.X.copy())
adata.X = sparse.csr_matrix(adata.raw.X.copy())
## Create batch indices
le = LabelEncoder()
adata.obs['batch_indices'] = le.fit_transform(adata.obs["batch"].values)
adata.obs['batch_indices'] = adata.obs['batch_indices'].astype("category")
## Normalize and log transform
sc.pp.normalize_total(adata, target_sum=1e4)
sc.pp.log1p(adata)
## Import variable genes
f = open("tmp.features", "r")
hvg = f.read().splitlines()
f.close()
## Process variable genes
adata.var['highly_variable'] = [True if g in hvg else False for g in adata.var_names]
## Subset to variable genes
adata = adata[:, adata.var.highly_variable]
### Integrate using scanorama
## Split into batches
split = []
batch_categories = adata.obs["batch_indices"].cat.categories
for i in batch_categories:
split.append(adata[adata.obs["batch_indices"] == i].copy())
## Batch-aware scaling
for i in split:
sc.pp.scale(i)
## Integrate
for rep in range(5):
corrected = scanorama.integrate_scanpy(split, verbose = 0)
scan = ad.concat(split)
arr = scan.obsm["X_scanorama"]
df = pd.DataFrame(arr, index=scan.obs.index)
file_out = "tmp_Scanorama_rep" + str(rep) + ".txt"
df.to_csv(file_out)
### Integrate using scVI
## Copy the adata object
adata = adata.copy()
## Setup early stopping
## Copy from here: https://discourse.scverse.org/t/mapping-old-scanvi-code-to-0-16-2/494
early_stopping_kwargs = {
"early_stopping": True,
"early_stopping_monitor": 'elbo_validation',
"early_stopping_patience": 10,
"early_stopping_min_delta": 0.0,
}
## Loop across replicates
for rep in range(5):
## Setup the model
scvi.model.SCVI.setup_anndata(adata, layer="raw", batch_key="batch_indices")
## Create the model
vae = scvi.model.SCVI(adata)
## Train the model
vae.train(max_epochs = 500, **early_stopping_kwargs, check_val_every_n_epoch=1)
## Insert latent space
latent = vae.get_latent_representation()
df = pd.DataFrame(latent, index=adata.obs.index)
## Write objects
file_out = "tmp_scVI_rep" + str(rep) + ".txt"
df.to_csv(file_out)