/
SnakeSEA
65 lines (52 loc) · 1.69 KB
/
SnakeSEA
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
#import scanpy as sc
#big_h5ad = config['h5ad']
#adata = sc.read_h5ad(big_h5ad)
#well_counts = adata[adata.obs['TechType'].isin(['SMARTSeq_v2'])].obs[['batch']].value_counts()
#well_batches = list(well_counts[well_counts > 150].reset_index()['batch'])
#droplet_counts = adata[~adata.obs['TechType'].isin(['SMARTSeq_v2'])].obs[['sample_accession']].value_counts()
#droplet_samples = list(droplet_counts[droplet_counts > 150].reset_index()['sample_accession'])
def _read_ids(path):
    """Return the non-empty lines of *path*, each without its trailing newline.

    The file is opened in a ``with`` block so the handle is closed as soon as
    the list is built. Empty lines are skipped: an empty ID would otherwise
    become an empty alternative in the wildcard constraints and an
    'seacells/.obs.csv.gz' target.
    """
    with open(path, "r") as handle:
        stripped = (line.rstrip('\n') for line in handle)
        return [entry for entry in stripped if entry]


# One ID per line. NOTE(review): these lists are presumably the saved result
# of the commented-out scanpy query above -- confirm how they are regenerated.
well_batches = _read_ids("well_batches.txt")
droplet_samples = _read_ids("droplet_counts.txt")
print(well_batches)

# Path of the combined h5ad file, taken from the workflow config.
big_h5ad = config['h5ad']
import re

# Restrict each wildcard to the known IDs. seacell_droplet and seacell_well
# write to the same 'seacells/<id>.*.csv.gz' pattern, so these constraints are
# what tells the two rules apart.
# Every ID is passed through re.escape so it is matched literally; an
# unescaped '.', '+' or '(' in an ID would otherwise be read as regex syntax.
wildcard_constraints:
    sample = '|'.join(re.escape(sample_id) for sample_id in droplet_samples),
    batch = '|'.join(re.escape(batch_id) for batch_id in well_batches),
# Default target: one obs table per droplet sample, followed by one per well
# batch. Only the *.obs.csv.gz files are requested here; the matching
# *.seacell_aggr.csv.gz files are declared as outputs of the same rules.
rule all:
    input:
        expand('seacells/{unit}.obs.csv.gz', unit = droplet_samples + well_batches)
# Run make_seacells.py for one droplet sample.
# Command-line arguments, in order: the h5ad file, the sample ID, the two
# output paths (obs, then seacell), and the literal 'sample_accession'.
# NOTE(review): 'sample_accession' is presumably the obs column the script
# uses to select cells -- confirm against make_seacells.py.
rule seacell_droplet:
    input:
        h5ad = big_h5ad
    output:
        obs = 'seacells/{sample}.obs.csv.gz',
        seacell = 'seacells/{sample}.seacell_aggr.csv.gz'
    shell:
        """
        /data/mcgaugheyd/conda/envs/seacells/bin/python \
            /home/mcgaugheyd/git/scEiaD/src/make_seacells.py \
            {input.h5ad} \
            {wildcards.sample} \
            {output.obs} {output.seacell} \
            sample_accession
        """
# Run make_seacells.py for one well-based batch.
# Command-line arguments, in order: the h5ad file, the batch ID, the two
# output paths (obs, then seacell), and the literal 'batch'.
# NOTE(review): 'batch' is presumably the obs column the script uses to
# select cells -- confirm against make_seacells.py.
rule seacell_well:
    input:
        h5ad = big_h5ad
    output:
        obs = 'seacells/{batch}.obs.csv.gz',
        seacell = 'seacells/{batch}.seacell_aggr.csv.gz'
    shell:
        """
        /data/mcgaugheyd/conda/envs/seacells/bin/python \
            /home/mcgaugheyd/git/scEiaD/src/make_seacells.py \
            {input.h5ad} \
            {wildcards.batch} \
            {output.obs} {output.seacell} \
            batch
        """