-
Notifications
You must be signed in to change notification settings - Fork 6
/
find_gene_via_csv.py
69 lines (64 loc) · 1.95 KB
/
find_gene_via_csv.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
#!/usr/bin/env python
# coding=utf-8
import os
import csv
import click
import collections
@click.command()
@click.option(
    "-g", "--gff", type=str, help="gff file (It can only contain one chromosome.)"
)
@click.option(
    "-p", "--position", type=str, help="chromosome name and snp position file(chr1 123456789)"
)
@click.option(
    "-w", "--width", default=1000000, type=int, help="Total length of upstream and downstream of SNP locus (default 1M)"
)
@click.option(
    "-o",
    "--out",
    default="out.txt",
    type=str,
    help="Output file (default 'out.txt')")
def command_line_runner(gff, position, width, out):
    """
    According to the results of GWAS, the genes of upstream and downstream 1M of SNP locus were selected.
    """
    # NOTE: click displays the docstring above as the command's --help text,
    # so implementation notes are kept in comments to leave that output as is.
    #
    # Parameters (all supplied by click from the command line):
    #   gff      -- path to a tab-separated GFF file. Fields 3 and 4 (0-based)
    #               are read as the feature start/end and field 8 as its label.
    #   position -- path to a tab-separated file whose field 1 (0-based) is a
    #               SNP position. Field 0 is not read, which is why the GFF
    #               must describe a single chromosome.
    #   width    -- total window size centred on each SNP.
    #   out      -- path of the report to write.
    #
    # Output: a header line "gene\tposition" followed by one line per matched
    # feature label with the list of sampled window positions that fell inside
    # that feature.
    #
    # Errors are reported with a printed message rather than a traceback.
    _VERSION = "1.1.1"
    print("--------------------------------------------\nLooking for genes...\nThe gene and iterative position will be saved in {}".format(out))

    # Both input options have no default, so click passes None when omitted.
    # Check this explicitly instead of relying on open(None) raising.
    if gff is None or position is None:
        print("Please input your option, or use --help")
        return

    try:
        with open(gff) as gff_handle, open(position) as pos_handle:
            # Skip blank rows and '#' comment/directive rows so that a GFF
            # header does not abort the whole run.
            features = [
                (float(row[3]), float(row[4]), row[8])
                for row in csv.reader(gff_handle, delimiter='\t')
                if row and not row[0].startswith('#')
            ]
            # Lower edge of the window around each SNP. True division is kept
            # so the reported positions stay floats, as in earlier output
            # (NOTE(review): switch to // if integer positions are wanted).
            window_starts = [
                int(float(row[1])) - width / 2
                for row in csv.reader(pos_handle, delimiter='\t')
                if row
            ]

        # The window is sampled every 1000 bp; a feature is reported when at
        # least one sample lies in [start, end).
        offsets = range(0, width + 1, 1000)
        gene_hits = collections.defaultdict(list)
        for start, end, label in features:
            for window_start in window_starts:
                for offset in offsets:
                    probe = window_start + offset
                    if start <= probe < end:
                        gene_hits[label].append(probe)

        # The context manager guarantees the report is flushed and closed.
        with open(out, 'w') as outfile:
            outfile.write("gene\tposition\n")
            for label, probes in gene_hits.items():
                outfile.write("{}\t{}\n".format(label, probes))

        print("There are {} gene in {}".format(len(gene_hits), out))
    except FileNotFoundError:
        print("The file you inputed was not found")
    except OSError as err:
        # e.g. permission denied, or a directory given instead of a file.
        print("Could not read or write a file: {}".format(err))
    except (ValueError, IndexError) as err:
        # Non-numeric coordinates or rows with too few tab-separated fields.
        print("Could not parse the input files (expected tab-separated columns): {}".format(err))
# Run the click command only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    command_line_runner()