-
Notifications
You must be signed in to change notification settings - Fork 1
/
plinkAnno.py
28 lines (21 loc) · 832 Bytes
/
plinkAnno.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
# A script that takes a tab-delimited UCSC data dump and creates the files plink uses to
# update base-pair and chromosome annotations.
# Only keeps SNPs on chromosomes 1-22, X & Y
# Usage: python plinkAnno.py input.txt
# Output: input.txt.txt_bp.txt & input.txt.txt_chr.txt
#         (".txt_bp.txt" / ".txt_chr.txt" is appended to the input path as given)
import sys
# Chromosome labels kept from the input: chr1..chr22 plus chrX and chrY.
krom = set('chr' + str(number) for number in range(1, 23))
krom.update(('chrX', 'chrY'))

# Zero-based positions of the columns read from each tab-delimited row.
# NOTE(review): presumably the UCSC snp table layout
# (bin, chrom, chromStart, chromEnd, name, ...) -- confirm against the dump.
_CHROM_COL = 1
_POS_COL = 2
_NAME_COL = 4
_CHR_PREFIX = 'chr'


def write_plink_annotation(in_path, chr_path, bp_path, chromosomes=None):
    """Write the chromosome and base-pair update files in a single pass.

    Every row of ``in_path`` whose chromosome column is in ``chromosomes``
    produces one line in each output file:

    * ``chr_path``: ``<name>\\t<chromosome without the 'chr' prefix>``
    * ``bp_path``:  ``<name>\\t<position column>``

    Rows with too few columns (blank lines, truncated records) are skipped
    instead of raising ``IndexError``.

    Args:
        in_path: Path of the tab-delimited input file.
        chr_path: Path of the chromosome output file (overwritten).
        bp_path: Path of the base-pair output file (overwritten).
        chromosomes: Chromosome labels to keep; defaults to ``krom``.

    Returns:
        The number of rows written to each output file.

    Raises:
        IOError/OSError: If any of the files cannot be opened.
    """
    if chromosomes is None:
        chromosomes = krom
    kept = 0
    # The input is opened first so that a missing input file does not leave
    # empty output files behind; ``with`` closes all three on any exit path.
    with open(in_path, 'r') as infile, \
            open(chr_path, 'w') as chr_out, \
            open(bp_path, 'w') as bp_out:
        for line in infile:
            # Drop the line terminator before splitting so that it can never
            # end up inside the last field (and hence inside the output).
            fields = line.rstrip('\r\n').split('\t')
            if len(fields) <= _NAME_COL:
                continue
            chrom = fields[_CHROM_COL]
            if chrom not in chromosomes:
                continue
            # Remove the literal prefix; str.strip('chr') would strip a
            # character set from both ends rather than a prefix.
            if chrom.startswith(_CHR_PREFIX):
                label = chrom[len(_CHR_PREFIX):]
            else:
                label = chrom
            name = fields[_NAME_COL]
            chr_out.write(name + '\t' + label + '\n')
            bp_out.write(name + '\t' + fields[_POS_COL] + '\n')
            kept += 1
    return kept


def main(argv=None):
    """Command-line entry point.

    Args:
        argv: Argument list in ``sys.argv`` form; defaults to ``sys.argv``.

    Returns:
        Process exit status: 0 on success, 2 when the input path is missing.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 2:
        sys.stderr.write('Usage: python plinkAnno.py input.txt\n')
        return 2
    in_path = argv[1]
    # Output names keep the script's existing scheme: the suffix is appended
    # to the input path exactly as given on the command line.
    write_plink_annotation(
        in_path,
        ''.join([in_path, '.txt', '_chr.txt']),
        ''.join([in_path, '.txt', '_bp.txt']),
    )
    return 0


if __name__ == '__main__':
    sys.exit(main())