Skip to content

Commit

Permalink
bug fix and removal of python 3 compatibility due to encoding errs
Browse files Browse the repository at this point in the history
  • Loading branch information
ethanagb committed Oct 13, 2016
1 parent c6ae8f8 commit 0c48d5a
Show file tree
Hide file tree
Showing 4 changed files with 20 additions and 19 deletions.
1 change: 1 addition & 0 deletions getRandomPosition.py
Expand Up @@ -17,6 +17,7 @@ def getRandomPosition(buf,genomeLength, thresholdDict,names,lengthDict):
k = 1 #a counter to get the next chromosome threshold.
if chrom == 'chr1':
t=0
r=thresholdDict[chrom]
else:
t=thresholdDict[chrom]-lengthDict[chrom]
r=thresholdDict[chrom]
Expand Down
Binary file added releases/v1.0.1/SiLiCO-1.0.1.tar.gz
Binary file not shown.
4 changes: 2 additions & 2 deletions setup.py
Expand Up @@ -8,14 +8,14 @@
import setuptools

setup(name='SiLiCO',
version='1.0.0',
version='1.0.1',
description='A Simulator of Long Read Sequencing in PacBio and Oxford Nanopore',
author='Ethan Alexander Garcia Baker',
author_email='ethanagbaker@pitt.edu',
url='https://ethanagbaker.github.io',
license='GNU',
packages=setuptools.find_packages(),
py_modules=['simulateReads','convertToFasta','generateChrDist','splitGenomeFasta','getRandomPosition','findChromosome', 'SiLiCO'],
classifiers=['Programming Language :: Python :: 2.7.11', 'Programming Language :: Python :: 3.5','License :: OSI Approved :: GNU GPL'],
classifiers=['Programming Language :: Python :: 2.7.11','License :: OSI Approved :: GNU GPL'],
install_requires=['numpy', 'natsort','pybedtools','pysam==0.8.4']
)
34 changes: 17 additions & 17 deletions simulateReads.py
Expand Up @@ -15,7 +15,7 @@ def simulateReads(infileName,outfileName,mean,std,desired_cov,trialCount,names,s
#Initialize variables
y=0
loopController=True

#Open the index file (chrdist.td)
with open("SiLiCO_Scratch/chrdist.td",'r') as infile:
lengths = []
Expand All @@ -27,7 +27,7 @@ def simulateReads(infileName,outfileName,mean,std,desired_cov,trialCount,names,s
names.append(str(name))
infile.close()
lengthDict = dict(zip(names,lengths))
genomeLength=0
genomeLength=0
names=natsorted(names) #A sorted list of chromosome names.

#Calculate the total genome length
Expand All @@ -36,7 +36,7 @@ def simulateReads(infileName,outfileName,mean,std,desired_cov,trialCount,names,s

chrCount = len(lengths)
thresholdDict={} #A dictionary of chromome thresholds (defined as end of a chromsome as determined by length from chrdist.td)
correctionDict={} #A correction to get the start of the chromosome. Important for generating bed file.
correctionDict={} #A correction to get the start of the chromosome. Important for generating bed file.

#Generate the threshold dictionary.

Expand All @@ -45,7 +45,7 @@ def simulateReads(infileName,outfileName,mean,std,desired_cov,trialCount,names,s
name = str(names[chrom])#this name will go as key in dict
if name =="chr1":
thresholdDict[name]=lengthDict[name]

else:
threshVal = 0
correctedVal = 0
Expand All @@ -54,7 +54,7 @@ def simulateReads(infileName,outfileName,mean,std,desired_cov,trialCount,names,s
name2 = "chr"+str(i)
threshVal += lengthDict[name2]
i += 1
correctedVal = threshVal - lengthDict[name2]
correctedVal = threshVal - lengthDict[name2]
thresholdDict[name2] = threshVal

#Build correction dictionary
Expand All @@ -66,7 +66,7 @@ def simulateReads(infileName,outfileName,mean,std,desired_cov,trialCount,names,s
prevChromName = str(names[j-1])
correctionDict[chromName] = thresholdDict[prevChromName]

#Some calculations for the appropriate distribution.
#Some calculations for the appropriate distribution.
print("Calculating distribution parameters...")

if seqMode == 1: #PacBio/LogNormal
Expand All @@ -83,7 +83,7 @@ def simulateReads(infileName,outfileName,mean,std,desired_cov,trialCount,names,s
readlengths=np.random.lognormal(mu,sigma,req_reads) #Read lengths are randomly determined from the calculated log-normal distribution.
read_pos=[]
name_counter = 0

outfile = gzip.open(str(outfileName) + '/simulated_read_positions_trial_'+str(trial_counter) +'.bed.gz','wb')
for length in readlengths:
while True:
Expand All @@ -93,15 +93,15 @@ def simulateReads(infileName,outfileName,mean,std,desired_cov,trialCount,names,s
#print(str(y))
start_pos = int(y-buf)
end_pos = int(y+buf)

#Figure out which chromosome this is in
selected_chrom = findChromosome(start_pos,names,thresholdDict)
end_pos_chrom = findChromosome(end_pos,names,thresholdDict)
if selected_chrom == end_pos_chrom:
outfile.write(str(selected_chrom) + '\t' + str(start_pos-correctionDict[str(selected_chrom)]) + '\t' + str(end_pos-correctionDict[str(selected_chrom)]) + '\t' + 'trial_'+str(trial_counter) +'_sim_read_' + str(name_counter) + '\n')
break

#count this run and reset reused variables.
#count this run and reset reused variables.
name_counter+=1
x=None
y=None
Expand All @@ -111,12 +111,12 @@ def simulateReads(infileName,outfileName,mean,std,desired_cov,trialCount,names,s
outfile.close()
trial_counter+=1
print("Completed trial " + str(trial_counter) + " of " + str(trialCount) + ". ("+ str(100*(float(trial_counter)/trialCount)) + "%)")

elif seqMode == 2: #nanopore/gamma
scale = float(std)/mean
shape = (mean**2)/float(std)
req_reads = int((int(desired_cov)*genomeLength)/float(mean))

#Begin generating in-silico reads
trial_counter = 1
while trial_counter <= int(trialCount):
Expand All @@ -126,7 +126,7 @@ def simulateReads(infileName,outfileName,mean,std,desired_cov,trialCount,names,s
readlengths=np.random.gamma(shape,scale,req_reads) #Read lengths are randomly determined from the calculated log-normal distribution.
read_pos=[]
name_counter = 0

outfile = gzip.open(str(outfileName) + '/simulated_read_positions_trial_'+str(trial_counter) +'.bed.gz','wb')
for length in readlengths:
while True:
Expand All @@ -135,15 +135,15 @@ def simulateReads(infileName,outfileName,mean,std,desired_cov,trialCount,names,s
y=getRandomPosition(buf,genomeLength,thresholdDict,names,lengthDict)
start_pos = int(y-buf)
end_pos = int(y+buf)

#Figure out which chromosome this is in
selected_chrom = findChromosome(start_pos,names,thresholdDict)
end_pos_chrom = findChromosome(end_pos,names,thresholdDict)
if selected_chrom == end_pos_chrom: #Check to ensure that read lengths are not bridging chromosomes
outfile.write(str(selected_chrom) + '\t' + str(start_pos-correctionDict[str(selected_chrom)]) + '\t' + str(end_pos-correctionDict[str(selected_chrom)]) + '\t' + 'trial_'+str(trial_counter) +'_sim_read_' + str(name_counter) + '\n')
break

#count this run and reset reused variables.
#count this run and reset reused variables.
name_counter+=1
x=None
y=None
Expand All @@ -153,8 +153,8 @@ def simulateReads(infileName,outfileName,mean,std,desired_cov,trialCount,names,s
outfile.close()
trial_counter+=1
print("Completed trial " + str(trial_counter) + " of " + str(trialCount) + ". ("+ str(100*(float(trial_counter)/trialCount)) + "%)")



if __name__ == "__main__":
simulateReads(infileName,outfileName,mean,std,desired_cov,trials,names,seqMode)
simulateReads(infileName,outfileName,mean,std,desired_cov,trials,names,seqMode)

0 comments on commit 0c48d5a

Please sign in to comment.