Skip to content

Commit

Permalink
Bug fixes (#16)
Browse files Browse the repository at this point in the history
* fix import for folder

* More bug fixes
  • Loading branch information
kescobo committed May 28, 2016
1 parent 1c0226a commit a672cca
Show file tree
Hide file tree
Showing 3 changed files with 11 additions and 7 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,5 +31,6 @@ Type "help", "copyright", "credits" or "license" for more information.
>>> import run
>>> run.import_data()
>>> run.blast_db()
>>> run.blast()
>>> run.analyze(0.99)
```
13 changes: 8 additions & 5 deletions run.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,22 +9,25 @@


def import_data():
mongo_import_genbank(INPUT, "genes") # Perhaps settings.py should include option for collection name?
for f in os.listdir(INPUT):
if f.endswith(".gb") or f.endswith(".gbk"):
print("** Importing {}".format(f))
mongo_import_genbank(os.path.join(INPUT, f), "genes") # Perhaps settings.py should include option for collection name?

def blast_db():
fasta = db_cds_to_fna('genes') # Collection name option? (see ln9 above)
fasta = db_cds_to_fna('genes') # Collection name option? (see ln15 above)

# Make separate directory in output for Blast databases. Will probably do this for multiple outputs, might be good
# to have a function in `Analysis.output`
db_path = os.path.join(OUTPUT, MONGODB.name, "blast_db")
if not os.path.isdir(db_path):
os.makedirs(db_path)

make_blast_db(fasta.name, "nucl", os.path.join(db_path, "genes")) # Collection name option? (see ln10 above)
make_blast_db(fasta.name, "nucl", os.path.join(db_path, "genes")) # Collection name option? (see ln15 above)


def blast():
fasta = db_cds_to_fna('genes') # Collection name option? (see ln10 above)
fasta = db_cds_to_fna('genes') # Collection name option? (see ln15 above)
db_path = os.path.join(OUTPUT, MONGODB.name, "blast_db", "genes")

blast_results = blast_all(fasta, db_path)
Expand All @@ -34,7 +37,7 @@ def blast():
def analyze(minimum_identity, minimum_length=500, dist_between_hits=5000):
groups = output.hgt_groups(minimum_identity, minimum_length, dist_between_hits)
output.output_groups(
groups, os.path.join(
groups, MONGODB.name, os.path.join(
OUTPUT, "{}-{}-{}-groups.csv".format(
int((minimum_identity)*100), minimum_length, dist_between_hits
)
Expand Down
4 changes: 2 additions & 2 deletions src/DataImport/gb_parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ def parse_genbank(genbank_file):
records = SeqIO.parse(in_handle, 'gb')

# ToDo: Need a log file to record added locus_tags, species names etc, preferably with a way to reference back to original file
for record in add_contig_data(records):
for record in add_contig_data(records, genbank_file):
yield record


Expand All @@ -36,7 +36,7 @@ def check_16S(feature):
else:
return False

def add_contig_data(records):
def add_contig_data(records, genbank_file):
contig_counter = 0
contig_ids = []

Expand Down

0 comments on commit a672cca

Please sign in to comment.