Skip to content

Commit

Permalink
Merge pull request #937 from nextgenusfs/util_gbk2parts_add
Browse files Browse the repository at this point in the history
Util gbk2parts add cds dump
  • Loading branch information
hyphaltip committed Jul 19, 2023
2 parents eac3691 + b7cd660 commit 667e55c
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 20 deletions.
28 changes: 15 additions & 13 deletions funannotate/library.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import textwrap
import errno
import datetime
import traceback
from natsort import natsorted
import funannotate.resources as resources
from funannotate.interlap import InterLap
Expand Down Expand Up @@ -2145,7 +2146,10 @@ def RevComp(s):
s = s.upper()
for i in range(0, n):
c = s[n - i - 1]
cseq += rev_comp_lib[c]
if c not in rev_comp_lib:
sys.stderr.write(f'Reverse complement of {c} failing on {s} len(s) = {n}\n')
else:
cseq += rev_comp_lib[c]
return cseq


Expand Down Expand Up @@ -2373,10 +2377,7 @@ def _sortDict(d):
Transcript = str(v["transcript"][i])
except IndexError:
sys.stderr.write(
"Index Error retriving transcript {}: ({}, {})\n".format(
i, k, v
)
)
f"Index Error retriving transcript {i}: ({k}, {v})\n")
if v["strand"] == "-":
Transcript = RevComp(Transcript)
tranout.write(">%s %s\n%s\n" % (x, k, softwrap(Transcript)))
Expand Down Expand Up @@ -2712,12 +2713,13 @@ def dict2nucleotides2(input, prots, trans, cdstrans):
except IndexError:
pass
try:
CDStranscript = str(v["cds_transcript"][i])
if v["strand"] == "-":
CDStranscript = RevComp(CDStranscript)
cdsout.write(
">{:} {:}\n{:}\n".format(x, k, softwrap(CDStranscript))
)
cds = v["cds_transcript"][i]
if cds and len(cds) > 0:
CDStranscript = str(cds)
if v["strand"] == "-":
CDStranscript = RevComp(CDStranscript)
cdsout.write(
">{:} {:}\n{:}\n".format(x, k, softwrap(CDStranscript)))
except IndexError:
pass
if v["type"] == "mRNA":
Expand Down Expand Up @@ -4033,7 +4035,7 @@ def gb2gffnuc(input, gff, prots, trans, dna):
return len(genes)


def gb2parts(input, tbl, gff, prots, trans, dna):
def gb2parts(input, tbl, gff, prots, trans, cds, dna):
"""
Function returns a dictionary of all gene models from a GenBank file. This function
can handle multiple transcripts per locus/gene.
Expand Down Expand Up @@ -4062,7 +4064,7 @@ def gb2parts(input, tbl, gff, prots, trans, dna):
# write gff3 output
dict2gff3_old(genes, gff)
# write to protein and transcripts
dict2nucleotides(genes, prots, trans)
dict2nucleotides2(genes, prots, trans, cds)
return len(genes)


Expand Down
14 changes: 7 additions & 7 deletions funannotate/utilities/gbk2parts.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,13 @@ def __init__(self, prog):
args = parser.parse_args(args)

# setup output files
tblout = args.output+'.tbl'
gffout = args.output+'.gff3'
protout = args.output+'.proteins.fasta'
transout = args.output+'.transcripts.fasta'
dnaout = args.output+'.scaffolds.fasta'
lib.gb2parts(args.gbk, tblout, gffout, protout, transout, dnaout)

tblout = f'{args.output}.tbl'
gffout = f'{args.output}.gff3'
protout = f'{args.output}.proteins.fa'
transout = f'{args.output}.mrna-transcripts.fa'
cdsout = f'{args.output}.cds-transcripts.fa'
dnaout = f'{args.output}.scaffolds.fa'
lib.gb2parts(args.gbk, tblout, gffout, protout, transout, cdsout, dnaout)

if __name__ == "__main__":
main(sys.argv[1:])

0 comments on commit 667e55c

Please sign in to comment.