Skip to content

Commit

Permalink
add --trnascan option to predict #523
Browse files (browse the repository at this point in the history)
  • Loading branch information
Jon Palmer committed Dec 31, 2020
1 parent 05b1810 commit f0bc925
Show file tree
Hide file tree
Showing 3 changed files with 42 additions and 13 deletions.
26 changes: 21 additions & 5 deletions funannotate/library.py
Expand Up @@ -6394,13 +6394,28 @@ def SystemInfo():
(system_os, multiprocessing.cpu_count(), MemoryCheck(), python_vers))


def runtRNAscan(input, tmpdir, output, cpus=1):
def runtRNAscan(input, tmpdir, output, cpus=1, precalc=False):
tRNAout = os.path.join(tmpdir, 'tRNAscan.out')
tRNAlenOut = os.path.join(tmpdir, 'tRNAscan.len-filtered.out')
if os.path.isfile(tRNAout): # tRNAscan can't overwrite file, so check
os.remove(tRNAout)
cmd = ['tRNAscan-SE', '-o', tRNAout, '--thread', str(cpus), input]
runSubprocess(cmd, '.', log)
if not precalc:
if os.path.isfile(tRNAout): # tRNAscan can't overwrite file, so check
os.remove(tRNAout)
cmd = ['tRNAscan-SE', '-o', tRNAout, '--thread', str(cpus), input]
log.debug(' '.join(cmd))
proc = subprocess.Popen(cmd, stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
stdout, stderr = proc.communicate()
if proc.returncode != 0:
log.error('CMD ERROR: {}'.format(' '.join(cmd)))
if stdout:
log.debug(stdout.decode("utf-8"))
if stderr:
log.debug(stderr.decode("utf-8"))
else:
shutil.copyfile(precalc, tRNAout)
if not checkannotations(tRNAout):
log.info('tRNAscan-SE seems to have failed, check logfile for error. You can pass precalculated results to --trnascan')
return False
# enforce NCBI length rules
with open(tRNAlenOut, 'w') as lenOut:
with open(tRNAout, 'r') as infile:
Expand All @@ -6424,6 +6439,7 @@ def runtRNAscan(input, tmpdir, output, cpus=1):
trna2gff = os.path.join(parentdir, 'aux_scripts', 'trnascan2gff3.pl')
with open(output, 'w') as out:
subprocess.call(['perl', trna2gff, '--input', tRNAlenOut], stdout=out)
return True


def runtbl2asn(folder, template, discrepency, organism, isolate, strain, parameters, version):
Expand Down
28 changes: 20 additions & 8 deletions funannotate/predict.py
Expand Up @@ -131,6 +131,8 @@ def __init__(self, prog):
help='Option for p2g on which prefilter')
parser.add_argument('--no-progress', dest='progress', action='store_false',
help='no progress on multiprocessing')
parser.add_argument('--trnascan',
help='Pre-computed tRNAScan results')
args = parser.parse_args(args)

parentdir = os.path.join(os.path.dirname(__file__))
Expand Down Expand Up @@ -1763,17 +1765,27 @@ def __init__(self, prog):
lib.log.info('{:,} gene models remaining'.format(total))

# run tRNAscan
lib.log.info("Predicting tRNAs")
tRNAscan = os.path.join(args.out, 'predict_misc', 'trnascan.gff3')
if not os.path.isfile(tRNAscan):
lib.runtRNAscan(MaskGenome, os.path.join(
args.out, 'predict_misc'), tRNAscan, cpus=args.cpus)
if args.trnascan:
lib.log.info("Existing tRNAscan results passed: {}".format(args.trnascan))
trna_result = lib.runtRNAscan(MaskGenome, os.path.join(args.out, 'predict_misc'),
tRNAscan, cpus=args.cpus, precalc=args.trnascan)
else:
lib.log.info("Predicting tRNAs")
if not os.path.isfile(tRNAscan):
trna_result = lib.runtRNAscan(MaskGenome, os.path.join(args.out, 'predict_misc'),
tRNAscan, cpus=args.cpus)
else:
trna_result = True

# combine tRNAscan with EVM gff, dropping tRNA models if they overlap with EVM models
cleanTRNA = os.path.join(args.out, 'predict_misc',
'trnascan.no-overlaps.gff3')
lib.validate_tRNA(tRNAscan, EVMCleanGFF, AssemblyGaps, cleanTRNA)
lib.log.info("{:,} tRNAscan models are valid (non-overlapping)".format(lib.countGFFgenes(cleanTRNA)))
cleanTRNA = os.path.join(args.out, 'predict_misc', 'trnascan.no-overlaps.gff3')
if trna_result:
lib.validate_tRNA(tRNAscan, EVMCleanGFF, AssemblyGaps, cleanTRNA)
lib.log.info("{:,} tRNAscan models are valid (non-overlapping)".format(lib.countGFFgenes(cleanTRNA)))
else:
with open(cleanTRNA, 'w') as outfile:
outfile.write('##gff-version 3\n')

# load EVM models and tRNAscan models, output tbl annotation file
lib.log.info("Generating GenBank tbl annotation file")
Expand Down
1 change: 1 addition & 0 deletions scripts/funannotate
Expand Up @@ -193,6 +193,7 @@ Optional:
--transcript_alignments Pre-computed transcript alignments in GFF3 format
--augustus_gff Pre-computed AUGUSTUS GFF3 results (must use --stopCodonExcludedFromCDS=False)
--genemark_gtf Pre-computed GeneMark GTF results
--trnascan Pre-computed tRNAscanSE results
--min_intronlen Minimum intron length. Default: 10
--max_intronlen Maximum intron length. Default: 3000
Expand Down

0 comments on commit f0bc925

Please sign in to comment.