Skip to content

Commit

Permalink
add missing files you forgot about
Browse files Browse the repository at this point in the history
  • Loading branch information
Jon Palmer authored and Jon Palmer committed Dec 13, 2016
1 parent 0d7cdf5 commit 0abf4d9
Show file tree
Hide file tree
Showing 3 changed files with 2,125 additions and 0 deletions.
31 changes: 31 additions & 0 deletions lib/codeml.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
seqfile = INPUTFILEHERE
treefile = INPUTTREEHERE
outfile = OUTPUTFILEHERE
noisy = 9 * 0,1,2,3,9: how much rubbish on the screen
verbose = 1 * 1: detailed output, 0: concise output
runmode = 0 * 0: user tree; 1: semi-automatic; 2: automatic
* 3: StepwiseAddition; (4,5):PerturbationNNI; -2: pairwise

seqtype = 1 * 1:codons; 2:AAs; 3:codons-->AAs
CodonFreq = 2 * 0:1/61 each, 1:F1X4, 2:F3X4, 3:codon table
clock = 0 * 0: no clock, unrooted tree, 1: clock, rooted tree
aaDist = 0 * 0:equal, +:geometric; -:linear, {1-5:G1974,Miyata,c,p,v}
model = 0

NSsites = 0
* 0:one w; 1:NearlyNeutral; 2:PositiveSelection; 3:discrete;
* 4:freqs; 5:gamma;6:2gamma;7:beta;8:beta&w;9:betaγ10:3normal
icode = 0 * 0:standard genetic code; 1:mammalian mt; 2-10:see below
Mgene = 0 * 0:rates, 1:separate; 2:pi, 3:kappa, 4:all

fix_kappa = 0 * 1: kappa fixed, 0: kappa to be estimated
kappa = 1 * initial or fixed kappa
fix_omega = 0 * 1: omega or omega_1 fixed, 0: estimate
omega = 1 * initial or fixed omega, for codons or codon-based AAs
ncatG = 10 * # of categories in the dG or AdG models of rates

getSE = 0 * 0: don't want them, 1: want S.E.s of estimates
RateAncestor = 0 * (0,1,2): rates (alpha>0) or ancestral states (1 or 2)
Small_Diff = .45e-6
cleandata = 1 * remove sites with ambiguity data (1:yes, 0:no)?
fix_blength = 0 * 0: ignore, -1: random, 1: initial, 2: fixed
96 changes: 96 additions & 0 deletions util/add2outgroups.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
#!/usr/bin/env python

import sys, os, subprocess, shutil, argparse, inspect
from Bio import SeqIO
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(currentdir)
sys.path.insert(0, parentdir)
import lib.library as lib


#setup menu with argparse
class MyFormatter(argparse.ArgumentDefaultsHelpFormatter):
def __init__(self, prog):
super(MyFormatter, self).__init__(prog, max_help_position=48)
parser = argparse.ArgumentParser(prog='funannotate-predict.py', usage="%(prog)s [options] -i genome.fasta",
description = '''Script that adds a proteome to the outgroups.''',
epilog = """Written by Jon Palmer (2016) nextgenusfs@gmail.com""",
formatter_class = MyFormatter)
parser.add_argument('-i', '--input', required=True, help='Proteome in FASTA format')
parser.add_argument('--species', required=True, help='Species name "binomial in quotes"')
parser.add_argument('--busco_db', default='dikarya', required=True, help='BUSCO database to use')
parser.add_argument('--cpus', default=2, type=int, help='Number of CPUs to use')
args=parser.parse_args()

#get base name
species = args.species.replace(' ', '_').lower()+'.'+args.busco_db
OUTGROUPS = os.path.join(parentdir, 'DB', 'outgroups')

#create log file
log_name = species+'-add2outgroups.log'
if os.path.isfile(log_name):
os.remove(log_name)

#initialize script, log system info and cmd issue at runtime
lib.setupLogging(log_name)
FNULL = open(os.devnull, 'w')
cmd_args = " ".join(sys.argv)+'\n'
lib.log.debug(cmd_args)
print "-------------------------------------------------------"
lib.SystemInfo()

#get version of funannotate
version = lib.get_version()
lib.log.info("Running %s" % version)

#check buscos, download if necessary
if not os.path.isdir(os.path.join(parentdir, 'DB', args.busco_db)):
lib.download_buscos(args.busco_db)

ProtCount = lib.countfasta(args.input)
lib.log.info('{0:,}'.format(ProtCount) + ' protein records loaded')

#convert to proteins and screen with busco
lib.log.info("Looking for BUSCO models with %s DB" % args.busco_db)
BUSCODB = os.path.join(parentdir, 'DB', args.busco_db)
BUSCO = os.path.join(parentdir, 'util', 'funannotate-BUSCO2.py')
cmd = [sys.executable, BUSCO, '-i', os.path.abspath(args.input), '-m', 'proteins', '--lineage', BUSCODB, '-o', species, '--cpu', str(args.cpus), '-f']
lib.runSubprocess(cmd, '.', lib.log)

#check that it ran correctly
busco_results = os.path.join('run_'+species, 'full_table_'+species+'.tsv')
if not lib.checkannotations(busco_results):
lib.log.error("BUSCO failed, check logfile")
sys.exit(1)
nameChange = {}
with open(busco_results, 'rU') as input:
for line in input:
if line.startswith('#'):
continue
cols = line.split('\t')
if cols[1] == 'Complete':
if not cols[2] in nameChange:
nameChange[cols[2]] = cols[0]
else:
lib.log.error("Duplicate ID found: %s %s. Removing from results" % (cols[2], cols[0]))
del nameChange[cols[2]]

#output counts
lib.log.info('{0:,}'.format(len(nameChange)) + ' BUSCO models found')

#index the proteome for parsing
SeqRecords = SeqIO.index(args.input, 'fasta')

#setup output proteome
busco_out = os.path.join(OUTGROUPS, species+'_buscos.fa')
with open(busco_out, 'w') as output:
for k,v in nameChange.items():
rec = SeqRecords[k]
output.write('>%s\n%s\n' % (v, rec.seq))
lib.log.info("Results written to: %s" % busco_out)

#clean up your mess
shutil.rmtree('run_'+species)
shutil.rmtree('tmp')


0 comments on commit 0abf4d9

Please sign in to comment.