Skip to content

Commit

Permalink
fix final annotation table; orthologs and transfactors
Browse files: browse the repository at this point in the history
  • Loading branch information
Jon Palmer authored and Jon Palmer committed Sep 16, 2016
1 parent 1ba402a commit 74c367f
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 6 deletions.
21 changes: 16 additions & 5 deletions bin/funannotate-compare.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,6 @@ def __init__(self,prog):
eggnog.append(lib.getEggNogfromNote(GBK))
scinames.append(stats[i][0].replace(' ', '_'))


#convert busco to dictionary
busco = lib.busco_dictFlip(busco)

Expand Down Expand Up @@ -515,8 +514,9 @@ def __init__(self,prog):
os.makedirs(os.path.join(args.out, 'tfs'))
#should be able to pull transcription factor counts from InterPro Domains, load into pandas df
iprTF = os.path.join(parentdir, 'lib', 'tf_interpro.txt')

tf = pd.read_csv(iprTF, names=['InterPro', 'Description'])
#convert to dictionary for all annotations later
TFDict = tf.set_index('InterPro')['Description'].to_dict()
iprall = IPRdf.transpose()
iprall.reset_index(inplace=True)
dfmerged = pd.merge(tf,iprall, left_on='InterPro', right_on='index', how='left')
Expand Down Expand Up @@ -756,7 +756,7 @@ def __init__(self,prog):
for line in input:
line = line.replace('\n', '')
col = line.split('\t')
genes = col[1].split(',')
genes = col[-1].split(', ')
for i in genes:
orthoDict[i] = col[0]

Expand Down Expand Up @@ -785,9 +785,16 @@ def __init__(self,prog):
meropsDict = lib.dictFlip(merops)
cazyDict = lib.dictFlip(cazy)

#get Transcription factors in a dictionary
TFLookup = {}
for k,v in iprDict.items():
for x in v:
IPRid = x.split(':')[0]
if IPRid in TFDict:
TFLookup[k] = TFDict.get(IPRid)

table = []
header = ['GeneID','scaffold:start-end','strand','length','description', 'Ortho Group', 'EggNog', 'BUSCO', 'Secreted', 'Protease family', 'CAZyme family', 'InterPro Domains', 'PFAM Domains', 'GO terms', 'SecMet Cluster', 'SMCOG']
header = ['GeneID','scaffold:start-end','strand','length','description', 'Ortho Group', 'EggNog', 'BUSCO', 'Secreted', 'Protease family', 'CAZyme family', 'Transcription factor', 'InterPro Domains', 'PFAM Domains', 'GO terms', 'SecMet Cluster', 'SMCOG']
for y in range(0,num_input):
outputname = os.path.join(args.out, 'annotations', scinames[y]+'.all.annotations.tsv')
with open(outputname, 'w') as output:
Expand Down Expand Up @@ -843,6 +850,10 @@ def __init__(self,prog):
orthogroup = orthoDict.get(ID)
else:
orthogroup = ''
if ID in TFLookup:
transfactor = TFLookup.get(ID)
else:
transfactor = ''
for k,v in f.qualifiers.items():
if k == 'note':
notes = v[0].split('; ')
Expand All @@ -855,7 +866,7 @@ def __init__(self,prog):
if i.startswith('SMCOG:'):
smcog = i

final_result = [ID, location, strand, str(length), description, orthogroup, egg, buscogroup, signalphit, meropsdomains, cazydomains, IPRdomains, pfamdomains, goTerms, cluster, smcog]
final_result = [ID, location, strand, str(length), description, orthogroup, egg, buscogroup, signalphit, meropsdomains, cazydomains, transfactor, IPRdomains, pfamdomains, goTerms, cluster, smcog]
output.write("%s\n" % ('\t'.join(final_result)))
############################################

Expand Down
2 changes: 1 addition & 1 deletion funannotate.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def fmtcols(mylist, cols):
for i in range(0,num_lines))
return "\n".join(lines)

version = '0.3.8'
version = '0.3.9'

default_help = """
Usage: funannotate <command> <arguments>
Expand Down

0 comments on commit 74c367f

Please sign in to comment.