Skip to content

Commit

Permalink
be more explicit with GO terms from interpro (untested) #959
Browse files Browse the repository at this point in the history
  • Loading branch information
Jon Palmer committed Sep 6, 2023
1 parent 10b3e84 commit e9a0107
Showing 1 changed file with 41 additions and 38 deletions.
79 changes: 41 additions & 38 deletions funannotate/aux_scripts/iprscan2annotations.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,12 @@

def convertGOattribute(namespacein):
namespace = namespacein.upper()
if namespace == 'BIOLOGICAL_PROCESS':
attribute = 'go_process'
elif namespace == 'MOLECULAR_FUNCTION':
attribute = 'go_function'
elif namespace == 'CELLULAR_COMPONENT':
attribute = 'go_component'
if namespace == "BIOLOGICAL_PROCESS":
attribute = "go_process"
elif namespace == "MOLECULAR_FUNCTION":
attribute = "go_function"
elif namespace == "CELLULAR_COMPONENT":
attribute = "go_component"
else:
# print(f'Error parsing XML GO terms: {namespace} is not a valid term')
attribute = "go_unknown"
Expand All @@ -26,30 +26,29 @@ def convertGOattribute(namespacein):


def main():
'''Main step of interpro annotations to tab delimited script.'''
"""Main step of interpro annotations to tab delimited script."""

if len(sys.argv) < 2:
print("Usage: iprscan2annotations.py IPRSCAN.xml OUTPUT.annotations.txt")
sys.exit(1)

goDict = {}
for item in obo_parser.OBOReader(os.path.join(os.environ["FUNANNOTATE_DB"],
'go.obo')):
for item in obo_parser.OBOReader(
os.path.join(os.environ["FUNANNOTATE_DB"], "go.obo")
):
namespace = convertGOattribute(item.namespace)
goDict[item.id] = {'name': item.name,
'namespace': namespace}
goDict[item.id] = {"name": item.name, "namespace": namespace}
for nm in item.alt_ids: # also index by alt_id since that may be reported
goDict[nm] = {'name': item.name,
'namespace': namespace}
with open(sys.argv[2], 'w') as output:
goDict[nm] = {"name": item.name, "namespace": namespace}
with open(sys.argv[2], "w") as output:
with open(sys.argv[1]) as xml_file:
tree = etree.iterparse(xml_file)
for _, elem in tree:
if '}' in elem.tag:
elem.tag = elem.tag.split('}', 1)[1]
if "}" in elem.tag:
elem.tag = elem.tag.split("}", 1)[1]
for at in list(elem.attrib.keys()):
if '}' in at:
newat = at.split('}', 1)[1]
if "}" in at:
newat = at.split("}", 1)[1]
elem.attrib[newat] = elem.attrib[at]
del elem.attrib[at]
root = tree.root
Expand All @@ -60,20 +59,23 @@ def main():
gos = {}
signalp = []
for lv1 in hits:
if lv1.tag == 'xref':
name = lv1.get('id')
if lv1.tag == "xref":
name = lv1.get("id")
IDs.append(name)
if lv1.tag == 'matches':
for e in lv1.findall('.//entry'):
if not e.get('ac') in iprs:
iprs.append(e.get('ac'))
for g in lv1.findall('.//go-xref'):
cat, goID, desc = g.get(
'category'), g.get('id'), g.get('name')
if cat is None or desc is None:
if lv1.tag == "matches":
for e in lv1.findall(".//entry"):
if not e.get("ac") in iprs:
iprs.append(e.get("ac"))
for g in lv1.findall(".//go-xref"):
cat = g.get("category", None)
goID = g.get("id", None)
desc = g.get("name", None)
if not goID:
continue
if not cat or not desc:
if goID in goDict:
cat = goDict[goID]['namespace']
desc = goDict[goID]['name']
cat = goDict[goID]["namespace"]
desc = goDict[goID]["name"]
else:
continue
# cat = ""
Expand All @@ -86,23 +88,24 @@ def main():
gos[goID] = goHit
# SignalP is processed elsewhere.
# Should this parsing be skipped entirely?
for s in lv1.findall('.//signalp-match'):
for lib in s.findall('.//signature-library-release'):
if lib.get('library') == "SIGNALP_EUK":
for loc in s.findall('.//signalp-location'):
for s in lv1.findall(".//signalp-match"):
for lib in s.findall(".//signature-library-release"):
if lib.get("library") == "SIGNALP_EUK":
for loc in s.findall(".//signalp-location"):
signalp.append(
(loc.get('start'), loc.get('end')))
(loc.get("start"), loc.get("end"))
)
# print out annotation file if IPR domains
if len(iprs) > 0:
for i in IDs:
for x in iprs:
output.write(f'{i}\tdb_xref\tInterPro:{x}\n')
output.write(f"{i}\tdb_xref\tInterPro:{x}\n")
if len(gos) > 0:
for i in IDs:
for goid in gos:
x = gos[goid]
GOID = x[2].replace('GO:', '')
output.write(f'{i}\t{x[0]}\t{x[1]}|{GOID}||IEA\n')
GOID = x[2].replace("GO:", "")
output.write(f"{i}\t{x[0]}\t{x[1]}|{GOID}||IEA\n")


if __name__ == "__main__":
Expand Down

0 comments on commit e9a0107

Please sign in to comment.