Source code for ckg.graphdb_builder.ontologies.parsers.efoParser

from ckg.graphdb_builder import builder_utils
from collections import defaultdict
import re


# Captures the text between the first pair of double quotes at the start of a
# string. Compiled once here instead of on every synonym inside the loops.
_SYNONYM_TEXT = re.compile(r'\"(.+?)\"')


def parser(ontology_files):
    """Parse EFO ontology files into terms, relationships, definitions and mappings.

    Each file is opened as UTF-8 and handed to
    ``builder_utils.convertOBOtoNet``; the returned object is iterated with
    ``nodes(data=True)``, yielding ``(term, attr)`` pairs. Only terms whose
    identifier starts with ``"EFO:"`` are processed.

    :param ontology_files: iterable of paths to the ontology files to parse.
    :return: a 2-tuple ``((terms, relationships, definitions), mappings)``:

        - **terms** -- namespace -> term id -> list of names. The first entry
          is the term's ``name`` with double quotes removed (or the term id
          when there is no ``name``), followed by the quoted text of each
          synonym.
        - **relationships** -- namespace -> set of
          ``(term, parent, "HAS_PARENT")`` tuples, one per ``is_a`` entry.
        - **definitions** -- term id -> ``def`` text with double quotes
          removed, or the term's first name when there is no ``def``.
        - **mappings** -- ``"Experimental_factor_maps_to_<Label>"`` -> set of
          ``(term, xref, "MAPS_TO")`` tuples, where ``<Label>`` is
          ``Disease`` (``DOID:``), ``Clinical_variable`` (``SNOMEDCT:``) or
          ``Phenotype`` (``HP:``). Other xrefs are ignored.

    Synonym values that do not start with a double-quoted text are skipped;
    previously they raised ``AttributeError`` and aborted the whole parse.
    """
    terms = defaultdict(list)
    relationships = defaultdict(set)
    mappings = defaultdict(set)
    definitions = defaultdict()
    for obo_file in ontology_files:
        with open(obo_file, encoding="utf-8") as f:
            oboGraph = builder_utils.convertOBOtoNet(f)
            # NOTE(review): namespace is initialised once per file and never
            # reset per term, so a term without its own "namespace" attribute
            # inherits the one of the previously processed term. Kept as-is
            # to preserve existing output -- confirm whether this is intended.
            namespace = "EFO"
            for term, attr in oboGraph.nodes(data=True):
                if not term.startswith("EFO:"):
                    continue

                if "namespace" in attr:
                    namespace = attr["namespace"]
                if namespace not in terms:
                    terms[namespace] = defaultdict(list)
                names = terms[namespace][term]

                # The primary name always goes first; definitions fall back
                # to it below when the term has no "def".
                if "name" in attr:
                    names.append(attr["name"].replace('"', ''))
                else:
                    names.append(term)

                for synonym in attr.get("synonym", ()):
                    quoted = _SYNONYM_TEXT.match(synonym)
                    # re.match returns None when the value does not begin
                    # with quoted text; skip it rather than crash.
                    if quoted is not None:
                        names.append(quoted.group(1))

                for target in attr.get("xref", ()):
                    label = None
                    if target.startswith('DOID:'):
                        label = 'Disease'
                    elif target.startswith('SNOMEDCT:'):
                        label = 'Clinical_variable'
                        # Only SNOMEDCT ids are stored without their prefix.
                        target = target.split(':')[1]
                    elif target.startswith('HP:'):
                        label = 'Phenotype'
                    if label is not None:
                        mappings["Experimental_factor_maps_to_" + label].add(
                            (term, target, "MAPS_TO"))

                if "def" in attr:
                    definitions[term] = attr["def"].replace('"', '')
                else:
                    definitions[term] = names[0]

                for parent in attr.get("is_a", ()):
                    relationships[namespace].add((term, parent, "HAS_PARENT"))

    return (terms, relationships, definitions), mappings