Source code for ckg.graphdb_builder.ontologies.parsers.oboParser

from collections import defaultdict
from ckg.graphdb_builder import builder_utils
import re


[docs]def parser(ontology, files): """ Multiple ontology database parser. This function parses and extracts relevant data from: Disease Ontology, Tissues, Human Phenotype Ontology, \ HUPO-PSI and Gene Ontology databases. :param str ontology: name of the ontology to be imported ('Disease', 'Tissue', 'Phenotype', 'Experiment', \ 'Modification', 'Molecular_interactions', 'Gene_ontology') :param list files: list of files downloaded from an ontology and used to generate nodes and relationships in the graph database. :return: Three nested dictionaries: terms, relationships between terms, and definitions of the terms. - terms: Dictionary where each key is an ontology identifier (*str*) and the values are lists of names and synonyms (*list[str]*). - relationships: Dictionary of tuples (*str*). Each tuple contains two ontology identifiers (source and target) and \ the relationship type between them. - definitions: Dictionary with ontology identifiers as keys (*str*), and definition of the terms as values (*str*). """ terms = defaultdict(list) relationships = defaultdict(set) definitions = defaultdict() for obo_file in files: oboGraph = builder_utils.convertOBOtoNet(obo_file) namespace = ontology for term, attr in oboGraph.nodes(data=True): if "namespace" in attr: namespace = attr["namespace"] if namespace not in terms: terms[namespace] = defaultdict(list) if "name" in attr: terms[namespace][term].append(attr["name"]) else: terms[namespace][term].append(term) if "synonym" in attr: for s in attr["synonym"]: terms[namespace][term].append(re.match(r'\"(.+?)\"', s).group(1)) if "xref" in attr: for s in attr["xref"]: terms[namespace][term].append(s) if "def" in attr: definitions[term] = attr["def"].replace('"', '') else: definitions[term] = terms[namespace][term][0] if "is_a" in attr: for isa in attr["is_a"]: relationships[namespace].add((term, isa, "HAS_PARENT")) return terms, relationships, definitions