Source code for zensols.mednlp.lib

"""Medical resource library that contains UMLS access, cui2vec, etc.

"""
from __future__ import annotations
__author__ = 'Paul Landes'
from typing import Any, List, Dict, Tuple
from dataclasses import dataclass, field
from zensols.config import ConfigFactory
from . import MedCatResource, UTSClient


@dataclass
class MedicalLibrary(object):
    """A utility class that provides access to medical APIs.

    Every method is a thin facade that delegates to one of the configured
    resources (MedCAT, the entity linker, the UTS client, or instances created
    by the configuration factory).

    """
    config_factory: ConfigFactory = field(default=None)
    """The configuration factory used to create cTAKES and cui2vec instances.

    """
    medcat_resource: MedCatResource = field(default=None)
    """The MedCAT factory resource."""

    entity_linker_resource: 'EntityLinkerResource' = field(default=None)
    """The entity linker resource."""

    uts_client: UTSClient = field(default=None)
    """Queries UMLS data."""

    def get_entities(self, text: str) -> Dict[str, Any]:
        """Return all concept entity data found in ``text``.

        :param text: the text handed to the MedCAT ``get_entities`` call

        :return: concepts as a multi-tiered dict

        """
        return self.medcat_resource.cat.get_entities(text)

    def get_linked_entity(self, cui: str) -> 'Entity':
        """Get a scispaCy linked entity.

        :param cui: the unique concept ID

        :return: the entity provided by :obj:`entity_linker_resource`

        """
        from .entlink import Entity
        ent: Entity = self.entity_linker_resource.get_linked_entity(cui)
        return ent

    def get_atom(self, cui: str) -> Dict[str, str]:
        """Get the preferred UMLS atom of a CUI from UTS.

        The query is always issued with ``preferred=True``; this method has
        no parameter to change that.

        :param cui: the concept ID used to query

        :return: the atom data as returned by the UTS client

        """
        return self.uts_client.get_atoms(cui, preferred=True)

    def get_relations(self, cui: str) -> List[Dict[str, Any]]:
        """Get the UMLS related concepts connected to a concept by ID.

        :param cui: the concept ID used to get related concepts

        :return: a list of relation entries in dictionary form in the order
                 returned by UTS

        """
        return self.uts_client.get_relations(cui)

    def get_new_ctakes_parser_stash(self) -> 'CTakesParserStash':
        """Return a new instance of a ctakes parser stash.

        The instance is created from the ``ctakes_parser_stash`` entry of
        :obj:`config_factory`.

        """
        return self.config_factory.new_instance('ctakes_parser_stash')

    @property
    def cui2vec_embedding(self) -> 'Cui2VecEmbedModel':
        """The cui2vec embedding model.

        Obtained by calling :obj:`config_factory` with the
        ``cui2vec_500_embedding`` entry name.

        """
        return self.config_factory('cui2vec_500_embedding')

    def similarity_by_term(self, term: str, topn: int = 5) -> \
            List['EntitySimilarity']:
        """Return similarities of a medical term.

        The term is first resolved to a CUI with a UTS term search (the first
        hit is used), then the ``topn`` nearest CUIs in the cui2vec embedding
        are linked to their entities.

        :param term: the medical term (i.e. ``heart disease``)

        :param topn: the top N count similarities to return

        :return: the similar entities paired with their similarity to the
                 term's CUI, or an empty list if the term search yields no
                 results

        """
        # search first: without a hit there is no CUI to look up, so neither
        # the embedding nor its dependencies need to be loaded
        matches: List[Dict[str, str]] = self.uts_client.search_term(term)
        if not matches:
            return []
        from .entlink import Entity, EntitySimilarity
        from .cui2vec import Cui2VecEmbedModel
        from gensim.models.keyedvectors import KeyedVectors
        embedding: Cui2VecEmbedModel = self.cui2vec_embedding
        kv: KeyedVectors = embedding.keyed_vectors
        cui: str = matches[0]['ui']
        sims_by_word: List[Tuple[str, float]] = kv.similar_by_word(cui, topn)
        sims: List[EntitySimilarity] = []
        # the score paired with each neighbor is ignored; the similarity is
        # queried from the vectors explicitly, as the original code did
        for rel_cui, _ in sims_by_word:
            entity: Entity = self.get_linked_entity(rel_cui)
            sim: float = kv.similarity(cui, rel_cui)
            sims.append(EntitySimilarity(entity.sci_spacy_entity, sim))
        return sims