Source code for zensols.mednlp.entlink
"""Contains the classes for the medical token type and others.
"""
__author__ = 'Paul Landes'
from typing import List, Dict, Any, ClassVar
from dataclasses import dataclass, field, InitVar
import logging
from scispacy.linking import EntityLinker
from scispacy.linking_utils import Entity as SciSpacyEntity
from zensols.persist import persisted, PersistedWork
from zensols.config import Dictable
from zensols.nlp import FeatureToken, FeatureTokenDecorator
from . import MedicalLibrary
logger = logging.getLogger(__name__)
[docs]
@dataclass
class Entity(Dictable):
    """A thin convenience wrapper around a SciSpacy entity.

    The wrapped entity's canonical name, definition and concept identifier
    are exposed as the read-only properties :obj:`name`, :obj:`definition`
    and :obj:`cui`.

    """
    # property names declared for ``Dictable``; presumably the attributes it
    # includes when rendering this object -- confirm against ``Dictable``
    _DICTABLE_ATTRIBUTES: ClassVar[List[str]] = ['cui', 'name', 'definition']

    sci_spacy_entity: SciSpacyEntity = field(repr=False)
    """The wrapped entity as identified by :mod:`scispacy.linking_utils`."""

    @property
    def name(self) -> str:
        """The canonical name of the wrapped entity."""
        wrapped: SciSpacyEntity = self.sci_spacy_entity
        return wrapped.canonical_name

    @property
    def definition(self) -> str:
        """The human readable description of the wrapped entity."""
        wrapped: SciSpacyEntity = self.sci_spacy_entity
        return wrapped.definition

    @property
    def cui(self) -> str:
        """The unique concept identifier of the wrapped entity."""
        wrapped: SciSpacyEntity = self.sci_spacy_entity
        return wrapped.concept_id

    def __str__(self) -> str:
        # rendered as ``<canonical name> (<cui>)``
        name, cui = self.name, self.cui
        return f'{name} ({cui})'

    def __repr__(self):
        # the concept identifier alone is used as the short representation
        cui = self.cui
        return cui
[docs]
@dataclass
class EntitySimilarity(Entity):
    """An :class:`.Entity` paired with a similarity measure of a medical
    concept in cui2vec.

    :see: :meth:`.MedCatFeatureDocumentParser.similarity_by_term`

    """
    # NOTE: the attribute name is misspelled ("similiarty"); it is kept
    # verbatim because it is the constructor keyword callers pass
    similiarty: float
[docs]
@dataclass
class EntityLinkerResource(object):
    """Provides a way to resolve :class:`scispacy.linking_utils.Entity`
    instances from CUIs.

    :see: :meth:`.get_linked_entity`

    """
    params: Dict[str, Any] = field(
        default_factory=lambda: {
            'resolve_abbreviations': True,
            'linker_name': 'umls',
        })
    """Parameters given to the scispaCy entity linker."""

    cache_global: InitVar[bool] = True
    """Whether or not to globally cache resources, which saves load time."""

    def __post_init__(self, cache_global: bool):
        # backing store named by the ``@persisted('_linker')`` decorator on
        # the :obj:`linker` property
        self._linker = PersistedWork(
            '_linker', self, cache_global=cache_global)

    @property
    @persisted('_linker')
    def linker(self) -> EntityLinker:
        """The ScispaCy entity linker, created from :obj:`params`."""
        # install the warning filter before the linker is constructed
        self._silence_scispacy_warn()
        entity_linker = EntityLinker(**self.params)
        return entity_linker

    @staticmethod
    def _silence_scispacy_warn():
        """Ignore the TF-IDF estimator unpickle version warning.  This
        warning should have no bearing on this application as we are simply
        doing a CUI lookup.

        """
        import warnings
        pattern = '.*Trying to unpickle estimator Tfidf(?:Transformer|Vectorizer) from version.*'
        warnings.filterwarnings(action='ignore', message=pattern)

    def get_linked_entity(self, cui: str) -> Entity:
        """Get a scispaCy linked entity.

        :param cui: the unique concept ID

        :return: the entity wrapped in an :class:`.Entity`, or ``None`` when
                 the linker's knowledge base has no entry for ``cui``

        """
        found: SciSpacyEntity = self.linker.kb.cui_to_entity.get(cui)
        return None if found is None else Entity(found)
[docs]
@dataclass
class LinkFeatureTokenDecorator(FeatureTokenDecorator):
    """Adds linked SciSpacy definitions to tokens using the
    :class:`.MedicalLibrary`.

    """
    lib: MedicalLibrary = field(default=None)
    """The medical library used for linking entities."""

    def decorate(self, token: FeatureToken):
        """Look up the entity linked to the token's CUI and, when one is
        found, copy its definition to the token's ``_definition`` attribute.

        :param token: the token to decorate; its ``cui_`` attribute is used
                      for the lookup

        """
        cui = token.cui_
        e: SciSpacyEntity = self.lib.get_linked_entity(cui)
        if logger.isEnabledFor(logging.DEBUG):
            logger.debug(f'entity: {token.cui_} -> {e} ({id(token)})')
        if e is None:
            # nothing linked for this CUI, so the token is left untouched
            return
        token._definition = e.definition