Source code for zensols.mednlp.tok

"""Contains the classes for the medical token type.

"""
__author__ = 'Paul Landes'

from typing import Dict, Tuple, Optional, Union
import logging
from functools import reduce
from frozendict import frozendict
from spacy.tokens.token import Token
from spacy.tokens.span import Span
from medcat.cdb import CDB
from zensols.nlp import FeatureToken, SpacyFeatureToken
from . import MedCatResource
from .domain import _MedicalEntity

# Module-level logger, named after this module so that its output can be
# configured per package through the standard ``logging`` hierarchy.
logger = logging.getLogger(__name__)


class MedicalFeatureToken(SpacyFeatureToken):
    """A set of token features that optionally contains a medical concept.

    When the token was linked to a medical entity (see :obj:`is_concept`) the
    concept properties are resolved through the MedCAT concept database taken
    from the resource given to the initializer.  Otherwise each property
    returns a "missing" value: :obj:`NONE` for strings, ``-1`` for numbers
    and an empty tuple for sequences.

    """
    # feature names grouped by the type of data each feature yields
    FEATURE_IDS_BY_TYPE = frozendict({
        'str': frozenset(('cui_ pref_name_ detected_name_ tuis_ ' +
                          'definition_ tui_descs_').split()),
        'bool': frozenset('is_concept'.split()),
        'float': frozenset('context_similarity'.split()),
        'int': frozenset('cui'.split()),
        'list': frozenset('tuis sub_names'.split())})
    # the union of every feature name declared in FEATURE_IDS_BY_TYPE
    FEATURE_IDS = frozenset(
        reduce(lambda res, x: res | x, FEATURE_IDS_BY_TYPE.values()))
    # the parent's writable features plus the string concept ID
    WRITABLE_FEATURE_IDS = tuple(list(FeatureToken.WRITABLE_FEATURE_IDS) +
                                 'cui_'.split())
    # retained for backward compatibility with code that references it
    _NONE_SET = frozenset()

    def __init__(self, spacy_token: Union[Token, Span], norm: str,
                 res: MedCatResource, ix2ent: Dict[int, _MedicalEntity]):
        """Initialize the token.

        :param spacy_token: the spaCy token or span this feature token wraps

        :param norm: the normalized text, passed on to the parent class

        :param res: the resource providing the concept database
                    (``res.cat.cdb``) and the TUI descriptions (``res.tuis``)

        :param ix2ent: medical entities keyed by the value compared against
                       this token's ``idx``; a token without an entry gets
                       an empty :class:`._MedicalEntity`

        """
        super().__init__(spacy_token, norm)
        self._definition: str = self.NONE
        self._cdb: CDB = res.cat.cdb
        self._res = res
        med_ent: Optional[_MedicalEntity] = ix2ent.get(self.idx)
        if med_ent is None:
            # no linked concept: use an empty entity so that the properties
            # below never have to handle ``None``
            med_ent = _MedicalEntity()
        self.med_ent = med_ent
        self.is_ent = med_ent.is_ent

    @property
    def ent_(self) -> str:
        # self.med_ent.concept_span.label_ just gives 'concept', which then
        # clobbers other useful entities in the combiner parsers
        return self.NONE

    @property
    def ent(self) -> int:
        # self.med_ent.concept_span.label just gives 'concept', which then
        # clobbers other useful entities in the combiner parsers
        return 0

    @property
    def is_concept(self) -> bool:
        """``True`` if this has a CUI and identifies a medical concept."""
        return self.is_ent

    @property
    def cui_(self) -> str:
        """The unique UMLS concept ID, or :obj:`NONE` if not a concept."""
        return self.med_ent.cui_ if self.is_concept else self.NONE

    @property
    def cui(self) -> int:
        """The numeric part of the concept ID, or ``-1`` if not a concept.

        The first character of :obj:`cui_` is dropped and the remainder is
        parsed as an integer.

        """
        return -1 if not self.is_concept else int(self.cui_[1:])

    @property
    def pref_name_(self) -> Optional[str]:
        """The preferred name of the concept, or :obj:`NONE` if not a concept.

        The name is looked up with ``get``, so this is ``None`` when the
        concept database has no preferred name for the CUI.

        """
        if self.is_concept:
            return self._cdb.cui2preferred_name.get(self.cui_)
        else:
            return self.NONE

    @property
    def detected_name_(self) -> str:
        """The detected name of the concept, or :obj:`NONE` if not a concept.

        """
        if self.is_concept:
            return self.med_ent.concept_span._.detected_name
        else:
            return self.NONE

    @property
    def sub_names(self) -> Tuple[str, ...]:
        """Other names for the concept in sorted order, or an empty tuple if
        this is not a concept.

        """
        if self.is_concept:
            return tuple(sorted(self._cdb.cui2names[self.cui_]))
        else:
            # an empty tuple (rather than a list) keeps the return type the
            # same on both paths
            return ()

    @property
    def context_similarity(self) -> float:
        """The context similarity of the concept, or ``-1`` if not a concept.

        """
        if self.is_concept:
            return self.med_ent.concept_span._.context_similarity
        else:
            return -1

    @property
    def definition_(self) -> str:
        """The definition of the concept, or :obj:`NONE` if there is none."""
        return self._definition or FeatureToken.NONE

    @property
    def tuis(self) -> Tuple[str, ...]:
        """The type IDs (TUIs) of the concept in sorted order, or an empty
        tuple if this is not a concept or the concept has no type IDs.

        """
        if self.is_concept:
            cui: str = self.cui_
            # ``get`` yields ``None`` for a CUI without type IDs, which
            # ``sorted`` can not iterate, so fall back to no types
            return tuple(sorted(self._cdb.cui2type_ids.get(cui) or ()))
        else:
            return ()

    @property
    def tuis_(self) -> str:
        """All CUI TUIs (types) of the concept sorted as a comma delimited
        list.

        """
        return ','.join(sorted(self.tuis))

    @property
    def tui_descs_(self) -> str:
        """Descriptions of :obj:`tuis_` as a comma and space delimited list.

        A TUI without a description is rendered as ``? (<tui>)``.

        """
        def map_tui(k: str) -> str:
            v = self._res.tuis.get(k)
            if v is None:
                v = f'? ({k})'
            return v

        return ', '.join(map(map_tui, sorted(self.tuis)))

    def __str__(self):
        """Return the normalized text, followed by the CUI in parentheses
        when this is a concept.

        """
        cui_str = f' ({self.cui_})' if self.is_concept else ''
        return self.norm + cui_str