# Source code for zensols.mimic.regexnote

"""Regular expression note parsing

"""
__author__ = 'Paul Landes'

from typing import Iterable, ClassVar
from dataclasses import dataclass
from abc import ABCMeta, abstractmethod
import re
from zensols.nlp import LexicalSpan
from . import Section, SectionAnnotatorType, Note



[docs]
@dataclass(repr=False)
class RegexNote(Note, metaclass=ABCMeta):
    """Base class used to collect subclass regular expression captures and
    create sections from them.

    Subclasses implement :meth:`_get_matches`.  Each match must have (at
    least) two capture groups: group 1 spans the section header and group 2
    spans the section body.

    """
    @abstractmethod
    def _get_matches(self, text: str) -> Iterable[re.Match]:
        """Return the section matches found in ``text``.

        :param text: the note text with two newlines appended

        :return: matches whose group 1 is the section header and group 2 is
                 the section body

        """
        pass

    def _get_section_annotator_type(self) -> SectionAnnotatorType:
        """Return the annotator type for regular expression parsed notes."""
        return SectionAnnotatorType.REGULAR_EXPRESSION

    def _get_sections(self) -> Iterable[Section]:
        """Create a section for every non-empty match of the subclass regular
        expression.

        :return: the sections in match order with IDs numbered from 0, or
                 the super class's sections when nothing matched

        """
        # add to match on most regex's that expect two newlines between sections
        ext_text = self.text + '\n\n'
        # skip zero-length matches
        matches: Iterable[re.Match] = (
            m for m in self._get_matches(ext_text) if m.end() > m.start())
        # the name is passed as ``None`` (unchanged from the original);
        # presumably ``Section`` derives it from the header span -- confirm
        secs = [
            Section(
                id=sid,
                name=None,
                container=self,
                header_spans=(LexicalSpan(m.start(1), m.end(1)),),
                body_span=LexicalSpan(m.start(2), m.end(2)))
            for sid, m in enumerate(matches)]
        if not secs:
            # nothing matched: fall back to the super class's sections
            secs = super()._get_sections()
        return secs




[docs]
@dataclass(repr=False)
class DischargeSummaryNote(RegexNote):
    """A discharge summary note split into sections with regular expressions.
    There should be only one of these per hospital admission.

    """
    CATEGORY: ClassVar[str] = 'Discharge summary'
    # compiled patterns keyed by layout name; each has two capture groups
    # (section header, then section body)
    _SECTION_REGEX: ClassVar[re.Pattern] = {
        'header': re.compile(r'([a-zA-Z ]+):\n+(.+?)\n{2,}', re.DOTALL),
        'para': re.compile(r'([A-Z ]+):[ ]{2,}(.+?)\n{2,}', re.DOTALL),
    }

    def _get_matches(self, text: str) -> Iterable[re.Match]:
        """Find the sections of ``text``.

        The ``para`` pattern is used when the text contains the literal
        ``HISTORY OF PRESENT ILLNESS:``, otherwise the ``header`` pattern.

        :param text: the note text to search

        :return: an iterator over the section matches

        """
        has_hpi: bool = 'HISTORY OF PRESENT ILLNESS:' in text
        layout: str = 'para' if has_hpi else 'header'
        return self._SECTION_REGEX[layout].finditer(text)




[docs]
@dataclass(repr=False)
class NursingOtherNote(RegexNote):
    """A nursing/other note split into sections with a regular expression.

    """
    CATEGORY: ClassVar[str] = 'Nursing/other'
    # a header of letters and spaces, a colon and one space, then the body up
    # to the next run of two or more newlines
    _SECTION_REGEX: ClassVar[re.Pattern] = {
        'para': re.compile(r'([a-zA-Z ]+):[ ](.+?)\n{2,}', re.DOTALL),
    }

    def _get_matches(self, text: str) -> Iterable[re.Match]:
        """Find the sections of ``text`` using the ``para`` pattern.

        :param text: the note text to search

        :return: an iterator over the section matches

        """
        return self._SECTION_REGEX['para'].finditer(text)




[docs]
@dataclass(repr=False)
class EchoNote(RegexNote):
    """An echo note split into sections with a regular expression that
    matches a fixed set of case-insensitive section headers.

    """
    CATEGORY: ClassVar[str] = 'Echo'
    # group 1 is an alternation of the known header names; group 2 is the
    # body up to the next run of two or more newlines
    _SECTION_REGEX: ClassVar[re.Pattern] = {
        'para': re.compile(
            '(' +
            '|'.join('conclusions findings impression indication'.split() +
                     ['patient/test information', 'clinical implications']) +
            r'):[\n ]+(.+?)\n{2,}', re.DOTALL | re.IGNORECASE),
    }

    def _get_matches(self, text: str) -> Iterable[re.Match]:
        """Find the sections of ``text`` using the ``para`` pattern.

        :param text: the note text to search

        :return: an iterator over the section matches

        """
        return self._SECTION_REGEX['para'].finditer(text)




[docs]
@dataclass(repr=False)
class PhysicianNote(RegexNote):
    """A physician note split into sections with a regular expression that
    matches a fixed set of case-insensitive, three-space indented headers.

    """
    CATEGORY: ClassVar[str] = 'Physician'
    # group 1 is one of the known header names (the colon is optional);
    # group 2 is the body, which ends before the next three-space indented
    # ``<header>:`` line.
    #
    # NOTE(review): that terminating header line is consumed by the match, so
    # ``finditer`` resumes after it; a listed header that directly ends the
    # previous section is then not matched as a section start -- confirm this
    # is intended
    _SECTION_REGEX: ClassVar[re.Pattern] = {
        'header': re.compile(
            r'[ ]{3}(' +
            'HPI|Current medications|24 Hour Events|Last dose of Antibiotics|Flowsheet Data|physical examination|labs / radiology|assessment and plan|code status|disposition' +
            r'):?\n(.+?)\n[ ]{3}[a-zA-Z0-9/ ]+:', re.DOTALL | re.IGNORECASE),
    }

    def _get_matches(self, text: str) -> Iterable[re.Match]:
        """Find the sections of ``text`` using the ``header`` pattern.

        :param text: the note text to search

        :return: an iterator over the section matches

        """
        return self._SECTION_REGEX['header'].finditer(text)




[docs]
@dataclass(repr=False)
class RadiologyNote(RegexNote):
    """A radiology note split into sections with a regular expression.

    """
    CATEGORY: ClassVar[str] = 'Radiology'
    # an upper case header, a colon and at least two newlines/spaces, then the
    # body up to the next run of two or more newlines
    _SECTION_REGEX: ClassVar[re.Pattern] = {
        'para': re.compile(r'\s*([A-Z ]+):[\n ]{2,}(.+?)\n{2,}', re.DOTALL),
    }

    def _get_matches(self, text: str) -> Iterable[re.Match]:
        """Find the sections of ``text`` using the ``para`` pattern.

        :param text: the note text to search

        :return: an iterator over the section matches

        """
        return self._SECTION_REGEX['para'].finditer(text)




[docs]
@dataclass(repr=False)
class ConsultNote(RegexNote):
    """A consult note split into sections with a regular expression.

    """
    CATEGORY: ClassVar[str] = 'Consult'
    # group 1 is the header (letters, ``/`` and spaces) followed by a colon
    # and newline(s); group 2 is the body, which ends at two or more newlines
    # or at whitespace followed by a period and a newline
    _SECTION_REGEX: ClassVar[re.Pattern] = {
        'header': re.compile(r'\s*([a-zA-Z/ ]+):\n+(.+?)(?:[\n]{2,}|\s+\.\n)',
                             re.DOTALL),
    }

    def _get_matches(self, text: str) -> Iterable[re.Match]:
        """Find the sections of ``text`` using the ``header`` pattern.

        :param text: the note text to search

        :return: an iterator over the section matches

        """
        return self._SECTION_REGEX['header'].finditer(text)