Source code for zensols.mimic.regexnote

"""Regular expression note parsing

"""
__author__ = 'Paul Landes'

from typing import Dict, Iterable, ClassVar
from dataclasses import dataclass
from abc import ABCMeta, abstractmethod
import re
from zensols.nlp import LexicalSpan
from . import Section, SectionAnnotatorType, Note


@dataclass(repr=False)
class RegexNote(Note, metaclass=ABCMeta):
    """Base class used to collect subclass regular expression captures and
    create sections from them.

    Subclasses implement :meth:`_get_matches`.  Every non-empty match becomes
    one :class:`.Section` whose header span is capture group 1 and whose body
    span is capture group 2.  When nothing matches, the sections of the super
    class are used instead.

    """
    @abstractmethod
    def _get_matches(self, text: str) -> Iterable[re.Match]:
        """Return the section matches found in ``text``.

        :param text: the text to search; :meth:`_get_sections` passes the
                     note text with two newlines appended

        :return: matches with (at least) two capture groups: group 1 spans
                 the section header and group 2 spans the section body

        """
        pass

    def _get_section_annotator_type(self) -> SectionAnnotatorType:
        """Return the regular expression annotator type."""
        return SectionAnnotatorType.REGULAR_EXPRESSION

    def _get_sections(self) -> Iterable[Section]:
        """Create a section for each non-empty match of :meth:`_get_matches`.

        Section IDs are assigned sequentially from 0 in match order.

        :return: the matched sections, or the super class sections when no
                 match was found

        """
        # add to match on most regex's that expect two newlines between
        # sections
        ext_text: str = self.text + '\n\n'
        # skip zero-width matches
        matches: Iterable[re.Match] = (
            m for m in self._get_matches(ext_text) if m.end() > m.start())
        secs = [
            Section(
                id=sid,
                # NOTE(review): the name is left unset here; presumably
                # Section derives it from the header span -- confirm
                name=None,
                container=self,
                header_spans=(LexicalSpan(m.start(1), m.end(1)),),
                body_span=LexicalSpan(m.start(2), m.end(2)))
            for sid, m in enumerate(matches)]
        if not secs:
            # nothing matched: fall back on the super class sections
            secs = super()._get_sections()
        return secs
@dataclass(repr=False)
class DischargeSummaryNote(RegexNote):
    """Contains sections for the discharge summary.  There should be only one
    of these per hospital admission.

    """
    CATEGORY: ClassVar[str] = 'Discharge summary'

    # section patterns by layout name; in each, group 1 captures the header
    # and group 2 the body
    _SECTION_REGEX: ClassVar[Dict[str, re.Pattern]] = {
        # header terminated by a colon and newline(s), body up to 2+ newlines
        'header': re.compile(r'([a-zA-Z ]+):\n+(.+?)\n{2,}', re.DOTALL),
        # upper case header, colon, 2+ spaces, then the body up to 2+ newlines
        'para': re.compile(r'([A-Z ]+):[ ]{2,}(.+?)\n{2,}', re.DOTALL),
    }

    def _get_matches(self, text: str) -> Iterable[re.Match]:
        """Return section matches, selecting the pattern from the text.

        The ``para`` pattern is used when the text contains the literal
        ``HISTORY OF PRESENT ILLNESS:``, otherwise the ``header`` pattern.

        :param text: the note text to search

        :return: an iterator over the non-overlapping matches

        """
        key: str
        if 'HISTORY OF PRESENT ILLNESS:' in text:
            key = 'para'
        else:
            key = 'header'
        regex: re.Pattern = self._SECTION_REGEX[key]
        return regex.finditer(text)
@dataclass(repr=False)
class NursingOtherNote(RegexNote):
    """Contains regular expression parsed sections for notes of category
    ``Nursing/other``.

    """
    CATEGORY: ClassVar[str] = 'Nursing/other'

    # group 1 captures the header and group 2 the body
    _SECTION_REGEX: ClassVar[Dict[str, re.Pattern]] = {
        # header, colon, one space, then the body up to 2+ newlines
        'para': re.compile(r'([a-zA-Z ]+):[ ](.+?)\n{2,}', re.DOTALL),
    }

    def _get_matches(self, text: str) -> Iterable[re.Match]:
        """Return the ``para`` pattern matches found in ``text``.

        :param text: the note text to search

        :return: an iterator over the non-overlapping matches

        """
        regex: re.Pattern = self._SECTION_REGEX['para']
        return regex.finditer(text)
@dataclass(repr=False)
class EchoNote(RegexNote):
    """Contains regular expression parsed sections for notes of category
    ``Echo``.

    """
    CATEGORY: ClassVar[str] = 'Echo'

    # group 1 captures one of the known headers (case insensitive) and
    # group 2 the body
    _SECTION_REGEX: ClassVar[Dict[str, re.Pattern]] = {
        # known header, colon, newlines/spaces, then the body up to 2+
        # newlines
        'para': re.compile(
            r'(conclusions|findings|impression|indication'
            r'|patient/test information|clinical implications)'
            r':[\n ]+(.+?)\n{2,}',
            re.DOTALL | re.IGNORECASE),
    }

    def _get_matches(self, text: str) -> Iterable[re.Match]:
        """Return the ``para`` pattern matches found in ``text``.

        :param text: the note text to search

        :return: an iterator over the non-overlapping matches

        """
        regex: re.Pattern = self._SECTION_REGEX['para']
        return regex.finditer(text)
@dataclass(repr=False)
class PhysicianNote(RegexNote):
    """Contains regular expression parsed sections for notes of category
    ``Physician``.

    """
    CATEGORY: ClassVar[str] = 'Physician'

    # group 1 captures one of the known headers (case insensitive) and
    # group 2 the body
    _SECTION_REGEX: ClassVar[Dict[str, re.Pattern]] = {
        # three spaces, known header, optional colon and a newline, then the
        # body up to the next three-space indented ``<header>:`` line.  The
        # terminating header is a lookahead so it is not consumed: matches do
        # not overlap, so a consumed header could never start the next
        # section and directly adjacent sections would be dropped.
        'header': re.compile(
            r'[ ]{3}('
            r'HPI|Current medications|24 Hour Events'
            r'|Last dose of Antibiotics|Flowsheet Data|physical examination'
            r'|labs / radiology|assessment and plan|code status|disposition'
            r'):?\n(.+?)(?=\n[ ]{3}[a-zA-Z0-9/ ]+:)',
            re.DOTALL | re.IGNORECASE),
    }

    def _get_matches(self, text: str) -> Iterable[re.Match]:
        """Return the ``header`` pattern matches found in ``text``.

        :param text: the note text to search

        :return: an iterator over the non-overlapping matches

        """
        regex: re.Pattern = self._SECTION_REGEX['header']
        return regex.finditer(text)
@dataclass(repr=False)
class RadiologyNote(RegexNote):
    """Contains regular expression parsed sections for notes of category
    ``Radiology``.

    """
    CATEGORY: ClassVar[str] = 'Radiology'

    # group 1 captures the header and group 2 the body
    _SECTION_REGEX: ClassVar[Dict[str, re.Pattern]] = {
        # optional whitespace, upper case header, colon, 2+ newlines/spaces,
        # then the body up to 2+ newlines
        'para': re.compile(r'\s*([A-Z ]+):[\n ]{2,}(.+?)\n{2,}', re.DOTALL),
    }

    def _get_matches(self, text: str) -> Iterable[re.Match]:
        """Return the ``para`` pattern matches found in ``text``.

        :param text: the note text to search

        :return: an iterator over the non-overlapping matches

        """
        regex: re.Pattern = self._SECTION_REGEX['para']
        return regex.finditer(text)
@dataclass(repr=False)
class ConsultNote(RegexNote):
    """Contains regular expression parsed sections for notes of category
    ``Consult``.

    """
    CATEGORY: ClassVar[str] = 'Consult'

    # group 1 captures the header and group 2 the body
    _SECTION_REGEX: ClassVar[Dict[str, re.Pattern]] = {
        # optional whitespace, header, colon and newline(s), then the body up
        # to either 2+ newlines or whitespace followed by a period and a
        # newline
        'header': re.compile(
            r'\s*([a-zA-Z/ ]+):\n+(.+?)(?:[\n]{2,}|\s+\.\n)', re.DOTALL),
    }

    def _get_matches(self, text: str) -> Iterable[re.Match]:
        """Return the ``header`` pattern matches found in ``text``.

        :param text: the note text to search

        :return: an iterator over the non-overlapping matches

        """
        regex: re.Pattern = self._SECTION_REGEX['header']
        return regex.finditer(text)