# Source code for zensols.mimic.note

"""EHR related text documents.

"""
from __future__ import annotations
__author__ = 'Paul Landes'
from typing import (
    Dict, Iterable, Set, Tuple, List, Any, Optional, ClassVar, Sequence
)
from dataclasses import dataclass, field, fields
from abc import ABCMeta, abstractmethod
from enum import Enum, auto
import logging
import sys
import re
import collections
import copy
import itertools as it
from itertools import chain
from io import TextIOBase
from frozendict import frozendict
import pandas as pd
from zensols.config import Dictable, ConfigFactory
from zensols.persist import PersistableContainer, persisted, Primeable
from zensols.nlp import LexicalSpan, FeatureToken, FeatureDocument
from zensols.nlp.dataframe import FeatureDataFrameFactory
from . import NoteEvent

logger = logging.getLogger(__name__)



[docs]
class NoteFormat(Enum):
    """The output formats selectable in :meth:`.Note.format`, a parameterized
    method that writes a note.

    """
    text = auto()
    raw = auto()
    verbose = auto()
    summary = auto()
    json = auto()
    yaml = auto()
    markdown = auto()

    @property
    def ext(self) -> str:
        """The file name extension (without a leading dot) for this format."""
        # only the structured/markup formats have their own extension; every
        # other format is written as plain text
        specific = {'json': 'json', 'yaml': 'yaml', 'markdown': 'md'}
        return specific.get(self.name, 'txt')




[docs]
class SectionAnnotatorType(Enum):
    """Identifies who or what produced the :class:`.Section` annotations of a
    :class:`.Note`.  The `MedSecId`_ project adds the :obj:`HUMAN` and
    :obj:`MODEL` types.

    :see: `MedSecId <https://github.com/plandes/mimicsid>`_

    """
    NONE = auto()
    """The default, used for notes that have no section identifiers."""

    REGULAR_EXPRESSION = auto()
    """Sections were assigned automatically using regular expressions."""

    HUMAN = auto()
    """Sections were annotated by a `MedSecId`_ human annotator."""

    MODEL = auto()
    """Sections were predicted by a `MedSecId`_ model."""




[docs]
@dataclass
class ParagraphFactory(metaclass=ABCMeta):
    """Abstract strategy that breaks a section into its constituent
    paragraphs.

    """
    @abstractmethod
    def create(self, sec: Section) -> Iterable[FeatureDocument]:
        """Create the paragraphs of a section.

        :param sec: the section to split

        :return: the paragraphs, each as a feature document

        """





[docs]
@dataclass
class Section(PersistableContainer, Dictable):
    """A section segment with an identifier and represents a section of a
    :class:`.Note`, one for each section.  An example of a section is the
    *history of present illness* in a discharge note.

    """
    _DICTABLE_WRITABLE_DESCENDANTS: ClassVar[bool] = True
    _PERSITABLE_TRANSIENT_ATTRIBUTES: ClassVar[Set[str]] = {
        'container', '_doc_stash', '_paragraph_factory'}

    _SENT_FILTER_REGEX: ClassVar[re.Pattern] = re.compile(r'^\s*\d+\.\s*')
    """Remove enumerated lists (<number> .) as separate sentences.  Example is
    hadm=119960, cat=Discharge summary, section=Discharge Medications:
    ``1. Vancomycin 125 mg``.

    """
    FILTER_ENUMS: ClassVar[bool] = True
    """Whether to filter enumerated lists as separate sentences."""

    id: int = field()
    """The unique ID of the section."""

    name: Optional[str] = field()
    """The name of the section (i.e. ``hospital-course``).  This field is what's
    called the ``type`` in the paper, which is not used since ``type`` is a
    keyword in Python.  When ``None`` is given, the name is derived from the
    headers, or set to ``unknown`` when there are no headers.

    """
    container: SectionContainer = field(repr=False)
    """The container that has this section."""

    header_spans: Tuple[LexicalSpan, ...] = field()
    """The character offsets of the section headers.  The first is usually the
    :obj:`name` of the section.  If there are no headers, this is an 0-length
    tuple.

    """
    body_span: LexicalSpan = field()
    """Like :obj:`header_spans` but for the section body.  The body and name do
    not intersect.

    """
    def __post_init__(self):
        super().__init__()
        if self.name is None:
            # no name given: derive it from the header text when available
            if len(self.headers) == 0:
                self.name = 'unknown'
            else:
                header = ' '.join(self.headers)
                self.name = re.sub(r'[_/ ]+', '-', header).lower()

    @property
    def note_text(self) -> str:
        """The entire parent note's text."""
        return self.container.text

    @property
    @persisted('_headers', transient=True)
    def headers(self) -> Tuple[str, ...]:
        """The header text of the section, one string for each span in
        :obj:`header_spans`.

        """
        text = self.note_text
        return tuple(map(lambda s: text[s.begin:s.end], self.header_spans))

    @property
    def body(self) -> str:
        """The section body text, which is the note text at
        :obj:`body_span`.

        """
        return self.note_text[self.body_span.begin:self.body_span.end]

    def _get_doc(self) -> FeatureDocument:
        """Return the parsed document of the owning container."""
        return self.container._get_doc()

    @property
    def header_tokens(self) -> Iterable[FeatureToken]:
        """The container document's tokens that overlap with
        :obj:`header_spans`.

        """
        doc: FeatureDocument = self._get_doc()
        spans = doc.map_overlapping_tokens(self.header_spans)
        return chain.from_iterable(spans)

    @property
    def body_tokens(self) -> Iterable[FeatureToken]:
        """The container document's tokens that overlap with
        :obj:`body_span`.

        """
        doc: FeatureDocument = self._get_doc()
        return doc.get_overlapping_tokens(self.body_span)

    @property
    @persisted('_doc', transient=True)
    def doc(self) -> FeatureDocument:
        """A feature document of the section's text over :obj:`lexspan`
        (headers and body).  Enumerated list sentences are not filtered.

        """
        return self._narrow_doc(self._get_doc(), self.lexspan, False)

    @property
    @persisted('_body_doc', transient=True)
    def body_doc(self) -> FeatureDocument:
        """A feature document of this section's body text.  Enumerated list
        sentences are filtered when :obj:`FILTER_ENUMS` is ``True``.

        """
        return self._narrow_doc(self._get_doc(), self.body_span)

    def _narrow_doc(self, doc: FeatureDocument, span: LexicalSpan,
                    filter_sent: Optional[bool] = None) -> \
            FeatureDocument:
        """Return the portion of ``doc`` that overlaps ``span``.

        :param doc: the document to narrow

        :param span: the extent to keep

        :param filter_sent: whether to drop sentences matching
                            :obj:`_SENT_FILTER_REGEX`; defaults to
                            :obj:`FILTER_ENUMS` when ``None``

        """
        if filter_sent is None:
            filter_sent = self.FILTER_ENUMS
        # using inclusive=true will very often leave newlines, but keep the last
        # sentence character when the sentence chunker gets confused
        doc = doc.get_overlapping_document(span, inclusive=True)
        if filter_sent:
            sreg: re.Pattern = self._SENT_FILTER_REGEX
            doc.sents = tuple(filter(lambda s: sreg.match(s.text) is None,
                                     doc.sents))
        return doc

    @property
    @persisted('_lexspan')
    def lexspan(self) -> LexicalSpan:
        """The widest lexical extent of the sections, including headers."""
        return LexicalSpan.widen(
            chain.from_iterable(((self.body_span,), self.header_spans)))

    @property
    def text(self) -> str:
        """Get the entire text of the section, which includes the headers."""
        span: LexicalSpan = self.lexspan
        ntext: str = self.note_text
        return ntext[span.begin:span.end]

    @property
    @persisted('_paragraphs', transient=True)
    def paragraphs(self) -> Tuple[FeatureDocument, ...]:
        """The list of paragraphs, each as as a feature document, of this
        section's body text.

        """
        return tuple(self._paragraph_factory.create(self))

    @property
    def is_empty(self) -> bool:
        """Whether the content of the section is empty, meaning there are no
        headers and the body has only whitespace.

        """
        return len(self.header_spans) == 0 and len(self.body.strip()) == 0

    @staticmethod
    def header_to_name(s: str) -> str:
        """Convert a section header text to a section name."""
        return s.replace(' ', '-').lower()

    @staticmethod
    def name_to_header(s: str) -> str:
        """Convert a section name to a section header text.  Note that this uses
        a heuristic method that might generate a string that does not match the
        original header text.

        """
        return s.replace('-', ' ').capitalize()

    def _copy_resources(self, target: Section):
        """Copy the transient attributes and the ``_row_id`` of this section
        to ``target``.

        """
        for attr in self._PERSITABLE_TRANSIENT_ATTRIBUTES:
            setattr(target, attr, getattr(self, attr))
        target._row_id = self._row_id

    def clone(self) -> Section:
        """Return a shallow copy of this section that shares this section's
        transient resources.

        """
        clone = copy.copy(self)
        self._copy_resources(clone)
        return clone

    def write_sentences(self, depth: int = 0, writer: TextIOBase = sys.stdout,
                        container: FeatureDocument = None, limit: int = 0):
        """Write parsed sentences of the section with respective entities.

        :param depth: the starting indentation depth

        :param writer: the writer to dump the content of this writable

        :param container: the document having the sentences to write, which
                          defaults to :obj:`body_doc`

        :param limit: the maximum number of sentences to write; note the
                      default of 0 writes nothing

        """
        def map_ent(tp: Tuple[FeatureToken, ...]):
            """Map a feature token to a readable string."""
            if tp[0].ent_ == 'concept':
                desc = f' ({tp[0].cui_})'
            else:
                desc = f' ({tp[0].ent_})'
            return ' '.join(map(lambda t: t.norm, tp)) + desc

        container = self.body_doc if container is None else container
        for sent in it.islice(container, limit):
            self._write_divider(depth, writer)
            self._write_line(sent.norm, depth, writer)
            mtoks = tuple(map(lambda tk: f'{tk.text} ({tk.norm})',
                              filter(lambda t: t.mimic_ != FeatureToken.NONE,
                                     sent.token_iter())))
            if len(mtoks) > 0:
                self._write_line(f"mimic: {', '.join(mtoks)}", depth, writer)
            if len(sent.entities) > 0:
                ents = ', '.join(map(map_ent, sent.entities))
                self._write_line(f'entities: {ents}', depth, writer)

    def write_as_item(self, depth: int = 0, writer: TextIOBase = sys.stdout):
        """A terse output designed for list iteration."""
        self._write_line(f'id: {self.id}', depth, writer)
        self.write(depth + 1, writer, body_line_limit=0, norm_line_limit=0,
                   include_header_spans=True, include_body_span=True,
                   include_id_name=False)

    def write(self, depth: int = 0, writer: TextIOBase = sys.stdout,
              body_line_limit: int = sys.maxsize,
              norm_line_limit: int = sys.maxsize,
              par_limit: int = 0, sent_limit: int = 0,
              include_header: bool = True, include_id_name: bool = True,
              include_header_spans: bool = False,
              include_body_span: bool = False):
        """Write a note section's name, original body, normalized body and
        sentences with respective sentence entities.

        :param depth: the starting indentation depth

        :param writer: the writer to dump the content of this writable

        :param body_line_limit: the number of line of the section's body to
                                output

        :param norm_line_limit: the number of line of the section's normalized
                                (parsed) body to output

        :param par_limit: the number of paragraphs to output

        :param sent_limit: the number of sentences (per paragraph) to output

        :param include_header: whether to include the header

        :param include_id_name: whether to write the section ID and name

        :param include_header_spans: whether to write the header spans

        :param include_body_span: whether to write the body span

        """
        header = ' '.join(self.headers)
        if include_id_name:
            self._write_line(f'id: {self.id}', depth, writer)
            self._write_line(f'name: {self.name}', depth, writer)
        if include_header:
            self._write_line(f'header: {header}', depth, writer)
        if include_header_spans:
            self._write_line(f'header spans: {self.header_spans}',
                             depth, writer)
        if include_body_span:
            self._write_line(f'body span: {self.body_span}', depth, writer)
        # only sections that have body text have anything further to write
        if len(self.body) > 0:
            if body_line_limit > 0:
                self._write_line('body:', depth, writer)
                self._write_block(self.body, depth + 1, writer,
                                  limit=body_line_limit)
            if norm_line_limit > 0:
                self._write_line('normalized:', depth, writer)
                self._write_block(self.body_doc.norm, depth + 1, writer,
                                  limit=norm_line_limit)
            if par_limit > 0 and sent_limit > 0:
                # honor the paragraph limit rather than only using it as a flag
                for par in it.islice(self.paragraphs, par_limit):
                    self._write_line('paragraph:', depth, writer)
                    self.write_sentences(depth + 1, writer, par, sent_limit)

    def __len__(self) -> int:
        """The summed length of the body span and all header spans."""
        return len(self.body_span) + sum(map(len, self.header_spans))

    def __str__(self):
        return f'{self.name} ({self.id}): body_len={len(self.body)}'




[docs]
@dataclass
class SectionContainer(Dictable, metaclass=ABCMeta):
    """A *note like* container base class that has sections.  Note based classes
    extend this base class.  Sections in order of their position in the document
    are produced when using this class as an iterable.

    """
    _DICTABLE_ATTRIBUTES: ClassVar[Set[str]] = {'sections'}
    DEFAULT_SECTION_NAME: ClassVar[str] = 'default'
    """The name of the singleton section used when the note is not sectioned."""

    @abstractmethod
    def _get_doc(self) -> FeatureDocument:
        """Return the parsed document that represents the text in this
        container."""
        pass

    @abstractmethod
    def _get_sections(self) -> Iterable[Section]:
        """Generate the sections cached and returned in the :obj:`sections`
        property.

        """
        pass

    @staticmethod
    def category_to_id(s: str) -> str:
        """Convert a category string (i.e. ``Discharge summary``) to a category
        ID (i.e. ``discharge-summary``).

        """
        return Section.header_to_name(s)

    @staticmethod
    def id_to_category(s: str) -> str:
        """Convert a category ID (i.e. ``discharge-summary``) to a category
        string (i.e. ``Discharge summary``).

        """
        return Section.name_to_header(s)

    @property
    @persisted('_sections')
    def sections(self) -> Dict[int, Section]:
        """A map from the unique section identifier to a note section.

        """
        secs: Iterable[Section] = self._get_sections()
        return frozendict({sec.id: sec for sec in secs})

    @property
    @persisted('_sections_ordered', transient=True)
    def sections_ordered(self) -> Tuple[Section, ...]:
        """Sections returned in ascending order of their :obj:`.Section.id`."""
        return tuple(map(lambda t: t[1], sorted(
            self.sections.items(), key=lambda t: t[0])))

    @property
    @persisted('_by_name', transient=True)
    def sections_by_name(self) -> Dict[str, Tuple[Section, ...]]:
        """A map from the name of a section (i.e. *history of present illness*
        in discharge notes) to the note sections having that name.

        """
        by_name = collections.defaultdict(list)
        for s in self.sections.values():
            by_name[s.name].append(s)
        return frozendict(map(lambda s: (s[0], tuple(s[1])), by_name.items()))

    @property
    def section_dataframe(self) -> pd.DataFrame:
        """A Pandas dataframe containing the section's name, ID, body text,
        header spans and body offset span.

        """
        rows = []
        cols = 'name id body headers body_begin body_end'.split()
        sec: Section
        for sec in self.sections.values():
            rows.append((sec.name, sec.id, sec.body,
                         tuple(map(lambda s: s.astuple, sec.header_spans)),
                         sec.body_span.begin, sec.body_span.end))
        return pd.DataFrame(rows, columns=cols)

    @property
    def feature_dataframe(self) -> pd.DataFrame:
        """A dataframe useful for features used in an ML model.  It is the
        concatenation of one dataframe per section body with the added
        ``section`` and ``section_id`` columns.

        """
        def map_df(sec: Section):
            df = dataframe_factory(sec.body_doc)
            df['section'] = sec.name
            df['section_id'] = sec.id
            return df

        dataframe_factory: FeatureDataFrameFactory = \
            self._trans_context['dataframe_factory']
        dfs = map(map_df, self.sections.values())
        return pd.concat(dfs, ignore_index=True, copy=False)

    def write_fields(self, depth: int = 0, writer: TextIOBase = sys.stdout):
        """Write note header fields such as the ``row_id`` and ``category``.
        This base implementation writes nothing.

        """
        pass

    def write_human(self, depth: int = 0, writer: TextIOBase = sys.stdout,
                    normalize: bool = False):
        """Generates a human readable version of the annotation.  This calls the
        following methods in order: :meth:`write_fields` and
        :meth:`write_sections`.

        :param depth: the starting indentation depth

        :param writer: the writer to dump the content of this writable

        :param normalize: whether to use the paragraphs' normalized
                          (:obj:`~zensols.nlp.TokenContainer.norm`) or text

        """
        self.write_fields(depth, writer)
        self.write_sections(depth, writer, normalize=normalize)

    def write_sections(self, depth: int = 0, writer: TextIOBase = sys.stdout,
                       normalize: bool = False):
        """Writes the sections of the container in document order.

        :param depth: the starting indentation depth

        :param writer: the writer to dump the content of this writable

        :param normalize: whether to use the paragraphs' normalized
                          (:obj:`~zensols.nlp.TokenContainer.norm`) or text

        """
        for sec in self:
            header = ' '.join(sec.headers)
            div_text: str = f'{sec.id}:{sec.name}'
            if len(header) > 0:
                div_text += f' ({header})'
            self._write_divider(depth, writer, header=div_text)
            if normalize:
                for i, para in enumerate(sec.paragraphs):
                    if i > 0:
                        self._write_empty(writer)
                    self._write_wrap(para.norm, depth, writer)
            elif len(sec.body) > 0:
                self._write_block(sec.body, depth, writer)

    def write_markdown(self, depth: int = 0, writer: TextIOBase = sys.stdout,
                       normalize: bool = False):
        """Generates markdown version of the annotation.

        :param depth: the starting indentation depth

        :param writer: the writer to dump the content of this writable

        :param normalize: whether to use the paragraphs' normalized
                          (:obj:`~zensols.nlp.TokenContainer.norm`) or text

        """
        # NOTE(review): ``category`` and ``row_id`` are not defined by this
        # class; presumably they are provided by subclasses such as Note
        self._write_line(f'# {self.category} ({self.row_id})', depth, writer)
        for sec in self.sections.values():
            header = ' '.join(sec.headers)
            self._write_empty(writer)
            self._write_empty(writer)
            self._write_line(f'## {header}', depth, writer)
            self._write_empty(writer)
            if normalize:
                for i, para in enumerate(sec.paragraphs):
                    if i > 0:
                        self._write_empty(writer)
                    self._write_wrap(para.norm, depth, writer)
            elif len(sec.body) > 0:
                self._write_block(sec.body, depth, writer)

    def write_full(self, depth: int = 0, writer: TextIOBase = sys.stdout,
                   note_line_limit: int = sys.maxsize,
                   section_line_limit: int = sys.maxsize,
                   section_sent_limit: int = sys.maxsize,
                   include_section_header: bool = True,
                   sections: Set[str] = None,
                   include_fields: bool = True,
                   include_note_divider: bool = True,
                   include_section_divider: bool = True):
        """Write the custom parts of the note.

        :param depth: the starting indentation depth

        :param writer: the writer to dump the content of this writable

        :param note_line_limit: the number of lines to write from the note
                                text (not used by this base implementation)

        :param section_line_limit: the number of line of the section's body and
                                   normalized body to output

        :param section_sent_limit: the number of sentences to output, passed
                                   as ``sent_limit`` to :meth:`.Section.write`

        :param sections: the sections, by name, to write; all sections are
                         written if ``None``

        :param include_section_header: whether to include the header

        :param include_fields: whether to write the note fields (not used by
                               this base implementation)

        :param include_note_divider: whether to write dividers between notes

        :param include_section_divider: whether to write dividers between
                                        sections

        """
        secs: Sequence[Section] = self.sections.values()
        if sections is not None:
            secs = tuple(filter(lambda s: s.name in sections, secs))
        if len(secs) > 0:
            self._write_line('sections:', depth + 1, writer)
            sec: Section
            for sec in secs:
                aft: str = ''
                if section_line_limit == 0 and include_section_header:
                    aft = ':'
                self._write_line(f'{sec.name}{aft}', depth + 2, writer)
                sec.write(depth + 3, writer,
                          include_id_name=False,
                          body_line_limit=section_line_limit,
                          norm_line_limit=section_line_limit,
                          sent_limit=section_sent_limit,
                          include_header=include_section_header)
                if include_section_divider:
                    self._write_divider(depth + 3, writer)
        if include_note_divider:
            self._write_divider(depth, writer, '=')

    def write_by_format(self, depth: int = 0, writer: TextIOBase = sys.stdout,
                        note_format: NoteFormat = NoteFormat.text):
        """Write the note in the specified format.

        :param depth: the starting indentation depth

        :param writer: the writer to dump the content of this writable

        :param note_format: the format to use for the output, which defaults
                            to the human readable text format

        :raises KeyError: if ``note_format`` is not a :class:`.NoteFormat`
                          member

        """
        def summary_format(writer: TextIOBase):
            # one line per section: description, header spans and length
            for s in self.sections.values():
                print(s, s.header_spans, len(s), file=writer)

        {NoteFormat.text: lambda: self.write_human(depth, writer),
         NoteFormat.verbose: lambda: self.write_full(depth, writer),
         NoteFormat.raw: lambda: writer.write(self.text),
         NoteFormat.json: lambda: self.asjson(writer=writer, indent=4),
         NoteFormat.yaml: lambda: self.asyaml(writer=writer, indent=4),
         NoteFormat.markdown: lambda: self.write_markdown(depth, writer),
         NoteFormat.summary: lambda: summary_format(writer),
         }[note_format]()

    def write(self, depth: int = 0, writer: TextIOBase = sys.stdout):
        """Write the container using :meth:`write_human`."""
        self.write_human(depth, writer)

    def __getitem__(self, id: int) -> Section:
        """Return the section with unique identifier ``id``."""
        return self.sections[id]

    def __iter__(self) -> Iterable[Section]:
        """Iterate over the sections sorted by their lexical span."""
        return iter(sorted(self.sections.values(), key=lambda s: s.lexspan))




[docs]
@dataclass
class GapSectionContainer(SectionContainer):
    """A container that takes the sections of a note and adds sections for the
    text of the note that is not covered by any of them.

    """
    delegate: Note = field()
    """The note with the sections to be filled."""

    filter_empty: bool = field()
    """Whether to filter empty sections."""

    def _get_doc(self) -> FeatureDocument:
        """Return the parsed document of the delegate note."""
        return self.delegate._get_doc()

    def _get_sections(self) -> Iterable[Section]:
        """Return clones of the delegate's sections plus one new section for
        each gap between them, sorted by lexical span and renumbered from 0.
        The previous identifier of each section is kept in ``original_id``.

        """
        secs: List[Section] = [
            s.clone() for s in self.delegate.sections.values()]
        if len(secs) == 0:
            # nothing to use as a reference for the gap sections
            return secs
        text_len: int = len(self.delegate.text)
        uncovered: Sequence[LexicalSpan] = LexicalSpan.gaps(
            spans=(s.lexspan for s in secs),
            end=text_len)
        # the first cloned section supplies the container and resources
        template: Section = secs[0]
        parent: SectionContainer = template.container
        fillers: List[Section] = []
        span: LexicalSpan
        for span in uncovered:
            filler = Section(
                id=-1,
                name=None,
                container=parent,
                header_spans=(),
                body_span=span)
            if self.filter_empty and filler.is_empty:
                continue
            template._copy_resources(filler)
            fillers.append(filler)
        secs += fillers
        secs.sort(key=lambda s: s.lexspan)
        sec: Section
        for new_id, sec in enumerate(secs):
            sec.original_id = sec.id
            sec.id = new_id
        return secs




[docs]
@dataclass(repr=False)
class Note(NoteEvent, SectionContainer):
    """A note event that is also a container of :class:`.Section` instances,
    which are available with the property :obj:`sections`.

    """
    _PERSITABLE_PROPERTIES: ClassVar[Set[str]] = {'sections'} | \
        NoteEvent._PERSITABLE_PROPERTIES
    _DICTABLE_WRITE_EXCLUDES: ClassVar[Set[str]] = \
        NoteEvent._DICTABLE_WRITE_EXCLUDES | {'sections'}
    _DICTABLE_WRITABLE_DESCENDANTS: ClassVar[bool] = True

    def _get_sections(self) -> Iterable[Section]:
        """Create a single default section that spans the whole note text."""
        whole_note = LexicalSpan(0, len(self.text))
        default_sec = Section(
            id=0,
            name=self.DEFAULT_SECTION_NAME,
            container=self,
            header_spans=(),
            body_span=whole_note)
        default_sec._row_id = self.row_id
        return [default_sec]

    @property
    def section_annotator_type(self) -> SectionAnnotatorType:
        """The type of annotator that sectioned the note."""
        return self._get_section_annotator_type()

    def _get_section_annotator_type(self) -> SectionAnnotatorType:
        """Return the annotator type, which is ``NONE`` for this class."""
        return SectionAnnotatorType.NONE

    def _trans_context_update(self, trans_context: Dict[str, Any]):
        """Set this note, its row ID and the transient resources found in
        ``trans_context`` on every section.

        """
        section: Section
        for section in self.sections.values():
            section.container = self
            section._row_id = self.row_id
            section._doc_stash = trans_context['doc_stash']
            section._paragraph_factory = trans_context['paragraph_factory']

    def write_fields(self, depth: int = 0, writer: TextIOBase = sys.stdout):
        """Write the row ID, category, description and annotator type."""
        annotator_type: SectionAnnotatorType = self.section_annotator_type
        self._write_line(f'row_id: {self.row_id}', depth, writer)
        self._write_line(f'category: {self.category}', depth, writer)
        self._write_line(f'description: {self.description}', depth, writer)
        annotator: str = annotator_type.name.lower()
        self._write_line(f'annotator: {annotator}', depth, writer)

    def write_full(self, depth: int = 0, writer: TextIOBase = sys.stdout,
                   note_line_limit: int = sys.maxsize,
                   section_line_limit: int = sys.maxsize,
                   section_sent_limit: int = sys.maxsize,
                   include_section_header: bool = True,
                   sections: Set[str] = None,
                   include_fields: bool = True,
                   include_note_divider: bool = True,
                   include_section_divider: bool = True):
        """Write the note with the parent ``write`` method, then the sections
        with the parent ``write_full`` method using the same parameters.

        """
        # first the note itself
        super().write(
            depth, writer,
            line_limit=note_line_limit,
            include_fields=include_fields)
        # then the sections
        super().write_full(
            depth, writer,
            note_line_limit=note_line_limit,
            section_line_limit=section_line_limit,
            section_sent_limit=section_sent_limit,
            include_section_header=include_section_header,
            sections=sections,
            include_fields=include_fields,
            include_note_divider=include_note_divider,
            include_section_divider=include_section_divider)

    def write(self, depth: int = 0, writer: TextIOBase = sys.stdout):
        """Write the note using :meth:`.SectionContainer.write`."""
        SectionContainer.write(self, depth, writer)





[docs]
@dataclass
class NoteFactory(Primeable):
    """Creates an instance of :class:`.Note` from :class:`.NoteEvent`.

    """
    config_factory: ConfigFactory = field()
    """The factory used to create notes.

    """
    category_to_note: Dict[str, str] = field()
    """A mapping between notes' category to section name for :class:`.Note`
    configuration.

    """
    mimic_default_note_section: str = field()
    """The section name holding the configuration of the class to create when
    there is no mapping in :obj:`category_to_note`.

    """
    def _event_to_note(self, note_event: NoteEvent, section: str,
                       params: Dict[str, Any] = None) -> Note:
        """Create a note from the application configuration.  The new instance
        is initialized with the dataclass fields of ``note_event``.

        :param note_event: the source data

        :param section: the configuration section that details the class

        :param params: used to initialize the new instance, which override the
                       values taken from ``note_event``

        """
        if logger.isEnabledFor(logging.DEBUG):
            logger.debug(f'even to note (section={section}): {note_event}')
        ne_params = {f.name: getattr(note_event, f.name)
                     for f in fields(note_event)}
        if params is not None:
            ne_params.update(params)
        return self.config_factory.new_instance(section, **ne_params)

    def _create_from_note_event(self, note_event: NoteEvent,
                                section: str = None) -> Note:
        """Because subclasses override :meth:`create`, we need a method that
        specifically creates from :class:`.NoteEvent` for subclasses that
        recover from errors (such as MedSecId prediction) when they cannot
        create notes themselves.  This method provides a way to create them
        directly using the default regular expressions (:mod:`regexnote`).

        **Important**: do not override this method.

        :param note_event: the source data

        :param section: the configuration section to use to create the new note,
                        which is one of the regular expression based sections or
                        :obj:`mimic_default_note_section` for a :class:`.Note`;
                        if ``None``, it is looked up in
                        :obj:`category_to_note` by the event's category, then
                        defaults to :obj:`mimic_default_note_section`

        """
        if logger.isEnabledFor(logging.DEBUG):
            logger.debug(f'create note from event: {note_event}')
        if section is None:
            section = self.category_to_note.get(note_event.category)
        if section is None:
            section = self.mimic_default_note_section
        return self._event_to_note(note_event, section)

    def create(self, note_event: NoteEvent) -> Note:
        """Create a new factory based instance of a :class:`.Note` from a
        :class:`.NoteEvent`.

        :param note_event: the source data

        """
        return self._create_from_note_event(note_event, None)

    def create_default(self, note_event: NoteEvent) -> Note:
        """Like :meth:`.create` but always create the default (:class:`.Note`)
        note.

        :param note_event: the source data

        :return: always an instance of :class:`.Note`

        """
        return self._create_from_note_event(
            note_event, self.mimic_default_note_section)

    def prime(self):
        """The MedSecId project primes by installing the model files.  This
        implementation only logs a message.

        """
        if logger.isEnabledFor(logging.INFO):
            logger.info('priming...')

    def __call__(self, note_event: NoteEvent, section: str = None) -> Note:
        """Create a note, see :meth:`.create`.

        :param note_event: the source data

        :param section: the configuration section to use to create the note;
                        if ``None``, :meth:`.create` is used

        """
        # create() accepts only the note event, so an explicit section has to
        # go through the section aware method
        if section is None:
            return self.create(note_event)
        return self._create_from_note_event(note_event, section)




[docs]
@dataclass
class DefaultNoteFactory(NoteFactory):
    """A :class:`.NoteFactory` whose :meth:`create` always produces the
    default note.

    :see: :meth:`.NoteFactory.create_default`

    """
    def create(self, note_event: NoteEvent) -> Note:
        """Create the default note for ``note_event``.

        :param note_event: the source data

        """
        default_note: Note = self.create_default(note_event)
        return default_note