"""Discharge summary research and Mimic III data exploration.
"""
__author__ = 'Paul Landes'
from dataclasses import dataclass, field
import logging
import sys
from pathlib import Path
from io import TextIOBase
from zensols.config import Dictable, ConfigFactory
from . import (
RecordNotFoundError, HospitalAdmission, HospitalAdmissionDbStash,
PatientPersister, AdmissionPersister, DiagnosisPersister,
NoteEventPersister, Note,
)
logger = logging.getLogger(__name__)
@dataclass
class Corpus(Dictable):
    """A container class that provides access to the MIMIC-III dataset using a
    relational database (by default Postgres per the resource library
    configuration).  It also has methods to dump corpus statistics.

    :see: `Resource Libraries <https://plandes.github.io/util/doc/config.html#resource-libraries>`_

    """
    config_factory: ConfigFactory = field()
    """Used to clear the note event cache."""

    patient_persister: PatientPersister = field()
    """The persister for the ``patients`` table."""

    admission_persister: AdmissionPersister = field()
    """The persister for the ``admissions`` table."""

    diagnosis_persister: DiagnosisPersister = field()
    """The persister for the ``diagnosis`` table."""

    note_event_persister: NoteEventPersister = field()
    """The persister for the ``noteevents`` table."""

    hospital_adm_stash: HospitalAdmissionDbStash = field()
    """Creates hospital admission instances.  Note that this might be a caching
    stash instance, but method calls are delegated through to the instance of
    :class:`.HospitalAdmissionDbStash`.

    """
    temporary_results_dir: Path = field()
    """The path to create the output results.  This is not used, but needs to
    stay until the next :mod:`zensols.mimicsid` is retrained.

    """
    def __post_init__(self):
        # allow pass through method delegation from any configured cache
        # stashes on to the HospitalAdmissionDbStash such as `process_keys`
        self.hospital_adm_stash.delegate_attr = True

    def clear(self, include_notes: bool = True):
        """Clear all cached admissions and, optionally, the note parses.

        :param include_notes: whether to also clear the parsed notes cache

        """
        self.hospital_adm_stash.clear()
        if include_notes:
            # the note event cache stashes are looked up by section name in
            # the application configuration and cleared to remove cached
            # parsed files
            # NOTE(review): both clears are assumed to belong to this branch;
            # confirm against the upstream source
            self.config_factory('mimic_note_event_persister_stash').clear()
            self.config_factory('mimic_hospital_adm_factory_stash').clear()

    def get_hospital_adm_by_id(self, hadm_id: int) -> HospitalAdmission:
        """Return a hospital admission by its unique identifier.

        :param hadm_id: the admission ID, which is converted to a string to
                        key the admission stash

        """
        return self.hospital_adm_stash[str(hadm_id)]

    def get_hospital_adm_for_note(self, row_id: int) -> HospitalAdmission:
        """Return an admission that has note ``row_id``.

        :param row_id: the unique ID of the note event

        :raise: RecordNotFoundError if ``row_id`` is not found in the database

        """
        hadm_id: int = self.note_event_persister.get_hadm_id(row_id)
        if hadm_id is None:
            # report the note ID that was looked up; ``hadm_id`` is always
            # ``None`` here and says nothing about what was missing
            raise RecordNotFoundError(self, 'row_id', row_id)
        return self.hospital_adm_stash[str(hadm_id)]

    def get_note_by_id(self, row_id: int) -> Note:
        """Return the note (via the hospital admission) for ``row_id``.

        :param row_id: the unique ID of the note event

        :raise: RecordNotFoundError if ``row_id`` is not found in the database

        """
        return self.get_hospital_adm_for_note(row_id)[row_id]

    def write_note_event_counts(self, subject_id: int, depth: int = 0,
                                writer: TextIOBase = sys.stdout):
        """Print a list of hospital admissions by count of related notes in
        descending order.

        :param subject_id: the ID of the patient whose notes are counted

        :param depth: the indentation level passed to the line writer

        :param writer: the data sink to write to

        :see: :meth:`.NoteEventPersister.get_note_counts_by_subject_id`

        """
        persister: NoteEventPersister = self.note_event_persister
        counts = persister.get_note_counts_by_subject_id(subject_id)
        for hadm_id, count in counts:
            self._write_line(f'{hadm_id}: {count}', depth, writer)

    def write_hosptial_count_admission(self, depth: int = 0,
                                       writer: TextIOBase = sys.stdout,
                                       limit: int = sys.maxsize):
        """Write the counts for each hospital admission.

        The misspelled method name (``hosptial``) is kept for backward
        compatibility with existing callers.

        :param depth: the indentation level passed to the line writer

        :param writer: the data sink to write to

        :param limit: the limit on the return admission counts

        :see: :meth:`.AdmissionPersister.get_admission_admission_counts`

        """
        persister: AdmissionPersister = self.admission_persister
        for row in persister.get_admission_admission_counts(limit):
            self._write_line(str(row), depth, writer)

    def write_hospital_admission(self, hadm_id: int, depth: int = 0,
                                 writer: TextIOBase = sys.stdout,
                                 note_line_limit: int = sys.maxsize):
        """Write the hospital admission identified by ``hadm_id``.

        :param hadm_id: the admission ID, which is converted to a string to
                        key the admission stash like the other accessors

        :param depth: the indentation level passed to the admission's writer

        :param writer: the data sink to write to

        :param note_line_limit: passed through to the admission's ``write``
                                method

        :raise: RecordNotFoundError if no admission is returned for
                ``hadm_id``

        """
        fac: HospitalAdmissionDbStash = self.hospital_adm_stash
        # use a string key, consistent with ``get_hospital_adm_by_id``
        hadm: HospitalAdmission = fac.get(str(hadm_id))
        if hadm is None:
            # presumably ``get`` yields ``None`` on a miss (see the zensols
            # ``Stash`` API); fail with a domain error rather than an
            # ``AttributeError`` on ``None``
            raise RecordNotFoundError(self, 'hadm_id', hadm_id)
        hadm.write(depth, writer, note_line_limit=note_line_limit)

    def write(self, depth: int = 0, writer: TextIOBase = sys.stdout):
        """Write the number of patients, admissions and notes in the corpus.

        :param depth: the indentation level passed to the line writer

        :param writer: the data sink to write to

        """
        n_notes: int = self.note_event_persister.get_count()
        n_adms: int = self.admission_persister.get_count()
        n_patients: int = self.patient_persister.get_count()
        self._write_line(f'patients: {n_patients:,}', depth, writer)
        self._write_line(f'admissions: {n_adms:,}', depth, writer)
        self._write_line(f'notes: {n_notes:,}', depth, writer)