"""A utility class to summarize all results in a directory.
"""
__author__ = 'Paul Landes'
from typing import Dict, Tuple, List, Any, Callable
from dataclasses import dataclass, field
from pathlib import Path
import logging
import parse
from collections import OrderedDict
import pandas as pd
import numpy as np
import math
import scipy.stats
from zensols.util.time import time
from zensols.persist import persisted, FileTextUtil, Stash
from zensols.datdesc import DataFrameDescriber, DataDescriber
from zensols.deeplearn import DatasetSplitType
from . import (
ModelResult, EpochResult, DatasetResult, ModelResultManager, ArchivedResult,
Metrics, PredictionsDataFrameFactory
)
logger = logging.getLogger(__name__)
@dataclass
class ModelResultReporter(object):
"""Summarize all results in a directory from the output of model execution
from :class:`~zensols.deeplearn.model.ModelExectuor`.
The class iterates through the pickled binary output files from the run and
summarizes in a Pandas dataframe, which is handy for reporting in papers.
"""
result_manager: ModelResultManager = field()
"""Contains the results to report on--and specifically the path to directory
where the results were persisted.
"""
include_validation: bool = field(default=True)
"""Whether or not to include validation performance metrics."""
@persisted('_archive_results')
def _get_archive_results(self) -> Tuple[Tuple[str, ArchivedResult], ...]:
stash: Stash = self.result_manager.results_stash
return sorted(stash.items(), key=lambda t: t[0])
def _add_rows(self, fname: str, arch_res: ArchivedResult) -> List[Any]:
if logger.isEnabledFor(logging.INFO):
logger.info(f'reading results from {fname}')
dpt_key: str = 'n_total_data_points'
res: ModelResult = arch_res.model_result
train: DatasetResult = res.dataset_result.get(
DatasetSplitType.train)
validate: DatasetResult = res.dataset_result.get(
DatasetSplitType.validation)
test: DatasetResult = res.dataset_result.get(DatasetSplitType.test)
if validate is not None:
conv_epoch: int = validate.statistics['n_epoch_converged']
ver: EpochResult = validate.converged_epoch
else:
conv_epoch = None
ver: EpochResult = None
if test is not None:
vm: Metrics = ver.metrics
tm: Metrics = None
if not test.is_ended:
# production models will not have test results
logger.warning(
f'no test results found for {arch_res}--not reporting')
else:
tm = test.metrics
features = ', '.join(res.decoded_attributes)
row: List[Any] = [res.name, fname, train.start_time, train.end_time,
test.start_time, test.end_time,
conv_epoch, features]
if tm is None:
row.extend([float('nan')] * 10)
else:
row.extend([
tm.weighted.f1, tm.weighted.precision, tm.weighted.recall,
tm.micro.f1, tm.micro.precision, tm.micro.recall,
tm.macro.f1, tm.macro.precision, tm.macro.recall,
tm.accuracy])
if self.include_validation:
row.extend([
vm.weighted.f1, vm.weighted.precision,
vm.weighted.recall,
vm.micro.f1, vm.micro.precision, vm.micro.recall,
vm.macro.f1, vm.macro.precision, vm.macro.recall,
vm.accuracy])
row.extend([
train.statistics[dpt_key], validate.statistics[dpt_key],
test.statistics[dpt_key]])
if logger.isEnabledFor(logging.INFO):
logger.info('result calculation complete for ' +
f'{res.name} ({fname})')
return row
@property
def dataframe(self) -> pd.DataFrame:
"""Return the summarized results (see class docs).
:return: the Pandas dataframe of the results
"""
rows: List[List[Any]] = []
res_stash: Stash = self.result_manager.results_stash
n_res: int = len(res_stash)
cols = 'name resid train_start train_end test_start test_end converged features'.split()
cols.extend(PredictionsDataFrameFactory.TEST_METRIC_COLUMNS)
if self.include_validation:
cols.extend(PredictionsDataFrameFactory.VALIDATION_METRIC_COLUMNS)
cols.extend('train_occurs validation_occurs test_occurs'.split())
if n_res == 0:
logger.warning(f'no results found in: {self.result_manager}')
arch_res: ArchivedResult
for fname, arch_res in self._get_archive_results():
rows.append(self._add_rows(fname, arch_res))
return pd.DataFrame(rows, columns=cols)
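# A hedged sketch of slicing the summary down to just the test metrics; the
# ``TEST_METRIC_COLUMNS`` constant is the same one used to build ``cols``
# above, while the ``reporter`` instance is assumed to exist:
#
#     df = reporter.dataframe
#     df_test = df[['name', 'resid'] +
#                  list(PredictionsDataFrameFactory.TEST_METRIC_COLUMNS)]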
def _create_data_frame_describer(self, df: pd.DataFrame,
desc: str = 'Summary Model Results',
metric_metadata: Dict[str, str] = None) \
-> DataFrameDescriber:
mdesc: Dict[str, str] = dict(
PredictionsDataFrameFactory.METRIC_DESCRIPTIONS)
if metric_metadata is not None:
mdesc.update(metric_metadata)
meta: Tuple[Tuple[str, str], ...] = tuple(map(
lambda c: (c, mdesc[c]), df.columns))
return DataFrameDescriber(
name=FileTextUtil.normalize_text(desc),
df=df,
desc=f'{self.result_manager.name.capitalize()} {desc}',
meta=meta)
@property
def dataframe_describer(self) -> DataFrameDescriber:
"""Get a dataframe describer of metrics (see :obj:`dataframe`)."""
return self._create_data_frame_describer(df=self.dataframe)
def _cross_validate_summary(self) -> DataFrameDescriber:
from zensols.dataset import StratifiedCrossFoldSplitKeyContainer
def map_name(name: str, axis: int) -> str:
p: parse.Result = parse.parse(fold_format, name)
# TODO: iter_ix -> repeat_ix
return pd.Series((int(p['fold_ix']), int(p['iter_ix'])))
fold_format: str = StratifiedCrossFoldSplitKeyContainer.FOLD_FORMAT
test_cols: List[str] = \
list(PredictionsDataFrameFactory.TEST_METRIC_COLUMNS)
val_cols: List[str] = \
list(PredictionsDataFrameFactory.VALIDATION_METRIC_COLUMNS)
test_cols.append('test_occurs')
val_cols.append('validation_occurs')
df: pd.DataFrame = self.dataframe.drop(columns=test_cols).\
rename(columns=dict(zip(val_cols, test_cols)))
fold_cols: List[str] = ['fold', 'repeat']
cols: List[str] = df.columns.to_list()
df[fold_cols] = df['name'].apply(map_name, axis=1)
df = df[fold_cols + cols]
df = df.drop(columns=['name'])
df = df.sort_values(fold_cols)
dfd: DataFrameDescriber = self._create_data_frame_describer(
df=df,
desc='Cross Validation Results',
metric_metadata={'fold': 'fold number',
'repeat': 'sub-fold repeat'})
return dfd
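# How the fold/repeat parse above behaves, as a sketch: ``FOLD_FORMAT`` is the
# key template from :class:`StratifiedCrossFoldSplitKeyContainer`; the format
# string shown here is hypothetical and only illustrates how :mod:`parse`
# extracts ``fold_ix`` and ``iter_ix`` from a result name:
#
#     import parse
#     fold_format = 'cv-fold-{fold_ix}-iter-{iter_ix}'  # hypothetical format
#     p = parse.parse(fold_format, 'cv-fold-2-iter-0')
#     assert (int(p['fold_ix']), int(p['iter_ix'])) == (2, 0)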
def _calc_t_ci(self, data: np.ndarray) -> Tuple[float, float]:
"""Compute the mean 95% confidence interval assuming a normal
distribution of scores treating the scores as a mean distribution.
:param data: the sample
:param confidence: the interval to use for confidence
:return: the confidence interval
:see: :obj:`cross_validate_describer` for calculation reference
:see: `Definition: <https://en.wikipedia.org/wiki/Confidence_interval>`_
:see: `Mean distribution: <https://sebastianraschka.com/blog/2022/confidence-intervals-for-ml.html>`_
"""
# standard reporting alpha
confidence: float = 0.95
# ensure correct computation
data: np.ndarray = data.astype(float)
# sample size
n: int = len(data)
# mu
m: float = np.mean(data)
# standard error on the distribution of means
se_dist: float = scipy.stats.sem(data)
# undo the mean-computation denominator since our scores are already
# a distribution of means
se: float = se_dist * math.sqrt(n)
# compute the t-value from the t-distribution
t_value: float = scipy.stats.t.ppf((1 + confidence) / 2., df=n - 1)
# margin of error
ci_len: float = se * t_value
return m - ci_len, m + ci_len
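# A cross-check sketch of the computation above using scipy's interval helper
# (not the code path used by this class); the sample values are made up and
# the first positional argument of ``t.interval`` is the confidence level
# (named ``alpha`` in older scipy releases):
#
#     import numpy as np
#     import scipy.stats as st
#     scores = np.array([0.81, 0.79, 0.84, 0.80, 0.82])
#     m, s = scores.mean(), scores.std(ddof=1)
#     # ``se_dist * sqrt(n)`` above reduces to the sample standard deviation
#     lo, hi = st.t.interval(0.95, df=len(scores) - 1, loc=m, scale=s)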
def _get_metadata(self, df: pd.DataFrame) -> Dict[str, int]:
return {'folds': df['fold'].max().item() + 1,
'repeats': df['repeat'].max().item() + 1}
def _cross_validate_stats(self, dfd_res: DataFrameDescriber) -> \
DataFrameDescriber:
cols: List[str] = list(PredictionsDataFrameFactory.TEST_METRIC_COLUMNS)
cvm: Dict[str, int] = self._get_metadata(dfd_res.df)
df: pd.DataFrame = dfd_res.df[cols]
rows: List[pd.Series] = []
index_meta: Dict[str, str] = OrderedDict()
dfd_desc: str = (f"{cvm['folds']}-Fold Cross {cvm['repeats']} " +
'with Repeat(s) Validation Statistics')
stat: str
for stat in 'mean min max std'.split():
row: pd.Series = getattr(df, stat)()
row.name = stat
rows.append(row.to_frame().T)
index_meta[stat] = f'the {stat} of the performance metric'
ci_meths: Tuple[Tuple[str, str, Callable], ...] = (
('t-ci', 't-distribution', self._calc_t_ci),)
for (name, desc, ci_fn) in ci_meths:
cis: List[Tuple[float, float]] = []
col_data: pd.Series
for col_data in map(lambda t: t[1], df.items()):
ci_min, ci_max = ci_fn(col_data.to_numpy())
# ci_max may be greater than 1, but it doesn't make sense to
# *report* it as such
ci_max = min(ci_max, 1.)
cis.append((ci_min, ci_max))
row: pd.Series = pd.Series(data=cis, index=df.columns, name=name)
rows.append(row.to_frame().T)
index_meta[name] = desc
dfs: pd.DataFrame = pd.concat(rows)
dfs.insert(0, 'stat', list(index_meta.keys()))
dfs.index.name = 'description'
meta: pd.DataFrame = dfd_res.meta[dfd_res.meta.index.isin(df.columns)]
meta = pd.concat((
pd.DataFrame([{'description': 'aggregate statistic'}],
index=['stat']),
meta))
return DataFrameDescriber(
name='cross-validation-stats',
df=dfs,
desc=f'{self.result_manager.name.capitalize()} {dfd_desc}',
meta=meta,
index_meta=index_meta)
@property
def cross_validate_describer(self) -> DataDescriber:
"""A data describer with the results of a cross-validation.
The returned describer includes the metrics for each fold and summary
statistics over all folds. The statistics
:class:`~zensols.datdesc.desc.DataFrameDescriber` (the describer named
``cross-validation-stats``) contains the following 95% mean confidence
interval calculation, listed under the ``stat`` column:
* ``t-ci``: use the t-scores from a t-distribution (assumes a normal
distribution across scores)
"""
dfd_sum: DataFrameDescriber = self._cross_validate_summary()
dfd_stat: DataFrameDescriber = self._cross_validate_stats(dfd_sum)
return DataDescriber(
name='summary-model',
describers=(dfd_sum, dfd_stat))
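# Usage sketch for the cross-validation describer (hedged: the file names are
# arbitrary); ``describers``, ``name`` and ``df`` are the attributes the code
# above populates on the returned describers:
#
#     dd = reporter.cross_validate_describer
#     for dfd in dd.describers:
#         dfd.df.to_csv(f'{dfd.name}.csv')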
def dump(self, path: Path) -> pd.DataFrame:
"""Create the summarized results and write them to the file system.
"""
with time(f'wrote results summary: {path}'):
df: pd.DataFrame = self.dataframe
df.to_csv(path)
return df