Source code for zensols.dataframe.config
"""Configuration classes using dataframes as sources.
"""
__author__ = 'Paul Landes'
from typing import Dict, Any
from pathlib import Path
import pandas as pd
from zensols.config import DictionaryConfig
[docs]
class DataframeConfig(DictionaryConfig):
    """A :class:`~zensols.config.Configurable` that uses dataframes as sources.
    This is useful for providing labels to nominal label vectorizers.

    The configuration has a single section whose options are the (optionally
    transformed) columns of a dataframe read from a CSV file.

    """
    def __init__(self, csv_path: Path, default_section: str,
                 columns: Dict[str, str] = None, column_eval: str = None,
                 counts: Dict[str, str] = None):
        """Initialize the configuration from a dataframe (see parameters).

        :param csv_path: the path to the CSV file to create the dataframe

        :param default_section: the singleton section name, which has as
                                options a list of the columns of the dataframe

        :param columns: the columns to add to the configuration from the
                        dataframe with ``key, values`` as ``column names,
                        option names``; if ``None``, every column of the
                        dataframe is added under its own name

        :param column_eval: Python code to evaluate and apply to each column
                            if provided; the expression is evaluated in the
                            scope of this method, so it may refer to ``col``
                            (the current column as a
                            :class:`pandas.Series`) and ``df`` (the
                            dataframe); a :class:`pandas.Series` result is
                            converted to a :class:`list`

        :param counts: additional option entries in the section to add as
                       counts of respective columns with ``key, values`` as
                       ``column option names, new entry option names``, where
                       the ``column option names`` are those given as values
                       from the ``columns`` :class:`dict`

        :raises KeyError: if a key of ``counts`` is not an option name that
                          exists in the section

        """
        # NOTE: the local names ``df``, ``sec``, ``df_col``, ``sec_name`` and
        # ``col`` are visible to the ``column_eval`` expression, so renaming
        # them would break existing configurations
        df: pd.DataFrame = pd.read_csv(csv_path)
        sec: Dict[str, Any] = {}
        if columns is None:
            # default to an identity mapping of every dataframe column
            columns = {name: name for name in df.columns}
        for df_col, sec_name in columns.items():
            col: pd.Series = df[df_col]
            if column_eval is not None:
                # SECURITY: this executes arbitrary Python taken from the
                # configuration; only use ``column_eval`` with trusted
                # configuration sources
                col = eval(column_eval)
            # the evaluated expression may return something other than a
            # series, which is stored as is
            if isinstance(col, pd.Series):
                col = col.tolist()
            sec[sec_name] = col
        if counts is not None:
            col_name: str
            for col_name, sec_name in counts.items():
                if col_name not in sec:
                    raise KeyError(
                        f"No option '{col_name}' to count in section " +
                        f"'{default_section}'; available: {sorted(sec)}")
                sec[sec_name] = len(sec[col_name])
        super().__init__(config={default_section: sec},
                         default_section=default_section)