"""Interfaces used for dealing with dataset splits."""__author__='Paul Landes'fromtypingimportDict,Set,Tuplefromdataclassesimportdataclassfromabcimportabstractmethod,ABCMetaimportloggingimportsysfromioimportTextIOBasefromzensols.utilimportAPIErrorfromzensols.configimportWritablefromzensols.persistimportStash,PrimeableStashlogger=logging.getLogger(__name__)
class DatasetError(APIError):
    """Raised when a dataset related error occurs."""
@dataclass
class SplitKeyContainer(Writable, metaclass=ABCMeta):
    """An interface for a container that partitions a data set into named
    splits (i.e. ``train`` vs ``test``).

    Instances of this class work with the unique keys that point at the data
    rather than with the data itself.

    """
    @abstractmethod
    def _get_keys_by_split(self) -> Dict[str, Tuple[str]]:
        """Return a dictionary of split name to the keys in that split.

        Implementations supply this; every other accessor in this class is
        derived from its result.

        """

    def _get_split_names(self) -> Set[str]:
        """Return the split names, which are the keys of the mapping given by
        :meth:`_get_keys_by_split`.

        """
        keys_by_split = self._get_keys_by_split()
        # NOTE(review): for a ``dict`` this is a keys view rather than a
        # ``set`` instance -- confirm callers only need set-like behavior
        return keys_by_split.keys()

    def _get_counts_by_key(self) -> Dict[str, int]:
        """Return a dictionary of split name to the number of keys in that
        split.

        """
        keys_by_split = self._get_keys_by_split()
        return {name: len(keys) for name, keys in keys_by_split.items()}

    @property
    def split_names(self) -> Set[str]:
        """The names of each split in the dataset."""
        return self._get_split_names()

    @property
    def counts_by_key(self) -> Dict[str, int]:
        """A mapping of data set split name to the count for that respective
        split.

        """
        return self._get_counts_by_key()

    @property
    def keys_by_split(self) -> Dict[str, Tuple[str]]:
        """A dictionary of split name to keys for that split.

        It is expected this will be very expensive to generate.

        """
        return self._get_keys_by_split()

    def clear(self):
        """Clear any cached state.

        This base implementation does nothing.

        """
@dataclass
class SplitStashContainer(PrimeableStash, SplitKeyContainer,
                          metaclass=ABCMeta):
    """An interface like ``SplitKeyContainer``, but whose implementations are
    a ``Stash`` containing the instance data.

    For a default implementation, see :class:`.DatasetSplitStash`.

    """
    @abstractmethod
    def _get_split_name(self) -> str:
        """Return the name of the split this stash contains."""

    @abstractmethod
    def _get_splits(self) -> Dict[str, Stash]:
        """Return a dictionary of split name to the stash for that split."""

    @property
    def split_name(self) -> str:
        """The name of the split this stash contains.

        All data/items returned by this stash are in the data set given by
        this name (i.e. ``train``).

        """
        return self._get_split_name()

    @property
    def splits(self) -> Dict[str, Stash]:
        """A dictionary with split names as keys and, as values, the stashes
        represented by each split.

        :see: :meth:`split_name`

        """
        return self._get_splits()