"""Domain objects for the natural language text classification atsk."""__author__='Paul Landes'fromtypingimportTuple,Dict,Anyfromdataclassesimportdataclass,fieldimportsysfromioimportTextIOBaseimportnumpyasnpfromzensols.persistimportpersistedfromzensols.nlpimport(TokenContainer,FeatureDocument,TokenAnnotatedFeatureSentence,TokenAnnotatedFeatureDocument,)fromzensols.deeplearnimportDeepLearnErrorfromzensols.deeplearn.batchimport(DataPoint,Batch,BatchFeatureMapping,ManagerFeatureMapping,FieldFeatureMapping,)
@dataclass
class TokenContainerDataPoint(DataPoint):
    """A convenience class that uses data, such as tokens, a sentence or a
    document (:class:`~zensols.nlp.container.TokenContainer`) as a data point.

    """
    container: TokenContainer = field()
    """The token container used for this data point."""

    @property
    @persisted('_doc')
    def doc(self) -> FeatureDocument:
        """The container as a document.  If it is a sentence, it will create a
        document with the single sentence.  This is usually used by the
        embeddings vectorizer.

        The result goes through the ``persisted('_doc')`` decorator
        (presumably cached after the first access -- confirm against
        :mod:`zensols.persist`).

        """
        return self.container.to_document()

    def _get_token_labels(self) -> Tuple[Any, ...]:
        """Return the token level labels taken from the container's
        ``annotations``.

        Subclasses whose container is not an annotated sentence or document
        should override this method.

        :raises DeepLearnError: if :obj:`container` is neither a
                                :class:`.TokenAnnotatedFeatureDocument` nor a
                                :class:`.TokenAnnotatedFeatureSentence`

        """
        annotated_types = (TokenAnnotatedFeatureDocument,
                           TokenAnnotatedFeatureSentence)
        if isinstance(self.container, annotated_types):
            return self.container.annotations
        # report the type of ``container``; this class has no ``sent``
        # attribute, so referencing it here would raise ``AttributeError``
        # and hide the intended error
        raise DeepLearnError(
            'Need instance of TokenAnnotatedFeature{Sentence,Document} ' +
            f'(got {type(self.container)}) or override _get_token_labels')

    @property
    def token_labels(self) -> Tuple[Any, ...]:
        """The label that corresponds to each normalized token."""
        return self._get_token_labels()

    def __len__(self) -> int:
        """The number of normalized tokens in the container."""
        return self.container.token_len
@dataclass
class PredictionFeatureDocument(FeatureDocument):
    """A feature document that carries the document level softmax of the
    logits, used for text classification.

    """
    softmax_logit: Dict[str, np.ndarray] = None
    """The softmax of the logits at the document level; ``None`` until it is
    set.

    :see: :obj:`.ClassificationPredictionMapper.softmax_logit_attribute`

    """
@dataclass
class LabeledFeatureDocument(PredictionFeatureDocument):
    """A prediction feature document that also holds a gold label and a
    predicted label, used for text classification.

    """
    label: str = None
    """The gold classification label for the whole document; defaults to
    ``None``.

    """
    pred: str = None
    """The predicted label for the whole document; defaults to ``None``.

    :see: :obj:`.ClassificationPredictionMapper.pred_attribute`

    """
@dataclass
class LabeledFeatureDocumentDataPoint(TokenContainerDataPoint):
    """A data point for a review document that uses the sentiment polarity as
    its label.

    """
    @property
    def label(self) -> str:
        """The label of the document backing this textual data point."""
        document = self.doc
        return document.label