"""Create Pandas dataframes from features. This must be imported by absolutemodule (:mod:`zensols.nlp.dataframe`)."""__author__='Paul Landes'fromtypingimportSet,List,Tuplefromdataclassesimportdataclass,fieldimportpandasaspdfromzensols.nlpimportFeatureToken,FeatureDocument
@dataclass
class FeatureDataFrameFactory(object):
    """Creates a Pandas dataframe of features from a document's annotations.
    Each feature ID is given a column in the output
    :class:`pandas.DataFrame`, and each token of the document is given a row.

    """
    # NOTE(review): the default below is built at class-definition time and
    # shared by all instances; dataclasses reject plain ``set`` defaults, so
    # this presumably relies on ``FeatureToken.FEATURE_IDS`` being a
    # ``frozenset`` (``frozenset | set`` yields a ``frozenset``) -- confirm.
    token_feature_ids: Set[str] = field(
        default=(FeatureToken.FEATURE_IDS | {'text'}))
    """The feature IDs to add to the :class:`pandas.DataFrame`."""

    priority_feature_ids: Tuple[str, ...] = field(
        default=FeatureToken.WRITABLE_FEATURE_IDS)
    """Feature IDs that are used first in the column order in the output
    :class:`pandas.DataFrame`.

    """
    def __call__(self, doc: FeatureDocument) -> pd.DataFrame:
        """Create a dataframe with one row per token of ``doc``.

        Columns are the entries of :obj:`priority_feature_ids` that are also
        in :obj:`token_feature_ids` (kept in priority order), followed by the
        remaining :obj:`token_feature_ids` in sorted order.  A feature that a
        token does not have is filled with ``None``.

        :param doc: the document whose sentences' tokens become rows

        :return: the dataframe of token features

        :raises KeyError: if a token's feature dictionary has neither a
                          ``text`` nor a ``norm`` entry

        """
        fids = self.token_feature_ids
        # priority columns first, restricted to the requested feature IDs
        cols: List[str] = [n for n in self.priority_feature_ids if n in fids]
        # then everything else in a deterministic (sorted) order
        cols.extend(sorted(fids - set(cols)))
        rows = []
        for sent in doc.sents:
            for tok in sent:
                feats = tok.asdict()
                # fall back to the normalized form when no text is given
                if 'text' not in feats:
                    feats['text'] = feats['norm']
                rows.append(tuple(feats.get(f) for f in cols))
        return pd.DataFrame(rows, columns=cols)