"""Contain a class to add embeddings to AMR feature documents."""__author__='Paul Landes'fromtypingimportDict,Tuple,Anyfromdataclassesimportdataclass,fieldimportloggingfromzensols.utilimporttimefromzensols.persistimportDelegateStash,PrimeableStashfromzensols.amrimportAmrSentence,AmrFeatureDocumentfromzensols.amr.annotateimport(AnnotatedAmrFeatureDocumentFactory,AnnotatedAmrDocument,AnnotatedAmrSectionDocument)fromzensols.deepnlp.transformerimport(WordPieceFeatureDocumentFactory,WordPieceFeatureDocument)logger=logging.getLogger(__name__)
@dataclass
class AddEmbeddingsFeatureDocumentStash(DelegateStash, PrimeableStash):
    """A delegating, primeable stash that adds embeddings to AMR feature
    documents.

    Configure :obj:`word_piece_doc_factory` as ``None`` (the default) to
    disable embedding population.

    """
    word_piece_doc_factory: WordPieceFeatureDocumentFactory = field(
        default=None)
    """The feature document factory that populates embeddings."""
@dataclass
class CalamrAnnotatedAmrFeatureDocumentFactory(
        AnnotatedAmrFeatureDocumentFactory):
    """An annotated AMR document factory that adds wordpiece embeddings to
    :class:`~zensols.amr.container.AmrFeatureDocument` instances.

    """
    word_piece_doc_factory: WordPieceFeatureDocumentFactory = field(
        default=None)
    """The feature document factory that populates embeddings."""

    def _populate_embeddings(self, doc: AmrFeatureDocument):
        """Copy the transformer embeddings onto ``doc``.

        This is a no-op when :obj:`word_piece_doc_factory` is ``None``.

        :param doc: the document that receives the embeddings

        """
        factory = self.word_piece_doc_factory
        if factory is None:
            # embedding population is turned off
            return
        # build the word piece document, then copy its embeddings over
        embedded: WordPieceFeatureDocument = factory(doc)
        embedded.copy_embedding(doc)
@dataclass
class ProxyReportAnnotatedAmrDocument(AnnotatedAmrDocument):
    """Overrides the sections property to skip duplicate summary sentences
    also found in the body.

    """
    @property
    def sections(self) -> Tuple[AnnotatedAmrSectionDocument]:
        """The parent's sections with summary sentences filtered out.

        A sentence is dropped when its ``text`` equals the ``text`` of any
        summary sentence.  The ``sents`` attribute of each section returned
        by the super class is reassigned in place.

        :return: the sections provided by the super class, after filtering

        """
        # texts of all summary sentences, used for fast membership checks
        summary_texts = {sent.text for sent in self.summary}
        sections = super().sections
        for section in sections:
            section.sents = tuple(
                sent for sent in section.sents
                if sent.text not in summary_texts)
        return sections