Source code for zensols.nlp.stemmer
"""Stem text using the Porter stemmer.
"""
__author__ = 'Paul Landes'
from dataclasses import dataclass, field
import logging
from nltk.stem import PorterStemmer
from zensols.nlp import TokenMapper
logger = logging.getLogger(__name__)
[docs]
@dataclass
class PorterStemmerTokenMapper(TokenMapper):
    """Token mapper that swaps the second element of each token pair for its
    stem, using the Porter stemmer from the NLTK.

    """
    # stemmer instance whose ``stem`` method is applied to every pair; a new
    # ``PorterStemmer`` is created per mapper unless one is supplied
    stemmer: PorterStemmer = field(default_factory=PorterStemmer)

    def map_tokens(self, token_tups):
        """Stem the second element of every pair in ``token_tups``.

        :param token_tups: an iterable of indexable pairs; element ``0`` is
                           passed through unchanged and element ``1`` is
                           handed to :obj:`stemmer`

        :return: a one-element tuple whose only member is a lazy ``map``
                 yielding ``(pair[0], stemmed pair[1])`` tuples

        """
        def stem_pair(pair):
            # look the stemmer up on each call so the mapping stays lazy and
            # always uses the instance's current ``stemmer``
            return (pair[0], self.stemmer.stem(pair[1]))

        stemmed_pairs = map(stem_pair, token_tups)
        return (stemmed_pairs,)