stemming.py 文件源码

python
阅读 21 收藏 0 点赞 0 评论 0

项目:tashaphyne 作者: linuxscout 项目源码 文件源码
def __init__(self):
        """Initialise the stemmer with the default configuration.

        Three things happen here:

        * the affix settings (letter sets, length limits, joker, prefix and
          suffix lists) are taken from the ``DEFAULT_*`` values in
          ``stem_const``;
        * the per-word working fields are reset to empty strings, zero
          offsets and an empty segment list;
        * the token pattern is compiled and the prefix/suffix lookup trees
          are built from the configured lists.
        """
        defaults = stem_const

        # --- affix configuration, taken from the module defaults ---------
        self.prefix_letters = defaults.DEFAULT_PREFIX_LETTERS
        self.suffix_letters = defaults.DEFAULT_SUFFIX_LETTERS
        self.infix_letters = defaults.DEFAULT_INFIX_LETTERS
        self.max_prefix_length = defaults.DEFAULT_MAX_PREFIX
        self.max_suffix_length = defaults.DEFAULT_MAX_SUFFIX
        self.min_stem_length = defaults.DEFAULT_MIN_STEM
        self.joker = defaults.DEFAULT_JOKER
        # NOTE(review): the lists are stored by reference, not copied --
        # confirm nothing mutates them in place.
        self.prefix_list = defaults.DEFAULT_PREFIX_LIST
        self.suffix_list = defaults.DEFAULT_SUFFIX_LIST

        # --- per-word working state, empty until a word is processed -----
        self.word = self.unvocalized = self.normalized = \
            self.starword = self.root = u""
        self.left = self.right = 0
        self.segment_list = []

        # --- token pattern ------------------------------------------------
        # Matches one or more characters that are neither word characters,
        # nor in the range U+064B..U+0652 (harakat, per the original
        # comment), nor an apostrophe.
        # NOTE(review): the \u escapes sit in a raw string and are resolved
        # by ``re`` itself, which needs Python 3.3+ -- confirm target version.
        non_token_chars = r"[^\w\u064b-\u0652']+"
        self.token_pat = re.compile(non_token_chars, re.UNICODE)

        # --- affix lookup trees -------------------------------------------
        # Built last: they are derived from the lists assigned above.
        self.prefixes_tree = self._create_prefix_tree(self.prefix_list)
        self.suffixes_tree = self._create_suffix_tree(self.suffix_list)
    ######################################################################
    #{ Attribute Functions
    ######################################################################
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号