bookstopher.py 文件源码

python
阅读 20 收藏 0 点赞 0 评论 0

项目:ewe_ebooks 作者: jaymcgrath 项目源码 文件源码
def __init__(self, body, author='Anonymous'):
        """Tokenize *body* and store the text together with its derived tokens.

        NOTE(review): ``word_tokenize``, ``sent_tokenize`` and ``ngrams`` are
        assumed to be the NLTK functions of those names -- confirm against
        the imports at the top of the file.

        Args:
            body: The text to process. It is stored unchanged on ``self.body``.
            author: Name stored on ``self.author``; defaults to 'Anonymous'.

        Attributes set on the instance:
            body, author: The arguments as passed in.
            words: Result of ``word_tokenize(body)``.
            sentences: Each item from ``sent_tokenize(body)`` with leading and
                trailing whitespace stripped.
            bigrams, trigrams, quadgrams: Lists of 2-, 3- and 4-grams built
                from ``words``.
            hashtags: Empty list (not populated yet, see TODO below).
        """
        words = word_tokenize(body)

        # Strip whitespace from each sentence
        sentences = [sentence.strip() for sentence in sent_tokenize(body)]

        self.body = body
        self.author = author
        self.words = words
        self.sentences = sentences

        # Build the n-grams from the word tokens: passing the raw string would
        # produce n-grams of single characters instead of words.
        # They are materialized as lists so the attributes can be iterated
        # more than once (a bare iterator would be exhausted after one pass).
        self.bigrams = list(ngrams(words, 2))
        self.trigrams = list(ngrams(words, 3))
        self.quadgrams = list(ngrams(words, 4))

        # accumulators
        self.hashtags = []

        # TODO: Create "hashtags" from arbitrary number of rarest words
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号