def parts_of_speechtag(self, sentences=""):
    """Sentence-tokenize *sentences* with a Punkt tokenizer, then delegate
    POS processing to ``self.processing_POS_tokenization``.

    The tokenizer is trained (unsupervised) on the 2005 State of the Union
    address shipped with the NLTK ``state_union`` corpus.

    :param sentences: text to split into sentences; coerced to ``str``
        before tokenizing, so non-string input does not raise.
    """
    # Function-scope imports keep the heavy nltk dependency lazy.
    from nltk.corpus import state_union  # locally stored corpus used as training data
    from nltk.tokenize import PunktSentenceTokenizer  # unsupervised sentence tokenizer

    # NOTE(review): the tokenizer is retrained on every call; caching it at
    # class or module level would avoid repeating the training cost.
    training_text = state_union.raw("2005-GWBUSH.txt")
    tokenizer = PunktSentenceTokenizer(train_text=training_text)

    # str() guards against callers passing non-string input.
    tokenization_unsupervised = tokenizer.tokenize(str(sentences))

    # Hand the tokenized sentences to the sibling method for POS tagging.
    self.processing_POS_tokenization(tokenization_unsupervised=tokenization_unsupervised)