def parts_of_speechtag(self, sentences=""):
    """Sentence-tokenize *sentences* with a Punkt tokenizer, then delegate
    POS processing to ``self.processing_POS_tokenization``.

    The tokenizer is trained (unsupervised) on the 2005 State of the Union
    address shipped with the NLTK ``state_union`` corpus.

    :param sentences: text to split into sentences; coerced to ``str``
        before tokenizing, so non-string input does not raise.
    """
    # Function-scope imports keep the heavy nltk dependency lazy.
    from nltk.corpus import state_union  # locally stored corpus used as training data
    from nltk.tokenize import PunktSentenceTokenizer  # unsupervised sentence tokenizer

    # NOTE(review): the tokenizer is retrained on every call; caching it at
    # class or module level would avoid repeating the training cost.
    training_text = state_union.raw("2005-GWBUSH.txt")
    tokenizer = PunktSentenceTokenizer(train_text=training_text)

    # str() guards against callers passing non-string input.
    tokenization_unsupervised = tokenizer.tokenize(str(sentences))

    # Hand the tokenized sentences to the sibling method for POS tagging.
    self.processing_POS_tokenization(tokenization_unsupervised=tokenization_unsupervised)