def translateHinglishTweets(tweets_text):
    """Translate dictionary words in each tweet, token by token.

    Every tweet is tokenized with ``wordpunct_tokenize`` and lower-cased.
    English stop words are kept as they are; any other token that is a key
    of the pickled dictionary is replaced by its mapped value; all
    remaining tokens are kept unchanged.

    Args:
        tweets_text: Sequence of tweet strings.

    Returns:
        A list with one string per input tweet, in the same order. Each
        output token is preceded by a single space, so a non-empty result
        starts with a space and a tweet with no tokens yields ``""``.

    Raises:
        IOError/OSError: If ``dictionary.pickle`` cannot be opened.
        pickle.UnpicklingError: If the file is not a valid pickle.
    """
    # The path is relative, so it is resolved against the current working
    # directory. The context manager closes the file even when
    # pickle.load raises.
    # NOTE(review): pickle can execute arbitrary code while loading; this
    # file must come from a trusted source.
    with open("dictionary.pickle", "rb") as dictionary_file:
        dictionary = pickle.load(dictionary_file)

    # Built once so the per-token membership test is a set lookup.
    # NOTE(review): `stopwords` and `wordpunct_tokenize` are presumably the
    # NLTK helpers imported at the top of the file -- confirm.
    english_stopwords_set = set(stopwords.words('english'))

    def translate_token(token):
        # Stop words take precedence over the dictionary so that common
        # English words are never rewritten, even if the dictionary has an
        # entry for them.
        if token not in english_stopwords_set and token in dictionary:
            return dictionary[token]
        return token

    tweets_text_translated = []
    for text in tweets_text:
        lowered_tokens = [token.lower() for token in wordpunct_tokenize(text)]
        # Joining " " + token pieces reproduces the historical output
        # format (leading space included) without quadratic concatenation.
        tweets_text_translated.append(
            "".join(" " + translate_token(token) for token in lowered_tokens)
        )
    return tweets_text_translated
analyzeTweets.py 文件源码
python
阅读 25
收藏 0
点赞 0
评论 0
评论列表
文章目录