def gen_lstm_status(screen_name, timeline, short_url, depth):
    """Build a reply status for a user, seeded with a word from their timeline.

    A topic word is picked at random from the words that appear often enough
    on the user's timeline, and the helper command ``NN_SAMPLE_COMMAND`` is
    run with that topic to generate the rest of the text.

    Args:
        screen_name: Screen name to address; prefixed with "@" in the result.
        timeline: Iterable of posts; each is passed through
            ``preprocess_post`` before vectorizing.
        short_url: URL appended to the end of the status.
        depth: Scales the frequency threshold; a word must appear in at
            least ``4 * depth`` posts to be a candidate topic.

    Returns:
        The string ``"@<screen_name> <topic> <generated text> <short_url>"``.

    Raises:
        ValueError: Propagated from ``CountVectorizer`` if no word survives
            the stop-word and ``min_df`` filtering.
        IndexError: If the helper command's output contains no newline.
    """
    with open("stopwords.txt", 'r') as stopwords_file:
        stopwords = [line.strip() for line in stopwords_file]
    processed_timeline_text = [preprocess_post(post) for post in timeline]

    # Build the vocabulary of words on the user's timeline. Experimentation
    # shows that requiring a word to appear in at least 4 * depth posts
    # gives good results.
    vectorizer = CountVectorizer(min_df=4 * depth, stop_words=stopwords)
    vectorizer.fit(processed_timeline_text)
    # get_feature_names() was removed in scikit-learn 1.2; prefer the
    # replacement when it exists and fall back for older releases.
    if hasattr(vectorizer, "get_feature_names_out"):
        vocab = list(vectorizer.get_feature_names_out())
    else:
        vocab = vectorizer.get_feature_names()
    topic = random.choice(vocab)

    # Generate a status using the helper bash script. Text mode makes the
    # output a str on both Python 2 and 3, and communicate() waits for the
    # child and closes the pipe so no process or descriptor is leaked.
    proc = subprocess.Popen(
        [NN_SAMPLE_COMMAND, topic],
        stdout=subprocess.PIPE,
        universal_newlines=True,
    )
    output, _ = proc.communicate()
    # The output is expected to end with a newline, so [-2] is its last line.
    status = topic + " " + output.split("\n")[-2].strip()
    return "@" + screen_name + " " + status + " " + short_url
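# Comment list (navigation text left over from the source web page)
# Article table of contents (navigation text left over from the source web page)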