main.py 文件源码-python代码片段

main.py 文件源码
python
阅读 25 收藏 0 点赞 0 评论 0
def gen_lstm_status(screen_name, timeline, short_url, depth):
    """Build a reply status seeded with a random frequent word from a timeline.

    A word from the timeline's vocabulary is picked at random as the topic,
    handed to the external ``NN_SAMPLE_COMMAND`` helper as its only argument,
    and the helper's output line is appended to that topic to form the status.

    Args:
        screen_name: Account name to mention; it is prefixed with "@".
        timeline: Iterable of posts; each one is run through
            ``preprocess_post`` before being vectorized.
        short_url: URL appended to the end of the status.
        depth: Scales the ``min_df`` threshold (``4 * depth``) given to
            ``CountVectorizer``, which filters out rare words.

    Returns:
        The string ``"@<screen_name> <topic> <generated text> <short_url>"``.

    Raises:
        IndexError: If the helper's output contains no newline, so there is
            no line to use as the generated text.
    """
    # Create a vector of words and their frequency on the user's timeline.
    # Experimentation shows that requiring a word to occur at least 4 * depth
    # times to be considered gives good results.
    with open("stopwords.txt", 'r') as stopwords_file:
        stopwords = [line.strip() for line in stopwords_file]
    processed_timeline_text = [preprocess_post(post) for post in timeline]

    # Only the learned vocabulary is used, so the document-term matrix that
    # fit_transform() would return is never built into a local.
    vectorizer = CountVectorizer(min_df=4*depth, stop_words=stopwords)
    vectorizer.fit(processed_timeline_text)
    # get_feature_names() was removed in scikit-learn 1.2; prefer its
    # replacement when available and fall back on older releases.
    if hasattr(vectorizer, "get_feature_names_out"):
        vocab = vectorizer.get_feature_names_out().tolist()
    else:
        vocab = vectorizer.get_feature_names()
    topic = random.choice(vocab)

    # Generates a status using a helper bash script.
    proc = subprocess.Popen([NN_SAMPLE_COMMAND, topic], stdout=subprocess.PIPE)
    # communicate() drains the pipe, closes it and reaps the child, so no
    # zombie process or open file descriptor is left behind.
    raw_output, _ = proc.communicate()
    # On Python 3 the pipe yields bytes, which cannot be split on a str.
    if not isinstance(raw_output, str):
        raw_output = raw_output.decode("utf-8", "replace")

    # The output is expected to end with a newline, so the last element of the
    # split is empty and [-2] is the final line the helper printed.
    output_lines = raw_output.split("\n")
    if len(output_lines) < 2:
        raise IndexError(
            "sample command produced no complete output line for topic %r"
            % (topic,))
    status = topic + " " + output_lines[-2].strip()
    return "@" + screen_name + " " + status + " " + short_url