# linguistic_analysis.py: Python source code snippet

def get_linguistic_analysis(user, fromFile):
    """Print a linguistic summary of a user's tweets to stdout.

    Tweets are loaded, stripped of retweets, and normalized with
    ``utils.preprocess``; the resulting statistics are then printed one
    per line, followed by the most frequent keywords.

    Args:
        user: Identifier passed to the tweet loader and shown in the
            report header.
        fromFile: If truthy, tweets are loaded with
            ``get_tweets_from_file``; otherwise with
            ``get_max_amount_tweets``.

    Returns:
        None. The report is written with ``print``.
    """
    loader = get_tweets_from_file if fromFile else get_max_amount_tweets
    tweets = utils.remove_retweets(loader(user))

    # Normalize each tweet exactly once (the filter and the stored value
    # share the same result) and keep only non-empty, non-retweet entries.
    # NOTE(review): the is_retweet check may be redundant after
    # remove_retweets above -- kept because that cannot be confirmed here.
    norm = []
    for tweet in tweets:
        cleaned = utils.preprocess(tweet)
        if len(cleaned) and not utils.is_retweet(tweet):
            norm.append(cleaned)

    print("\nLinguistic Analysis of ", user, "'s tweets\n")
    print(
        "Average word length: ",
        get_average_word_characters(norm),
        " characters"
    )
    print("Average syllables per word: ", get_average_word_syllables(norm))
    print(
        "Average sentence length: ",
        get_average_sentence_length(norm),
        " words"
    )
    print("Average tweet length: ", get_average_tweet_length(norm), " words")
    # Punctuation statistics use the raw (retweet-free) tweets rather than
    # the normalized ones.
    print(
        "Average question marks per tweet: ",
        get_average_question_marks(tweets)
    )
    print(
        "Average exclamation marks per tweet: ",
        get_average_exclamation_marks(tweets)
    )
    print("Average flesch grade level: ", get_average_flesch_grade_level(norm))
    print("\nMost frequent 25 keywords:")
    for tag, count in get_most_frequent_keywords(norm):
        print("{}: {}".format(tag, count))