# seriesanalysis.py -- Python source code snippet

def get_series_mean_std_peryear(word_time_series, i_year_words, one_minus=False, start_year=1900, end_year=2000, year_inc=1, exclude_partial_missing=False):
    """
    Return per-year mean and standard-deviation arrays for the selected words.

    For every year in ``start_year..end_year`` (inclusive, stepping by
    ``year_inc``) the values ``word_time_series[word][year]`` are collected for
    each word listed in ``i_year_words[year]``. Words that have no time series,
    and values that are NaN or infinite, are ignored.

    Parameters:
        word_time_series: mapping word -> mapping year -> numeric value.
        i_year_words: indexable by year, yielding the words to use that year.
        one_minus: if true, each value ``v`` is replaced by ``1 - v`` before
            the statistics are computed.
        start_year, end_year, year_inc: the inclusive range of years to scan.
        exclude_partial_missing: if true, drop every word whose summed series
            is NaN (i.e. the series contains a NaN in some year).

    Returns:
        ``(means, stds)`` as two NumPy arrays. The second array holds
        ``ndarray.std()`` of each year's values, i.e. a standard deviation,
        not a standard error of the mean.

    Note:
        Years without any usable value are skipped entirely, so the returned
        arrays can be shorter than the number of years requested and are not
        guaranteed to line up one-to-one with the year range.

    Raises:
        KeyError: if a scanned year is missing from ``i_year_words`` or from
            the time series of a word that is looked up (for dict inputs).
    """
    if exclude_partial_missing:
        # list(...) hands NumPy a real sequence; on Python 3 ``values()`` is a
        # view that np.sum cannot reduce. ``items()`` works on Python 2 and 3.
        usable_series = {
            word: time_series
            for word, time_series in word_time_series.items()
            if not np.isnan(np.sum(list(time_series.values())))
        }
    else:
        usable_series = word_time_series

    means = []
    stds = []
    for year in range(start_year, end_year + 1, year_inc):
        year_values = []
        for word in i_year_words[year]:
            if word not in usable_series:
                continue
            # Look the value up once instead of once per validity check.
            value = usable_series[word][year]
            # isfinite is False for NaN as well as for +/-inf.
            if np.isfinite(value):
                year_values.append(value)
        if not year_values:
            # Nothing to aggregate for this year: contribute no entry at all.
            continue
        word_array = np.array(year_values)
        if one_minus:
            word_array = 1 - word_array
        means.append(word_array.mean())
        stds.append(word_array.std())
    return np.array(means), np.array(stds)