def get_series_mean_std_peryear(word_time_series, i_year_words, one_minus=False, start_year=1900, end_year=2000, year_inc=1, exclude_partial_missing=False):
    """
    Return per-year means and standard deviations of word values.

    For every year in ``range(start_year, end_year + 1, year_inc)`` the
    values ``word_time_series[word][year]`` are gathered for the words listed
    in ``i_year_words[year]``; words that are absent from the series and
    values that are NaN or infinite are ignored.

    Parameters
    ----------
    word_time_series : indexed as ``word_time_series[word][year]``; each
        per-word series must also provide ``.values()``.
    i_year_words : indexed as ``i_year_words[year]``, giving the words to
        aggregate for that year.
    one_minus : if true, aggregate ``1 - value`` instead of ``value``.
    start_year, end_year, year_inc : inclusive year range and its step.
    exclude_partial_missing : if true, drop every word whose series sums to
        NaN (i.e. contains a NaN, or both +inf and -inf) before aggregating.

    Returns
    -------
    (means, stds) : two numpy arrays of equal length. ``stds`` holds the
        standard deviation as computed by ``ndarray.std()`` (not the standard
        error). Years without any usable value are skipped, so the arrays can
        be shorter than the requested year range.

    Lookup errors from ``i_year_words[year]`` or from a word's series lacking
    ``year`` are not caught and propagate to the caller.
    """
    if exclude_partial_missing:
        # list(...) gives np.sum a real sequence; on Python 3 ``values()`` is
        # a view object that numpy cannot reduce.
        r_word_time_series = {
            word: time_series
            for word, time_series in word_time_series.items()
            if not np.isnan(np.sum(list(time_series.values())))
        }
    else:
        r_word_time_series = word_time_series

    means = []
    stds = []
    for year in range(start_year, end_year + 1, year_inc):
        year_values = []
        for word in i_year_words[year]:
            if word not in r_word_time_series:
                continue
            value = r_word_time_series[word][year]
            # Keep only finite numbers (rejects NaN and +/-inf).
            if np.isfinite(value):
                year_values.append(value)
        if not year_values:
            # Nothing usable this year: emit no entry at all.
            continue
        word_array = np.array(year_values)
        if one_minus:
            word_array = 1 - word_array
        means.append(word_array.mean())
        stds.append(word_array.std())
    return np.array(means), np.array(stds)
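# NOTE: stray web-page navigation label ("Comment list"); not part of the code.
# NOTE: stray web-page navigation label ("Article contents"); not part of the code.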