def get_series_median_peryear(word_time_series, i_year_words, one_minus=False,
                              start_year=1900, end_year=2000, year_inc=10,
                              exclude_partial_missing=False):
    """
    Return the per-year median of the values of the words listed for each year.

    For every year in ``start_year, start_year + year_inc, ..., end_year``
    (``end_year`` inclusive), the value ``word_time_series[word][year]`` is
    collected for each word in ``i_year_words[year]``. Words that have no
    time series, and values that are NaN or exactly 0, are ignored. The
    median of the remaining values is recorded for that year.

    Parameters
    ----------
    word_time_series : dict
        Maps word -> dict mapping year -> numeric value.
    i_year_words : dict
        Maps year -> iterable of words to consider for that year. Must
        contain an entry for every year that is visited.
    one_minus : bool
        If True, each value ``v`` is replaced by ``1 - v`` before the median
        is taken.
    start_year, end_year, year_inc : int
        First year, last year (inclusive) and step of the years visited.
    exclude_partial_missing : bool
        If True, words whose series values sum to NaN (i.e. the series
        has at least one missing value) are dropped entirely.

    Returns
    -------
    numpy.ndarray
        One median per year that had at least one usable value, in
        increasing year order. Years with no usable values are skipped, so
        the array may be shorter than the number of years visited.
    """
    if exclude_partial_missing:
        # A NaN anywhere in a series makes its sum NaN. list() is required so
        # that NumPy sees a sequence rather than a dict view on Python 3.
        series_by_word = {
            word: series
            for word, series in word_time_series.items()
            if not np.isnan(np.sum(list(series.values())))
        }
    else:
        series_by_word = word_time_series

    medians = []
    for year in range(start_year, end_year + 1, year_inc):
        values = []
        for word in i_year_words[year]:
            if word not in series_by_word:
                continue
            value = series_by_word[word][year]
            # Both NaN and an exact zero are treated as "no data".
            if np.isnan(value) or value == 0:
                continue
            values.append(value)
        if not values:
            # Nothing usable this year: skip it rather than emit NaN.
            continue
        word_array = np.array(values)
        if one_minus:
            word_array = 1 - word_array
        medians.append(np.median(word_array))
    return np.array(medians)
评论列表
文章目录