def series_corr(word_year_series_1, word_year_series_2, i_year_words, start_year=1900, end_year=2000, series_1_norms=None, series_2_norms=None):
    """
    Gets the per-year Spearman correlation between the two word time series.

    Words are included even if they have values missing for a year, but their
    missing (NaN) values are excluded from the year in question.

    Args:
        word_year_series_1: Mapping indexed as ``series[word][year]``.
        word_year_series_2: Second mapping, indexed the same way.
        i_year_words: Either a mapping ``year -> iterable of words`` or a
            single iterable of words. If ``start_year`` is not contained in
            it, it is treated as a single word collection used for every year.
        start_year: First year (inclusive).
        end_year: Last year (inclusive).
        series_1_norms: Optional pair ``(offsets, scales)`` indexed by the
            position of the year within ``start_year..end_year``; each value
            of series 1 is transformed as ``(value - offset) / scale``
            (presumably a per-year mean and standard deviation -- confirm
            with callers). Defaults to offsets of 0 and scales of 1.
        series_2_norms: Same as ``series_1_norms`` but for series 2.

    Returns:
        Tuple ``(year_corrs, year_ps)``: two lists with one entry per year,
        holding the two values returned by ``spearmanr`` for that year.
    """
    years = range(start_year, end_year + 1)

    # A single word collection (one that does not contain start_year as a key)
    # is reused for every year.
    if start_year not in i_year_words:
        i_year_words = {year: i_year_words for year in years}

    # Identity check rather than `== None`: comparing a NumPy array to None
    # is elementwise and cannot be used as an `if` condition.
    if series_1_norms is None:
        series_1_norms = ([0] * len(years), [1] * len(years))
    if series_2_norms is None:
        series_2_norms = ([0] * len(years), [1] * len(years))

    year_corrs = []
    year_ps = []
    for i, year in enumerate(years):
        s1 = []
        s2 = []
        for word in i_year_words[year]:
            # Only words present in both series can be compared.
            if word not in word_year_series_1 or word not in word_year_series_2:
                continue
            # Skip the word for this year if either value is missing (NaN).
            if np.isnan(word_year_series_1[word][year]) or np.isnan(word_year_series_2[word][year]):
                continue
            s1.append((word_year_series_1[word][year] - series_1_norms[0][i]) / series_1_norms[1][i])
            s2.append((word_year_series_2[word][year] - series_2_norms[0][i]) / series_2_norms[1][i])
        corr, p = spearmanr(s1, s2)
        year_corrs.append(corr)
        year_ps.append(p)
    return year_corrs, year_ps
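# Comment list (web-page navigation text, not part of the code)
# Article contents (web-page navigation text, not part of the code)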