seriesanalysis.py 文件源码-python代码片段

def series_corr(word_year_series_1, word_year_series_2, i_year_words, start_year=1900, end_year=2000, series_1_norms=None, series_2_norms=None):
    """
    Gets the per-year Spearman correlation between two word time series.

    For every year in [start_year, end_year] the values of the two series are
    paired up over that year's words and handed to ``spearmanr``. A word is
    left out of a given year when it is absent from either series or when
    either series holds NaN for it in that year; it can still contribute to
    other years.

    Args:
        word_year_series_1: Mapping word -> values indexable by year.
        word_year_series_2: Mapping word -> values indexable by year.
        i_year_words: Either a mapping year -> iterable of words, or a single
            collection of words. When ``start_year`` is not contained in it,
            it is treated as a single collection shared by every year.
        start_year: First year to process (inclusive).
        end_year: Last year to process (inclusive).
        series_1_norms: Optional pair ``(offsets, scales)``, each indexed by
            year position (0 corresponds to ``start_year``). Values of series 1
            are transformed as ``(value - offset) / scale``. Defaults to
            offset 0 and scale 1 for every year.
        series_2_norms: Same as ``series_1_norms``, applied to series 2.

    Returns:
        A tuple ``(year_corrs, year_ps)`` of two lists with one entry per
        year: the correlation and its p-value. Years with fewer than two
        paired values get NaN for both.
    """
    years = list(range(start_year, end_year + 1))
    # A plain word collection (one that does not contain start_year) is
    # shared by all years.
    if start_year not in i_year_words:
        i_year_words = {year: i_year_words for year in years}
    # Identity checks: `== None` is evaluated elementwise when the norms are
    # passed as a NumPy array and then fails inside the `if`.
    if series_1_norms is None:
        series_1_norms = ([0] * len(years), [1] * len(years))
    if series_2_norms is None:
        series_2_norms = ([0] * len(years), [1] * len(years))

    year_corrs = []
    year_ps = []
    for i, year in enumerate(years):
        s1 = []
        s2 = []
        for word in i_year_words[year]:
            if word not in word_year_series_1 or word not in word_year_series_2:
                continue
            value_1 = word_year_series_1[word][year]
            if np.isnan(value_1):
                continue
            value_2 = word_year_series_2[word][year]
            if np.isnan(value_2):
                continue
            s1.append((value_1 - series_1_norms[0][i]) / series_1_norms[1][i])
            s2.append((value_2 - series_2_norms[0][i]) / series_2_norms[1][i])
        if len(s1) < 2:
            # A correlation is undefined for fewer than two paired values.
            corr, p = np.nan, np.nan
        else:
            corr, p = spearmanr(s1, s2)
        year_corrs.append(corr)
        year_ps.append(p)
    return year_corrs, year_ps