outliers.py 文件源码-python代码片段

outliers.py 文件源码

python

阅读 47 收藏 0 点赞 0 评论 0

项目：py-hadoop-tutorial 作者: hougs 项目源码文件源码

def to_series(tuples):
    """Transform (timestamp, count) tuples into a pandas Series.

    Entries whose timestamp is ``pd.NaT`` or whose count is ``None`` are
    dropped before the series is built.

    Args:
        tuples: Iterable of indexable rows where element 0 is the timestamp
            and element 1 is the count.

    Returns:
        A ``pd.Series`` of the counts indexed by timestamp and sorted by
        index, or ``None`` when no valid entries remain after filtering.
    """
    valid_rows = [
        row for row in tuples
        if not (row[0] is pd.NaT or row[1] is None)
    ]
    if not valid_rows:
        return None

    # Build the two columns explicitly rather than indexing into
    # ``zip(*rows)``: on Python 3 ``zip`` returns an iterator, which is not
    # subscriptable, so the old ``zip(...)[1]`` form raised TypeError.
    timestamps = [row[0] for row in valid_rows]
    counts = [row[1] for row in valid_rows]

    # Create the series with a sorted index.
    return pd.Series(counts, index=timestamps).sort_index()


# In[ ]: