merge.py 文件源码

python
阅读 49 收藏 0 点赞 0 评论 0

项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda 作者: SignalMedia 项目源码 文件源码
def _factorize_keys(lk, rk, sort=True):
    """Encode the left and right join keys as integer labels.

    Both key arrays are run through the *same* factorizer instance, left
    first and then right, and the factorizer's count is read afterwards.

    Parameters
    ----------
    lk, rk : array-like
        Left and right key values. When both are tz-aware datetimes their
        ``.values`` are used; when both are integer/datetime-like they are
        coerced to int64, otherwise both are coerced to object.
    sort : bool, default True
        If true, the labels are passed through ``_sort_labels`` together
        with the factorizer's uniques.

    Returns
    -------
    (left_labels, right_labels, count)
        The two label arrays and the number of groups. Entries equal to -1
        in either label array are overwritten with the pre-increment
        count, and the count is then bumped by one (the "NA group";
        presumably -1 is how the factorizer marks missing keys -- confirm
        against the factorizer implementation).
    """
    if com.is_datetime64tz_dtype(lk) and com.is_datetime64tz_dtype(rk):
        lk, rk = lk.values, rk.values

    both_int_like = (com.is_int_or_datetime_dtype(lk) and
                     com.is_int_or_datetime_dtype(rk))
    if not both_int_like:
        factorizer_cls = _hash.Factorizer
        lk, rk = com._ensure_object(lk), com._ensure_object(rk)
    else:
        factorizer_cls = _hash.Int64Factorizer
        lk = com._ensure_int64(com._values_from_object(lk))
        rk = com._ensure_int64(com._values_from_object(rk))

    # Size hint for the factorizer: the longer of the two key arrays.
    factorizer = factorizer_cls(max(len(lk), len(rk)))

    # Left must be factorized before right; both share one instance.
    left_labels = factorizer.factorize(lk)
    right_labels = factorizer.factorize(rk)
    n_groups = factorizer.get_count()

    if sort:
        left_labels, right_labels = _sort_labels(
            factorizer.uniques.to_array(), left_labels, right_labels)

    # NA group: relabel -1 entries with the next free group id.
    left_na = left_labels == -1
    has_left_na = left_na.any()
    right_na = right_labels == -1
    has_right_na = right_na.any()

    if has_left_na:
        np.putmask(left_labels, left_na, n_groups)
    if has_right_na:
        np.putmask(right_labels, right_na, n_groups)
    if has_left_na or has_right_na:
        # Only one extra group is added even if both sides had NA entries.
        n_groups += 1

    return left_labels, right_labels, n_groups
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号