def _factorize_keys(lk, rk, sort=True):
    """
    Factorize the left and right join keys with one shared factorizer.

    Both key arrays are fed, left first and then right, through the same
    factorizer instance, so the integer labels returned for each side come
    from a single shared set of uniques.

    Parameters
    ----------
    lk : array-like
        Left key values.
    rk : array-like
        Right key values.
    sort : bool, default True
        When true, the labels are passed through ``_sort_labels`` together
        with the factorizer's uniques before NA handling.

    Returns
    -------
    tuple
        ``(left_labels, right_labels, count)`` where ``count`` is the value
        reported by the factorizer's ``get_count()``, plus one if any label
        on either side was ``-1``.
    """
    # tz-aware datetimes on both sides: work on the underlying ``.values``.
    if com.is_datetime64tz_dtype(lk) and com.is_datetime64tz_dtype(rk):
        lk, rk = lk.values, rk.values

    int_like = (com.is_int_or_datetime_dtype(lk) and
                com.is_int_or_datetime_dtype(rk))
    if not int_like:
        # Generic path: object factorizer on object-coerced keys.
        factorizer_cls = _hash.Factorizer
        left_values = com._ensure_object(lk)
        right_values = com._ensure_object(rk)
    else:
        # Both sides are int/datetime-like: use the int64 factorizer.
        factorizer_cls = _hash.Int64Factorizer
        left_values = com._ensure_int64(com._values_from_object(lk))
        right_values = com._ensure_int64(com._values_from_object(rk))

    # Constructed with the larger key length (presumably a size hint).
    table = factorizer_cls(max(len(left_values), len(right_values)))

    # Order matters: the factorizer is shared, left is factorized first.
    left_labels = table.factorize(left_values)
    right_labels = table.factorize(right_values)
    n_groups = table.get_count()

    if sort:
        left_labels, right_labels = _sort_labels(
            table.uniques.to_array(), left_labels, right_labels)

    # NA group: labels equal to -1 are moved into one extra trailing group.
    left_na = left_labels == -1
    right_na = right_labels == -1
    left_has_na = left_na.any()
    right_has_na = right_na.any()

    if not (left_has_na or right_has_na):
        return left_labels, right_labels, n_groups

    if left_has_na:
        np.putmask(left_labels, left_na, n_groups)
    if right_has_na:
        np.putmask(right_labels, right_na, n_groups)

    return left_labels, right_labels, n_groups + 1
merge.py 文件源码
python
阅读 49
收藏 0
点赞 0
评论 0
评论列表
文章目录