def __new__(cls, source_column, boundaries):
if not isinstance(source_column, _RealValuedColumn):
raise TypeError("source_column must be an instance of _RealValuedColumn. "
"source_column: {}".format(source_column))
if not isinstance(boundaries, list) or not boundaries:
raise ValueError("boundaries must be a non-empty list. "
"boundaries: {}".format(boundaries))
# We allow bucket boundaries to be monotonically increasing
# (ie a[i+1] >= a[i]). When two bucket boundaries are the same, we
# de-duplicate.
sanitized_boundaries = []
for i in range(len(boundaries) - 1):
if boundaries[i] == boundaries[i + 1]:
continue
elif boundaries[i] < boundaries[i + 1]:
sanitized_boundaries.append(boundaries[i])
else:
raise ValueError("boundaries must be a sorted list. "
"boundaries: {}".format(boundaries))
sanitized_boundaries.append(boundaries[len(boundaries) - 1])
return super(_BucketizedColumn, cls).__new__(cls, source_column,
tuple(sanitized_boundaries))
评论列表
文章目录