__init__.py 文件源码

python
阅读 20 收藏 0 点赞 0 评论 0

项目:juicer 作者: eubr-bigsea 项目源码 文件源码
def masking_gen(attribute_name, details):
    """
    Build a function that masks one column for a group of rows.

    The returned callable receives a ``group`` whose second element
    (``group[1]``) is an iterable of rows exposing ``asDict()``. Every row is
    converted to a dict and the entry ``attribute_name`` is replaced by a
    value produced by a Faker provider. Within one group, equal original
    values always receive the same masked value.

    Keys read from ``details``:
      * ``lang``       - locale given to the Faker factory (default ``en_GB``).
      * ``label_type`` - name of the Faker attribute used as generator
                         (default ``name``).
      * ``label_args`` - keyword arguments passed to that generator
                         (default: none).

    The caller is presumably expected to group rows by the masked column, so
    that identical values end up in the same group - verify at the call site.

    @FIXME: Define a good size for partitions / groups (for instance use part
    of string or range of numbers, but it depends on the data type).
    """
    def masking(group):
        # Imported lazily, so creating the closure does not require faker.
        from faker import Factory

        generator = Factory.create(details.get('lang', 'en_GB'))
        generator.seed(random.randint(0, 100000))

        label_type = details.get('label_type', 'name')
        if not hasattr(generator, label_type):
            raise ValueError(_('Invalid masking type: {}').format(
                details.get('label_type')))

        produce = getattr(generator, label_type)
        label_args = details.get('label_args', {})
        # Original value -> masked value, valid for this group only.
        known = {}

        def mask_row(row):
            fields = row.asDict()
            original = fields.get(attribute_name)
            if original not in known:
                known[original] = produce(**label_args)
            fields[attribute_name] = known[original]
            return fields

        return [mask_row(row) for row in group[1]]

    return masking
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号