def __init__(self, file_path, type='text', **kwargs):
    """Open ``file_path`` and prepare per-format state for reading it.

    Args:
        file_path: Path of the file to read. It is opened in text mode ('r').
        type: Input format selector. 'json_line' and 'csv' are handled
            explicitly; any other value is treated as plain text.
        **kwargs: Format-specific options.
            json_line: ``id_path`` (required) is handed to ``parse``.
            csv: ``id_column`` (required); optional ``delimiter`` (','),
            ``quote_char`` ('"'), ``quoting`` (csv.QUOTE_MINIMAL) and
            ``column_names`` (None), all forwarded to ``csv.DictReader``.

    Raises:
        KeyError: If a required option for the chosen type is missing.
        IOError/OSError: If the file cannot be opened.
    """
    self._file_path = file_path
    self._type = type
    self._kwargs = kwargs
    self._file_handler = open(file_path, 'r')
    try:
        if type == 'json_line':
            # Pre-compile the JSON path once; a missing 'id_path' raises KeyError.
            # NOTE(review): `parse` is defined outside this block -- presumably a
            # JSONPath compiler; confirm against the module imports.
            self._id_path_parser = parse(kwargs['id_path'])
        elif type == 'csv':
            # A missing 'id_column' raises KeyError on purpose.
            self._id_column = kwargs['id_column']
            self._csv_reader = csv.DictReader(
                self._file_handler,
                delimiter=kwargs.get('delimiter', ','),
                quotechar=kwargs.get('quote_char', '"'),
                quoting=kwargs.get('quoting', csv.QUOTE_MINIMAL),
                fieldnames=kwargs.get('column_names'))
        else:  # text
            # md5 needs bytes: encode text paths so this also works when str is
            # unicode, while leaving byte-string paths untouched.
            if isinstance(file_path, bytes):
                path_bytes = file_path
            else:
                path_bytes = file_path.encode('utf-8')
            self._id_prefix = hashlib.md5(path_bytes).hexdigest()[:6]
    except Exception:
        # Do not leak the open handle when option validation or reader
        # construction fails; the original exception still propagates.
        self._file_handler.close()
        raise
# Scraped page residue (site navigation labels: "comment list", "table of contents"); not part of the code.