def match_contractors(contractors_file, match_file, match_col, match_threshold):
    """Fuzzy-match every row of ``match_file`` against contractor names.

    Both files are read as CSV and the first line of each is skipped as a
    header. For each remaining row of ``match_file`` the value in column
    ``match_col`` is compared with column 0 of every contractor row, after
    both strings have had ASCII punctuation removed and been lower-cased.
    The contractor with the highest ``fuzz.ratio`` score that is strictly
    greater than ``match_threshold`` wins; on equal scores the contractor
    that appears first in ``contractors_file`` is kept.

    Args:
        contractors_file: Path of the contractor CSV. Column 0 is the name
            that is compared; column 4 is the value reported for the best
            match (presumably a contract amount -- confirm against the
            file's schema).
        match_file: Path of the CSV whose rows are to be matched.
        match_col: Zero-based index of the column in ``match_file`` holding
            the name to compare.
        match_threshold: Score a candidate must strictly exceed to count as
            a match. NOTE(review): ``fuzz.ratio`` is assumed to be the
            fuzzywuzzy/thefuzz function returning 0-100 -- confirm.

    Returns:
        A list with one entry per data row of ``match_file``: the original
        row followed by the matched contractor's original (un-normalised)
        name and its column-4 value. Rows without a match get ``''`` and
        ``-1`` instead.

    Raises:
        IndexError: If a ``match_file`` row has no column ``match_col``, a
            contractor row is empty, or a winning contractor row has no
            column 4.
    """
    # Built once: a translation table that deletes ASCII punctuation.
    strip_punctuation = str.maketrans('', '', string.punctuation)

    def normalize(name):
        return name.translate(strip_punctuation).lower()

    results = []
    # newline='' is what the csv module asks for; without it, line breaks
    # embedded in quoted fields are altered by universal-newline handling.
    with open(match_file, 'r', newline='') as f, \
            open(contractors_file, 'r', newline='') as g:
        contracts_reader = csv.reader(g)
        next(contracts_reader, None)  # skip header; tolerate an empty file
        # Normalise each contractor name a single time rather than once per
        # (row, contractor) pair. The full row is kept so column 4 is only
        # touched for a contractor that actually wins.
        contracts = [
            (normalize(contract[0]), contract) for contract in contracts_reader
        ]

        contribs_reader = csv.reader(f)
        next(contribs_reader, None)  # skip header; tolerate an empty file
        for row in contribs_reader:
            best_match = ''
            best_match_amount = -1
            best_score = 0
            match_name = normalize(row[match_col])
            for contractor_name, contract in contracts:
                score = fuzz.ratio(match_name, contractor_name)
                # Strict '>' on both tests: ties keep the earlier contractor
                # and a score equal to the threshold is not a match.
                if score > best_score and score > match_threshold:
                    best_match = contract[0]
                    best_score = score
                    best_match_amount = contract[4]
            results.append(row + [best_match, best_match_amount])
    return results
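# [web-page residue, not part of the program] "Comment list"
# [web-page residue, not part of the program] "Article table of contents"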