def fuzzy_feats(train_in, test_in, qcolumns = ['question1', 'question2'], append=''):
from fuzzywuzzy import fuzz
import pandas as pd
train = train_in.copy().loc[:,qcolumns]
test = test_in.copy().loc[:,qcolumns]
train['fuzz_r'+append] = train.apply(lambda x: fuzz.ratio(x[qcolumns[0]],x[qcolumns[1]]), axis = 1)
train['fuzz_pr'+append] = train.apply(lambda x: fuzz.partial_ratio(x[qcolumns[0]],x[qcolumns[1]]), axis = 1)
train['fuzz_tsr'+append] = train.apply(lambda x: fuzz.partial_token_set_ratio(x[qcolumns[0]],x[qcolumns[1]]), axis = 1)
train['fuzz_tsor'+append] = train.apply(lambda x: fuzz.partial_token_sort_ratio(x[qcolumns[0]],x[qcolumns[1]]), axis = 1)
test['fuzz_r'+append] = test.apply(lambda x: fuzz.ratio(x[qcolumns[0]],x[qcolumns[1]]), axis = 1)
test['fuzz_pr'+append] = test.apply(lambda x: fuzz.partial_ratio(x[qcolumns[0]],x[qcolumns[1]]), axis = 1)
test['fuzz_tsr'+append] = test.apply(lambda x: fuzz.partial_token_set_ratio(x[qcolumns[0]],x[qcolumns[1]]), axis = 1)
test['fuzz_tsor'+append] = test.apply(lambda x: fuzz.partial_token_sort_ratio(x[qcolumns[0]],x[qcolumns[1]]), axis = 1)
return (train, test)
评论列表
文章目录