def process_id(train_file_dir, train_file_name):
    """Return the query IDs of a training file, minus the unsegmentable ones.

    Two files are read from ``train_file_dir``:

    * ``cannot_segment_query_id.txt`` -- one record per line; the first
      whitespace-separated token of each line is an integer query ID that
      must be excluded from the result.
    * ``train_file_name`` -- one JSON object per line, each carrying a
      ``'query_id'`` key.

    Args:
        train_file_dir: Directory containing both files.
        train_file_name: Name of the JSON-lines training file inside
            ``train_file_dir``.

    Returns:
        A list of the ``'query_id'`` values found in the training file, in
        file order, omitting every ID listed in
        ``cannot_segment_query_id.txt``.

    Raises:
        IOError/OSError: If either file cannot be opened.
        ValueError: If an exclusion line does not start with an integer, or
            a training line is not valid JSON.
        KeyError: If a training record has no ``'query_id'`` key.
    """
    unable_path = os.path.join(train_file_dir, 'cannot_segment_query_id.txt')
    # A set gives O(1) membership tests in the main loop below.
    unable_ids = set()
    with open(unable_path, 'r') as unable:
        for line in unable:
            fields = line.split()
            if not fields:
                # Blank line (e.g. a trailing newline at end of file).
                continue
            # int() replaces string.atoi, which no longer exists in Python 3.
            unable_ids.add(int(fields[0]))

    qaid = []
    with open(os.path.join(train_file_dir, train_file_name), 'r') as f:
        for line in f:
            if not line.strip():
                # Skip empty lines instead of failing in json.loads.
                continue
            temp_id = json.loads(line)['query_id']
            if temp_id in unable_ids:
                continue
            qaid.append(temp_id)
    return qaid
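# Comment list          (web page residue, not part of the code)
# Table of contents     (web page residue, not part of the code)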