def __convert_csv_to_coocur_dict(self, res_folder):
    """Read the co-occurrence CSV files under ``res_folder`` into a dict.

    Every entry of ``res_folder`` whose first two characters differ from
    ``SUCCESS_HEADER`` is listed as a directory, and every file in it whose
    name ends in ``csv`` is parsed.  Rows are read positionally as
    ``tgt_word, ctx_word, coor_val``; because the field names are passed to
    ``csv.DictReader`` explicitly, the first line of each file is treated
    as data, not as a header.

    Side effect: ``self.__new_words`` is replaced by the list of words (in
    first-seen order, without duplicates) that are not keys of
    ``self.__embeddings``.  When ``self.__embeddings`` is ``None`` every
    word counts as new.

    Args:
        res_folder: Path of the directory that holds the sub-folders with
            the CSV files.

    Returns:
        A ``defaultdict(float)`` mapping ``(target_word, context_word)`` to
        the ``coor_val`` field of the last row read for that pair.
    """
    import csv
    import platform

    label_folders = [
        label_folder for label_folder in os.listdir(res_folder)
        if label_folder[:2] != SUCCESS_HEADER
    ]

    on_windows = any(platform.win32_ver())
    if on_windows:
        # pywin32 exists only on Windows; importing it unconditionally made
        # this method fail with ImportError on every other platform.
        import win32api
        # Loop-invariant, so resolve the short form of the root only once.
        short_res_folder = win32api.GetShortPathName(res_folder)

    embeddings = self.__embeddings
    cooccurrence_counts = defaultdict(float)
    new_words = []
    # Mirrors new_words for O(1) duplicate checks; the list keeps the order.
    seen_new_words = set()

    for label_folder in label_folders:
        label_path = os.path.join(res_folder, label_folder)
        csv_fnames = [
            csv_fname for csv_fname in os.listdir(label_path)
            if csv_fname[-3:] == 'csv'
        ]
        for csv_fname in csv_fnames:
            if on_windows:
                csv_file = win32api.GetShortPathName(
                    os.path.join(short_res_folder, label_folder, csv_fname)
                )
            else:
                csv_file = os.path.join(label_path, csv_fname)

            # The context manager closes the handle even if parsing raises.
            with open(csv_file) as csv_handle:
                reader = csv.DictReader(
                    csv_handle,
                    fieldnames=['tgt_word', 'ctx_word', 'coor_val'],
                )
                for row in reader:
                    target_word = row['tgt_word']
                    context_word = row['ctx_word']
                    for word in (target_word, context_word):
                        if word in seen_new_words:
                            continue
                        # The type of the embeddings object is not visible
                        # here, so the explicit .keys() test is kept as is.
                        if embeddings is None or word not in embeddings.keys():
                            seen_new_words.add(word)
                            new_words.append(word)
                    # NOTE(review): the value is stored exactly as read from
                    # the CSV (a string), although the container defaults to
                    # float -- confirm whether callers expect a number.
                    cooccurrence_counts[(target_word, context_word)] = row['coor_val']

    self.__new_words = new_words
    return cooccurrence_counts
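# Comment list       (stray web-page navigation text, not part of the code)
# Table of contents  (stray web-page navigation text, not part of the code)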