def vector_loader_modify(text_field_words,
                         path='word_embedding/glove.6B.300d.txt',
                         dim=300):
    """Look up a pre-trained embedding vector for every word in a vocabulary.

    The embedding file is read as UTF-8 text with one entry per line: a token
    followed by its space-separated float components. Words that do not occur
    in the file all receive the same random fallback vector, drawn once per
    call from U(-0.1, 0.1) and rounded to 6 decimals.

    Args:
        text_field_words: Iterable of hashable words; it is consumed once.
        path: Location of the embedding text file.
        dim: Length of the random fallback vector. Vectors read from the
            file keep whatever length the file gives them.

    Returns:
        A list with one vector (list of floats) per input word, in input
        order. Rows for repeated words, and all fallback rows, refer to the
        same list object, so copy a row before mutating it in place.

    Raises:
        OSError: If the embedding file cannot be opened.
        ValueError: If a component of a requested word is not a valid float.
    """
    # Materialize once so one-shot iterables (generators) are supported and
    # a set can be built for O(1) "is this word needed?" checks.
    vocab = list(text_field_words)
    wanted = set(vocab)

    # Stream the file line by line and keep only vectors that are actually
    # requested; the full embedding table is far larger than a typical
    # vocabulary and does not need to live in memory.
    vectors = {}
    with open(path, 'rt', encoding='utf-8') as handle:
        for raw_line in handle:
            line = raw_line.rstrip()
            if not line:
                continue  # tolerate blank lines (e.g. at end of file)
            word, _, components = line.partition(' ')
            if word in wanted:
                # As with a plain dict build, a later duplicate entry wins.
                vectors[word] = [float(value) for value in components.split()]

    # Drawn exactly once per call, whether or not it ends up being used, so
    # that results under a fixed NumPy seed do not depend on the vocabulary.
    fallback = np.random.uniform(-0.1, 0.1, dim).round(6).tolist()

    return [vectors.get(word, fallback) for word in vocab]
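# Scraped page footer (not part of the program):
# word_embedding_loader.py - source file
# python
# Views: 25
# Favorites: 0
# Likes: 0
# Comments: 0
# Comment list / Table of contents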