def map_item2id(items, voc, max_len, none_word=1, lower=False, init_value=0,
                allow_error=True):
    """Convert a sequence of items (e.g. words or POS tags) to a fixed-length id array.

    Each of the first ``max_len`` items is looked up in ``voc`` and its id is
    written to the matching position of the result. Items beyond ``max_len``
    are ignored; positions past the end of ``items`` keep ``init_value``.

    Args:
        items: iterable of items to convert. When ``lower`` is true every
            item must provide a ``.lower()`` method.
        voc: vocabulary mapping an item to its integer id; must support
            ``in`` and ``[]`` lookups.
        max_len: int, length of the returned array.
        none_word: int id stored for items missing from ``voc`` when
            ``allow_error`` is true. Defaults to 1.
        lower: bool, lower-case each item before the lookup.
        init_value: fill value for the array before ids are written
            (i.e. the padding value). Defaults to 0.
        allow_error: bool. When true, items missing from ``voc`` map to
            ``none_word``; when false, the failed ``voc[item]`` lookup
            propagates (``KeyError`` for a plain dict).

    Returns:
        arr: np.ndarray, dtype=int32, shape=[max_len,]

    Raises:
        AssertionError: if ``none_word`` is not an int.
    """
    assert isinstance(none_word, int), "none_word must be an int"

    # np.full pins the dtype to int32 regardless of init_value's type;
    # ``np.zeros(...) + init_value`` could promote the array to another dtype.
    arr = np.full((max_len,), init_value, dtype='int32')

    # zip stops at the shorter of the two, so items longer than max_len are
    # truncated and shorter inputs leave the tail at init_value.
    for i, item in zip(range(max_len), items):
        if lower:
            item = item.lower()
        if allow_error:
            arr[i] = voc[item] if item in voc else none_word
        else:
            arr[i] = voc[item]
    return arr
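# [web-page residue, not part of the code] Comment list
# [web-page residue, not part of the code] Table of contents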