def load_data(from_file, input_words, grammar, max_length):
    """Read a tab-separated dataset file and vectorize every example in it.

    Each non-blank line must hold three or four tab-separated fields:
    an ignored first field, the sentence, the canonical program and,
    optionally, a constituency parse of the sentence.

    Args:
        from_file: path of the file to read (passed to ``open``).
        input_words: forwarded unchanged to ``vectorize`` together with
            the sentence.
        grammar: object providing ``vectorize_program(canonical, max_length)``
            which returns a ``(label, label_length)`` pair.
        max_length: forwarded to every vectorizer; also determines the size
            (``2 * max_length - 1``) of the placeholder parse vector.

    Returns:
        A tuple ``(inputs, input_lengths, parses, labels, label_lengths)``
        of five parallel lists with one entry per example.

    Raises:
        ValueError: if a non-blank line does not have 3 or 4 fields.
    """
    inputs = []
    input_lengths = []
    parses = []
    labels = []
    label_lengths = []

    with open(from_file, 'r') as data:
        for line_no, line in enumerate(data, 1):
            stripped = line.strip()
            if not stripped:
                # Tolerate blank lines (e.g. a trailing newline at EOF)
                # instead of failing on tuple unpacking.
                continue

            fields = stripped.split('\t')
            if len(fields) == 4:
                _, sentence, canonical, parse = fields
            elif len(fields) == 3:
                _, sentence, canonical = fields
                parse = None
            else:
                raise ValueError(
                    '%s:%d: expected 3 or 4 tab-separated fields, got %d'
                    % (from_file, line_no, len(fields)))

            # Named `input_vector` rather than `input` so the builtin
            # is not shadowed.
            input_vector, in_len = vectorize(sentence, input_words, max_length,
                                             add_eos=False)
            inputs.append(input_vector)
            input_lengths.append(in_len)

            label, label_len = grammar.vectorize_program(canonical, max_length)
            labels.append(label)
            label_lengths.append(label_len)

            if parse is not None:
                parses.append(
                    vectorize_constituency_parse(parse, max_length, in_len))
            else:
                # No parse supplied: use an all-False placeholder so that
                # `parses` stays aligned with the other lists.  The builtin
                # `bool` is used because the `np.bool` alias was removed in
                # NumPy 1.24.
                parses.append(np.zeros((2 * max_length - 1,), dtype=bool))

    return inputs, input_lengths, parses, labels, label_lengths
loader.py 文件源码
python
阅读 34
收藏 0
点赞 0
评论 0
评论列表
文章目录