def load_2data(s_path, t_path):
    """Load two line-aligned, whitespace-tokenised text files as token arrays.

    Both files are read in lockstep, one line at a time.  Each line is
    stripped, split on whitespace and wrapped in a NumPy array.  When the
    token sequences of a pair disagree, the pair is adjusted with the
    module-level helpers ``split_unk`` / ``concat_unk`` before being stored
    (their exact semantics are defined elsewhere in this file).

    Args:
        s_path: Path of the source-side text file.
        t_path: Path of the target-side text file.

    Returns:
        A ``(source, target)`` tuple of two lists with one entry per line of
        ``s_path``; each entry is the (possibly adjusted) token array.

    Raises:
        AssertionError: If ``count_lines`` reports different line counts for
            the two files.
    """
    n_lines = count_lines(s_path)
    m_lines = count_lines(t_path)
    assert n_lines == m_lines, (
        'line count mismatch: %s has %d lines, %s has %d lines'
        % (s_path, n_lines, t_path, m_lines)
    )

    source = []
    target = []
    print('loading...: %s' % s_path)

    # The context manager guarantees both handles are closed, even if one of
    # the helpers raises half-way through the files.
    with open(s_path) as f, open(t_path) as g:
        # The source file drives the loop; the target is advanced by exactly
        # one line per source line so the two stay aligned.
        for i, so in enumerate(f):
            ta = g.readline()
            s_words = so.strip().split()
            t_words = ta.strip().split()
            s = np.array(s_words)
            t = np.array(t_words)
            s_len = len(s_words)
            t_len = len(t_words)

            if s_len == t_len:
                # Same length: if any token differs, adjust the target at
                # the first differing position.
                mismatch = np.where(s != t)[0]
                if mismatch.size:
                    t = split_unk(t, mismatch[0])
            elif s_len > t_len:
                # Source is longer: locate the first differing position
                # within the target's length, adjust the target there and
                # pass the length surplus to concat_unk for the source.
                # NOTE(review): presumably this merges the surplus source
                # tokens into one -- confirm against concat_unk.
                for p, (s_word, t_word) in enumerate(zip(s_words, t_words)):
                    if s_word != t_word:
                        t = split_unk(t, p)
                        s = concat_unk(s, p, s_len - t_len)
                        break
            # When the source is shorter than the target the pair is stored
            # unchanged.

            target.append(t)
            source.append(s)

            # Echo every 100th pair as a cheap progress / sanity check.
            if i % 100 == 0:
                print("#target:", " ".join(t))
                print("#source:", " ".join(s))

    print('...loading completed')
    return source, target
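# Comment list        (web-page navigation residue, not part of the code)
# Table of contents   (web-page navigation residue, not part of the code)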