JA_Hybrid_BiGRU.py 文件源码

python
阅读 23 收藏 0 点赞 0 评论 0

项目:NANHM-for-GEC 作者: shinochin 项目源码 文件源码
def load_2data(s_path, t_path):
    """Load a line-aligned source/target corpus as token arrays.

    The two files are read in lockstep, one sentence per line. Every line
    is stripped, split on whitespace and wrapped in a NumPy array. When a
    source line has more tokens than its target line, the pair is adjusted
    at the first position where the tokens differ: ``split_unk`` is applied
    to the target and ``concat_unk`` to the source (both helpers are defined
    elsewhere; their exact return types are not visible from here). Every
    other pair is stored unchanged.

    Every 100th pair (starting with the first) is printed as a progress
    sample.

    Args:
        s_path: Path of the source-side text file.
        t_path: Path of the target-side text file.

    Returns:
        A ``(source, target)`` tuple of two lists holding one entry per
        line of ``s_path``, in file order.

    Raises:
        AssertionError: If the two files have different line counts.
    """
    n_lines = count_lines(s_path)
    m_lines = count_lines(t_path)
    assert n_lines == m_lines, (
        'line count mismatch: %s has %d lines, %s has %d lines'
        % (s_path, n_lines, t_path, m_lines)
    )

    source = []
    target = []
    print('loading...: %s' % s_path)

    # The context manager closes both files even if a helper raises.
    with open(s_path) as f, open(t_path) as g:
        for i, so in enumerate(f):
            # The source file drives the loop; the target is read line by
            # line alongside it (readline() yields '' once it is exhausted).
            ta = g.readline()
            s_words = so.strip().split()
            t_words = ta.strip().split()
            s = np.array(s_words)
            t = np.array(t_words)
            s_len = len(s_words)
            t_len = len(t_words)

            # Pairs where the source is not longer than the target are kept
            # as-is; the original code had placeholder no-op branches here.
            if s_len > t_len:
                # Only the first mismatching position is rewritten.
                for p, (s_word, t_word) in enumerate(zip(s_words, t_words)):
                    if s_word != t_word:
                        t = split_unk(t, p)
                        s = concat_unk(s, p, s_len - t_len)
                        break

            target.append(t)
            source.append(s)
            if i % 100 == 0:
                print("#target:", " ".join(t))
                print("#source:", " ".join(s))

    print('...loading completed')
    return source, target
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号