preprocess_bible.py 文件源码

python
阅读 76 收藏 0 点赞 0 评论 0

项目:lang-reps 作者: chaitanyamalaviya 项目源码 文件源码
def write_combined_file(lang_code, all_lang_paths, all_en_paths):
  """Append line-aligned document pairs to the combined training files.

  Every file named in ``all_lang_paths`` and ``all_en_paths`` is read in
  order, and the two resulting document lists are paired positionally.
  A pair is kept only when both documents contain the same number of
  lines; kept pairs are appended to
  ``<output_dir><lang_code>/train_<lang_code>_en.<lang_code>.txt`` and
  ``<output_dir><lang_code>/train_<lang_code>_en.en.txt`` respectively.

  Args:
    lang_code: Language code used in the output directory and file names.
    all_lang_paths: Iterable of iterables of paths to source-language files.
    all_en_paths: Iterable of iterables of paths to English files.

  Notes:
    ``output_dir`` is a module-level name defined outside this function and
    is concatenated directly with ``lang_code`` (no separator is inserted).
    Output files are opened in append mode, so repeated calls accumulate.
    If the two path collections yield different numbers of documents, the
    surplus documents are ignored by ``zip``.
  """
  src_combined_filename = "train" + "_" + lang_code + "_en." + lang_code + ".txt"
  tgt_combined_filename = "train" + "_" + lang_code + "_en.en" + ".txt"

  lang_dir = output_dir + lang_code
  if not os.path.exists(lang_dir):
    os.makedirs(lang_dir)

  write_lang = _read_documents(all_lang_paths)
  write_en = _read_documents(all_en_paths)

  # Equal newline counts <=> equal len(doc.split("\n")); mismatched pairs
  # cannot be line-aligned and are dropped.
  aligned = [
      (src_doc, tgt_doc)
      for src_doc, tgt_doc in zip(write_lang, write_en)
      if src_doc.count("\n") == tgt_doc.count("\n")
  ]

  # Do not touch (or create) the output files when nothing is kept.
  if not aligned:
    return

  # Open each output once instead of once per document pair.
  with open(lang_dir + "/" + src_combined_filename, 'a') as src_out:
    with open(lang_dir + "/" + tgt_combined_filename, 'a') as tgt_out:
      for src_doc, tgt_doc in aligned:
        src_out.write(src_doc)
        tgt_out.write(tgt_doc)


def _read_documents(path_groups):
  """Return the full text of every file in ``path_groups``, in order."""
  documents = []
  for group in path_groups:
    for filename in group:
      with open(filename) as f:
        documents.append(f.read())
  return documents
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号