def rm_doubled_quotes(entry):
    """Remove duplicated quotes (translations) from the senses of an entry.

    Every ``quote`` element found below ``entry`` via the path
    ``sense`` -> ``cit`` -> ``quote`` is inspected in document order. The
    first quote carrying a given text is kept; every later quote with the
    same text (including quotes without any text) is removed from its
    parent ``cit`` element, no matter which sense it belongs to.

    The entry is modified in place.

    Returns True if at least one quote was removed, False otherwise.
    """
    # Collect all (parent cit, quote) pairs up front: quotes are removed from
    # the tree below, so the tree must not be traversed while it is mutated.
    # NOTE(review): `findall` is the module-level helper defined elsewhere in
    # this file; it is assumed to yield each matching child exactly once.
    translations = [(cit, quote)
                    for sense in findall(entry, 'sense')
                    for cit in findall(sense, 'cit')
                    for quote in findall(cit, 'quote')]

    seen_texts = set()
    changed = False
    for cit, quote in translations:
        if quote.text in seen_texts:
            # Each quote is visited exactly once, so it is guaranteed to
            # still be attached to its cit here; removing the same element
            # twice would raise ValueError.
            cit.remove(quote)
            changed = True
        else:
            seen_texts.add(quote.text)
    return changed
评论列表
文章目录