def parse_translation_table(self, table):
    """ Overrides GeneralParser's method.

    Walks every "li" element under ``table``, splits its text on ":" into a
    language name and a list of translations, and yields one tuple per
    non-empty translation.

    :param table: a Tag object. Not necessarily a table; can be a div.
    :return: a generator of (translation, language_name, language_code)
        tuples; language_code is "" when the "li" has no element with the
        "trad-sup-code" class.
    """
    # go through all "li" elements in a table
    for li in table.find_all('li'):
        if not isinstance(li, Tag):
            continue
        text = li.get_text().split(':')
        if len(text) < 2:
            # no "language: translations" separator, nothing to extract
            continue
        # language name is before ":"; strip once, it is reused for every
        # translation of this "li"
        lang_name = text[0].strip()
        # language code is usually in super script
        code_tag = li.find(class_="trad-sup-code")
        if code_tag:
            # drop the first and last character of the code text
            # (presumably the surrounding parentheses -- TODO confirm)
            lang_code = code_tag.text.strip()[1:-1]
        else:
            lang_code = ""
        # There are two functions that remove parentheses. Not sure which one to use.
        # NOTE: only the text between the first and the second ":" is parsed.
        t = remove_parenthesis(text[1])
        trans_list = re.split(COMMA_OR_SEMICOLON, t)
        # each "trans" is: translation <sup>(lang_code)</sup> (transliteration)
        # lang_code and transliteration may not exist
        for trans in trans_list:
            translation = trans.split('(')[0].strip()
            if not translation:
                # empty segment (nothing after ":", trailing separator, or
                # only a parenthesised note): there is no translation to report
                continue
            yield (translation, lang_name, lang_code)
parse_nl.py 文件源码
python
阅读 20
收藏 0
点赞 0
评论 0
评论列表
文章目录