def rnc_markup_tokenizer(s):
    """Split a roman-numeral chord markup string into token tuples.

    Each token has the shape ``[rn][mod1][num][sep]``:

    * ``rn``   -- optional one-character prefix followed by one or more of
      the letters ``i``, ``v``, ``I``, ``V``
    * ``mod1`` -- any run of characters that are not digits, whitespace
      or ``-``
    * ``num``  -- any further run of characters that are not whitespace
      or ``-``
    * ``sep``  -- a dash with optional surrounding whitespace, or plain
      whitespace (possibly empty)

    Returns a list of ``(rn, mod1, num, sep)`` 4-tuples in input order.
    If the text at the current position does not start with a valid
    token, a final ``('ERR:<remaining text>', '', '', '')`` tuple is
    appended and tokenizing stops.  An empty input yields ``[]``.
    """
    # Raw string: the original used a plain literal, so ``\d`` and ``\s``
    # were invalid escape sequences (SyntaxWarning on Python 3.12+).
    # NOTE(review): the ``??`` in the first character class looks like
    # mojibake for flat/sharp glyphs -- confirm against the upstream source.
    # It is kept as-is so the set of accepted inputs does not change.
    rn_re = re.compile(r"""(?P<p1>[b??#]?[ivIV]+)
                           (?P<p2>[^\d\s-]*)
                           (?P<p3>[^\s-]*)
                           (?P<sep>(\s*-\s*|\s*))""",
                       re.VERBOSE | re.UNICODE)
    i = 0
    retval = []
    while i < len(s):
        # Match in place at offset ``i`` rather than copying ``s[i:]`` on
        # every iteration; the pattern has no ``^`` or lookbehind, so the
        # result is the same.
        m = rn_re.match(s, i)
        if not m:
            retval.append((u'ERR:%s' % s[i:], '', '', ''))
            break
        retval.append((m.group('p1'), m.group('p2'),
                       m.group('p3'), m.group('sep')))
        # ``p1`` needs at least one character, so the cursor always advances.
        i = m.end()
    return retval
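# Stray web-page navigation text, not code: "评论列表" (comment list)
# Stray web-page navigation text, not code: "文章目录" (article contents)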