def lines_from_sourcepairs(pairs, numlines=2, enc='utf-8'):
    # type: (List[BytesPair], int, str) -> Tuple[List[str], List[str]]
    """Build two lists of decoded text lines from the two sides of ``pairs``.

    The first elements of all pairs are passed to ``concat_sources``, the
    result is split into lines (line endings are kept), and every line is
    escaped and decoded.  The second elements are processed the same way.

    :param pairs: 2-tuples of byte strings (``List[BytesPair]`` per the
        type comment).
    :param numlines: forwarded unchanged to ``concat_sources``.
    :param enc: encoding name forwarded to ``surrdecode``.
    :returns: ``(first_side_lines, second_side_lines)``.
    """
    def safeunistr(raw):
        # type: (bytes) -> str
        # difflib.HtmlDiff.make_table uses the bytes 0 and 1 in its
        # intermediate result and later replaces them with opening and
        # closing span tags.  Input that already contains those bytes would
        # therefore yield mismatched span tags.
        # '\x02' serves as the escape character: '\x00', '\x01' and '\x02'
        # become '\x02' followed by the digit 0, 1 or 2 respectively.
        def _escape(match):
            digit = ord('0') + ord(match.group(0))
            return b'\x02' + int2byte(digit)

        escaped = re.sub(b'[\x00-\x02]', _escape, raw)
        return surrdecode(escaped, enc=enc)

    # Separate the two sides before concatenating each one on its own.
    first_sources = [first for first, _ in pairs]
    second_sources = [second for _, second in pairs]

    # splitlines(True) keeps the line terminators on every line.
    first_lines = concat_sources(
        first_sources, numlines=numlines).splitlines(True)
    second_lines = concat_sources(
        second_sources, numlines=numlines).splitlines(True)

    atext = [safeunistr(line) for line in first_lines]
    btext = [safeunistr(line) for line in second_lines]
    return atext, btext
# yapf: disable
# ----------------------------------------------------------------------
# http://stackoverflow.com/questions/1707890/
# fast-way-to-filter-illegal-xml-unicode-chars-in-python
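# NOTE(review): the two lines that stood here ("Comment list" /
# "Article contents") were stray web-page navigation text, not program code.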