def get_special_case_params():
    """Return one randomly chosen command-line argument list.

    Each candidate is a list of arguments: two file paths, optionally
    followed by flags. The Windows- and Unix-specific cases are meant to be
    exercised on both platforms so that crashes are detected on either one.
    """
    candidates = [
        [u"noexist", u"noexist"],
        [u"tests/ascii/ex1", u"noexist"],
        [u"noexist", u"tests/ascii/ex1"],
        [u"tests/ascii/ex1", u"tests/ascii/ex1", "--outfile", "/dev/null"],
        [u"tests/ascii/ex1", u"tests/ascii/ex2"],
        [u"tests/utf_8/ex3", u"tests/utf_8/ex4"],
        [
            u"tests/utf_8/ex3", u"tests/utf_8/ex4",
            u"--oldfile-encoding", u"\"utf-8\"",
            u"--newfile-encoding", u"\"utf-8\"",
        ],
        [
            u"tests/utf_8/ex3", u"tests/utf_8/ex4",
            u"--oldfile-encoding", u"\"utf-8\"",
            u"--newfile-encoding", u"\"utf-8\"",
            u"--output-encoding", u"\"utf-8\"",
        ],
        [u"tests/ascii/ex5", u"tests/ascii/ex6"],
        [u"tests/ascii/ex7", u"tests/ascii/ex8"],
        [u"tests/ascii/a.json", u"tests/ascii/b.json"],
        [
            u"tests/ascii/a.json", u"tests/ascii/b.json",
            u"--push-delimiters", u"\"{\"", u"\"[\"",
            u"--pop-delimiters", u"\"}\"", u"\"]\"",
            u"--include-delimiters",
        ],
        [
            u"tests/utf_8/fancy1", u"tests/utf_8/fancy2",
            u"--delimiters", u"???",
            u"--include-delimiters",
            u"--parameters-encoding", u"\"utf-8\"",
            u"--output-encoding", u"\"utf-8\"",
            u"--newfile-encoding", u"\"utf-8\"",
            u"--oldfile-encoding", u"\"utf-8\"",
        ],
        [
            u"tests/utf_8/fancy1", u"tests/utf_8/fancy2",
            u"--delimiters", u"\"\\u65e5\\u672c\\u56fd\"",
            u"--include-delimiters",
            u"--parameters-encoding", u"\"utf-8\"",
            u"--output-encoding", u"\"utf-8\"",
            u"--newfile-encoding", u"\"utf-8\"",
            u"--oldfile-encoding", u"\"utf-8\"",
        ],
        [
            u"tests/utf_8/this-is-encoded-in-utf-8",
            u"tests/utf_16/this-is-encoded-in-utf-16",
            u"--output-encoding", u"\"utf-8\"",
            u"--newfile-encoding", u"\"utf-16\"",
            u"--oldfile-encoding", u"\"utf-8\"",
            u"--enable-mark",
        ],
        [u"tests/ascii/a.html", u"tests/ascii/b.html", u"-m", u"html"],
    ]
    # A uniformly drawn index over the whole table.
    chosen_index = random.randrange(len(candidates))
    return candidates[chosen_index]
python类html()的实例源码
unit_tests.py 文件源码
项目:roberteldersoftwarediff
作者: RobertElderSoftware
项目源码
文件源码
阅读 29
收藏 0
点赞 0
评论 0
def error_handler(error):
    """Error handler for surrogateescape decoding.

    Should be used with an ASCII-compatible encoding (e.g., 'latin-1' or
    'utf-8'). Replaces any invalid byte sequences with surrogate code points
    (each offending byte ``b`` becomes the code point ``0xdc00 + b``).

    As specified in
    https://docs.python.org/2/library/codecs.html#codecs.register_error.

    :param error: the exception instance passed in by the codec machinery.
    :returns: a ``(replacement_text, resume_position)`` tuple, where the
        resume position is ``error.end``.
    :raises: re-raises ``error`` itself when it is not a
        ``UnicodeDecodeError`` or when an offending byte is in the ASCII
        range (below 128).
    """
    # We can't use this with UnicodeEncodeError; the UTF-8 encoder doesn't
    # raise an error for surrogates. Instead, use encode.
    if not isinstance(error, UnicodeDecodeError):
        raise error

    try:
        code_point_to_text = unichr  # Python 2
    except NameError:
        code_point_to_text = chr  # Python 3: chr() already returns text

    # bytearray yields integers on both Python 2 (where indexing a str gives
    # a 1-char str) and Python 3 (where indexing bytes gives an int).
    offending = bytearray(error.object[error.start:error.end])
    result = []
    for byte in offending:
        if byte < 128:
            raise error
        result.append(code_point_to_text(0xdc00 + byte))
    # Join with a unicode separator so the replacement is always text, even
    # when the offending range is empty.
    return u''.join(result), error.end
def from_path(path, manager=None, allow_nested=False, citation_clearing=True, encoding='utf-8', **kwargs):
    """Load a BEL graph from a file on disk.

    Thin wrapper around :func:`from_lines`: the file is opened with
    :code:`codecs.open` and the open handle is handed over as the line source.

    :param str path: A file path; a leading ``~`` is expanded to the user's home directory
    :param manager: database connection string to cache, pre-built :class:`Manager`, or None to use default cache
    :type manager: None or str or pybel.manager.Manager
    :param bool allow_nested: if true, turn off nested statement failures
    :param bool citation_clearing: Should :code:`SET Citation` statements clear evidence and all annotations?
                                Delegated to :class:`pybel.parser.ControlParser`
    :param str encoding: the encoding to use when reading this file. Is passed to :code:`codecs.open`.
                         See the python `docs <https://docs.python.org/3/library/codecs.html#standard-encodings>`_ for a
                         list of standard encodings. For example, files starting with a UTF-8 BOM should use
                         :code:`utf_8_sig`
    :param dict kwargs: extra keyword arguments, forwarded unchanged to :func:`from_lines`
                        (documented upstream as reaching :func:`pybel.io.line_utils.parse_lines`)
    :rtype: BELGraph
    """
    log.info('Loading from path: %s', path)
    expanded_path = os.path.expanduser(path)
    with codecs.open(expanded_path, encoding=encoding) as handle:
        # Parsing happens while the file is still open; the handle is closed
        # once the graph has been built.
        graph = from_lines(
            lines=handle,
            manager=manager,
            allow_nested=allow_nested,
            citation_clearing=citation_clearing,
            **kwargs
        )
    return graph
def run(self, edit, encoding, file_name, need_codecs):
    """Fill this view with instructions about a file that could not be converted.

    The view is renamed, marked as scratch, word-wrapped, filled with a
    message, made read-only and finally focused.

    :param edit: edit token passed through to ``self.view.insert``
    :param encoding: encoding name shown in the message
    :param file_name: file name shown in the message
    :param need_codecs: truthy when the failure is due to missing codecs;
        otherwise the encoding is reported as unsupported
    """
    view = self.view
    view.set_name('ConvertToUTF8 Instructions')
    view.set_scratch(True)
    view.settings().set("word_wrap", True)

    pieces = ['File: {0}\nEncoding: {1}\nError: '.format(file_name, encoding)]
    if not need_codecs:
        pieces.append('Unsupported encoding, see http://docs.python.org/library/codecs.html#standard-encodings\n\nPlease try other tools such as iconv.\n')
    else:
        pieces.append('Codecs missing\n\n')
        branch = self.get_branch(sublime.platform(), sublime.arch())
        if branch:
            # Pick the Codecs plugin flavour matching the editor generation.
            if ST3:
                ver = '33'
            else:
                ver = '26'
            pieces.append('Please install Codecs{0} plugin (https://github.com/seanliang/Codecs{0}/tree/{1}).\n'.format(ver, branch))
        else:
            # No known branch for this platform/arch: ask for debug details.
            import platform
            debug_template = 'Please send the following information to sunlxy (at) yahoo.com:\n====== Debug Information ======\nVersion: {0}-{1}\nPlatform: {2}\nPath: {3}\nEncoding: {4}\n'
            pieces.append(debug_template.format(
                sublime.version(), sublime.arch(), platform.platform(), sys.path, encoding
            ))

    view.insert(edit, 0, ''.join(pieces))
    view.set_read_only(True)
    view.window().focus_view(view)
def encode(self, input, errors='strict'):
    """Encode ``input`` with ``self.base_encoding``, patching unencodable runs.

    Only ``errors='strict'`` is accepted. Whenever the base codec raises
    ``UnicodeEncodeError``, the text before the failure is encoded normally
    and ``self.error`` is asked for the replacement *bytes* plus the position
    at which to resume.

    :param input: the text to encode
    :param errors: must be ``'strict'``
    :returns: ``(encoded_bytes, len(input))``
    """
    assert errors == 'strict'
    # A plain codecs.encode(input, self.base_encoding, self.name) with a
    # registered error handler would be the obvious implementation, but an
    # encoding error handler is expected to return a **Unicode** replacement
    # that must itself be encodable. So the handler is driven by hand here
    # and is expected to return bytes that go straight into the output.
    #
    # This seems to have been fixed in Python 3.3; that path could be tried
    # first, keeping this loop only as a fallback.
    # https://docs.python.org/3.3/library/codecs.html#codecs.register_error
    consumed = len(input)
    chunks = []
    remaining = input
    while remaining:
        try:
            encoded = codecs.encode(remaining, self.base_encoding)
        except UnicodeEncodeError as exc:
            # Keep the part that did encode, then splice in the handler's bytes.
            chunks.append(codecs.encode(remaining[:exc.start], self.base_encoding))
            substitute, resume_at = self.error(exc)
            chunks.append(substitute)
            remaining = remaining[resume_at:]
        else:
            chunks.append(encoded)
            remaining = ''  # Everything left was converted
    return b''.join(chunks), consumed
def pocketsphinx(self):
    """Get pocketsphinx speech to text settings.

    Returns a shallow copy of the ``'pocketsphinx'`` entry of ``self._data``,
    or an empty dict when that entry is missing.
    """
    # NOTE: copy() returns a shallow copy of x.
    # source: https://docs.python.org/3/library/copy.html
    stored_settings = self._data.get('pocketsphinx', {})
    return stored_settings.copy()
def pocketsphinx(self):
    """Get pocketsphinx speech to text settings.

    The ``'pocketsphinx'`` entry of ``self._data`` is looked up (defaulting to
    an empty dict) and a shallow copy of it is handed back.
    """
    # NOTE: copy() returns a shallow copy of x.
    # source: https://docs.python.org/3/library/copy.html
    section = self._data.get('pocketsphinx', {})
    duplicate = section.copy()
    return duplicate