def test_locale_caching(self):
# Issue #22410
oldlocale = locale.setlocale(locale.LC_CTYPE)
self.addCleanup(locale.setlocale, locale.LC_CTYPE, oldlocale)
for loc in 'en_US.iso88591', 'en_US.utf8':
try:
locale.setlocale(locale.LC_CTYPE, loc)
except locale.Error:
# Unsupported locale on this system
self.skipTest('test needs %s locale' % loc)
re.purge()
self.check_en_US_iso88591()
self.check_en_US_utf8()
re.purge()
self.check_en_US_utf8()
self.check_en_US_iso88591()
python类purge()的实例源码
def test_locale_caching(self):
# Issue #22410
oldlocale = locale.setlocale(locale.LC_CTYPE)
self.addCleanup(locale.setlocale, locale.LC_CTYPE, oldlocale)
for loc in 'en_US.iso88591', 'en_US.utf8':
try:
locale.setlocale(locale.LC_CTYPE, loc)
except locale.Error:
# Unsupported locale on this system
self.skipTest('test needs %s locale' % loc)
re.purge()
self.check_en_US_iso88591()
self.check_en_US_utf8()
re.purge()
self.check_en_US_utf8()
self.check_en_US_iso88591()
def test_locale_caching(self):
# Issue #22410
oldlocale = locale.setlocale(locale.LC_CTYPE)
self.addCleanup(locale.setlocale, locale.LC_CTYPE, oldlocale)
for loc in 'en_US.iso88591', 'en_US.utf8':
try:
locale.setlocale(locale.LC_CTYPE, loc)
except locale.Error:
# Unsupported locale on this system
self.skipTest('test needs %s locale' % loc)
re.purge()
self.check_en_US_iso88591()
self.check_en_US_utf8()
re.purge()
self.check_en_US_utf8()
self.check_en_US_iso88591()
def test_locale_caching(self):
# Issue #22410
oldlocale = locale.setlocale(locale.LC_CTYPE)
self.addCleanup(locale.setlocale, locale.LC_CTYPE, oldlocale)
for loc in 'en_US.iso88591', 'en_US.utf8':
try:
locale.setlocale(locale.LC_CTYPE, loc)
except locale.Error:
# Unsupported locale on this system
self.skipTest('test needs %s locale' % loc)
re.purge()
self.check_en_US_iso88591()
self.check_en_US_utf8()
re.purge()
self.check_en_US_utf8()
self.check_en_US_iso88591()
def main():
times = {}
html = urllib2.urlopen('http://example.webscraping.com/view/United-Kingdom-239').read()
NUM_ITERATIONS = 1000 # number of times to test each scraper
for name, scraper in ('Regular expressions', regex_scraper), ('Beautiful Soup', beautiful_soup_scraper), ('Lxml', lxml_scraper):
times[name] = []
# record start time of scrape
start = time.time()
for i in range(NUM_ITERATIONS):
if scraper == regex_scraper:
# the regular expression module will cache results
# so need to purge this cache for meaningful timings
re.purge()
result = scraper(html)
# check scraped result is as expected
assert(result['area'] == '244,820 square kilometres')
times[name].append(time.time() - start)
# record end time of scrape and output the total
end = time.time()
print '{}: {:.2f} seconds'.format(name, end - start)
writer = csv.writer(open('times.csv', 'w'))
header = sorted(times.keys())
writer.writerow(header)
for row in zip(*[times[scraper] for scraper in header]):
writer.writerow(row)
def bench_regex_compile(loops, regexes):
range_it = xrange(loops)
t0 = perf.perf_counter()
for _ in range_it:
for regex, flags in regexes:
re.purge()
# ignore result (compiled regex)
re.compile(regex, flags)
return perf.perf_counter() - t0
def purge():
re.purge()
def asciiconvert(user_input, enable_spechar=True):
"""
Converts user-input string into ASCII.
Returns a string with ASCII chars only
"""
SPECHAR_RE = r'[\s.?!:;§*^¨%$\|\[\]\(\)\{\}+=`~\'\"\\&]*'
accented_chars = [(r'[àäâ]*', 'a'),
(r'[éèëê]*', 'e'),
(r'[ìîï]*', 'i'),
(r'[òöô]*', 'o'),
(r'[ùûü]*', 'u'),
(r'[ÿ??]*', 'y')]
if not enable_spechar:
spechar = re.compile(SPECHAR_RE)
ans = spechar.findall(user_input)
if ans:
# Replacing all occurrences by an empty string
user_input = spechar.sub('', user_input, count=0)
re.purge()
for pattern, repl in accented_chars:
accented = re.compile(pattern)
# Replacing all occurrences by a non-accented char
user_input = accented.sub(repl, user_input, count=0)
re.purge()
return user_input
def __del__(self):
re.purge()
def dash_R_cleanup(fs, ps, pic, zdc, abcs):
import gc, copy_reg
import _strptime, linecache
dircache = test_support.import_module('dircache', deprecated=True)
import urlparse, urllib, urllib2, mimetypes, doctest
import struct, filecmp
from distutils.dir_util import _path_created
# Clear the warnings registry, so they can be displayed again
for mod in sys.modules.values():
if hasattr(mod, '__warningregistry__'):
del mod.__warningregistry__
# Restore some original values.
warnings.filters[:] = fs
copy_reg.dispatch_table.clear()
copy_reg.dispatch_table.update(ps)
sys.path_importer_cache.clear()
sys.path_importer_cache.update(pic)
try:
import zipimport
except ImportError:
pass # Run unmodified on platforms without zipimport support
else:
zipimport._zip_directory_cache.clear()
zipimport._zip_directory_cache.update(zdc)
# clear type cache
sys._clear_type_cache()
# Clear ABC registries, restoring previously saved ABC registries.
for abc, registry in abcs.items():
abc._abc_registry = registry.copy()
abc._abc_cache.clear()
abc._abc_negative_cache.clear()
# Clear assorted module caches.
_path_created.clear()
re.purge()
_strptime._regex_cache.clear()
urlparse.clear_cache()
urllib.urlcleanup()
urllib2.install_opener(None)
dircache.reset()
linecache.clearcache()
mimetypes._default_mime_types()
filecmp._cache.clear()
struct._clearcache()
doctest.master = None
try:
import ctypes
except ImportError:
# Don't worry about resetting the cache if ctypes is not supported
pass
else:
ctypes._reset_cache()
# Collect cyclic trash.
gc.collect()
def dash_R_cleanup(fs, ps, pic, zdc, abcs):
import gc, copy_reg
import _strptime, linecache
dircache = test_support.import_module('dircache', deprecated=True)
import urlparse, urllib, urllib2, mimetypes, doctest
import struct, filecmp
from distutils.dir_util import _path_created
# Clear the warnings registry, so they can be displayed again
for mod in sys.modules.values():
if hasattr(mod, '__warningregistry__'):
del mod.__warningregistry__
# Restore some original values.
warnings.filters[:] = fs
copy_reg.dispatch_table.clear()
copy_reg.dispatch_table.update(ps)
sys.path_importer_cache.clear()
sys.path_importer_cache.update(pic)
try:
import zipimport
except ImportError:
pass # Run unmodified on platforms without zipimport support
else:
zipimport._zip_directory_cache.clear()
zipimport._zip_directory_cache.update(zdc)
# clear type cache
sys._clear_type_cache()
# Clear ABC registries, restoring previously saved ABC registries.
for abc, registry in abcs.items():
abc._abc_registry = registry.copy()
abc._abc_cache.clear()
abc._abc_negative_cache.clear()
# Clear assorted module caches.
_path_created.clear()
re.purge()
_strptime._regex_cache.clear()
urlparse.clear_cache()
urllib.urlcleanup()
urllib2.install_opener(None)
dircache.reset()
linecache.clearcache()
mimetypes._default_mime_types()
filecmp._cache.clear()
struct._clearcache()
doctest.master = None
try:
import ctypes
except ImportError:
# Don't worry about resetting the cache if ctypes is not supported
pass
else:
ctypes._reset_cache()
# Collect cyclic trash.
gc.collect()