def _make_boundary(text=None):
    """Return a randomly generated boundary string.

    The boundary is fifteen '=' characters, a random number formatted with
    the module-level ``_fmt``, and a trailing '=='.

    If *text* is given, '.0', '.1', ... is appended to the boundary until no
    line of *text* consists of '--' + boundary, optionally followed by '--'.
    """
    token = random.randrange(sys.maxint)
    boundary = ('=' * 15) + (_fmt % token) + '=='
    if text is None:
        return boundary
    candidate = boundary
    suffix = 0
    # Keep trying suffixed variants while the candidate still shows up as a
    # delimiter line somewhere in the text.
    while re.search('^--' + re.escape(candidate) + '(--)?$', text,
                    re.MULTILINE):
        candidate = boundary + '.' + str(suffix)
        suffix += 1
    return candidate
# Example source code using Python's escape()
def pickline(file, key, casefold = 1):
    """Return the value of header *key* from the file at path *file*.

    The file is scanned line by line for the first line that starts with
    ``key:``. The text after the colon, plus any directly following lines
    that begin with whitespace, is returned with surrounding whitespace
    stripped.

    :param file: path of the file to read.
    :param key: header name; matched literally.
    :param casefold: if true (the default), match *key* case-insensitively.
    :return: the header text, or ``None`` if the file cannot be opened or
        no line matches.
    """
    try:
        f = open(file, 'r')
    except IOError:
        return None
    prog = re.compile(re.escape(key) + ':',
                      re.IGNORECASE if casefold else 0)
    # The context manager closes the file on every exit path.
    with f:
        while 1:
            line = f.readline()
            if not line:
                break
            if prog.match(line):
                text = line[len(key)+1:]
                # Gather continuation lines (those starting with whitespace).
                while 1:
                    line = f.readline()
                    if not line or not line[0].isspace():
                        break
                    text = text + line
                return text.strip()
    return None
def _findLib_gcc(name):
    """Ask the C compiler where library *name* is found.

    Runs ``gcc`` (or ``cc`` if ``gcc`` is unavailable) with ``-Wl,-t`` and
    ``-l<name>``, writing the link output to a temporary file, and searches
    the captured compiler output for a token containing ``lib<name>.``.

    :param name: library name as it would follow ``-l``.
    :return: the first matching token of the output, or ``None``.
    :raises OSError: if the pipe's close status equals 10, or if removing
        the temporary file fails for a reason other than ENOENT.
    """
    expr = r'[^\(\)\s]*lib%s\.[^\(\)\s]*' % re.escape(name)
    fdout, ccout = tempfile.mkstemp()
    os.close(fdout)
    # NOTE(review): *name* is concatenated into a shell command unquoted;
    # callers must not pass untrusted input here.
    cmd = 'if type gcc >/dev/null 2>&1; then CC=gcc; elif type cc >/dev/null 2>&1; then CC=cc;else exit 10; fi;' \
          '$CC -Wl,-t -o ' + ccout + ' 2>&1 -l' + name
    try:
        f = os.popen(cmd)
        try:
            trace = f.read()
        finally:
            rv = f.close()
    finally:
        # Always remove the temporary output file; it may already be gone.
        try:
            os.unlink(ccout)
        except OSError as e:
            if e.errno != errno.ENOENT:
                raise
    # NOTE(review): close() may report an encoded wait status rather than
    # the raw exit code, in which case this comparison never fires --
    # confirm before relying on this error.
    if rv == 10:
        raise OSError('gcc or cc command not found')
    res = re.search(expr, trace)
    if not res:
        return None
    return res.group(0)
def replace_citations_strings_with_ids(text, string_to_id):
    """
    Convert citations to their IDs for pandoc.

    `text` is markdown source text

    `string_to_id` is a dictionary like:
    @10.7287/peerj.preprints.3100v1 -> 11cb5HXoY

    Each key is matched literally and replaced by '@' + its ID. A key is
    left alone where it is directly followed by further citation-key
    characters ending in a letter, digit or slash.

    Returns the converted text.
    """
    for old, new in string_to_id.items():
        replacement = '@' + new
        text = re.sub(
            pattern=re.escape(old) + r'(?![\w:.#$%&\-+?<>~/]*[a-zA-Z0-9/])',
            # A callable inserts the ID verbatim; a plain string would be
            # parsed as a template (backslash escapes, group references).
            repl=lambda match, replacement=replacement: replacement,
            string=text,
        )
    return text
def setup(self, config):
    """
    Determine max size to unpack and which directories to ignore.

    Reads ``utils/diskimage_ignore.txt`` below ``config[helper.CODE_ROOT]``.
    Every non-blank line that does not start with ``#`` becomes one literal,
    case-insensitive alternative of the compiled ``self.ignore`` pattern.

    :param config: Configuration object.
    :type config: ``dict``
    """
    self.max_size = config.get(helper.MAX_FILE_SIZE, 128) * 1024 * 1024
    self.config = config
    ignore = []
    seen = set()
    path = os.path.join(
        config[helper.CODE_ROOT], 'utils', 'diskimage_ignore.txt')
    with open(path) as inp:
        for line in inp:
            entry = line.strip()
            if not entry or line.startswith('#'):
                continue
            escaped = re.escape(entry.lower())
            # Drop duplicate entries but keep the file's order.
            if escaped not in seen:
                seen.add(escaped)
                ignore.append(escaped)
    # An empty alternation compiles to a pattern that matches every string,
    # which would ignore everything; fall back to a never-matching pattern.
    self.ignore = re.compile('|'.join(ignore) if ignore else r'(?!)', re.I)
def prepare(rep, onlyAtEnds=False, multiOccur=True):
    '''Build a compiled regex that matches the strings to replace.

    ``rep`` is either a list of strings (each mapped to an empty
    replacement) or a dict of string -> replacement. Every key is passed
    through ``re.escape`` before being placed in the pattern.

    With ``onlyAtEnds`` the pattern is anchored to the start and end of the
    input: as a character class repeated one or more times when
    ``multiOccur`` is true, otherwise as one anchored alternative per key.

    Returns ``(pattern, rep)`` where ``rep`` is keyed by the escaped strings.
    '''
    if type(rep) == list:
        rep = dict.fromkeys(rep, '')
    escaped = {re.escape(key): value for key, value in rep.items()}
    keys = list(escaped)
    if not onlyAtEnds:
        expr = "|".join(keys)
    elif multiOccur:
        expr = "^[{0}]+|[{0}]+$".format(''.join(keys))
    else:
        at_start = "|".join('^%s' % key for key in keys)
        at_end = "|".join('%s$' % key for key in keys)
        expr = "{0}|{1}".format(at_start, at_end)
    return (re.compile(expr), escaped)
def visit_textclause(self, textclause, **kw):
    """Render *textclause* to a string.

    The clause text is passed through ``post_process_text``; every
    ``BIND_PARAMS`` match is replaced by the rendered bind parameter, and
    every ``BIND_PARAMS_ESC`` match is then reduced to its first group.
    Sets ``self.isplaintext`` when ``self.stack`` is empty.
    """
    def _render_bind(match):
        key = match.group(1)
        # Parameters declared on the clause are compiled; any other name
        # is rendered as a plain bind parameter string.
        if key not in textclause._bindparams:
            return self.bindparam_string(key, **kw)
        return self.process(textclause._bindparams[key], **kw)

    if not self.stack:
        self.isplaintext = True

    processed = self.post_process_text(textclause.text)
    with_binds = BIND_PARAMS.sub(_render_bind, processed)
    # un-escape any \:params
    return BIND_PARAMS_ESC.sub(lambda m: m.group(1), with_binds)
def list(self):
    """Lists all sessions in the store.

    Returns, for every file name in ``self.path`` that fits
    ``self.filename_template``, the part standing in for the ``%s``
    placeholder (at least five characters). File names ending in
    ``_fs_transaction_suffix`` are skipped.

    .. versionadded:: 0.6
    """
    prefix, suffix = self.filename_template.split('%s', 1)
    matcher = re.compile(r'%s(.{5,})%s$' % (re.escape(prefix),
                                            re.escape(suffix)))
    session_ids = []
    for entry in os.listdir(self.path):
        #: this is a session that is still being saved.
        if entry.endswith(_fs_transaction_suffix):
            continue
        found = matcher.match(entry)
        if found is None:
            continue
        session_ids.append(found.group(1))
    return session_ids
def compile_rules(environment):
    """Compiles all the rules from the environment into a list of rules.

    Returns ``(name, regex)`` pairs ordered by descending delimiter length,
    with ties broken by descending name and regex.
    """
    esc = re.escape
    candidates = []
    for kind, delimiter in (
        ('comment', environment.comment_start_string),
        ('block', environment.block_start_string),
        ('variable', environment.variable_start_string),
    ):
        candidates.append((len(delimiter), kind, esc(delimiter)))

    statement_prefix = environment.line_statement_prefix
    if statement_prefix is not None:
        candidates.append((len(statement_prefix), 'linestatement',
                           r'^[ \t\v]*' + esc(statement_prefix)))

    comment_prefix = environment.line_comment_prefix
    if comment_prefix is not None:
        candidates.append((len(comment_prefix), 'linecomment',
                           r'(?:^|(?<=\S))[^\S\r\n]*' + esc(comment_prefix)))

    # The leading length exists only for ordering; strip it from the result.
    candidates.sort(reverse=True)
    return [entry[1:] for entry in candidates]
def import_string(import_name, silent=False):
    """Import and return the object named by *import_name*.

    The name is either dotted (``xml.sax.saxutils.escape``), where the last
    component is the object, or uses a colon to separate module and object
    (``xml.sax.saxutils:escape``). A name with neither is imported as a
    module.

    If `silent` is True, ``None`` is returned when the import or attribute
    lookup fails; otherwise the error propagates.

    :return: imported object
    """
    try:
        if ':' in import_name:
            module_name, attr = import_name.split(':', 1)
        elif '.' in import_name:
            module_name, _, attr = import_name.rpartition('.')
        else:
            return __import__(import_name)
        module = __import__(module_name, None, None, [attr])
        return getattr(module, attr)
    except (ImportError, AttributeError):
        if silent:
            return None
        raise
def clean_downloaded_metadata(self, mi):
    """Normalise title, authors, tags, ISBN and series of *mi* in place.

    Case fixing (``fixcase``) is applied only when ``mi.language`` equals
    ``'zhn'``. Authors always go through ``fixauthors`` and the ISBN through
    ``check_isbn``. When both title and series are set, the first
    series-related suffix pattern that changes the title (without emptying
    it) is removed from the title.
    """
    fix_case = (mi.language == 'zhn')

    if mi.title and fix_case:
        # Remove series information from title
        series_note = re.search(r'\S+\s+(\(.+?\s+Book\s+\d+\))$', mi.title)
        if series_note is not None:
            mi.title = mi.title.replace(series_note.group(1), '').strip()
        mi.title = fixcase(mi.title)

    mi.authors = fixauthors(mi.authors)

    if mi.tags and fix_case:
        mi.tags = [fixcase(tag) for tag in mi.tags]

    mi.isbn = check_isbn(mi.isbn)

    if mi.series and fix_case:
        mi.series = fixcase(mi.series)

    if not (mi.title and mi.series):
        return
    suffix_templates = (
        r':\s*Book\s+\d+\s+of\s+%s$',
        r'\(%s\)$',
        r':\s*%s\s+Book\s+\d+$',
    )
    for template in suffix_templates:
        series_pattern = template % re.escape(mi.series)
        shortened = re.sub(series_pattern, '', mi.title, flags=re.I).strip()
        if shortened and shortened != mi.title:
            mi.title = shortened
            break
def _glob_to_re(self, pattern):
    """Translate a shell-like glob pattern to a regular expression.

    Return a string containing the regex. Unlike the plain result of
    'fnmatch.translate()', wildcards do not match the path separator
    (os.sep).
    """
    translated = fnmatch.translate(pattern)
    # fnmatch turns '?' and '*' into '.' and '.*', which would also match
    # the path separator. Rewrite every non-escaped dot into a character
    # class that excludes os.sep.
    if os.sep == '\\':
        # A regex is being used to edit a regex, so the backslash has to be
        # escaped twice.
        separator = r'\\\\'
    else:
        separator = os.sep
    replacement = r'\1[^%s]' % separator
    return re.sub(r'((?<!\\)(\\\\)*)\.', replacement, translated)
def get(self):
    """Search topics by title and render one page of results.

    Reads the ``keyword`` and ``page`` query arguments. ``page`` is 1-based;
    a value that is not an integer, or is zero or negative, falls back to
    the first page. Topics whose title contains the keyword
    (case-insensitive) are sorted by ``time`` descending, 20 per page, and
    handed to the ``search/template/search.html`` template.

    Generator-based: yields the objects returned by ``cursor.count()`` and
    ``cursor.to_list()`` and expects their results to be sent back.
    """
    keyword = self.get_query_argument('keyword', '')
    if not keyword:
        # NOTE(review): execution continues past this call unless
        # custom_error raises -- confirm against its definition.
        self.custom_error('???????')
    pattern = u'.*{}.*'.format(re.escape(keyword))
    limit = 20
    try:
        page = int(self.get_query_argument('page', default=1))
    except (TypeError, ValueError):
        # A malformed page argument shows the first page instead of raising.
        page = 1
    page = 1 if page <= 0 else page
    # Substring match on the title. NOTE(review): the original comments here
    # were lost to an encoding error; they appear to mention Elasticsearch.
    cursor = self.db.topic.find({
        'title': {'$regex': pattern, '$options': 'i'}
    })
    total = yield cursor.count()
    cursor.sort([('time', -1)]).limit(
        limit).skip((page - 1) * limit)
    topics = yield cursor.to_list(length=limit)
    self.render('search/template/search.html',
                topics=topics, total=total, page=page, keyword=keyword)
def _glob_to_re(self, pattern):
    """Convert a shell-style glob into a regular expression string.

    The result is based on 'fnmatch.translate()', except that every
    non-escaped '.' in the translated regex is replaced by a character class
    excluding os.sep, so '*' and '?' do not match the path separator.
    """
    # Inside the replacement template a literal backslash separator must be
    # escaped twice, because a regex is used to rewrite a regex.
    sep_in_class = r'\\\\' if os.sep == '\\' else os.sep
    any_but_sep = r'\1[^%s]' % sep_in_class
    unescaped_dot = r'((?<!\\)(\\\\)*)\.'
    glob_regex = fnmatch.translate(pattern)
    return re.sub(unescaped_dot, any_but_sep, glob_regex)
def list(self):
    """Lists all sessions in the store.

    Each file name in ``self.path`` is matched against
    ``self.filename_template``; the text in place of ``%s`` (five or more
    characters) is collected. Names ending in ``_fs_transaction_suffix``
    are ignored.

    .. versionadded:: 0.6
    """
    head, tail = self.filename_template.split('%s', 1)
    name_pattern = re.compile(
        r'%s(.{5,})%s$' % (re.escape(head), re.escape(tail)))
    found = []
    for name in os.listdir(self.path):
        #: this is a session that is still being saved.
        if name.endswith(_fs_transaction_suffix):
            hit = None
        else:
            hit = name_pattern.match(name)
        if hit is not None:
            found.append(hit.group(1))
    return found
def import_string(import_name, silent=False):
    """Resolve an import path string to the object it names.

    Accepted forms are dotted notation (``xml.sax.saxutils.escape``) and a
    colon between module and object (``xml.sax.saxutils:escape``). A bare
    name without either separator is imported as a module.

    With `silent` set to True, a failing import or attribute lookup yields
    ``None`` instead of raising.

    :return: imported object
    """
    try:
        if ':' in import_name:
            target_module, target_attr = import_name.split(':', 1)
        elif '.' not in import_name:
            return __import__(import_name)
        else:
            parts = import_name.split('.')
            target_attr = parts.pop()
            target_module = '.'.join(parts)
        loaded = __import__(target_module, None, None, [target_attr])
        return getattr(loaded, target_attr)
    except (ImportError, AttributeError):
        if not silent:
            raise
    return None
def render(self):
    """Return the template with its known variables substituted.

    For every key of ``self.key_dict`` that is listed in
    ``self.template.varlist``, each occurrence of
    ``self.odel + key + self.cdel`` in the template is replaced by the
    key's value. Values are XML-escaped first when ``self.html_entities``
    is true.
    """
    mapping = self.key_dict
    output = self.template
    known_vars = self.template.varlist
    opener = self.odel
    closer = self.cdel
    escape_values = self.html_entities
    if escape_values:
        from xml.sax.saxutils import escape  # Python standard library

    for name in mapping:
        if name not in known_vars:
            continue
        replacement = mapping[name]
        if escape_values:
            replacement = escape(replacement)
        output = output.replace(opener + name + closer, replacement)
    return output
##TODO : multiple file render method?
def set_status(self, status_code, reason=None):
    """Sets the status code for our response.

    :arg int status_code: Response status code. If ``reason`` is ``None``,
        it must be present in `httplib.responses <http.client.responses>`.
    :arg string reason: Human-readable reason phrase describing the status
        code. If ``None``, it will be filled in from
        `httplib.responses <http.client.responses>`.
    :raises ValueError: if ``reason`` is ``None`` and ``status_code`` is not
        a key of ``httputil.responses``.
    """
    self._status_code = status_code
    if reason is not None:
        self._reason = escape.native_str(reason)
    else:
        try:
            self._reason = httputil.responses[status_code]
        except KeyError:
            # Interpolate the code into the message; passing it as a second
            # constructor argument left the placeholder unformatted.
            raise ValueError("unknown status code %s" % (status_code,))
def _glob_to_re(self, pattern):
    """Return a regex string equivalent to the shell-like glob *pattern*.

    Starts from 'fnmatch.translate()' and then narrows every non-escaped
    '.' so that it matches any character except os.sep. As a result '*'
    and '?' never match the path separator.
    """
    regex = fnmatch.translate(pattern)

    excluded = os.sep
    if excluded == '\\':
        # The separator is placed into a regex replacement template, so a
        # backslash has to be escaped twice.
        excluded = r'\\\\'

    # Group 1 keeps any run of escaped backslashes in front of the dot.
    return re.sub(r'((?<!\\)(\\\\)*)\.', r'\1[^%s]' % excluded, regex)
def __init__(self, portal_names, app_matchers):
    """Store the app matchers and build the list of portal matchers.

    :param portal_names: a single item or a list/tuple of items. ``str``
        items are compiled into a regex matching that literal text; any
        other item must already provide a callable ``match`` attribute.
    :param app_matchers: one ``AppMatcher`` or a list/tuple of them.
    """
    if not isinstance(app_matchers, (list, tuple)):
        assert isinstance(app_matchers, AppMatcher)
        app_matchers = (app_matchers,)
    for candidate in app_matchers:
        assert isinstance(candidate, AppMatcher)
    self.__app_matchers = app_matchers

    self.__portal_matchers = []
    if not isinstance(portal_names, (list, tuple)):
        portal_names = (portal_names,)
    for portal in portal_names:
        if isinstance(portal, str):
            matcher = re.compile(re.escape(portal))
        else:
            matcher = portal
        assert hasattr(matcher, 'match') and callable(matcher.match)
        self.__portal_matchers.append(matcher)
def scrapeContacts(self):
    """Collect sender addresses from the messages listed in ``self.uids``.

    Calls ``self.getUIDs()``, fetches each UID from ``self.srv`` with
    ``(RFC822)``, and takes the last whitespace-separated token of the
    ``From`` header with ``<`` and ``>`` removed. Addresses containing
    ``self.user`` and addresses already collected are skipped. Each new
    address is also printed.

    :return: list of addresses in first-seen order, or ``None`` when
        ``self.srv`` or ``self.uids`` is empty/false.
    """
    if not self.srv:
        return
    self.getUIDs()
    if not self.uids:
        return None
    contacts = []
    for uid in self.uids:
        resp, data = self.srv.fetch(uid, "(RFC822)")
        for response_part in data:
            # Only tuple parts are parsed; element 1 is the raw message.
            if not isinstance(response_part, tuple):
                continue
            msg = email.message_from_string(response_part[1])
            fromaddr = msg['from']
            if not fromaddr:
                continue
            sender = fromaddr.split()[-1]
            address = re.sub(r'[<>]', '', sender)
            # Ignore any occurrences of own email address and add to list
            if self.user not in address and address not in contacts:
                contacts.append(address)
                # Parenthesised form prints identically on Python 2 and 3.
                print("IDENTIFED new contact [%s]" % (address))
    return contacts
def scrapeContacts(self):
    """Collect sender addresses from the messages in ``self.msg_list``.

    Calls ``self.getMessages()``, then parses each
    ``(server_msg, body, octets)`` entry by joining ``body`` with newlines.
    For every message part with a ``From`` header, the last
    whitespace-separated token is taken with ``<`` and ``>`` removed.
    Addresses containing ``self.user`` and addresses already collected are
    skipped. Each new address is also printed.

    :return: list of addresses in first-seen order, or ``None`` when
        ``self.srv`` is false.
    """
    if not self.srv:
        return
    self.getMessages()
    contacts = []
    for (server_msg, body, octets) in self.msg_list:
        mail = email.message_from_string('\n'.join(body))
        for part in mail.walk():
            fromaddr = part['from']
            if not fromaddr:
                continue
            sender = fromaddr.split()[-1]
            address = re.sub(r'[<>]', '', sender)
            # Ignore any occurrences of own email address and add to list
            if self.user not in address and address not in contacts:
                contacts.append(address)
                # Parenthesised form prints identically on Python 2 and 3.
                print("IDENTIFED new contact [%s]" % (address))
    return contacts
def tiny_tokenize(text, stem=False, stop_words=()):
    """Split *text* into word tokens, dropping punctuation, digits and stop words.

    Every character of ``string.punctuation`` is replaced by a space before
    the text is passed to ``wordpunct_tokenize``.

    :param text: UTF-8 encoded bytes (undecodable bytes are ignored) or
        already-decoded text.
    :param stem: if true, each kept token is replaced by its
        ``EnglishStemmer`` stem; a token whose stemming raises is kept as is.
    :param stop_words: collection of tokens to discard; never modified.
    :return: list of kept tokens in order of appearance.
    """
    # Only byte strings need decoding; decoded text is used as given.
    if isinstance(text, bytes):
        text = text.decode(encoding='UTF-8', errors='ignore')
    cleaned = re.sub('[%s]' % re.escape(string.punctuation), ' ', text)
    # One stemmer instance serves the whole call instead of one per token.
    stemmer = EnglishStemmer() if stem else None
    words = []
    for token in wordpunct_tokenize(cleaned):
        if token.isdigit() or token in stop_words:
            continue
        if stemmer is None:
            w = token
        else:
            try:
                w = stemmer.stem(token)
            except Exception:
                # Best effort: fall back to the unstemmed token.
                w = token
        words.append(w)
    return words
# return [EnglishStemmer().stem(token) if stem else token for token in wordpunct_tokenize(
# re.sub('[%s]' % re.escape(string.punctuation), ' ', text.decode(encoding='UTF-8', errors='ignore'))) if
# not token.isdigit() and not token in stop_words]
def strip_tags(text, strip_punctuation=False):
    """Return the text content of *text* with HTML markup removed.

    When *strip_punctuation* is true, every run of characters from ``p`` is
    deleted from the extracted text as well.
    """
    soup = BeautifulSoup(text)
    if not strip_punctuation:
        return soup.get_text()
    # NOTE(review): ``p`` is not defined inside this function -- presumably
    # a module-level string of punctuation characters; confirm it exists.
    punctuation_run = re.compile('[{}]+'.format(re.escape(p)))
    return punctuation_run.sub('', soup.get_text())
def _search_for_query(self, query):
    """Return a search function matching the characters of *query* in order.

    The compiled pattern allows any text before each character of *query*
    and ignores case. The bound ``search`` method is stored in
    ``self._search_pattern_cache`` and reused for repeated queries.
    """
    cache = self._search_pattern_cache
    if query in cache:
        return cache[query]

    # Build pattern: include all characters
    fuzzy = ''.join('.*?{0}'.format(re.escape(ch)) for ch in query)
    matcher = re.compile(fuzzy, re.IGNORECASE).search

    cache[query] = matcher
    return matcher
def _escapeRegexRangeChars(s):
    """Escape *s* for use inside a regex character range.

    Prefixes each backslash, '^', '-' and ']' with ``_bslash``, replaces
    newline and tab characters by the two-character sequences ``\\n`` and
    ``\\t``, and returns the result via ``_ustr``.
    """
    # The backslash is handled first, ahead of the other three characters.
    for special in ('\\', '^', '-', ']'):
        s = s.replace(special, _bslash + special)
    for raw, shown in (("\n", r"\n"), ("\t", r"\t")):
        s = s.replace(raw, shown)
    return _ustr(s)
def _glob_to_re(self, pattern):
    """Translate a shell-like glob pattern to a regular expression.

    Return a string containing the regex. The output of
    'fnmatch.translate()' is post-processed so that each non-escaped '.'
    becomes a character class excluding os.sep, so wildcards do not match
    the path separator.
    """
    if os.sep != '\\':
        class_member = os.sep
    else:
        # The value goes into a regex replacement template, so the
        # backslash needs escaping twice.
        class_member = r'\\\\'
    substitute = r'\1[^%s]' % class_member
    base_regex = fnmatch.translate(pattern)
    narrowed = re.sub(r'((?<!\\)(\\\\)*)\.', substitute, base_regex)
    return narrowed
def _escapeRegexRangeChars(s):
    """Return *s* escaped for use within a regex ``[...]`` range.

    Backslash, '^', '-' and ']' each get ``_bslash`` put in front of them.
    Literal newline and tab characters are replaced by the two-character
    sequences ``\\n`` and ``\\t``. The result is passed through ``_ustr``.
    """
    escaped = s
    # Order matters: the backslash is processed before the other characters.
    for ch in "\\^-]":
        escaped = escaped.replace(ch, _bslash + ch)
    escaped = escaped.replace("\n", r"\n").replace("\t", r"\t")
    return _ustr(escaped)
def prune_file_list(self):
    """Remove build output, the distribution tree and VCS directories.

    Prunes the finalized 'build' command's ``build_base`` and the directory
    named by ``self.distribution.get_fullname()`` from ``self.filelist``,
    then excludes every path with an ``RCS``, ``CVS`` or ``.svn`` directory
    component.
    """
    build_cmd = self.get_finalized_command('build')
    dist_dir = self.distribution.get_fullname()

    for directory in (build_cmd.build_base, dist_dir):
        self.filelist.prune(directory)

    # os.sep is escaped because it is embedded in a regular expression.
    escaped_sep = re.escape(os.sep)
    vcs_dirs = r'(^|' + escaped_sep + r')(RCS|CVS|\.svn)' + escaped_sep
    self.filelist.exclude_pattern(vcs_dirs, is_regex=1)
def _setoption(arg):
    """Parse one ``action:message:category:module:lineno`` option string.

    Missing trailing fields default to the empty string. ``message`` and
    ``module`` are regex-escaped (``module`` also gets a trailing ``$`` when
    non-empty), ``lineno`` must be a non-negative integer if given, and the
    result is installed with ``filterwarnings``.

    Raises ``_OptionError`` for more than five fields or an invalid lineno.
    """
    import re
    fields = arg.split(':')
    if len(fields) > 5:
        raise _OptionError("too many fields (max 5): %r" % (arg,))
    # Pad to exactly five fields.
    fields.extend([''] * (5 - len(fields)))
    action, message, category, module, lineno = [field.strip()
                                                 for field in fields]
    action = _getaction(action)
    message = re.escape(message)
    category = _getcategory(category)
    module = re.escape(module)
    if module:
        module = module + '$'
    if not lineno:
        lineno = 0
    else:
        try:
            lineno = int(lineno)
            if lineno < 0:
                raise ValueError
        except (ValueError, OverflowError):
            raise _OptionError("invalid lineno %r" % (lineno,))
    filterwarnings(action, message, category, module, lineno)
# Helper for _setoption()