def _convert_simple_pattern(self, regex_string): # EXPERIMENTAL
    """Convert a "simple" regex to a form insertable into a `RegexTrieDict`.

    EXPERIMENTAL and currently disabled: this method always returns `None`,
    which is the "pattern is too complicated" result.  Per the original
    design note, patterns that are not recognized as simple are meant to be
    handled by the Python matcher rather than placed in the trie.

    The option under consideration is to recognize "simple" patterns and put
    them in the trie automatically.  "Simple" is essentially defined by
    whatever this routine is eventually implemented to do (and by what a
    `RegexTrieDict` can/should do).

    `regex_string` is the regex pattern text to examine; it is not inspected
    while the feature is disabled.  Returns `None` unconditionally.
    """
    # The previous body kept two unreachable drafts after this early return.
    # They are summarized here instead of being left as dead code.
    #
    # TODO draft 1 (seemed to work for some very simple patterns):
    #   Return `regex_string` unchanged (no processing needed) when it is
    #   made up only of letters, digits, underscore and hyphen, i.e. when it
    #   matches
    #       ^[a-zA-Z0-9_\-]+$
    #   and return `None` otherwise.
    #   NOTE: with `re.match`, `$` also matches just before a trailing
    #   newline, so "abc\n" would be accepted; anchor with `\Z` (or use
    #   `fullmatch`) if that must be rejected.
    #
    # TODO draft 2 (scratch):
    #   Call a pattern non-simple when a search finds either the start of a
    #   Python regex special construct that is not preceded by a backslash
    #   (negative lookbehind for the escape),
    #       (?<!\\)[.^$*+?{[|(]
    #   or one of the Python regex escapes,
    #       [\\][ABdDsSwWZ]
    #   Open question from the draft: a single character in brackets could
    #   still count as simple.
    #   See https://docs.python.org/2.0/ref/strings.html
    return None
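# [Web-page navigation residue, not part of the source: "Comment list"]
# [Web-page navigation residue, not part of the source: "Table of contents"]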