def fetch():
    retval = {}
    content = retrieve_content(__url__)

    if __check__ in content:
        for match in re.finditer(r"(\d+\.\d+\.\d+\.\d+)/(\d+)", content):
            prefix, mask = match.groups()
            mask = int(mask)
            start_int = addr_to_int(prefix) & make_mask(mask)
            end_int = start_int | ((1 << 32 - mask) - 1)
            if 0 <= end_int - start_int <= 1024:
                address = start_int
                while start_int <= address <= end_int:
                    retval[int_to_addr(address)] = (__info__, __reference__)
                    address += 1

    return retval
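# Hypothetical helper sketch: fetch() above relies on addr_to_int, int_to_addr and
# make_mask from its own project. These stand-ins only illustrate the CIDR arithmetic
# used in the loop and are not the original implementations.
import socket
import struct

def addr_to_int(addr):
    # "192.168.1.0" -> 0xC0A80100
    return struct.unpack("!I", socket.inet_aton(addr))[0]

def int_to_addr(value):
    # 0xC0A80103 -> "192.168.1.3"
    return socket.inet_ntoa(struct.pack("!I", value))

def make_mask(bits):
    # /30 -> 0xFFFFFFFC
    return (0xffffffff << (32 - bits)) & 0xffffffff

# With these, a "192.168.1.0/30" entry expands to 192.168.1.0 through 192.168.1.3.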
def split_arg_string(string):
    """Given an argument string this attempts to split it into small parts."""
    rv = []
    for match in re.finditer(r"('([^'\\]*(?:\\.[^'\\]*)*)'"
                             r'|"([^"\\]*(?:\\.[^"\\]*)*)"'
                             r'|\S+)\s*', string, re.S):
        arg = match.group().strip()
        if arg[:1] == arg[-1:] and arg[:1] in '"\'':
            arg = arg[1:-1].encode('ascii', 'backslashreplace') \
                .decode('unicode-escape')
            try:
                arg = type(string)(arg)
            except UnicodeError:
                pass
        rv.append(arg)
    return rv
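# Minimal usage sketch for split_arg_string above (the sample string is made up;
# `import re` is assumed at module level). Quoted segments stay together and lose
# their surrounding quotes:
print(split_arg_string('convert "my file.png" --size 100x100'))
# ['convert', 'my file.png', '--size', '100x100']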
def _GetEndOfTableIfNotAlias(self, query: str, column_name: str) -> int:
    """Get the starting position of the column if it is not an alias column.

    Args:
        query (str): the query to be searched
        column_name (str): the name to be searched for

    Returns:
        int: 0 if no column could be found, otherwise the starting position of
            the column
    """
    wrong_positions = [name.start() for name in
                       re.finditer('.{0} as'.format(column_name), query)]
    found_positions = []
    for space in self._POSSIBLEQUERYSEPERATOR:
        found_positions += [name.start() for name in
                            re.finditer('.{0}{1}'.format(column_name, space),
                                        query)]
    position = set(found_positions) - set(wrong_positions)
    if position:
        return position.pop()
    else:
        return 0
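# Standalone sketch of the alias filtering above; the separator tuple is an assumption
# (the original class keeps it in self._POSSIBLEQUERYSEPERATOR) and the query is made up.
import re

query = "SELECT price as cost, amount FROM sales WHERE price > 10"
column_name = 'price'
wrong = {m.start() for m in re.finditer('.{0} as'.format(column_name), query)}
found = set()
for sep in (' ', ',', ')'):  # assumed separators
    found |= {m.start() for m in re.finditer('.{0}{1}'.format(column_name, sep), query)}
print(sorted(found - wrong))
# [45] -- only the non-alias occurrence (inside the WHERE clause) remains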
def register_options(self):
    # type: () -> None
    """Parse options from text like this:

    Preferences:
     [+|-]alignArguments Enable/disable ...
     ...
     [+|-]spacesWithinPatternBinders Enable/disable ...
     -alignSingleLineCaseStatements.maxArrowIndent=[1-100] Set Maximum number ...
     -indentSpaces=[1-10] Set Number of spaces ...
    """
    exeresult = run_executable(self.exe, ['--help'], cache=self.cache)
    options = []
    text = unistr(exeresult.stdout)
    for m in re.finditer(r'^ (\[\+\|-\]|-)([a-z][a-zA-Z.]+)(?:=\[(\d+)-(\d+)\])?', text,
                         re.MULTILINE):
        optionprefix, optionname, start, end = m.groups()
        if start is None:
            optiontype = 'bool'
            configs = [True, False]  # type: List[OptionValue]
        else:
            optiontype = 'int'
            configs = list(inclusiverange(int(start), int(end)))
        options.append(option_make(optionname, optiontype, configs))
    self.styledefinition = styledef_make(options)
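# Sketch of the option-parsing regex above against a made-up scalafmt-style help text;
# run_executable/option_make/styledef_make belong to the surrounding project and are not used here.
import re

help_text = """Preferences:
 [+|-]alignArguments                                   Enable/disable alignment
 -alignSingleLineCaseStatements.maxArrowIndent=[1-100] Set maximum arrow indent
 -indentSpaces=[1-10]                                  Set number of spaces
"""
for m in re.finditer(r'^ (\[\+\|-\]|-)([a-z][a-zA-Z.]+)(?:=\[(\d+)-(\d+)\])?', help_text,
                     re.MULTILINE):
    print(m.groups())
# ('[+|-]', 'alignArguments', None, None)
# ('-', 'alignSingleLineCaseStatements.maxArrowIndent', '1', '100')
# ('-', 'indentSpaces', '1', '10')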
def filter_output(self, output, regex):
    """ Filter output from a command """
    result = {}
    result_list = []
    if isinstance(output, str):
        lines = [output]
    else:
        lines = output

    for line in lines:
        iterator = re.finditer(regex, line)
        try:
            while True:
                cur = next(iterator)  # works on both Python 2 and 3
                result = cur.groupdict()
                result['hostname'] = self.host
                result_list.append(result)
        except StopIteration:
            pass
    return result_list
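# Standalone sketch of the groupdict-based filtering above; the regex and sample line
# are made up, and self.host is replaced by a literal hostname.
import re

sample = "eth0: RX packets 1024 errors 0"
pattern = r'(?P<iface>\w+): RX packets (?P<rx>\d+) errors (?P<errors>\d+)'
rows = []
for match in re.finditer(pattern, sample):
    row = match.groupdict()
    row['hostname'] = 'node-1'
    rows.append(row)
print(rows)  # [{'iface': 'eth0', 'rx': '1024', 'errors': '0', 'hostname': 'node-1'}]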
def split_into_sentences(text):
    potential_end_pat = re.compile(r"".join([
        r"([\w\.'’&\]\)]+[\.\?!])",   # A word that ends with punctuation
        r"([‘’“”'\"\)\]]*)",          # Followed by optional quote/parens/etc
        r"(\s+(?![a-z\-–—]))",        # Followed by whitespace + non-(lowercase or dash)
        ]),
        re.U
    )
    dot_iter = re.finditer(potential_end_pat, text)
    end_indices = [
        (x.start() + len(x.group(1)) + len(x.group(2)))
        for x in dot_iter
        if is_sentence_ender(x.group(1))
    ]
    spans = zip([None] + end_indices, end_indices + [None])
    sentences = [
        text[start:end].strip() for start, end in spans
    ]
    return sentences
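# Usage sketch for split_into_sentences; is_sentence_ender is not shown above, so this
# stand-in (which treats every '.', '?' or '!' as a terminator) is an assumption.
def is_sentence_ender(word):
    return word[-1] in '.?!'

print(split_into_sentences("It rained all day. The match was cancelled! We went home."))
# ['It rained all day.', 'The match was cancelled!', 'We went home.']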
def create_ad_hoc_field(cls, db_type):
    '''
    Given an SQL column description such as "Enum8('apple' = 1, 'banana' = 2, 'orange' = 3)"
    this method returns a matching enum field.
    '''
    import re
    try:
        Enum  # exists in Python 3.4+
    except NameError:
        from enum import Enum  # use the enum34 library instead
    members = {}
    for match in re.finditer(r"'(\w+)' = (\d+)", db_type):
        members[match.group(1)] = int(match.group(2))
    enum_cls = Enum('AdHocEnum', members)
    field_class = Enum8Field if db_type.startswith('Enum8') else Enum16Field
    return field_class(enum_cls)
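# Sketch of the member-parsing step above using the standard-library Enum;
# Enum8Field/Enum16Field come from the surrounding ORM and are not used here.
import re
from enum import Enum

db_type = "Enum8('apple' = 1, 'banana' = 2, 'orange' = 3)"
members = {m.group(1): int(m.group(2)) for m in re.finditer(r"'(\w+)' = (\d+)", db_type)}
AdHocEnum = Enum('AdHocEnum', members)
print(list(AdHocEnum))
# [<AdHocEnum.apple: 1>, <AdHocEnum.banana: 2>, <AdHocEnum.orange: 3>]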
def tokenize(token_specification, text):
    Token = collections.namedtuple('Token', ['typ', 'value', 'line', 'column', 'mo'])
    token_specification.extend((
        ('NEWLINE', r'\n'),  # Line endings
        ('SKIP',    r'.'),   # Any other character
    ))
    tok_regex = '|'.join('(?P<%s>%s)' % pair for pair in token_specification)
    line_num = 1
    line_start = 0
    for mo in re.finditer(tok_regex, text):
        kind = mo.lastgroup
        # keep only the groups that actually matched (a list on both Python 2 and 3)
        value = [x for x in mo.groups() if x is not None]
        if kind == 'NEWLINE':
            line_start = mo.end()
            line_num += 1
        elif kind == 'SKIP':
            pass
        else:
            column = mo.start() - line_start
            yield Token(kind, value, line_num, column, mo)
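# Usage sketch for tokenize above; the token specification and input string are made up,
# and `import collections` / `import re` are assumed at module level.
spec = [('NUMBER', r'\d+'), ('IDENT', r'[A-Za-z_]\w+'), ('OP', r'[+\-*/=]')]
for token in tokenize(spec, "answer = 42\n"):
    print(token.typ, token.value, token.line, token.column)
# IDENT ['answer'] 1 0
# OP ['='] 1 7
# NUMBER ['42'] 1 9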
def fetch():
    retval = {}
    content = retrieve_content(__url__)

    if __check__ in content:
        for match in re.finditer(r"(?m)^([\d.]+),IP used by ([^,]+) C&C", content):
            retval[match.group(1)] = ("%s (malware)" % match.group(2).lower(), __reference__)
        for row in re.finditer(r"(?s)<tr>(.+?)</tr>", content):
            if "<span>100%</span>" in row.group(1):
                domain = re.search(r"get_data_domain\('([^']+)", row.group(1))
                if domain:
                    tag = re.search(r">(trojan|spyware|adware)\.([^<]+)", row.group(1))
                    retval[domain.group(1)] = (("%s (malware)" % tag.group(2)) if tag else "malware", __reference__)

    return retval
def remove_job(self, job_guid):
    """
    Remove a job given its GUID, including decreasing the job counter of the queue
    :param job_guid: the GUID of the job to remove from queue
    :return: void
    """
    global _os_ver
    occurs = [(oc.start(), oc.end()) for oc in
              list(re.finditer('%s' % (BITSStateFile.JOB_HEADER_FOOTER_HEX[_os_ver].decode('hex')),
                               self._original_data))
              ]
    if occurs:
        self.set_jobs_counter(self.get_jobs_counter() - 1)
        state_off = self._get_job_state_off(job_guid)
        new_data_list = list(self._new_data)
        job_start_off, job_end_off = BITSStateFile._get_job_limits_by_index_in_between(occurs, state_off)
        new_data_list = new_data_list[:job_start_off + 1] + new_data_list[job_end_off + 1:]
        self._update_new_data(0, "".join(new_data_list))
        self.commit()
def post(self, request, pk, **kwargs):
    instance = CognateClassCitation.objects.get(id=pk)
    form = EditCognateClassCitationForm(request.POST, instance=instance)
    try:
        # validate {ref foo ...}
        s = Source.objects.all().filter(deprecated=False)
        pattern = re.compile(r'(\{ref +([^\{]+?)(:[^\{]+?)? *\})')
        for m in re.finditer(pattern, form.data['comment']):
            foundSet = s.filter(shorthand=m.group(2))
            if not foundSet.count() == 1:
                raise ValidationError('In field “Comment” source shorthand “%(name)s” is unknown.',
                                      params={'name': m.group(2)})
        form.save()
    except ValidationError as e:
        messages.error(
            request,
            'Sorry, the server had problems updating the cognate citation. %s' % e)
        return self.render_to_response({"form": form})
    return HttpResponseRedirect(reverse('cognate-class-citation-detail', args=[pk]))
def after_compile(self):
    if self.engine.positional:
        self.positiontup = []
        match = r'%\(([\w_]+)\)s'
        params = re.finditer(match, self.strings[self.statement])
        for p in params:
            self.positiontup.append(p.group(1))
        if self.engine.paramstyle == 'qmark':
            self.strings[self.statement] = re.sub(match, '?', self.strings[self.statement])
        elif self.engine.paramstyle == 'format':
            self.strings[self.statement] = re.sub(match, '%s', self.strings[self.statement])
        elif self.engine.paramstyle == 'numeric':
            i = [0]
            def getnum(x):
                i[0] += 1
                return str(i[0])
            self.strings[self.statement] = re.sub(match, getnum, self.strings[self.statement])
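# Standalone sketch of the pyformat-to-qmark rewrite above (the SQL string is made up):
import re

stmt = "SELECT * FROM users WHERE id = %(id)s AND name = %(name)s"
pattern = r'%\(([\w_]+)\)s'
positiontup = [p.group(1) for p in re.finditer(pattern, stmt)]
print(positiontup)                 # ['id', 'name']
print(re.sub(pattern, '?', stmt))  # SELECT * FROM users WHERE id = ? AND name = ?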
def test_patterns(text, patterns):
    """Given source text and a list of patterns, look for
    matches for each pattern within the text and print
    them to stdout.
    """
    # Look for each pattern in the text and print the results
    for pattern, desc in patterns:
        print("'{}' ({})\n".format(pattern, desc))
        print(" '{}'".format(text))
        for match in re.finditer(pattern, text):
            s = match.start()
            e = match.end()
            substr = text[s:e]
            n_backslashes = text[:s].count('\\')
            prefix = '.' * (s + n_backslashes)
            print(" {}'{}'".format(prefix, substr))
        print()
    return
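# Usage sketch for test_patterns above (pattern and text are made-up examples):
test_patterns(
    'abbaabbba',
    [(r'ab*', "'a' followed by zero or more 'b'")],
)
# Prints the text once, then one dotted line per match, with the dots aligning
# each matched substring under its position in the text.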
async def pun(self, ctx):
    '''
    Gives a random pun from the depths of the internet
    '''
    # Read from page
    async with self.session.get('http://www.punoftheday.com/cgi-bin/randompun.pl') as r:
        page = await r.text()

    # Scrape the raw HTML
    r = r'(<div class=\"dropshadow1\">\n<p>).*(</p>\n</div>)'
    foundPun = [i for i in finditer(r, page)][0].group()

    # Filter out the pun
    r = r'(>).*(<)'
    filteredPun = [i for i in finditer(r, foundPun)][0].group()

    # Boop it out
    fullPun = filteredPun[1:-1]
    await self.sparcli.say(fullPun)
async def steamid(self, ctx, *, gameURL: str):
    '''
    Gets the information of a game from Steam URL
    '''
    await self.sparcli.send_typing(ctx.message.channel)

    # Grab the game ID from the user input
    regexMatches = finditer(r'\d+', gameURL)
    regexList = [i for i in regexMatches]

    # Parse it as a group
    if len(regexList) == 0:
        await self.sparcli.say('I was unable to find the ID of that game on the Steam API.')
        return
    else:
        await self.getSteamGameInfo(regexList[0].group())
def messageToEmbed(message):
    # Get some default values that'll be in the embed
    author = message.author
    description = message.content
    image = False

    # Check to see if any images were added
    regexMatch = r'.+(.png)|.+(.jpg)|.+(.jpeg)|.+(.gif)'
    if len(message.attachments) > 0:
        attachment = message.attachments[0]
        matchList = [i for i in finditer(regexMatch, attachment['filename'])]
        if len(matchList) > 0:
            image = attachment['url']

    # Get the time the message was created
    createdTime = '.'.join(str(message.timestamp).split('.')[:-1])

    # Make and return the embed
    return makeEmbed(user=author, description=description, image=image, footer=createdTime)
def match_and_replace(
        text=None,
        rule=None,
        phoneme=None
        ):
    """
    Replace found text from a single rule.
    """
    # Find all rule matches.
    matches = [(match.start(), match.end()) for
               match in re.finditer(rule, text)]
    # Start from behind, so replace in-place.
    matches.reverse()
    # Convert to characters because strings are immutable.
    characters = list(text)
    for start, end in matches:
        characters[start:end] = phoneme
    # Convert back to string.
    return "".join(characters)
def unpack_dword(line):
    outs = ''
    i = 0
    for m in re.finditer(r'((?:0x[0-9a-f]{8},?\s*))', line):
        l = m.group(0)
        l = l.replace(',', '')
        l = l.replace(' ', '')
        dword = int(l, 16)
        unpack = reversed([
            (dword & 0xff000000) >> 24,
            (dword & 0x00ff0000) >> 16,
            (dword & 0x0000ff00) >> 8,
            (dword & 0x000000ff)
        ])
        i += 4
        for b in unpack:
            outs += '%02x' % b

    out(dbg("After callback ('%s')" % outs))
    return BytesParser.formats_compiled['hexstring'].match(outs)
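# Standalone sketch of the byte-swapping step above; out/dbg/BytesParser belong to the
# surrounding project, so this only shows what a single 0x-dword turns into.
dword = 0x41424344
unpacked = reversed([
    (dword & 0xff000000) >> 24,
    (dword & 0x00ff0000) >> 16,
    (dword & 0x0000ff00) >> 8,
    (dword & 0x000000ff),
])
print(''.join('%02x' % b for b in unpacked))  # 44434241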
def extract_videos_from_page(self, page):
    ids_in_page = []
    titles_in_page = []
    for mobj in re.finditer(self._VIDEO_RE, page):
        # The link with index 0 is not the first video of the playlist (not sure if still actual)
        if 'index' in mobj.groupdict() and mobj.group('id') == '0':
            continue
        video_id = mobj.group('id')
        video_title = unescapeHTML(mobj.group('title'))
        if video_title:
            video_title = video_title.strip()
        try:
            idx = ids_in_page.index(video_id)
            if video_title and not titles_in_page[idx]:
                titles_in_page[idx] = video_title
        except ValueError:
            ids_in_page.append(video_id)
            titles_in_page.append(video_title)
    return zip(ids_in_page, titles_in_page)
def format_to_regex(self, fmt):
    """
    Converts a string like
       '%(title)s - %(artist)s'
    to a regex like
       '(?P<title>.+)\ \-\ (?P<artist>.+)'
    """
    lastpos = 0
    regex = ""
    # replace %(..)s with regex group and escape other string parts
    for match in re.finditer(r'%\((\w+)\)s', fmt):
        regex += re.escape(fmt[lastpos:match.start()])
        regex += r'(?P<' + match.group(1) + '>.+)'
        lastpos = match.end()
    if lastpos < len(fmt):
        regex += re.escape(fmt[lastpos:len(fmt)])
    return regex
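# Usage sketch for format_to_regex above; it is an instance method, so `downloader`
# stands in for whatever object defines it, and the sample title/artist are made up.
import re
regex = downloader.format_to_regex('%(title)s - %(artist)s')
print(re.match(regex, 'Skyline - Broiler').groupdict())
# {'title': 'Skyline', 'artist': 'Broiler'}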
def extract_object(self, objname):
    obj = {}
    obj_m = re.search(
        (r'(?:var\s+)?%s\s*=\s*\{' % re.escape(objname)) +
        r'\s*(?P<fields>([a-zA-Z$0-9]+\s*:\s*function\(.*?\)\s*\{.*?\}(?:,\s*)?)*)' +
        r'\}\s*;',
        self.code)
    fields = obj_m.group('fields')
    # Currently, it only supports function definitions
    fields_m = re.finditer(
        r'(?P<key>[a-zA-Z$0-9]+)\s*:\s*function'
        r'\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}',
        fields)
    for f in fields_m:
        argnames = f.group('args').split(',')
        obj[f.group('key')] = self.build_function(argnames, f.group('code'))
    return obj
def find_links(file):
    """Find all markdown links in a file object.

    Yield (regexmatch, lineno) tuples.
    """
    # don't yield same link twice
    seen = set()

    # we need to loop over the file two lines at a time to support
    # multi-line (actually two-line) links, so this is kind of a mess
    firsts, seconds = itertools.tee(file)
    next(seconds)  # first line is never second line

    # we want 1-based indexing instead of 0-based and one-line links get
    # caught from linepair[1], so we need to start at two
    for lineno, linepair in enumerate(zip(firsts, seconds), start=2):
        lines = linepair[0] + linepair[1]
        for match in re.finditer(_LINK_REGEX, lines, flags=re.DOTALL):
            if match.group(0) not in seen:
                seen.add(match.group(0))
                yield match, lineno
def split_problematic_endpoints_line(line):
    """
    If the line of host contains more than one ":",
    for example: 10.99.184.69:900010.37.170.125:9006
    this splits the line and returns a list of correct endpoints

    Args:
        ``line``: the problematic line which contains more than one endpoint string.

    Returns:
        the split list of the problematic line which has correct endpoint strings.
    """
    colon_parts = line.strip().split(":")
    offset = len(colon_parts[-1])
    colon_positions = [m.start() for m in re.finditer(':', line)]
    start = 0
    split_parts = []
    for colon_position in colon_positions:
        end = colon_position + offset + 1
        split_part = line[start:end]
        split_parts.append(split_part)
        start = end
    return split_parts
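# Usage sketch, reusing the example from the docstring above:
print(split_problematic_endpoints_line("10.99.184.69:900010.37.170.125:9006"))
# ['10.99.184.69:9000', '10.37.170.125:9006']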
def get_classes(self, folder, class_suffix, selected=None):
    classes = []
    for file in glob.glob(folder + "/*.py"):
        handle = open(file, "r")
        content = handle.read()
        handle.close()
        module = folder.replace('/', '.').replace('\\', '.') + '.' + os.path.basename(file).replace('.py', '')
        regexp = r"\sclass\s+([\w\d]+" + class_suffix + r")\s*\(([\w\d]*)\)\s*:\s"
        for m in re.finditer(regexp, content):
            parent_class = m.group(2)
            if len(parent_class) == 0 or parent_class == 'object':
                continue
            class_name = m.group(1)
            classes.append(module + '.' + class_name)
    return classes
def analyze(line, linenum, lang):
    annotations = []
    if lang is Language.en_EN or lang is None:
        weasel_words = WEASEL_WORDS_EN
    elif lang is Language.de_DE:
        weasel_words = WEASEL_WORDS_DE
    for weasel_pattern in weasel_words:
        for match in re.finditer(weasel_pattern, line, flags=re.IGNORECASE):
            if (not verify_match(match, line)) or (not matches_whole_words(match, line)):
                continue
            index = match.start()
            annotation = WeaselWord(linenum, line, index, word=match.group(0))
            annotations.append(annotation)
    return annotations
def analyze(line, linenum, lang):
    annotations = []
    if lang is Language.en_EN or lang is None:
        contractions = CONTRACTIONS_EN
    elif lang is Language.de_DE:
        contractions = CONTRACTIONS_DE
    for pattern in contractions:
        for match in re.finditer(pattern, line, flags=re.IGNORECASE):
            if (not verify_match(match, line)) or (not matches_whole_words(match, line)):
                continue
            index = match.start()
            replaced_contraction = re.sub(pattern, contractions[pattern], match.group(0), flags=re.IGNORECASE)
            annotation = Contraction(linenum, line, index, word=match.group(0), contraction=replaced_contraction)
            annotations.append(annotation)
    return annotations
def paged_github_json_request(url, headers=None):
    response = requests.get(url, headers=headers)
    assert response.ok, response.content
    results = response.json()

    if 'Link' in response.headers:
        links = response.headers['Link']

        # There are likely better ways to parse/extract the link information
        # but here we just find the last page number mentioned in the header
        # 'Link' section and then loop over all pages to get the comments
        last_match = list(re.finditer('page=[0-9]+', links))[-1]
        last_page = int(links[last_match.start():last_match.end()].split('=')[1])

        # If there are other pages, just loop over them and get all the
        # comments
        if last_page > 1:
            for page in range(2, last_page + 1):
                response = requests.get(url + '?page={0}'.format(page), headers=headers)
                assert response.ok, response.content
                results += response.json()

    return results
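# Standalone sketch of the Link-header page extraction above; the header value is made up
# but follows GitHub's documented pagination format.
import re

links = ('<https://api.github.com/repos/org/repo/issues/1/comments?page=2>; rel="next", '
         '<https://api.github.com/repos/org/repo/issues/1/comments?page=5>; rel="last"')
last_match = list(re.finditer('page=[0-9]+', links))[-1]
last_page = int(links[last_match.start():last_match.end()].split('=')[1])
print(last_page)  # 5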