def getDetailList(self,content):
    """Find every headline link in *content* and process each one in a thread.

    The page text is also dumped to ``file.txt`` (GBK-encoded).  One
    ``workthread`` thread is started per ``(href, title)`` match, and the call
    returns only after all of them have finished.
    """
    link_re = re.compile(r'<h2><a target="_blank" href="(.*?)" title="(.*?)"', re.S)
    links = link_re.findall(content)

    # Keep a copy of the page that was just scanned.
    with open('file.txt', 'w', encoding='gbk') as dump:
        dump.write(content)

    if not links:
        print('???????..............')

    workers = []
    for link in links:
        worker = threading.Thread(target=workthread,
                                  args=(link, self.user_agent, self.path))
        workers.append(worker)
        worker.start()

    # Block until every worker has completed.
    for worker in workers:
        worker.join()
# Example source code using Python's re.S flag
def split_arg_string(string):
    """Break an argument string into a list of individual arguments.

    Tokens are separated by whitespace.  A token wrapped in matching single
    or double quotes has the quotes removed and its backslash escapes
    interpreted.  Each token is converted back to the type of *string*; if
    that conversion raises ``UnicodeError`` the token is kept as it is.
    """
    token_re = (r"('([^'\\]*(?:\\.[^'\\]*)*)'"
                r'|"([^"\\]*(?:\\.[^"\\]*)*)"'
                r'|\S+)\s*')
    pieces = []
    for found in re.finditer(token_re, string, re.S):
        token = found.group().strip()
        first, last = token[:1], token[-1:]
        if first == last and first in '"\'':
            unquoted = token[1:-1]
            token = unquoted.encode('ascii', 'backslashreplace').decode('unicode-escape')
        try:
            token = type(string)(token)
        except UnicodeError:
            pass
        pieces.append(token)
    return pieces
def getDetailList(self,content):
    """Scan *content* for headline links and hand each to a worker thread.

    A GBK-encoded copy of the page is written to ``file.txt``.  Every
    ``(href, title)`` pair found gets its own ``workthread`` thread; the
    method waits for all threads before returning.
    """
    anchor_re = re.compile(r'<h2><a target="_blank" href="(.*?)" title="(.*?)"', re.S)
    found = anchor_re.findall(content)

    with open('file.txt', 'w', encoding='gbk') as out:
        out.write(content)

    if not found:
        print('???????..............')

    running = []
    for entry in found:
        thread = threading.Thread(target=workthread,
                                  args=(entry, self.user_agent, self.path))
        running.append(thread)
        thread.start()

    # Wait for all started threads.
    for thread in running:
        thread.join()
def getDetailList(self,content):
    """Find headline links in *content* and pass each to ``self.getDetailPic``.

    The page text is first dumped to ``file.txt`` (GBK-encoded).  Each match
    is an ``(href, title)`` tuple.  A notice is printed when nothing matches.
    """
    # Optional whitespace between the attributes: the previous concatenated
    # pattern required `"title=` with no gap and so could not match markup of
    # the form `href="..." title="..."`.
    pattern = re.compile(
        r'<h2><a target="_blank" href="(.*?)"\s*title="(.*?)">', re.S)

    # The context manager closes the dump file even if the write fails.
    with open('file.txt', 'w', encoding='gbk') as dump:
        dump.write(content)

    result = pattern.findall(content)
    if not result:
        print('???????..............')
    for item in result:
        self.getDetailPic(item)
def get_module_source_metadata(cls, module_source, full_line_map=False):
    """Decode the metadata embedded in *module_source*.

    The JSON text between ``__M_BEGIN_METADATA`` and ``__M_END_METADATA`` is
    parsed and its ``line_map`` is rebuilt with integer keys and values.
    With *full_line_map* true, a ``full_line_map`` list is added holding one
    value per integer in ``range(1, max(line_map))``: the value mapped to
    that key if it is present, otherwise the last value seen (starting at 1).
    """
    embedded = re.search(
        r"__M_BEGIN_METADATA(.+?)__M_END_METADATA",
        module_source, re.S).group(1)
    metadata = compat.json.loads(embedded)
    metadata['line_map'] = {
        int(key): int(value)
        for key, value in metadata['line_map'].items()
    }
    if full_line_map:
        expanded = []
        metadata['full_line_map'] = expanded
        known = metadata['line_map']
        current = 1
        for module_line in range(1, max(known)):
            # Carry the previous value forward across unmapped keys.
            current = known.get(module_line, current)
            expanded.append(current)
    return metadata
def __init__(self, code, **exception_kwargs):
    """Collect argument information from *code*.

    *code* is either source text or an already-parsed expression.  Source
    text is parsed with ``pyparser.parse``; the result (or *code* itself
    when it is not a string) is then visited with ``pyparser.FindTuple``.
    Extra keyword arguments are forwarded to the parser and the visitor.
    """
    self.codeargs = []
    self.args = []
    self.declared_identifiers = set()
    self.undeclared_identifiers = set()
    if isinstance(code, compat.string_types):
        # If there is text and no trailing comma, ensure it is parsed as a
        # tuple by adding one.  ``re.search`` is required here: ``re.match``
        # anchors at the start of the string, so it never saw a trailing
        # comma and a second one was appended.
        if re.match(r"\S", code) and not re.search(r",\s*$", code):
            code += ","
        expr = pyparser.parse(code, "exec", **exception_kwargs)
    else:
        expr = code
    f = pyparser.FindTuple(self, PythonCode, **exception_kwargs)
    f.visit(expr)
def __init__(self, code, **exception_kwargs):
    """Wrap a partial control statement so that it forms complete code.

    *code* must look like ``keyword [expression]:`` optionally followed by
    a ``#`` comment, which is cut off.  The remaining text is completed
    with ``pass`` bodies (and a dummy leading ``if``/``try`` where the
    keyword needs one) and handed to the parent initialiser.

    Raises ``exceptions.CompileException`` when *code* does not have that
    shape or uses an unsupported keyword.
    """
    matched = re.match(r'^(\w+)(?:\s+(.*?))?:\s*(#|$)', code.strip(), re.S)
    if not matched:
        raise exceptions.CompileException(
            "Fragment '%s' is not a partial control statement" %
            code, **exception_kwargs)
    if matched.group(3):
        # Drop the trailing comment.
        code = code[:matched.start(3)]
    keyword = matched.group(1)

    # keyword -> (text placed before the fragment, text placed after it)
    completions = {
        'for': ("", "pass"),
        'if': ("", "pass"),
        'while': ("", "pass"),
        'with': ("", "pass"),
        'try': ("", "pass\nexcept:pass"),
        'elif': ("if False:pass\n", "pass"),
        'else': ("if False:pass\n", "pass"),
        'except': ("try:pass\n", "pass"),
    }
    if keyword not in completions:
        raise exceptions.CompileException(
            "Unsupported control keyword: '%s'" %
            keyword, **exception_kwargs)
    before, after = completions[keyword]
    code = before + code + after
    super(PythonFragment, self).__init__(code, **exception_kwargs)
def compile_rules(environment):
    """Build the delimiter rules for *environment*.

    Returns a list of ``(name, regex_source)`` pairs for the comment, block
    and variable start strings plus, when configured, the line statement and
    line comment prefixes.  Rules are ordered by descending delimiter length
    (ties broken by name, then pattern, also descending).
    """
    escape = re.escape
    delimiters = (
        ('comment', environment.comment_start_string),
        ('block', environment.block_start_string),
        ('variable', environment.variable_start_string),
    )
    weighted = [(len(text), name, escape(text)) for name, text in delimiters]

    statement_prefix = environment.line_statement_prefix
    if statement_prefix is not None:
        weighted.append((len(statement_prefix), 'linestatement',
                         r'^[ \t\v]*' + escape(statement_prefix)))

    comment_prefix = environment.line_comment_prefix
    if comment_prefix is not None:
        weighted.append((len(comment_prefix), 'linecomment',
                         r'(?:^|(?<=\S))[^\S\r\n]*' + escape(comment_prefix)))

    # The length is only a sort key; strip it from the returned rules.
    weighted.sort(reverse=True)
    return [entry[1:] for entry in weighted]
def split_arg_string(string):
    """Break an argument string into a list of individual arguments.

    Tokens are separated by whitespace.  A token wrapped in matching single
    or double quotes has the quotes removed and its backslash escapes
    interpreted.  Each token is converted back to the type of *string*; if
    that conversion raises ``UnicodeError`` the token is kept as it is.
    """
    token_re = (r"('([^'\\]*(?:\\.[^'\\]*)*)'"
                r'|"([^"\\]*(?:\\.[^"\\]*)*)"'
                r'|\S+)\s*')
    pieces = []
    for found in re.finditer(token_re, string, re.S):
        token = found.group().strip()
        first, last = token[:1], token[-1:]
        if first == last and first in '"\'':
            unquoted = token[1:-1]
            token = unquoted.encode('ascii', 'backslashreplace').decode('unicode-escape')
        try:
            token = type(string)(token)
        except UnicodeError:
            pass
        pieces.append(token)
    return pieces
def compile_rules(environment):
    """Build the delimiter rules for *environment*.

    Returns a list of ``(name, regex_source)`` pairs for the comment, block
    and variable start strings plus, when configured, the line statement and
    line comment prefixes.  Rules are ordered by descending delimiter length
    (ties broken by name, then pattern, also descending).
    """
    escape = re.escape
    delimiters = (
        ('comment', environment.comment_start_string),
        ('block', environment.block_start_string),
        ('variable', environment.variable_start_string),
    )
    weighted = [(len(text), name, escape(text)) for name, text in delimiters]

    statement_prefix = environment.line_statement_prefix
    if statement_prefix is not None:
        weighted.append((len(statement_prefix), 'linestatement',
                         r'^[ \t\v]*' + escape(statement_prefix)))

    comment_prefix = environment.line_comment_prefix
    if comment_prefix is not None:
        weighted.append((len(comment_prefix), 'linecomment',
                         r'(?:^|(?<=\S))[^\S\r\n]*' + escape(comment_prefix)))

    # The length is only a sort key; strip it from the returned rules.
    weighted.sort(reverse=True)
    return [entry[1:] for entry in weighted]
def parse_page(self, response):
    """Register a proxy for every ``gp.insertPrx(...)`` call in the page.

    The argument of each call is decoded as JSON.  Its ``PROXY_IP``,
    ``PROXY_PORT``, ``PROXY_COUNTRY`` and ``PROXY_TYPE`` entries fill a new
    ``Proxy`` that is passed to ``self.add_proxy``.
    """
    # Raw string avoids the invalid "\(" escape sequences; the dot is
    # escaped so only a literal "gp.insertPrx" is matched.
    pattern = re.compile(r'gp\.insertPrx\((.*?)\)', re.S)
    for item in pattern.findall(response.body):
        data = json.loads(item)
        # The port is read as base-16 digits and stored as a decimal string.
        port = str(int(data.get('PROXY_PORT'), 16))
        proxy = Proxy()
        proxy.set_value(
            ip=data.get('PROXY_IP'),
            port=port,
            country=data.get('PROXY_COUNTRY'),
            anonymity=data.get('PROXY_TYPE'),
            source=self.name,
        )
        self.add_proxy(proxy=proxy)
def parse_page(self, response):
    """Register a proxy for every eight-cell table row in the page.

    Cells 0, 1, 3 and 4 of each row supply the ip, port, country and
    anonymity values of a new ``Proxy`` passed to ``self.add_proxy``.
    """
    # One "<td>(.*?)</td>" capture per cell, eight cells per row.
    row_re = re.compile('<tr>' + '<td>(.*?)</td>' * 8 + '</tr>', re.S)
    # findall always returns a list, so it can be iterated directly.
    for cells in row_re.findall(response.body):
        entry = Proxy()
        entry.set_value(
            ip=cells[0],
            port=cells[1],
            country=cells[3],
            anonymity=cells[4],
            source=self.name,
        )
        self.add_proxy(entry)
def parse_page(self, response):
    """Register a proxy for every five-cell table row except the first match.

    Cells 0 to 3 of each remaining row supply the ip, port, country and
    anonymity values of a new ``Proxy`` passed to ``self.add_proxy``.
    """
    # One "<td>(.*?)</td>" capture per cell, five cells per row.
    row_re = re.compile('<tr>' + '<td>(.*?)</td>' * 5 + '</tr>', re.S)
    rows = row_re.findall(response.body)
    # The first matched row is skipped.
    for cells in rows[1:]:
        entry = Proxy()
        entry.set_value(
            ip=cells[0],
            port=cells[1],
            country=cells[2],
            anonymity=cells[3],
            source=self.name
        )
        self.add_proxy(proxy=entry)
def parse_page(self, response):
    """Register a proxy for every seven-cell table row in the page.

    Cells 0, 1, 4 and 2 of each row supply the ip, port, country and
    anonymity values of a new ``Proxy`` passed to ``self.add_proxy``.
    """
    # Raw strings: the pattern relies on "\s", which is an invalid escape
    # sequence in an ordinary string literal.
    pattern = re.compile(
        r'<tr>\s.*?<td.*?>(.*?)</td>\s.*?<td.*?>(.*?)</td>\s.*?<td.*?>(.*?)</td>\s.*?<td.*?>('
        r'.*?)</td>\s.*?<td.*?>(.*?)</td>\s.*?<td.*?>(.*?)</td>\s.*?<td.*?>(.*?)</td>\s.*?</tr>',
        re.S)
    for item in pattern.findall(response.body):
        proxy = Proxy()
        proxy.set_value(
            ip=item[0],
            port=item[1],
            country=item[4],
            anonymity=item[2],
            source=self.name,
        )
        self.add_proxy(proxy)
def compile_rules(environment):
    """Build the delimiter rules for *environment*.

    Returns a list of ``(name, regex_source)`` pairs for the comment, block
    and variable start strings plus, when configured, the line statement and
    line comment prefixes.  Rules are ordered by descending delimiter length
    (ties broken by name, then pattern, also descending).
    """
    escape = re.escape
    delimiters = (
        ('comment', environment.comment_start_string),
        ('block', environment.block_start_string),
        ('variable', environment.variable_start_string),
    )
    weighted = [(len(text), name, escape(text)) for name, text in delimiters]

    statement_prefix = environment.line_statement_prefix
    if statement_prefix is not None:
        weighted.append((len(statement_prefix), 'linestatement',
                         r'^[ \t\v]*' + escape(statement_prefix)))

    comment_prefix = environment.line_comment_prefix
    if comment_prefix is not None:
        weighted.append((len(comment_prefix), 'linecomment',
                         r'(?:^|(?<=\S))[^\S\r\n]*' + escape(comment_prefix)))

    # The length is only a sort key; strip it from the returned rules.
    weighted.sort(reverse=True)
    return [entry[1:] for entry in weighted]
def update_consts(filename, constname, content):
    """Replace the ``constname = ( ... )`` block in *filename*.

    The existing definition (from the ``constname = (`` line to the first
    following line that consists only of ``)``) is replaced by the output of
    ``format_lines(constname, content)`` and the file is rewritten.

    Raises ``ValueError`` when no such definition is found.
    """
    with open(filename) as source:
        original = source.read()

    # Locate the span of the current definition.
    definition_re = re.compile(r'^%s\s*=\s*\($.*?^\s*\)$' % constname, re.M | re.S)
    span = definition_re.search(original)
    if span is None:
        raise ValueError('Could not find existing definition for %s' %
                         (constname,))

    replacement = format_lines(constname, content)
    updated = original[:span.start()] + replacement + original[span.end():]

    with open(filename, 'w') as target:
        target.write(updated)
def compile_rules(environment):
    """Build the delimiter rules for *environment*.

    Returns a list of ``(name, regex_source)`` pairs for the comment, block
    and variable start strings plus, when configured, the line statement and
    line comment prefixes.  Rules are ordered by descending delimiter length
    (ties broken by name, then pattern, also descending).
    """
    escape = re.escape
    delimiters = (
        ('comment', environment.comment_start_string),
        ('block', environment.block_start_string),
        ('variable', environment.variable_start_string),
    )
    weighted = [(len(text), name, escape(text)) for name, text in delimiters]

    statement_prefix = environment.line_statement_prefix
    if statement_prefix is not None:
        weighted.append((len(statement_prefix), 'linestatement',
                         r'^[ \t\v]*' + escape(statement_prefix)))

    comment_prefix = environment.line_comment_prefix
    if comment_prefix is not None:
        weighted.append((len(comment_prefix), 'linecomment',
                         r'(?:^|(?<=\S))[^\S\r\n]*' + escape(comment_prefix)))

    # The length is only a sort key; strip it from the returned rules.
    weighted.sort(reverse=True)
    return [entry[1:] for entry in weighted]
def getMovieSaveFile(self, moviename):
    """Derive a save-file base name from a video file name.

    If *moviename* contains a season/episode tag such as ``S01E02``, the
    show name is taken either from the text before the tag or, when the
    name starts with the tag, from the text between the tag and the file
    extension.  Otherwise everything before the extension is used.  Spaces
    and dots in the result are replaced by underscores.

    Returns ``None`` when the name does not match any layout or does not
    carry one of the known video extensions.
    """
    # A real alternation group.  The previous "[ts|avi|...]" was a character
    # class and accepted any single listed character after the dot.
    extension = (r'(?:ts|avi|mkv|divx|f4v|flv|img|iso|m2ts|m4v|mov|mp4|mpeg'
                 r'|mpg|mts|vob|wmv)')
    if re.search(r'[Ss][0-9]+[Ee][0-9]+', moviename) is not None:
        layouts = (
            # "<show> S01E02 <episode title>.<ext>"
            r'(.*\w)[\s.|-]+[Ss][0-9]+[Ee][0-9]+[\s.|-].*?\.' + extension,
            # "S01E02 <show>.<ext>"
            r'^[Ss][0-9]+[Ee][0-9]+[\s.\-](.*\w)\.' + extension,
        )
    else:
        layouts = (r'(.*\w)\.' + extension,)

    for layout in layouts:
        found = re.match(layout, moviename)
        if found is not None:
            return found.group(1).replace(" ", "_").replace(".", "_")
    return None
def getInfos(self, data, id, type, filename):
# Extract a description from `data` and write it out via self.writeTofile.
#
# `type` selects the layout of `data`: "movie" or "serie".  Any other
# value extracts nothing.  `id` is not used inside this function.
#
# NOTE(review): the original indentation of this listing is lost, so the
# nesting level of the final self.close(False) cannot be confirmed here --
# presumably it runs unconditionally at the end; verify against the
# original file.
if type == "movie":
# Capture the body of the "genres" array and the "overview" string.
infos = re.findall('"genres":\[(.*?)\].*?"overview":"(.*?)"', data, re.S)
if infos:
# Only the first match is used.
(genres, desc) = infos[0]
# Build a comma-separated genre string from the "name" entries.
# It is not used again within this function.
genre = re.findall('"name":"(.*?)"', genres, re.S)
genre = str(genre).replace('\'','').replace('[','').replace(']','')
self.writeTofile(decodeHtml(desc), filename)
elif type == "serie":
# Capture the text of the <Overview> element.
infos = re.findall('<Overview>(.*?)</Overview>', data, re.S)
if infos:
desc = infos[0]
self.writeTofile(decodeHtml(desc), filename)
self.close(False)
def split_arg_string(string):
    """Break an argument string into a list of individual arguments.

    Tokens are separated by whitespace.  A token wrapped in matching single
    or double quotes has the quotes removed and its backslash escapes
    interpreted.  Each token is converted back to the type of *string*; if
    that conversion raises ``UnicodeError`` the token is kept as it is.
    """
    token_re = (r"('([^'\\]*(?:\\.[^'\\]*)*)'"
                r'|"([^"\\]*(?:\\.[^"\\]*)*)"'
                r'|\S+)\s*')
    pieces = []
    for found in re.finditer(token_re, string, re.S):
        token = found.group().strip()
        first, last = token[:1], token[-1:]
        if first == last and first in '"\'':
            unquoted = token[1:-1]
            token = unquoted.encode('ascii', 'backslashreplace').decode('unicode-escape')
        try:
            token = type(string)(token)
        except UnicodeError:
            pass
        pieces.append(token)
    return pieces
def compile_rules(environment):
    """Build the delimiter rules for *environment*.

    Returns a list of ``(name, regex_source)`` pairs for the comment, block
    and variable start strings plus, when configured, the line statement and
    line comment prefixes.  Rules are ordered by descending delimiter length
    (ties broken by name, then pattern, also descending).
    """
    escape = re.escape
    delimiters = (
        ('comment', environment.comment_start_string),
        ('block', environment.block_start_string),
        ('variable', environment.variable_start_string),
    )
    weighted = [(len(text), name, escape(text)) for name, text in delimiters]

    statement_prefix = environment.line_statement_prefix
    if statement_prefix is not None:
        weighted.append((len(statement_prefix), 'linestatement',
                         r'^[ \t\v]*' + escape(statement_prefix)))

    comment_prefix = environment.line_comment_prefix
    if comment_prefix is not None:
        weighted.append((len(comment_prefix), 'linecomment',
                         r'(?:^|(?<=\S))[^\S\r\n]*' + escape(comment_prefix)))

    # The length is only a sort key; strip it from the returned rules.
    weighted.sort(reverse=True)
    return [entry[1:] for entry in weighted]
def compile_rules(environment):
    """Build the delimiter rules for *environment*.

    Returns a list of ``(name, regex_source)`` pairs for the comment, block
    and variable start strings plus, when configured, the line statement and
    line comment prefixes.  Rules are ordered by descending delimiter length
    (ties broken by name, then pattern, also descending).
    """
    escape = re.escape
    delimiters = (
        ('comment', environment.comment_start_string),
        ('block', environment.block_start_string),
        ('variable', environment.variable_start_string),
    )
    weighted = [(len(text), name, escape(text)) for name, text in delimiters]

    statement_prefix = environment.line_statement_prefix
    if statement_prefix is not None:
        weighted.append((len(statement_prefix), 'linestatement',
                         r'^[ \t\v]*' + escape(statement_prefix)))

    comment_prefix = environment.line_comment_prefix
    if comment_prefix is not None:
        weighted.append((len(comment_prefix), 'linecomment',
                         r'(?:^|(?<=\S))[^\S\r\n]*' + escape(comment_prefix)))

    # The length is only a sort key; strip it from the returned rules.
    weighted.sort(reverse=True)
    return [entry[1:] for entry in weighted]
def __init__(self, text = "", engine=None, bindparams=None, typemap=None, escape=True):
    """Set up a textual clause.

    When *escape* is true, every ``:name`` token in *text* is registered as
    a bind parameter and replaced by the engine's bind template; otherwise
    *text* is stored unchanged.  Entries of *typemap*, if given, are
    replaced in place by ``engine.type_descriptor`` of their value.
    Explicit *bindparams* are stored under their ``key`` last, so they
    override parameters discovered in the text.
    """
    self.parens = False
    self._engine = engine
    self.id = id(self)
    self.bindparams = {}
    self.typemap = typemap

    if typemap is not None:
        for name in typemap.keys():
            typemap[name] = engine.type_descriptor(typemap[name])

    def register_param(match):
        # Record the parameter and return its replacement text.
        param_name = match.group(1)
        self.bindparams[param_name] = bindparam(param_name)
        return self.engine.bindtemplate % param_name

    if escape:
        param_re = re.compile(r':([\w_]+)', re.S)
        self.text = param_re.sub(register_param, text)
    else:
        self.text = text

    if bindparams is not None:
        for param in bindparams:
            self.bindparams[param.key] = param
def fetch_xml(url):
    """Download the feed at *url* and pull location and forecast data from it.

    The response status and headers are printed.  The returned dict holds
    ``city``, ``country`` and ``region`` from the first
    ``yweather:location`` element, plus one entry per ``yweather:forecast``
    element keyed by its ``day`` attribute, each with ``data`` (the
    ``date`` attribute), ``high``, ``low`` and ``text``.

    Raises ``IndexError`` when no location element is found.
    """
    with request.urlopen(url) as response:
        print('Status:', response.status, response.reason)
        for header, value in response.getheaders():
            print('%s: %s' % (header, value))
        html = response.read().decode('utf-8')

    location_re = re.compile(r'<yweather:location.*?city="(.*?)".*?country="(.*?)".*?region="(.*?)".*?/>', re.S)
    forecast_re = re.compile(r'<yweather:forecast.*?date="(.*?)".*?day="(.*?)".*?high="(.*?)".*?low="(.*?)".*?text="(.*?)".*?/>', re.S)
    locations = location_re.findall(html)
    forecasts = forecast_re.findall(html)

    city, country, region = locations[0]
    weather = {'city': city, 'country': country, 'region': region}
    for date, day, high, low, text in forecasts:
        weather[day] = {'data': date, 'high': high, 'low': low, 'text': text}
    return weather
def unhighlight(text):
    """Remove the highlight wrapper from blocks that contain no markup.

    Every ``<div class="highlight"><pre><span></span>...</pre></div>`` block
    whose content is non-blank and contains no ``<span`` has its
    surrounding ``<div class="highlight">`` / ``</div>`` removed.  Blocks
    with highlighted content, and blank blocks, are left untouched.

    Returns the updated text.
    """
    opening = '<div class="highlight"><pre><span></span>'
    closing = '</pre></div>'
    hits = re.findall(opening + '(?P<text>.+?)' + closing, text, re.M | re.S)
    for h in hits:
        # Skip blank blocks and blocks that carry highlighting spans.
        if not h.strip() or '<span' in h:
            continue
        wrapped = opening + h + closing
        # An earlier replacement may already have removed this occurrence.
        # A plain substring test is equivalent to searching for the
        # escaped literal.
        if wrapped not in text:
            continue
        unwrapped = wrapped.replace(
            '<div class="highlight">', '').replace('</pre></div>', '</pre>')
        text = text.replace(wrapped, unwrapped)
    return text
def fetch(remote_file, local_tmp_file, local_file, diff_tool):
    """Copy a remote file locally and open a diff tool if it differs.

    *remote_file* is copied to *local_tmp_file* with ``scp``.  In the copy,
    the span from ``Table of Contents`` up to ``markdown-toc.go)`` is
    replaced by ``[tableofcontent]``.  The copy is then compared with
    *local_file* using ``diff``; when ``diff`` prints anything, *diff_tool*
    is launched on the two files, otherwise a message is printed.

    NOTE(review): all arguments are interpolated into shell command lines
    without quoting, so they must come from a trusted source.
    """
    cmd = 'scp %s %s' % (remote_file, local_tmp_file)
    print(cmd)
    os.system(cmd)

    with open(local_tmp_file) as f:
        txt = f.read()
    # Raw string: "\)" is an invalid escape sequence in a normal literal.
    ntxt = re.sub(r'Table of Contents.+markdown-toc.go\)', '[tableofcontent]', txt, flags=re.S)
    with open(local_tmp_file, 'w') as f:
        f.write(ntxt)

    cmd = 'diff ' + local_tmp_file + ' ' + local_file
    o = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    # communicate() drains both pipes and waits for the child.  Reading
    # only stdout could block on a full stderr pipe and never reaped the
    # process.
    diff_output, _ = o.communicate()
    if diff_output.strip():  # if diff is empty, the files are the same
        cmd = diff_tool + ' ' + local_tmp_file + ' ' + local_file
        o = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        o.communicate()
        o.wait()
    else:
        print('The files are the same')
# main
def _do_code_blocks(self, text):
    """Process Markdown `<pre><code>` blocks.

    Each run of lines indented by a tab or by ``self.tab_width`` spaces is
    replaced by the result of ``self._code_block_sub``.
    """
    width = self.tab_width
    block_pattern = r'''
(?:\n\n|\A\n?)
( # $1 = the code block -- one or more lines, starting with a space/tab
(?:
(?:[ ]{%d} | \t) # Lines must start with a tab or a tab-width of spaces
.*\n+
)+
)
((?=^[ ]{0,%d}\S)|\Z) # Lookahead for non-space at line-start, or end of doc
# Lookahead to make sure this block isn't already in a code block.
# Needed when syntax highlighting is being used.
(?![^<]*\</code\>)
''' % (width, width)
    matcher = re.compile(block_pattern, re.M | re.X)
    return matcher.sub(self._code_block_sub, text)
def __init__(self, *args, **kwargs):
# Initialise the parent with the given arguments, then set up the template
# environment and the per-build state below.  The statement order matters:
# later lines read attributes assigned by earlier ones.
super().__init__(*args, **kwargs)
# TODO store the loader or env on the tool factory for faster partial builds
# (this would need to cope with new files)
# Loader over the configured jinja directories, and an environment using it.
self._loader = FrontMatterFileSystemLoader(self._config.jinja_directories)
self._env = Environment(loader=self._loader)
# Expose static_file_filter to templates under the filter name "S".
self._env.filters['S'] = self.static_file_filter
# Must exist before _initialise_templates() is called -- presumably that
# call fills it; verify against its definition.
self._file_ctxs = {}
self._initialise_templates()
self._ctx = self._config.context
# find_library() may return a falsy value, in which case no library files
# are listed.
self._library = self._config.find_library()
self._library_files = walk(self._library) if self._library else []
self._extra_files = []
def compile_rules(environment):
    """Build the delimiter rules for *environment*.

    Returns a list of ``(name, regex_source)`` pairs for the comment, block
    and variable start strings plus, when configured, the line statement and
    line comment prefixes.  Rules are ordered by descending delimiter length
    (ties broken by name, then pattern, also descending).
    """
    escape = re.escape
    delimiters = (
        ('comment', environment.comment_start_string),
        ('block', environment.block_start_string),
        ('variable', environment.variable_start_string),
    )
    weighted = [(len(text), name, escape(text)) for name, text in delimiters]

    statement_prefix = environment.line_statement_prefix
    if statement_prefix is not None:
        weighted.append((len(statement_prefix), 'linestatement',
                         r'^[ \t\v]*' + escape(statement_prefix)))

    comment_prefix = environment.line_comment_prefix
    if comment_prefix is not None:
        weighted.append((len(comment_prefix), 'linecomment',
                         r'(?:^|(?<=\S))[^\S\r\n]*' + escape(comment_prefix)))

    # The length is only a sort key; strip it from the returned rules.
    weighted.sort(reverse=True)
    return [entry[1:] for entry in weighted]
def next(self):
    """Play the current entry of ``self.content`` and advance the cursor.

    The first element of the current entry is used as the ``title`` of the
    plugin URL passed to ``self.play``.  Afterwards ``self.current`` moves
    to the following entry, wrapping to 0 after the last one.
    """
    title = self.content[self.current][0]
    # The non-testing plugin id is 'plugin.video.gdrive'.
    self.play('plugin://plugin.video.gdrive-testing/?mode=video&instance='+str(self.service.instanceName)+'&title='+title)

    # Wrap before running past the end.  The previous check
    # (current < len) let the cursor reach len(content), so the next call
    # raised IndexError.
    if self.current + 1 < len(self.content):
        self.current += 1
    else:
        self.current = 0