def register_command(self, commands, callback, allow_edited=False):
"""Registers commands handler
Args:
commands(list|tuple): list of commands to register
callback(function): callable object to execute
allow_edited(Optional[bool]): pass edited messages
Raises:
ValueError: if one of commands in ``commands`` was already registered
"""
for command in commands:
self._register_command(command)
@utils.log(logger, print_ret=False)
def process_update(bot, update):
lang = utils.get_lang(self._storage, update.effective_user)
callback(update.effective_message,
update.effective_message.text.split(' ')[1:], lang)
self._dispatcher.add_handler(CommandHandler(commands, process_update,
allow_edited=allow_edited))
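# Hedged usage sketch for register_command above, assuming an instance that
# exposes this method. The callback signature (message, args, lang) mirrors
# what process_update passes; the handler body and names are illustrative,
# not from the source.
# def start_handler(message, args, lang):
#     message.reply_text('args: %s lang: %s' % (args, lang))
# bot.register_command(['start', 'begin'], start_handler, allow_edited=False)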
def success_parse(self, response):
proxy = response.meta.get('proxy_info')
table = response.meta.get('table')
self.save_page(proxy.ip, response.body)
self.log('success_parse speed:%s meta:%s' % (time.time() - response.meta.get('cur_time'), response.meta))
proxy.vali_count += 1
proxy.speed = time.time() - response.meta.get('cur_time')
# '==' rather than 'is': identity comparison against a str literal is unreliable
if self.success_mark == '' or self.success_mark in response.text:
if table == self.name:
if proxy.speed > self.timeout:
self.sql.del_proxy_with_id(table, proxy.id)
else:
self.sql.update_proxy(table, proxy)
else:
if proxy.speed < self.timeout:
self.sql.insert_proxy(table_name = self.name, proxy = proxy)
else:
if table == self.name:
self.sql.del_proxy_with_id(table_name = table, id = proxy.id)
self.sql.commit()
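# Decision table for the validator branches above: a proxy already in this
# spider's own table is deleted when its response time exceeds self.timeout
# and updated otherwise; a proxy from another table is inserted into this
# spider's table only when it is fast enough. A failed success check deletes
# the proxy only when it is already in the spider's own table.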
def delete_proxy(self, proxy):
try:
rets = proxy.split(':')
ip = rets[1]
ip = ip[2:]
for item in self.proxys:
if item.get('ip') == ip:
self.proxys.remove(item)
break
if len(self.proxys) < 3:
self.update_proxy()
utils.log('--------------delete ip:%s-----------' % ip)
r = requests.get(url = '%s/delete?name=%s&ip=%s' % (self.address, 'douban', ip))
return r.text
except Exception:
return False
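# Worked example for the parsing above, assuming the proxy string looks like
# 'http://1.2.3.4:8080': split(':') yields ['http', '//1.2.3.4', '8080'],
# so rets[1][2:] drops the leading '//' and leaves the bare ip '1.2.3.4'.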
def __init__(self, *a, **kw):
super(Movieurls, self).__init__(*a, **kw)
self.log_dir = 'log/%s' % self.name
utils.make_dir(self.log_dir)
self.sql = SqlHelper()
self.headers = {
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
'Accept-Encoding': 'gzip, deflate, br',
'Accept-Language': 'en-US,en;q=0.5',
'Connection': 'keep-alive',
'Host': 'movie.douban.com',
'Upgrade-Insecure-Requests': '1',
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:51.0) Gecko/20100101 Firefox/51.0',
}
self.init()
def get_all_category(self, response):
self.write_file('%s/category.html' % self.log_dir, response.body)
tags = response.xpath('//table/tbody/tr/td/a/@href').extract()
for tag in tags:
res = tag.split('/')[-1]
utils.log('tag:%s' % tag)
url = response.urljoin(tag)
yield Request(
url = url,
headers = self.headers,
dont_filter = True,
meta = {
'tag': res,
'download_timeout': 20,
# 'is_proxy': False,
},
callback = self.get_page_count,
errback = self.error_parse
)
def __init__(self, *a, **kw):
super(Bookurls, self).__init__(*a, **kw)
self.log_dir = 'log/%s' % self.name
utils.make_dir(self.log_dir)
self.sql = SqlHelper()
self.headers = {
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
'Accept-Encoding': 'gzip, deflate, br',
'Accept-Language': 'en-US,en;q=0.5',
'Connection': 'keep-alive',
'Host': 'book.douban.com',
'Upgrade-Insecure-Requests': '1',
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:51.0) Gecko/20100101 Firefox/51.0',
}
self.init()
def get_all_category(self, response):
self.write_file('%s/category.html' % self.log_dir, response.body)
tags = response.xpath('//table/tbody/tr/td/a/@href').extract()
for tag in tags:
tag = tag.split('/')[-1]
utils.log('tag:%s' % tag)
url = response.urljoin(tag)
yield Request(
url = url,
headers = self.headers,
dont_filter = True,
meta = {
'tag': tag,
'download_timeout': 20,
# 'is_proxy': False,
},
callback = self.get_page_count,
errback = self.error_parse
)
def insert_json(self, data = {}, table_name = None, commit = False):
try:
keys = []
vals = []
for k, v in data.items():
keys.append(k)
vals.append(v)
val_str = ','.join(['%s'] * len(vals))
key_str = ','.join(keys)
command = "INSERT IGNORE INTO {table} ({keys}) VALUES({values})". \
format(keys = key_str, values = val_str, table = table_name)
# utils.log('insert_json data:%s' % data)
self.cursor.execute(command, tuple(vals))
if commit:
self.conn.commit()
except Exception as e:
utils.log('sql helper insert_json exception msg:%s' % e, logging.WARNING)
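# Illustrative call for insert_json above; the table name is an assumption.
# The values travel as a parameter tuple, so the MySQL driver handles escaping:
# sql = SqlHelper()
# sql.insert_json(data={'ip': '1.2.3.4', 'port': 8080, 'speed': 1.5},
#                 table_name='free_ipproxy', commit=True)
# roughly executes (column order follows dict iteration):
#   INSERT IGNORE INTO free_ipproxy (ip,port,speed) VALUES(%s,%s,%s)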
def success_parse(self, response):
proxy = response.meta.get('proxy_info')
table = response.meta.get('table')
self.save_page(proxy.ip, response.body)
self.log('success_parse speed:%s meta:%s' % (time.time() - response.meta.get('cur_time'), response.meta))
proxy.vali_count += 1
proxy.speed = time.time() - response.meta.get('cur_time')
if self.success_content_parse(response):
if table == self.name:
if proxy.speed > self.timeout:
self.sql.del_proxy_with_id(table, proxy.id)
else:
self.sql.update_proxy(table, proxy)
else:
if proxy.speed < self.timeout:
self.sql.insert_proxy(table_name = self.name, proxy = proxy)
else:
if table == self.name:
self.sql.del_proxy_with_id(table_name = table, id = proxy.id)
self.sql.commit()
def run(self):
# threading.Thread.run(self)
#log(' p-running ' + str( self.work_list ))
self.running = True
# Rather than running forever, check to see if it is still OK
while self.running:
try:
# Don't block
#item = self.queue.get(block=False)
self.do_work()
self.ev.set()
#work done, end
log( ' p-all done ' )
self.stop()
except Empty:
# Allow other stuff to run
time.sleep(0.1)
def return_action_and_link_tuple_accdg_to_setting_wether_to_use_addon_for_youtube(self, video_id):
link_actn=''
link_=''
if video_id:
if use_addon_for_youtube:
link_actn=self.DI_ACTION_PLAYABLE
link_="plugin://plugin.video.youtube/play/?video_id=" + video_id
else:
link_actn=self.DI_ACTION_YTDL
#some youtube links take a VERY long time for youtube_dl to parse. we simplify it by getting the video id and using a simpler url
#BUT if there is a time skip code in the url, we just pass it right through. youtube-dl can handle this part.
# time skip code comes in the form of ?t=122 OR #t=1m45s OR ?t=2:43
link_=self.build_youtube_url_with_video_id(video_id)
#log(' returning:{0} {1}'.format(link_actn, link_))
return link_actn, link_
def get_video_id(self, yt_url):
#video_id_regex=re.compile('(?:youtube(?:-nocookie)?\.com/(?:\/\S+\/|(?:v|e(?:mbed)?)\/|\S*?[?&;]v=)|youtu\.be\/)([a-zA-Z0-9_-]{11})', re.DOTALL)
#added parsing for video_id in kodi_youtube_plugin url
video_id_regex=re.compile('(?:youtube(?:-nocookie)?\.com/(?:\/\S+\/|(?:v|e(?:mbed)?)\/|\S*?[?&;]v=)|youtu\.be\/|plugin:\/\/plugin\.video\.youtube\/play\/\?video_id=)([a-zA-Z0-9_-]{11})', re.DOTALL)
video_id=''
match = video_id_regex.findall(yt_url)
if match:
video_id=match[0]
else:
#log(' second parsing for video id:'+yt_url)
#for parsing this: https://www.youtube.com/attribution_link?a=y08k0cdNBKw&u=%2Fwatch%3Fv%3DQOVrrL5KtsM%26feature%3Dshare%26list%3DPLVonsjaXkSpfuIv02l6IM1pN1Z3IfXWUW%26index%3D4
o = urlparse.urlparse(yt_url)
query = urlparse.parse_qs(o.query)
if 'a' in query and 'u' in query: #if all (k in query for k in ("a","u")):
u=query['u'][0]
#log(' u '+ repr(u)) # <-- /watch?v=QOVrrL5KtsM&feature=share&list=PLVonsjaXkSpfuIv02l6IM1pN1Z3IfXWUW&index=4
match = video_id_regex.findall('youtube.com'+u)
if match:
video_id=match[0]
else:
log(" Can't get youtube video id:"+yt_url)
return video_id
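# Worked example for the attribution_link fallback above, using the url from
# the source comment: parse_qs on the query yields {'a': ['y08k0cdNBKw'],
# 'u': ['/watch?v=QOVrrL5KtsM&feature=share&...']}, so prepending
# 'youtube.com' to u lets the same regex recover the 11-character id
# 'QOVrrL5KtsM'.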
def build_query_params_for_search(self,youtube_api_key,search_string,type_='video'):
from utils import ret_bracketed_option
#specify different results by adding order_option in square brackets in the search string.
stripped_string, order_option=ret_bracketed_option(search_string) #developer feature: specify the order in search parameter "[date]" etc.
if order_option:
if order_option.lower() in ['date', 'rating', 'relevance', 'title', 'videocount', 'viewcount']:
log(' youtube search:using special order option [{0}]'.format(order_option))
else:
log(' youtube search:unsupported order option [{0}]'.format(order_option))
order_option='relevance'
stripped_string=search_string
else:
order_option='relevance'
return 'search', {
'key': youtube_api_key,
'fields':'items(kind,id(videoId),snippet(publishedAt,channelTitle,channelId,title,description,thumbnails(medium)))',
'type': type_, #video,channel,playlist.
'maxResults': '50', # Acceptable values are 0 to 50
'part': 'snippet',
'order': order_option, #date,rating,relevance,title,videoCount,viewCount
'q': stripped_string,
'safeSearch':'moderate' if hide_nsfw else 'none',
}
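# Sketch of how the ('search', params) pair above is typically consumed,
# mirroring the urlencode pattern used in get_video_durations below; the
# variable names here are assumptions:
# endpoint, params = self.build_query_params_for_search(key, 'space x [date]')
# api_url = 'https://www.googleapis.com/youtube/v3/{0}?{1}'.format(
#     endpoint, urllib.urlencode(params))
# r = self.requests_get(api_url)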
def get_video_durations(self,youtube_api_key,videoIds):
from utils import ytDurationToSeconds
durations=[]
query_params={'key': youtube_api_key,
'part': 'contentDetails',
'id': ",".join(videoIds), #','.join(map(str, myList))#if the list contains numbers
}
api_url='https://www.googleapis.com/youtube/v3/{0}?{1}'.format("videos",urllib.urlencode(query_params))
r = self.requests_get(api_url)
j=r.json()
#log(repr(j))
for i in j.get('items'):
d=clean_str(i, ['contentDetails','duration'],'')
durations.append(ytDurationToSeconds(d))
#import iso8601
#iso8601.parse_duration(d)
return durations
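# Note on get_video_durations above: the YouTube v3 API reports
# contentDetails.duration as an ISO 8601 string such as 'PT4M13S'
# (4*60+13 = 253 seconds), which is presumably what ytDurationToSeconds
# converts. Up to 50 ids may be joined per request, matching the
# 'maxResults' cap used in the search params above.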
def get_gallery_info(self, media_url):
gallery_name = media_url.split("/gallery/",1)[1]
if gallery_name=="":
return False
request_url="https://api.imgur.com/3/gallery/"+gallery_name
#log(" imgur:check if album- request_url---"+request_url )
try:
r = self.requests_get(request_url, headers=ClassImgur.request_header)
except requests.exceptions.HTTPError:
#http://imgur.com/gallery/Ji0IWhG this link has /gallery/ but returns 404 if asked as gallery
request_url="https://api.imgur.com/3/image/"+gallery_name
#log(' Trying a different query:'+request_url)
try:
r = self.requests_get(request_url, headers=ClassImgur.request_header)
except requests.exceptions.HTTPError:
#https://imgur.com/gallery/knbXW this link has is not "image" nor "gallery" but is "album"
request_url="https://api.imgur.com/3/album/"+gallery_name
#log(' Trying a different query:'+request_url)
r = self.requests_get(request_url, headers=ClassImgur.request_header)
#there has to be a better way to do this...
return r
def ask_imgur_for_link(self, media_url):
#sometimes, imgur links are posted without the extension(gif,jpg etc.). we ask imgur for it.
#log(" ask_imgur_for_link: "+media_url )
media_url=media_url.split('?')[0] #get rid of the query string
img_id=media_url.split("com/",1)[1] #.... just get whatever is after "imgur.com/" hope nothing is beyond the id
#log(" ask_imgur_for_link: "+img_id )
#6/30/2016: noticed a link like this: http://imgur.com/topic/Aww/FErKmLG
if '/' in img_id:
#log(" split_ask_imgur_for_link: "+ str( img_id.split('/')) )
img_id = img_id.split('/')[-1] #the -1 gets the last item on the list returned by split
if img_id:
request_url="https://api.imgur.com/3/image/"+img_id
r = self.requests_get(request_url, headers=ClassImgur.request_header)
j=r.json()
if j['data'].get('mp4'):
return j['data'].get('mp4')
else:
return j['data'].get('link')
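# Example of the lookup above with an id from the source comment: for
# 'http://imgur.com/topic/Aww/FErKmLG', img_id ends up as 'FErKmLG' and the
# v3 endpoint returns {'data': {'link': ..., 'mp4': ...}}; the mp4 field,
# present for animated posts, is preferred over the plain link.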
def get_playable_url(self, media_url='', is_probably_a_video=False ):
if not media_url:
media_url=self.media_url
self.get_video_id()
if self.video_id:
#if use_ytdl_for_yt: #ytdl can also handle vimeo
# (10/2/2016) --- please only use script.module.youtube.dl if possible and remove these dependencies.
self.link_action=sitesBase.DI_ACTION_YTDL
return media_url, self.TYPE_VIDEO
#else:
#self.link_action=self.DI_ACTION_PLAYABLE
#return "plugin://plugin.video.vimeo/play/?video_id=" + self.video_id, self.TYPE_VIDEO
else:
log(" %s cannot get videoID %s" %( self.__class__.__name__, media_url) )
#feed it to ytdl. sometimes link points to multiple streams: https://vimeo.com/mrmichaelrobinson/videos/
self.link_action=sitesBase.DI_ACTION_YTDL
return media_url, self.TYPE_VIDEO
def get_playable_url(self, media_url='', is_probably_a_video=False ):
if not media_url:
media_url=self.media_url
# *** needs access token to get playable url. we'll just have ytdl handle dailymotion
self.link_action=sitesBase.DI_ACTION_YTDL
return media_url, self.TYPE_VIDEO
# self.get_video_id()
# #log(' videoID:' + self.video_id)
# if self.video_id:
# request_url= 'https://api.dailymotion.com/video/' + self.video_id
#
# #https://api.dailymotion.com/video/x4qviso?fields=aspect_ratio,stream_h264_hd_url,poster_url,thumbnail_url,sprite_320x_url
#
# content = requests.get(request_url )
# log(' ' + str(content.text))
# if content.status_code==200:
# j = content.json()
# log( pprint.pformat(j, indent=1) )
# else:
# log(" dailymotion query failed:" + str(content.status_code) )
# else:
# log(" %s cannot get videoID %s" %( self.__class__.__name__, media_url) )
def ret_blog_post_request(self):
o=urlparse.urlparse(self.media_url) #scheme, netloc, path, params, query, fragment
#log( ' blogpath=' + o.path )
blog_path= o.path
if not blog_path:
log(' could not determine blog path in:' + self.media_url)
return None
blog_info_request='https://www.googleapis.com/blogger/v3/blogs/byurl?' + self.key_string + '&url=' + self.media_url
content = self.requests_get(blog_info_request)
j = content.json()
#log( pprint.pformat(j, indent=1) )
blog_id=j.get('id')
blog_post_request='https://www.googleapis.com/blogger/v3/blogs/%s/posts/bypath?%s&path=%s' %( blog_id, self.key_string, blog_path)
#log( ' api request:'+blog_post_request )
content = self.requests_get(blog_post_request)
return content
def ret_album_list(self,album_url):
r = self.requests_get(album_url)
jo=re.compile('window._sharedData = ({.*});</script>').findall(r.text)
if jo:
#import pprint; log( pprint.pformat(jo[0], indent=1) )
try:
j=json.loads(jo[0] )
entry_data=j.get('entry_data')
if entry_data:
if 'ProfilePage' in entry_data.keys():
profile_page=entry_data.get('ProfilePage')[0]
images=self.ret_images_dict_from_album_json(profile_page)
#for i in images: log( '##' + repr(i))
self.assemble_images_dictList(images)
return self.dictList
else:
log(" Could not get 'entry_data' from scraping instagram [window._sharedData = ]")
except (AttributeError,TypeError) as e:
log(' exception while parsing json:'+str(e))
def get_playable(self, media_url='', is_probably_a_video=False ):
media_type=self.TYPE_VIDEO
if not media_url:
media_url=self.media_url
filename,ext=parse_filename_and_ext_from_url(media_url)
#log(' file:%s.%s' %(filename,ext) )
if ext in ["mp4","webm","gif"]:
if ext=='gif':
media_type=self.TYPE_GIF
self.link_action=sitesBase.DI_ACTION_PLAYABLE
self.thumb_url=media_url.replace( '%s.%s'%(filename,ext) , '%s.jpg' %(filename))
self.poster_url=self.thumb_url
self.media_url=media_url.replace( '%s.%s'%(filename,ext) , '%s.mp4' %(filename)) #just replacing gif to mp4 works
return self.media_url, media_type
if ext in image_exts: #excludes .gif
self.link_action='viewImage'
self.thumb_url=media_url
self.poster_url=self.thumb_url
return media_url,self.TYPE_IMAGE
return self.get_playable_url(self.media_url, is_probably_a_video=False )
def get_video_id(self):
#looks like the filename is also the video id; some links have it at the end of the url after a "-"
self.video_id=''
#https://j.gifs.com/zpOmn5.gif <-- this is handled in get_playable -> .gif replaced with .mp4
#http://gifs.com/gif/qxBQMp <-- parsed here.
#https://gifs.com/gif/yes-nooo-whaaa-5yZ8rK <-- parsed here.
#lazy '+?' and escaped '\.gif' so the extension is actually stripped from the capture
match = re.compile('gifs\.com/(?:gif/)?(.+?)(?:\.gif|$)').findall(self.media_url)
#log(' matches' + repr(match) )
if match:
vid=match[0]
if '-' in vid:
vid= vid.split('-')[-1]
self.video_id=vid
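# Quick check of the regex above against the sample urls in the comments:
#   'http://gifs.com/gif/qxBQMp'                 -> video_id 'qxBQMp'
#   'https://gifs.com/gif/yes-nooo-whaaa-5yZ8rK' -> last '-' part: '5yZ8rK'
#   'https://j.gifs.com/zpOmn5.gif'              -> 'zpOmn5' (suffix stripped)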
def ret_album_list(self, album_url, thumbnail_size_code=''):
#returns an object (list of dicts) that contain info for the calling function to create the listitem/addDirectoryItem
content = self.requests_get( album_url)
match = re.compile('var album\s=\s(.*)\;').findall(content.text)
#log('********* ' + match[0])
if match:
j = json.loads(match[0])
images=self.ret_images_dict_from_album_json(j)
self.assemble_images_dictList(images)
#self.assemble_images_dictList( ( [ s.get('description'), prefix+s.get('url_full')] for s in items) )
else:
log(' eroshare:ret_album_list: var album string not found. ')
return self.dictList
def get_playable_url(self, media_url, is_probably_a_video=False ):
if self.is_album(media_url):
log(' is an album:'+ media_url )
self.media_type = self.TYPE_ALBUM
return media_url, sitesBase.TYPE_ALBUM
log(' scraping:'+ media_url )
content = self.requests_get( media_url)
#https://github.com/downthemall/anticontainer/blob/master/plugins/imgbox.com.json
match = re.compile("id=\"img\".+?src=\"(.+?)\" title=\"(.+?)\"", re.DOTALL).findall(content.text)
#log(' match:' + repr(match))
if match:
#log(' match' + match[0][0])
self.poster_url=match[0][0]
self.thumb_url=self.poster_url
return self.poster_url, self.TYPE_IMAGE
else:
log(" %s can't scrape image " %(self.__class__.__name__ ) )
def get_playable_url(self, link_url, is_probably_a_video):
from reddit import assemble_reddit_filter_string
subreddit=self.get_video_id(link_url)
self.video_id=subreddit
#log(' **get_playable_url subreddit=' + self.video_id )
self.media_type=sitesBase.TYPE_REDDIT
#if link_url is in the form of https://np.reddit.com/r/teslamotors/comments/50bc6a/tesla_bumped_dying_man_up_the_production_queue_so/d72vfbg?context=2
if '/comments/' in link_url:
self.link_action='listLinksInComment'
return link_url, self.media_type
else:
#link_url is in the form of "r/subreddit". this type of link is found in comments
if subreddit:
self.link_action='listSubReddit'
reddit_url=assemble_reddit_filter_string('',subreddit)
return reddit_url, self.media_type
if link_url.startswith('/u/'):
author=link_url.split('/u/')[1]
self.link_action='listSubReddit'
#show links submitted by author
reddit_url=assemble_reddit_filter_string("","/user/"+author+'/submitted')
return reddit_url, self.media_type
return '',''
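# Examples for the three branches above, assuming get_video_id extracts the
# subreddit name ('someuser' is illustrative):
#   'https://np.reddit.com/r/teslamotors/comments/50bc6a/...' -> 'listLinksInComment'
#   'r/teslamotors' -> 'listSubReddit' over that subreddit's filter string
#   '/u/someuser'   -> 'listSubReddit' over the user's submitted links
# anything else falls through to ('', '').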
def get_playable_url(self, link_url, is_probably_a_video=False ):
self.media_url=link_url
#u=media_url.split('?')[0]
html=self.requests_get(link_url)
#if '11616' in link_url:log(html.text)
images=self.get_images(html.text,self.p)
if images:
#if '11616' in link_url:log(pprint.pformat(images))
self.media_type=self.TYPE_ALBUM
return self.media_url, self.media_type
else:
#default to youtube-dl video.
#direct image link posts are already taken care of in get_playable()
#the only video sample i found is not playable via ytdl. TODO: .mp4 is in javascript block
# http://acidcow.com/video/61149-russian_soldiers_got_the_steel_balls.html
self.link_action=self.DI_ACTION_YTDL
return self.media_url, self.TYPE_VIDEO
def sitesManager( media_url ):
#picks which class will handle the media identification and extraction for website_name
#first resolve url shortener
shorteners=['bit.ly','goo.gl','tinyurl.com']
if any(shortener in media_url for shortener in shorteners):
#v=sitesBase.requests_get('https://unshorten.me/s/'+ urllib.quote_plus( media_url ) )
v = requests.head( media_url, timeout=REQUEST_TIMEOUT, allow_redirects=True )
log(' short url(%s)=%s'%(media_url,repr(v.url)))
media_url=v.url
for subcls in sitesBase.__subclasses__():
regex=subcls.regex
if regex:
match=re.compile( regex , re.I).findall( media_url )
#log("testing:{0}[{1}] {2}".format(media_url,regex, repr(match)) )
if match :
return subcls( media_url )
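# Hedged usage sketch for sitesManager: each sitesBase subclass is expected
# to carry a class-level `regex` that the dispatch above relies on. With the
# imgur url from the snippets above (exact handler methods vary by subclass):
# handler = sitesManager('https://imgur.com/gallery/Ji0IWhG')
# if handler:
#     playable, media_type = handler.get_playable_url(handler.media_url)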
def ydtl_get_playable_url( url_to_check ):
from YoutubeDLWrapper import YoutubeDLWrapper, _selectVideoQuality
#log('ydtl_get_playable_url:' +url_to_check )
if link_url_is_playable(url_to_check)=='video':
return url_to_check
video_urls=[]
ytdl=YoutubeDLWrapper()
try:
ydl_info=ytdl.extract_info(url_to_check, download=False)
video_infos=_selectVideoQuality(ydl_info, quality=1, disable_dash=True)
for video_info in video_infos:
video_urls.append(video_info.get('xbmc_url'))
return video_urls
except Exception:
return None
def onClick(self, controlID):
if controlID == self.main_control_id:
self.gui_listbox_SelectedPosition = self.gui_listbox.getSelectedPosition()
item = self.gui_listbox.getSelectedItem()
if not item: #panel listbox control allows user to pick non-existing item by mouse/touchscreen. bypass it here.
return
if self.include_parent_directory_entry and self.gui_listbox_SelectedPosition == 0:
self.close() #include_parent_directory_entry means that we've added a ".." as the first item on the list onInit
self.process_clicked_item(item)
else:
clicked_control=self.getControl(controlID)
log('clicked on controlID='+repr(controlID))
self.process_clicked_item(clicked_control)
def process_clicked_item(self, clicked_item):
di_url=''
item_type=''
if isinstance(clicked_item, xbmcgui.ListItem ):
di_url=clicked_item.getProperty('onClick_action') #this property is created when assembling the kwargs.get("listing") for this class
item_type=clicked_item.getProperty('item_type').lower()
elif isinstance(clicked_item, xbmcgui.ControlButton ):
#buttons have no setProperty(); hiding the url in Label2 is no good.
#ast.literal_eval(cxm_string):
#di_url=clicked_item.getLabel2()
#log(' button label2='+repr(di_url))
#item_type=clicked_item.getProperty('item_type').lower()
pass
log( " clicked %s IsPlayable=%s url=%s " %( repr(clicked_item),item_type, di_url ) )
if item_type=='playable':
#a big thank you to spoyser (http://forum.kodi.tv/member.php?action=profile&uid=103929) for this help
pl = xbmc.PlayList(xbmc.PLAYLIST_VIDEO)
pl.clear()
pl.add(di_url, clicked_item)
xbmc.Player().play(pl, windowed=False)
elif item_type=='script':
#if user clicked on 'next' we close this screen and load the next page.
if 'mode=listSubReddit' in di_url:
self.busy_execute_sleep(di_url,500,True )
else:
self.busy_execute_sleep(di_url,3000,False )