def parse_job(self, response):
# extract the job fields from the Lagou job detail page and load them into a LagouJobItem
item_loader = LagouJobItemLoader(item=LagouJobItem(), response=response)
item_loader.add_css("title", ".job-name::attr(title)")
item_loader.add_value("url", response.url)
item_loader.add_value("url_object_id", get_md5(response.url))
item_loader.add_css("salary", ".job_request .salary::text")
item_loader.add_xpath("job_city", "//*[@class='job_request']/p/span[2]/text()")
item_loader.add_xpath("work_years", "//*[@class='job_request']/p/span[3]/text()")
item_loader.add_xpath("degree_need", "//*[@class='job_request']/p/span[4]/text()")
item_loader.add_xpath("job_type", "//*[@class='job_request']/p/span[5]/text()")
item_loader.add_css("tags", '.position-label li::text')
item_loader.add_css("publish_time", ".publish_time::text")
item_loader.add_css("job_advantage", ".job-advantage p::text")
item_loader.add_css("job_desc", ".job_bt div")
item_loader.add_css("job_addr", ".work_addr")
item_loader.add_css("company_name", "#job_company dt a img::attr(alt)")
item_loader.add_css("company_url", "#job_company dt a::attr(href)")
item_loader.add_value("crawl_time", datetime.now())
job_item = item_loader.load_item()
return job_item
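# The loader class and helpers referenced above are not shown in this snippet. A
# minimal sketch of what they might look like follows, inferred from how
# parse_job() uses them; the field list and the TakeFirst default are assumptions,
# not the project's actual items.py, and the processors import path matches older
# Scrapy releases (newer Scrapy moved it to itemloaders.processors).
import hashlib

import scrapy
from scrapy.loader import ItemLoader
from scrapy.loader.processors import TakeFirst


def get_md5(url):
    # hash the URL into a fixed-length string usable as a primary key
    if isinstance(url, str):
        url = url.encode("utf-8")
    return hashlib.md5(url).hexdigest()


class LagouJobItem(scrapy.Item):
    title = scrapy.Field()
    url = scrapy.Field()
    url_object_id = scrapy.Field()
    salary = scrapy.Field()
    job_city = scrapy.Field()
    work_years = scrapy.Field()
    degree_need = scrapy.Field()
    job_type = scrapy.Field()
    tags = scrapy.Field()
    publish_time = scrapy.Field()
    job_advantage = scrapy.Field()
    job_desc = scrapy.Field()
    job_addr = scrapy.Field()
    company_name = scrapy.Field()
    company_url = scrapy.Field()
    crawl_time = scrapy.Field()


class LagouJobItemLoader(ItemLoader):
    # keep the first extracted value for each field instead of a list
    default_output_processor = TakeFirst()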
Example source code for Python's now() method
def occurred(at_):
"""Calculate when a service event occurred.
Arguments:
at_ (:py:class:`str`): When the event occurred.
Returns:
:py:class:`str`: The humanized occurrence time.
"""
try:
occurred_at = parse(at_)
except (TypeError, ValueError):
logger.warning('failed to parse occurrence time %r', at_)
return 'time not available'
utc_now = datetime.now(tz=timezone.utc)
try:
return naturaltime((utc_now - occurred_at).total_seconds())
except TypeError: # at_ is a naive datetime
return naturaltime((datetime.now() - occurred_at).total_seconds())
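# A brief usage sketch for occurred(), assuming the imports the snippet relies on
# (dateutil.parser.parse, humanize.naturaltime, and a module-level logger):
occurred('2021-06-01T12:00:00+00:00')   # aware timestamp -> e.g. '2 months ago'
occurred('2021-06-01 12:00:00')         # naive timestamp -> falls back to the naive-datetime branch
occurred('not a timestamp')             # unparseable -> 'time not available'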
def calculate_timeout(http_date):
"""Extract request timeout from e.g. ``Retry-After`` header.
Note:
Per :rfc:`2616#section-14.37`, the ``Retry-After`` header can
be either an integer number of seconds or an HTTP date. This
function can handle either.
Arguments:
http_date (:py:class:`str`): The date to parse.
Returns:
:py:class:`int`: The timeout, in seconds.
"""
try:
return int(http_date)
except ValueError:
date_after = parse(http_date)
utc_now = datetime.now(tz=timezone.utc)
return int((date_after - utc_now).total_seconds())
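# Usage sketch: a Retry-After value can carry either a number of seconds or an
# HTTP date, and calculate_timeout() accepts both (assuming parse() above is
# dateutil.parser.parse, which understands RFC 1123 dates):
calculate_timeout('120')                             # -> 120
calculate_timeout('Wed, 21 Oct 2054 07:28:00 GMT')   # -> seconds from now until that date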
def download_current_dataset(self, dest_path='.', unzip=True):
now = datetime.now().strftime('%Y%m%d')
file_name = 'numerai_dataset_{0}.zip'.format(now)
dest_file_path ='{0}/{1}'.format(dest_path, file_name)
r = requests.get(self._dataset_url)
if r.status_code!=200:
return r.status_code
with open(dest_file_path, "wb") as fp:
fp.write(r.content)  # write the whole payload; iterating over bytes yields ints on Python 3
if unzip:
with zipfile.ZipFile(dest_file_path, "r") as z:
z.extractall(dest_path)
return r.status_code
def check_if_media_sync_offset_satisfied(logger, settings, audit):
"""
Check if the media sync offset is satisfied. The media sync offset is a duration in seconds specified in the
configuration file. This duration is the amount of time audit media is given to sync up with SafetyCulture servers
before this tool exports the audit data.
:param logger: The logger
:param settings: Settings from command line and configuration file
:param audit: Audit JSON
:return: Boolean - True if the media sync offset is satisfied, otherwise, returns false.
"""
modified_at = dateutil.parser.parse(audit['modified_at'])
now = datetime.utcnow()
elapsed_time_difference = (pytz.utc.localize(now) - modified_at)
# if the media_sync_offset has been satisfied
if not elapsed_time_difference > timedelta(seconds=settings[MEDIA_SYNC_OFFSET_IN_SECONDS]):
logger.info('Audit {0} modified too recently, some media may not have completed syncing. Skipping export until next sync cycle'.format(
audit['audit_id']))
return False
return True
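# Hedged example of calling the check above; the settings key and audit fields
# are taken from the function body, the concrete values are made up:
import logging
settings = {MEDIA_SYNC_OFFSET_IN_SECONDS: 900}   # give media 15 minutes to sync
audit = {'audit_id': 'audit_123', 'modified_at': '2021-06-01T12:00:00.000Z'}
if check_if_media_sync_offset_satisfied(logging.getLogger(__name__), settings, audit):
    pass  # enough time has passed since the last modification; safe to export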
def configure_logging(self):
"""
Configure logging to log to std output as well as to log file
"""
log_level = logging.DEBUG
log_filename = datetime.now().strftime('%Y-%m-%d') + '.log'
sp_logger = logging.getLogger('sp_logger')
sp_logger.setLevel(log_level)
formatter = logging.Formatter('%(asctime)s : %(levelname)s : %(message)s')
fh = logging.FileHandler(filename=self.log_dir + log_filename)
fh.setLevel(log_level)
fh.setFormatter(formatter)
sp_logger.addHandler(fh)
sh = logging.StreamHandler(sys.stdout)
sh.setLevel(log_level)
sh.setFormatter(formatter)
sp_logger.addHandler(sh)
def __ssl_version(self, try_next=False):
"""
Returns an SSL Context Object while handling the many
supported protocols
"""
if try_next:
# Increment version
self.secure_protocol_idx += 1
while self.secure_protocol_idx < len(SECURE_PROTOCOL_PRIORITY):
# Now return it
return SECURE_PROTOCOL_PRIORITY[self.secure_protocol_idx][0]
# If we reach here, we had a problem; use SocketRetryLimit() instead
# of SocketException() since we're at the end of the line now.
raise SocketRetryLimit('There are no protocols left to try.')
def write_log(target_uri, doi, pmid, found_rrids, head, body, text, h):
now = datetime.now().isoformat()[0:19].replace(':','').replace('-','')
frv = list(set(found_rrids.values()))
if len(frv) == 1 and frv[0] == 'Already Annotated':
head, body, text = None, None, None
log = {'target_uri':target_uri,
'group':h.group,
'doi':doi,
'pmid':pmid,
'found_rrids':found_rrids,
'count':len(found_rrids),
'head':head,
'body':body,
'text':text,
}
fname = 'logs/' + 'rrid-%s.json' % now
with open(fname, 'wt') as f:
json.dump(log, f, sort_keys=True, indent=4)
def on_success(self, data):
''' Called when we detect an event through the streaming API.
The base class version looks for quoted tweets and for each one it
finds, we write out a text file that contains the ID of the tweet
that mentions us.
The other (cron-job) version of your bot will look for any files with the
correct extension (identified by `kStreamFileExtension`) in its
HandleQuotes() method and favorite^H^H^H^H like those tweets.
See https://dev.twitter.com/streaming/userstreams
'''
# for now, all we're interested in handling are events.
if 'event' in data:
# Dump the data into a JSON file for the other cron-process to
# handle the next time it wakes up.
fileName = os.path.join(self.path, "{0}{1}".format(
uuid4().hex, kStreamFileExtension))
with open(fileName, "wt") as f:
f.write(json.dumps(data).encode("utf-8"))
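# A rough sketch of the cron-side pickup the docstring describes: the other bot
# scans its directory for files ending in kStreamFileExtension, loads each dumped
# event, handles it, then deletes the file. This is an illustration of the handoff
# only, not the bot's actual HandleQuotes() implementation.
def HandleQuotes(self):
    import glob
    import json
    import os
    for fileName in glob.glob(os.path.join(self.path, "*" + kStreamFileExtension)):
        with open(fileName, "rt") as f:
            data = json.load(f)
        # ... like/favorite the quoted tweet referenced by `data` ...
        os.remove(fileName)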
def HandleOneMention(self, mention):
''' Like the tweet that mentions us. If the word 'tick' appears
in that tweet, also reply with the current time.
'''
who = mention['user']['screen_name']
text = mention['text']
theId = mention['id_str']
eventType = "Mention"
# we favorite every mention that we see
if self.debug:
print "Faving tweet {0} by {1}:\n {2}".format(theId, who, text.encode("utf-8"))
else:
self.twitter.create_favorite(id=theId)
if 'tick' in text.lower():
# reply to them with the current time.
replyMsg = "@{0} {1}".format(who, NowString(datetime.now()))
if self.debug:
print "REPLY: {}".format(replyMsg)
else:
self.tweets.append({'status': replyMsg, 'in_reply_to_status_id': theId})
eventType = "Reply"
self.Log(eventType, [who])
def __init__(self, username, password, avatar="default.png", nickname="", description="", status="init",
             e_mail="", student_number="", department="", truename="", tel="", date=None,
             usergroup="primary", auth_method="local"):
# using sha512
#if (len(password) <= 6):
# self = None
# return None
self.username = username
self.password = password
self.avatar = avatar
self.nickname = nickname
self.description = description
self.status = status
self.e_mail = e_mail
self.student_number = student_number
self.department = department
self.truename = truename
self.tel = tel
self.beans = 1000
if (date != None):
self.register_date = date
else:
self.register_date = datetime.now()
self.user_group = usergroup
self.auth_method = auth_method
def calculate_etl(self, downloaded, total, start=None):
"""Calculates the estimated time left, based on how long it took us
to reach "downloaded" and how many messages we have left.
If no start time is given, the time will simply by estimated by how
many chunks are left, which will NOT work if what is being downloaded is media"""
left = total - downloaded
if not start:
# We add chunk size - 1 because division will truncate the decimal places,
# so for example, if we had a chunk size of 8:
# 7 messages + 7 = 14 -> 14 // 8 = 1 chunk download required
# 8 messages + 7 = 15 -> 15 // 8 = 1 chunk download required
# 9 messages + 7 = 16 -> 16 // 8 = 2 chunks download required
#
# Clearly, both 7 and 8 fit in one chunk, but 9 doesn't.
chunks_left = (left + self.download_chunk_size - 1) // self.download_chunk_size
etl = chunks_left * self.download_delay
else:
if downloaded:
delta_time = (datetime.now() - start).total_seconds() / downloaded
etl = left * delta_time
else:
etl = 0
return timedelta(seconds=round(etl, 1))
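# Worked example of the ceiling-division estimate above, assuming a chunk size
# of 8 messages and a one-second delay between chunk downloads:
left = 9
download_chunk_size = 8
download_delay = 1
chunks_left = (left + download_chunk_size - 1) // download_chunk_size   # -> 2
print(chunks_left * download_delay)   # 2 seconds estimated time left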
def getMoreInfo(self, nzb):
"""
Get details about a torrent.
.. seealso:: MovieSearcher.correctRelease
"""
data = self.getHTMLData(nzb['detail_url'])
soup = BeautifulSoup(data, 'html.parser')
description = soup.find(id='description')
if description:
nzb['description'] = description.prettify()
line = soup.find(text='Date de publication').parent.parent
pub = line.find_all('td')[1]
added = datetime.strptime(pub.getText().split('(')[0].strip(),
'%d/%m/%Y %H:%M')
nzb['age'] = (datetime.now() - added).days
self.log.debug(nzb['age'])
def get_courses_with_session(self):
query = self.ds.query(kind='teaches')
query.add_filter('tid', '=', self.tid)
teaches = list(query.fetch())
courses = list()
for teach in teaches:
query = self.ds.query(kind='courses')
query.add_filter('cid', '=', teach['cid'])
courses = courses + list(query.fetch())
results = list()
for course in courses:
query = self.ds.query(kind='sessions')
query.add_filter('cid', '=', course['cid'])
sessions = list(query.fetch())
for session in sessions:
if session['expires'].replace(tzinfo=None) > datetime.now():
results.append(session)
if len(results) == 1:
course['secret'] = sessions[0]['secret']
# result = courses + sessions
return courses
def get_secret_and_seid(self):
query = self.ds.query(kind='enrolled_in')
enrolled_in = list(query.fetch())
results = list()
for enrolled in enrolled_in:
query = self.ds.query(kind='sessions')
query.add_filter('cid', '=', enrolled['cid'])
sessions = list(query.fetch())
for session in sessions:
if session['expires'].replace(tzinfo=None) > datetime.now():
results.append(session)
# results = results + list(query.fetch())
if len(results) == 1:
secret = results[0]['secret']
seid = results[0]['seid']
else:
secret, seid = None, -1
return secret, seid
def __init__(self, lte=None, gte=None, limit=250, sort='date_disseminated,ASC', fastout=False, verify=True, endpoint='http://127.0.0.1/', start_offset=0):
if gte and not lte:
lte = datetime.now().isoformat()
if lte and not gte:
gte = '2000-01-01'
self.lte = lte
self.gte = gte
self.limit = limit
self.sort = sort
self.fastout = fastout
self.verify = verify
self.endpoint = endpoint
self.fcc_endpoint = 'https://ecfsapi.fcc.gov/filings'
self.index_fields = mappings.FIELDS.keys()
self.es = Elasticsearch(self.endpoint, timeout=30)
self.start_offset = start_offset
self.stats = {'indexed': start_offset, 'fetched': start_offset}
def handle(self, *args, **options):
today = datetime.now()
# Cron tasks
if today.hour == 0:
self.change_day()
self.evaluate_block_users()
self.send_daily_email()
if today.day == 1:
self.change_month()
if (today.day == 1 and today.month == 1):
self.change_year()
# Force actions
if options['force-day']:
self.change_day()
self.evaluate_block_users()
self.send_daily_email()
if options['force-month']:
self.change_month()
if options['force-year']:
self.change_year()
def log(self):
# keep reading until we get a log_trigger
timeout = 0.5
start_time = datetime.now()
while 1:
if (datetime.now() - start_time).total_seconds() > timeout:
return self.buff
msg = self.bus.recv(0.5)
if msg is None:
continue
pid, obd_data = self.separate_can_msg(msg)
if pid in self.pids2log:
parsed = self.pids[pid]['parse'](obd_data)
self.buff.update(parsed)
# if pid == self.trigger:
# return buff
def log(self):
# keep reading until we get a log_trigger
timeout = 1
start_time = datetime.now()
fms_ccvs = 'FMS_CRUISE_CONTROL_VEHICLE_SPEED (km/h)'
while 1:
if (datetime.now() - start_time).total_seconds() > timeout:
return self.buff
msg = self.bus.recv(0.5)
if msg is None:
continue
pid, obd_data = self.separate_can_msg(msg)
if pid in self.pids2log:
parsed = self.pids[pid]['parse'](obd_data)
self.buff.update(parsed)
if fms_ccvs in self.buff and self.buff[fms_ccvs] > 200:
del self.buff[fms_ccvs]
# don't trigger a log if we get an invalid value
continue
if pid == self.trigger:
return self.buff
def _make_csv_writer(self):
"""
:return:
"""
self._buffer = StringIO()
self._bytes_written = 0
now = datetime.now()
self._out_csv = open(self.log_folder + '/' + now.strftime('%Y%m%d_%H%M%S_{}.csv'.format(self.make_random(6))), 'w')
logging.warning("Writing to {} ({} bytes)".format(self._out_csv.name, self.max_bytes))
self._out_writer = csv.DictWriter(self._buffer, fieldnames=self.fieldnames, restval=None)
self._out_writer.writeheader()
self._out_csv.write(self._buffer.getvalue())
self._reset_buffer()
self.writerow({'vid': self.vin})
def _make_writer(self):
"""
:return:
"""
self._buffer = StringIO()
self._bytes_written = 0
now = datetime.now()
self.fname = self.log_folder + '/' + now.strftime('%Y%m%d_%H%M%S_{}.json'.format(self.make_random(6)))
self.fname = str(pathlib.Path(self.fname))
self._out_fh = open(self.fname, 'w')
self.write_pid()
logging.warning("Writing to {} ({} bytes)".format(self._out_fh.name, self.max_bytes))
# compress any old files still lying around
for fname in glob(self.log_folder+"/*.json"):
if fname != self.fname:
self._compress(fname)
def execute(self):
try:
self.system.run()
except (ReadTimeout, ConnectionError, JSONDecodeError):
pass
except exceptions.TradingSystemException as e:
curr = datetime.now()
print('{time} - {text}'.format(time=curr.strftime('%Y-%m-%d %H:%M:%S'), text=str(e)))
except Exception as e:
curr = datetime.now()
print('{time} - {text} - {args}'.format(time=curr.strftime('%Y-%m-%d %H:%M:%S'), text=str(e), args=e.args))
traceback.print_exc()
if self.interval:
threading.Timer(self.interval, self.execute).start()
def list_pr(list_pr,trackname):
print (header)
print ('<table>')
print('<tr><td>%s %s%s</font></br></br></td></tr>'%(ask,color,url)+'<tr><th>%s %s</br>%s %s</br></br></th></tr>'%(Artist,artist_song(html)[1],Album,artist_song(html)[0])+'<tr><th><img src="%s" /></th></tr>'%Image(html)[1]+'</table>')
if ((album(url)[0]).isdigit() == True):
print('<table><tr><td>'+'</br><a href="%(1)s">Download track %(2)s</a> - %(4)s (%(3)s) %(5)s at: %(1)s'%{'1':list_pr[int(album(url)[0])],'2':(int(album(url)[0]))+1,'3':file_size(list_pr[int(album(url)[0])])[1],'4':trackname[int(album(url)[0])],'5':color}+'</font></br></br></br>All album tracks are:'+'</td></tr>')
for i in list_pr:
#print ('<tr><td>'+i[0]+'<tr><td>'+i[1]+'</td></th>')
print ('<tr><td>'+'<a href="%s">Download track %s</a> - %s (%s) %s at: '%(i,list_pr.index(i)+1,trackname[list_pr.index(i)],file_size(i)[1],color)+i+'</font></td></tr>')
else:
print('<table></br>')
for i in list_pr:
#print ('<tr><td>'+i[0]+'<tr><td>'+i[1]+'</td></th>')
print ('<tr><td>'+'<a href="%s">Download track %s</a> - %s (%s) %s at: '%(i,list_pr.index(i)+1,trackname[list_pr.index(i)],file_size(i)[1],color)+i+'</font></td></tr>')
print('</table>')
#print(datetime.now().strftime('</br></br></br>%A, %d %b %Y, %I:%M:%S %p'))
print(difficulties)
print ("<p><b><a href='/RJ'>Try again</a></b></p>")
print ("</body></html>");
def list_pr(list_pr):
print (header)
print ('<table>')
print('<tr><td>You asked for %s</br></br></td></tr>'%url+'<tr><th>Artist: %s</br>Album: %s</br></br></th></tr>'%(artist_song(html)[1],artist_song(html)[0])+'<tr><th><img src="%s" /></th></tr>'%Image(html)[1]+'</table>')
if ((album(url)[0]).isdigit() == True):
print('<table><tr><td>'+'</br><a href="%(1)s">Download track %(2)s</a> (%(3)s) at: %(1)s'%{'1':list_dl(album(url))[int(album(url)[0])],'2':(int(album(url)[0]))+1,'3':file_size(list_dl(album(url))[int(album(url)[0])])}+'</br></br></br>Other album tracks are:'+'</td></tr>')
for i in list_pr:
#print ('<tr><td>'+i[0]+'<tr><td>'+i[1]+'</td></th>')
print ('<tr><td>'+'<a href="%s">Download track %s</a> (%s) at: '%(i,list_pr.index(i)+1,file_size(i))+i+'</td></tr>')
else:
print('<table></br>')
for i in list_pr:
#print ('<tr><td>'+i[0]+'<tr><td>'+i[1]+'</td></th>')
print ('<tr><td>'+'<a href="%s">Download track %s</a> (%s) at: '%(i,list_pr.index(i)+1,file_size(i))+i+'</td></tr>')
print('</table>')
#print(datetime.now().strftime('</br></br></br>%A, %d %b %Y, %I:%M:%S %p'))
print ("<p><b><a href='/RJ'>Try again</a></b></p>")
print ("</body></html>");
def vid_pr(dl):
print (header)
print ('<table>')
j=0
k=0
AA=[]
AB=['Download 480p','Download 720p','Download 1080p']
while j<len(video(url)):
if len(file_size(video(url)[j]))>6:
AA.append(video(url)[j])
j+=1
print ('<tr><td>'+'You asked for %s</br></br></td></tr>'%url+'<tr><th>Artist: %s</br>Track: %s</br></br></th></tr>'%(artist_song(html)[1],artist_song(html)[0])+'<tr><th><img src="%s" /></th></tr></table>'%Image(html)[0])
print('<table><tr><td></br>')
while k<len(AA):
print('<tr><td>'+'%s %s %s'%('<a href="%s"><b>%s</b></a>'%(AA[k],AB[k]),' (%s)'%file_size(AA[k]),'at: %s'%AA[k])+'</br></td></tr>')
k+=1
#print(datetime.now().strftime('</br></br></br>%A, %d %b %Y, %I:%M:%S %p'))
print('</td></tr></table>')
print ("<p><b><a href='/RJ'>Try again</a></b></p>")
print ("</body></html>");
def index(pattern=None):
"""Renders index.html page with a list of benchmarks."""
filter_regex = None
if pattern:
filter_regex = re.compile(urllib.parse.unquote(pattern))
min_time_to_lookup = datetime.now() - timedelta(days=_MAX_DAYS_WITHOUT_RUN)
client = datastore.Client()
query = client.query(kind='Test')
query.add_filter('start', '>', min_time_to_lookup)
fetched = list(query.fetch())
test_names = {} # maps test name to encoded test name
for fetched_result in fetched:
if fetched_result['test'] in test_names:
continue # already added
if not filter_regex or re.search(pattern, fetched_result['test']):
test_names[fetched_result['test']] = urllib.parse.quote(
fetched_result['test'], safe='')
# convert test_names to list and sort
test_names = sorted(test_names.items(), key=itemgetter(1), reverse=True)
return render_template('index.html', tests=test_names)
def benchmark_data():
"""Returns benchmark data in json format for graphing."""
test_id = urllib.parse.unquote(request.args.get('test'))
entry_id = urllib.parse.unquote(request.args.get('entry'))
min_time_to_lookup = datetime.now() - timedelta(days=_DAYS_TO_FETCH)
client = datastore.Client()
timing_query = client.query(kind='Entry')
timing_query.add_filter('test', '=', test_id)
timing_query.add_filter('entry', '=', entry_id)
timing_query.add_filter('start', '>', min_time_to_lookup)
timing_query.projection = ['start', 'timing']
start_and_timing = [
{'start': data['start'], 'timing': data['timing']}
for data in timing_query.fetch()]
start_and_timing_json = json.dumps(start_and_timing)
return start_and_timing_json
def in_words_from_now(stamp, sep='_', precision='{:0.1f}'):
if stamp is None:
return 'never'
nw = now()
if nw > stamp:
words = ('ago',)
rdate = delta(nw, stamp)
else:
words = ('from', 'now')
rdate = delta(stamp, nw)
if rdate.days > 0 or rdate.weeks > 0 or rdate.months > 0 or rdate.years > 0:
return stamp.astimezone(dateutil.tz.tzlocal()).isoformat()
if rdate.hours > 0:
value = rdate.hours + (rdate.minutes / 60.0)
label = 'hours'
elif rdate.minutes > 0:
value = rdate.minutes + (rdate.seconds / 60.0)
label = 'min'
else:
value = rdate.seconds + (rdate.microseconds / 1000000.0)
label = 'sec'
return sep.join((precision.format(value), label) + words)
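# The now() and delta() helpers used above are not part of this snippet. One way
# they could be defined (an assumption, not the project's actual code) is an
# aware UTC clock plus a dateutil relativedelta:
from datetime import datetime, timezone

import dateutil.tz
from dateutil.relativedelta import relativedelta


def now():
    return datetime.now(tz=timezone.utc)


def delta(later, earlier):
    return relativedelta(later, earlier)

# e.g. a timestamp 90 seconds in the past renders roughly as '1.5_min_ago'
in_words_from_now(now() - relativedelta(seconds=90))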
def fetch_new_proxyes(self):
"""
Fetch a fresh batch of free proxies and append any that are not already in the pool.
"""
logger.info("extending proxyes using fetch_free_proxyes.py")
new_proxyes = fetch_free_proxyes.fetch_all()
logger.info("new proxyes: %s" % new_proxyes)
self.last_fetch_proxy_time = datetime.now()
for np in new_proxyes:
if self.url_in_proxyes("http://" + np):
continue
else:
self.proxyes.append({"proxy": "http://" + np,
"valid": True,
"count": 0})
if self.len_valid_proxy() < self.extend_proxy_threshold:  # still short of valid proxies after fetching, so lower the threshold to avoid pointless re-fetching
self.extend_proxy_threshold -= 1
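# Hedged sketch of the two pool helpers fetch_new_proxyes() relies on, inferred
# from how they are called above (not necessarily the middleware's real code):
def url_in_proxyes(self, url):
    # True if the given proxy URL is already present in the pool
    return any(p["proxy"] == url for p in self.proxyes)

def len_valid_proxy(self):
    # number of proxies currently marked usable
    return sum(1 for p in self.proxyes if p["valid"])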