def print_scores(response_logs: List[pd.DataFrame], opts):
    """Print relevancy-score statistics and plot the score over time.

    All frames in ``response_logs`` are concatenated; the result is expected
    to have a numeric ``score`` column and a ``time`` column holding
    timestamps in seconds (it is converted with ``unit='s'``).

    The function prints overall totals (a page counts as relevant when its
    score is above 0.5), delegates per-domain statistics to
    ``show_domain_stats``, prints averages via ``print_averages`` and builds
    a time-series plot of the score resampled to ``opts.step`` seconds.

    Attributes read from ``opts``:
        output: forwarded to ``show_domain_stats`` and ``save_plot``.
        top: forwarded to ``show_domain_stats``.
        smooth: if truthy, the score is smoothed with an exponentially
            weighted mean whose span is ``smooth * step * average rps``.
        step: resampling period in seconds.
        no_show: when truthy, the plot is not passed to ``save_plot``.
    """
    joined = pd.concat(response_logs)  # type: pd.DataFrame
    binary_score = joined['score'] > 0.5
    print()
    print('Total number of pages: {:,}, relevant pages: {:,}, '
          'average binary score: {:.2f}, average score: {:.2f}'.format(
              len(joined), binary_score.sum(), binary_score.mean(),
              joined['score'].mean()))
    # A copy is handed over, presumably so that show_domain_stats cannot
    # alter the frame used below -- verify against its implementation.
    show_domain_stats(joined.copy(), output=opts.output, top=opts.top)

    # Re-index by timestamp so the score can be resampled over time.
    joined.sort_values(by='time', inplace=True)
    joined.index = pd.to_datetime(joined.pop('time'), unit='s')

    if opts.smooth and len(joined) > 1:
        crawl_time = (joined.index[-1] - joined.index[0]).total_seconds()
        # With a zero-length crawl there is no request rate to derive a
        # span from (and dividing would raise), so smoothing is skipped.
        if crawl_time > 0:
            avg_rps = len(joined) / crawl_time
            # ewm() rejects span < 1; clamp so that a small smoothing
            # window degrades to (almost) no smoothing instead of raising.
            span = max(1, int(opts.smooth * opts.step * avg_rps))
            joined['score'] = joined['score'].ewm(span=span).mean()

    print_averages({'score': joined['score']}, opts.step, '{:.2f}')
    title = 'Page relevancy score'
    scores = joined['score'].resample('{}S'.format(opts.step)).mean()
    plot = TimeSeries(scores, plot_width=1000,
                      xlabel='time', ylabel='score', title=title)
    plot.set(y_range=Range1d(0, 1))
    if not opts.no_show:
        save_plot(plot, title=title, suffix='score', output=opts.output)
response_stats.py 文件源码
python
阅读 22
收藏 0
点赞 0
评论 0
评论列表
文章目录