def _load_training_data(session_factory):
    """Read the whole corpus table and return ``(values, targets)``.

    ``values`` holds ``title + ' ' + text`` for every ``model.Corpus`` row and
    ``targets`` holds the matching ``category`` values, in the same order.

    The session is closed before returning so the database connection is not
    held open while the bot runs.
    NOTE(review): assumes ``session_factory()`` yields an SQLAlchemy-style
    session exposing ``query()`` and ``close()`` -- confirm against
    ``connect_to_database``.
    """
    session = session_factory()
    try:
        rows = session.query(model.Corpus).all()
        # Copy the needed columns into plain lists while the session is still
        # open, so nothing touches the ORM objects after close().
        values = [row.title + ' ' + row.text for row in rows]
        targets = [row.category for row in rows]
    finally:
        session.close()
    return values, targets


def run_bot(args):
    """Train the classifier from the database and reply to new submissions.

    Steps, in order:
      1. Configure logging from LOGFILE_URI / LOG_LEVEL / LOG_FORMAT.
      2. Load the training corpus from DATABASE_URI and train a ``Classifier``.
      3. Authorize the reddit client with CLIENT_ACCESSCODE.
      4. For each submission streamed from SUBREDDIT, classify its title and
         selftext; when the predicted category has an entry in ``responses``,
         post that response followed by ``post_signature`` as a comment.

    Args:
        args: object with a ``supervised`` attribute. When it is truthy the
            operator is prompted on stdin before each comment, and any answer
            other than ``y`` (case-insensitive) skips the submission.
    """
    logging.basicConfig(filename=LOGFILE_URI, level=LOG_LEVEL, format=LOG_FORMAT)

    logging.info('Connecting to database %s', DATABASE_URI)
    Session = connect_to_database(DATABASE_URI)
    logging.info('Database connection OK')

    data_values, data_targets = _load_training_data(Session)
    logging.info('Training classifier with %s values', len(data_values))
    classifier = Classifier(data_values, data_targets)
    logging.info('Classifier trained')

    logging.info('Connecting to reddit...')
    reddit = get_reddit_client()
    logging.info('Authorizing...')
    access_information = reddit.get_access_information(CLIENT_ACCESSCODE)
    reddit.set_access_credentials(**access_information)
    logging.info('Logged in successfully.')

    for message in praw.helpers.submission_stream(reddit, SUBREDDIT, limit=5, verbosity=0):
        message_text = message.title + ' ' + message.selftext
        # classify() returns a sequence; only its first element is used.
        pred = classifier.classify(message_text)[0]
        if pred not in responses:
            # No canned reply for this category.
            continue

        if args.supervised and input('Classify {} as {}? (y/n) '.format(message.id, pred)).lower() != 'y':
            continue

        try:
            message.add_comment(responses[pred] + post_signature)
        except praw.errors.RateLimitExceeded:
            # TODO:
            # Ideally, errors should actually be handled properly. Perhaps a deque could be used
            # to store all the posts which failed, which could be retried every minute (or so)
            logging.error('Rate limit exceeded, cannot post to thread %s', message.title)
评论列表
文章目录