# main.py -- source file (Python code snippet)

def run_bot(args):
    """Train the classifier from the stored corpus and reply to new submissions.

    Steps, in order: configure file logging, load every ``model.Corpus`` row,
    train a ``Classifier`` on "<title> <text>" strings labelled with each
    row's category, authorize the reddit client, then iterate the submission
    stream for ``SUBREDDIT`` and comment on every submission whose predicted
    category has an entry in ``responses``.

    Args:
        args: Object exposing a ``supervised`` attribute. When it is truthy,
            each reply must be confirmed by answering ``y`` (case-insensitive)
            at the prompt; any other answer skips that submission.

    Returns:
        None.
    """
    logging.basicConfig(filename=LOGFILE_URI, level=LOG_LEVEL, format=LOG_FORMAT)

    logging.info('Connecting to database %s', DATABASE_URI)
    Session = connect_to_database(DATABASE_URI)
    logging.info('Database connection OK')

    session = Session()
    try:
        data = session.query(model.Corpus).all()
        # Copy what the classifier needs into plain lists while the session
        # is still open, so nothing touches the rows after it is closed.
        data_values = [col.title + ' ' + col.text for col in data]
        data_targets = [col.category for col in data]
    finally:
        # The corpus is only read once; release the session instead of
        # holding it open for the whole lifetime of the stream loop below.
        session.close()

    logging.info('Training classifier with %s values', len(data_values))
    classifier = Classifier(data_values, data_targets)
    logging.info('Classifier trained')

    logging.info('Connecting to reddit...')
    reddit = get_reddit_client()

    logging.info('Authorizing...')
    access_information = reddit.get_access_information(CLIENT_ACCESSCODE)
    reddit.set_access_credentials(**access_information)
    logging.info('Logged in successfully.')

    for message in praw.helpers.submission_stream(reddit, SUBREDDIT, limit=5, verbosity=0):
        message_text = message.title + ' ' + message.selftext
        # classify() is indexed here, so it is expected to return a sequence;
        # only the first prediction is used.
        pred = classifier.classify(message_text)[0]

        # No canned response for this category: nothing to post.
        if pred not in responses:
            continue

        # In supervised mode a human must approve each reply before posting.
        if args.supervised:
            answer = input('Classify {} as {}? (y/n) '.format(message.id, pred))
            if answer.lower() != 'y':
                continue

        try:
            message.add_comment(responses[pred] + post_signature)
        except praw.errors.RateLimitExceeded:
            # TODO:
            # Ideally, errors should actually be handled properly. Perhaps a deque could be used
            # to store all the posts which failed, which could be retried every minute (or so)
            logging.error('Rate limit exceeded, cannot post to thread %s', message.title)