pipelines.py 文件源码

python
阅读 29 收藏 0 点赞 0 评论 0

项目:Crawlers 作者: mi-minus 项目源码 文件源码
def _conditional_insert(self, tx, item):
    """Insert one scraped topic into the ``post`` table and log it.

    Issues a parameterized ``insert ignore`` statement through ``tx`` using
    the ``topic_*`` fields read from ``item``, then emits a log message at
    WARNING level.

    Args:
        tx: Object exposing ``execute(query, params)``.
            NOTE(review): presumably a DB transaction/cursor handed in by
            the caller -- confirm against the call site.
        item: Mapping that provides every key listed in ``field_keys``;
            a missing key raises ``KeyError`` before anything is executed.
    """
    # NOTE: saving the raw HTML via ``self.save_html(item)`` and storing the
    # resulting path in ``item['repost_post_id']`` is currently disabled.

    insert_sql = u"insert ignore into post (url, topic_id, topic_kws, site_id, site_name, title, content, pt_time, st_time) values (%s, %s, %s, %s, %s, %s, %s, %s, %s)"

    # Item keys in the same order as the column list of the statement above.
    field_keys = (
        'topic_url',
        'topic_id',
        'topic_kw',
        'topic_site_id',
        'topic_site_name',
        'topic_title',
        'topic_content',
        'topic_pt_time',
        'topic_st_time',
    )
    values = tuple(item[key] for key in field_keys)

    tx.execute(insert_sql, values)
    log.msg('insert one', level=log.WARNING)

    # NOTE: an earlier, now disabled variant inserted into the table named by
    # ``item['table_name']`` with ``ON DUPLICATE KEY UPDATE post_time=%s``.
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号