CommonSpider.java 文件源码

java
阅读 20 收藏 0 点赞 0 评论 0

项目:Gather-Platform 作者:
/**
     * 测试爬虫模板
     *
     * @param info
     * @return
     */
    public List<Webpage> testSpiderInfo(SpiderInfo info) throws JMException {
        final ResultItemsCollectorPipeline resultItemsCollectorPipeline = new ResultItemsCollectorPipeline();
        final String uuid = UUID.randomUUID().toString();
        Task task = taskManager.initTask(uuid, info.getDomain(), info.getCallbackURL(), "spiderInfoId=" + info.getId() + "&spiderUUID=" + uuid);
        task.addExtraInfo("spiderInfo", info);
        QueueScheduler queueScheduler = new QueueScheduler();
        MySpider spider = (MySpider) makeSpider(info, task)
                .addPipeline(resultItemsCollectorPipeline)
                .setScheduler(queueScheduler);
        spider.startUrls(info.getStartURL());
        //慎用爬虫监控,可能导致内存泄露
//        spiderMonitor.register(spider);
        spiderMap.put(uuid, spider);
        taskManager.getTaskById(uuid).setState(State.RUNNING);
        spider.run();
        List<Webpage> webpageList = Lists.newLinkedList();
        resultItemsCollectorPipeline.getCollected().forEach(resultItems -> webpageList.add(CommonWebpagePipeline.convertResultItems2Webpage(resultItems)));
        return webpageList;
    }
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号