CrawlDBTestUtil.java 文件源码

java
阅读 30 收藏 0 点赞 0 评论 0

项目:GeoCrawler 作者:
/**
 * Creates a synthetic crawldb by writing the given entries into a single
 * {@code part-00000} MapFile under {@code <crawldb>/CrawlDb.CURRENT_NAME}.
 * 
 * @param conf
 *          configuration handed to the MapFile writer
 * @param fs
 *          filesystem where db will be created (not referenced by this
 *          method's body; retained so existing callers keep compiling)
 * @param crawldb
 *          path where db will be created
 * @param init
 *          urls to be inserted, objects are of type URLCrawlDatum; entries
 *          are appended in list order
 * @throws Exception
 *           if the writer cannot be created, an entry cannot be appended,
 *           or the writer fails to close
 */
public static void createCrawlDb(Configuration conf, FileSystem fs,
    Path crawldb, List<URLCrawlDatum> init) throws Exception {
  LOG.trace("* creating crawldb: " + crawldb);
  Path dir = new Path(crawldb, CrawlDb.CURRENT_NAME);
  Option wKeyOpt = MapFile.Writer.keyClass(Text.class);
  org.apache.hadoop.io.SequenceFile.Writer.Option wValueOpt = SequenceFile.Writer.valueClass(CrawlDatum.class);
  // try-with-resources guarantees the writer is closed even when an append
  // fails part-way through, so no file handle is leaked on error.
  try (MapFile.Writer writer = new MapFile.Writer(conf, new Path(dir,
      "part-00000"), wKeyOpt, wValueOpt)) {
    for (URLCrawlDatum row : init) {
      LOG.info("adding:" + row.url.toString());
      writer.append(new Text(row.url), row.datum);
    }
  }
}
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号