/**
 * Writes a single-part crawl db containing one entry per key in {@code init}.
 *
 * <p>The data is written as a {@link MapFile} at
 * {@code <crawldb>/<CrawlDb.CURRENT_NAME>/part-00000} with {@link Text} keys
 * and {@link CrawlDatum} values. Every key is paired with the same
 * {@code cd} instance.
 *
 * @param config  Hadoop configuration handed to the {@code MapFile.Writer}
 * @param fs      not used by this method; retained so existing callers keep
 *                compiling
 * @param crawldb root directory of the crawl db to create
 * @param init    keys to write; a {@code TreeSet} iterates in ascending
 *                order, which matches the non-decreasing key order that
 *                {@code MapFile.Writer.append} requires
 * @param cd      value stored under every key
 * @throws Exception if the writer cannot be created, an entry cannot be
 *                   appended, or the writer fails to close
 */
private void createCrawlDb(Configuration config, FileSystem fs, Path crawldb,
    TreeSet<String> init, CrawlDatum cd) throws Exception {
  LOG.fine("* creating crawldb: " + crawldb);
  Path dir = new Path(crawldb, CrawlDb.CURRENT_NAME);
  Option wKeyOpt = MapFile.Writer.keyClass(Text.class);
  org.apache.hadoop.io.SequenceFile.Writer.Option wValueOpt = SequenceFile.Writer.valueClass(CrawlDatum.class);
  // try-with-resources guarantees the writer (and its underlying data/index
  // files) is closed even when append() throws part-way through.
  try (MapFile.Writer writer = new MapFile.Writer(config, new Path(dir,
      "part-00000"), wKeyOpt, wValueOpt)) {
    for (String key : init) {
      writer.append(new Text(key), cd);
    }
  }
}
TestCrawlDbMerger.java 文件源码
java
阅读 19
收藏 0
点赞 0
评论 0
项目:GeoCrawler
作者:
评论列表
文章目录