CleaningJob.java 文件源码

java
阅读 22 收藏 0 点赞 0 评论 0

项目:GeoCrawler 作者:
/**
 * Configures and synchronously runs the "CleaningJob" MapReduce job over the
 * current segment of the given crawl database, logging the start time, the
 * finish time and the elapsed duration.
 *
 * <p>The job reads sequence files from {@code <crawldb>/CrawlDb.CURRENT_NAME},
 * maps them with {@code DBFilter}, reduces them with {@code DeleterReducer}
 * and produces no file output ({@code NullOutputFormat}).
 *
 * @param crawldb  path of the crawl database whose current data is the job input
 * @param noCommit value stored in the job configuration under the key
 *                 {@code "noCommit"}; presumably tells the reducer to skip the
 *                 final index commit (verify against {@code DeleterReducer})
 * @throws IOException if setting up or running the job fails
 */
public void delete(String crawldb, boolean noCommit) throws IOException {
  final SimpleDateFormat timestampFormat =
      new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
  final long startedAt = System.currentTimeMillis();
  LOG.info("CleaningJob: starting at " + timestampFormat.format(startedAt));

  final JobConf cleaningJob = new NutchJob(getConf());
  cleaningJob.setJobName("CleaningJob");

  // Input: the current crawl db, stored as sequence files.
  final Path currentDb = new Path(crawldb, CrawlDb.CURRENT_NAME);
  FileInputFormat.addInputPath(cleaningJob, currentDb);
  cleaningJob.setInputFormat(SequenceFileInputFormat.class);

  // Map / reduce wiring; the job itself writes no output files.
  cleaningJob.setMapperClass(DBFilter.class);
  cleaningJob.setMapOutputKeyClass(ByteWritable.class);
  cleaningJob.setMapOutputValueClass(Text.class);
  cleaningJob.setReducerClass(DeleterReducer.class);
  cleaningJob.setOutputFormat(NullOutputFormat.class);

  // Job-level flags read by the tasks.
  cleaningJob.setBoolean("noCommit", noCommit);
  // Deletions need to be explicitly allowed.
  cleaningJob.setBoolean(IndexerMapReduce.INDEXER_DELETE, true);

  // Blocks until the job has completed.
  JobClient.runJob(cleaningJob);

  final long finishedAt = System.currentTimeMillis();
  final String finishedStamp = timestampFormat.format(finishedAt);
  LOG.info("CleaningJob: finished at " + finishedStamp + ", elapsed: "
      + TimingUtil.elapsedTime(startedAt, finishedAt));
}
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号