/**
* Runs the inverter job. The inverter job flips outlinks to inlinks to be
* passed into the analysis job.
*
* @param nodeDb
* The node database to use.
* @param outlinkDb
* The outlink database to use.
* @param output
* The output directory.
*
* @throws IOException
* If an error occurs while running the inverter job.
*/
private void runInverter(Path nodeDb, Path outlinkDb, Path output)
    throws IOException {
  final JobConf job = new NutchJob(getConf());
  job.setJobName("LinkAnalysis Inverter");

  // Input side: the node database and the outlink database are both
  // registered as inputs of this single job and read as sequence files.
  job.setInputFormat(SequenceFileInputFormat.class);
  FileInputFormat.addInputPath(job, nodeDb);
  FileInputFormat.addInputPath(job, outlinkDb);

  // Processing: the Inverter class is registered as both mapper and reducer.
  // The map phase emits Text keys with ObjectWritable values.
  job.setMapperClass(Inverter.class);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(ObjectWritable.class);
  job.setReducerClass(Inverter.class);

  // Output side: Text keys with LinkDatum values, written as sequence files
  // under the given output directory.
  job.setOutputFormat(SequenceFileOutputFormat.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(LinkDatum.class);
  FileOutputFormat.setOutputPath(job, output);
  // Explicitly switch off the output committer's "mark successful jobs" flag.
  job.setBoolean("mapreduce.fileoutputcommitter.marksuccessfuljobs", false);

  // Submit the job; an I/O failure is logged with its stack trace and then
  // propagated unchanged to the caller.
  LOG.info("Starting inverter job");
  try {
    JobClient.runJob(job);
  } catch (IOException failure) {
    LOG.error(StringUtils.stringifyException(failure));
    throw failure;
  }
  LOG.info("Finished inverter job.");
}
LinkRank.java 文件源码
java
阅读 27
收藏 0
点赞 0
评论 0
项目:GeoCrawler
作者:
评论列表
文章目录