/**
 * Inverts outlinks to inlinks while attaching node information to the
 * outlink.
 *
 * <p>The values for a key are scanned once: a {@code Node} value is kept as
 * the node for the key, and every {@code LinkDatum} value is copied into a
 * list of outlinks. If a node was seen and it reports at least one outlink,
 * one record is emitted per collected outlink, keyed by the outlink's URL and
 * carrying a {@code LinkNode} built from the originating URL and the node.
 *
 * <p>Keys for which no {@code Node} value was seen are skipped, because a
 * {@code LinkNode} cannot be built without one.
 *
 * @param key the key whose string form is used as the originating URL
 * @param values the values grouped under {@code key}; values that are neither
 *          {@code Node} nor {@code LinkDatum} are ignored
 * @param output collector receiving (target URL, {@code LinkNode}) pairs
 * @param reporter not used by this method
 * @throws IOException if collecting an output record fails
 */
public void reduce(Text key, Iterator<ObjectWritable> values,
    OutputCollector<Text, LinkNode> output, Reporter reporter)
    throws IOException {
  String fromUrl = key.toString();
  List<LinkDatum> outlinks = new ArrayList<LinkDatum>();
  Node node = null;

  // Single pass over the values: remember the node, accumulate the outlinks.
  while (values.hasNext()) {
    Object obj = values.next().get();
    if (obj instanceof Node) {
      // If several Node values are present, the last one seen wins.
      node = (Node) obj;
    } else if (obj instanceof LinkDatum) {
      // Store a copy rather than the instance handed out by the iterator.
      // NOTE(review): presumably guards against value-object reuse by the
      // framework -- confirm.
      outlinks.add(WritableUtils.clone((LinkDatum) obj, conf));
    }
  }

  // Without a node there is nothing to attach to the inlinks; previously this
  // case raised a NullPointerException on node.getNumOutlinks().
  if (node == null || node.getNumOutlinks() <= 0) {
    return;
  }

  // Emit each outlink as an inlink of its target, carrying the source node.
  for (LinkDatum outlink : outlinks) {
    output.collect(new Text(outlink.getUrl()), new LinkNode(fromUrl, node));
  }
}
LinkDumper.java 文件源码
java
阅读 26
收藏 0
点赞 0
评论 0
项目:GeoCrawler
作者:
评论列表
文章目录