/**
 * Counts how often each token occurs in one input record and emits the
 * resulting term-to-count map under the record's document id.
 *
 * <p>The record text is handed to {@code TrecOLParser}. When the parser
 * reports that it did not parse the record, or when tokenizing yields no
 * terms, nothing is written to the context.
 *
 * @param key     input key supplied by the framework; not read by this method
 * @param value   record text that is passed to the parser
 * @param context receives the (document id, term counts) pair
 * @throws IOException          if one of the calls made here, such as the context write, raises it
 * @throws InterruptedException if one of the calls made here, such as the context write, raises it
 */
@Override
public void map(LongWritable key, Text value, Mapper.Context context) throws IOException, InterruptedException {
    TrecOLParser parsedDoc = new TrecOLParser(value.toString());
    // The field is replaced for every record, including records that fail to parse.
    documentAnalyzed = new MapWritable();
    if (!parsedDoc.isParsed()) {
        return;
    }

    this.tokenizer.tokenize(parsedDoc.getDocContent());
    while (this.tokenizer.hasMoreTokens()) {
        Text termKey = new Text(this.tokenizer.nextToken());
        // NOTE(review): a repeated term has its stored count converted to a string and
        // parsed back; if stored values are always IntWritable a direct cast might do —
        // confirm against CastingTypes before changing.
        IntWritable seenSoFar = documentAnalyzed.containsKey(termKey)
                ? CastingTypes.strToIntWr(documentAnalyzed.get(termKey).toString())
                : CastingTypes.zero;
        documentAnalyzed.put(termKey, CastingTypes.intToIntWr(seenSoFar.get() + 1));
    }

    if (documentAnalyzed.isEmpty()) {
        return;
    }
    context.write(CastingTypes.strToIntWr(parsedDoc.getDocId()), documentAnalyzed);
}
NutchMap.java 文件源码
java
阅读 18
收藏 0
点赞 0
评论 0
项目:YarnExamples
作者:
评论列表
文章目录