Example source code for the Java class org.apache.hadoop.io.DoubleWritable

Employee.java (project: DocIT)

/**
 * Read (i.e., deserialize) an employee.
 */
@Override
public void readFields(DataInput in) throws IOException {
name = new Text();
name.readFields(in);
address = new Text();
address.readFields(in);
company = new Text();
company.readFields(in);
salary = new DoubleWritable();
salary.readFields(in);
department = new Text();
department.readFields(in);
isManager = new BooleanWritable();
isManager.readFields(in);
}
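
For context, a Writable class like this Employee also needs a write(DataOutput) method that serializes the same fields in the same order readFields() reads them. A minimal sketch, assuming the name/address/company/salary/department/isManager fields shown above (not copied from the DocIT source):

/**
 * Write (serialize) an employee. Field order must mirror readFields().
 */
@Override
public void write(DataOutput out) throws IOException {
name.write(out);
address.write(out);
company.write(out);
salary.write(out);
department.write(out);
isManager.write(out);
}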

TF.java (project: Wikipedia-Index)

public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
conf.set("xmlinput.start", "<page>");
conf.set("xmlinput.end", "</page>");
Job job = Job.getInstance(conf);
job.setJobName("TermFrequencyCount");
job.setJarByClass(TF.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(IntArrayWritable.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(DoubleWritable.class);
job.setMapperClass(TFMap.class);
job.setReducerClass(TFReduce.class);
job.setInputFormatClass(XmlInputFormat.class);
job.setOutputFormatClass(TextOutputFormat.class);
FileInputFormat.addInputPath(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));
boolean wait = job.waitForCompletion(true);
System.exit(wait ? 0 : 1);
}

TF_IDF.java (project: Wikipedia-Index)

public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
Job job = Job.getInstance(conf);
job.setJobName("TF-IDFCount");
job.setJarByClass(TF_IDF.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(TextArrayWritable.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(DoubleWritable.class);
job.setMapperClass(TF_IDFMap.class);
job.setReducerClass(TF_IDFReduce.class);
job.setInputFormatClass(TextInputFormat.class);
job.setOutputFormatClass(TextOutputFormat.class);
FileInputFormat.addInputPath(job, new Path(args[0]));
FileInputFormat.addInputPath(job, new Path(args[1]));
FileOutputFormat.setOutputPath(job, new Path(args[2]));
boolean wait = job.waitForCompletion(true);
System.exit(wait ? 0 : 1);
}

Multiplication.java (project: mapreduce-samples)

public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
Job job = Job.getInstance(conf);
job.setJarByClass(Multiplication.class);
ChainMapper.addMapper(job, CooccurrenceMapper.class, LongWritable.class, Text.class, Text.class, Text.class, conf);
ChainMapper.addMapper(job, RatingMapper.class, Text.class, Text.class, Text.class, Text.class, conf);
job.setMapperClass(CooccurrenceMapper.class);
job.setMapperClass(RatingMapper.class);
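// Note: the MultipleInputs calls below install DelegatingMapper and route each input path to its own mapper, so the ChainMapper/setMapperClass calls above are effectively superseded.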
job.setReducerClass(MultiplicationReducer.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(Text.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(DoubleWritable.class);
MultipleInputs.addInputPath(job, new Path(args[0]), TextInputFormat.class, CooccurrenceMapper.class);
MultipleInputs.addInputPath(job, new Path(args[1]), TextInputFormat.class, RatingMapper.class);
TextOutputFormat.setOutputPath(job, new Path(args[2]));
job.waitForCompletion(true);
}

Sum.java (project: mapreduce-samples)

public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
Job job = Job.getInstance(conf);
job.setMapperClass(SumMapper.class);
job.setReducerClass(SumReducer.class);
job.setJarByClass(Sum.class);
job.setInputFormatClass(TextInputFormat.class);
job.setOutputFormatClass(TextOutputFormat.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(DoubleWritable.class);
TextInputFormat.setInputPaths(job, new Path(args[0]));
TextOutputFormat.setOutputPath(job, new Path(args[1]));
job.waitForCompletion(true);
}

UnitSum.java (project: mapreduce-samples)

public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
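// Pass "beta" to the tasks through the job Configuration (presumably the damping parameter read by BetaMapper).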
conf.setFloat("beta", Float.parseFloat(args[3]));
Job job = Job.getInstance(conf);
job.setJarByClass(UnitSum.class);
ChainMapper.addMapper(job, PassMapper.class, Object.class, Text.class, Text.class, DoubleWritable.class, conf);
ChainMapper.addMapper(job, BetaMapper.class, Text.class, DoubleWritable.class, Text.class, DoubleWritable.class, conf);
job.setReducerClass(SumReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(DoubleWritable.class);
MultipleInputs.addInputPath(job, new Path(args[0]), TextInputFormat.class, PassMapper.class);
MultipleInputs.addInputPath(job, new Path(args[1]), TextInputFormat.class, BetaMapper.class);
FileOutputFormat.setOutputPath(job, new Path(args[2]));
job.waitForCompletion(true);
}

HadoopPlatform.java (project: aliyun-oss-hadoop-fs)

@Override
public void init() throws IOException {
registerKey(NullWritable.class.getName(), NullWritableSerializer.class);
registerKey(Text.class.getName(), TextSerializer.class);
registerKey(LongWritable.class.getName(), LongWritableSerializer.class);
registerKey(IntWritable.class.getName(), IntWritableSerializer.class);
registerKey(Writable.class.getName(), DefaultSerializer.class);
registerKey(BytesWritable.class.getName(), BytesWritableSerializer.class);
registerKey(BooleanWritable.class.getName(), BoolWritableSerializer.class);
registerKey(ByteWritable.class.getName(), ByteWritableSerializer.class);
registerKey(FloatWritable.class.getName(), FloatWritableSerializer.class);
registerKey(DoubleWritable.class.getName(), DoubleWritableSerializer.class);
registerKey(VIntWritable.class.getName(), VIntWritableSerializer.class);
registerKey(VLongWritable.class.getName(), VLongWritableSerializer.class);
LOG.info("Hadoop platform inited");
}

MatvecNaive.java (project: fst-bench)

protected JobConf configPass2 () throws Exception
{
final JobConf conf = new JobConf(getConf(), MatvecNaive.class);
conf.set("number_nodes", "" + number_nodes);
conf.setJobName("MatvecNaive_pass2");
conf.setMapperClass(MapPass2.class);
conf.setReducerClass(RedPass2.class);
FileInputFormat.setInputPaths(conf, tempmv_path);
FileOutputFormat.setOutputPath(conf, output_path);
conf.setNumReduceTasks( nreducer );
conf.setOutputKeyClass(LongWritable.class);
conf.setMapOutputValueClass(DoubleWritable.class);
conf.setOutputValueClass(Text.class);
return conf;
}

OrderStatsApp.java (project: pro-phoenix)

@Override
public int run(String[] args) throws Exception {
try {
final Configuration configuration = HBaseConfiguration.create(getConf());
setConf(configuration);
final Job job = Job.getInstance(configuration, "phoenix-mr-order_stats-job");
final String selectQuery = "SELECT ORDER_ID, CUST_ID, AMOUNT FROM ORDERS ";
// set the input table and select query. you can also pass in the list of columns
PhoenixMapReduceUtil.setInput(job, OrderWritable.class, "ORDERS", selectQuery);
// set the output table name and the list of columns.
PhoenixMapReduceUtil.setOutput(job, "ORDER_STATS", "CUST_ID, AMOUNT");
job.setMapperClass(OrderMapper.class);
job.setReducerClass(OrderReducer.class);
job.setOutputFormatClass(PhoenixOutputFormat.class);
job.setMapOutputKeyClass(LongWritable.class);
job.setMapOutputValueClass(DoubleWritable.class);
job.setOutputKeyClass(NullWritable.class);
job.setOutputValueClass(OrderWritable.class);
TableMapReduceUtil.addDependencyJars(job);
job.waitForCompletion(true);
return 0;
} catch (Exception ex) {
LOG.error(String.format("An exception [%s] occurred while performing the job: ", ex.getMessage()));
return -1;
}
}

HdfsProducerTest.java (project: Camel)

@Test
public void testWriteDouble() throws Exception {
if (!canTest()) {
return;
}
Double aDouble = 12.34D;
template.sendBody("direct:write_double", aDouble);
Configuration conf = new Configuration();
Path file1 = new Path("file:///" + TEMP_DIR.toUri() + "/test-camel-double");
SequenceFile.Reader reader = new SequenceFile.Reader(conf, SequenceFile.Reader.file(file1));
Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
Writable value = (Writable) ReflectionUtils.newInstance(reader.getValueClass(), conf);
reader.next(key, value);
Double rDouble = ((DoubleWritable) value).get();
assertEquals(rDouble, aDouble);
IOHelper.close(reader);
}

HdfsProducerTest.java (project: Camel)

@Test
public void testWriteDouble() throws Exception {
if (!canTest()) {
return;
}
Double aDouble = 12.34D;
template.sendBody("direct:write_double", aDouble);
Configuration conf = new Configuration();
Path file1 = new Path("file:///" + TEMP_DIR.toUri() + "/test-camel-double");
FileSystem fs1 = FileSystem.get(file1.toUri(), conf);
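// Same check as the previous test, but using the older Reader(FileSystem, Path, Configuration) constructor rather than the option-based Reader(conf, Reader.file(...)) form.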
SequenceFile.Reader reader = new SequenceFile.Reader(fs1, file1, conf);
Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
Writable value = (Writable) ReflectionUtils.newInstance(reader.getValueClass(), conf);
reader.next(key, value);
Double rDouble = ((DoubleWritable) value).get();
assertEquals(rDouble, aDouble);
IOHelper.close(reader);
}

Norm2Job.java (project: sPCA)

public void run(Configuration conf, Path matrixInputPath,
String meanSpanFileName, Path matrixOutputPath) throws IOException,
InterruptedException, ClassNotFoundException {
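// Make the mean/span file name available to the map and reduce tasks via the job Configuration.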
conf.set(MEANSPANOPTION, meanSpanFileName);
Job job = new Job(conf);
job.setJobName("Norm2Job");
job.setJarByClass(Norm2Job.class);
FileSystem fs = FileSystem.get(matrixInputPath.toUri(), conf);
matrixInputPath = fs.makeQualified(matrixInputPath);
matrixOutputPath = fs.makeQualified(matrixOutputPath);
FileInputFormat.addInputPath(job, matrixInputPath);
job.setInputFormatClass(SequenceFileInputFormat.class);
job.setOutputFormatClass(SequenceFileOutputFormat.class);
FileOutputFormat.setOutputPath(job, matrixOutputPath);
job.setMapperClass(MyMapper.class);
job.setReducerClass(MyReducer.class);
job.setNumReduceTasks(1);
job.setOutputKeyClass(NullWritable.class);
job.setOutputValueClass(DoubleWritable.class);
job.submit();
job.waitForCompletion(true);
}

Norm2Job.java (project: sPCA)

public double loadResult(Path outputDirPath, Configuration conf) throws IOException {
Path finalNumberFile = new Path(outputDirPath, "part-r-00000");
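// run() above uses a single reducer, so the result is the lone value in part-r-00000.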
SequenceFileIterator<NullWritable, DoubleWritable> iterator =
new SequenceFileIterator<NullWritable, DoubleWritable>(
finalNumberFile, true, conf);
double norm2;
try {
Pair<NullWritable, DoubleWritable> next = iterator.next();
norm2 = next.getSecond().get();
if (iterator.hasNext())
throw new IOException("More than one value after norm2Job!");
} finally {
Closeables.close(iterator, false);
}
return norm2;
}

ReconstructionErrJobTest.java (project: sPCA)

private void verifyReducerOutput(
DummyRecordWriter<IntWritable, DoubleWritable> writer) {
Assert.assertEquals("The reducer should output three key!", 3, writer
.getKeys().size());
for (IntWritable key : writer.getKeys()) {
List<DoubleWritable> list = writer.getValue(key);
assertEquals("reducer produces more than one values per key!", 1,
list.size());
Double value = list.get(0).get();
switch (key.get()) {
case 0:
assertEquals("the computed reconstructionError is incorrect!",
reconstructionError, value, EPSILON);
break;
case 1:
assertEquals("the computed yNorm is incorrect!", yNorm, value, EPSILON);
break;
case 2:
assertEquals("the computed centralizedYNorm is incorrect!",
centralizedYNorm, value, EPSILON);
break;
default:
fail("Unknown key in reading the results: " + key);
}
}
}

HadoopPipeline.java (project: titan0.5.4-hbase1.1.1-custom)

private Class<? extends WritableComparable> convertJavaToHadoop(final Class klass) {
if (klass.equals(String.class)) {
return Text.class;
} else if (klass.equals(Integer.class)) {
return IntWritable.class;
} else if (klass.equals(Double.class)) {
return DoubleWritable.class;
} else if (klass.equals(Long.class)) {
return LongWritable.class;
} else if (klass.equals(Float.class)) {
return FloatWritable.class;
} else if (klass.equals(Boolean.class)) {
return BooleanWritable.class;
} else {
throw new IllegalArgumentException("The provided class is not supported: " + klass.getSimpleName());
}
}

WritableHandler.java (project: titan0.5.4-hbase1.1.1-custom)

public WritableComparable set(final Long l) {
if (null == l) return NULL_LONG;
if (type.equals(LongWritable.class)) {
longWritable.set(l);
return longWritable;
} else if (type.equals(IntWritable.class)) {
intWritable.set(l.intValue());
return intWritable;
} else if (type.equals(DoubleWritable.class)) {
doubleWritable.set(l.doubleValue());
return doubleWritable;
} else if (type.equals(FloatWritable.class)) {
floatWritable.set(l.floatValue());
return floatWritable;
} else {
text.set(String.valueOf(l));
return text;
}
}

Search.java (project: search-1047)

public static void main(String[] args) throws Exception {
if(args.length != 3) {
System.err.println("Usage: Search <input> <output> <pattern>");
System.exit(-1);
}
Configuration conf = new Configuration();
Job job = new Job(conf,"Search");
job.setJarByClass(Search.class);
FileInputFormat.addInputPath(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));
pattern = args[2];
job.setMapperClass(SearchMapper.class);
job.setReducerClass(SearchReducer.class);
job.setOutputKeyClass(DoubleWritable.class);
job.setOutputValueClass(Text.class);
job.setNumReduceTasks(1);
pageRankRead = new MapFileRead("hdfs://localhost/input/PageRankMap");
job.waitForCompletion(true);
}

UrlModulus.java (project: search-1047)

public static void main(String[] args) throws Exception {
if(args.length != 2) {
System.err.println("Usage: UrlModulus <input path> <output path>");
System.exit(-1);
}
Configuration conf = new Configuration();
Job job = new Job(conf, "UrlModulus");
job.setJarByClass(UrlModulus.class);
FileInputFormat.addInputPath(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));
job.setOutputFormatClass(MapFileOutputFormat.class);
job.setMapperClass(UrlModulusMapper.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(DoubleWritable.class);
job.setReducerClass(UrlModulusReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(DoubleWritable.class);
job.setNumReduceTasks(1);
job.waitForCompletion(true);
}

PageRank.java (project: search-1047)

public static void main(String[] args) throws Exception {
if (args.length != 2) {
System.err.println("Usage: OutLinks <input path> <output path>");
System.exit(-1);
}
Job job = new Job();
job.setJarByClass(PageRank.class);
FileInputFormat.addInputPath(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));
job.setOutputFormatClass(MapFileOutputFormat.class);
job.setMapperClass(PageRankMapper.class);
job.setReducerClass(PageRankReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(DoubleWritable.class);
PageRankRead = new MapFileRead("/input/PageRankMap");
//OutLinksRead = new MapFileRead("/input/OutLinksMap");
job.waitForCompletion(true);
}

WritableHandler.java (project: titan0.5.4-hbase1.1.1-custom)

public WritableComparable set(final Integer i) {
if (null == i) return NULL_INT;
if (type.equals(LongWritable.class)) {
longWritable.set(i.longValue());
return longWritable;
} else if (type.equals(IntWritable.class)) {
intWritable.set(i);
return intWritable;
} else if (type.equals(DoubleWritable.class)) {
doubleWritable.set(i.doubleValue());
return doubleWritable;
} else if (type.equals(FloatWritable.class)) {
floatWritable.set(i.floatValue());
return floatWritable;
} else {
text.set(String.valueOf(i));
return text;
}
}

AvroMixedMapReduce.java (project: hiped2)

public void reduce(Text key,
Iterator<DoubleWritable> values,
OutputCollector<AvroWrapper<StockAvg>,
NullWritable> output,
Reporter reporter) throws IOException {
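// Average all DoubleWritable values for this stock symbol and emit the result as an Avro-wrapped StockAvg record.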
Mean mean = new Mean();
while (values.hasNext()) {
mean.increment(values.next().get());
}
StockAvg avg = new StockAvg();
avg.setSymbol(key.toString());
avg.setAvg(mean.getResult());
output.collect(new AvroWrapper<StockAvg>(avg),
NullWritable.get());
}

WritableHandler.java (project: titan0.5.4-hbase1.1.1-custom)

public WritableComparable set(final String s) {
if (null == s) return NULL_TEXT;
if (type.equals(LongWritable.class)) {
longWritable.set(Long.valueOf(s));
return longWritable;
} else if (type.equals(IntWritable.class)) {
intWritable.set(Integer.valueOf(s));
return intWritable;
} else if (type.equals(DoubleWritable.class)) {
doubleWritable.set(Double.valueOf(s));
return doubleWritable;
} else if (type.equals(FloatWritable.class)) {
floatWritable.set(Float.valueOf(s));
return floatWritable;
} else {
text.set(s);
return text;
}
}

RedditAverage.java (project: Hanhan-Hadoop-MapReduce)

@Override
public int run(String[] args) throws Exception {
Configuration conf = this.getConf();
Job job = Job.getInstance(conf, "reddit average");
job.setJarByClass(RedditAverage.class);
job.setInputFormatClass(TextInputFormat.class);
job.setMapperClass(RedditMapper.class);
job.setCombinerClass(RedditCombiner.class);
job.setReducerClass(RedditReducer.class);
job.setMapOutputValueClass(LongPairWritable.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(DoubleWritable.class);
job.setOutputFormatClass(TextOutputFormat.class);
TextInputFormat.addInputPath(job, new Path(args[0]));
TextInputFormat.addInputPath(job, new Path(args[1]));
TextOutputFormat.setOutputPath(job, new Path(args[2]));
return job.waitForCompletion(true) ? 0 : 1;
}

PageRank.java (project: comparative-study-of-frameworks-for-parallel-processing-of-graphs)

@Override
public void compute(Iterable<DoubleWritable> messages) throws IOException {
if (this.getSuperstepCount() == 0) {
this.setValue(new DoubleWritable(1.0 / this.getNumVertices()));
}
else {
double pageRankSum = 0;
for (DoubleWritable message : messages) {
pageRankSum += message.get();
}
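// New rank = (1 - d)/N + d * (sum of incoming ranks), where d = DAMPING_FACTOR and N = number of vertices.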
double alpha = (1.0 - DAMPING_FACTOR) / this.getNumVertices();
setValue(new DoubleWritable(alpha + (pageRankSum * DAMPING_FACTOR)));
}
long edges = this.getEdges().size();
this.sendMessageToNeighbors(new DoubleWritable(this.getValue().get() / edges));
}

PageRank.java (project: comparative-study-of-frameworks-for-parallel-processing-of-graphs)

@Override
public boolean parseVertex(LongWritable key, Text value, Vertex<IntWritable, NullWritable, DoubleWritable> vertex) {
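// Expected input line: <vertexID><TAB><neighbor1> <neighbor2> ... (tab-separated id, then a space-separated adjacency list).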
String[] split = value.toString().split("\t");
vertex.setVertexID(new IntWritable(Integer.parseInt((split[0]))));
String[] aux = split[1].split(" ");
for (String aux1 : aux) {
vertex.addEdge(
new Edge<IntWritable, NullWritable>(
new IntWritable(Integer.parseInt((aux1))),
null
)
);
}
return true;
}

DataToDoublesSketchUDAFTest.java (project: sketches-hive)

@Test
public void partial1ModeDefaultK() throws Exception {
ObjectInspector[] inspectors = new ObjectInspector[] { doubleInspector };
GenericUDAFParameterInfo info = new SimpleGenericUDAFParameterInfo(inspectors, false, false);
GenericUDAFEvaluator eval = new DataToDoublesSketchUDAF().getEvaluator(info);
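// PARTIAL1 mode: consume raw input rows via iterate() and emit a serialized partial aggregation from terminatePartial().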
ObjectInspector resultInspector = eval.init(Mode.PARTIAL1, inspectors);
checkResultInspector(resultInspector);
DoublesUnionState state = (DoublesUnionState) eval.getNewAggregationBuffer();
eval.iterate(state, new Object[] { new DoubleWritable(1.0) });
eval.iterate(state, new Object[] { new DoubleWritable(2.0) });
BytesWritable bytes = (BytesWritable) eval.terminatePartial(state);
DoublesSketch resultSketch = DoublesSketch.wrap(Memory.wrap(bytes.getBytes()));
Assert.assertEquals(resultSketch.getK(), 128);
Assert.assertEquals(resultSketch.getRetainedItems(), 2);
Assert.assertEquals(resultSketch.getMinValue(), 1.0);
Assert.assertEquals(resultSketch.getMaxValue(), 2.0);
eval.close();
}

IndexSortComparable.java (project: systemml)

@Override
public int compareTo(Object o)
{
//compare only double value (e.g., for partitioner)
if( o instanceof DoubleWritable ) {
return _dval.compareTo((DoubleWritable) o);
}
//compare double value and index (e.g., for stable sort)
else if( o instanceof IndexSortComparable) {
IndexSortComparable that = (IndexSortComparable)o;
int tmp = _dval.compareTo(that._dval);
if( tmp==0 ) //secondary sort
tmp = _lval.compareTo(that._lval);
return tmp;
}
else {
throw new RuntimeException("Unsupported comparison involving class: "+o.getClass().getName());
}
}

CVB0Driver.java (project: Chi-FRBCS-BigData-Max)

private static double calculatePerplexity(Configuration conf, Path corpusPath, Path modelPath, int iteration)
throws IOException, ClassNotFoundException, InterruptedException {
String jobName = "Calculating perplexity for " + modelPath;
log.info("About to run: " + jobName);
Job job = new Job(conf, jobName);
job.setJarByClass(CachingCVB0PerplexityMapper.class);
job.setMapperClass(CachingCVB0PerplexityMapper.class);
job.setCombinerClass(DualDoubleSumReducer.class);
job.setReducerClass(DualDoubleSumReducer.class);
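// A single reduce task, so all partial sums end up in one output file.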
job.setNumReduceTasks(1);
job.setOutputKeyClass(DoubleWritable.class);
job.setOutputValueClass(DoubleWritable.class);
job.setInputFormatClass(SequenceFileInputFormat.class);
job.setOutputFormatClass(SequenceFileOutputFormat.class);
FileInputFormat.addInputPath(job, corpusPath);
Path outputPath = perplexityPath(modelPath.getParent(), iteration);
FileOutputFormat.setOutputPath(job, outputPath);
setModelPaths(job, modelPath);
HadoopUtil.delete(conf, outputPath);
if (!job.waitForCompletion(true)) {
throw new InterruptedException("Failed to calculate perplexity for: " + modelPath);
}
return readPerplexity(conf, modelPath.getParent(), iteration);
}

DataToDoublesSketchUDAFTest.java (project: sketches-hive)

@Test
public void completeModeDefaultK() throws Exception {
ObjectInspector[] inspectors = new ObjectInspector[] { doubleInspector };
GenericUDAFParameterInfo info = new SimpleGenericUDAFParameterInfo(inspectors, false, false);
GenericUDAFEvaluator eval = new DataToDoublesSketchUDAF().getEvaluator(info);
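// COMPLETE mode: consume raw input rows via iterate() and produce the final sketch directly from terminate().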
ObjectInspector resultInspector = eval.init(Mode.COMPLETE, inspectors);
checkResultInspector(resultInspector);
DoublesUnionState state = (DoublesUnionState) eval.getNewAggregationBuffer();
eval.iterate(state, new Object[] { new DoubleWritable(1.0) });
eval.iterate(state, new Object[] { new DoubleWritable(2.0) });
BytesWritable bytes = (BytesWritable) eval.terminate(state);
DoublesSketch resultSketch = DoublesSketch.wrap(Memory.wrap(bytes.getBytes()));
Assert.assertEquals(resultSketch.getK(), 128);
Assert.assertEquals(resultSketch.getRetainedItems(), 2);
Assert.assertEquals(resultSketch.getMinValue(), 1.0);
Assert.assertEquals(resultSketch.getMaxValue(), 2.0);
eval.close();
}

CVB0Driver.java (project: Chi-FRBCS-BigData-Ave)

/**
 * @param conf the job configuration
 * @param topicModelStateTemp the temporary topic-model state directory
 * @param iteration the iteration whose perplexity output should be read
 * @return the perplexity of the documents sampled during perplexity computation
 *         (total perplexity divided by total model weight), or {@code Double.NaN}
 *         if no perplexity data exists for the given iteration
 * @throws IOException
 */
public static double readPerplexity(Configuration conf, Path topicModelStateTemp, int iteration)
throws IOException {
Path perplexityPath = perplexityPath(topicModelStateTemp, iteration);
FileSystem fs = FileSystem.get(perplexityPath.toUri(), conf);
if (!fs.exists(perplexityPath)) {
log.warn("Perplexity path {} does not exist, returning NaN", perplexityPath);
return Double.NaN;
}
double perplexity = 0;
double modelWeight = 0;
long n = 0;
for (Pair<DoubleWritable, DoubleWritable> pair : new SequenceFileDirIterable<DoubleWritable, DoubleWritable>(
perplexityPath, PathType.LIST, PathFilters.partFilter(), null, true, conf)) {
modelWeight += pair.getFirst().get();
perplexity += pair.getSecond().get();
n++;
}
log.info("Read {} entries with total perplexity {} and model weight {}", new Object[] { n,
perplexity, modelWeight });
return perplexity / modelWeight;
}