java类org.apache.hadoop.io.DoubleWritable的实例源码

Employee.java 文件源码 项目:DocIT 阅读 47 收藏 0 点赞 0 评论 0
/**
 * Deserializes an employee from {@code in}.
 *
 * <p>Each field is first replaced by a fresh, empty instance and then populated from the
 * stream, in this order: name, address, company, salary, department, isManager.
 *
 * @param in source the field values are read from
 * @throws IOException if reading any of the fields fails
 */
@Override
public void readFields(DataInput in) throws IOException {
    // Assign-then-read, one field per line; the order is the wire order.
    (name = new Text()).readFields(in);
    (address = new Text()).readFields(in);
    (company = new Text()).readFields(in);
    (salary = new DoubleWritable()).readFields(in);
    (department = new Text()).readFields(in);
    (isManager = new BooleanWritable()).readFields(in);
}
TF.java 文件源码 项目:Wikipedia-Index 阅读 27 收藏 0 点赞 0 评论 0
/**
 * Configures and runs the "TermFrequencyCount" job, then exits the JVM with status 0 when
 * the job reports success and 1 otherwise.
 *
 * @param args {@code args[0]} is the input path, {@code args[1]} is the output path
 * @throws Exception if the job cannot be configured or executed
 */
public static void main(String[] args) throws Exception {
    Configuration configuration = new Configuration();
    // Record delimiters; presumably consumed by XmlInputFormat — verify against that class.
    configuration.set("xmlinput.start", "<page>");
    configuration.set("xmlinput.end", "</page>");

    Job tfJob = Job.getInstance(configuration);
    tfJob.setJobName("TermFrequencyCount");
    tfJob.setJarByClass(TF.class);

    // Processing stages.
    tfJob.setMapperClass(TFMap.class);
    tfJob.setReducerClass(TFReduce.class);

    // Intermediate (map output) types.
    tfJob.setMapOutputKeyClass(Text.class);
    tfJob.setMapOutputValueClass(IntArrayWritable.class);

    // Final (job output) types.
    tfJob.setOutputKeyClass(Text.class);
    tfJob.setOutputValueClass(DoubleWritable.class);

    // Input/output formats and locations.
    tfJob.setInputFormatClass(XmlInputFormat.class);
    tfJob.setOutputFormatClass(TextOutputFormat.class);
    FileInputFormat.addInputPath(tfJob, new Path(args[0]));
    FileOutputFormat.setOutputPath(tfJob, new Path(args[1]));

    boolean succeeded = tfJob.waitForCompletion(true);
    int exitCode = succeeded ? 0 : 1;
    System.exit(exitCode);
}
TF_IDF.java 文件源码 项目:Wikipedia-Index 阅读 25 收藏 0 点赞 0 评论 0
/**
 * Configures and runs the "TF-IDFCount" job, then exits the JVM with status 0 when the job
 * reports success and 1 otherwise.
 *
 * @param args {@code args[0]} and {@code args[1]} are the two input paths,
 *             {@code args[2]} is the output path
 * @throws Exception if the job cannot be configured or executed
 */
public static void main(String[] args) throws Exception {
    Job tfIdfJob = Job.getInstance(new Configuration());
    tfIdfJob.setJobName("TF-IDFCount");
    tfIdfJob.setJarByClass(TF_IDF.class);

    // Processing stages.
    tfIdfJob.setMapperClass(TF_IDFMap.class);
    tfIdfJob.setReducerClass(TF_IDFReduce.class);

    // Intermediate (map output) types.
    tfIdfJob.setMapOutputKeyClass(Text.class);
    tfIdfJob.setMapOutputValueClass(TextArrayWritable.class);

    // Final (job output) types.
    tfIdfJob.setOutputKeyClass(Text.class);
    tfIdfJob.setOutputValueClass(DoubleWritable.class);

    // Input/output formats and locations; both inputs feed the same mapper.
    tfIdfJob.setInputFormatClass(TextInputFormat.class);
    tfIdfJob.setOutputFormatClass(TextOutputFormat.class);
    FileInputFormat.addInputPath(tfIdfJob, new Path(args[0]));
    FileInputFormat.addInputPath(tfIdfJob, new Path(args[1]));
    FileOutputFormat.setOutputPath(tfIdfJob, new Path(args[2]));

    boolean succeeded = tfIdfJob.waitForCompletion(true);
    int exitCode = succeeded ? 0 : 1;
    System.exit(exitCode);
}
Multiplication.java 文件源码 项目:mapreduce-samples 阅读 21 收藏 0 点赞 0 评论 0
/**
 * Configures and runs the multiplication job and waits for it to finish.
 *
 * @param args {@code args[0]} is the input read by {@code CooccurrenceMapper},
 *             {@code args[1]} is the input read by {@code RatingMapper},
 *             {@code args[2]} is the output path
 * @throws Exception if the job cannot be configured or executed
 */
public static void main(String[] args) throws Exception {
    Configuration configuration = new Configuration();
    Job multiplicationJob = Job.getInstance(configuration);
    multiplicationJob.setJarByClass(Multiplication.class);

    // NOTE(review): the mapper is configured three ways below (ChainMapper, two direct
    // setMapperClass calls, and per-path MultipleInputs). Presumably only the last
    // configuration takes effect — confirm which wiring is actually intended.
    ChainMapper.addMapper(multiplicationJob, CooccurrenceMapper.class,
            LongWritable.class, Text.class, Text.class, Text.class, configuration);
    ChainMapper.addMapper(multiplicationJob, RatingMapper.class,
            Text.class, Text.class, Text.class, Text.class, configuration);

    multiplicationJob.setMapperClass(CooccurrenceMapper.class);
    multiplicationJob.setMapperClass(RatingMapper.class);

    multiplicationJob.setReducerClass(MultiplicationReducer.class);

    // Intermediate pairs are Text/Text; the final value is a DoubleWritable.
    multiplicationJob.setMapOutputKeyClass(Text.class);
    multiplicationJob.setMapOutputValueClass(Text.class);
    multiplicationJob.setOutputKeyClass(Text.class);
    multiplicationJob.setOutputValueClass(DoubleWritable.class);

    Path cooccurrenceInput = new Path(args[0]);
    MultipleInputs.addInputPath(multiplicationJob, cooccurrenceInput,
            TextInputFormat.class, CooccurrenceMapper.class);
    Path ratingInput = new Path(args[1]);
    MultipleInputs.addInputPath(multiplicationJob, ratingInput,
            TextInputFormat.class, RatingMapper.class);

    TextOutputFormat.setOutputPath(multiplicationJob, new Path(args[2]));

    // The completion status is intentionally left unused, as in the original driver.
    multiplicationJob.waitForCompletion(true);
}
Sum.java 文件源码 项目:mapreduce-samples 阅读 22 收藏 0 点赞 0 评论 0
/**
 * Configures and runs the sum job and waits for it to finish.
 *
 * @param args {@code args[0]} is the input path, {@code args[1]} is the output path
 * @throws Exception if the job cannot be configured or executed
 */
public static void main(String[] args) throws Exception {
    Job sumJob = Job.getInstance(new Configuration());
    sumJob.setJarByClass(Sum.class);

    // Processing stages.
    sumJob.setMapperClass(SumMapper.class);
    sumJob.setReducerClass(SumReducer.class);

    // Plain-text in, plain-text out; output records are (Text, DoubleWritable).
    sumJob.setInputFormatClass(TextInputFormat.class);
    sumJob.setOutputFormatClass(TextOutputFormat.class);
    sumJob.setOutputKeyClass(Text.class);
    sumJob.setOutputValueClass(DoubleWritable.class);

    Path inputPath = new Path(args[0]);
    TextInputFormat.setInputPaths(sumJob, inputPath);
    Path outputPath = new Path(args[1]);
    TextOutputFormat.setOutputPath(sumJob, outputPath);

    sumJob.waitForCompletion(true);
}
UnitSum.java 文件源码 项目:mapreduce-samples 阅读 25 收藏 0 点赞 0 评论 0
/**
 * Configures and runs the unit-sum job and waits for it to finish.
 *
 * @param args {@code args[0]} is the input read by {@code PassMapper},
 *             {@code args[1]} is the input read by {@code BetaMapper},
 *             {@code args[2]} is the output path,
 *             {@code args[3]} is the float stored in the configuration under "beta"
 * @throws Exception if the job cannot be configured or executed
 */
public static void main(String[] args) throws Exception {
    Configuration configuration = new Configuration();
    // Set before the job is created from the configuration.
    float beta = Float.parseFloat(args[3]);
    configuration.setFloat("beta", beta);

    Job unitSumJob = Job.getInstance(configuration);
    unitSumJob.setJarByClass(UnitSum.class);

    ChainMapper.addMapper(unitSumJob, PassMapper.class,
            Object.class, Text.class, Text.class, DoubleWritable.class, configuration);
    ChainMapper.addMapper(unitSumJob, BetaMapper.class,
            Text.class, DoubleWritable.class, Text.class, DoubleWritable.class, configuration);

    unitSumJob.setReducerClass(SumReducer.class);
    unitSumJob.setOutputKeyClass(Text.class);
    unitSumJob.setOutputValueClass(DoubleWritable.class);

    Path passInput = new Path(args[0]);
    MultipleInputs.addInputPath(unitSumJob, passInput, TextInputFormat.class, PassMapper.class);
    Path betaInput = new Path(args[1]);
    MultipleInputs.addInputPath(unitSumJob, betaInput, TextInputFormat.class, BetaMapper.class);

    FileOutputFormat.setOutputPath(unitSumJob, new Path(args[2]));
    unitSumJob.waitForCompletion(true);
}
HadoopPlatform.java 文件源码 项目:aliyun-oss-hadoop-fs 阅读 23 收藏 0 点赞 0 评论 0
/**
 * Registers one serializer class per supported Hadoop key type, using the key type's fully
 * qualified class name as the registration key, and then logs that initialisation is done.
 *
 * @throws IOException declared by the signature; presumably raised by {@code registerKey}
 *         — TODO confirm, its implementation is not visible here
 */
@Override
public void init() throws IOException {
  registerKey(NullWritable.class.getName(), NullWritableSerializer.class);
  registerKey(Text.class.getName(), TextSerializer.class);
  registerKey(LongWritable.class.getName(), LongWritableSerializer.class);
  registerKey(IntWritable.class.getName(), IntWritableSerializer.class);
  // The generic Writable interface itself is mapped to the default serializer.
  registerKey(Writable.class.getName(), DefaultSerializer.class);
  registerKey(BytesWritable.class.getName(), BytesWritableSerializer.class);
  // Note the naming asymmetry: BooleanWritable is handled by BoolWritableSerializer.
  registerKey(BooleanWritable.class.getName(), BoolWritableSerializer.class);
  registerKey(ByteWritable.class.getName(), ByteWritableSerializer.class);
  registerKey(FloatWritable.class.getName(), FloatWritableSerializer.class);
  registerKey(DoubleWritable.class.getName(), DoubleWritableSerializer.class);
  registerKey(VIntWritable.class.getName(), VIntWritableSerializer.class);
  registerKey(VLongWritable.class.getName(), VLongWritableSerializer.class);

  LOG.info("Hadoop platform inited");
}
MatvecNaive.java 文件源码 项目:fst-bench 阅读 24 收藏 0 点赞 0 评论 0
/**
 * Builds the job configuration for the second pass ("MatvecNaive_pass2").
 *
 * <p>The job reads from {@code tempmv_path}, writes to {@code output_path}, and uses
 * {@code nreducer} reduce tasks.
 *
 * @return the fully populated job configuration
 * @throws Exception declared by the signature
 */
protected JobConf configPass2 () throws Exception
{
    final JobConf pass2Conf = new JobConf(getConf(), MatvecNaive.class);
    pass2Conf.setJobName("MatvecNaive_pass2");

    // Stored as a string property of the job configuration.
    pass2Conf.set("number_nodes", "" + number_nodes);

    // Processing stages and parallelism.
    pass2Conf.setMapperClass(MapPass2.class);
    pass2Conf.setReducerClass(RedPass2.class);
    pass2Conf.setNumReduceTasks( nreducer );

    // Input/output locations.
    FileInputFormat.setInputPaths(pass2Conf, tempmv_path);
    FileOutputFormat.setOutputPath(pass2Conf, output_path);

    // Record types: map output values are doubles, final values are text.
    pass2Conf.setOutputKeyClass(LongWritable.class);
    pass2Conf.setMapOutputValueClass(DoubleWritable.class);
    pass2Conf.setOutputValueClass(Text.class);

    return pass2Conf;
}
OrderStatsApp.java 文件源码 项目:pro-phoenix 阅读 22 收藏 0 点赞 0 评论 0
/**
 * Configures and runs the "phoenix-mr-order_stats-job" job, which reads
 * {@code ORDER_ID, CUST_ID, AMOUNT} from the {@code ORDERS} table and writes
 * {@code CUST_ID, AMOUNT} to the {@code ORDER_STATS} table.
 *
 * @param args command-line arguments (not used by this method)
 * @return 0 if the job completed successfully, 1 if the job ran but reported failure,
 *         -1 if an exception occurred while configuring or running it
 * @throws Exception declared by the signature; exceptions raised inside the method are
 *         logged and reported through the return value instead
 */
@Override
public int run(String[] args) throws Exception {
    try {
        final Configuration configuration = HBaseConfiguration.create(getConf());
        setConf(configuration);
        final Job job = Job.getInstance(configuration, "phoenix-mr-order_stats-job");
        final String selectQuery = "SELECT ORDER_ID, CUST_ID, AMOUNT FROM ORDERS ";
        // set the input table and select query. you can also pass in the list of columns
        PhoenixMapReduceUtil.setInput(job, OrderWritable.class, "ORDERS", selectQuery);
        // set the output table name and the list of columns.
        PhoenixMapReduceUtil.setOutput(job, "ORDER_STATS", "CUST_ID, AMOUNT");
        job.setMapperClass(OrderMapper.class);
        job.setReducerClass(OrderReducer.class);
        job.setOutputFormatClass(PhoenixOutputFormat.class);
        job.setMapOutputKeyClass(LongWritable.class);
        job.setMapOutputValueClass(DoubleWritable.class);
        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(OrderWritable.class);
        TableMapReduceUtil.addDependencyJars(job);
        // Propagate the job outcome: a failed job must not be reported as success.
        return job.waitForCompletion(true) ? 0 : 1;
    } catch (Exception ex) {
        if (ex instanceof InterruptedException) {
            // Keep the interrupt visible to callers even though the exception is swallowed.
            Thread.currentThread().interrupt();
        }
        // Pass the exception itself so the stack trace is not lost.
        LOG.error(String.format("An exception [%s] occurred while performing the job: ", ex.getMessage()), ex);
        return -1;
    }
}
HdfsProducerTest.java 文件源码 项目:Camel 阅读 23 收藏 0 点赞 0 评论 0
/**
 * Sends a {@code Double} to the {@code direct:write_double} endpoint and verifies that the
 * first record of the resulting sequence file holds the same value.
 *
 * <p>Does nothing when {@code canTest()} returns false.
 */
@Test
public void testWriteDouble() throws Exception {
    if (!canTest()) {
        return;
    }
    Double aDouble = 12.34D;
    template.sendBody("direct:write_double", aDouble);

    Configuration conf = new Configuration();
    Path file1 = new Path("file:///" + TEMP_DIR.toUri() + "/test-camel-double");
    SequenceFile.Reader reader = new SequenceFile.Reader(conf, SequenceFile.Reader.file(file1));
    try {
        Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
        Writable value = (Writable) ReflectionUtils.newInstance(reader.getValueClass(), conf);
        reader.next(key, value);
        Double rDouble = ((DoubleWritable) value).get();
        // Expected value first, so a failure message reads the right way round.
        assertEquals(aDouble, rDouble);
    } finally {
        // Close the reader even when reading or the assertion fails.
        IOHelper.close(reader);
    }
}
HdfsProducerTest.java 文件源码 项目:Camel 阅读 28 收藏 0 点赞 0 评论 0
/**
 * Sends a {@code Double} to the {@code direct:write_double} endpoint and verifies that the
 * first record of the resulting sequence file holds the same value.
 *
 * <p>Does nothing when {@code canTest()} returns false.
 */
@Test
public void testWriteDouble() throws Exception {
    if (!canTest()) {
        return;
    }
    Double aDouble = 12.34D;
    template.sendBody("direct:write_double", aDouble);

    Configuration conf = new Configuration();
    Path file1 = new Path("file:///" + TEMP_DIR.toUri() + "/test-camel-double");
    FileSystem fs1 = FileSystem.get(file1.toUri(), conf);
    SequenceFile.Reader reader = new SequenceFile.Reader(fs1, file1, conf);
    try {
        Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
        Writable value = (Writable) ReflectionUtils.newInstance(reader.getValueClass(), conf);
        reader.next(key, value);
        Double rDouble = ((DoubleWritable) value).get();
        // Expected value first, so a failure message reads the right way round.
        assertEquals(aDouble, rDouble);
    } finally {
        // Close the reader even when reading or the assertion fails.
        IOHelper.close(reader);
    }
}
Norm2Job.java 文件源码 项目:sPCA 阅读 24 收藏 0 点赞 0 评论 0
/**
 * Configures and runs the "Norm2Job" job over a sequence-file input and waits for it to
 * complete.
 *
 * @param conf configuration to run with; {@code MEANSPANOPTION} is set on it
 * @param matrixInputPath input path; qualified against its file system before use
 * @param meanSpanFileName value stored in the configuration under {@code MEANSPANOPTION}
 * @param matrixOutputPath output path; qualified against the input's file system
 * @throws IOException if the job cannot be set up or if it completes unsuccessfully
 * @throws InterruptedException if waiting for the job is interrupted
 * @throws ClassNotFoundException if a job class cannot be resolved
 */
public void run(Configuration conf, Path matrixInputPath,
    String meanSpanFileName, Path matrixOutputPath) throws IOException,
    InterruptedException, ClassNotFoundException {
  conf.set(MEANSPANOPTION, meanSpanFileName);
  Job job = new Job(conf);
  job.setJobName("Norm2Job");
  job.setJarByClass(Norm2Job.class);
  FileSystem fs = FileSystem.get(matrixInputPath.toUri(), conf);
  matrixInputPath = fs.makeQualified(matrixInputPath);
  matrixOutputPath = fs.makeQualified(matrixOutputPath);
  FileInputFormat.addInputPath(job, matrixInputPath);
  job.setInputFormatClass(SequenceFileInputFormat.class);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  FileOutputFormat.setOutputPath(job, matrixOutputPath);
  job.setMapperClass(MyMapper.class);
  job.setReducerClass(MyReducer.class);
  // A single reducer, so the whole result ends up in one output part.
  job.setNumReduceTasks(1);
  job.setOutputKeyClass(NullWritable.class);
  job.setOutputValueClass(DoubleWritable.class);
  // waitForCompletion submits a not-yet-submitted job itself, so no separate submit()
  // call is needed; its result is checked so a failed job is reported to the caller.
  if (!job.waitForCompletion(true)) {
    throw new IOException("Norm2Job failed for input " + matrixInputPath);
  }
}
Norm2Job.java 文件源码 项目:sPCA 阅读 37 收藏 0 点赞 0 评论 0
/**
 * Reads the single value stored in {@code part-r-00000} under the given output directory.
 *
 * @param outputDirPath directory containing the {@code part-r-00000} file
 * @param conf configuration used to open the file
 * @return the value of the first (and only) record in the file
 * @throws IOException if the file holds more than one record, or if reading or closing
 *         the file fails
 */
public double loadResult(Path outputDirPath, Configuration conf) throws IOException {
  SequenceFileIterator<NullWritable, DoubleWritable> records =
      new SequenceFileIterator<NullWritable, DoubleWritable>(
          new Path(outputDirPath, "part-r-00000"), true, conf);
  try {
    final double norm2 = records.next().getSecond().get();
    // Exactly one record is expected; anything beyond it is an error.
    if (records.hasNext()) {
      throw new IOException("More than one value after norm2Job!");
    }
    return norm2;
  } finally {
    Closeables.close(records, false);
  }
}
ReconstructionErrJobTest.java 文件源码 项目:sPCA 阅读 19 收藏 0 点赞 0 评论 0
/**
 * Checks what the reducer wrote: exactly three keys, one value per key, and for keys 0, 1
 * and 2 a value within {@code EPSILON} of {@code reconstructionError}, {@code yNorm} and
 * {@code centralizedYNorm} respectively. Any other key fails the test.
 *
 * @param writer the record writer that captured the reducer's output
 */
private void verifyReducerOutput(
    DummyRecordWriter<IntWritable, DoubleWritable> writer) {
  final int keyCount = writer.getKeys().size();
  Assert.assertEquals("The reducer should output three key!", 3, keyCount);

  for (IntWritable key : writer.getKeys()) {
    List<DoubleWritable> valuesForKey = writer.getValue(key);
    assertEquals("reducer produces more than one values per key!", 1,
        valuesForKey.size());

    final double actual = valuesForKey.get(0).get();
    final int id = key.get();
    if (id == 0) {
      assertEquals("the computed reconstructionError is incorrect!",
          reconstructionError, actual, EPSILON);
    } else if (id == 1) {
      assertEquals("the computed yNorm is incorrect!", yNorm, actual, EPSILON);
    } else if (id == 2) {
      assertEquals("the computed centralizedYNorm is incorrect!",
          centralizedYNorm, actual, EPSILON);
    } else {
      fail("Unknown key in reading the results: " + key);
    }
  }
}
HadoopPipeline.java 文件源码 项目:titan0.5.4-hbase1.1.1-custom 阅读 22 收藏 0 点赞 0 评论 0
/**
 * Maps a Java wrapper type to the Hadoop writable type used to carry it.
 *
 * <p>Supported inputs are {@code String}, {@code Integer}, {@code Double}, {@code Long},
 * {@code Float} and {@code Boolean}.
 *
 * @param klass the Java class to translate
 * @return the corresponding Hadoop writable class
 * @throws IllegalArgumentException if {@code klass} is none of the supported classes
 */
private Class<? extends WritableComparable> convertJavaToHadoop(final Class klass) {
    if (klass.equals(String.class)) {
        return Text.class;
    }
    if (klass.equals(Integer.class)) {
        return IntWritable.class;
    }
    if (klass.equals(Double.class)) {
        return DoubleWritable.class;
    }
    if (klass.equals(Long.class)) {
        return LongWritable.class;
    }
    if (klass.equals(Float.class)) {
        return FloatWritable.class;
    }
    if (klass.equals(Boolean.class)) {
        return BooleanWritable.class;
    }
    throw new IllegalArgumentException("The provided class is not supported: " + klass.getSimpleName());
}
WritableHandler.java 文件源码 项目:titan0.5.4-hbase1.1.1-custom 阅读 21 收藏 0 点赞 0 评论 0
/**
 * Stores a {@code Long} in the reusable writable matching the configured {@code type} and
 * returns that writable.
 *
 * <p>The value is converted with {@code intValue()}, {@code doubleValue()} or
 * {@code floatValue()} for the int, double and float types; for any type other than the
 * four numeric ones it is written as its decimal string into the text writable.
 *
 * @param l the value to store, may be {@code null}
 * @return {@code NULL_LONG} when {@code l} is {@code null}, otherwise the populated writable
 */
public WritableComparable set(final Long l) {
    if (null == l) {
        return NULL_LONG;
    }
    if (type.equals(LongWritable.class)) {
        longWritable.set(l);
        return longWritable;
    }
    if (type.equals(IntWritable.class)) {
        intWritable.set(l.intValue());
        return intWritable;
    }
    if (type.equals(DoubleWritable.class)) {
        doubleWritable.set(l.doubleValue());
        return doubleWritable;
    }
    if (type.equals(FloatWritable.class)) {
        floatWritable.set(l.floatValue());
        return floatWritable;
    }
    // Fallback for every other configured type: textual representation.
    text.set(String.valueOf(l));
    return text;
}
Search.java 文件源码 项目:search-1047 阅读 28 收藏 0 点赞 0 评论 0
/**
 * Configures and runs the "Search" job and waits for it to finish.
 *
 * <p>Prints a usage line and exits with status -1 unless exactly three arguments are given.
 *
 * @param args {@code args[0]} is the input path, {@code args[1]} is the output path,
 *             {@code args[2]} is the search pattern
 * @throws Exception if the job cannot be configured or executed
 */
public static void main(String[] args) throws Exception {
  if (args.length != 3) {
    System.err.println("Usage: Search <input> <output> <pattern>");
    System.exit(-1);
  }

  Configuration configuration = new Configuration();
  Job searchJob = new Job(configuration, "Search");
  searchJob.setJarByClass(Search.class);

  Path inputPath = new Path(args[0]);
  FileInputFormat.addInputPath(searchJob, inputPath);
  Path outputPath = new Path(args[1]);
  FileOutputFormat.setOutputPath(searchJob, outputPath);

  // NOTE(review): pattern and pageRankRead are assigned here, in the driver; confirm that
  // the mapper/reducer tasks can actually observe these values where they run.
  pattern = args[2];

  searchJob.setMapperClass(SearchMapper.class);
  searchJob.setReducerClass(SearchReducer.class);

  // Output records are keyed by a double, with the text as the value.
  searchJob.setOutputKeyClass(DoubleWritable.class);
  searchJob.setOutputValueClass(Text.class);

  searchJob.setNumReduceTasks(1);

  pageRankRead = new MapFileRead("hdfs://localhost/input/PageRankMap");
  searchJob.waitForCompletion(true);
}
UrlModulus.java 文件源码 项目:search-1047 阅读 29 收藏 0 点赞 0 评论 0
/**
 * Configures and runs the "UrlModulus" job and waits for it to finish.
 *
 * <p>Prints a usage line and exits with status -1 unless exactly two arguments are given.
 *
 * @param args {@code args[0]} is the input path, {@code args[1]} is the output path
 * @throws Exception if the job cannot be configured or executed
 */
public static void main(String[] args) throws Exception {
  if (args.length != 2) {
    System.err.println("Usage: UrlModulus <input path> <output path>");
    System.exit(-1);
  }

  Configuration configuration = new Configuration();
  Job modulusJob = new Job(configuration, "UrlModulus");
  modulusJob.setJarByClass(UrlModulus.class);

  // Locations and output format.
  Path inputPath = new Path(args[0]);
  FileInputFormat.addInputPath(modulusJob, inputPath);
  Path outputPath = new Path(args[1]);
  FileOutputFormat.setOutputPath(modulusJob, outputPath);
  modulusJob.setOutputFormatClass(MapFileOutputFormat.class);

  // Map side: (Text, DoubleWritable).
  modulusJob.setMapperClass(UrlModulusMapper.class);
  modulusJob.setMapOutputKeyClass(Text.class);
  modulusJob.setMapOutputValueClass(DoubleWritable.class);

  // Reduce side: (Text, DoubleWritable), produced by a single reducer.
  modulusJob.setReducerClass(UrlModulusReducer.class);
  modulusJob.setOutputKeyClass(Text.class);
  modulusJob.setOutputValueClass(DoubleWritable.class);
  modulusJob.setNumReduceTasks(1);

  modulusJob.waitForCompletion(true);
}
PageRank.java 文件源码 项目:search-1047 阅读 23 收藏 0 点赞 0 评论 0
/**
 * Configures and runs the PageRank job and waits for it to finish.
 *
 * <p>Prints a usage line and exits with status -1 unless exactly two arguments are given.
 *
 * @param args {@code args[0]} is the input path, {@code args[1]} is the output path
 * @throws Exception if the job cannot be configured or executed
 */
public static void main(String[] args) throws Exception {
  if (args.length != 2) {
    // The usage line names this tool (it previously said "OutLinks").
    System.err.println("Usage: PageRank <input path> <output path>");
    System.exit(-1);
  }

  Job job = new Job();
  job.setJarByClass(PageRank.class);

  FileInputFormat.addInputPath(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1]));

  job.setOutputFormatClass(MapFileOutputFormat.class);
  job.setMapperClass(PageRankMapper.class);
  job.setReducerClass(PageRankReducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(DoubleWritable.class);

  // Reader over the page-rank map file, created before the job is started.
  PageRankRead = new MapFileRead("/input/PageRankMap");
  //OutLinksRead = new MapFileRead("/input/OutLinksMap");

  job.waitForCompletion(true);
}
WritableHandler.java 文件源码 项目:titan0.5.4-hbase1.1.1-custom 阅读 35 收藏 0 点赞 0 评论 0
/**
 * Stores an {@code Integer} in the reusable writable matching the configured {@code type}
 * and returns that writable.
 *
 * <p>The value is converted with {@code longValue()}, {@code doubleValue()} or
 * {@code floatValue()} for the long, double and float types; for any type other than the
 * four numeric ones it is written as its decimal string into the text writable.
 *
 * @param i the value to store, may be {@code null}
 * @return {@code NULL_INT} when {@code i} is {@code null}, otherwise the populated writable
 */
public WritableComparable set(final Integer i) {
    if (null == i) {
        return NULL_INT;
    }
    if (type.equals(LongWritable.class)) {
        longWritable.set(i.longValue());
        return longWritable;
    }
    if (type.equals(IntWritable.class)) {
        intWritable.set(i);
        return intWritable;
    }
    if (type.equals(DoubleWritable.class)) {
        doubleWritable.set(i.doubleValue());
        return doubleWritable;
    }
    if (type.equals(FloatWritable.class)) {
        floatWritable.set(i.floatValue());
        return floatWritable;
    }
    // Fallback for every other configured type: textual representation.
    text.set(String.valueOf(i));
    return text;
}
AvroMixedMapReduce.java 文件源码 项目:hiped2 阅读 20 收藏 0 点赞 0 评论 0
/**
 * Averages all values received for a key and emits one {@code StockAvg} record holding the
 * key (as the symbol) and the mean.
 *
 * @param key the key whose string form becomes the record's symbol
 * @param values the values to average
 * @param output collector receiving the Avro-wrapped record with a null value
 * @param reporter progress reporter (not used here)
 * @throws IOException if emitting the record fails
 */
public void reduce(Text key,
                   Iterator<DoubleWritable> values,
                   OutputCollector<AvroWrapper<StockAvg>,
                       NullWritable> output,
                   Reporter reporter) throws IOException {

  Mean runningMean = new Mean();
  while (values.hasNext()) {
    double sample = values.next().get();
    runningMean.increment(sample);
  }

  StockAvg record = new StockAvg();
  record.setSymbol(key.toString());
  record.setAvg(runningMean.getResult());

  AvroWrapper<StockAvg> wrapped = new AvroWrapper<StockAvg>(record);
  output.collect(wrapped, NullWritable.get());
}
WritableHandler.java 文件源码 项目:titan0.5.4-hbase1.1.1-custom 阅读 22 收藏 0 点赞 0 评论 0
/**
 * Stores a {@code String} in the reusable writable matching the configured {@code type} and
 * returns that writable.
 *
 * <p>For the long, int, double and float types the string is parsed as a number of that
 * type; for any other type it is stored unchanged in the text writable.
 *
 * @param s the value to store, may be {@code null}
 * @return {@code NULL_TEXT} when {@code s} is {@code null}, otherwise the populated writable
 * @throws NumberFormatException if a numeric type is configured and {@code s} cannot be
 *         parsed as that type
 */
public WritableComparable set(final String s) {
    if (null == s) {
        return NULL_TEXT;
    }
    if (type.equals(LongWritable.class)) {
        longWritable.set(Long.parseLong(s));
        return longWritable;
    }
    if (type.equals(IntWritable.class)) {
        intWritable.set(Integer.parseInt(s));
        return intWritable;
    }
    if (type.equals(DoubleWritable.class)) {
        doubleWritable.set(Double.parseDouble(s));
        return doubleWritable;
    }
    if (type.equals(FloatWritable.class)) {
        floatWritable.set(Float.parseFloat(s));
        return floatWritable;
    }
    // Fallback for every other configured type: keep the string as-is.
    text.set(s);
    return text;
}
RedditAverage.java 文件源码 项目:Hanhan-Hadoop-MapReduce 阅读 28 收藏 0 点赞 0 评论 0
/**
 * Configures and runs the "reddit average" job.
 *
 * @param args {@code args[0]} and {@code args[1]} are the two input paths,
 *             {@code args[2]} is the output path
 * @return 0 if the job completed successfully, 1 otherwise
 * @throws Exception if the job cannot be configured or executed
 */
@Override
public int run(String[] args) throws Exception {
    Job averageJob = Job.getInstance(this.getConf(), "reddit average");
    averageJob.setJarByClass(RedditAverage.class);

    // Processing stages: mapper, combiner, reducer.
    averageJob.setMapperClass(RedditMapper.class);
    averageJob.setCombinerClass(RedditCombiner.class);
    averageJob.setReducerClass(RedditReducer.class);

    // Intermediate values are long pairs; the final output is (Text, DoubleWritable).
    averageJob.setMapOutputValueClass(LongPairWritable.class);
    averageJob.setOutputKeyClass(Text.class);
    averageJob.setOutputValueClass(DoubleWritable.class);

    // Plain-text input and output.
    averageJob.setInputFormatClass(TextInputFormat.class);
    averageJob.setOutputFormatClass(TextOutputFormat.class);
    TextInputFormat.addInputPath(averageJob, new Path(args[0]));
    TextInputFormat.addInputPath(averageJob, new Path(args[1]));
    TextOutputFormat.setOutputPath(averageJob, new Path(args[2]));

    boolean succeeded = averageJob.waitForCompletion(true);
    if (succeeded) {
        return 0;
    }
    return 1;
}
PageRank.java 文件源码 项目:comparative-study-of-frameworks-for-parallel-processing-of-graphs 阅读 28 收藏 0 点赞 0 评论 0
/**
 * Runs one superstep for this vertex: updates the vertex value and sends its share to
 * every neighbor.
 *
 * <p>In superstep 0 the value is set to {@code 1 / numVertices}. In later supersteps it is
 * set to {@code (1 - DAMPING_FACTOR) / numVertices + sum(messages) * DAMPING_FACTOR}.
 * Afterwards the value divided by the number of edges is sent to the neighbors.
 *
 * @param messages values received from neighbors in the previous superstep
 * @throws IOException declared by the signature
 */
@Override
public void compute(Iterable<DoubleWritable> messages) throws IOException {
    final double rank;
    if (getSuperstepCount() == 0) {
        // First superstep: uniform initial value.
        rank = 1.0 / getNumVertices();
    } else {
        double incoming = 0;
        for (DoubleWritable contribution : messages) {
            incoming += contribution.get();
        }
        double alpha = (1.0 - DAMPING_FACTOR) / getNumVertices();
        rank = alpha + (incoming * DAMPING_FACTOR);
    }
    setValue(new DoubleWritable(rank));

    // Split the stored value evenly across the outgoing edges.
    long outDegree = getEdges().size();
    double share = getValue().get() / outDegree;
    sendMessageToNeighbors(new DoubleWritable(share));
}
PageRank.java 文件源码 项目:comparative-study-of-frameworks-for-parallel-processing-of-graphs 阅读 24 收藏 0 点赞 0 评论 0
/**
 * Populates {@code vertex} from one input line of the form
 * {@code <vertexId> TAB <neighborId> <neighborId> ...}.
 *
 * <p>The first tab-separated column becomes the vertex id; every space-separated token of
 * the second column becomes an edge to that id with a {@code null} edge value.
 *
 * @param key the input key (not used here)
 * @param value the line to parse
 * @param vertex the vertex to fill in
 * @return always {@code true}
 */
@Override
public boolean parseVertex(LongWritable key, Text value, Vertex<IntWritable, NullWritable, DoubleWritable> vertex) {
    String[] columns = value.toString().split("\t");

    int vertexId = Integer.parseInt(columns[0]);
    vertex.setVertexID(new IntWritable(vertexId));

    String[] neighborIds = columns[1].split(" ");
    for (String neighborId : neighborIds) {
        IntWritable target = new IntWritable(Integer.parseInt(neighborId));
        vertex.addEdge(new Edge<IntWritable, NullWritable>(target, null));
    }
    return true;
}
DataToDoublesSketchUDAFTest.java 文件源码 项目:sketches-hive 阅读 19 收藏 0 点赞 0 评论 0
/**
 * PARTIAL1 mode with no explicit k: after feeding 1.0 and 2.0, the partial result must
 * deserialize to a sketch with k = 128, two retained items, minimum 1.0 and maximum 2.0.
 */
@Test
public void partial1ModeDefaultK() throws Exception {
  ObjectInspector[] inspectors = new ObjectInspector[] { doubleInspector };
  GenericUDAFParameterInfo parameterInfo =
      new SimpleGenericUDAFParameterInfo(inspectors, false, false);
  GenericUDAFEvaluator evaluator = new DataToDoublesSketchUDAF().getEvaluator(parameterInfo);
  checkResultInspector(evaluator.init(Mode.PARTIAL1, inspectors));

  DoublesUnionState state = (DoublesUnionState) evaluator.getNewAggregationBuffer();
  for (double input : new double[] { 1.0, 2.0 }) {
    evaluator.iterate(state, new Object[] { new DoubleWritable(input) });
  }

  BytesWritable serialized = (BytesWritable) evaluator.terminatePartial(state);
  DoublesSketch sketch = DoublesSketch.wrap(Memory.wrap(serialized.getBytes()));
  Assert.assertEquals(sketch.getK(), 128);
  Assert.assertEquals(sketch.getRetainedItems(), 2);
  Assert.assertEquals(sketch.getMinValue(), 1.0);
  Assert.assertEquals(sketch.getMaxValue(), 2.0);
  evaluator.close();
}
IndexSortComparable.java 文件源码 项目:systemml 阅读 21 收藏 0 点赞 0 评论 0
/**
 * Compares this object with either a plain {@code DoubleWritable} or another
 * {@code IndexSortComparable}.
 *
 * <p>Against a {@code DoubleWritable} only the double value is compared. Against an
 * {@code IndexSortComparable} the double value is compared first and the index is used to
 * break ties.
 *
 * @param o the object to compare with
 * @return the comparison result of the values, or of the indexes when the values are equal
 * @throws RuntimeException if {@code o} is of any other class
 */
@Override
public int compareTo(Object o)
{
    //value-only comparison (e.g., for partitioner)
    if( o instanceof DoubleWritable ) {
        DoubleWritable other = (DoubleWritable) o;
        return _dval.compareTo(other);
    }

    //value comparison with index as tie-breaker (e.g., for stable sort)
    if( o instanceof IndexSortComparable ) {
        IndexSortComparable that = (IndexSortComparable) o;
        int byValue = _dval.compareTo(that._dval);
        return (byValue != 0) ? byValue : _lval.compareTo(that._lval);
    }

    throw new RuntimeException("Unsupported comparison involving class: "+o.getClass().getName());
}
CVB0Driver.java 文件源码 项目:Chi-FRBCS-BigData-Max 阅读 20 收藏 0 点赞 0 评论 0
/**
 * Runs a single-reducer job that computes perplexity for the given model over the corpus,
 * then reads the result back.
 *
 * <p>The output location is derived from the model's parent directory and the iteration
 * number, and is deleted before the job starts.
 *
 * @param conf configuration for the job and for file-system access
 * @param corpusPath sequence-file input of the job
 * @param modelPath model whose perplexity is computed
 * @param iteration iteration number used to derive the output location
 * @return the value read back by {@code readPerplexity}
 * @throws IOException if the job setup or result reading fails
 * @throws ClassNotFoundException if a job class cannot be resolved
 * @throws InterruptedException if waiting is interrupted, or if the job reports failure
 */
private static double calculatePerplexity(Configuration conf, Path corpusPath, Path modelPath, int iteration)
  throws IOException, ClassNotFoundException, InterruptedException {
  String jobName = "Calculating perplexity for " + modelPath;
  log.info("About to run: " + jobName);

  Job perplexityJob = new Job(conf, jobName);
  perplexityJob.setJarByClass(CachingCVB0PerplexityMapper.class);

  // Processing stages; the same summing class serves as combiner and reducer.
  perplexityJob.setMapperClass(CachingCVB0PerplexityMapper.class);
  perplexityJob.setCombinerClass(DualDoubleSumReducer.class);
  perplexityJob.setReducerClass(DualDoubleSumReducer.class);
  perplexityJob.setNumReduceTasks(1);

  // Sequence files in and out; output records are (DoubleWritable, DoubleWritable).
  perplexityJob.setOutputKeyClass(DoubleWritable.class);
  perplexityJob.setOutputValueClass(DoubleWritable.class);
  perplexityJob.setInputFormatClass(SequenceFileInputFormat.class);
  perplexityJob.setOutputFormatClass(SequenceFileOutputFormat.class);

  FileInputFormat.addInputPath(perplexityJob, corpusPath);
  Path resultPath = perplexityPath(modelPath.getParent(), iteration);
  FileOutputFormat.setOutputPath(perplexityJob, resultPath);
  setModelPaths(perplexityJob, modelPath);

  // Remove any previous output at the target location before running.
  HadoopUtil.delete(conf, resultPath);

  boolean succeeded = perplexityJob.waitForCompletion(true);
  if (!succeeded) {
    throw new InterruptedException("Failed to calculate perplexity for: " + modelPath);
  }
  return readPerplexity(conf, modelPath.getParent(), iteration);
}
DataToDoublesSketchUDAFTest.java 文件源码 项目:sketches-hive 阅读 20 收藏 0 点赞 0 评论 0
/**
 * COMPLETE mode with no explicit k: after feeding 1.0 and 2.0, the final result must
 * deserialize to a sketch with k = 128, two retained items, minimum 1.0 and maximum 2.0.
 */
@Test
public void completeModeDefaultK() throws Exception {
  ObjectInspector[] inspectors = new ObjectInspector[] { doubleInspector };
  GenericUDAFParameterInfo parameterInfo =
      new SimpleGenericUDAFParameterInfo(inspectors, false, false);
  GenericUDAFEvaluator evaluator = new DataToDoublesSketchUDAF().getEvaluator(parameterInfo);
  checkResultInspector(evaluator.init(Mode.COMPLETE, inspectors));

  DoublesUnionState state = (DoublesUnionState) evaluator.getNewAggregationBuffer();
  for (double input : new double[] { 1.0, 2.0 }) {
    evaluator.iterate(state, new Object[] { new DoubleWritable(input) });
  }

  BytesWritable serialized = (BytesWritable) evaluator.terminate(state);
  DoublesSketch sketch = DoublesSketch.wrap(Memory.wrap(serialized.getBytes()));
  Assert.assertEquals(sketch.getK(), 128);
  Assert.assertEquals(sketch.getRetainedItems(), 2);
  Assert.assertEquals(sketch.getMinValue(), 1.0);
  Assert.assertEquals(sketch.getMaxValue(), 2.0);
  evaluator.close();
}
CVB0Driver.java 文件源码 项目:Chi-FRBCS-BigData-Ave 阅读 28 收藏 0 点赞 0 评论 0
/**
 * Reads the perplexity entries written for an iteration and combines them into one number.
 *
 * <p>Every entry contributes its key to the total model weight and its value to the total
 * perplexity; the result is total perplexity divided by total model weight.
 *
 * @param conf configuration used for file-system access and for reading the entries
 * @param topicModelStateTemp directory from which the perplexity location is derived
 * @param iteration iteration number from which the perplexity location is derived
 * @return total perplexity divided by total model weight, or {@code Double.NaN} if the
 *         perplexity location does not exist
 * @throws IOException if the file system cannot be accessed
 */
public static double readPerplexity(Configuration conf, Path topicModelStateTemp, int iteration)
  throws IOException {
  Path perplexityPath = perplexityPath(topicModelStateTemp, iteration);
  FileSystem fs = FileSystem.get(perplexityPath.toUri(), conf);
  boolean pathExists = fs.exists(perplexityPath);
  if (!pathExists) {
    log.warn("Perplexity path {} does not exist, returning NaN", perplexityPath);
    return Double.NaN;
  }

  Iterable<Pair<DoubleWritable, DoubleWritable>> entries =
      new SequenceFileDirIterable<DoubleWritable, DoubleWritable>(
          perplexityPath, PathType.LIST, PathFilters.partFilter(), null, true, conf);

  double totalPerplexity = 0;
  double totalModelWeight = 0;
  long entryCount = 0;
  for (Pair<DoubleWritable, DoubleWritable> entry : entries) {
    // Key carries the model weight, value carries the perplexity.
    totalModelWeight += entry.getFirst().get();
    totalPerplexity += entry.getSecond().get();
    entryCount++;
  }

  log.info("Read {} entries with total perplexity {} and model weight {}", new Object[] { entryCount,
          totalPerplexity, totalModelWeight });
  return totalPerplexity / totalModelWeight;
}


问题


面经


文章

微信
公众号

扫码关注公众号