java类org.apache.hadoop.io.BytesWritable的实例源码

BinaryProtocol.java 文件源码 项目:hadoop 阅读 19 收藏 0 点赞 0 评论 0
private void readObject(Writable obj) throws IOException {
  int numBytes = WritableUtils.readVInt(inStream);
  byte[] buffer;
  // For BytesWritable and Text, use the specified length to set the length
  // this causes the "obvious" translations to work. So that if you emit
  // a string "abc" from C++, it shows up as "abc".
  if (obj instanceof BytesWritable) {
    buffer = new byte[numBytes];
    inStream.readFully(buffer);
    ((BytesWritable) obj).set(buffer, 0, numBytes);
  } else if (obj instanceof Text) {
    buffer = new byte[numBytes];
    inStream.readFully(buffer);
    ((Text) obj).set(buffer);
  } else {
    obj.readFields(inStream);
  }
}
TFile.java 文件源码 项目:hadoop-oss 阅读 38 收藏 0 点赞 0 评论 0
/**
 * Copy the value into BytesWritable. The input BytesWritable will be
 * automatically resized to the actual value size. The implementation
 * directly uses the buffer inside BytesWritable for storing the value.
 * The call does not require the value length to be known.
 * 
 * @param value
 * @throws IOException
 */
public long getValue(BytesWritable value) throws IOException {
  DataInputStream dis = getValueStream();
  int size = 0;
  try {
    int remain;
    while ((remain = valueBufferInputStream.getRemain()) > 0) {
      value.setSize(size + remain);
      dis.readFully(value.getBytes(), size, remain);
      size += remain;
    }
    return value.getLength();
  } finally {
    dis.close();
  }
}
IntegrationTestLoadAndVerify.java 文件源码 项目:ditb 阅读 32 收藏 0 点赞 0 评论 0
protected void doVerify(Configuration conf, HTableDescriptor htd) throws Exception {
  Path outputDir = getTestDir(TEST_NAME, "verify-output");
  LOG.info("Verify output dir: " + outputDir);

  Job job = Job.getInstance(conf);
  job.setJarByClass(this.getClass());
  job.setJobName(TEST_NAME + " Verification for " + htd.getTableName());
  setJobScannerConf(job);

  Scan scan = new Scan();

  TableMapReduceUtil.initTableMapperJob(
      htd.getTableName().getNameAsString(), scan, VerifyMapper.class,
      BytesWritable.class, BytesWritable.class, job);
  TableMapReduceUtil.addDependencyJars(job.getConfiguration(), AbstractHBaseTool.class);
  int scannerCaching = conf.getInt("verify.scannercaching", SCANNER_CACHING);
  TableMapReduceUtil.setScannerCaching(job, scannerCaching);

  job.setReducerClass(VerifyReducer.class);
  job.setNumReduceTasks(conf.getInt(NUM_REDUCE_TASKS_KEY, NUM_REDUCE_TASKS_DEFAULT));
  FileOutputFormat.setOutputPath(job, outputDir);
  assertTrue(job.waitForCompletion(true));

  long numOutputRecords = job.getCounters().findCounter(Counters.ROWS_WRITTEN).getValue();
  assertEquals(0, numOutputRecords);
}
TFile.java 文件源码 项目:hadoop 阅读 31 收藏 0 点赞 0 评论 0
/**
 * Copy the value into BytesWritable. The input BytesWritable will be
 * automatically resized to the actual value size. The implementation
 * directly uses the buffer inside BytesWritable for storing the value.
 * The call does not require the value length to be known.
 * 
 * @param value
 * @throws IOException
 */
public long getValue(BytesWritable value) throws IOException {
  DataInputStream dis = getValueStream();
  int size = 0;
  try {
    int remain;
    while ((remain = valueBufferInputStream.getRemain()) > 0) {
      value.setSize(size + remain);
      dis.readFully(value.getBytes(), size, remain);
      size += remain;
    }
    return value.getLength();
  } finally {
    dis.close();
  }
}
TestTFileSeqFileComparison.java 文件源码 项目:hadoop-oss 阅读 23 收藏 0 点赞 0 评论 0
public SeqFileAppendable(FileSystem fs, Path path, int osBufferSize,
    String compress, int minBlkSize) throws IOException {
  Configuration conf = new Configuration();

  CompressionCodec codec = null;
  if ("lzo".equals(compress)) {
    codec = Compression.Algorithm.LZO.getCodec();
  }
  else if ("gz".equals(compress)) {
    codec = Compression.Algorithm.GZ.getCodec();
  }
  else if (!"none".equals(compress))
    throw new IOException("Codec not supported.");

  this.fsdos = fs.create(path, true, osBufferSize);

  if (!"none".equals(compress)) {
    writer =
        SequenceFile.createWriter(conf, fsdos, BytesWritable.class,
            BytesWritable.class, SequenceFile.CompressionType.BLOCK, codec);
  }
  else {
    writer =
        SequenceFile.createWriter(conf, fsdos, BytesWritable.class,
            BytesWritable.class, SequenceFile.CompressionType.NONE, null);
  }
}
TestJoinTupleWritable.java 文件源码 项目:hadoop 阅读 33 收藏 0 点赞 0 评论 0
public void testNestedIterable() throws Exception {
  Random r = new Random();
  Writable[] writs = {
    new BooleanWritable(r.nextBoolean()),
    new FloatWritable(r.nextFloat()),
    new FloatWritable(r.nextFloat()),
    new IntWritable(r.nextInt()),
    new LongWritable(r.nextLong()),
    new BytesWritable("dingo".getBytes()),
    new LongWritable(r.nextLong()),
    new IntWritable(r.nextInt()),
    new BytesWritable("yak".getBytes()),
    new IntWritable(r.nextInt())
  };
  TupleWritable sTuple = makeTuple(writs);
  assertTrue("Bad count", writs.length == verifIter(writs, sTuple, 0));
}
GenericUDFSha2.java 文件源码 项目:hive-udf-backports 阅读 20 收藏 0 点赞 0 评论 0
@Override
public Object evaluate(DeferredObject[] arguments) throws HiveException {
  if (digest == null) {
    return null;
  }

  digest.reset();
  if (isStr) {
    Text n = getTextValue(arguments, 0, converters);
    if (n == null) {
      return null;
    }
    digest.update(n.getBytes(), 0, n.getLength());
  } else {
    BytesWritable bWr = getBinaryValue(arguments, 0, converters);
    if (bWr == null) {
      return null;
    }
    digest.update(bWr.getBytes(), 0, bWr.getLength());
  }
  byte[] resBin = digest.digest();
  String resStr = Hex.encodeHexString(resBin);

  output.set(resStr);
  return output;
}
SequenceFileAsBinaryInputFormat.java 文件源码 项目:hadoop 阅读 25 收藏 0 点赞 0 评论 0
/**
 * Read raw bytes from a SequenceFile.
 */
public synchronized boolean next(BytesWritable key, BytesWritable val)
    throws IOException {
  if (done) return false;
  long pos = in.getPosition();
  boolean eof = -1 == in.nextRawKey(buffer);
  if (!eof) {
    key.set(buffer.getData(), 0, buffer.getLength());
    buffer.reset();
    in.nextRawValue(vbytes);
    vbytes.writeUncompressedBytes(buffer);
    val.set(buffer.getData(), 0, buffer.getLength());
    buffer.reset();
  }
  return !(done = (eof || (pos >= end && in.syncSeen())));
}
IdentifierResolver.java 文件源码 项目:hadoop 阅读 24 收藏 0 点赞 0 评论 0
/**
 * Resolves a given identifier. This method has to be called before calling
 * any of the getters.
 */
public void resolve(String identifier) {
  if (identifier.equalsIgnoreCase(RAW_BYTES_ID)) {
    setInputWriterClass(RawBytesInputWriter.class);
    setOutputReaderClass(RawBytesOutputReader.class);
    setOutputKeyClass(BytesWritable.class);
    setOutputValueClass(BytesWritable.class);
  } else if (identifier.equalsIgnoreCase(TYPED_BYTES_ID)) {
    setInputWriterClass(TypedBytesInputWriter.class);
    setOutputReaderClass(TypedBytesOutputReader.class);
    setOutputKeyClass(TypedBytesWritable.class);
    setOutputValueClass(TypedBytesWritable.class);
  } else if (identifier.equalsIgnoreCase(KEY_ONLY_TEXT_ID)) {
    setInputWriterClass(KeyOnlyTextInputWriter.class);
    setOutputReaderClass(KeyOnlyTextOutputReader.class);
    setOutputKeyClass(Text.class);
    setOutputValueClass(NullWritable.class);
  } else { // assume TEXT_ID
    setInputWriterClass(TextInputWriter.class);
    setOutputReaderClass(TextOutputReader.class);
    setOutputKeyClass(Text.class);
    setOutputValueClass(Text.class);
  }
}
TestIPCServerResponder.java 文件源码 项目:hadoop 阅读 24 收藏 0 点赞 0 评论 0
@Override
public void run() {
  for (int i = 0; i < count; i++) {
    try {
      int byteSize = RANDOM.nextInt(BYTE_COUNT);
      byte[] bytes = new byte[byteSize];
      System.arraycopy(BYTES, 0, bytes, 0, byteSize);
      Writable param = new BytesWritable(bytes);
      client.call(param, address);
      Thread.sleep(RANDOM.nextInt(20));
    } catch (Exception e) {
      LOG.fatal("Caught Exception", e);
      failed = true;
    }
  }
}
LobFile.java 文件源码 项目:aliyun-maxcompute-data-collectors 阅读 19 收藏 0 点赞 0 评论 0
public void readFields(DataInput in) throws IOException {
  // After the RecordStartMark, we expect to get a SEGMENT_HEADER_ID (-1).
  long segmentId = WritableUtils.readVLong(in);
  if (SEGMENT_HEADER_ID != segmentId) {
    throw new IOException("Expected segment header id " + SEGMENT_HEADER_ID
        + "; got " + segmentId);
  }

  // Get the length of the rest of the segment, in bytes.
  long length = WritableUtils.readVLong(in);

  // Now read the actual main byte array.
  if (length > Integer.MAX_VALUE) {
    throw new IOException("Unexpected oversize data array length: "
        + length);
  } else if (length < 0) {
    throw new IOException("Unexpected undersize data array length: "
        + length);
  }
  byte [] segmentData = new byte[(int) length];
  in.readFully(segmentData);
  recordLenBytes = new BytesWritable(segmentData);

  reset(); // Reset the iterator allowing the user to yield offset/lengths.
}
SequenceFileInputFilter.java 文件源码 项目:hadoop 阅读 21 收藏 0 点赞 0 评论 0
/** Filtering method
 * If MD5(key) % frequency==0, return true; otherwise return false
 * @see Filter#accept(Object)
 */
public boolean accept(Object key) {
  try {
    long hashcode;
    if (key instanceof Text) {
      hashcode = MD5Hashcode((Text)key);
    } else if (key instanceof BytesWritable) {
      hashcode = MD5Hashcode((BytesWritable)key);
    } else {
      ByteBuffer bb;
      bb = Text.encode(key.toString());
      hashcode = MD5Hashcode(bb.array(), 0, bb.limit());
    }
    if (hashcode / frequency * frequency == hashcode)
      return true;
  } catch(Exception e) {
    LOG.warn(e);
    throw new RuntimeException(e);
  }
  return false;
}
BytesWritableCompactionReducer.java 文件源码 项目:dataSqueeze 阅读 25 收藏 0 点赞 0 评论 0
/**
 * {@inheritDoc}
 */
protected void reduce(final Text key, final Iterable<BytesWritable> values, final Context context) throws IOException, InterruptedException {
    final Configuration configuration = context.getConfiguration();
    final String sourcePath = configuration.get("compactionSourcePath");
    final String targetPath = configuration.get("compactionTargetPath");

    // Reducer stores data at the target directory retaining the directory structure of files
    String filePath = key.toString().replace(sourcePath, targetPath);
    if (key.toString().endsWith("/")) {
        filePath = filePath.concat("file");
    }

    log.info("Compaction output path {}", filePath);
    final URI uri = URI.create(filePath);
    final MultipleOutputs multipleOutputs = new MultipleOutputs<NullWritable, BytesWritable>(context);
    try {
        for (final BytesWritable text : values) {
            multipleOutputs.write(NullWritable.get(), text, uri.toString());
        }
    } finally {
        multipleOutputs.close();
    }
}
CommonStub.java 文件源码 项目:hadoop 阅读 24 收藏 0 点赞 0 评论 0
protected void readObject(Writable obj, DataInputStream inStream) throws IOException {
  int numBytes = WritableUtils.readVInt(inStream);
  byte[] buffer;
  // For BytesWritable and Text, use the specified length to set the length
  // this causes the "obvious" translations to work. So that if you emit
  // a string "abc" from C++, it shows up as "abc".
  if (obj instanceof BytesWritable) {
    buffer = new byte[numBytes];
    inStream.readFully(buffer);
    ((BytesWritable) obj).set(buffer, 0, numBytes);
  } else if (obj instanceof Text) {
    buffer = new byte[numBytes];
    inStream.readFully(buffer);
    ((Text) obj).set(buffer);
  } else {
    obj.readFields(inStream);
  }
}
UtilsForTests.java 文件源码 项目:hadoop 阅读 23 收藏 0 点赞 0 评论 0
/**
 * Configure a waiting job
 */
static void configureWaitingJobConf(JobConf jobConf, Path inDir,
                                    Path outputPath, int numMaps, int numRed,
                                    String jobName, String mapSignalFilename,
                                    String redSignalFilename)
throws IOException {
  jobConf.setJobName(jobName);
  jobConf.setInputFormat(NonSplitableSequenceFileInputFormat.class);
  jobConf.setOutputFormat(SequenceFileOutputFormat.class);
  FileInputFormat.setInputPaths(jobConf, inDir);
  FileOutputFormat.setOutputPath(jobConf, outputPath);
  jobConf.setMapperClass(UtilsForTests.HalfWaitingMapper.class);
  jobConf.setReducerClass(IdentityReducer.class);
  jobConf.setOutputKeyClass(BytesWritable.class);
  jobConf.setOutputValueClass(BytesWritable.class);
  jobConf.setInputFormat(RandomInputFormat.class);
  jobConf.setNumMapTasks(numMaps);
  jobConf.setNumReduceTasks(numRed);
  jobConf.setJar("build/test/mapred/testjar/testjob.jar");
  jobConf.set(getTaskSignalParameter(true), mapSignalFilename);
  jobConf.set(getTaskSignalParameter(false), redSignalFilename);
}
TestCombineSequenceFileInputFormat.java 文件源码 项目:hadoop 阅读 20 收藏 0 点赞 0 评论 0
private static void createFiles(int length, int numFiles, Random random,
  Job job) throws IOException {
  Range[] ranges = createRanges(length, numFiles, random);

  for (int i = 0; i < numFiles; i++) {
    Path file = new Path(workDir, "test_" + i + ".seq");
    // create a file with length entries
    @SuppressWarnings("deprecation")
    SequenceFile.Writer writer =
      SequenceFile.createWriter(localFs, job.getConfiguration(), file,
                                IntWritable.class, BytesWritable.class);
    Range range = ranges[i];
    try {
      for (int j = range.start; j < range.end; j++) {
        IntWritable key = new IntWritable(j);
        byte[] data = new byte[random.nextInt(10)];
        random.nextBytes(data);
        BytesWritable value = new BytesWritable(data);
        writer.append(key, value);
      }
    } finally {
      writer.close();
    }
  }
}
KVGenerator.java 文件源码 项目:ditb 阅读 21 收藏 0 点赞 0 评论 0
private void fillKey(BytesWritable o) {
  int len = keyLenRNG.nextInt();
  if (len < MIN_KEY_LEN) len = MIN_KEY_LEN;
  o.setSize(len);
  int n = MIN_KEY_LEN;
  while (n < len) {
    byte[] word = dict[random.nextInt(dict.length)];
    int l = Math.min(word.length, len - n);
    System.arraycopy(word, 0, o.get(), n, l);
    n += l;
  }
  if (sorted
      && WritableComparator.compareBytes(lastKey.get(), MIN_KEY_LEN, lastKey
          .getSize()
          - MIN_KEY_LEN, o.get(), MIN_KEY_LEN, o.getSize() - MIN_KEY_LEN) > 0) {
    incrementPrefix();
  }

  System.arraycopy(prefix, 0, o.get(), 0, MIN_KEY_LEN);
  lastKey.set(o);
}
BaileyBorweinPlouffe.java 文件源码 项目:hadoop 阅读 26 收藏 0 点赞 0 评论 0
/** Compute the (offset+1)th to (offset+length)th digits. */
protected void map(LongWritable offset, IntWritable length,
    final Context context) throws IOException, InterruptedException {
  LOG.info("offset=" + offset + ", length=" + length);

  // compute digits
  final byte[] bytes = new byte[length.get() >> 1];
  long d = offset.get();
  for (int i = 0; i < bytes.length; d += 4) {
    final long digits = hexDigits(d);
    bytes[i++] = (byte) (digits >> 8);
    bytes[i++] = (byte) digits;
  }

  // output map results
  context.write(offset, new BytesWritable(bytes));
}
TestCombineSequenceFileInputFormat.java 文件源码 项目:hadoop 阅读 22 收藏 0 点赞 0 评论 0
private static void createFiles(int length, int numFiles, Random random)
  throws IOException {
  Range[] ranges = createRanges(length, numFiles, random);

  for (int i = 0; i < numFiles; i++) {
    Path file = new Path(workDir, "test_" + i + ".seq");
    // create a file with length entries
    @SuppressWarnings("deprecation")
    SequenceFile.Writer writer =
      SequenceFile.createWriter(localFs, conf, file,
                                IntWritable.class, BytesWritable.class);
    Range range = ranges[i];
    try {
      for (int j = range.start; j < range.end; j++) {
        IntWritable key = new IntWritable(j);
        byte[] data = new byte[random.nextInt(10)];
        random.nextBytes(data);
        BytesWritable value = new BytesWritable(data);
        writer.append(key, value);
      }
    } finally {
      writer.close();
    }
  }
}
TestMultipleLevelCaching.java 文件源码 项目:hadoop 阅读 29 收藏 0 点赞 0 评论 0
static RunningJob launchJob(JobConf jobConf, Path inDir, Path outputPath,
    int numMaps, String jobName) throws IOException {
  jobConf.setJobName(jobName);
  jobConf.setInputFormat(NonSplitableSequenceFileInputFormat.class);
  jobConf.setOutputFormat(SequenceFileOutputFormat.class);
  FileInputFormat.setInputPaths(jobConf, inDir);
  FileOutputFormat.setOutputPath(jobConf, outputPath);
  jobConf.setMapperClass(IdentityMapper.class);
  jobConf.setReducerClass(IdentityReducer.class);
  jobConf.setOutputKeyClass(BytesWritable.class);
  jobConf.setOutputValueClass(BytesWritable.class);
  jobConf.setNumMapTasks(numMaps);
  jobConf.setNumReduceTasks(0);
  jobConf.setJar("build/test/mapred/testjar/testjob.jar");
  return JobClient.runJob(jobConf);
}
TestFixedLengthInputFormat.java 文件源码 项目:hadoop 阅读 23 收藏 0 点赞 0 评论 0
/**
 * Test with no record length set.
 */
@Test (timeout=5000)
public void testNoRecordLength() throws IOException {
  localFs.delete(workDir, true);
  Path file = new Path(workDir, new String("testFormat.txt"));
  createFile(file, null, 10, 10);
  // Set the fixed length record length config property 
  JobConf job = new JobConf(defaultConf);
  FileInputFormat.setInputPaths(job, workDir);
  FixedLengthInputFormat format = new FixedLengthInputFormat();
  format.configure(job);
  InputSplit splits[] = format.getSplits(job, 1);
  boolean exceptionThrown = false;
  for (InputSplit split : splits) {
    try {
      RecordReader<LongWritable, BytesWritable> reader = 
          format.getRecordReader(split, job, voidReporter);
    } catch(IOException ioe) {
      exceptionThrown = true;
      LOG.info("Exception message:" + ioe.getMessage());
    }
  }
  assertTrue("Exception for not setting record length:", exceptionThrown);
}
TestFixedLengthInputFormat.java 文件源码 项目:hadoop 阅读 27 收藏 0 点赞 0 评论 0
/**
 * Test with record length set to 0
 */
@Test (timeout=5000)
public void testZeroRecordLength() throws IOException {
  localFs.delete(workDir, true);
  Path file = new Path(workDir, new String("testFormat.txt"));
  createFile(file, null, 10, 10);
  // Set the fixed length record length config property 
  JobConf job = new JobConf(defaultConf);
  FileInputFormat.setInputPaths(job, workDir);
  FixedLengthInputFormat format = new FixedLengthInputFormat();
  format.setRecordLength(job, 0);
  format.configure(job);
  InputSplit splits[] = format.getSplits(job, 1);
  boolean exceptionThrown = false;
  for (InputSplit split : splits) {
    try {
      RecordReader<LongWritable, BytesWritable> reader = 
                           format.getRecordReader(split, job, voidReporter);
    } catch(IOException ioe) {
      exceptionThrown = true;
      LOG.info("Exception message:" + ioe.getMessage());
    }
  }
  assertTrue("Exception for zero record length:", exceptionThrown);
}
TestFixedLengthInputFormat.java 文件源码 项目:hadoop 阅读 22 收藏 0 点赞 0 评论 0
/**
 * Test with record length set to a negative value
 */
@Test (timeout=5000)
public void testNegativeRecordLength() throws IOException {
  localFs.delete(workDir, true);
  Path file = new Path(workDir, new String("testFormat.txt"));
  createFile(file, null, 10, 10);
  // Set the fixed length record length config property 
  JobConf job = new JobConf(defaultConf);
  FileInputFormat.setInputPaths(job, workDir);
  FixedLengthInputFormat format = new FixedLengthInputFormat();
  format.setRecordLength(job, -10);
  format.configure(job);
  InputSplit splits[] = format.getSplits(job, 1);
  boolean exceptionThrown = false;
  for (InputSplit split : splits) {
    try {
      RecordReader<LongWritable, BytesWritable> reader = 
          format.getRecordReader(split, job, voidReporter);
    } catch(IOException ioe) {
      exceptionThrown = true;
      LOG.info("Exception message:" + ioe.getMessage());
    }
  }
  assertTrue("Exception for negative record length:", exceptionThrown);
}
KeySampler.java 文件源码 项目:ditb 阅读 24 收藏 0 点赞 0 评论 0
public void next(BytesWritable key) {
  key.setSize(Math.max(MIN_KEY_LEN, keyLenRNG.nextInt()));
  random.nextBytes(key.get());
  int rnd = 0;
  if (max != min) {
    rnd = random.nextInt(max - min);
  }
  int n = rnd + min;
  byte[] b = key.get();
  b[0] = (byte) (n >> 24);
  b[1] = (byte) (n >> 16);
  b[2] = (byte) (n >> 8);
  b[3] = (byte) n;
}
BCFile.java 文件源码 项目:hadoop-oss 阅读 26 收藏 0 点赞 0 评论 0
/**
 * Constructor
 * 
 * @param fout
 *          FS output stream.
 * @param compressionName
 *          Name of the compression algorithm, which will be used for all
 *          data blocks.
 * @throws IOException
 * @see Compression#getSupportedAlgorithms
 */
public Writer(FSDataOutputStream fout, String compressionName,
    Configuration conf) throws IOException {
  if (fout.getPos() != 0) {
    throw new IOException("Output file not at zero offset.");
  }

  this.out = fout;
  this.conf = conf;
  dataIndex = new DataIndex(compressionName);
  metaIndex = new MetaIndex();
  fsOutputBuffer = new BytesWritable();
  Magic.write(fout);
}
BytesWritableCompactionMapper.java 文件源码 项目:dataSqueeze 阅读 23 收藏 0 点赞 0 评论 0
/**
 * {@inheritDoc}
 */
protected void map(final Object key, final BytesWritable value, final Context context) throws IOException, InterruptedException {
    if (value!= null && value.toString() != null && value.toString().isEmpty()) {
        return;
    }

    // Mapper sends data with parent directory path as keys to retain directory structure
    final FileSplit fileSplit = (FileSplit) context.getInputSplit();
    final Path filePath = fileSplit.getPath();
    final String parentFilePath = String.format("%s/", filePath.getParent().toString());
    log.debug("Parent file path {}", parentFilePath);

    if (!fileSizesMap.containsKey(filePath.toString())) {
        if (fileSystem == null){
            final URI uri = URI.create(filePath.toString());
            fileSystem = FileSystem.get(uri, configuration);
        }
        final FileStatus[] listStatuses = fileSystem.listStatus(filePath);
        for (FileStatus fileStatus : listStatuses) {
            if (!fileStatus.isDirectory()) {
                fileSizesMap.put(fileStatus.getPath().toString(), fileStatus.getLen());
                log.info("Entry added to fileSizes Map {} {}", fileStatus.getPath().toString(), fileStatus.getLen());
            }
        }
    }

    final Text parentFilePathKey = new Text(parentFilePath);
    final Text filePathKey = new Text(filePath.toString());
    final Long fileSize = fileSizesMap.get(filePath.toString());
    if (fileSize < threshold) {
        context.write(parentFilePathKey, value);
    } else {
        context.write(filePathKey, value);
    }
}
KVGenerator.java 文件源码 项目:hadoop-oss 阅读 20 收藏 0 点赞 0 评论 0
public KVGenerator(Random random, boolean sorted, DiscreteRNG keyLenRNG,
    DiscreteRNG valLenRNG, DiscreteRNG wordLenRNG, int dictSize) {
  this.random = random;
  dict = new byte[dictSize][];
  this.sorted = sorted;
  this.keyLenRNG = keyLenRNG;
  this.valLenRNG = valLenRNG;
  for (int i = 0; i < dictSize; ++i) {
    int wordLen = wordLenRNG.nextInt();
    dict[i] = new byte[wordLen];
    random.nextBytes(dict[i]);
  }
  lastKey = new BytesWritable();
  fillKey(lastKey);
}
KVGenerator.java 文件源码 项目:hadoop-oss 阅读 26 收藏 0 点赞 0 评论 0
private void fillValue(BytesWritable o) {
  int len = valLenRNG.nextInt();
  o.setSize(len);
  int n = 0;
  while (n < len) {
    byte[] word = dict[random.nextInt(dict.length)];
    int l = Math.min(word.length, len - n);
    System.arraycopy(word, 0, o.getBytes(), n, l);
    n += l;
  }
}
TestFixedLengthInputFormat.java 文件源码 项目:hadoop 阅读 24 收藏 0 点赞 0 评论 0
/**
 * Test with record length set to a negative value
 */
@Test (timeout=5000)
public void testNegativeRecordLength() throws Exception {
  localFs.delete(workDir, true);
  Path file = new Path(workDir, new String("testFormat.txt"));
  createFile(file, null, 10, 10);
  // Set the fixed length record length config property 
  Job job = Job.getInstance(defaultConf);
  FixedLengthInputFormat format = new FixedLengthInputFormat();
  format.setRecordLength(job.getConfiguration(), -10);
  FileInputFormat.setInputPaths(job, workDir);
  List<InputSplit> splits = format.getSplits(job);
  boolean exceptionThrown = false;
  for (InputSplit split : splits) {
    try {
      TaskAttemptContext context = MapReduceTestUtil.
          createDummyMapTaskAttemptContext(job.getConfiguration());
      RecordReader<LongWritable, BytesWritable> reader = 
          format.createRecordReader(split, context);
      MapContext<LongWritable, BytesWritable, LongWritable, BytesWritable>
          mcontext =
          new MapContextImpl<LongWritable, BytesWritable, LongWritable,
          BytesWritable>(job.getConfiguration(), context.getTaskAttemptID(),
          reader, null, null, MapReduceTestUtil.createDummyReporter(), split);
      reader.initialize(split, mcontext);
    } catch(IOException ioe) {
      exceptionThrown = true;
      LOG.info("Exception message:" + ioe.getMessage());
    }
  }
  assertTrue("Exception for negative record length:", exceptionThrown);
}
KVGenerator.java 文件源码 项目:ditb 阅读 24 收藏 0 点赞 0 评论 0
public void next(BytesWritable key, BytesWritable value, boolean dupKey) {
  if (dupKey) {
    key.set(lastKey);
  }
  else {
    fillKey(key);
  }
  fillValue(value);
}


问题


面经


文章

微信
公众号

扫码关注公众号