TestConcatenatedCompressedInput.java 文件源码-java代码片段

/**
   * Extended bzip2 test, similar to BuiltInGzipDecompressor test above.
   */
  @Test
  public void testMoreBzip2() throws IOException {
    JobConf jobConf = new JobConf(defaultConf);

    CompressionCodec bzip2 = new BZip2Codec();
    ReflectionUtils.setConf(bzip2, jobConf);
    localFs.delete(workDir, true);

    System.out.println(COLOR_BR_MAGENTA +
      "testMoreBzip2() using non-native CBZip2InputStream (presumably)" +
      COLOR_NORMAL);

    // copy single-member test file to HDFS
    String fn1 = "testConcatThenCompress.txt" + bzip2.getDefaultExtension();
    Path fnLocal1 = new Path(System.getProperty("test.concat.data","/tmp"),fn1);
    Path fnHDFS1  = new Path(workDir, fn1);
    localFs.copyFromLocalFile(fnLocal1, fnHDFS1);

    // copy multiple-member test file to HDFS
    String fn2 = "testCompressThenConcat.txt" + bzip2.getDefaultExtension();
    Path fnLocal2 = new Path(System.getProperty("test.concat.data","/tmp"),fn2);
    Path fnHDFS2  = new Path(workDir, fn2);
    localFs.copyFromLocalFile(fnLocal2, fnHDFS2);

    FileInputFormat.setInputPaths(jobConf, workDir);

    // here's first pair of BlockDecompressorStreams:
    final FileInputStream in1 = new FileInputStream(fnLocal1.toString());
    final FileInputStream in2 = new FileInputStream(fnLocal2.toString());
    assertEquals("concat bytes available", 2567, in1.available());
    assertEquals("concat bytes available", 3056, in2.available());

/*
    // FIXME
    // The while-loop below dies at the beginning of the 2nd concatenated
    // member (after 17 lines successfully read) with:
    //
    //   java.io.IOException: bad block header
    //   at org.apache.hadoop.io.compress.bzip2.CBZip2InputStream.initBlock(
    //   CBZip2InputStream.java:527)
    //
    // It is not critical to concatenated-gzip support, HADOOP-6835, so it's
    // simply commented out for now (and HADOOP-6852 filed).  If and when the
    // latter issue is resolved--perhaps by fixing an error here--this code
    // should be reenabled.  Note that the doMultipleBzip2BufferSizes() test
    // below uses the same testCompressThenConcat.txt.bz2 file but works fine.

    CompressionInputStream cin2 = bzip2.createInputStream(in2);
    LineReader in = new LineReader(cin2);
    Text out = new Text();

    int numBytes, totalBytes=0, lineNum=0;
    while ((numBytes = in.readLine(out)) > 0) {
      ++lineNum;
      totalBytes += numBytes;
    }
    in.close();
    assertEquals("total uncompressed bytes in concatenated test file",
                 5346, totalBytes);
    assertEquals("total uncompressed lines in concatenated test file",
                 84, lineNum);
 */

    // test CBZip2InputStream with lots of different input-buffer sizes
    doMultipleBzip2BufferSizes(jobConf);
  }