java类org.apache.hadoop.io.compress.BZip2Codec的实例源码

OfficeFormatHadoopExcelTest.java 文件源码 项目:hadoopoffice 阅读 23 收藏 0 点赞 0 评论 0
/**
 * Verifies that ExcelFileInputFormat reads a bzip2-compressed Excel 2013 workbook
 * (.xlsx.bz2) containing multiple sheets: exactly one split is produced, and the
 * record reader iterates every row of every sheet in order. Each record key is
 * "[filename]SheetName!FirstCellAddress" and each value is the row's cell array,
 * where gaps in a row appear as null entries.
 */
@Test
   public void readExcelInputFormatBzip2CompressedExcel2013MultiSheetAll() throws IOException {
      JobConf job = new JobConf(defaultConf);
      // Register the bzip2 codec so the input format can decompress the file.
      CompressionCodec bzip2 = new BZip2Codec();
      ReflectionUtils.setConf(bzip2, job);
// Locate the compressed test workbook on the test classpath.
ClassLoader classLoader = getClass().getClassLoader();
    String fileName="excel2013testmultisheet.xlsx.bz2";
    String fileNameSpreadSheet=classLoader.getResource(fileName).getFile(); 
    Path file = new Path(fileNameSpreadSheet);
    FileInputFormat.setInputPaths(job, file);
// Set the read locale to the one used when the test data was created.
job.set("hadoopoffice.read.locale.bcp47","de");
    ExcelFileInputFormat format = new ExcelFileInputFormat();
    format.configure(job);
    // A single (compressed, non-splittable) file must yield exactly one split.
    InputSplit[] inputSplits = format.getSplits(job,1);
    assertEquals(1,inputSplits.length,"Only one split generated for Excel file");
    RecordReader<Text, ArrayWritable> reader = format.getRecordReader(inputSplits[0], job, reporter);
assertNotNull(reader,"Format returned  null RecordReader");
Text spreadSheetKey = new Text();   
ArrayWritable spreadSheetValue = new ArrayWritable(SpreadSheetCellDAO.class);
// Sheet1, row 1: four populated cells test1..test4; key encodes file, sheet and first cell address.
assertTrue(reader.next(spreadSheetKey,spreadSheetValue),"Input Split for Excel file contains row 1 (first sheet)"); 
assertEquals("[excel2013testmultisheet.xlsx.bz2]Sheet1!A1",spreadSheetKey.toString(),"Input Split for Excel file has keyname == \"[excel2013testmultisheet.xlsx.bz2]Sheet1!A1\"");
assertEquals(4,spreadSheetValue.get().length,"Input Split for Excel file contains row 1 with 4 columns");
assertEquals("test1",((SpreadSheetCellDAO)spreadSheetValue.get()[0]).getFormattedValue(),"Input Split for Excel file contains row 1 with cell 1 == \"test1\"");
assertEquals("Sheet1",((SpreadSheetCellDAO)spreadSheetValue.get()[0]).getSheetName(),"Input Split for Excel file contains row 1 with cell 1 sheetname == \"Sheet1\"");  
assertEquals("A1",((SpreadSheetCellDAO)spreadSheetValue.get()[0]).getAddress(),"Input Split for Excel file contains row 1 with cell 1 address == \"A1\"");  
assertEquals("test2",((SpreadSheetCellDAO)spreadSheetValue.get()[1]).getFormattedValue(),"Input Split for Excel file contains row 1 with cell 2 == \"test2\""); 
assertEquals("test3",((SpreadSheetCellDAO)spreadSheetValue.get()[2]).getFormattedValue(),"Input Split for Excel file contains row 1 with cell 3 == \"test3\""); 
assertEquals("test4",((SpreadSheetCellDAO)spreadSheetValue.get()[3]).getFormattedValue(),"Input Split for Excel file contains row 1 with cell 4 == \"test4\""); 
// Sheet1, row 2: a single cell "4".
assertTrue(reader.next(spreadSheetKey,spreadSheetValue),"Input Split for Excel file contains row 2 (first sheet)");
assertEquals(1,spreadSheetValue.get().length,"Input Split for Excel file contains row 2 with 1 column");
assertEquals("4",((SpreadSheetCellDAO)spreadSheetValue.get()[0]).getFormattedValue(),"Input Split for Excel file contains row 2 with cell 1 == \"4\""); 
// Sheet1, row 3: 5 columns — a date formatted per the "de" locale, a number,
// two empty cells (returned as null), and the literal string "null".
assertTrue(reader.next(spreadSheetKey,spreadSheetValue),"Input Split for Excel file contains row 3 (first sheet)"); 
assertEquals(5,spreadSheetValue.get().length,"Input Split for Excel file contains row 3 with 5 columns");
assertEquals("31/12/99",((SpreadSheetCellDAO)spreadSheetValue.get()[0]).getFormattedValue(),"Input Split for Excel file contains row 3 with cell 1 == \"31/12/99\"");   
assertEquals("5",((SpreadSheetCellDAO)spreadSheetValue.get()[1]).getFormattedValue(),"Input Split for Excel file contains row 3 with cell 2 == \"5\""); 
assertNull(spreadSheetValue.get()[2],"Input Split for Excel file contains row 3 with cell 3 == null");  
assertNull(spreadSheetValue.get()[3],"Input Split for Excel file contains row 3 with cell 4 == null");  
assertEquals("null",((SpreadSheetCellDAO)spreadSheetValue.get()[4]).getFormattedValue(),"Input Split for Excel file contains row 3 with cell 5 == \"null\"");       
// Sheet1, row 4: a single cell "1".
assertTrue(reader.next(spreadSheetKey,spreadSheetValue),"Input Split for Excel file contains row 4 (first sheet)");
assertEquals(1,spreadSheetValue.get().length,"Input Split for Excel file contains row 4 with 1 column");
assertEquals("1",((SpreadSheetCellDAO)spreadSheetValue.get()[0]).getFormattedValue(),"Input Split for Excel file contains row 4 with cell 1 == \"1\""); 
// Sheet1, rows 5 and 6: three numeric cells each.
assertTrue(reader.next(spreadSheetKey,spreadSheetValue),"Input Split for Excel file contains row 5 (first sheet)");
assertEquals(3,spreadSheetValue.get().length,"Input Split for Excel file contains row 5 with 3 columns");
assertEquals("2",((SpreadSheetCellDAO)spreadSheetValue.get()[0]).getFormattedValue(),"Input Split for Excel file contains row 5 with cell 1 == \"2\"");          
assertEquals("6",((SpreadSheetCellDAO)spreadSheetValue.get()[1]).getFormattedValue(),"Input Split for Excel file contains row 5 with cell 2== \"6\"");
assertEquals("10",((SpreadSheetCellDAO)spreadSheetValue.get()[2]).getFormattedValue(),"Input Split for Excel file contains row 5 with cell 3== \"10\"");
assertTrue(reader.next(spreadSheetKey,spreadSheetValue),"Input Split for Excel file contains row 6 (first sheet)"); 
assertEquals(3,spreadSheetValue.get().length,"Input Split for Excel file contains row 6 with 3 columns");
assertEquals("3",((SpreadSheetCellDAO)spreadSheetValue.get()[0]).getFormattedValue(),"Input Split for Excel file contains row 6 with cell 1 == \"3\"");      
assertEquals("4",((SpreadSheetCellDAO)spreadSheetValue.get()[1]).getFormattedValue(),"Input Split for Excel file contains row 6 with cell 2== \"4\"");
assertEquals("15",((SpreadSheetCellDAO)spreadSheetValue.get()[2]).getFormattedValue(),"Input Split for Excel file contains row 6 with cell 3== \"15\"");
// Rows 7-9 come from the second sheet: the reader continues seamlessly across sheets.
assertTrue(reader.next(spreadSheetKey,spreadSheetValue),"Input Split for Excel file contains row 7 (second sheet)");    
assertEquals("8",((SpreadSheetCellDAO)spreadSheetValue.get()[0]).getFormattedValue(),"Input Split for Excel file contains row 7 with cell 1 == \"8\""); 
assertEquals("99",((SpreadSheetCellDAO)spreadSheetValue.get()[1]).getFormattedValue(),"Input Split for Excel file contains row 7 with cell 2 == \"99\"");   
assertEquals(2,spreadSheetValue.get().length,"Input Split for Excel file contains row 7 with 2 columns");
assertTrue(reader.next(spreadSheetKey,spreadSheetValue),"Input Split for Excel file contains row 8 (second sheet)");    
assertEquals(1,spreadSheetValue.get().length,"Input Split for Excel file contains row 8 with 1 column");
assertEquals("test",((SpreadSheetCellDAO)spreadSheetValue.get()[0]).getFormattedValue(),"Input Split for Excel file contains row 8 with cell 1 == \"test\"");   
// Second sheet, row 9: two leading empty cells followed by "seven".
assertTrue(reader.next(spreadSheetKey,spreadSheetValue),"Input Split for Excel file contains row 9 (second sheet)");    
assertEquals(3,spreadSheetValue.get().length,"Input Split for Excel file contains row 9 with 3 columns");
assertNull(spreadSheetValue.get()[0],"Input Split for Excel file contains row 9 with cell 1 == null");  
assertNull(spreadSheetValue.get()[1],"Input Split for Excel file contains row 9 with cell 2 == null");  
assertEquals("seven",((SpreadSheetCellDAO)spreadSheetValue.get()[2]).getFormattedValue(),"Input Split for Excel file contains row 9 with cell 3 == \"seven\"");
   }
DataGenerator.java 文件源码 项目:pregelix 阅读 25 收藏 0 点赞 0 评论 0
/**
 * Generates test data with two chained MapReduce passes.
 *
 * <p>Pass 1 scans the text input and writes the maximum id to the temporary
 * HDFS file {@code /maxtemp}. Pass 2 re-reads the input and, parameterized by
 * that maximum id and a multiplier, writes the generated records to the output
 * path, optionally compressed.
 *
 * <p>Arguments: {@code args[0]} input path, {@code args[1]} output path,
 * {@code args[2]} multiplier ({@code hyracks.x}), {@code args[3]} reducer count
 * for the generation pass, {@code args[4]} (optional) output compression
 * selector — a value starting with {@code bzip} or {@code gz}.
 *
 * @param args command-line arguments as described above
 * @throws IOException if either job fails
 */
public static void main(String[] args) throws IOException {
    // Fail fast with a usage message instead of an ArrayIndexOutOfBoundsException:
    // args[2] and args[3] are read unconditionally below.
    if (args.length < 4) {
        System.err.println("Usage: DataGenerator <inputPath> <outputPath> <x> <numReducers> [bzip*|gz*]");
        return;
    }

    JobConf job = new JobConf(DataGenerator.class);
    FileSystem dfs = FileSystem.get(job);
    String maxFile = "/maxtemp";
    // Remove any stale result from a previous run.
    dfs.delete(new Path(maxFile), true);

    // Pass 1: find the maximum id; a single reducer writes the one result file.
    job.setJobName(DataGenerator.class.getSimpleName() + "max ID");
    job.setMapperClass(MapMaxId.class);
    job.setCombinerClass(CombineMaxId.class);
    job.setReducerClass(ReduceMaxId.class);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(VLongWritable.class);

    job.setInputFormat(TextInputFormat.class);
    FileInputFormat.setInputPaths(job, args[0]);
    FileOutputFormat.setOutputPath(job, new Path(maxFile));
    job.setNumReduceTasks(1);
    JobClient.runJob(job);

    // Pass 2: generate records, parameterized by the max id from pass 1.
    job = new JobConf(DataGenerator.class);
    job.set("hyracks.maxid.file", maxFile);
    job.setInt("hyracks.x", Integer.parseInt(args[2]));
    dfs.delete(new Path(args[1]), true);

    job.setJobName(DataGenerator.class.getSimpleName());
    job.setMapperClass(MapRecordGen.class);
    job.setReducerClass(ReduceRecordGen.class);
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(Text.class);

    job.setInputFormat(TextInputFormat.class);
    FileInputFormat.setInputPaths(job, args[0]);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.setNumReduceTasks(Integer.parseInt(args[3]));

    // Optional output compression: prefix match keeps "bzip"/"bzip2" and "gz"/"gzip" working.
    if (args.length > 4) {
        if (args[4].startsWith("bzip"))
            FileOutputFormat.setOutputCompressorClass(job, BZip2Codec.class);
        if (args[4].startsWith("gz"))
            FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
    }
    JobClient.runJob(job);
}
TestCompression.java 文件源码 项目:zSqoop 阅读 21 收藏 0 点赞 0 评论 0
/**
 * Runs the text-file compression round-trip with the bzip2 codec.
 * The second argument (4) is passed through to the shared helper —
 * presumably an expected count; confirm against runTextCompressionTest.
 */
public void testBzip2TextCompression() throws IOException {
  BZip2Codec codec = new BZip2Codec();
  runTextCompressionTest(codec, 4);
}
TestCompression.java 文件源码 项目:zSqoop 阅读 21 收藏 0 点赞 0 评论 0
/**
 * Runs the SequenceFile compression round-trip with the bzip2 codec.
 * The second argument (4) is passed through to the shared helper —
 * presumably an expected count; confirm against runSequenceFileCompressionTest.
 */
public void testBzip2SequenceFileCompression() throws Exception {
  BZip2Codec codec = new BZip2Codec();
  runSequenceFileCompressionTest(codec, 4);
}
TestCompression.java 文件源码 项目:sqoop 阅读 20 收藏 0 点赞 0 评论 0
/**
 * Exercises text-output compression using bzip2.
 * Delegates to the shared helper; the constant 4 is the helper's expected
 * value — presumably a count, verify against runTextCompressionTest.
 */
public void testBzip2TextCompression() throws IOException {
  final BZip2Codec bzip2 = new BZip2Codec();
  runTextCompressionTest(bzip2, 4);
}
TestCompression.java 文件源码 项目:sqoop 阅读 21 收藏 0 点赞 0 评论 0
/**
 * Exercises SequenceFile-output compression using bzip2.
 * Delegates to the shared helper; the constant 4 is the helper's expected
 * value — presumably a count, verify against runSequenceFileCompressionTest.
 */
public void testBzip2SequenceFileCompression() throws Exception {
  final BZip2Codec bzip2 = new BZip2Codec();
  runSequenceFileCompressionTest(bzip2, 4);
}
ManyTxtToFewSeqJob.java 文件源码 项目:MRSmallFileCombiner 阅读 23 收藏 0 点赞 0 评论 0
/**
 * Driver for a map-only job that consolidates many small text files into a few
 * SequenceFiles.
 *
 * <p>Arguments: {@code args[0]} input path, {@code args[1]} output path,
 * {@code args[2]} number of mappers, {@code args[3]} compression codec
 * ({@code gzip}, {@code bzip2}, anything else selects snappy).
 *
 * @param args command-line arguments as described above
 * @throws Exception if the job cannot be configured or run
 */
public static void main(String[] args) throws Exception {
    // Require four arguments: the original "< 3" check still allowed the
    // unconditional read of args[3] below to throw ArrayIndexOutOfBoundsException.
    if (args.length < 4) {
        System.out
                .println("ManyTxtToFewSeqJob <inputPath> <outputPath> <# mappers> <compressionCodec>");
        System.out.println();
        System.out
                .println("Example: ManyTxtToFewSeqJob ./input ./output 20 snappy");
        return;
    }

    // Get values from args
    String inputPath = args[0];
    String outputPath = args[1];
    String numberOfMappers = args[2];
    String compressionCodec = args[3];

    // Clear any previous output before submitting; the job fails on an existing directory.
    Configuration config = new Configuration();
    FileSystem hdfs = FileSystem.get(config);
    hdfs.delete(new Path(outputPath), true);

    // Create job (Job.getInstance replaces the deprecated new Job()).
    Job job = Job.getInstance(config);
    job.setJobName("ManyTxtToFewSeqJob");
    job.setJarByClass(ManyTxtToFewSeqJob.class);

    // Input: custom format that packs many small files into a fixed number of mapper splits.
    job.setInputFormatClass(ConfigurableInputFormat.class);
    ConfigurableInputFormat.setInputPath(job, inputPath);
    ConfigurableInputFormat.setMapperNumber(job, Integer.parseInt(numberOfMappers));

    // Output: SequenceFile with the requested codec; snappy is the default.
    // equalsIgnoreCase avoids the locale pitfalls of toLowerCase().equals(...).
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setOutputPath(job, new Path(outputPath));
    if (compressionCodec.equalsIgnoreCase("gzip")) {
        SequenceFileOutputFormat.setOutputCompressorClass(job,
                GzipCodec.class);
    } else if (compressionCodec.equalsIgnoreCase("bzip2")) {
        SequenceFileOutputFormat.setOutputCompressorClass(job,
                BZip2Codec.class);
    } else {
        SequenceFileOutputFormat.setOutputCompressorClass(job,
                SnappyCodec.class);
    }

    // Map-only consolidation: the mapper emits the combined bytes directly.
    job.setMapperClass(ConsalidatorMapper.class);

    // Key/value formats for both map output and final output.
    job.setOutputKeyClass(BytesWritable.class);
    job.setOutputValueClass(BytesWritable.class);
    job.setMapOutputKeyClass(BytesWritable.class);
    job.setMapOutputValueClass(BytesWritable.class);

    job.setNumReduceTasks(0);

    // Run and propagate success/failure as the process exit code
    // (previously the result of waitForCompletion was silently discarded).
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
HadoopCompressionCodecs.java 文件源码 项目:eoulsan 阅读 22 收藏 0 点赞 0 评论 0
/**
 * Create a bzip2 input stream.
 * @param is input stream
 * @return an uncompressed input stream
 * @throws IOException if an error occurs while creating the input stream
 */
/**
 * Wraps an input stream in a bzip2 decompressor.
 * @param is the compressed input stream
 * @return an input stream yielding the uncompressed bytes
 * @throws IOException if the decompressing stream cannot be created
 */
public static InputStream createBZip2InputStream(final InputStream is)
    throws IOException {

  final BZip2Codec codec = new BZip2Codec();
  return codec.createInputStream(is);
}
HadoopCompressionCodecs.java 文件源码 项目:eoulsan 阅读 21 收藏 0 点赞 0 评论 0
/**
 * Create a bzip2 output stream.
 * @param os the output stream to compress
 * @return a compressed output stream
 * @throws IOException if an error occurs while creating the output stream
 */
/**
 * Wraps an output stream in a bzip2 compressor.
 * @param os the output stream to compress
 * @return an output stream that bzip2-compresses what is written to it
 * @throws IOException if the compressing stream cannot be created
 */
public static OutputStream createBZip2OutputStream(final OutputStream os)
    throws IOException {

  final BZip2Codec codec = new BZip2Codec();
  return codec.createOutputStream(os);
}


问题


面经


文章

微信
公众号

扫码关注公众号