Java class org.apache.hadoop.io.compress.BZip2Codec — example source code

BitcoinFormatHadoopTest.java — project: hadoopcryptoledger
@Test
public void readBitcoinRawBlockInputFormatBzip2Compressed() throws IOException {
JobConf job = new JobConf(defaultConf);
CompressionCodec bzip2 = new BZip2Codec();
ReflectionUtils.setConf(bzip2, job);
ClassLoader classLoader = getClass().getClassLoader();
String fileName="version4comp.blk.bz2";
String fileNameBlock=classLoader.getResource("testdata/"+fileName).getFile();
Path file = new Path(fileNameBlock);
FileInputFormat.setInputPaths(job, file);
BitcoinRawBlockFileInputFormat format = new BitcoinRawBlockFileInputFormat();
format.configure(job);
InputSplit[] inputSplits = format.getSplits(job,1);
assertEquals( 1, inputSplits.length,"Only one split generated for compressed block");
RecordReader<BytesWritable, BytesWritable> reader = format.getRecordReader(inputSplits[0], job, reporter);
assertNotNull( reader,"Format returned null RecordReader");
BytesWritable key = new BytesWritable();
BytesWritable block = new BytesWritable();
assertTrue( reader.next(key,block),"Input Split for block version contains at least one block");
assertEquals( 998039, block.getLength(),"Compressed block must have a size of 998.039 bytes");
BytesWritable emptyKey = new BytesWritable();
BytesWritable emptyBlock = new BytesWritable();
assertFalse( reader.next(emptyKey,emptyBlock),"No further blocks in compressed block");
reader.close();
}
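The tests above rely on Hadoop resolving the .bz2 extension to BZip2Codec before the Bitcoin input formats ever see the data. The same decompression can be done outside an input format. Below is a minimal sketch of that lookup and read path; the file name input.txt.bz2 and the local FileSystem are placeholders for illustration, not part of the test data.

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;

public class Bzip2ReadSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.getLocal(conf);
    Path path = new Path("input.txt.bz2"); // placeholder file
    // Resolve the codec from the file extension (.bz2 -> BZip2Codec).
    CompressionCodec codec = new CompressionCodecFactory(conf).getCodec(path);
    try (BufferedReader reader = new BufferedReader(new InputStreamReader(
        codec.createInputStream(fs.open(path)), StandardCharsets.UTF_8))) {
      String line;
      while ((line = reader.readLine()) != null) {
        System.out.println(line);
      }
    }
  }
}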
BitcoinFormatHadoopTest.java — project: hadoopcryptoledger
@Test
public void readBitcoinTransactionInputFormatBzip2Compressed() throws IOException {
JobConf job = new JobConf(defaultConf);
CompressionCodec bzip2 = new BZip2Codec();
ReflectionUtils.setConf(bzip2, job);
ClassLoader classLoader = getClass().getClassLoader();
String fileName="version4comp.blk.bz2";
String fileNameBlock=classLoader.getResource("testdata/"+fileName).getFile();
Path file = new Path(fileNameBlock);
FileInputFormat.setInputPaths(job, file);
BitcoinTransactionFileInputFormat format = new BitcoinTransactionFileInputFormat();
format.configure(job);
InputSplit[] inputSplits = format.getSplits(job,1);
assertEquals( 1, inputSplits.length,"Only one split generated for compressed block");
RecordReader<BytesWritable, BitcoinTransaction> reader = format.getRecordReader(inputSplits[0], job, reporter);
assertNotNull( reader,"Format returned null RecordReader");
BytesWritable key = new BytesWritable();
BitcoinTransaction transaction = new BitcoinTransaction();
int transactCount=0;
while (reader.next(key,transaction)) {
  transactCount++;
}
assertEquals( 936, transactCount,"Compressed block must have at least 936 transactions");
reader.close();
}
BitcoinFormatHadoopTest.java — project: hadoopcryptoledger
@Test
public void readBitcoinRawBlockInputFormatBzip2Compressed() throws IOException, InterruptedException {
Configuration conf = new Configuration(defaultConf);
Job job = Job.getInstance(conf);
CompressionCodec bzip2 = new BZip2Codec();
ReflectionUtils.setConf(bzip2, conf);
ClassLoader classLoader = getClass().getClassLoader();
String fileName="version4comp.blk.bz2";
String fileNameBlock=classLoader.getResource("testdata/"+fileName).getFile();
Path file = new Path(fileNameBlock);
FileInputFormat.setInputPaths(job, file);
BitcoinRawBlockFileInputFormat format = new BitcoinRawBlockFileInputFormat();
List<InputSplit> splits = format.getSplits(job);
TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
assertEquals( 1, splits.size(),"Only one split generated for compressed block");
RecordReader<BytesWritable, BytesWritable> reader = format.createRecordReader(splits.get(0), context);
assertNotNull( reader,"Format returned null RecordReader");
reader.initialize(splits.get(0),context);
BytesWritable key = new BytesWritable();
BytesWritable block = new BytesWritable();
assertTrue( reader.nextKeyValue(),"Input Split for block version contains at least one block");
block=reader.getCurrentValue();
assertEquals( 998039, block.getLength(),"Compressed block must have a size of 998.039 bytes");
assertFalse( reader.nextKeyValue(),"No further blocks in compressed block");
reader.close();
}
BitcoinFormatHadoopTest.java — project: hadoopcryptoledger
@Test
public void readBitcoinTransactionInputFormatBzip2Compressed() throws IOException, InterruptedException {
Configuration conf = new Configuration(defaultConf);
Job job = Job.getInstance(conf);
CompressionCodec bzip2 = new BZip2Codec();
ReflectionUtils.setConf(bzip2, conf);
ClassLoader classLoader = getClass().getClassLoader();
String fileName="version4comp.blk.bz2";
String fileNameBlock=classLoader.getResource("testdata/"+fileName).getFile();
Path file = new Path(fileNameBlock);
FileInputFormat.setInputPaths(job, file);
BitcoinTransactionFileInputFormat format = new BitcoinTransactionFileInputFormat();
List<InputSplit> splits = format.getSplits(job);
TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
assertEquals( 1, splits.size(),"Only one split generated for compressed block");
RecordReader<BytesWritable, BitcoinTransaction> reader = format.createRecordReader(splits.get(0), context);
assertNotNull( reader,"Format returned null RecordReader");
reader.initialize(splits.get(0),context);
int transactCount=0;
while (reader.nextKeyValue()) {
  transactCount++;
}
assertEquals( 936, transactCount,"Compressed block must have at least 936 transactions");
reader.close();
}
DataBalancer.java — project: pregelix
public static void main(String[] args) throws IOException {
JobConf job = new JobConf(DataBalancer.class);
job.setJobName(DataBalancer.class.getSimpleName());
job.setMapperClass(MapRecordOnly.class);
job.setReducerClass(ReduceRecordOnly.class);
job.setMapOutputKeyClass(LongWritable.class);
job.setMapOutputValueClass(Text.class);
job.setInputFormat(TextInputFormat.class);
FileInputFormat.setInputPaths(job, args[0]);
FileOutputFormat.setOutputPath(job, new Path(args[1]));
job.setNumReduceTasks(Integer.parseInt(args[2]));
if (args.length > 3) {
  if (args[3].startsWith("bzip"))
    FileOutputFormat.setOutputCompressorClass(job, BZip2Codec.class);
  if (args[3].startsWith("gz"))
    FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
}
JobClient.runJob(job);
}
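setOutputCompressorClass above only tells the job's output format which codec to use. To write a bzip2-compressed file directly with the codec, a sketch along these lines works; the output path is a placeholder and this is not part of DataBalancer itself.

import java.io.OutputStream;
import java.nio.charset.StandardCharsets;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.compress.BZip2Codec;

public class Bzip2WriteSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.getLocal(conf);
    // Give the codec its Configuration, as the tests above do via setConf/ReflectionUtils.
    BZip2Codec codec = new BZip2Codec();
    codec.setConf(conf);
    Path out = new Path("/tmp/balanced-output.bz2"); // placeholder path
    try (OutputStream os = codec.createOutputStream(fs.create(out, true))) {
      os.write("one record per line\n".getBytes(StandardCharsets.UTF_8));
    }
  }
}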
SortDataPreprocessor.java — project: hadoop-in-action
@Override
public int run(String[] args) throws Exception {
Job job = JobBuilder.parseInputAndOutput(this, super.getConf(), args);
if (job == null) {
  return -1;
}
job.setMapperClass(CleanerMapper.class);
job.setOutputKeyClass(IntWritable.class);
job.setOutputValueClass(Text.class);
job.setNumReduceTasks(0);
job.setOutputFormatClass(SequenceFileOutputFormat.class);
SequenceFileOutputFormat.setCompressOutput(job, true);
SequenceFileOutputFormat.setOutputCompressorClass(job, BZip2Codec.class);
SequenceFileOutputFormat.setOutputCompressionType(job,
CompressionType.BLOCK);
return job.waitForCompletion(true) ? 0 : 1;
}
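The preprocessor above configures block-compressed SequenceFile output through the output format. The same file can be written programmatically with the writer API; a minimal sketch, assuming the option-based SequenceFile.createWriter available in Hadoop 2.x (the output path is a placeholder):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.SequenceFile.CompressionType;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.BZip2Codec;

public class BlockCompressedSeqFileSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    BZip2Codec codec = new BZip2Codec();
    codec.setConf(conf);
    Path out = new Path("/tmp/sorted.seq"); // placeholder path
    try (SequenceFile.Writer writer = SequenceFile.createWriter(conf,
        SequenceFile.Writer.file(out),
        SequenceFile.Writer.keyClass(IntWritable.class),
        SequenceFile.Writer.valueClass(Text.class),
        // BLOCK compression batches many records before compressing them with bzip2.
        SequenceFile.Writer.compression(CompressionType.BLOCK, codec))) {
      writer.append(new IntWritable(42), new Text("example record"));
    }
  }
}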
SortByTemperatureToMapFile.java — project: hadoop-in-action
@Override
public int run(String[] args) throws Exception {
Job job = JobBuilder.parseInputAndOutput(this, getConf(), args);
if (job == null) {
  return -1;
}
job.setInputFormatClass(SequenceFileInputFormat.class);
job.setOutputKeyClass(IntWritable.class);
job.setOutputFormatClass(MapFileOutputFormat.class);
SequenceFileOutputFormat.setCompressOutput(job, true);
SequenceFileOutputFormat.setOutputCompressorClass(job, BZip2Codec.class);
SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);
return job.waitForCompletion(true) ? 0 : 1;
}
TestLineRecordReader.java — project: hadoop
@Test
public void testMultipleClose() throws IOException {
URL testFileUrl = getClass().getClassLoader().
getResource("recordSpanningMultipleSplits.txt.bz2");
assertNotNull("Cannot find recordSpanningMultipleSplits.txt.bz2",
testFileUrl);
File testFile = new File(testFileUrl.getFile());
Path testFilePath = new Path(testFile.getAbsolutePath());
long testFileSize = testFile.length();
Configuration conf = new Configuration();
conf.setInt(org.apache.hadoop.mapreduce.lib.input.
LineRecordReader.MAX_LINE_LENGTH, Integer.MAX_VALUE);
FileSplit split = new FileSplit(testFilePath, 0, testFileSize,
(String[])null);
LineRecordReader reader = new LineRecordReader(conf, split);
LongWritable key = new LongWritable();
Text value = new Text();
//noinspection StatementWithEmptyBody
while (reader.next(key, value)) ;
reader.close();
reader.close();
BZip2Codec codec = new BZip2Codec();
codec.setConf(conf);
Set<Decompressor> decompressors = new HashSet<Decompressor>();
for (int i = 0; i < 10; ++i) {
  decompressors.add(CodecPool.getDecompressor(codec));
}
assertEquals(10, decompressors.size());
}
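The test above checks that closing a LineRecordReader twice does not return the same Decompressor to the pool twice (which would make the pool hand out fewer than 10 distinct instances afterwards). Typical CodecPool usage borrows a decompressor and returns it exactly once; a short sketch, not taken from the test itself:

import java.io.IOException;
import java.io.InputStream;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.compress.BZip2Codec;
import org.apache.hadoop.io.compress.CodecPool;
import org.apache.hadoop.io.compress.CompressionInputStream;
import org.apache.hadoop.io.compress.Decompressor;

public class CodecPoolSketch {
  public static void readWithPooledDecompressor(InputStream rawIn, Configuration conf)
      throws IOException {
    BZip2Codec codec = new BZip2Codec();
    codec.setConf(conf);
    // Borrow a decompressor from the pool instead of allocating a new one per stream.
    Decompressor decompressor = CodecPool.getDecompressor(codec);
    try (CompressionInputStream in = codec.createInputStream(rawIn, decompressor)) {
      byte[] buf = new byte[4096];
      while (in.read(buf) != -1) {
        // consume the decompressed bytes
      }
    } finally {
      // Return it exactly once; returning the same instance twice is the bug the test guards against.
      CodecPool.returnDecompressor(decompressor);
    }
  }
}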
TestLineRecordReader.java — project: hadoop
@Test
public void testMultipleClose() throws IOException {
URL testFileUrl = getClass().getClassLoader().
getResource("recordSpanningMultipleSplits.txt.bz2");
assertNotNull("Cannot find recordSpanningMultipleSplits.txt.bz2",
testFileUrl);
File testFile = new File(testFileUrl.getFile());
Path testFilePath = new Path(testFile.getAbsolutePath());
long testFileSize = testFile.length();
Configuration conf = new Configuration();
conf.setInt(org.apache.hadoop.mapreduce.lib.input.
LineRecordReader.MAX_LINE_LENGTH, Integer.MAX_VALUE);
TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
// read the data and check whether BOM is skipped
FileSplit split = new FileSplit(testFilePath, 0, testFileSize, null);
LineRecordReader reader = new LineRecordReader();
reader.initialize(split, context);
//noinspection StatementWithEmptyBody
while (reader.nextKeyValue()) ;
reader.close();
reader.close();
BZip2Codec codec = new BZip2Codec();
codec.setConf(conf);
Set<Decompressor> decompressors = new HashSet<Decompressor>();
for (int i = 0; i < 10; ++i) {
  decompressors.add(CodecPool.getDecompressor(codec));
}
assertEquals(10, decompressors.size());
}
TestLineRecordReader.java — project: aliyun-oss-hadoop-fs
@Test
public void testMultipleClose() throws IOException {
URL testFileUrl = getClass().getClassLoader().
getResource("recordSpanningMultipleSplits.txt.bz2");
assertNotNull("Cannot find recordSpanningMultipleSplits.txt.bz2",
testFileUrl);
File testFile = new File(testFileUrl.getFile());
Path testFilePath = new Path(testFile.getAbsolutePath());
long testFileSize = testFile.length();
Configuration conf = new Configuration();
conf.setInt(org.apache.hadoop.mapreduce.lib.input.
LineRecordReader.MAX_LINE_LENGTH, Integer.MAX_VALUE);
FileSplit split = new FileSplit(testFilePath, 0, testFileSize,
(String[])null);
LineRecordReader reader = new LineRecordReader(conf, split);
LongWritable key = new LongWritable();
Text value = new Text();
//noinspection StatementWithEmptyBody
while (reader.next(key, value)) ;
reader.close();
reader.close();
BZip2Codec codec = new BZip2Codec();
codec.setConf(conf);
Set<Decompressor> decompressors = new HashSet<Decompressor>();
for (int i = 0; i < 10; ++i) {
  decompressors.add(CodecPool.getDecompressor(codec));
}
assertEquals(10, decompressors.size());
}
TestLineRecordReader.java — project: aliyun-oss-hadoop-fs
@Test
public void testMultipleClose() throws IOException {
URL testFileUrl = getClass().getClassLoader().
getResource("recordSpanningMultipleSplits.txt.bz2");
assertNotNull("Cannot find recordSpanningMultipleSplits.txt.bz2",
testFileUrl);
File testFile = new File(testFileUrl.getFile());
Path testFilePath = new Path(testFile.getAbsolutePath());
long testFileSize = testFile.length();
Configuration conf = new Configuration();
conf.setInt(org.apache.hadoop.mapreduce.lib.input.
LineRecordReader.MAX_LINE_LENGTH, Integer.MAX_VALUE);
TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
// read the data and check whether BOM is skipped
FileSplit split = new FileSplit(testFilePath, 0, testFileSize, null);
LineRecordReader reader = new LineRecordReader();
reader.initialize(split, context);
//noinspection StatementWithEmptyBody
while (reader.nextKeyValue()) ;
reader.close();
reader.close();
BZip2Codec codec = new BZip2Codec();
codec.setConf(conf);
Set<Decompressor> decompressors = new HashSet<Decompressor>();
for (int i = 0; i < 10; ++i) {
  decompressors.add(CodecPool.getDecompressor(codec));
}
assertEquals(10, decompressors.size());
}
TestLineRecordReader.java — project: big-c
@Test
public void testMultipleClose() throws IOException {
URL testFileUrl = getClass().getClassLoader().
getResource("recordSpanningMultipleSplits.txt.bz2");
assertNotNull("Cannot find recordSpanningMultipleSplits.txt.bz2",
testFileUrl);
File testFile = new File(testFileUrl.getFile());
Path testFilePath = new Path(testFile.getAbsolutePath());
long testFileSize = testFile.length();
Configuration conf = new Configuration();
conf.setInt(org.apache.hadoop.mapreduce.lib.input.
LineRecordReader.MAX_LINE_LENGTH, Integer.MAX_VALUE);
FileSplit split = new FileSplit(testFilePath, 0, testFileSize,
(String[])null);
LineRecordReader reader = new LineRecordReader(conf, split);
LongWritable key = new LongWritable();
Text value = new Text();
//noinspection StatementWithEmptyBody
while (reader.next(key, value)) ;
reader.close();
reader.close();
BZip2Codec codec = new BZip2Codec();
codec.setConf(conf);
Set<Decompressor> decompressors = new HashSet<Decompressor>();
for (int i = 0; i < 10; ++i) {
  decompressors.add(CodecPool.getDecompressor(codec));
}
assertEquals(10, decompressors.size());
}
TestLineRecordReader.java — project: big-c
@Test
public void testMultipleClose() throws IOException {
URL testFileUrl = getClass().getClassLoader().
getResource("recordSpanningMultipleSplits.txt.bz2");
assertNotNull("Cannot find recordSpanningMultipleSplits.txt.bz2",
testFileUrl);
File testFile = new File(testFileUrl.getFile());
Path testFilePath = new Path(testFile.getAbsolutePath());
long testFileSize = testFile.length();
Configuration conf = new Configuration();
conf.setInt(org.apache.hadoop.mapreduce.lib.input.
LineRecordReader.MAX_LINE_LENGTH, Integer.MAX_VALUE);
TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
// read the data and check whether BOM is skipped
FileSplit split = new FileSplit(testFilePath, 0, testFileSize, null);
LineRecordReader reader = new LineRecordReader();
reader.initialize(split, context);
//noinspection StatementWithEmptyBody
while (reader.nextKeyValue()) ;
reader.close();
reader.close();
BZip2Codec codec = new BZip2Codec();
codec.setConf(conf);
Set<Decompressor> decompressors = new HashSet<Decompressor>();
for (int i = 0; i < 10; ++i) {
  decompressors.add(CodecPool.getDecompressor(codec));
}
assertEquals(10, decompressors.size());
}
BitcoinFormatHadoopTest.java — project: hadoopcryptoledger
@Test
public void readBitcoinBlockInputFormatBzip2Compressed() throws IOException {
JobConf job = new JobConf(defaultConf);
CompressionCodec bzip2 = new BZip2Codec();
ReflectionUtils.setConf(bzip2, job);
ClassLoader classLoader = getClass().getClassLoader();
String fileName="version4comp.blk.bz2";
String fileNameBlock=classLoader.getResource("testdata/"+fileName).getFile();
Path file = new Path(fileNameBlock);
FileInputFormat.setInputPaths(job, file);
BitcoinBlockFileInputFormat format = new BitcoinBlockFileInputFormat();
format.configure(job);
InputSplit[] inputSplits = format.getSplits(job,1);
assertEquals( 1, inputSplits.length,"Only one split generated for compressed block");
RecordReader<BytesWritable, BitcoinBlock> reader = format.getRecordReader(inputSplits[0], job, reporter);
assertNotNull( reader,"Format returned null RecordReader");
BytesWritable key = new BytesWritable();
BitcoinBlock block = new BitcoinBlock();
assertTrue( reader.next(key,block),"Input Split for block version contains at least one block");
assertEquals( 936, block.getTransactions().size(),"Compressed block must have at least 936 transactions");
assertEquals( 4, block.getTransactions().get(0).getListOfInputs().get(0).getTxInScript().length,"Compressed block must contain exactly 936 transactions of which the first has one input and script length 4");
assertEquals( 2, block.getTransactions().get(0).getListOfOutputs().size(),"Compressed block must contain exactly 936 transactions of which the first has two outputs");
assertEquals( 25, block.getTransactions().get(0).getListOfOutputs().get(0).getTxOutScript().length,"Compressed block must contain exactly 936 transactions of which the first has two output and the first output script length 25");
BytesWritable emptyKey = new BytesWritable();
BitcoinBlock emptyBlock = new BitcoinBlock();
assertFalse( reader.next(emptyKey,emptyBlock),"No further blocks in compressed block");
reader.close();
}
BitcoinFormatHadoopTest.java — project: hadoopcryptoledger
@Test
public void readBitcoinBlockInputFormatBzip2Compressed() throws IOException, InterruptedException {
Configuration conf = new Configuration(defaultConf);
Job job = Job.getInstance(conf);
CompressionCodec bzip2 = new BZip2Codec();
ReflectionUtils.setConf(bzip2, conf);
ClassLoader classLoader = getClass().getClassLoader();
String fileName="version4comp.blk.bz2";
String fileNameBlock=classLoader.getResource("testdata/"+fileName).getFile();
Path file = new Path(fileNameBlock);
FileInputFormat.setInputPaths(job, file);
BitcoinBlockFileInputFormat format = new BitcoinBlockFileInputFormat();
List<InputSplit> splits = format.getSplits(job);
TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
assertEquals( 1, splits.size(),"Only one split generated for compressed block");
RecordReader<BytesWritable, BitcoinBlock> reader = format.createRecordReader(splits.get(0), context);
assertNotNull( reader,"Format returned null RecordReader");
reader.initialize(splits.get(0),context);
BytesWritable key = new BytesWritable();
BitcoinBlock block = new BitcoinBlock();
assertTrue( reader.nextKeyValue(),"Input Split for block version contains at least one block");
block=reader.getCurrentValue();
assertEquals( 936, block.getTransactions().size(),"Compressed block must have at least 936 transactions");
assertEquals( 4, block.getTransactions().get(0).getListOfInputs().get(0).getTxInScript().length,"Compressed block must contain exactly 936 transactions of which the first has one input and script length 4");
assertEquals( 2, block.getTransactions().get(0).getListOfOutputs().size(),"Compressed block must contain exactly 936 transactions of which the first has two outputs");
assertEquals( 25, block.getTransactions().get(0).getListOfOutputs().get(0).getTxOutScript().length,"Compressed block must contain exactly 936 transactions of which the first has two output and the first output script length 25");
assertFalse( reader.nextKeyValue(),"No further blocks in compressed block");
reader.close();
}
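Once the input format delivers BitcoinBlock values, a mapper can work on them directly. A minimal sketch of a mapper that emits the transaction count per block, using only the accessors shown in the tests above; the import path for BitcoinBlock is an assumption and may differ between hadoopcryptoledger versions.

import java.io.IOException;

import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

// Package path assumed; adjust to the hadoopcryptoledger version in use.
import org.zuinnote.hadoop.bitcoin.format.common.BitcoinBlock;

public class BlockTransactionCountMapper
    extends Mapper<BytesWritable, BitcoinBlock, Text, IntWritable> {

  @Override
  protected void map(BytesWritable key, BitcoinBlock block, Context context)
      throws IOException, InterruptedException {
    // Emit the number of transactions contained in this block.
    context.write(new Text("transactions"), new IntWritable(block.getTransactions().size()));
  }
}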
TestPartitioner.java — project: sqoop-on-spark
@DataProvider(name="test-hdfs-partitioner")
public static Object[][] data() {
List<Object[]> parameters = new ArrayList<Object[]>();
for (Class<?> compressionClass : new Class<?>[]{null, DefaultCodec.class, BZip2Codec.class}) {
  for (Object outputFileType : new Object[]{TEXT_FILE, SEQUENCE_FILE}) {
    parameters.add(new Object[]{outputFileType, compressionClass});
  }
}
return parameters.toArray(new Object[0][]);
}
TestExtractor.java — project: sqoop-on-spark
@DataProvider(name="test-hdfs-extractor")
public static Object[][] data() {
List<Object[]> parameters = new ArrayList<Object[]>();
for (Class<?> compressionClass : new Class<?>[]{null, DefaultCodec.class, BZip2Codec.class}) {
  for (Object outputFileType : new Object[]{TEXT_FILE, SEQUENCE_FILE}) {
    parameters.add(new Object[]{outputFileType, compressionClass});
  }
}
return parameters.toArray(new Object[0][]);
}
TestLineRecordReader.java — project: hops
@Test
public void testMultipleClose() throws IOException {
URL testFileUrl = getClass().getClassLoader().
getResource("recordSpanningMultipleSplits.txt.bz2");
assertNotNull("Cannot find recordSpanningMultipleSplits.txt.bz2",
testFileUrl);
File testFile = new File(testFileUrl.getFile());
Path testFilePath = new Path(testFile.getAbsolutePath());
long testFileSize = testFile.length();
Configuration conf = new Configuration();
conf.setInt(org.apache.hadoop.mapreduce.lib.input.
LineRecordReader.MAX_LINE_LENGTH, Integer.MAX_VALUE);
FileSplit split = new FileSplit(testFilePath, 0, testFileSize,
(String[])null);
LineRecordReader reader = new LineRecordReader(conf, split);
LongWritable key = new LongWritable();
Text value = new Text();
//noinspection StatementWithEmptyBody
while (reader.next(key, value)) ;
reader.close();
reader.close();
BZip2Codec codec = new BZip2Codec();
codec.setConf(conf);
Set<Decompressor> decompressors = new HashSet<Decompressor>();
for (int i = 0; i < 10; ++i) {
  decompressors.add(CodecPool.getDecompressor(codec));
}
assertEquals(10, decompressors.size());
}
TestLineRecordReader.java — project: hops
@Test
public void testMultipleClose() throws IOException {
URL testFileUrl = getClass().getClassLoader().
getResource("recordSpanningMultipleSplits.txt.bz2");
assertNotNull("Cannot find recordSpanningMultipleSplits.txt.bz2",
testFileUrl);
File testFile = new File(testFileUrl.getFile());
Path testFilePath = new Path(testFile.getAbsolutePath());
long testFileSize = testFile.length();
Configuration conf = new Configuration();
conf.setInt(org.apache.hadoop.mapreduce.lib.input.
LineRecordReader.MAX_LINE_LENGTH, Integer.MAX_VALUE);
TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
// read the data and check whether BOM is skipped
FileSplit split = new FileSplit(testFilePath, 0, testFileSize, null);
LineRecordReader reader = new LineRecordReader();
reader.initialize(split, context);
//noinspection StatementWithEmptyBody
while (reader.nextKeyValue()) ;
reader.close();
reader.close();
BZip2Codec codec = new BZip2Codec();
codec.setConf(conf);
Set<Decompressor> decompressors = new HashSet<Decompressor>();
for (int i = 0; i < 10; ++i) {
  decompressors.add(CodecPool.getDecompressor(codec));
}
assertEquals(10, decompressors.size());
}
PigStorage.java — project: spork-streaming
private void setCompression(Path path, Job job) {
String location=path.getName();
if (location.endsWith(".bz2") || location.endsWith(".bz")) {
FileOutputFormat.setCompressOutput(job, true);
FileOutputFormat.setOutputCompressorClass(job, BZip2Codec.class);
} else if (location.endsWith(".gz")) {
FileOutputFormat.setCompressOutput(job, true);
FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
} else {
FileOutputFormat.setCompressOutput( job, false);
}
}
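PigStorage hard-codes the extension-to-codec mapping. Hadoop already provides the same mapping through CompressionCodecFactory, so an equivalent decision can be sketched as below; this is illustrative only, not Pig's actual implementation, and note that the ".bz" extension accepted by PigStorage is not registered with the factory by default.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class ExtensionBasedCompressionSketch {
  static void setCompressionByExtension(Path path, Job job) {
    Configuration conf = job.getConfiguration();
    // getCodec() maps registered extensions (.bz2, .gz, .deflate, ...) to codec classes.
    CompressionCodec codec = new CompressionCodecFactory(conf).getCodec(path);
    if (codec != null) {
      FileOutputFormat.setCompressOutput(job, true);
      FileOutputFormat.setOutputCompressorClass(job, codec.getClass());
    } else {
      FileOutputFormat.setCompressOutput(job, false);
    }
  }
}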
MultiStorage.java — project: spork-streaming
@Override
public void setStoreLocation(String location, Job job) throws IOException {
job.getConfiguration().set("mapred.textoutputformat.separator", "");
FileOutputFormat.setOutputPath(job, new Path(location));
if (comp == Compression.bz2 || comp == Compression.bz) {
  FileOutputFormat.setCompressOutput(job, true);
  FileOutputFormat.setOutputCompressorClass(job, BZip2Codec.class);
} else if (comp == Compression.gz) {
  FileOutputFormat.setCompressOutput(job, true);
  FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
}
}
PigStorage.java — project: spork
private void setCompression(Path path, Job job) {
String location=path.getName();
if (location.endsWith(".bz2") || location.endsWith(".bz")) {
FileOutputFormat.setCompressOutput(job, true);
FileOutputFormat.setOutputCompressorClass(job, BZip2Codec.class);
} else if (location.endsWith(".gz")) {
FileOutputFormat.setCompressOutput(job, true);
FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
} else {
FileOutputFormat.setCompressOutput( job, false);
}
}
MultiStorage.java — project: spork
@Override
public void setStoreLocation(String location, Job job) throws IOException {
job.getConfiguration().set(MRConfiguration.TEXTOUTPUTFORMAT_SEPARATOR, "");
FileOutputFormat.setOutputPath(job, new Path(location));
if (comp == Compression.bz2 || comp == Compression.bz) {
  FileOutputFormat.setCompressOutput(job, true);
  FileOutputFormat.setOutputCompressorClass(job, BZip2Codec.class);
} else if (comp == Compression.gz) {
  FileOutputFormat.setCompressOutput(job, true);
  FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
}
}
PigStorage.java — project: PonIC
private void setCompression(Path path, Job job) {
String location=path.getName();
if (location.endsWith(".bz2") || location.endsWith(".bz")) {
FileOutputFormat.setCompressOutput(job, true);
FileOutputFormat.setOutputCompressorClass(job, BZip2Codec.class);
} else if (location.endsWith(".gz")) {
FileOutputFormat.setCompressOutput(job, true);
FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
} else {
FileOutputFormat.setCompressOutput( job, false);
}
}
PigStorage.java — project: sedge
private void setCompression(Path path, Job job) {
String location=path.getName();
if (location.endsWith(".bz2") || location.endsWith(".bz")) {
FileOutputFormat.setCompressOutput(job, true);
FileOutputFormat.setOutputCompressorClass(job, BZip2Codec.class);
} else if (location.endsWith(".gz")) {
FileOutputFormat.setCompressOutput(job, true);
FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
} else {
FileOutputFormat.setCompressOutput( job, false);
}
}
MultiStorage.java — project: sedge
@Override
public void setStoreLocation(String location, Job job) throws IOException {
job.getConfiguration().set("mapred.textoutputformat.separator", "");
FileOutputFormat.setOutputPath(job, new Path(location));
if (comp == Compression.bz2 || comp == Compression.bz) {
  FileOutputFormat.setCompressOutput(job, true);
  FileOutputFormat.setOutputCompressorClass(job, BZip2Codec.class);
} else if (comp == Compression.gz) {
  FileOutputFormat.setCompressOutput(job, true);
  FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
}
}
TestCompression.java — project: aliyun-maxcompute-data-collectors
public void testBzip2TextCompression() throws IOException {
runTextCompressionTest(new BZip2Codec(), 4);
}
TestCompression.java — project: aliyun-maxcompute-data-collectors
public void testBzip2SequenceFileCompression() throws Exception {
runSequenceFileCompressionTest(new BZip2Codec(), 4);
}
TestConcatenatedCompressedInput.java — project: aliyun-oss-hadoop-fs
/**
* Test using the bzip2 codec for reading
*/
@Test
public void testBzip2() throws IOException {
JobConf jobConf = new JobConf(defaultConf);
CompressionCodec bzip2 = new BZip2Codec();
ReflectionUtils.setConf(bzip2, jobConf);
localFs.delete(workDir, true);
System.out.println(COLOR_BR_CYAN +
"testBzip2() using non-native CBZip2InputStream (presumably)" +
COLOR_NORMAL);
// copy prebuilt (correct!) version of concat.bz2 to HDFS
final String fn = "concat" + bzip2.getDefaultExtension();
Path fnLocal = new Path(System.getProperty("test.concat.data", "/tmp"), fn);
Path fnHDFS = new Path(workDir, fn);
localFs.copyFromLocalFile(fnLocal, fnHDFS);
writeFile(localFs, new Path(workDir, "part2.txt.bz2"), bzip2,
"this is a test\nof bzip2\n");
FileInputFormat.setInputPaths(jobConf, workDir);
TextInputFormat format = new TextInputFormat(); // extends FileInputFormat
format.configure(jobConf);
format.setMinSplitSize(256); // work around 2-byte splits issue
// [135 splits for a 208-byte file and a 62-byte file(!)]
InputSplit[] splits = format.getSplits(jobConf, 100);
assertEquals("compressed splits == 2", 2, splits.length);
FileSplit tmp = (FileSplit) splits[0];
if (tmp.getPath().getName().equals("part2.txt.bz2")) {
  splits[0] = splits[1];
  splits[1] = tmp;
}
List<Text> results = readSplit(format, splits[0], jobConf);
assertEquals("splits[0] num lines", 6, results.size());
assertEquals("splits[0][5]", "member #3",
results.get(5).toString());
results = readSplit(format, splits[1], jobConf);
assertEquals("splits[1] num lines", 2, results.size());
assertEquals("splits[1][0]", "this is a test",
results.get(0).toString());
assertEquals("splits[1][1]", "of bzip2",
results.get(1).toString());
}
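writeFile() above is a helper defined elsewhere in TestConcatenatedCompressedInput. A plausible minimal sketch of such a helper, which writes the given text through the codec when one is supplied, is shown below; this is an assumption about its shape, not the test's actual code.

import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.nio.charset.StandardCharsets;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.compress.CompressionCodec;

public class WriteFileSketch {
  static void writeFile(FileSystem fs, Path name, CompressionCodec codec, String contents)
      throws IOException {
    OutputStream stm = fs.create(name, true);
    if (codec != null) {
      // Wrap the raw stream so the contents are written bzip2- (or gzip-) compressed.
      stm = codec.createOutputStream(stm);
    }
    try (Writer writer = new OutputStreamWriter(stm, StandardCharsets.UTF_8)) {
      writer.write(contents);
    }
  }
}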
TestConcatenatedCompressedInput.java — project: aliyun-oss-hadoop-fs
/**
* Extended bzip2 test, similar to BuiltInGzipDecompressor test above.
*/
@Test
public void testMoreBzip2() throws IOException {
JobConf jobConf = new JobConf(defaultConf);
CompressionCodec bzip2 = new BZip2Codec();
ReflectionUtils.setConf(bzip2, jobConf);
localFs.delete(workDir, true);
System.out.println(COLOR_BR_MAGENTA +
"testMoreBzip2() using non-native CBZip2InputStream (presumably)" +
COLOR_NORMAL);
// copy single-member test file to HDFS
String fn1 = "testConcatThenCompress.txt" + bzip2.getDefaultExtension();
Path fnLocal1 = new Path(System.getProperty("test.concat.data","/tmp"),fn1);
Path fnHDFS1 = new Path(workDir, fn1);
localFs.copyFromLocalFile(fnLocal1, fnHDFS1);
// copy multiple-member test file to HDFS
String fn2 = "testCompressThenConcat.txt" + bzip2.getDefaultExtension();
Path fnLocal2 = new Path(System.getProperty("test.concat.data","/tmp"),fn2);
Path fnHDFS2 = new Path(workDir, fn2);
localFs.copyFromLocalFile(fnLocal2, fnHDFS2);
FileInputFormat.setInputPaths(jobConf, workDir);
// here's first pair of BlockDecompressorStreams:
final FileInputStream in1 = new FileInputStream(fnLocal1.toString());
final FileInputStream in2 = new FileInputStream(fnLocal2.toString());
assertEquals("concat bytes available", 2567, in1.available());
assertEquals("concat bytes available", 3056, in2.available());
/*
// FIXME
// The while-loop below dies at the beginning of the 2nd concatenated
// member (after 17 lines successfully read) with:
//
// java.io.IOException: bad block header
// at org.apache.hadoop.io.compress.bzip2.CBZip2InputStream.initBlock(
// CBZip2InputStream.java:527)
//
// It is not critical to concatenated-gzip support, HADOOP-6835, so it's
// simply commented out for now (and HADOOP-6852 filed). If and when the
// latter issue is resolved--perhaps by fixing an error here--this code
// should be reenabled. Note that the doMultipleBzip2BufferSizes() test
// below uses the same testCompressThenConcat.txt.bz2 file but works fine.
CompressionInputStream cin2 = bzip2.createInputStream(in2);
LineReader in = new LineReader(cin2);
Text out = new Text();
int numBytes, totalBytes=0, lineNum=0;
while ((numBytes = in.readLine(out)) > 0) {
++lineNum;
totalBytes += numBytes;
}
in.close();
assertEquals("total uncompressed bytes in concatenated test file",
5346, totalBytes);
assertEquals("total uncompressed lines in concatenated test file",
84, lineNum);
*/
// test CBZip2InputStream with lots of different input-buffer sizes
doMultipleBzip2BufferSizes(jobConf);
}