/**
 * Returns the fields of a struct value as a list.
 *
 * @param data the struct value to inspect; may be {@code null}
 * @return {@code null} when {@code data} is {@code null}; otherwise a new list containing the
 *     elements of the array returned by {@code ArrayWritable.get()}
 * @throws UnsupportedOperationException if {@code data} is non-null but is not an
 *     {@code ArrayWritable}
 */
@Override
public List<Object> getStructFieldsDataAsList(final Object data) {
  if (data == null) {
    return null;
  }
  // Anything other than an ArrayWritable cannot be inspected by this method.
  if (!(data instanceof ArrayWritable)) {
    throw new UnsupportedOperationException("Cannot inspect " + data.getClass().getCanonicalName());
  }
  final Object[] fields = ((ArrayWritable) data).get();
  // Copy into a fresh ArrayList so the returned list is independent of the fixed-size view.
  return new ArrayList<Object>(Arrays.asList(fields));
}
java类org.apache.hadoop.io.ArrayWritable的实例源码
ArrayWritableObjectInspector.java 文件源码
项目:indexr
阅读 18
收藏 0
点赞 0
评论 0
ExcelFileInputFormat.java 文件源码
项目:hadoopoffice
阅读 19
收藏 0
点赞 0
评论 0
/**
 * Creates the record reader for the given split.
 *
 * <p>The mime type option is set to {@code "ms-excel"} on the task configuration before the
 * reader is constructed.
 *
 * @param split the input split; cast to {@code FileSplit}
 * @param ctx the task attempt context providing the configuration
 * @return a new {@code ExcelRecordReader}, or {@code null} if its construction failed with a
 *     {@code FormatNotUnderstoodException} or {@code GeneralSecurityException} (the exception
 *     is logged in that case)
 * @throws IOException declared by the overridden method
 */
@Override
public RecordReader<Text,ArrayWritable> createRecordReader(InputSplit split, TaskAttemptContext ctx) throws IOException {
  // Stays null when the reader cannot be constructed.
  RecordReader<Text,ArrayWritable> excelReader = null;
  try {
    // send configuration option to ms excel. The format of the Excel (old vs new) is detected automatically
    ctx.getConfiguration().set(HadoopOfficeReadConfiguration.CONF_MIMETYPE,"ms-excel");
    excelReader = new ExcelRecordReader(ctx.getConfiguration(), (FileSplit) split);
  } catch (FormatNotUnderstoodException formatProblem) {
    // log
    LOG.error(formatProblem);
  } catch (GeneralSecurityException securityProblem) {
    LOG.error(securityProblem);
  }
  return excelReader;
}
OfficeFormatHadoopExcelTest.java 文件源码
项目:hadoopoffice
阅读 22
收藏 0
点赞 0
评论 0
/**
 * Reads the first row of {@code excel2013encrypt.xlsx} through {@code ExcelFileInputFormat},
 * with the decryption password supplied in the job configuration, and checks the row key and
 * the three cells of that row.
 */
@Test
public void readExcelInputFormatExcel2013SingleSheetEncryptedPositive() throws IOException {
  JobConf job = new JobConf(defaultConf);
  String fileName = "excel2013encrypt.xlsx";
  String fileNameSpreadSheet = getClass().getClassLoader().getResource(fileName).getFile();
  FileInputFormat.setInputPaths(job, new Path(fileNameSpreadSheet));
  // set locale to the one of the test data
  job.set("hadoopoffice.read.locale.bcp47","de");
  // for decryption simply set the password
  job.set("hadoopoffice.read.security.crypt.password","test");

  ExcelFileInputFormat format = new ExcelFileInputFormat();
  format.configure(job);
  InputSplit[] inputSplits = format.getSplits(job,1);
  assertEquals(1, inputSplits.length, "Only one split generated for Excel file");
  RecordReader<Text, ArrayWritable> reader = format.getRecordReader(inputSplits[0], job, reporter);
  assertNotNull(reader, "Format returned null RecordReader");

  Text rowKey = new Text();
  ArrayWritable rowValue = new ArrayWritable(SpreadSheetCellDAO.class);
  boolean hasFirstRow = reader.next(rowKey, rowValue);
  assertTrue(hasFirstRow, "Input Split for Excel file contains row 1");
  assertEquals("[excel2013encrypt.xlsx]Sheet1!A1", rowKey.toString(), "Input Split for Excel file has keyname == \"[excel2013encrypt.xlsx]Sheet1!A1\"");
  assertEquals(3, rowValue.get().length, "Input Split for Excel file contains row 1 with 3 columns");

  // Each cell is fetched right before its own assertions.
  SpreadSheetCellDAO firstCell = (SpreadSheetCellDAO) rowValue.get()[0];
  assertEquals("test1", firstCell.getFormattedValue(), "Input Split for Excel file contains row 1 with cell 1 == \"test1\"");
  assertEquals("Sheet1", firstCell.getSheetName(), "Input Split for Excel file contains row 1 with cell 1 sheetname == \"Sheet1\"");
  assertEquals("A1", firstCell.getAddress(), "Input Split for Excel file contains row 1 with cell 1 address == \"A1\"");
  SpreadSheetCellDAO secondCell = (SpreadSheetCellDAO) rowValue.get()[1];
  assertEquals("test2", secondCell.getFormattedValue(), "Input Split for Excel file contains row 1 with cell 2 == \"test2\"");
  SpreadSheetCellDAO thirdCell = (SpreadSheetCellDAO) rowValue.get()[2];
  assertEquals("test3", thirdCell.getFormattedValue(), "Input Split for Excel file contains row 1 with cell 3 == \"test3\"");
}
OfficeFormatHadoopExcelTest.java 文件源码
项目:hadoopoffice
阅读 21
收藏 0
点赞 0
评论 0
/**
 * Reads the first row of {@code excel2003encrypt.xls} through {@code ExcelFileInputFormat},
 * with the decryption password supplied in the job configuration, and checks the row key and
 * the three cells of that row.
 */
@Test
public void readExcelInputFormatExcel2003SingleSheetEncryptedPositive() throws IOException {
  JobConf job = new JobConf(defaultConf);
  String fileName = "excel2003encrypt.xls";
  String fileNameSpreadSheet = getClass().getClassLoader().getResource(fileName).getFile();
  FileInputFormat.setInputPaths(job, new Path(fileNameSpreadSheet));
  // set locale to the one of the test data
  job.set("hadoopoffice.read.locale.bcp47","de");
  // for decryption simply set the password
  job.set("hadoopoffice.read.security.crypt.password","test");

  ExcelFileInputFormat format = new ExcelFileInputFormat();
  format.configure(job);
  InputSplit[] inputSplits = format.getSplits(job,1);
  assertEquals(1, inputSplits.length, "Only one split generated for Excel file");
  RecordReader<Text, ArrayWritable> reader = format.getRecordReader(inputSplits[0], job, reporter);
  assertNotNull(reader, "Format returned null RecordReader");

  Text rowKey = new Text();
  ArrayWritable rowValue = new ArrayWritable(SpreadSheetCellDAO.class);
  boolean hasFirstRow = reader.next(rowKey, rowValue);
  assertTrue(hasFirstRow, "Input Split for Excel file contains row 1");
  assertEquals("[excel2003encrypt.xls]Sheet1!A1", rowKey.toString(), "Input Split for Excel file has keyname == \"[excel2003encrypt.xls]Sheet1!A1\"");
  assertEquals(3, rowValue.get().length, "Input Split for Excel file contains row 1 with 3 columns");

  // Each cell is fetched right before its own assertions.
  SpreadSheetCellDAO firstCell = (SpreadSheetCellDAO) rowValue.get()[0];
  assertEquals("test1", firstCell.getFormattedValue(), "Input Split for Excel file contains row 1 with cell 1 == \"test1\"");
  assertEquals("Sheet1", firstCell.getSheetName(), "Input Split for Excel file contains row 1 with cell 1 sheetname == \"Sheet1\"");
  assertEquals("A1", firstCell.getAddress(), "Input Split for Excel file contains row 1 with cell 1 address == \"A1\"");
  SpreadSheetCellDAO secondCell = (SpreadSheetCellDAO) rowValue.get()[1];
  assertEquals("test2", secondCell.getFormattedValue(), "Input Split for Excel file contains row 1 with cell 2 == \"test2\"");
  SpreadSheetCellDAO thirdCell = (SpreadSheetCellDAO) rowValue.get()[2];
  assertEquals("test3", thirdCell.getFormattedValue(), "Input Split for Excel file contains row 1 with cell 3 == \"test3\"");
}
ArrayWritableReadSupport.java 文件源码
项目:carbondata
阅读 17
收藏 0
点赞 0
评论 0
/**
 * Converts a row of objects into an {@code ArrayWritable} of their string forms.
 *
 * <p>Each element is converted with {@code toString()}, so a {@code null} element (or a
 * {@code null} array) results in a {@code NullPointerException}.
 *
 * @param data the row values
 * @return an {@code ArrayWritable} built from the string form of every element, in order
 */
@Override public ArrayWritable readRow(Object[] data) {
  final String[] texts = new String[data.length];
  int position = 0;
  for (Object value : data) {
    texts[position] = value.toString();
    position++;
  }
  return new ArrayWritable(texts);
}
OfficeFormatHadoopExcelTest.java 文件源码
项目:hadoopoffice
阅读 25
收藏 0
点赞 0
评论 0
/**
 * Reads the first two rows of {@code excel2013linkedworkbooks.xlsx} through
 * {@code ExcelFileInputFormat} with linked-workbook reading enabled and missing linked
 * workbooks not ignored, then checks the row key and cell values of both rows.
 *
 * <p>The assertion messages for the second row now say "row 2"; they previously said
 * "row 1", which would have pointed at the wrong row on failure.
 */
@Test
public void readExcelInputFormatExcel2013LinkedWorkbook() throws IOException {
  JobConf job = new JobConf(defaultConf);
  ClassLoader classLoader = getClass().getClassLoader();
  String fileName = "excel2013linkedworkbooks.xlsx";
  String fileNameSpreadSheet = classLoader.getResource(fileName).getFile();
  Path file = new Path(fileNameSpreadSheet);
  FileInputFormat.setInputPaths(job, file);
  // set locale to the one of the test data
  job.set("hadoopoffice.read.locale.bcp47","de");
  // enable option to read linked workbooks
  job.setBoolean("hadoopoffice.read.linkedworkbooks",true);
  job.setBoolean("hadoopoffice.read.ignoremissinglinkedworkbooks",false);

  ExcelFileInputFormat format = new ExcelFileInputFormat();
  format.configure(job);
  InputSplit[] inputSplits = format.getSplits(job,1);
  assertEquals(1, inputSplits.length, "Only one split generated for Excel file");
  RecordReader<Text, ArrayWritable> reader = format.getRecordReader(inputSplits[0], job, reporter);
  assertNotNull(reader, "Format returned null RecordReader");

  Text spreadSheetKey = new Text();
  ArrayWritable spreadSheetValue = new ArrayWritable(SpreadSheetCellDAO.class);

  // Row 1: three plain cells.
  assertTrue(reader.next(spreadSheetKey,spreadSheetValue), "Input Split for Excel file contains row 1");
  assertEquals("[excel2013linkedworkbooks.xlsx]Sheet1!A1", spreadSheetKey.toString(), "Input Split for Excel file has keyname == \"[excel2013linkedworkbooks.xlsx]Sheet1!A1\"");
  assertEquals(3, spreadSheetValue.get().length, "Input Split for Excel file contains row 1 with 3 columns");
  assertEquals("test1", ((SpreadSheetCellDAO)spreadSheetValue.get()[0]).getFormattedValue(), "Input Split for Excel file contains row 1 with cell 1 == \"test1\"");
  assertEquals("Sheet1", ((SpreadSheetCellDAO)spreadSheetValue.get()[0]).getSheetName(), "Input Split for Excel file contains row 1 with cell 1 sheetname == \"Sheet1\"");
  assertEquals("A1", ((SpreadSheetCellDAO)spreadSheetValue.get()[0]).getAddress(), "Input Split for Excel file contains row 1 with cell 1 address == \"A1\"");
  assertEquals("test2", ((SpreadSheetCellDAO)spreadSheetValue.get()[1]).getFormattedValue(), "Input Split for Excel file contains row 1 with cell 2 == \"test2\"");
  assertEquals("test3", ((SpreadSheetCellDAO)spreadSheetValue.get()[2]).getFormattedValue(), "Input Split for Excel file contains row 1 with cell 3 == \"test3\"");

  // Row 2: two cells; the messages name row 2 so a failure points at the right row.
  assertTrue(reader.next(spreadSheetKey,spreadSheetValue), "Input Split for Excel file contains row 2");
  assertEquals(2, spreadSheetValue.get().length, "Input Split for Excel file contains row 2 with 2 columns");
  assertEquals("3", ((SpreadSheetCellDAO)spreadSheetValue.get()[0]).getFormattedValue(), "Input Split for Excel file contains row 2 with cell 1 == \"3\" (this tests also if the cached value of 6 is ignored)");
  assertEquals("5", ((SpreadSheetCellDAO)spreadSheetValue.get()[1]).getFormattedValue(), "Input Split for Excel file contains row 2 with cell 2 == \"5\"");
}
OfficeFormatHadoopExcelTest.java 文件源码
项目:hadoopoffice
阅读 25
收藏 0
点赞 0
评论 0
/**
 * Reads the first row of {@code excel2013encrypt.xlsx} through {@code ExcelFileInputFormat},
 * with the decryption password retrieved from the JCEKS keystore {@code keystore.jceks}
 * configured on the job, and checks the row key and the three cells of that row.
 */
@Test
public void readExcelInputFormatExcel2013SingleSheetEncryptedKeyStorePositive() throws IOException {
  JobConf job = new JobConf(defaultConf);
  ClassLoader resources = getClass().getClassLoader();
  String fileName = "excel2013encrypt.xlsx";
  String fileNameSpreadSheet = resources.getResource(fileName).getFile();
  FileInputFormat.setInputPaths(job, new Path(fileNameSpreadSheet));
  String keystoreFilename = "keystore.jceks";
  String filenameKeyStore = resources.getResource(keystoreFilename).getFile().toString();
  // set locale to the one of the test data
  job.set("hadoopoffice.read.locale.bcp47","de");
  // for decryption set the keystore to retrieve the password
  job.set("hadoopoffice.read.security.crypt.credential.keystore.file", filenameKeyStore);
  job.set("hadoopoffice.read.security.crypt.credential.keystore.type","JCEKS");
  job.set("hadoopoffice.read.security.crypt.credential.keystore.password","changeit");

  ExcelFileInputFormat format = new ExcelFileInputFormat();
  format.configure(job);
  InputSplit[] inputSplits = format.getSplits(job,1);
  assertEquals(1, inputSplits.length, "Only one split generated for Excel file");
  RecordReader<Text, ArrayWritable> reader = format.getRecordReader(inputSplits[0], job, reporter);
  assertNotNull(reader, "Format returned null RecordReader");

  Text rowKey = new Text();
  ArrayWritable rowValue = new ArrayWritable(SpreadSheetCellDAO.class);
  boolean hasFirstRow = reader.next(rowKey, rowValue);
  assertTrue(hasFirstRow, "Input Split for Excel file contains row 1");
  assertEquals("[excel2013encrypt.xlsx]Sheet1!A1", rowKey.toString(), "Input Split for Excel file has keyname == \"[excel2013encrypt.xlsx]Sheet1!A1\"");
  assertEquals(3, rowValue.get().length, "Input Split for Excel file contains row 1 with 3 columns");

  // Each cell is fetched right before its own assertions.
  SpreadSheetCellDAO firstCell = (SpreadSheetCellDAO) rowValue.get()[0];
  assertEquals("test1", firstCell.getFormattedValue(), "Input Split for Excel file contains row 1 with cell 1 == \"test1\"");
  assertEquals("Sheet1", firstCell.getSheetName(), "Input Split for Excel file contains row 1 with cell 1 sheetname == \"Sheet1\"");
  assertEquals("A1", firstCell.getAddress(), "Input Split for Excel file contains row 1 with cell 1 address == \"A1\"");
  SpreadSheetCellDAO secondCell = (SpreadSheetCellDAO) rowValue.get()[1];
  assertEquals("test2", secondCell.getFormattedValue(), "Input Split for Excel file contains row 1 with cell 2 == \"test2\"");
  SpreadSheetCellDAO thirdCell = (SpreadSheetCellDAO) rowValue.get()[2];
  assertEquals("test3", thirdCell.getFormattedValue(), "Input Split for Excel file contains row 1 with cell 3 == \"test3\"");
}
IndexRSerde.java 文件源码
项目:indexr
阅读 23
收藏 0
点赞 0
评论 0
/**
 * Deserializes a row, re-mapping its columns when the current mapping requires it.
 *
 * <p>Different segments could contain different schemas, and in particular different column
 * orders, so column names are re-mapped to the real column ids.
 *
 * @param writable the incoming row; cast to {@code SchemaWritable}
 * @return the incoming row itself when no mapping is needed; otherwise a new
 *     {@code ArrayWritable} sized to {@code columnNames} whose mapped positions are filled
 *     from the incoming row
 * @throws SerDeException declared by the overridden method
 */
@Override
public Object deserialize(Writable writable) throws SerDeException {
  final SchemaWritable row = (SchemaWritable) writable;
  if (this.projectCols != row.columns) {
    // Don't have to do it every time, only when schema is changed.
    mapColIndex(row.columns);
    projectCols = row.columns;
  }

  if (!isMapNeeded) {
    serdeSize = columnNames.size();
    return row;
  }

  final Writable[] source = row.get();
  final Writable[] target = new Writable[columnNames.size()];
  for (int i = 0; i < validColIndexes.length; i++) {
    // Place the value found at the mapped column id into its output position.
    target[validColIndexes[i]] = source[validColMapIds[i]];
  }
  serdeSize = validColIndexes.length;
  return new ArrayWritable(Writable.class, target);
}
StringUtils.java 文件源码
项目:incubator-pirk
阅读 31
收藏 0
点赞 0
评论 0
/**
 * Converts a string in JSON array format into an ArrayWritable.
 * <p>
 * The first {@code [} and the first {@code ]} are removed, every double quote is stripped, and the remainder is split on
 * commas together with any whitespace around them. Each resulting element is logged at debug level.
 *
 * @param jsonString
 *          the JSON array formatted input string
 * @return an ArrayWritable built from the extracted elements
 */
public static ArrayWritable jsonArrayStringtoArrayWritable(String jsonString)
{
  String[] elements = jsonString.replaceFirst("\\[", "").replaceFirst("\\]", "").replaceAll("\"", "").split("\\s*,\\s*");
  logger.debug("elements = ");
  for (int i = 0; i < elements.length; i++)
  {
    logger.debug("element: " + elements[i]);
  }
  return new ArrayWritable(elements);
}
OfficeFormatHadoopExcelTest.java 文件源码
项目:hadoopoffice
阅读 26
收藏 0
点赞 0
评论 0
/**
 * Reads the first row of {@code excel2003encrypt.xls} through {@code ExcelFileInputFormat}
 * with the low footprint option enabled and the decryption password supplied in the job
 * configuration, and checks the row key and the three cells of that row.
 */
@Test
public void readExcelInputFormatExcel2003SingleSheetEncryptedPositiveLowFootprint() throws IOException {
  JobConf job = new JobConf(defaultConf);
  String fileName = "excel2003encrypt.xls";
  String fileNameSpreadSheet = getClass().getClassLoader().getResource(fileName).getFile();
  FileInputFormat.setInputPaths(job, new Path(fileNameSpreadSheet));
  // set locale to the one of the test data
  job.set("hadoopoffice.read.locale.bcp47","de");
  // low footprint
  job.set("hadoopoffice.read.lowFootprint", "true");
  // for decryption simply set the password
  job.set("hadoopoffice.read.security.crypt.password","test");

  ExcelFileInputFormat format = new ExcelFileInputFormat();
  format.configure(job);
  InputSplit[] inputSplits = format.getSplits(job,1);
  assertEquals(1, inputSplits.length, "Only one split generated for Excel file");
  RecordReader<Text, ArrayWritable> reader = format.getRecordReader(inputSplits[0], job, reporter);
  assertNotNull(reader, "Format returned null RecordReader");

  Text rowKey = new Text();
  ArrayWritable rowValue = new ArrayWritable(SpreadSheetCellDAO.class);
  boolean hasFirstRow = reader.next(rowKey, rowValue);
  assertTrue(hasFirstRow, "Input Split for Excel file contains row 1");
  assertEquals("[excel2003encrypt.xls]Sheet1!A1", rowKey.toString(), "Input Split for Excel file has keyname == \"[excel2003encrypt.xls]Sheet1!A1\"");
  assertEquals(3, rowValue.get().length, "Input Split for Excel file contains row 1 with 3 columns");

  // Each cell is fetched right before its own assertions.
  SpreadSheetCellDAO firstCell = (SpreadSheetCellDAO) rowValue.get()[0];
  assertEquals("test1", firstCell.getFormattedValue(), "Input Split for Excel file contains row 1 with cell 1 == \"test1\"");
  assertEquals("Sheet1", firstCell.getSheetName(), "Input Split for Excel file contains row 1 with cell 1 sheetname == \"Sheet1\"");
  assertEquals("A1", firstCell.getAddress(), "Input Split for Excel file contains row 1 with cell 1 address == \"A1\"");
  SpreadSheetCellDAO secondCell = (SpreadSheetCellDAO) rowValue.get()[1];
  assertEquals("test2", secondCell.getFormattedValue(), "Input Split for Excel file contains row 1 with cell 2 == \"test2\"");
  SpreadSheetCellDAO thirdCell = (SpreadSheetCellDAO) rowValue.get()[2];
  assertEquals("test3", thirdCell.getFormattedValue(), "Input Split for Excel file contains row 1 with cell 3 == \"test3\"");
}