SequenceFileAnalyzer.java 文件源码

java
阅读 23 收藏 0 点赞 0 评论 0

项目:wherehowsX 作者:
/**
 * Builds a schema record for the sequence file at {@code path}.
 *
 * <p>Opens the file with a {@link SequenceFile.Reader}, maps the reader's key and value class
 * names through {@code getWritableType}, and embeds them as the "Key" and "Value" fields of a
 * JSON record schema. The file's modification time, owner, group and permission are taken from
 * its {@code FileStatus} and stored alongside the schema.
 *
 * @param path location of the candidate sequence file
 * @return the populated record, or {@code null} when the file does not exist or could not be
 *     processed as a sequence file
 * @throws IOException if the existence check on {@code path} fails
 */
@Override
public DatasetJsonRecord getSchema(Path path) throws IOException {
    String filePath = path.toUri().getPath();
    if (!fs.exists(path)) {
        LOG.error("sequencefileanalyzer file : {} is not exist on hdfs", filePath);
        return null;
    }

    DatasetJsonRecord record = null;
    LOG.info("sequencefileanalyzer start parse schema for  file path : {}", filePath);
    // try-with-resources guarantees the reader (and its underlying stream) is released on
    // every path; the original never closed it.
    try (SequenceFile.Reader reader =
            new SequenceFile.Reader(fs.getConf(), SequenceFile.Reader.file(path))) {
        String keyName = "Key";
        String keyType = getWritableType(reader.getKeyClassName());
        String valueName = "Value";
        String valueType = getWritableType(reader.getValueClassName());
        FileStatus status = fs.getFileStatus(path);
        String storage = STORAGE_TYPE;
        String codec = "sequence.codec";
        String schemaString = "{\"fields\": [{\"name\": \"" + keyName + "\", \"type\": \"" + keyType
                + "\"}, {\"name\": \"" + valueName + "\", \"type\": \"" + valueType
                + "\"}], \"name\": \"Result\", \"namespace\": \"com.tencent.lake\", \"type\": \"record\"}";

        record = new DatasetJsonRecord(schemaString, filePath, status.getModificationTime(),
                status.getOwner(), status.getGroup(), status.getPermission().toString(),
                codec, storage, "");
        LOG.info("sequencefileanalyzer parse path :{},schema is {}", filePath, record.toCsvString());
    } catch (Exception e) {
        // Best-effort contract: any failure here is reported and the caller gets whatever
        // record was built (null if none). The exception is passed as the trailing argument
        // so the logger prints the real stack trace; the original logged
        // e.getStackTrace().toString(), which is only the array's identity string.
        LOG.error("path : {} content  is not Sequence File format content  ", filePath, e);
    }
    return record;
}
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号