/**
 * Creates a map file writer: validates the index interval, stores the settings, and builds the
 * key/value class options used for the underlying writer.
 *
 * @param outputDir           Output directory for the map file(s). Must not be null.
 * @param mapFileSplitSize    Split size for the map file: if 0, use a single map file for all output. If &gt; 0,
 *                            multiple map files will be used: each will contain a maximum of mapFileSplitSize.
 *                            This can be used to avoid having a single multi gigabyte map file, which may be
 *                            undesirable in some cases (transfer across the network, for example)
 * @param convertTextTo       If null: Make no changes to Text writable objects. If non-null, Text writable instances
 *                            will be converted to this type. This is useful when one would rather store numerical
 *                            values even if the original record reader produces strings/text.
 *                            {@code WritableType.Text} is treated the same as null (no conversion).
 * @param indexInterval       Index interval for the Map file. Must be strictly positive.
 *                            Defaults to 1, which is suitable for most cases
 * @param filenamePattern     The naming pattern for the map files. Used with String.format(pattern, int)
 * @param hadoopConfiguration Hadoop configuration. If null, a new default configuration is created.
 *                            Note that a non-null configuration may be modified by this constructor
 *                            (the index interval entry may be overwritten).
 * @throws UnsupportedOperationException if indexInterval is zero or negative
 */
public AbstractMapFileWriter(@NonNull File outputDir, int mapFileSplitSize, WritableType convertTextTo,
                int indexInterval, String filenamePattern,
                org.apache.hadoop.conf.Configuration hadoopConfiguration) {
    if (indexInterval <= 0) {
        // The check rejects 0 as well, so the message must say "> 0" (exception type kept for callers).
        throw new UnsupportedOperationException("Index interval: must be > 0 (got: " + indexInterval + ")");
    }
    this.outputDir = outputDir;
    this.mapFileSplitSize = mapFileSplitSize;
    // Converting Text to Text is a no-op, so normalize it to "no conversion".
    this.convertTextTo = (convertTextTo == WritableType.Text ? null : convertTextTo);
    this.indexInterval = indexInterval;
    this.filenamePattern = filenamePattern;
    // Fall back to a default configuration instead of failing with a NullPointerException below.
    this.hadoopConfiguration = (hadoopConfiguration == null
                    ? new org.apache.hadoop.conf.Configuration()
                    : hadoopConfiguration);
    // NOTE(review): the interval is only written when the key is already present in the configuration;
    // if the key is absent the configuration is left untouched. Confirm this is the intended behavior.
    if (this.hadoopConfiguration.get(MAP_FILE_INDEX_INTERVAL_KEY) != null) {
        this.hadoopConfiguration.set(MAP_FILE_INDEX_INTERVAL_KEY, String.valueOf(indexInterval));
    }
    opts = new SequenceFile.Writer.Option[] {
                    MapFile.Writer.keyClass(KEY_CLASS),
                    SequenceFile.Writer.valueClass(getValueClass())};
}
AbstractMapFileWriter.java 文件源码
java
阅读 22
收藏 0
点赞 0
评论 0
项目:DataVec
作者:
评论列表
文章目录