AbstractMapFileWriter.java 文件源码-java代码片段

/**
 * Initializes the writer's configuration fields and builds the writer options (key class and value class)
 * from the supplied settings.
 *
 * @param outputDir           Output directory for the map file(s)
 * @param mapFileSplitSize    Split size for the map file: if 0, use a single map file for all output. If > 0,
 *                            multiple map files will be used: each will contain a maximum of mapFileSplitSize.
 *                            This can be used to avoid having a single multi gigabyte map file, which may be
 *                            undesirable in some cases (transfer across the network, for example)
 * @param convertTextTo       If null: make no changes to Text writable objects. If non-null, Text writable instances
 *                            will be converted to this type. This is useful when you would rather store numerical
 *                            values even if the original record reader produces strings/text. Passing
 *                            {@code WritableType.Text} is treated the same as passing null (no conversion).
 * @param indexInterval       Index interval for the Map file. Must be > 0. Defaults to 1, which is suitable for
 *                            most cases
 * @param filenamePattern     The naming pattern for the map files. Used with String.format(pattern, int)
 * @param hadoopConfiguration Hadoop configuration. Must not be null. If it already contains a value for
 *                            {@code MAP_FILE_INDEX_INTERVAL_KEY}, that value is overwritten with indexInterval.
 * @throws UnsupportedOperationException if indexInterval is not strictly positive
 */
public AbstractMapFileWriter(@NonNull File outputDir, int mapFileSplitSize, WritableType convertTextTo,
                             int indexInterval, String filenamePattern,
                             org.apache.hadoop.conf.Configuration hadoopConfiguration) {
    // The interval must be strictly positive; the message states the same bound the check enforces.
    if(indexInterval <= 0){
        throw new UnsupportedOperationException("Index interval: must be > 0 (got: " + indexInterval + ")");
    }
    this.outputDir = outputDir;
    this.mapFileSplitSize = mapFileSplitSize;
    // Converting Text to Text is a no-op, so normalize it to "no conversion"
    if (convertTextTo == WritableType.Text) {
        convertTextTo = null;
    }
    this.convertTextTo = convertTextTo;
    this.indexInterval = indexInterval;
    this.filenamePattern = filenamePattern;

    this.hadoopConfiguration = hadoopConfiguration;
    // NOTE(review): the interval is only written when the key is ALREADY present in the configuration.
    // If the key is absent, indexInterval is not propagated to the configuration here - confirm whether
    // this condition is intentional or should be unconditional.
    if(this.hadoopConfiguration.get(MAP_FILE_INDEX_INTERVAL_KEY) != null){
        this.hadoopConfiguration.set(MAP_FILE_INDEX_INTERVAL_KEY, String.valueOf(indexInterval));
    }

    // NOTE(review): getValueClass() is invoked during construction; if subclasses override it, the override
    // runs before the subclass's own fields are initialized - confirm implementations do not rely on them.
    opts = new SequenceFile.Writer.Option[]{MapFile.Writer.keyClass(KEY_CLASS),
            SequenceFile.Writer.valueClass(getValueClass())};

}