java类org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter的实例源码

TestMapreduceConfigFields.java 文件源码 项目:aliyun-oss-hadoop-fs 阅读 38 收藏 0 点赞 0 评论 0
@SuppressWarnings("deprecation")
@Override
public void initializeMemberVariables() {
  // Cross-check the properties in mapred-default.xml against the constants
  // declared by these configuration classes.
  xmlFilename = "mapred-default.xml"; // plain literal; new String(...) was redundant
  configurationClasses = new Class[] { MRJobConfig.class, MRConfig.class,
      JHAdminConfig.class, ShuffleHandler.class, FileOutputFormat.class,
      FileInputFormat.class, Job.class, NLineInputFormat.class,
      JobConf.class, FileOutputCommitter.class };

  // Initialize used variables
  configurationPropsToSkipCompare = new HashSet<String>();

  // Fail when a declared constant is missing from the XML, but tolerate
  // XML entries that have no matching constant.
  errorIfMissingConfigProps = true;
  errorIfMissingXmlProps = false;

  // Ignore deprecated MR1 properties in JobConf
  configurationPropsToSkipCompare
      .add(JobConf.MAPRED_JOB_MAP_MEMORY_MB_PROPERTY);
  configurationPropsToSkipCompare
      .add(JobConf.MAPRED_JOB_REDUCE_MEMORY_MB_PROPERTY);
}
HFileOutputFormat.java 文件源码 项目:terrapin 阅读 37 收藏 0 点赞 0 评论 0
public RecordWriter<BytesWritable, BytesWritable> getRecordWriter(
        TaskAttemptContext context) throws IOException {
  // The committer decides where this task attempt's temporary output lives.
  final Path jobOutputPath = FileOutputFormat.getOutputPath(context);
  final Path taskWorkDir = new FileOutputCommitter(jobOutputPath, context).getWorkPath();
  final Configuration configuration = context.getConfiguration();
  final FileSystem fileSystem = taskWorkDir.getFileSystem(configuration);

  // Block size defaults to 16KB; compression algorithm defaults to snappy.
  final int hfileBlockSize = configuration.getInt(Constants.HFILE_BLOCKSIZE, 16384);
  final Compression.Algorithm compression = getAlgorithm(
      configuration.get(Constants.HFILE_COMPRESSION));

  final int partitionIndex = context.getTaskAttemptID().getTaskID().getId();
  final StoreFile.Writer storeFileWriter =
      new StoreFile.WriterBuilder(configuration, new CacheConfig(configuration), fileSystem, hfileBlockSize)
          .withFilePath(hfilePath(jobOutputPath, partitionIndex))
          .withCompression(compression)
          .build();
  return new HFileRecordWriter(storeFileWriter);
}
TemporaryFileOutputFormat.java 文件源码 项目:asakusafw-compiler 阅读 38 收藏 0 点赞 0 评论 0
/**
 * Creates a new {@link RecordWriter} that stores temporary data into the
 * committer's work directory under a task-unique file name.
 * @param <V> value type
 * @param context current task attempt context
 * @param name output name
 * @param dataType value class
 * @return the created writer
 * @throws IOException if failed to create a new {@link RecordWriter}
 * @throws InterruptedException if interrupted
 */
public <V> RecordWriter<NullWritable, V> createRecordWriter(
        TaskAttemptContext context,
        String name,
        Class<V> dataType) throws IOException, InterruptedException {
    Configuration configuration = context.getConfiguration();
    FileOutputCommitter outputCommitter = (FileOutputCommitter) getOutputCommitter(context);
    Path target = new Path(
            outputCommitter.getWorkPath(),
            FileOutputFormat.getUniqueFile(context, name, "")); //$NON-NLS-1$
    ModelOutput<V> sink = TemporaryStorage.openOutput(configuration, dataType, target);
    return new RecordWriter<NullWritable, V>() {
        @Override
        public void write(NullWritable key, V value) throws IOException {
            // Keys are NullWritable; only the value payload is stored.
            sink.write(value);
        }
        @Override
        public void close(TaskAttemptContext ignored) throws IOException {
            sink.close();
        }
        @Override
        public String toString() {
            return String.format("TemporaryOutput(%s)", target); //$NON-NLS-1$
        }
    };
}
ConfigurableHDFSFileSink.java 文件源码 项目:components 阅读 31 收藏 0 点赞 0 评论 0
@Override
public void open(String uId) throws Exception {
    this.hash = uId.hashCode();

    Job job = ((ConfigurableHDFSFileSink<K, V>) getWriteOperation().getSink()).jobInstance();
    FileOutputFormat.setOutputPath(job, new Path(path));

    // Each Writer is responsible for writing one bundle of elements and is represented by one
    // unique Hadoop task based on uId/hash. All tasks share the same job ID. Since Dataflow
    // handles retrying of failed bundles, each task has one attempt only.
    JobID jobId = job.getJobID();
    TaskID taskId = new TaskID(jobId, TaskType.REDUCE, hash);
    configure(job);
    context = new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID(taskId, 0));

    // Class.newInstance() is deprecated (it propagates constructor exceptions
    // unwrapped); instantiate via the no-arg constructor instead. The declared
    // "throws Exception" already covers the reflective exceptions.
    FileOutputFormat<K, V> outputFormat = formatClass.getDeclaredConstructor().newInstance();
    recordWriter = outputFormat.getRecordWriter(context);
    outputCommitter = (FileOutputCommitter) outputFormat.getOutputCommitter(context);
}
ForwardingBigQueryFileOutputCommitter.java 文件源码 项目:bigdata-interop 阅读 39 收藏 0 点赞 0 评论 0
/**
 * Queries the file system for the URIs of all files in the base output directory that are not
 * directories and whose name isn't {@link FileOutputCommitter#SUCCEEDED_FILE_NAME}.
 *
 * @return a list of all URIs in the form of strings.
 * @throws IOException if unable to query for the files in the base output directory.
 */
protected List<String> getOutputFileURIs() throws IOException {
  // Enumerate over all files in the output path.
  FileStatus[] outputFiles = outputFileSystem.listStatus(outputPath);
  // Program to the List interface; presize since the upper bound is known.
  List<String> sourceUris = new ArrayList<String>(outputFiles.length);

  for (FileStatus fileStatus : outputFiles) {
    // Skip the success file and directories as they're not relevant to BigQuery.
    // isDirectory() replaces the deprecated FileStatus.isDir().
    if (!fileStatus.isDirectory()
        && !fileStatus.getPath().getName().equals(FileOutputCommitter.SUCCEEDED_FILE_NAME)) {
      sourceUris.add(fileStatus.getPath().toString());
    }
  }

  return sourceUris;
}
HFileAppender.java 文件源码 项目:tajo 阅读 41 收藏 0 点赞 0 评论 0
@Override
public void init() throws IOException {
  super.init();

  // Point the MR output directory at this query's staging result dir.
  Configuration taskConf = new Configuration();
  Path stagingResultDir = new Path(stagingDir, TajoConstants.RESULT_DIR_NAME);
  taskConf.set(FileOutputFormat.OUTDIR, stagingResultDir.toString());

  // Build a synthetic MR task attempt id from the Tajo task attempt id so
  // the HFile output format can be driven through the MR API.
  ExecutionBlockId ebId = taskAttemptId.getTaskId().getExecutionBlockId();
  writerContext = new TaskAttemptContextImpl(taskConf,
      new TaskAttemptID(ebId.getQueryId().toString(), ebId.getId(), TaskType.MAP,
          taskAttemptId.getTaskId().getId(), taskAttemptId.getId()));

  HFileOutputFormat2 hFileOutputFormat2 = new HFileOutputFormat2();
  try {
    writer = hFileOutputFormat2.getRecordWriter(writerContext);

    committer = new FileOutputCommitter(FileOutputFormat.getOutputPath(writerContext), writerContext);
    workingFilePath = committer.getWorkPath();
  } catch (InterruptedException e) {
    // Restore the interrupt status before translating to IOException so
    // callers can still observe that the thread was interrupted.
    Thread.currentThread().interrupt();
    throw new IOException(e.getMessage(), e);
  }

  LOG.info("Created hbase file writer: " + workingFilePath);
}
MRToTezHelper.java 文件源码 项目:spork 阅读 30 收藏 0 点赞 0 评论 0
private static void populateMRSettingsToRetain() {
    // MR settings that must survive translation into the Tez config,
    // grouped by the class that declares them.

    // FileInputFormat
    String[] inputFormatSettings = {
            FileInputFormat.INPUT_DIR,
            FileInputFormat.SPLIT_MAXSIZE,
            FileInputFormat.SPLIT_MINSIZE,
            FileInputFormat.PATHFILTER_CLASS,
            FileInputFormat.NUM_INPUT_FILES,
            FileInputFormat.INPUT_DIR_RECURSIVE,
    };

    // FileOutputFormat
    String[] outputFormatSettings = {
            MRConfiguration.OUTPUT_BASENAME,
            FileOutputFormat.COMPRESS,
            FileOutputFormat.COMPRESS_CODEC,
            FileOutputFormat.COMPRESS_TYPE,
            FileOutputFormat.OUTDIR,
            FileOutputCommitter.SUCCESSFUL_JOB_OUTPUT_DIR_MARKER,
    };

    for (String setting : inputFormatSettings) {
        mrSettingsToRetain.add(setting);
    }
    for (String setting : outputFormatSettings) {
        mrSettingsToRetain.add(setting);
    }
}
TestMROutput.java 文件源码 项目:tez 阅读 32 收藏 0 点赞 0 评论 0
@Test(timeout = 5000)
public void testNewAPI_TextOutputFormat() throws Exception {
  String outputPath = "/tmp/output";
  Configuration configuration = new Configuration();
  configuration.setBoolean(MRConfig.IS_MAP_PROCESSOR, true);
  DataSinkDescriptor sink = MROutput
      .createConfigBuilder(configuration, TextOutputFormat.class, outputPath)
      .build();

  OutputContext outputContext = createMockOutputContext(sink.getOutputDescriptor().getUserPayload());
  MROutput mrOutput = new MROutput(outputContext, 2);
  mrOutput.initialize();

  // New-API map-side output: the new-API fields must be populated while
  // their old-API counterparts stay null.
  assertEquals(true, mrOutput.isMapperOutput);
  assertEquals(true, mrOutput.useNewApi);
  assertEquals(TextOutputFormat.class, mrOutput.newOutputFormat.getClass());
  assertNull(mrOutput.oldOutputFormat);
  assertNotNull(mrOutput.newApiTaskAttemptContext);
  assertNull(mrOutput.oldApiTaskAttemptContext);
  assertNotNull(mrOutput.newRecordWriter);
  assertNull(mrOutput.oldRecordWriter);
  assertEquals(FileOutputCommitter.class, mrOutput.committer.getClass());
}
TestMROutput.java 文件源码 项目:tez 阅读 66 收藏 0 点赞 0 评论 0
@Test(timeout = 5000)
public void testOldAPI_TextOutputFormat() throws Exception {
  String outputPath = "/tmp/output";
  Configuration configuration = new Configuration();
  configuration.setBoolean(MRConfig.IS_MAP_PROCESSOR, false);
  DataSinkDescriptor sink = MROutput
      .createConfigBuilder(configuration, org.apache.hadoop.mapred.TextOutputFormat.class, outputPath)
      .build();

  OutputContext outputContext = createMockOutputContext(sink.getOutputDescriptor().getUserPayload());
  MROutput mrOutput = new MROutput(outputContext, 2);
  mrOutput.initialize();

  // Old-API reduce-side output: the old-API fields must be populated while
  // their new-API counterparts stay null.
  assertEquals(false, mrOutput.isMapperOutput);
  assertEquals(false, mrOutput.useNewApi);
  assertEquals(org.apache.hadoop.mapred.TextOutputFormat.class, mrOutput.oldOutputFormat.getClass());
  assertNull(mrOutput.newOutputFormat);
  assertNotNull(mrOutput.oldApiTaskAttemptContext);
  assertNull(mrOutput.newApiTaskAttemptContext);
  assertNotNull(mrOutput.oldRecordWriter);
  assertNull(mrOutput.newRecordWriter);
  assertEquals(org.apache.hadoop.mapred.FileOutputCommitter.class, mrOutput.committer.getClass());
}
TestMROutput.java 文件源码 项目:tez 阅读 37 收藏 0 点赞 0 评论 0
@Test(timeout = 5000)
public void testNewAPI_SequenceFileOutputFormat() throws Exception {
  String outputPath = "/tmp/output";
  JobConf jobConf = new JobConf();
  jobConf.setOutputKeyClass(NullWritable.class);
  jobConf.setOutputValueClass(Text.class);
  DataSinkDescriptor sink = MROutput
      .createConfigBuilder(jobConf, SequenceFileOutputFormat.class, outputPath)
      .build();

  OutputContext outputContext = createMockOutputContext(sink.getOutputDescriptor().getUserPayload());
  MROutput mrOutput = new MROutput(outputContext, 2);
  mrOutput.initialize();
  // New-API output: key/value classes must flow through the new-API
  // task attempt context; old-API fields stay null.
  assertEquals(true, mrOutput.useNewApi);
  assertEquals(SequenceFileOutputFormat.class, mrOutput.newOutputFormat.getClass());
  assertNull(mrOutput.oldOutputFormat);
  assertEquals(NullWritable.class, mrOutput.newApiTaskAttemptContext.getOutputKeyClass());
  assertEquals(Text.class, mrOutput.newApiTaskAttemptContext.getOutputValueClass());
  assertNull(mrOutput.oldApiTaskAttemptContext);
  assertNotNull(mrOutput.newRecordWriter);
  assertNull(mrOutput.oldRecordWriter);
  assertEquals(FileOutputCommitter.class, mrOutput.committer.getClass());
}
TestMROutput.java 文件源码 项目:tez 阅读 29 收藏 0 点赞 0 评论 0
@Test(timeout = 5000)
public void testOldAPI_SequenceFileOutputFormat() throws Exception {
  String outputPath = "/tmp/output";
  JobConf jobConf = new JobConf();
  jobConf.setOutputKeyClass(NullWritable.class);
  jobConf.setOutputValueClass(Text.class);
  DataSinkDescriptor sink = MROutput
      .createConfigBuilder(jobConf, org.apache.hadoop.mapred.SequenceFileOutputFormat.class, outputPath)
      .build();

  OutputContext outputContext = createMockOutputContext(sink.getOutputDescriptor().getUserPayload());
  MROutput mrOutput = new MROutput(outputContext, 2);
  mrOutput.initialize();
  // Old-API output: key/value classes must flow through the old-API
  // task attempt context; new-API fields stay null.
  assertEquals(false, mrOutput.useNewApi);
  assertEquals(org.apache.hadoop.mapred.SequenceFileOutputFormat.class, mrOutput.oldOutputFormat.getClass());
  assertNull(mrOutput.newOutputFormat);
  assertEquals(NullWritable.class, mrOutput.oldApiTaskAttemptContext.getOutputKeyClass());
  assertEquals(Text.class, mrOutput.oldApiTaskAttemptContext.getOutputValueClass());
  assertNull(mrOutput.newApiTaskAttemptContext);
  assertNotNull(mrOutput.oldRecordWriter);
  assertNull(mrOutput.newRecordWriter);
  assertEquals(org.apache.hadoop.mapred.FileOutputCommitter.class, mrOutput.committer.getClass());
}
TestMROutput.java 文件源码 项目:tez 阅读 46 收藏 0 点赞 0 评论 0
@Test(timeout = 5000)
public void testNewAPI_WorkOutputPathOutputFormat() throws Exception {
  String outputPath = "/tmp/output";
  Configuration configuration = new Configuration();
  configuration.setBoolean(MRConfig.IS_MAP_PROCESSOR, true);
  DataSinkDescriptor sink = MROutput
    .createConfigBuilder(configuration, NewAPI_WorkOutputPathReadingOutputFormat.class, outputPath)
    .build();

  OutputContext outputContext = createMockOutputContext(sink.getOutputDescriptor().getUserPayload());
  MROutput mrOutput = new MROutput(outputContext, 2);
  mrOutput.initialize();

  // Map-side new-API output format that reads the work output path; only
  // new-API fields should be populated.
  assertEquals(true, mrOutput.isMapperOutput);
  assertEquals(true, mrOutput.useNewApi);
  assertEquals(NewAPI_WorkOutputPathReadingOutputFormat.class, mrOutput.newOutputFormat.getClass());
  assertNull(mrOutput.oldOutputFormat);
  assertNotNull(mrOutput.newApiTaskAttemptContext);
  assertNull(mrOutput.oldApiTaskAttemptContext);
  assertNotNull(mrOutput.newRecordWriter);
  assertNull(mrOutput.oldRecordWriter);
  assertEquals(FileOutputCommitter.class, mrOutput.committer.getClass());
}
TestMROutput.java 文件源码 项目:tez 阅读 32 收藏 0 点赞 0 评论 0
@Test(timeout = 5000)
public void testOldAPI_WorkOutputPathOutputFormat() throws Exception {
  String outputPath = "/tmp/output";
  Configuration configuration = new Configuration();
  configuration.setBoolean(MRConfig.IS_MAP_PROCESSOR, false);
  DataSinkDescriptor sink = MROutput
    .createConfigBuilder(configuration, OldAPI_WorkOutputPathReadingOutputFormat.class, outputPath)
    .build();

  OutputContext outputContext = createMockOutputContext(sink.getOutputDescriptor().getUserPayload());
  MROutput mrOutput = new MROutput(outputContext, 2);
  mrOutput.initialize();

  // Reduce-side old-API output format that reads the work output path; only
  // old-API fields should be populated.
  assertEquals(false, mrOutput.isMapperOutput);
  assertEquals(false, mrOutput.useNewApi);
  assertEquals(OldAPI_WorkOutputPathReadingOutputFormat.class, mrOutput.oldOutputFormat.getClass());
  assertNull(mrOutput.newOutputFormat);
  assertNotNull(mrOutput.oldApiTaskAttemptContext);
  assertNull(mrOutput.newApiTaskAttemptContext);
  assertNotNull(mrOutput.oldRecordWriter);
  assertNull(mrOutput.newRecordWriter);
  assertEquals(org.apache.hadoop.mapred.FileOutputCommitter.class, mrOutput.committer.getClass());
}
TestMRJobs.java 文件源码 项目:FlexMap 阅读 37 收藏 0 点赞 0 评论 0
/**
 * Runs the RandomTextWriter example job on the mini MR cluster and verifies
 * the tracking URL, job success, and the number of produced part files.
 */
@Test (timeout = 60000)
public void testRandomWriter() throws IOException, InterruptedException,
    ClassNotFoundException {

  LOG.info("\n\n\nStarting testRandomWriter().");
  // Skip the test when the MRAppJar has not been built in this environment.
  if (!(new File(MiniMRYarnCluster.APPJAR)).exists()) {
    LOG.info("MRAppJar " + MiniMRYarnCluster.APPJAR
             + " not found. Not running test.");
    return;
  }

  // 3072 total bytes at 1024 bytes per map => three map tasks, hence three
  // part files expected below.
  RandomTextWriterJob randomWriterJob = new RandomTextWriterJob();
  mrCluster.getConfig().set(RandomTextWriterJob.TOTAL_BYTES, "3072");
  mrCluster.getConfig().set(RandomTextWriterJob.BYTES_PER_MAP, "1024");
  Job job = randomWriterJob.createJob(mrCluster.getConfig());
  Path outputDir = new Path(OUTPUT_ROOT_DIR, "random-output");
  FileOutputFormat.setOutputPath(job, outputDir);
  job.setSpeculativeExecution(false);
  job.addFileToClassPath(APP_JAR); // The AppMaster jar itself.
  job.setJarByClass(RandomTextWriterJob.class);
  job.setMaxMapAttempts(1); // speed up failures
  job.submit();
  String trackingUrl = job.getTrackingURL();
  String jobId = job.getJobID().toString();
  boolean succeeded = job.waitForCompletion(true);
  Assert.assertTrue(succeeded);
  Assert.assertEquals(JobStatus.State.SUCCEEDED, job.getJobState());
  // The tracking URL should end with this job's numeric id suffix.
  Assert.assertTrue("Tracking URL was " + trackingUrl +
                    " but didn't Match Job ID " + jobId ,
        trackingUrl.endsWith(jobId.substring(jobId.lastIndexOf("_")) + "/"));

  // Make sure there are three files in the output-dir

  RemoteIterator<FileStatus> iterator =
      FileContext.getFileContext(mrCluster.getConfig()).listStatus(
          outputDir);
  int count = 0;
  // Count output files, ignoring the _SUCCESS marker.
  while (iterator.hasNext()) {
    FileStatus file = iterator.next();
    if (!file.getPath().getName()
        .equals(FileOutputCommitter.SUCCEEDED_FILE_NAME)) {
      count++;
    }
  }
  Assert.assertEquals("Number of part files is wrong!", 3, count);
  verifyRandomWriterCounters(job);

  // TODO later:  add explicit "isUber()" checks of some sort
}
TestMrServer.java 文件源码 项目:cloudera-framework 阅读 29 收藏 0 点赞 0 评论 0
@Test
public void testMr() throws InterruptedException, IOException, ClassNotFoundException {
  String dirInput = "/tmp/wordcount/input";
  String dirOutput = "/tmp/wordcount/output";
  String fileInput = new Path(dirInput, "file1.txt").toString();
  // try-with-resources guarantees the writer is closed/flushed even when a
  // write fails; the original leaked the stream on failure.
  try (BufferedWriter writer = new BufferedWriter(
    new OutputStreamWriter(getDfsServer().getFileSystem().create(getDfsServer().getPath(fileInput))))) {
    writer.write("a a a a a\n");
    writer.write("b b\n");
  }
  // Standard word-count wiring: tokenizing mapper, summing combiner/reducer.
  Job job = Job.getInstance(getMrServer().getConf());
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(IntWritable.class);
  job.setMapperClass(MapClass.class);
  job.setCombinerClass(Reduce.class);
  job.setReducerClass(Reduce.class);
  FileInputFormat.setInputPaths(job, getDfsServer().getPathUri(dirInput));
  FileOutputFormat.setOutputPath(job, new Path(getDfsServer().getPathUri(dirOutput)));
  assertTrue(job.waitForCompletion(true));
  // Only real part files count; the _SUCCESS marker is filtered out.
  Path[] outputFiles = FileUtil.stat2Paths(getDfsServer().getFileSystem().listStatus(getDfsServer().getPath(dirOutput), path -> !path
    .getName().equals(FileOutputCommitter.SUCCEEDED_FILE_NAME)));
  assertEquals(1, outputFiles.length);
  // try-with-resources closes the reader even if an assertion fails.
  try (BufferedReader reader = new BufferedReader(
      new InputStreamReader(getDfsServer().getFileSystem().open(outputFiles[0])))) {
    assertEquals("a\t5", reader.readLine());
    assertEquals("b\t2", reader.readLine());
    assertNull(reader.readLine());
  }
}
Partition.java 文件源码 项目:cloudera-framework 阅读 40 收藏 0 点赞 0 评论 0
@Override
public void cleanup(Context context) throws IOException, InterruptedException {
  // Remove the per-partition _SUCCESS markers written under the job output
  // directory before closing the multiple-outputs writer.
  for (String partition : partitions) {
    Path partitionDir = new Path(
      context.getConfiguration().get(FileOutputFormat.OUTDIR) + Path.SEPARATOR_CHAR + partition);
    FileSystem.get(context.getConfiguration())
      .delete(new Path(partitionDir, FileOutputCommitter.SUCCEEDED_FILE_NAME), false);
  }
  partitions.clear();
  multipleOutputsAvro.close();
}
TestMapreduceConfigFields.java 文件源码 项目:hops 阅读 31 收藏 0 点赞 0 评论 0
@Override
public void initializeMemberVariables() {
  // Cross-check the properties in mapred-default.xml against the constants
  // declared by these configuration classes.
  xmlFilename = "mapred-default.xml"; // plain literal; new String(...) was redundant
  configurationClasses = new Class[] { MRJobConfig.class, MRConfig.class,
      JHAdminConfig.class, ShuffleHandler.class, FileOutputFormat.class,
      FileInputFormat.class, Job.class, NLineInputFormat.class,
      JobConf.class, FileOutputCommitter.class };

  // Initialize used variables
  configurationPropsToSkipCompare = new HashSet<String>();
  xmlPropsToSkipCompare = new HashSet<String>();

  // Fail when a declared constant is missing from the XML, but tolerate
  // XML entries that have no matching constant.
  errorIfMissingConfigProps = true;
  errorIfMissingXmlProps = false;

  // Ignore deprecated MR1 properties in JobConf
  configurationPropsToSkipCompare
      .add(JobConf.MAPRED_JOB_MAP_MEMORY_MB_PROPERTY);
  configurationPropsToSkipCompare
      .add(JobConf.MAPRED_JOB_REDUCE_MEMORY_MB_PROPERTY);

  // Obsolete entries listed in MAPREDUCE-6057 were removed from trunk
  // but not removed from branch-2.
  xmlPropsToSkipCompare.add("map.sort.class");
  xmlPropsToSkipCompare.add("mapreduce.local.clientfactory.class.name");
  xmlPropsToSkipCompare.add("mapreduce.jobtracker.system.dir");
  xmlPropsToSkipCompare.add("mapreduce.jobtracker.staging.root.dir");
}
TestStore.java 文件源码 项目:spork-streaming 阅读 37 收藏 0 点赞 0 评论 0
@Override
public Path getDefaultWorkFile(TaskAttemptContext context,
        String extension) throws IOException {
    // Place the part file inside this task attempt's temporary work
    // directory as resolved by the file output committer.
    FileOutputCommitter outputCommitter =
            (FileOutputCommitter) super.getOutputCommitter(context);
    Path workDir = outputCommitter.getWorkPath();
    return new Path(workDir, getUniqueFile(context, "part", extension));
}
TestStore.java 文件源码 项目:spork 阅读 37 收藏 0 点赞 0 评论 0
@Override
public Path getDefaultWorkFile(TaskAttemptContext context,
        String extension) throws IOException {
    // Resolve the task attempt's temporary work directory via the committer
    // and return a task-unique part file path inside it.
    FileOutputCommitter outputCommitter =
            (FileOutputCommitter) super.getOutputCommitter(context);
    return new Path(outputCommitter.getWorkPath(),
            getUniqueFile(context, "part", extension));
}
TestMROutputLegacy.java 文件源码 项目:tez 阅读 41 收藏 0 点赞 0 评论 0
@Test (timeout = 5000)
public void testOldAPI_MR() throws Exception {
  String outputPath = "/tmp/output";
  JobConf jobConf = new JobConf();
  jobConf.setOutputKeyClass(NullWritable.class);
  jobConf.setOutputValueClass(Text.class);
  jobConf.setOutputFormat(org.apache.hadoop.mapred.SequenceFileOutputFormat.class);
  org.apache.hadoop.mapred.SequenceFileOutputFormat.setOutputPath(jobConf, new Path(outputPath));
  // the output is attached to reducer
  jobConf.setBoolean(MRConfig.IS_MAP_PROCESSOR, false);
  UserPayload payload = TezUtils.createUserPayloadFromConf(jobConf);
  OutputDescriptor outputDescriptor = OutputDescriptor.create(MROutputLegacy.class.getName())
      .setUserPayload(payload);
  DataSinkDescriptor dataSink = DataSinkDescriptor.create(outputDescriptor,
      OutputCommitterDescriptor.create(MROutputCommitter.class.getName()), null);

  OutputContext outputContext = createMockOutputContext(dataSink.getOutputDescriptor().getUserPayload());
  MROutputLegacy legacyOutput = new MROutputLegacy(outputContext, 2);
  legacyOutput.initialize();
  // Old-API reduce-side output: old-API fields populated, new-API ones null.
  assertEquals(false, legacyOutput.useNewApi);
  assertEquals(org.apache.hadoop.mapred.SequenceFileOutputFormat.class, legacyOutput.oldOutputFormat.getClass());
  assertNull(legacyOutput.newOutputFormat);
  assertEquals(NullWritable.class, legacyOutput.oldApiTaskAttemptContext.getOutputKeyClass());
  assertEquals(Text.class, legacyOutput.oldApiTaskAttemptContext.getOutputValueClass());
  assertNull(legacyOutput.newApiTaskAttemptContext);
  assertNotNull(legacyOutput.oldRecordWriter);
  assertNull(legacyOutput.newRecordWriter);
  assertEquals(org.apache.hadoop.mapred.FileOutputCommitter.class, legacyOutput.committer.getClass());
}
TestMROutputLegacy.java 文件源码 项目:tez 阅读 28 收藏 0 点赞 0 评论 0
@Test (timeout = 5000)
public void testNewAPI_MR() throws Exception {
  String outputPath = "/tmp/output";
  Job job = Job.getInstance();
  job.setOutputKeyClass(NullWritable.class);
  job.setOutputValueClass(Text.class);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  SequenceFileOutputFormat.setOutputPath(job, new Path(outputPath));
  job.getConfiguration().setBoolean("mapred.reducer.new-api", true);
  // the output is attached to reducer
  job.getConfiguration().setBoolean(MRConfig.IS_MAP_PROCESSOR, false);
  UserPayload payload = TezUtils.createUserPayloadFromConf(job.getConfiguration());
  OutputDescriptor outputDescriptor = OutputDescriptor.create(MROutputLegacy.class.getName())
      .setUserPayload(payload);
  DataSinkDescriptor dataSink = DataSinkDescriptor.create(outputDescriptor,
      OutputCommitterDescriptor.create(MROutputCommitter.class.getName()), null);

  OutputContext outputContext = createMockOutputContext(dataSink.getOutputDescriptor().getUserPayload());
  MROutputLegacy legacyOutput = new MROutputLegacy(outputContext, 2);
  legacyOutput.initialize();
  // New-API reduce-side output: new-API fields populated, old-API ones null.
  assertEquals(true, legacyOutput.useNewApi);
  assertEquals(SequenceFileOutputFormat.class, legacyOutput.newOutputFormat.getClass());
  assertNull(legacyOutput.oldOutputFormat);
  assertEquals(NullWritable.class, legacyOutput.newApiTaskAttemptContext.getOutputKeyClass());
  assertEquals(Text.class, legacyOutput.newApiTaskAttemptContext.getOutputValueClass());
  assertNull(legacyOutput.oldApiTaskAttemptContext);
  assertNotNull(legacyOutput.newRecordWriter);
  assertNull(legacyOutput.oldRecordWriter);
  assertEquals(FileOutputCommitter.class, legacyOutput.committer.getClass());
}
TestMROutputLegacy.java 文件源码 项目:tez 阅读 35 收藏 0 点赞 0 评论 0
@Test (timeout = 5000)
public void testOldAPI_MapperOnly() throws Exception {
  String outputPath = "/tmp/output";
  JobConf jobConf = new JobConf();
  jobConf.setOutputKeyClass(NullWritable.class);
  jobConf.setOutputValueClass(Text.class);
  jobConf.setOutputFormat(org.apache.hadoop.mapred.SequenceFileOutputFormat.class);
  org.apache.hadoop.mapred.SequenceFileOutputFormat.setOutputPath(jobConf, new Path(outputPath));
  // the output is attached to mapper
  jobConf.setBoolean(MRConfig.IS_MAP_PROCESSOR, true);
  UserPayload payload = TezUtils.createUserPayloadFromConf(jobConf);
  OutputDescriptor outputDescriptor = OutputDescriptor.create(MROutputLegacy.class.getName())
      .setUserPayload(payload);
  DataSinkDescriptor dataSink = DataSinkDescriptor.create(outputDescriptor,
      OutputCommitterDescriptor.create(MROutputCommitter.class.getName()), null);

  OutputContext outputContext = createMockOutputContext(dataSink.getOutputDescriptor().getUserPayload());
  MROutputLegacy legacyOutput = new MROutputLegacy(outputContext, 2);
  legacyOutput.initialize();
  // Old-API map-side output: old-API fields populated, new-API ones null.
  assertEquals(false, legacyOutput.useNewApi);
  assertEquals(org.apache.hadoop.mapred.SequenceFileOutputFormat.class, legacyOutput.oldOutputFormat.getClass());
  assertNull(legacyOutput.newOutputFormat);
  assertEquals(NullWritable.class, legacyOutput.oldApiTaskAttemptContext.getOutputKeyClass());
  assertEquals(Text.class, legacyOutput.oldApiTaskAttemptContext.getOutputValueClass());
  assertNull(legacyOutput.newApiTaskAttemptContext);
  assertNotNull(legacyOutput.oldRecordWriter);
  assertNull(legacyOutput.newRecordWriter);
  assertEquals(org.apache.hadoop.mapred.FileOutputCommitter.class, legacyOutput.committer.getClass());
}
TestMROutputLegacy.java 文件源码 项目:tez 阅读 33 收藏 0 点赞 0 评论 0
@Test (timeout = 5000)
public void testNewAPI_MapperOnly() throws Exception {
  String outputPath = "/tmp/output";
  Job job = Job.getInstance();
  job.setOutputKeyClass(NullWritable.class);
  job.setOutputValueClass(Text.class);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  SequenceFileOutputFormat.setOutputPath(job, new Path(outputPath));
  job.getConfiguration().setBoolean("mapred.mapper.new-api", true);
  // the output is attached to mapper
  job.getConfiguration().setBoolean(MRConfig.IS_MAP_PROCESSOR, true);
  UserPayload payload = TezUtils.createUserPayloadFromConf(job.getConfiguration());
  OutputDescriptor outputDescriptor = OutputDescriptor.create(MROutputLegacy.class.getName())
      .setUserPayload(payload);
  DataSinkDescriptor dataSink = DataSinkDescriptor.create(outputDescriptor,
      OutputCommitterDescriptor.create(MROutputCommitter.class.getName()), null);

  OutputContext outputContext = createMockOutputContext(dataSink.getOutputDescriptor().getUserPayload());
  MROutputLegacy legacyOutput = new MROutputLegacy(outputContext, 2);
  legacyOutput.initialize();
  // New-API map-side output: new-API fields populated, old-API ones null.
  assertEquals(true, legacyOutput.useNewApi);
  assertEquals(SequenceFileOutputFormat.class, legacyOutput.newOutputFormat.getClass());
  assertNull(legacyOutput.oldOutputFormat);
  assertEquals(NullWritable.class, legacyOutput.newApiTaskAttemptContext.getOutputKeyClass());
  assertEquals(Text.class, legacyOutput.newApiTaskAttemptContext.getOutputValueClass());
  assertNull(legacyOutput.oldApiTaskAttemptContext);
  assertNotNull(legacyOutput.newRecordWriter);
  assertNull(legacyOutput.oldRecordWriter);
  assertEquals(FileOutputCommitter.class, legacyOutput.committer.getClass());
}
FilteredCopyListing.java 文件源码 项目:incubator-falcon 阅读 26 收藏 0 点赞 0 评论 0
@Override
protected boolean shouldCopy(Path path, DistCpOptions options) {
    // Never copy job _SUCCESS markers.
    if (FileOutputCommitter.SUCCEEDED_FILE_NAME.equals(path.getName())) {
        return false;
    }
    // No filter configured means copy everything else.
    if (regex == null) {
        return true;
    }
    return regex.matcher(path.toString()).find();
}
TransformerOutputFormat.java 文件源码 项目:big_data 阅读 28 收藏 0 点赞 0 评论 0
@Override
public OutputCommitter getOutputCommitter(TaskAttemptContext context) throws IOException, InterruptedException {
    // Commit through the standard file committer into the job output dir.
    Path outputDirectory = FileOutputFormat.getOutputPath(context);
    return new FileOutputCommitter(outputDirectory, context);
}
DBOutputFormat.java 文件源码 项目:aliyun-maxcompute-data-collectors 阅读 34 收藏 0 点赞 0 评论 0
public OutputCommitter getOutputCommitter(TaskAttemptContext context)
    throws IOException, InterruptedException {
  // Delegate commit handling to the standard file output committer,
  // rooted at the job's configured output directory.
  Path outputDirectory = FileOutputFormat.getOutputPath(context);
  return new FileOutputCommitter(outputDirectory, context);
}
TestMRJobs.java 文件源码 项目:hadoop 阅读 31 收藏 0 点赞 0 评论 0
/**
 * Runs the RandomTextWriter example job on the mini MR cluster and verifies
 * the tracking URL, job success, and the number of produced part files.
 */
@Test (timeout = 60000)
public void testRandomWriter() throws IOException, InterruptedException,
    ClassNotFoundException {

  LOG.info("\n\n\nStarting testRandomWriter().");
  // Skip the test when the MRAppJar has not been built in this environment.
  if (!(new File(MiniMRYarnCluster.APPJAR)).exists()) {
    LOG.info("MRAppJar " + MiniMRYarnCluster.APPJAR
             + " not found. Not running test.");
    return;
  }

  // 3072 total bytes at 1024 bytes per map => three map tasks, hence three
  // part files expected below.
  RandomTextWriterJob randomWriterJob = new RandomTextWriterJob();
  mrCluster.getConfig().set(RandomTextWriterJob.TOTAL_BYTES, "3072");
  mrCluster.getConfig().set(RandomTextWriterJob.BYTES_PER_MAP, "1024");
  Job job = randomWriterJob.createJob(mrCluster.getConfig());
  Path outputDir = new Path(OUTPUT_ROOT_DIR, "random-output");
  FileOutputFormat.setOutputPath(job, outputDir);
  job.setSpeculativeExecution(false);
  job.addFileToClassPath(APP_JAR); // The AppMaster jar itself.
  job.setJarByClass(RandomTextWriterJob.class);
  job.setMaxMapAttempts(1); // speed up failures
  job.submit();
  String trackingUrl = job.getTrackingURL();
  String jobId = job.getJobID().toString();
  boolean succeeded = job.waitForCompletion(true);
  Assert.assertTrue(succeeded);
  Assert.assertEquals(JobStatus.State.SUCCEEDED, job.getJobState());
  // The tracking URL should end with this job's numeric id suffix.
  Assert.assertTrue("Tracking URL was " + trackingUrl +
                    " but didn't Match Job ID " + jobId ,
        trackingUrl.endsWith(jobId.substring(jobId.lastIndexOf("_")) + "/"));

  // Make sure there are three files in the output-dir

  RemoteIterator<FileStatus> iterator =
      FileContext.getFileContext(mrCluster.getConfig()).listStatus(
          outputDir);
  int count = 0;
  // Count output files, ignoring the _SUCCESS marker.
  while (iterator.hasNext()) {
    FileStatus file = iterator.next();
    if (!file.getPath().getName()
        .equals(FileOutputCommitter.SUCCEEDED_FILE_NAME)) {
      count++;
    }
  }
  Assert.assertEquals("Number of part files is wrong!", 3, count);
  verifyRandomWriterCounters(job);

  // TODO later:  add explicit "isUber()" checks of some sort
}
DBOutputFormat.java 文件源码 项目:hadoop 阅读 40 收藏 0 点赞 0 评论 0
public OutputCommitter getOutputCommitter(TaskAttemptContext context)
    throws IOException, InterruptedException {
  // Use the standard file output committer rooted at the configured
  // job output directory.
  Path outputDirectory = FileOutputFormat.getOutputPath(context);
  return new FileOutputCommitter(outputDirectory, context);
}
TestMRJobs.java 文件源码 项目:aliyun-oss-hadoop-fs 阅读 51 收藏 0 点赞 0 评论 0
/**
 * Runs the RandomTextWriter example job on the mini MR cluster and verifies
 * the tracking URL, job success, and the number of produced part files.
 */
@Test (timeout = 60000)
public void testRandomWriter() throws IOException, InterruptedException,
    ClassNotFoundException {

  LOG.info("\n\n\nStarting testRandomWriter().");
  // Skip the test when the MRAppJar has not been built in this environment.
  if (!(new File(MiniMRYarnCluster.APPJAR)).exists()) {
    LOG.info("MRAppJar " + MiniMRYarnCluster.APPJAR
             + " not found. Not running test.");
    return;
  }

  // 3072 total bytes at 1024 bytes per map => three map tasks, hence three
  // part files expected below.
  RandomTextWriterJob randomWriterJob = new RandomTextWriterJob();
  mrCluster.getConfig().set(RandomTextWriterJob.TOTAL_BYTES, "3072");
  mrCluster.getConfig().set(RandomTextWriterJob.BYTES_PER_MAP, "1024");
  Job job = randomWriterJob.createJob(mrCluster.getConfig());
  Path outputDir = new Path(OUTPUT_ROOT_DIR, "random-output");
  FileOutputFormat.setOutputPath(job, outputDir);
  job.setSpeculativeExecution(false);
  job.addFileToClassPath(APP_JAR); // The AppMaster jar itself.
  job.setJarByClass(RandomTextWriterJob.class);
  job.setMaxMapAttempts(1); // speed up failures
  job.submit();
  String trackingUrl = job.getTrackingURL();
  String jobId = job.getJobID().toString();
  boolean succeeded = job.waitForCompletion(true);
  Assert.assertTrue(succeeded);
  Assert.assertEquals(JobStatus.State.SUCCEEDED, job.getJobState());
  // The tracking URL should end with this job's numeric id suffix.
  Assert.assertTrue("Tracking URL was " + trackingUrl +
                    " but didn't Match Job ID " + jobId ,
        trackingUrl.endsWith(jobId.substring(jobId.lastIndexOf("_")) + "/"));

  // Make sure there are three files in the output-dir

  RemoteIterator<FileStatus> iterator =
      FileContext.getFileContext(mrCluster.getConfig()).listStatus(
          outputDir);
  int count = 0;
  // Count output files, ignoring the _SUCCESS marker.
  while (iterator.hasNext()) {
    FileStatus file = iterator.next();
    if (!file.getPath().getName()
        .equals(FileOutputCommitter.SUCCEEDED_FILE_NAME)) {
      count++;
    }
  }
  Assert.assertEquals("Number of part files is wrong!", 3, count);
  verifyRandomWriterCounters(job);

  // TODO later:  add explicit "isUber()" checks of some sort
}
DBOutputFormat.java 文件源码 项目:aliyun-oss-hadoop-fs 阅读 37 收藏 0 点赞 0 评论 0
public OutputCommitter getOutputCommitter(TaskAttemptContext context)
    throws IOException, InterruptedException {
  // Commit results via the standard file output committer for the
  // job's configured output directory.
  Path outputDirectory = FileOutputFormat.getOutputPath(context);
  return new FileOutputCommitter(outputDirectory, context);
}


问题


面经


文章

微信
公众号

扫码关注公众号