Example source code for the Java class org.apache.hadoop.mapreduce.Job

TestImportJob.java (project: aliyun-maxcompute-data-collectors)

@Override
public void configureInputFormat(Job job, String tableName,
    String tableClassName, String splitByCol)
    throws ClassNotFoundException, IOException {
  // Write a line of text into a file so that we can get
  // a record to the map task.
  Path dir = new Path(this.options.getTempDir());
  Path p = new Path(dir, "sqoop-dummy-import-job-file.txt");
  FileSystem fs = FileSystem.getLocal(this.options.getConf());
  if (fs.exists(p)) {
    boolean result = fs.delete(p, false);
    assertTrue("Couldn't delete temp file!", result);
  }
  BufferedWriter w = new BufferedWriter(
      new OutputStreamWriter(fs.create(p)));
  w.append("This is a line!");
  w.close();
  FileInputFormat.addInputPath(job, p);
  // And set the InputFormat itself.
  super.configureInputFormat(job, tableName, tableClassName, splitByCol);
}

LoadJob.java (project: hadoop)

public Job call() throws IOException, InterruptedException,
    ClassNotFoundException {
  ugi.doAs(
    new PrivilegedExceptionAction<Job>() {
      public Job run() throws IOException, ClassNotFoundException,
          InterruptedException {
        job.setMapperClass(LoadMapper.class);
        job.setReducerClass(LoadReducer.class);
        job.setNumReduceTasks(jobdesc.getNumberReduces());
        job.setMapOutputKeyClass(GridmixKey.class);
        job.setMapOutputValueClass(GridmixRecord.class);
        job.setSortComparatorClass(LoadSortComparator.class);
        job.setGroupingComparatorClass(SpecGroupingComparator.class);
        job.setInputFormatClass(LoadInputFormat.class);
        job.setOutputFormatClass(RawBytesOutputFormat.class);
        job.setPartitionerClass(DraftPartitioner.class);
        job.setJarByClass(LoadJob.class);
        job.getConfiguration().setBoolean(Job.USED_GENERIC_PARSER, true);
        FileOutputFormat.setOutputPath(job, outdir);
        job.submit();
        return job;
      }
    });
  return job;
}

MaxTemperatureWithCombiner.java (project: learn-to-hadoop)

public static void main(String[] args) throws Exception {
  if (args.length != 2) {
    System.err.println("Usage: MaxTemperatureWithCombiner <input path> <output path>");
    System.exit(-1);
  }
  Job job = new Job();
  job.setJarByClass(MaxTemperatureWithCombiner.class);
  job.setJobName("Max Temperature With Combiner");
  FileInputFormat.addInputPath(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1]));
  job.setMapperClass(MaxTemperatureMapper.class);
  job.setCombinerClass(MaxTemperatureReducer.class);
  job.setReducerClass(MaxTemperatureReducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(IntWritable.class);
  System.exit(job.waitForCompletion(true) ? 0 : 1);
}

TF_IDF.java (project: Wikipedia-Index)

public static void main(String[] args) throws Exception {
  Configuration conf = new Configuration();
  Job job = Job.getInstance(conf);
  job.setJobName("TF-IDFCount");
  job.setJarByClass(TF_IDF.class);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(TextArrayWritable.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(DoubleWritable.class);
  job.setMapperClass(TF_IDFMap.class);
  job.setReducerClass(TF_IDFReduce.class);
  job.setInputFormatClass(TextInputFormat.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  // Two separate input directories feed the same mapper.
  FileInputFormat.addInputPath(job, new Path(args[0]));
  FileInputFormat.addInputPath(job, new Path(args[1]));
  FileOutputFormat.setOutputPath(job, new Path(args[2]));
  boolean wait = job.waitForCompletion(true);
  System.exit(wait ? 0 : 1);
}
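
The job sets TextArrayWritable as the map output value class; Hadoop instantiates that class reflectively during the shuffle, so it must pin its element type in a no-argument constructor. A sketch of what the project's TextArrayWritable presumably looks like:

public static class TextArrayWritable extends ArrayWritable {
  public TextArrayWritable() {
    super(Text.class); // element type needed for reflective deserialization
  }
  public TextArrayWritable(Text[] values) {
    super(Text.class, values);
  }
}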

TestChainErrors.java (project: hadoop)

/**
 * Tests one of the maps consuming output.
 *
 * @throws Exception
 */
public void testChainMapNoOuptut() throws Exception {
  Configuration conf = createJobConf();
  String expectedOutput = "";
  Job job = MapReduceTestUtil.createJob(conf, inDir, outDir, 1, 0, input);
  job.setJobName("chain");
  ChainMapper.addMapper(job, ConsumeMap.class, IntWritable.class, Text.class,
      LongWritable.class, Text.class, null);
  ChainMapper.addMapper(job, Mapper.class, LongWritable.class, Text.class,
      LongWritable.class, Text.class, null);
  job.waitForCompletion(true);
  assertTrue("Job failed", job.isSuccessful());
  assertEquals("Outputs doesn't match", expectedOutput, MapReduceTestUtil
      .readOutput(outDir, conf));
}

IntegrationTestTableMapReduceUtil.java (project: ditb)

/**
 * Look for jars we expect to be on the classpath by name.
 */
@Test
public void testAddDependencyJars() throws Exception {
  Job job = new Job();
  TableMapReduceUtil.addDependencyJars(job);
  String tmpjars = job.getConfiguration().get("tmpjars");
  // verify presence of modules
  assertTrue(tmpjars.contains("hbase-common"));
  assertTrue(tmpjars.contains("hbase-protocol"));
  assertTrue(tmpjars.contains("hbase-client"));
  assertTrue(tmpjars.contains("hbase-hadoop-compat"));
  assertTrue(tmpjars.contains("hbase-server"));
  // verify presence of 3rd party dependencies.
  assertTrue(tmpjars.contains("zookeeper"));
  assertTrue(tmpjars.contains("netty"));
  assertTrue(tmpjars.contains("protobuf"));
  assertTrue(tmpjars.contains("guava"));
  assertTrue(tmpjars.contains("htrace"));
}

MapReduceTestUtil.java (project: hadoop)

/**
 * Creates a simple copy job.
 *
 * @param conf Configuration object
 * @param outdir Output directory.
 * @param indirs Comma separated input directories.
 * @return Job initialized for a data copy job.
 * @throws Exception If an error occurs creating job configuration.
 */
public static Job createCopyJob(Configuration conf, Path outdir,
    Path... indirs) throws Exception {
  conf.setInt(MRJobConfig.NUM_MAPS, 3);
  Job theJob = Job.getInstance(conf);
  theJob.setJobName("DataMoveJob");
  FileInputFormat.setInputPaths(theJob, indirs);
  theJob.setMapperClass(DataCopyMapper.class);
  FileOutputFormat.setOutputPath(theJob, outdir);
  theJob.setOutputKeyClass(Text.class);
  theJob.setOutputValueClass(Text.class);
  theJob.setReducerClass(DataCopyReducer.class);
  theJob.setNumReduceTasks(1);
  return theJob;
}

CellCounter.java (project: ditb)

/**
 * Main entry point.
 *
 * @param args The command line parameters.
 * @throws Exception When running the job fails.
 */
public static void main(String[] args) throws Exception {
  Configuration conf = HBaseConfiguration.create();
  String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
  if (otherArgs.length < 2) {
    System.err.println("ERROR: Wrong number of parameters: " + args.length);
    System.err.println("Usage: CellCounter ");
    System.err.println(" <tablename> <outputDir> <reportSeparator> [^[regex pattern] or " +
        "[Prefix] for row filter]] --starttime=[starttime] --endtime=[endtime]");
    System.err.println(" Note: -D properties will be applied to the conf used. ");
    System.err.println(" Additionally, the following SCAN properties can be specified");
    System.err.println(" to get fine grained control on what is counted..");
    System.err.println(" -D " + TableInputFormat.SCAN_COLUMN_FAMILY + "=<familyName>");
    System.err.println(" <reportSeparator> parameter can be used to override the default report separator " +
        "string : used to separate the rowId/column family name and qualifier name.");
    System.err.println(" [^[regex pattern] or [Prefix] parameter can be used to limit the cell counter count " +
        "operation to a limited subset of rows from the table based on regex or prefix pattern.");
    System.exit(-1);
  }
  Job job = createSubmittableJob(conf, otherArgs);
  System.exit(job.waitForCompletion(true) ? 0 : 1);
}

testDriver.java (project: Hadoop-Codes)

public static void main(String[] args) throws Exception {
  Configuration conf = new Configuration();
  Job job = Job.getInstance(conf, "test");
  job.setMapperClass(testMapper.class);
  job.setPartitionerClass(testPartitioner.class);
  job.setReducerClass(testReducer.class);
  job.setNumReduceTasks(10);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(IntWritable.class);
  FileInputFormat.setInputPaths(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1]));
  if (!job.waitForCompletion(true))
    return;
}

MaxTempDriver.java (project: Hadoop-Codes)

public static void main(String[] args) throws Exception {
  Configuration conf = new Configuration();
  Job job = Job.getInstance(conf, "maxtemp");
  job.setMapperClass(MaxTempMapper.class);
  job.setReducerClass(MaxTempReducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(FloatWritable.class);
  FileInputFormat.setInputPaths(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1]));
  if (!job.waitForCompletion(true))
    return;
}

MergeJob.java (project: aliyun-maxcompute-data-collectors)

private void configueAvroMergeJob(Configuration conf, Job job, Path oldPath, Path newPath)
    throws IOException {
  LOG.info("Trying to merge avro files");
  final Schema oldPathSchema = AvroUtil.getAvroSchema(oldPath, conf);
  final Schema newPathSchema = AvroUtil.getAvroSchema(newPath, conf);
  if (oldPathSchema == null || newPathSchema == null || !oldPathSchema.equals(newPathSchema)) {
    throw new IOException("Invalid schema for input directories. Schema for old data: ["
        + oldPathSchema + "]. Schema for new data: [" + newPathSchema + "]");
  }
  LOG.debug("Avro Schema:" + oldPathSchema);
  job.setInputFormatClass(AvroInputFormat.class);
  job.setOutputFormatClass(AvroOutputFormat.class);
  job.setMapperClass(MergeAvroMapper.class);
  job.setReducerClass(MergeAvroReducer.class);
  AvroJob.setOutputSchema(job.getConfiguration(), oldPathSchema);
}

SampleUploader.java (project: ditb)

/**
 * Job configuration.
 */
public static Job configureJob(Configuration conf, String[] args)
    throws IOException {
  Path inputPath = new Path(args[0]);
  String tableName = args[1];
  Job job = new Job(conf, NAME + "_" + tableName);
  job.setJarByClass(Uploader.class);
  FileInputFormat.setInputPaths(job, inputPath);
  job.setInputFormatClass(SequenceFileInputFormat.class);
  job.setMapperClass(Uploader.class);
  // No reducers. Just write straight to table. Call initTableReducerJob
  // because it sets up the TableOutputFormat.
  TableMapReduceUtil.initTableReducerJob(tableName, null, job);
  job.setNumReduceTasks(0);
  return job;
}

TestTableInputFormat.java (project: ditb)

void testInputFormat(Class<? extends InputFormat> clazz)
    throws IOException, InterruptedException, ClassNotFoundException {
  final Job job = MapreduceTestingShim.createJob(UTIL.getConfiguration());
  job.setInputFormatClass(clazz);
  job.setOutputFormatClass(NullOutputFormat.class);
  job.setMapperClass(ExampleVerifier.class);
  job.setNumReduceTasks(0);
  LOG.debug("submitting job.");
  assertTrue("job failed!", job.waitForCompletion(true));
  assertEquals("Saw the wrong number of instances of the filtered-for row.", 2, job.getCounters()
      .findCounter(TestTableInputFormat.class.getName() + ":row", "aaa").getValue());
  assertEquals("Saw any instances of the filtered out row.", 0, job.getCounters()
      .findCounter(TestTableInputFormat.class.getName() + ":row", "bbb").getValue());
  assertEquals("Saw the wrong number of instances of columnA.", 1, job.getCounters()
      .findCounter(TestTableInputFormat.class.getName() + ":family", "columnA").getValue());
  assertEquals("Saw the wrong number of instances of columnB.", 1, job.getCounters()
      .findCounter(TestTableInputFormat.class.getName() + ":family", "columnB").getValue());
  assertEquals("Saw the wrong count of values for the filtered-for row.", 2, job.getCounters()
      .findCounter(TestTableInputFormat.class.getName() + ":value", "value aaa").getValue());
  assertEquals("Saw the wrong count of values for the filtered-out row.", 0, job.getCounters()
      .findCounter(TestTableInputFormat.class.getName() + ":value", "value bbb").getValue());
}

TestHFileOutputFormat2.java (project: ditb)

private void runIncrementalPELoad(Configuration conf, HTableDescriptor tableDescriptor,
    RegionLocator regionLocator, Path outDir) throws IOException, UnsupportedEncodingException,
    InterruptedException, ClassNotFoundException {
  Job job = new Job(conf, "testLocalMRIncrementalLoad");
  job.setWorkingDirectory(util.getDataTestDirOnTestFS("runIncrementalPELoad"));
  job.getConfiguration().setStrings("io.serializations", conf.get("io.serializations"),
      MutationSerialization.class.getName(), ResultSerialization.class.getName(),
      KeyValueSerialization.class.getName());
  setupRandomGeneratorMapper(job);
  HFileOutputFormat2.configureIncrementalLoad(job, tableDescriptor, regionLocator);
  FileOutputFormat.setOutputPath(job, outDir);
  assertFalse(util.getTestFileSystem().exists(outDir));
  assertEquals(regionLocator.getAllRegionLocations().size(), job.getNumReduceTasks());
  assertTrue(job.waitForCompletion(true));
}

PerformanceEvaluation.java (project: ditb)

private void doMapReduce(final Class<? extends Test> cmd) throws IOException,
    InterruptedException, ClassNotFoundException {
  Configuration conf = getConf();
  Path inputDir = writeInputFile(conf);
  conf.set(EvaluationMapTask.CMD_KEY, cmd.getName());
  conf.set(EvaluationMapTask.PE_KEY, getClass().getName());
  Job job = Job.getInstance(conf);
  job.setJarByClass(PerformanceEvaluation.class);
  job.setJobName("HBase Performance Evaluation");
  job.setInputFormatClass(PeInputFormat.class);
  PeInputFormat.setInputPaths(job, inputDir);
  job.setOutputKeyClass(LongWritable.class);
  job.setOutputValueClass(LongWritable.class);
  job.setMapperClass(EvaluationMapTask.class);
  job.setReducerClass(LongSumReducer.class);
  job.setNumReduceTasks(1);
  job.setOutputFormatClass(TextOutputFormat.class);
  TextOutputFormat.setOutputPath(job, new Path(inputDir.getParent(), "outputs"));
  TableMapReduceUtil.addDependencyJars(job);
  TableMapReduceUtil.initCredentials(job);
  job.waitForCompletion(true);
}

ExportJobBase.java (project: aliyun-maxcompute-data-collectors)

@Override
protected void configureInputFormat(Job job, String tableName,
    String tableClassName, String splitByCol)
    throws ClassNotFoundException, IOException {
  if (options.getOdpsTable() != null) {
    Configuration conf = job.getConfiguration();
    setInputFormatClass(OdpsExportInputFormat.class);
    conf.set(OdpsConstants.TABLE_NAME, options.getOdpsTable());
    conf.set(OdpsConstants.ACCESS_ID, options.getOdpsAccessID());
    conf.set(OdpsConstants.ACCESS_KEY, options.getOdpsAccessKey());
    conf.set(OdpsConstants.ENDPOINT, options.getOdpsEndPoint());
    conf.set(OdpsConstants.PROJECT, options.getOdpsProject());
    String partitionSpec = options.getOdpsPartitionSpec();
    if (partitionSpec != null) {
      conf.set(OdpsConstants.PARTITION_SPEC, partitionSpec);
    }
    setMapperClass(OdpsExportMapper.class);
  }
  super.configureInputFormat(job, tableName, tableClassName, splitByCol);
  if (!isHCatJob && options.getOdpsTable() == null) {
    FileInputFormat.addInputPath(job, getInputPath());
  }
}

TestCLI.java (project: hadoop)

@Test
public void testListAttemptIdsWithInvalidInputs() throws Exception {
  JobID jobId = JobID.forName(jobIdStr);
  Cluster mockCluster = mock(Cluster.class);
  Job job = mock(Job.class);
  CLI cli = spy(new CLI());
  doReturn(mockCluster).when(cli).createCluster();
  when(mockCluster.getJob(jobId)).thenReturn(job);
  int retCode_JOB_SETUP = cli.run(new String[] { "-list-attempt-ids",
      jobIdStr, "JOB_SETUP", "running" });
  int retCode_JOB_CLEANUP = cli.run(new String[] { "-list-attempt-ids",
      jobIdStr, "JOB_CLEANUP", "running" });
  int retCode_invalidTaskState = cli.run(new String[] { "-list-attempt-ids",
      jobIdStr, "REDUCE", "complete" });
  assertEquals("JOB_SETUP is an invalid input,exit code should be -1", -1,
      retCode_JOB_SETUP);
  assertEquals("JOB_CLEANUP is an invalid input,exit code should be -1", -1,
      retCode_JOB_CLEANUP);
  assertEquals("complete is an invalid input,exit code should be -1", -1,
      retCode_invalidTaskState);
}

TestJobCounters.java (project: hadoop)

@Test
public void testNewCounterC() throws Exception {
  final Job job = createJob();
  final Configuration conf = job.getConfiguration();
  conf.setInt(JobContext.IO_SORT_FACTOR, 3);
  createWordsFile(inFiles[3], conf);
  createWordsFile(inFiles[4], conf);
  long inputSize = 0;
  inputSize += getFileSize(inFiles[0]);
  inputSize += getFileSize(inFiles[1]);
  inputSize += getFileSize(inFiles[2]);
  inputSize += getFileSize(inFiles[3]);
  inputSize += getFileSize(inFiles[4]);
  org.apache.hadoop.mapreduce.lib.input.FileInputFormat.setInputPaths(
      job, IN_DIR);
  org.apache.hadoop.mapreduce.lib.output.FileOutputFormat.setOutputPath(
      job, new Path(OUT_DIR, "outputN2"));
  assertTrue(job.waitForCompletion(true));
  final Counters c1 = Counters.downgrade(job.getCounters());
  validateCounters(c1, 122880, 25600, 102400);
  validateFileCounters(c1, inputSize, 0, 0, 0);
}

DataDrivenDBInputFormat.java (project: hadoop)

/**
 * Note that the "orderBy" column is called the "splitBy" in this version.
 * We reuse the same field, but it's not strictly ordering it -- just partitioning
 * the results.
 */
public static void setInput(Job job,
    Class<? extends DBWritable> inputClass,
    String tableName, String conditions,
    String splitBy, String... fieldNames) {
  DBInputFormat.setInput(job, inputClass, tableName, conditions, splitBy, fieldNames);
  job.setInputFormatClass(DataDrivenDBInputFormat.class);
}

ValueAggregatorJob.java (project: hadoop)

public static JobControl createValueAggregatorJobs(String args[],
    Class<? extends ValueAggregatorDescriptor>[] descriptors)
    throws IOException {
  JobControl theControl = new JobControl("ValueAggregatorJobs");
  ArrayList<ControlledJob> dependingJobs = new ArrayList<ControlledJob>();
  Configuration conf = new Configuration();
  if (descriptors != null) {
    conf = setAggregatorDescriptors(descriptors);
  }
  Job job = createValueAggregatorJob(conf, args);
  ControlledJob cjob = new ControlledJob(job, dependingJobs);
  theControl.addJob(cjob);
  return theControl;
}
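
The returned JobControl does not submit anything on its own; callers typically drive it on a worker thread and poll until all controlled jobs finish. A minimal usage sketch, assuming args and descriptors are already prepared:

JobControl control = createValueAggregatorJobs(args, descriptors);
Thread runner = new Thread(control); // JobControl implements Runnable
runner.start();
while (!control.allFinished()) {
  Thread.sleep(1000); // poll until every controlled job completes
}
control.stop(); // lets the runner thread exit
System.out.println("Failed jobs: " + control.getFailedJobList().size());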

SecondarySort.java (project: hadoop)

public static void main(String[] args) throws Exception {
  Configuration conf = new Configuration();
  String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
  if (otherArgs.length != 2) {
    System.err.println("Usage: secondarysort <in> <out>");
    System.exit(2);
  }
  Job job = Job.getInstance(conf, "secondary sort");
  job.setJarByClass(SecondarySort.class);
  job.setMapperClass(MapClass.class);
  job.setReducerClass(Reduce.class);
  // group and partition by the first int in the pair
  job.setPartitionerClass(FirstPartitioner.class);
  job.setGroupingComparatorClass(FirstGroupingComparator.class);
  // the map output is IntPair, IntWritable
  job.setMapOutputKeyClass(IntPair.class);
  job.setMapOutputValueClass(IntWritable.class);
  // the reduce output is Text, IntWritable
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(IntWritable.class);
  FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
  FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
  System.exit(job.waitForCompletion(true) ? 0 : 1);
}
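
FirstPartitioner and FirstGroupingComparator are nested classes in the same example file. A sketch of the partitioner, assuming IntPair exposes its first element through getFirst(): route records by the first int only, so every record sharing it reaches the same reducer for the grouping comparator to merge.

public static class FirstPartitioner
    extends Partitioner<IntPair, IntWritable> {
  @Override
  public int getPartition(IntPair key, IntWritable value, int numPartitions) {
    // Spread first keys across partitions; the multiplier just mixes bits.
    return Math.abs(key.getFirst() * 127) % numPartitions;
  }
}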

Total.java (project: DocIT)

public static void total(String name, String in, String out)
    throws IOException, InterruptedException, ClassNotFoundException {
  Configuration conf = new Configuration();
  conf.set(QUERIED_NAME, name);
  Job job = Job.getInstance(new Cluster(conf), conf);
  job.setJarByClass(Total.class);
  // in
  if (!in.endsWith("/"))
    in = in.concat("/");
  in = in.concat("employees");
  SequenceFileInputFormat.addInputPath(job, new Path(in));
  job.setInputFormatClass(SequenceFileInputFormat.class);
  // map
  job.setMapperClass(TotalMapper.class);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(DoubleWritable.class);
  // reduce
  job.setCombinerClass(TotalReducer.class);
  job.setReducerClass(TotalReducer.class);
  // out
  SequenceFileOutputFormat.setOutputPath(job, new Path(out));
  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(DoubleWritable.class);
  job.waitForCompletion(true);
}

InitDriver.java (project: LDA)

public static void run(Configuration conf, Path[] inputPath, Path outputPath)
    throws IOException, ClassNotFoundException, InterruptedException {
  String jobName = "init matrix";
  Job job = new Job(conf, jobName);
  job.setMapOutputKeyClass(twoDimensionIndexWritable.class);
  job.setMapOutputValueClass(Text.class);
  job.setOutputKeyClass(twoDimensionIndexWritable.class);
  job.setOutputValueClass(Text.class);
  job.setInputFormatClass(SequenceFileInputFormat.class);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  job.setMapperClass(InitMapper.class);
  job.setReducerClass(InitReducer.class);
  job.setNumReduceTasks(1);
  for (Path path : inputPath) {
    FileInputFormat.addInputPath(job, path);
  }
  Path output = new Path(outputPath, "initDir");
  FileOutputFormat.setOutputPath(job, output);
  job.setJarByClass(LDADriver.class);
  if (!job.waitForCompletion(true)) {
    throw new InterruptedException("Init failed");
  }
}

WALPlayer.java (project: ditb)

@Override
public int run(String[] args) throws Exception {
  String[] otherArgs = new GenericOptionsParser(getConf(), args).getRemainingArgs();
  if (otherArgs.length < 2) {
    usage("Wrong number of arguments: " + otherArgs.length);
    System.exit(-1);
  }
  Job job = createSubmittableJob(otherArgs);
  return job.waitForCompletion(true) ? 0 : 1;
}

TestTableInputFormatScanBase.java (project: ditb)

/**
 * Tests a MR scan using specific start and stop rows.
 *
 * @throws IOException
 * @throws ClassNotFoundException
 * @throws InterruptedException
 */
protected void testScan(String start, String stop, String last)
    throws IOException, InterruptedException, ClassNotFoundException {
  String jobName = "Scan" + (start != null ? start.toUpperCase() : "Empty") +
      "To" + (stop != null ? stop.toUpperCase() : "Empty");
  LOG.info("Before map/reduce startup - job " + jobName);
  Configuration c = new Configuration(TEST_UTIL.getConfiguration());
  Scan scan = new Scan();
  scan.addFamily(INPUT_FAMILY);
  if (start != null) {
    scan.setStartRow(Bytes.toBytes(start));
  }
  c.set(KEY_STARTROW, start != null ? start : "");
  if (stop != null) {
    scan.setStopRow(Bytes.toBytes(stop));
  }
  c.set(KEY_LASTROW, last != null ? last : "");
  LOG.info("scan before: " + scan);
  Job job = new Job(c, jobName);
  TableMapReduceUtil.initTableMapperJob(
      Bytes.toString(TABLE_NAME), scan, ScanMapper.class,
      ImmutableBytesWritable.class, ImmutableBytesWritable.class, job);
  job.setReducerClass(ScanReducer.class);
  job.setNumReduceTasks(1); // one to get final "first" and "last" key
  FileOutputFormat.setOutputPath(job,
      new Path(TEST_UTIL.getDataTestDir(), job.getJobName()));
  LOG.info("Started " + job.getJobName());
  assertTrue(job.waitForCompletion(true));
  LOG.info("After map/reduce completion - job " + jobName);
}

TableMapReduceUtil.java (project: ditb)

/**
 * Use this before submitting a TableMap job. It will appropriately set up
 * the job.
 *
 * @param table The table name to read from.
 * @param scan The scan instance with the columns, time range etc.
 * @param mapper The mapper class to use.
 * @param outputKeyClass The class of the output key.
 * @param outputValueClass The class of the output value.
 * @param job The current job to adjust. Make sure the passed job is
 *          carrying all necessary HBase configuration.
 * @throws IOException When setting up the details fails.
 */
public static void initTableMapperJob(TableName table,
    Scan scan,
    Class<? extends TableMapper> mapper,
    Class<?> outputKeyClass,
    Class<?> outputValueClass,
    Job job) throws IOException {
  initTableMapperJob(table.getNameAsString(),
      scan,
      mapper,
      outputKeyClass,
      outputValueClass,
      job,
      true);
}
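
A typical call site for this overload, sketched with placeholder names (the column family, table name, and MyTableMapper are illustrative): configure the Scan for MR-friendly reads, then let the utility wire up the table input format and serialization.

Scan scan = new Scan();
scan.addFamily(Bytes.toBytes("cf"));   // hypothetical column family
scan.setCaching(500);                  // fetch rows in larger batches
scan.setCacheBlocks(false);            // don't churn the region server cache
Job job = Job.getInstance(conf, "scan-my-table");
TableMapReduceUtil.initTableMapperJob(TableName.valueOf("my_table"), scan,
    MyTableMapper.class, Text.class, IntWritable.class, job);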

ActiveUserRunner.java (project: big_data)

/**
 * Initialize the list of HBase scans.
 *
 * @param job
 * @return
 */
private List<Scan> initScans(Job job) {
  Configuration conf = job.getConfiguration();
  // Fetch the run date: yyyy-MM-dd
  String date = conf.get(GlobalConstants.RUNNING_DATE_PARAMES);
  long startDate = TimeUtil.parseString2Long(date);
  long endDate = startDate + GlobalConstants.DAY_OF_MILLISECONDS;
  Scan scan = new Scan();
  // Define the start and stop row keys for the HBase scan
  scan.setStartRow(Bytes.toBytes("" + startDate));
  scan.setStopRow(Bytes.toBytes("" + endDate));
  FilterList filterList = new FilterList();
  // Columns the mapper needs to read
  String[] columns = new String[] { EventLogConstants.LOG_COLUMN_NAME_UUID, // user id
      EventLogConstants.LOG_COLUMN_NAME_SERVER_TIME, // server time
      EventLogConstants.LOG_COLUMN_NAME_PLATFORM, // platform name
      EventLogConstants.LOG_COLUMN_NAME_BROWSER_NAME, // browser name
      EventLogConstants.LOG_COLUMN_NAME_BROWSER_VERSION // browser version
  };
  filterList.addFilter(this.getColumnFilter(columns));
  scan.setAttribute(Scan.SCAN_ATTRIBUTES_TABLE_NAME, Bytes.toBytes(EventLogConstants.HBASE_NAME_EVENT_LOGS));
  scan.setFilter(filterList);
  return Lists.newArrayList(scan);
}
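
Each Scan carries its target table in the SCAN_ATTRIBUTES_TABLE_NAME attribute, which is what the multi-scan overload of TableMapReduceUtil.initTableMapperJob reads. A sketch of how the returned list would plausibly be consumed (the mapper and output classes are placeholders, not this project's actual classes):

List<Scan> scans = initScans(job);
TableMapReduceUtil.initTableMapperJob(scans, ActiveUserMapper.class,
    Text.class, LongWritable.class, job);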

TestBinaryTokenFile.java (project: hadoop)

private void setupBinaryTokenFile(Job job) {
  // Credentials in the job will not have delegation tokens
  // because security is disabled. Fetch delegation tokens
  // and store in binary token file.
  createBinaryTokenFile(job.getConfiguration());
  job.getConfiguration().set(MRJobConfig.MAPREDUCE_JOB_CREDENTIALS_BINARY,
      binaryTokenFileName.toString());
  // NB: the MRJobConfig.MAPREDUCE_JOB_CREDENTIALS_BINARY
  // key now gets deleted from config,
  // so it's not accessible in the job's config. So,
  // we use another key to pass the file name into the job configuration:
  job.getConfiguration().set(KEY_SECURITY_TOKEN_FILE_NAME,
      binaryTokenFileName.toString());
}

TestCLI.java (project: hadoop)

@Test
public void testListAttemptIdsWithValidInput() throws Exception {
  JobID jobId = JobID.forName(jobIdStr);
  Cluster mockCluster = mock(Cluster.class);
  Job job = mock(Job.class);
  CLI cli = spy(new CLI());
  doReturn(mockCluster).when(cli).createCluster();
  when(job.getTaskReports(TaskType.MAP)).thenReturn(
      getTaskReports(jobId, TaskType.MAP));
  when(job.getTaskReports(TaskType.REDUCE)).thenReturn(
      getTaskReports(jobId, TaskType.REDUCE));
  when(mockCluster.getJob(jobId)).thenReturn(job);
  int retCode_MAP = cli.run(new String[] { "-list-attempt-ids", jobIdStr,
      "MAP", "running" });
  // testing case insensitive behavior
  int retCode_map = cli.run(new String[] { "-list-attempt-ids", jobIdStr,
      "map", "running" });
  int retCode_REDUCE = cli.run(new String[] { "-list-attempt-ids", jobIdStr,
      "REDUCE", "running" });
  int retCode_completed = cli.run(new String[] { "-list-attempt-ids",
      jobIdStr, "REDUCE", "completed" });
  assertEquals("MAP is a valid input,exit code should be 0", 0, retCode_MAP);
  assertEquals("map is a valid input,exit code should be 0", 0, retCode_map);
  assertEquals("REDUCE is a valid input,exit code should be 0", 0,
      retCode_REDUCE);
  assertEquals(
      "REDUCE and completed are a valid inputs to -list-attempt-ids,exit code should be 0",
      0, retCode_completed);
  verify(job, times(2)).getTaskReports(TaskType.MAP);
  verify(job, times(2)).getTaskReports(TaskType.REDUCE);
}

MultiFileWordCount.java (project: hadoop)

public int run(String[] args) throws Exception {
  if (args.length < 2) {
    printUsage();
    return 2;
  }
  Job job = Job.getInstance(getConf());
  job.setJobName("MultiFileWordCount");
  job.setJarByClass(MultiFileWordCount.class);
  // set the InputFormat of the job to our InputFormat
  job.setInputFormatClass(MyInputFormat.class);
  // the keys are words (strings)
  job.setOutputKeyClass(Text.class);
  // the values are counts (ints)
  job.setOutputValueClass(IntWritable.class);
  // use the defined mapper
  job.setMapperClass(MapClass.class);
  // use the WordCount Reducer
  job.setCombinerClass(IntSumReducer.class);
  job.setReducerClass(IntSumReducer.class);
  FileInputFormat.addInputPaths(job, args[0]);
  FileOutputFormat.setOutputPath(job, new Path(args[1]));
  return job.waitForCompletion(true) ? 0 : 1;
}