/**
 * Configures and runs the "word mean" job, then computes the mean from the
 * job's output directory.
 *
 * @param args exactly two entries: the input path and the output path
 * @return 0 if the job completed successfully, 1 if the job failed, 2 if the
 *         wrong number of arguments was supplied
 * @throws Exception if submitting or running the job, or reading its output,
 *         fails
 */
@Override
public int run(String[] args) throws Exception {
  if (args.length != 2) {
    System.err.println("Usage: wordmean <in> <out>");
    // A usage error is a failure and must not be reported as success.
    return 2;
  }

  Configuration conf = getConf();

  Job job = Job.getInstance(conf, "word mean");
  job.setJarByClass(WordMean.class);
  job.setMapperClass(WordMeanMapper.class);
  // The reducer class is also registered as the combiner.
  job.setCombinerClass(WordMeanReducer.class);
  job.setReducerClass(WordMeanReducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(LongWritable.class);

  FileInputFormat.addInputPath(job, new Path(args[0]));
  Path outputpath = new Path(args[1]);
  FileOutputFormat.setOutputPath(job, outputpath);

  boolean result = job.waitForCompletion(true);
  if (!result) {
    // Do not try to read the output of a failed job; just report the failure.
    return 1;
  }

  mean = readAndCalcMean(outputpath, conf);
  return 0;
}
java类org.apache.hadoop.io.LongWritable的实例源码
WordMean.java 文件源码
项目:hadoop
阅读 27
收藏 0
点赞 0
评论 0
TestBadRecords.java 文件源码
项目:hadoop
阅读 20
收藏 0
点赞 0
评论 0
/**
 * Forwards the incoming record unchanged, unless its text matches one of the
 * first three entries of MAPPER_BAD_RECORDS, in which case the mapper
 * misbehaves on purpose: entry 0 exits the JVM, entry 1 throws a
 * RuntimeException, and entry 2 sleeps for fifteen minutes before the record
 * is emitted.
 *
 * @param key record key, passed through to the output
 * @param val record value, passed through to the output
 * @param output collector receiving the (key, val) pair
 * @param reporter not used by this method
 * @throws IOException declared by the method signature
 */
public void map(LongWritable key, Text val,
    OutputCollector<LongWritable, Text> output, Reporter reporter)
    throws IOException {
  final String record = val.toString();
  LOG.debug("MAP key:" + key + " value:" + record);

  if (MAPPER_BAD_RECORDS.get(0).equals(record)) {
    // First bad record: terminate the JVM.
    LOG.warn("MAP Encountered BAD record");
    System.exit(-1);
  } else if (MAPPER_BAD_RECORDS.get(1).equals(record)) {
    // Second bad record: fail with an exception.
    LOG.warn("MAP Encountered BAD record");
    throw new RuntimeException("Bad record " + record);
  } else if (MAPPER_BAD_RECORDS.get(2).equals(record)) {
    // Third bad record: stall for a long time.
    LOG.warn("MAP Encountered BAD record");
    final long stallMillis = 15 * 60 * 1000;
    try {
      Thread.sleep(stallMillis);
    } catch (InterruptedException interrupted) {
      interrupted.printStackTrace();
    }
  }

  output.collect(key, val);
}
TestIPC.java 文件源码
项目:hadoop
阅读 30
收藏 0
点赞 0
评论 0
/**
 * Calls an address (127.0.0.1:10) with a standalone client and expects the
 * call to fail with an IOException whose message contains both the target
 * address and the message of the nested cause.
 *
 * @throws IOException declared by the method signature
 */
@Test(timeout=60000)
public void testStandAloneClient() throws IOException {
  Client client = new Client(LongWritable.class, conf);
  InetSocketAddress address = new InetSocketAddress("127.0.0.1", 10);
  try {
    client.call(new LongWritable(RANDOM.nextLong()),
        address, null, null, 0, conf);
    fail("Expected an exception to have been thrown");
  } catch (IOException e) {
    String message = e.getMessage();
    String addressText = address.getHostName() + ":" + address.getPort();
    assertTrue("Did not find "+addressText+" in "+message,
        message.contains(addressText));
    Throwable cause=e.getCause();
    assertNotNull("No nested exception in "+e,cause);
    String causeText=cause.getMessage();
    assertTrue("Did not find " + causeText + " in " + message,
        message.contains(causeText));
  } finally {
    // Always release the client, whether the test passes or an assertion fails.
    client.stop();
  }
}
MM2.java 文件源码
项目:MRNMF
阅读 21
收藏 0
点赞 0
评论 0
/**
 * Sums the comma-separated numeric vectors received for one key,
 * component by component, and writes the sum as a comma-separated string
 * preceded by {@code prefix}.
 *
 * The vector length is read from the "k" configuration entry (default -1).
 *
 * @param key the key shared by all values; also used as the output key
 * @param values comma-separated vectors to add up
 * @param context job context supplying the configuration and the output
 */
@Override
protected void reduce(LongWritable key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
  final int width = context.getConfiguration().getInt("k", -1);
  final double[] sums = new double[width];

  for (Text row : values) {
    final String[] cells = row.toString().split(",");
    int col = 0;
    while (col < width) {
      sums[col] += Double.parseDouble(cells[col]);
      col++;
    }
  }

  final StringBuilder joined = new StringBuilder(prefix);
  for (int col = 0; col < width; col++) {
    // Separator goes between components, never after the last one.
    if (col > 0) {
      joined.append(",");
    }
    joined.append(sums[col]);
  }

  context.write(key, new Text(joined.toString()));
}
TestLocalRunner.java 文件源码
项目:hadoop
阅读 25
收藏 0
点赞 0
评论 0
/**
 * Allocates 20000 boxed integers, sums them, drops the list, requests a
 * garbage collection, and then writes the sum as the key with the incoming
 * value unchanged.
 *
 * @param key not used by this method
 * @param val value written out unchanged
 * @param c context receiving the (sum, val) pair
 */
public void map(LongWritable key, Text val, Context c)
    throws IOException, InterruptedException {
  // Create a whole bunch of objects.
  final int objectCount = 20000;
  List<Integer> boxed = new ArrayList<Integer>();
  int next = 0;
  while (next < objectCount) {
    boxed.add(new Integer(next));
    next++;
  }

  // Actually use this list, to ensure that it isn't just optimized away.
  int total = 0;
  for (int idx = 0; idx < boxed.size(); idx++) {
    total += boxed.get(idx);
  }

  // Throw away the list and run a GC.
  boxed = null;
  System.gc();

  c.write(new LongWritable(total), val);
}
TestDFSIO.java 文件源码
项目:hadoop
阅读 20
收藏 0
点赞 0
评论 0
/**
 * Builds and runs a single-reducer job that reads sequence files from the
 * control directory, applies the given mapper and AccumulatingReducer, and
 * writes Text/Text output to {@code outputDir}.
 *
 * @param mapperClass mapper to run over the control files
 * @param outputDir directory the job writes to
 * @throws IOException if running the job fails
 */
private void runIOTest(
    Class<? extends Mapper<Text, LongWritable, Text, Text>> mapperClass,
    Path outputDir) throws IOException {
  final JobConf ioJob = new JobConf(config, TestDFSIO.class);

  // Input side.
  FileInputFormat.setInputPaths(ioJob, getControlDir(config));
  ioJob.setInputFormat(SequenceFileInputFormat.class);

  // Processing.
  ioJob.setMapperClass(mapperClass);
  ioJob.setReducerClass(AccumulatingReducer.class);
  ioJob.setNumReduceTasks(1);

  // Output side.
  FileOutputFormat.setOutputPath(ioJob, outputDir);
  ioJob.setOutputKeyClass(Text.class);
  ioJob.setOutputValueClass(Text.class);

  JobClient.runJob(ioJob);
}
TestIPC.java 文件源码
项目:hadoop-oss
阅读 27
收藏 0
点赞 0
评论 0
/**
 * Issues up to {@code count} calls with random long parameters and checks
 * that each returned value equals the parameter sent. A mismatch sets
 * {@code failed} and stops the loop; an exception sets {@code failed} and
 * the loop carries on with the next call.
 */
@Override
public void run() {
  for (int attempt = 0; attempt < count; attempt++) {
    try {
      final long sent = RANDOM.nextLong();
      final LongWritable echoed = call(client, sent, server, conf);
      if (echoed.get() == sent) {
        continue;
      }
      LOG.fatal("Call failed!");
      failed = true;
      break;
    } catch (Exception problem) {
      LOG.fatal("Caught: " + StringUtils.stringifyException(problem));
      failed = true;
    }
  }
}
TestIPC.java 文件源码
项目:hadoop-oss
阅读 24
收藏 0
点赞 0
评论 0
/**
 * Calls 127.0.0.1:10 with a standalone client and expects an IOException
 * whose message mentions the target address and the message of its nested
 * cause. The client is stopped in all cases.
 *
 * @throws IOException declared by the method signature
 */
@Test(timeout=60000)
public void testStandAloneClient() throws IOException {
  final Client standalone = new Client(LongWritable.class, conf);
  final InetSocketAddress target = new InetSocketAddress("127.0.0.1", 10);
  try {
    call(standalone, RANDOM.nextLong(), target, conf);
    fail("Expected an exception to have been thrown");
  } catch (IOException ioe) {
    final String outerMessage = ioe.getMessage();
    final String hostAndPort = target.getHostName() + ":" + target.getPort();
    assertTrue("Did not find " + hostAndPort + " in " + outerMessage,
        outerMessage.contains(hostAndPort));

    final Throwable nested = ioe.getCause();
    assertNotNull("No nested exception in " + ioe, nested);

    final String nestedMessage = nested.getMessage();
    assertTrue("Did not find " + nestedMessage + " in " + outerMessage,
        outerMessage.contains(nestedMessage));
  } finally {
    standalone.stop();
  }
}
TestIPC.java 文件源码
项目:hadoop-oss
阅读 29
收藏 0
点赞 0
评论 0
/**
 * Expects a call to a server that was created but never started to end in a
 * SocketTimeoutException when the client connect timeout is set to 100.
 *
 * @throws IOException declared by the method signature
 */
@Test(timeout=60000)
public void testIpcConnectTimeout() throws IOException {
  // Create the server only to obtain its address.
  Server server = new TestServer(1, true);
  InetSocketAddress addr = NetUtils.getConnectAddress(server);
  //Intentionally do not start server to get a connection timeout

  // start client
  Client.setConnectTimeout(conf, 100);
  Client client = new Client(LongWritable.class, conf);
  try {
    // set the rpc timeout to twice the MIN_SLEEP_TIME
    call(client, new LongWritable(RANDOM.nextLong()), addr,
        MIN_SLEEP_TIME * 2, conf);
    fail("Expected an exception to have been thrown");
  } catch (SocketTimeoutException e) {
    LOG.info("Get a SocketTimeoutException ", e);
  } finally {
    // Stop the client even when fail() fires or an unexpected exception escapes.
    client.stop();
  }
}
GenericMRLoadGenerator.java 文件源码
项目:hadoop
阅读 24
收藏 0
点赞 0
评论 0
/**
 * Builds one IndirectSplit per record of the sequence file named by the
 * INDIRECT_INPUT_FILE configuration entry. Each record's value is used as
 * the split's path and its key as the long passed alongside it.
 *
 * @param job job context supplying the configuration
 * @return the splits, in the order the records were read
 * @throws IOException if the file system or the sequence file cannot be read
 */
public List<InputSplit> getSplits(JobContext job)
    throws IOException {
  Configuration conf = job.getConfiguration();
  Path src = new Path(conf.get(INDIRECT_INPUT_FILE, null));
  FileSystem fs = src.getFileSystem(conf);

  List<InputSplit> splits = new ArrayList<InputSplit>();
  LongWritable key = new LongWritable();
  Text value = new Text();

  SequenceFile.Reader sl = new SequenceFile.Reader(fs, src, conf);
  try {
    while (sl.next(key, value)) {
      splits.add(new IndirectSplit(new Path(value.toString()), key.get()));
    }
  } finally {
    // Release the underlying stream even if reading a record fails.
    sl.close();
  }
  return splits;
}
TestChainErrors.java 文件源码
项目:hadoop
阅读 23
收藏 0
点赞 0
评论 0
/**
 * Runs a chain whose reducer is ConsumeReduce and checks that the job
 * succeeds and that its output is empty.
 *
 * @throws Exception
 */
public void testChainReduceNoOuptut() throws Exception {
  final Configuration jobConf = createJobConf();

  final Job chainJob =
      MapReduceTestUtil.createJob(jobConf, inDir, outDir, 1, 1, input);
  chainJob.setJobName("chain");

  // Map side of the chain.
  ChainMapper.addMapper(chainJob, Mapper.class, IntWritable.class,
      Text.class, LongWritable.class, Text.class, null);

  // Reduce side: the reducer, followed by one more mapper.
  ChainReducer.setReducer(chainJob, ConsumeReduce.class, LongWritable.class,
      Text.class, LongWritable.class, Text.class, null);
  ChainReducer.addMapper(chainJob, Mapper.class, LongWritable.class,
      Text.class, LongWritable.class, Text.class, null);

  chainJob.waitForCompletion(true);
  assertTrue("Job failed", chainJob.isSuccessful());

  final String actualOutput = MapReduceTestUtil.readOutput(outDir, jobConf);
  assertEquals("Outputs doesn't match", "", actualOutput);
}
TestChainErrors.java 文件源码
项目:hadoop
阅读 22
收藏 0
点赞 0
评论 0
/**
 * Runs a three-mapper chain whose middle mapper is FailMap and checks that
 * the job does not succeed.
 *
 * @throws Exception
 */
public void testChainFail() throws Exception {
  final Configuration jobConf = createJobConf();

  final Job chainJob =
      MapReduceTestUtil.createJob(jobConf, inDir, outDir, 1, 0, input);
  chainJob.setJobName("chain");

  ChainMapper.addMapper(chainJob, Mapper.class, LongWritable.class,
      Text.class, LongWritable.class, Text.class, null);
  // The failing mapper sits in the middle of the chain.
  ChainMapper.addMapper(chainJob, FailMap.class, LongWritable.class,
      Text.class, IntWritable.class, Text.class, null);
  ChainMapper.addMapper(chainJob, Mapper.class, IntWritable.class,
      Text.class, LongWritable.class, Text.class, null);

  chainJob.waitForCompletion(true);

  final boolean succeeded = chainJob.isSuccessful();
  assertTrue("Job Not failed", !succeeded);
}
TestFileSystem.java 文件源码
项目:hadoop
阅读 49
收藏 0
点赞 0
评论 0
/**
 * Deletes READ_DIR recursively and then runs a single-reducer job that reads
 * sequence files from CONTROL_DIR with SeekMapper and LongSumReducer,
 * writing Text/LongWritable output to READ_DIR.
 *
 * @param fs file system on which READ_DIR is deleted
 * @param fastCheck value stored under "fs.test.fastCheck" in the job
 * @throws Exception if the delete or the job fails
 */
public static void seekTest(FileSystem fs, boolean fastCheck)
    throws Exception {
  // Start from a clean output directory.
  fs.delete(READ_DIR, true);

  final JobConf seekJob = new JobConf(conf, TestFileSystem.class);
  seekJob.setBoolean("fs.test.fastCheck", fastCheck);

  // Input side.
  FileInputFormat.setInputPaths(seekJob, CONTROL_DIR);
  seekJob.setInputFormat(SequenceFileInputFormat.class);

  // Processing.
  seekJob.setMapperClass(SeekMapper.class);
  seekJob.setReducerClass(LongSumReducer.class);
  seekJob.setNumReduceTasks(1);

  // Output side.
  FileOutputFormat.setOutputPath(seekJob, READ_DIR);
  seekJob.setOutputKeyClass(Text.class);
  seekJob.setOutputValueClass(LongWritable.class);

  JobClient.runJob(seekJob);
}
TestJoinTupleWritable.java 文件源码
项目:hadoop
阅读 20
收藏 0
点赞 0
评论 0
/**
 * Builds a tuple from ten writables of mixed types and checks that
 * verifIter reports a count equal to the number of writables.
 *
 * @throws Exception declared by the method signature
 */
public void testNestedIterable() throws Exception {
  final Random rnd = new Random();
  final Writable[] writs = new Writable[] {
      new BooleanWritable(rnd.nextBoolean()),
      new FloatWritable(rnd.nextFloat()),
      new FloatWritable(rnd.nextFloat()),
      new IntWritable(rnd.nextInt()),
      new LongWritable(rnd.nextLong()),
      new BytesWritable("dingo".getBytes()),
      new LongWritable(rnd.nextLong()),
      new IntWritable(rnd.nextInt()),
      new BytesWritable("yak".getBytes()),
      new IntWritable(rnd.nextInt())
  };

  final TupleWritable tuple = makeTuple(writs);
  final int expectedCount = writs.length;
  assertTrue("Bad count", expectedCount == verifIter(writs, tuple, 0));
}
CompressionEmulationUtil.java 文件源码
项目:hadoop
阅读 22
收藏 0
点赞 0
评论 0
/**
 * Writes random word pairs until the UTF-8 byte length of the words written
 * reaches the size given in {@code value}. The desired output size is passed
 * as the value parameter to this map.
 */
@Override
public void map(NullWritable key, LongWritable value, Context context)
    throws IOException, InterruptedException {
  //TODO Control the extra data written ..
  //TODO Should the key\tvalue\n be considered for measuring size?
  //     Can counters like BYTES_WRITTEN be used? What will be the value of
  //     such counters in LocalJobRunner?
  long remaining = value.get();
  while (remaining > 0) {
    final String wordForKey = rtg.getRandomWord();
    final String wordForValue = rtg.getRandomWord();
    context.write(new Text(wordForKey), new Text(wordForValue));

    // Only the two words count towards the budget, not separators.
    final int written = wordForValue.getBytes(charsetUTF8).length
        + wordForKey.getBytes(charsetUTF8).length;
    remaining -= written;
  }
}
TF.java 文件源码
项目:Wikipedia-Index
阅读 25
收藏 0
点赞 0
评论 0
/**
 * Emits, for every word of a document's text section, the key
 * {@code <id>-<word>} with a two-element int array holding the document's
 * total word count and the constant 1.
 *
 * The text is the slice between "&lt;text" and "&lt;/text&gt;", lower-cased,
 * with every character outside 'a'..'z' replaced by a space. The id is the
 * slice between "&lt;id&gt;" and "&lt;/id&gt;". Documents with an empty text
 * or an empty id produce no output.
 *
 * @param key not used by this method
 * @param value the raw document
 * @param context receives one record per word
 */
@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
  String doc = value.toString();
  String text = slice(doc, "<text", "</text>", true);
  if (text.length() < 1) {
    return;
  }

  // Lower-case with a fixed locale: the default-locale variant maps 'I' to a
  // dotless i under Turkish/Azeri locales, which would then be blanked out.
  char[] txt = text.toLowerCase(java.util.Locale.ROOT).toCharArray();
  for (int i = 0; i < txt.length; ++i) {
    // After lower-casing no 'A'..'Z' can remain, so only 'a'..'z' is kept.
    if (txt[i] < 'a' || txt[i] > 'z') {
      txt[i] = ' ';
    }
  }

  String id = slice(doc, "<id>", "</id>", false);
  if (id.length() < 1) {
    return;
  }

  StringTokenizer itr = new StringTokenizer(String.valueOf(txt));
  // Total number of words in this document, attached to every emitted record.
  int sum = itr.countTokens();
  while (itr.hasMoreTokens()) {
    String s = itr.nextToken();
    word.set(id + '-' + s);
    IntWritable[] tmp = {new IntWritable(sum), new IntWritable(1)};
    IntArrayWritable temp = new IntArrayWritable(tmp);
    context.write(word, temp);
  }
}
CombineShimRecordReader.java 文件源码
项目:aliyun-maxcompute-data-collectors
阅读 30
收藏 0
点赞 0
评论 0
/**
 * Instantiates the record reader for the current child split:
 * a SequenceFileRecordReader when ExportJobBase reports the path as a
 * sequence file, otherwise a LineRecordReader. The result is stored in
 * {@code rr}.
 */
@SuppressWarnings("unchecked")
private void createChildReader() throws IOException, InterruptedException {
  LOG.debug("ChildSplit operates on: " + split.getPath(index));

  final Configuration jobConf = context.getConfiguration();

  // Pick the reader class from the format of the file being read.
  final boolean sequenceInput =
      ExportJobBase.isSequenceFiles(jobConf, split.getPath(index));
  final Class readerClass = sequenceInput
      ? SequenceFileRecordReader.class
      : LineRecordReader.class;

  // Create the chosen record reader reflectively.
  this.rr = (RecordReader<LongWritable, Object>)
      ReflectionUtils.newInstance(readerClass, jobConf);
}
TestDBInputFormat.java 文件源码
项目:hadoop
阅读 24
收藏 0
点赞 0
评论 0
/**
 * Exercises DBInputFormat: checks the number and size of the splits it
 * produces for three map tasks, then runs a few simple checks on the record
 * reader it returns.
 *
 * @throws Exception
 */
@Test(timeout = 10000)
public void testDBInputFormat() throws Exception {
  final JobConf jobConf = new JobConf();
  setupDriver(jobConf);

  final DBInputFormat<NullDBWritable> inputFormat =
      new DBInputFormat<NullDBWritable>();
  // NOTE(review): setConf is invoked twice back to back; the second call
  // looks redundant — confirm before removing.
  inputFormat.setConf(jobConf);
  inputFormat.setConf(jobConf);

  final DBInputFormat.DBInputSplit inputSplit =
      new DBInputFormat.DBInputSplit(1, 10);
  final Reporter mockReporter = mock(Reporter.class);
  final RecordReader<LongWritable, NullDBWritable> recordReader =
      inputFormat.getRecordReader(inputSplit, jobConf, mockReporter);

  // Split checks.
  jobConf.setInt(MRJobConfig.NUM_MAPS, 3);
  final InputSplit[] computedSplits = inputFormat.getSplits(jobConf, 3);
  assertEquals(5, computedSplits[0].getLength());
  assertEquals(3, computedSplits.length);

  // Reader checks: some simple tests.
  assertEquals(LongWritable.class, recordReader.createKey().getClass());
  assertEquals(0, recordReader.getPos());
  assertEquals(0, recordReader.getProgress(), 0.001);
  recordReader.close();
}
MatrixUpdater.java 文件源码
项目:MRNMF
阅读 24
收藏 0
点赞 0
评论 0
/**
 * Splits the input line on a tab into a numeric key and a payload, tags the
 * payload with "m:" when it contains no colon, and writes the pair.
 *
 * @param key not used by this method
 * @param value a line of the form {@code <long>\t<payload>}
 * @param context receives the (parsed key, payload) pair
 */
public void map(LongWritable key, Text value, Context context)
    throws IOException, NumberFormatException, InterruptedException {
  final String[] fields = value.toString().split("\t");

  // Untagged payloads get the "m:" tag.
  final String payload =
      fields[1].contains(":") ? fields[1] : "m:" + fields[1];

  final long rowId = Long.parseLong(fields[0]);
  context.write(new LongWritable(rowId), new Text(payload));
}
MatrixUpdater.java 文件源码
项目:MRNMF
阅读 22
收藏 0
点赞 0
评论 0
/**
 * Combines the "m", "a" and "b" vectors received for one key into a single
 * comma-separated vector whose j-th component is {@code m[j] * (a[j] / b[j])},
 * or {@code m[j] * sqrt(a[j] / b[j])} when the "sqrt" configuration flag is
 * set.
 *
 * Each value has the form {@code <name>:<comma-separated numbers>}; a vector
 * that is not received stays all zeros. Vectors have {@code k} components.
 *
 * @param key the key shared by all values; also used as the output key
 * @param values the tagged vectors for this key
 * @param context job context supplying the configuration and the output
 */
public void reduce(LongWritable key, Iterable<Text> values,
    Context context) throws IOException, InterruptedException {
  final boolean takeRoot =
      context.getConfiguration().getBoolean("sqrt", false);

  // One zero-filled vector per known name.
  final Map<String, double[]> vectors = new HashMap<>();
  for (String label : new String[] {"m", "a", "b"}) {
    vectors.put(label, new double[k]);
  }

  // Fill in whichever vectors arrived.
  for (Text value : values) {
    final String[] labelAndData = value.toString().split(":");
    final String[] cells = labelAndData[1].split(",");
    for (int j = 0; j < k; j++) {
      vectors.get(labelAndData[0])[j] = Double.parseDouble(cells[j]);
    }
  }

  final double[] m = vectors.get("m");
  final double[] a = vectors.get("a");
  final double[] b = vectors.get("b");

  final StringBuilder joined = new StringBuilder();
  for (int j = 0; j < k; j++) {
    if (j > 0) {
      joined.append(",");
    }
    final double ratio = a[j] / b[j];
    final double factor = takeRoot ? Math.sqrt(ratio) : ratio;
    joined.append(m[j] * factor);
  }

  context.write(key, new Text(joined.toString()));
}
DelimitedAndFixedWidthInputFormat.java 文件源码
项目:Hydrograph
阅读 22
收藏 0
点赞 0
评论 0
/**
 * Reports the split's string form as the task status and returns a
 * DelimitedAndFixedWidthRecordReader over the split.
 *
 * @param genericSplit the split to read; cast to FileSplit
 * @param job job configuration handed to the reader
 * @param reporter receives the status text
 * @return a reader for the given split
 * @throws IOException declared by the method signature
 */
@Override
public RecordReader<LongWritable, Text> getRecordReader(
    InputSplit genericSplit, JobConf job, Reporter reporter)
    throws IOException {
  final String status = genericSplit.toString();
  reporter.setStatus(status);

  final FileSplit fileSplit = (FileSplit) genericSplit;
  return new DelimitedAndFixedWidthRecordReader(job, fileSplit);
}
TestValueIterReset.java 文件源码
项目:hadoop
阅读 33
收藏 0
点赞 0
评论 0
/**
 * Ignores its input and writes every (j, i) pair with
 * {@code 0 <= j < NUM_TESTS} and {@code 0 <= i < NUM_VALUES}, grouped by j.
 *
 * @param key not used by this method
 * @param value not used by this method
 * @param context receives the generated pairs
 */
public void map(LongWritable key, Text value, Context context)
    throws IOException, InterruptedException {
  // The same two writables are reused for every record.
  final IntWritable testId = new IntWritable();
  final IntWritable valueId = new IntWritable();

  int test = 0;
  while (test < NUM_TESTS) {
    testId.set(test);
    int val = 0;
    while (val < NUM_VALUES) {
      valueId.set(val);
      context.write(testId, valueId);
      val++;
    }
    test++;
  }
}
FailJob.java 文件源码
项目:hadoop
阅读 26
收藏 0
点赞 0
评论 0
/**
 * Throws a RuntimeException when the FAIL_MAP configuration flag is set
 * (the default); otherwise writes the key with a null value.
 *
 * @param key written out when the mapper is not configured to fail
 * @param value not used by this method
 * @param context supplies the configuration and receives the output
 */
public void map(LongWritable key, Text value, Context context
    ) throws IOException, InterruptedException {
  final boolean shouldFail =
      context.getConfiguration().getBoolean(FAIL_MAP, true);
  if (!shouldFail) {
    context.write(key, NullWritable.get());
    return;
  }
  throw new RuntimeException("Intentional map failure");
}
BaileyBorweinPlouffe.java 文件源码
项目:hadoop
阅读 27
收藏 0
点赞 0
评论 0
/**
 * Creates a job named {@code NAME + "_" + name} and configures its mapper,
 * reducer, input format, and task settings.
 *
 * @param name suffix of the job name
 * @param conf configuration the job is created from
 * @return the configured job, not yet submitted
 * @throws IOException if the job cannot be created
 */
private static Job createJob(String name, Configuration conf
    ) throws IOException {
  final String jobName = NAME + "_" + name;
  final Job bbpJob = Job.getInstance(conf, jobName);
  bbpJob.setJarByClass(BaileyBorweinPlouffe.class);

  // Map side.
  bbpJob.setMapperClass(BbpMapper.class);
  bbpJob.setMapOutputKeyClass(LongWritable.class);
  bbpJob.setMapOutputValueClass(BytesWritable.class);

  // Reduce side: a single reducer.
  bbpJob.setReducerClass(BbpReducer.class);
  bbpJob.setOutputKeyClass(LongWritable.class);
  bbpJob.setOutputValueClass(BytesWritable.class);
  bbpJob.setNumReduceTasks(1);

  // Input.
  bbpJob.setInputFormatClass(BbpInputFormat.class);

  final Configuration settings = bbpJob.getConfiguration();
  // Disable the task timeout.
  settings.setLong(MRJobConfig.TASK_TIMEOUT, 0);
  // Do not use speculative execution.
  settings.setBoolean(MRJobConfig.MAP_SPECULATIVE, false);
  settings.setBoolean(MRJobConfig.REDUCE_SPECULATIVE, false);

  return bbpJob;
}
TextImportMapper.java 文件源码
项目:aliyun-maxcompute-data-collectors
阅读 18
收藏 0
点赞 0
评论 0
/**
 * Loads the record's large objects, then writes the record's string form as
 * the output key with a null value.
 *
 * @param key not used by this method
 * @param val the record to emit
 * @param context receives the output
 * @throws IOException if loading large objects raises an SQLException, which
 *         is wrapped, or if writing fails
 */
@Override
public void map(LongWritable key, SqoopRecord val, Context context)
    throws IOException, InterruptedException {
  // Loading of LOBs was delayed until we have a Context.
  try {
    val.loadLargeObjects(lobLoader);
  } catch (SQLException lobFailure) {
    throw new IOException(lobFailure);
  }

  final String line = val.toString();
  outkey.set(line);
  context.write(outkey, NullWritable.get());
}
TestLineRecordReader.java 文件源码
项目:hadoop
阅读 26
收藏 0
点赞 0
评论 0
/**
 * Reads the given file through LineRecordReader, one split of
 * {@code splitSize} bytes at a time, and returns every record in order.
 * The reader's buffer size ("io.file.buffer.size") is set to 1.
 *
 * @param testFileUrl location of the file to read
 * @param splitSize length of each split, in bytes
 * @return all records returned by the readers, in reading order
 * @throws IOException if a reader cannot be created, read, or closed
 */
public ArrayList<String> readRecords(URL testFileUrl, int splitSize)
    throws IOException {
  // Set up context
  File testFile = new File(testFileUrl.getFile());
  long testFileSize = testFile.length();
  Path testFilePath = new Path(testFile.getAbsolutePath());
  Configuration conf = new Configuration();
  conf.setInt("io.file.buffer.size", 1);

  // Gather the records returned by the record reader
  ArrayList<String> records = new ArrayList<String>();
  long offset = 0;
  LongWritable key = new LongWritable();
  Text value = new Text();
  while (offset < testFileSize) {
    FileSplit split =
        new FileSplit(testFilePath, offset, splitSize, (String[]) null);
    LineRecordReader reader = new LineRecordReader(conf, split);
    try {
      while (reader.next(key, value)) {
        records.add(value.toString());
      }
    } finally {
      // One reader is opened per split; close each one to avoid leaking it.
      reader.close();
    }
    offset += splitSize;
  }
  return records;
}
TestDFSIO.java 文件源码
项目:hadoop
阅读 24
收藏 0
点赞 0
评论 0
/**
 * Recreates the control directory and writes one sequence file per test
 * file, each holding a single (file name, nrBytes) record.
 *
 * @param fs file system on which the control files are written
 * @param nrBytes size, in bytes, recorded for every file
 * @param nrFiles number of control files to create
 * @throws IOException if a control file cannot be written; the original
 *         failure is attached as the cause
 */
@SuppressWarnings("deprecation")
private void createControlFile(FileSystem fs,
                               long nrBytes, // in bytes
                               int nrFiles
                               ) throws IOException {
  LOG.info("creating control file: "+nrBytes+" bytes, "+nrFiles+" files");

  Path controlDir = getControlDir(config);
  fs.delete(controlDir, true);

  for(int i=0; i < nrFiles; i++) {
    String name = getFileName(i);
    Path controlFile = new Path(controlDir, "in_file_" + name);
    SequenceFile.Writer writer = null;
    try {
      writer = SequenceFile.createWriter(fs, config, controlFile,
                                         Text.class, LongWritable.class,
                                         CompressionType.NONE);
      writer.append(new Text(name), new LongWritable(nrBytes));
    } catch(Exception e) {
      // Keep the original exception as the cause so its stack trace survives.
      throw new IOException(e.getLocalizedMessage(), e);
    } finally {
      if (writer != null) {
        writer.close();
      }
    }
  }
  LOG.info("created control files for: "+nrFiles+" files");
}
QuasiMonteCarlo.java 文件源码
项目:hadoop
阅读 29
收藏 0
点赞 0
评论 0
/**
 * Reduce task done, write output to a file.
 *
 * Writes a single (numInside, numOutside) record to the sequence file
 * "reduce-out" inside the job's output directory.
 *
 * @param context supplies the job configuration
 * @throws IOException if the file cannot be created, written, or closed
 */
@Override
public void cleanup(Context context) throws IOException {
  //write output to a file
  Configuration conf = context.getConfiguration();
  Path outDir = new Path(conf.get(FileOutputFormat.OUTDIR));
  Path outFile = new Path(outDir, "reduce-out");
  FileSystem fileSys = FileSystem.get(conf);
  SequenceFile.Writer writer = SequenceFile.createWriter(fileSys, conf,
      outFile, LongWritable.class, LongWritable.class,
      CompressionType.NONE);
  try {
    writer.append(new LongWritable(numInside), new LongWritable(numOutside));
  } finally {
    // Close the writer even if the append fails, so the stream is not leaked.
    writer.close();
  }
}
TestJoinProperties.java 文件源码
项目:hadoop
阅读 24
收藏 0
点赞 0
评论 0
/**
 * Fills {@code src} with {@code srcs} paths under {@code testdir} and opens
 * one sequence-file writer per path. All writers use IntWritable keys; every
 * writer but the last uses IntWritable values, the last uses LongWritable
 * values.
 *
 * @param testdir parent directory of the generated paths
 * @param conf configuration used to resolve the file system and open writers
 * @param srcs number of paths and writers to create
 * @param src array that receives the generated paths
 * @return the writers, in the same order as {@code src}
 * @throws IOException if a writer cannot be opened
 */
private static SequenceFile.Writer[] createWriters(Path testdir,
    Configuration conf, int srcs, Path[] src) throws IOException {
  // File names are the base-36 form of 10, 11, ...
  int idx = 0;
  while (idx < srcs) {
    src[idx] = new Path(testdir, Integer.toString(idx + 10, 36));
    ++idx;
  }

  final SequenceFile.Writer[] writers = new SequenceFile.Writer[srcs];
  final int last = srcs - 1;

  for (int w = 0; w < last; ++w) {
    writers[w] = new SequenceFile.Writer(testdir.getFileSystem(conf), conf,
        src[w], IntWritable.class, IntWritable.class);
  }

  // The final writer differs only in its value class.
  writers[last] = new SequenceFile.Writer(testdir.getFileSystem(conf), conf,
      src[last], IntWritable.class, LongWritable.class);

  return writers;
}
FixedLengthInputFormat.java 文件源码
项目:hadoop
阅读 23
收藏 0
点赞 0
评论 0
/**
 * Returns a FixedLengthRecordReader for the record length found in the task
 * configuration.
 *
 * @param split not used by this method
 * @param context supplies the configuration holding the record length
 * @return a reader for records of the configured length
 * @throws IOException if the configured record length is zero or negative
 */
@Override
public RecordReader<LongWritable, BytesWritable>
    createRecordReader(InputSplit split, TaskAttemptContext context)
    throws IOException, InterruptedException {
  final int recordLength = getRecordLength(context.getConfiguration());
  if (recordLength > 0) {
    return new FixedLengthRecordReader(recordLength);
  }
  throw new IOException("Fixed record length " + recordLength
      + " is invalid. It should be set to a value greater than zero");
}