当前日志采样格式如下,请编写 MapReduce 程序,统计第四列中每个元素出现的次数

发布于 2020-01-10 22:27:08
关注者
1
被浏览
1281
1 个回答
  • 面试哥
    面试哥 2020-01-10
    为面试而生,有面试问题,就找面试哥。

    a,b,c,d

    a,s,d,f

    d,f,g,c

    日志就是如上格式(每行四列,以逗号分隔)。

    代码如下,和 WordCount 的代码基本相同,甚至还要更简单一点。

     

    package make.hadoop.com.four_column;
    
    import java.io.IOException;
    
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.conf.Configured;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.Mapper;
    import org.apache.hadoop.mapreduce.Reducer;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
    import org.apache.hadoop.util.Tool;
    import org.apache.hadoop.util.ToolRunner;
    
    /**
     * MapReduce job that counts how many times each distinct value occurs in the
     * fourth comma-separated column of every input line.
     *
     * <p>Usage: {@code four_column <input path> <output path>}. An existing output
     * path is deleted before the job is submitted.
     */
    public class four_column extends Configured implements Tool {

    	/**
    	 * Mapper: for each input line emits {@code <fourth column, 1>}.
    	 *
    	 * <p>Type parameters are {@code <input key, input value, output key,
    	 * output value>}.
    	 */
    	public static class MyMapper extends
    			Mapper<LongWritable, Text, Text, IntWritable> {
    		/** Zero-based index of the column being counted (the fourth column). */
    		private static final int COLUMN_INDEX = 3;

    		// Reused for every record to avoid allocating a new object per line.
    		private final IntWritable one = new IntWritable(1);
    		private final Text outputKey = new Text();

    		/**
    		 * Splits the line on commas and emits the fourth field with a count of 1.
    		 * Lines with fewer than four fields (blank lines, truncated records) are
    		 * skipped and tallied in the {@code four_column/MALFORMED_LINES} counter
    		 * instead of failing the task with an ArrayIndexOutOfBoundsException.
    		 *
    		 * @param key     input key supplied by the input format (unused)
    		 * @param value   one line of input text
    		 * @param context task context used to emit output and update counters
    		 */
    		@Override
    		protected void map(LongWritable key, Text value, Context context)
    				throws IOException, InterruptedException {
    			String[] fields = value.toString().split(",");
    			if (fields.length <= COLUMN_INDEX) {
    				context.getCounter("four_column", "MALFORMED_LINES").increment(1);
    				return;
    			}
    			outputKey.set(fields[COLUMN_INDEX]);
    			context.write(outputKey, one);
    		}
    	}

    	/**
    	 * Reducer: receives the mapper output grouped by key, i.e.
    	 * {@code <text, (1, 1, ...)>}, and emits {@code <text, sum>}.
    	 */
    	public static class MyReduce extends
    			Reducer<Text, IntWritable, Text, IntWritable> {

    		// Reused for every key to avoid allocating a new object per group.
    		private final IntWritable countvalue = new IntWritable();

    		/**
    		 * Sums all counts received for one key and writes the total.
    		 *
    		 * @param inputkey   a distinct fourth-column value
    		 * @param inputvalue the counts emitted for that value
    		 * @param context    task context used to emit the result
    		 */
    		@Override
    		protected void reduce(Text inputkey, Iterable<IntWritable> inputvalue,
    				Context context) throws IOException, InterruptedException {
    			int sum = 0;
    			for (IntWritable i : inputvalue) {
    				sum += i.get();
    			}
    			countvalue.set(sum);
    			context.write(inputkey, countvalue);
    		}
    	}

    	/**
    	 * Configures and runs the job. Invoked by {@link ToolRunner} from
    	 * {@link #main(String[])}.
    	 *
    	 * @param args the arguments forwarded from main: {@code args[0]} is the input
    	 *             path, {@code args[1]} is the output path
    	 * @return 0 if the job succeeded, 1 if it failed, 2 if the arguments are
    	 *         missing
    	 * @throws Exception if the job cannot be configured or submitted
    	 */
    	@Override
    	public int run(String[] args) throws Exception {
    		if (args == null || args.length < 2) {
    			System.err.println("Usage: four_column <input path> <output path>");
    			return 2;
    		}

    		Job job = Job.getInstance(this.getConf(), "four_column");

    		// set main class
    		job.setJarByClass(four_column.class);

    		// set mapper
    		job.setMapperClass(MyMapper.class);
    		job.setMapOutputKeyClass(Text.class);
    		job.setMapOutputValueClass(IntWritable.class);

    		// set reducer
    		job.setReducerClass(MyReduce.class);
    		job.setOutputKeyClass(Text.class);
    		job.setOutputValueClass(IntWritable.class);

    		// set paths
    		Path inpath = new Path(args[0]);
    		FileInputFormat.setInputPaths(job, inpath);
    		Path outpath = new Path(args[1]);
    		FileOutputFormat.setOutputPath(job, outpath);

    		// Resolve the file system from the output path and the job's own
    		// configuration so the check below targets the same file system the job
    		// writes to, rather than one built from a fresh default Configuration.
    		FileSystem fs = outpath.getFileSystem(job.getConfiguration());
    		// Delete the output path if it already exists.
    		if (fs.exists(outpath)) {
    			fs.delete(outpath, true);
    		}
    		job.setNumReduceTasks(1);

    		boolean status = job.waitForCompletion(true);

    		if (!status) {
    			System.err.println("the job is error!!");
    		}

    		return status ? 0 : 1;
    	}

    	/**
    	 * Entry point: runs the job through {@link ToolRunner} and exits with the
    	 * job's status code, or with 1 if running the job threw an exception.
    	 *
    	 * @param args command-line arguments: input path and output path
    	 */
    	public static void main(String[] args) throws IOException,
    			ClassNotFoundException, InterruptedException {

    		Configuration conf = new Configuration();

    		int status;
    		try {
    			status = ToolRunner.run(conf, new four_column(), args);
    		} catch (Exception e) {
    			e.printStackTrace();
    			// Report the failure to the caller instead of exiting with 0.
    			status = 1;
    		}
    		System.exit(status);
    	}
    }
    

     

知识点
面圈网VIP题库

面圈网VIP题库全新上线,海量真题题库资源。 90大类考试,超10万份考试真题开放下载啦

去下载看看