hadoop wordcount入门

时间：2017-09-01 22:53:00 阅读：245 评论：0 收藏：0 [点我收藏+]

配置：Ubuntu 14.04，伪分布式 Hadoop 1.0.4

WordCount 入门程序，摘自《Hadoop 基础教程》

import java.io.*;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapreduce.*;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class WordCount {
　　　　　//map操作
	public static class WordCountMapper extends Mapper<Object, Text, Text, IntWritable> {
		
		private final static IntWritable one = new IntWritable(1);
		private Text word = new Text();
		
		public void map(Object key, Text value, Context context) throws IOException, InterruptedException{
			String[] words = value.toString().split(" ");
			for(String str:words) {
				word.set(str);
				context.write(word, one);
			}
		}
	}
	
        //reduce操作
	public static class WordCountReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
		public void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException{
			int total = 0;
			for(IntWritable val : values) {
				total++;
			}
			context.write(key, new IntWritable(total));
		}
	}
	public static void main(String[] args) {
		try{
			//创建Configuration对象，用于设置其他选项
			Configuration conf = new Configuration();
			//创建作业对象
			Job job = new Job(conf, "WordCount");
			//设置作业jarfile中主类名字
			job.setJarByClass(WordCount.class);
			//设置mapper类
			job.setMapperClass(WordCountMapper.class);
			//设置reduce类
			job.setReducerClass(WordCountReducer.class);
			//设置输出的类型
			job.setOutputKeyClass(Text.class);
			job.setOutputValueClass(IntWritable.class);
			//设置输入和输出文件路径
			FileInputFormat.addInputPath(job, new Path(args[0]));
			FileOutputFormat.setOutputPath(job, new Path(args[1]));
			//等待程序退出
			System.exit(job.waitForCompletion(true)?0:1);
		}catch(Exception e) {
			//system.out.println("出错");
		}
	}
}

hadoop wordcount入门

原文：http://www.cnblogs.com/xingxing1024/p/7465301.html

踩

(0)

评论一句话评论（0）

分享档案

更多>

2021年09月23日 (328)
2021年09月24日 (313)
2021年09月17日 (191)
2021年09月15日 (369)
2021年09月16日 (411)
2021年09月13日 (439)
2021年09月11日 (398)
2021年09月12日 (393)
2021年09月10日 (160)
2021年09月08日 (222)