// 配置: ubuntu14.04 伪分布式 hadoop1.0.4
// wordcount入门程序, 摘自hadoop基础教程
import java.io.*;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapreduce.*;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class WordCount {
     //map操作
	public static class WordCountMapper extends Mapper<Object, Text, Text, IntWritable> {
		
		private final static IntWritable one = new IntWritable(1);
		private Text word = new Text();
		
		public void map(Object key, Text value, Context context) throws IOException, InterruptedException{
			String[] words = value.toString().split(" ");
			for(String str:words) {
				word.set(str);
				context.write(word, one);
			}
		}
	}
	
        //reduce操作
	public static class WordCountReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
		public void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException{
			int total = 0;
			for(IntWritable val : values) {
				total++;
			}
			context.write(key, new IntWritable(total));
		}
	}
	public static void main(String[] args) {
		try{
			//创建Configuration对象,用于设置其他选项
			Configuration conf = new Configuration();
			//创建作业对象
			Job job = new Job(conf, "WordCount");
			//设置作业jarfile中主类名字
			job.setJarByClass(WordCount.class);
			//设置mapper类
			job.setMapperClass(WordCountMapper.class);
			//设置reduce类
			job.setReducerClass(WordCountReducer.class);
			//设置输出的类型
			job.setOutputKeyClass(Text.class);
			job.setOutputValueClass(IntWritable.class);
			//设置输入和输出文件路径
			FileInputFormat.addInputPath(job, new Path(args[0]));
			FileOutputFormat.setOutputPath(job, new Path(args[1]));
			//等待程序退出
			System.exit(job.waitForCompletion(true)?0:1);
		}catch(Exception e) {
			//system.out.println("出错");
		}
	}
}
// 原文: http://www.cnblogs.com/xingxing1024/p/7465301.html