自动将指定路径下的文件提交到 Hadoop 系统,然后调用 wordcount 任务进行单词统计,并把输出文件下载到本地。
#!/bin/sh
# Upload the files found at the given path to HDFS, run the Hadoop example
# "wordcount" job over them, and download the result files (part-r-*) into
# the current directory.
#
# Usage: <script> <path>
#   <path>  a directory (its regular files are submitted) or a single file.
#
# Files that have been uploaded are recorded in wordsubmitlog.log (one path
# per line) and are skipped on later runs. Output of the HDFS cleanup
# commands is appended to hadoopdellog.log.
#
# Exit status: 1 when the argument count is wrong or the wordcount job fails.

if [ "$#" -ne 1 ]; then
  echo "请输入文件的路径" >&2
  exit 1
fi

input_path=$1
submitcount=0
wordsubmitlog=wordsubmitlog.log
hadoopdellog=hadoopdellog.log
hdfs_input=/wz/wordcount/input
hdfs_output=/wz/wordcount/output

# Make sure the submission log exists before it is searched below.
[ -f "$wordsubmitlog" ] || touch "$wordsubmitlog"

# Build the candidate list in the positional parameters so that paths
# containing whitespace survive intact. For a directory, let the shell glob
# its entries instead of parsing `ls` output.
if [ -d "$input_path" ]; then
  set -- "${input_path%/}"/*
else
  set -- "$input_path"
fi

for f in "$@"; do
  # Skips sub-directories and the literal pattern left by an unmatched glob.
  [ -f "$f" ] || continue

  # -F -x: compare the path as a fixed string against whole log lines, so
  # "a.txt" does not match an earlier "aa.txt" entry.
  if ! grep -Fxq -- "$f" "$wordsubmitlog"; then
    # Record the file only after the upload succeeded; otherwise a failed
    # upload would be skipped forever on later runs.
    if hadoop fs -put "$f" "$hdfs_input/"; then
      printf '%s\n' "$f" >> "$wordsubmitlog"
      submitcount=$((submitcount + 1))
    fi
  fi
done

# Run the job only when at least one new file was uploaded.
if [ "$submitcount" -ge 1 ]; then
  # Stop here if the job fails, so that previously downloaded results and
  # the uploaded input are not deleted and success is not reported.
  hadoop jar /usr/hadoop/hadoop-examples-1.2.1.jar wordcount \
    "$hdfs_input" "$hdfs_output" || exit 1

  # Replace any results from a previous run with the fresh ones, downloaded
  # into the current directory.
  rm -f part-r-*
  # The HDFS globs are quoted so that hadoop expands them, not the local shell.
  hadoop fs -get "$hdfs_output/part-r-*" .

  # Timestamp, then the output of the HDFS cleanup, go to the deletion log.
  date '+%Y-%m-%d %H:%M:%S' >> "$hadoopdellog"
  hadoop fs -rmr "$hdfs_input/*" >> "$hadoopdellog"
  hadoop fs -rmr "$hdfs_output" >> "$hadoopdellog"
  echo "执行完成"
else
  echo "没有提交的文件"
fi
原文:http://www.cnblogs.com/ringwang/p/3634269.html