一、分析运行wordcount程序屏幕上打印信息
##运行wordcount单词频率统计程序,指定输入路径和输出路径。
[[email protected] hadoop-1.2.1]$ hadoop jar hadoop-examples-1.2.1.jar wordcount /wc/mininput /wc/minoutput
##告知输入路径下有几个文件需要处理
16/03/14 05:08:59 INFO input.FileInputFormat: Total input paths to process : 2
##告知本地的Hadoop库文件已成功加载,默认情况下,在hadoop1.x中存放在$HADOOP_HOME/c++/Linux-amd64-64/lib(针对64bit Linux操作系统)
16/03/14 05:08:59 INFO util.NativeCodeLoader: Loaded the native-hadoop library
##告知本地的snappy压缩算法的库未被加载,默认情况下,Linux是没有相应的库的
16/03/14 05:08:59 WARN snappy.LoadSnappy: Snappy native library not loaded
##运行Job的相关进度信息
###加载本地的snappy压缩算法的库,默认情况下,Linux是没有相应的库的,当没有配置时不显示
###运行Job的ID
16/03/14 05:08:59 INFO mapred.JobClient: Running job: job_201603140438_0001
###Job运行时,Map task 和 Reduce task的运行进度
16/03/14 05:09:00 INFO mapred.JobClient: map 0% reduce 0%
16/03/14 05:09:12 INFO mapred.JobClient: map 100% reduce 0%
16/03/14 05:09:21 INFO mapred.JobClient: map 100% reduce 33%
16/03/14 05:09:23 INFO mapred.JobClient: map 100% reduce 100%
###Job运行完成
16/03/14 05:09:24 INFO mapred.JobClient: Job complete: job_201603140438_0001
##显示整个Job运行过程中,各类计算器Counter的值
##总共有29种计数器Counter
16/03/14 05:09:24 INFO mapred.JobClient: Counters: 29
16/03/14 05:09:24 INFO mapred.JobClient: Map-Reduce Framework ##计数器个数:16
16/03/14 05:09:24 INFO mapred.JobClient: Spilled Records=66
16/03/14 05:09:24 INFO mapred.JobClient: Map output materialized bytes=440
16/03/14 05:09:24 INFO mapred.JobClient: Reduce input records=33
16/03/14 05:09:24 INFO mapred.JobClient: Virtual memory (bytes) snapshot=5798617088
16/03/14 05:09:24 INFO mapred.JobClient: Map input records=18
16/03/14 05:09:24 INFO mapred.JobClient: SPLIT_RAW_BYTES=244
16/03/14 05:09:24 INFO mapred.JobClient: Map output bytes=395
16/03/14 05:09:24 INFO mapred.JobClient: Reduce shuffle bytes=440
16/03/14 05:09:24 INFO mapred.JobClient: Physical memory (bytes) snapshot=411492352
16/03/14 05:09:24 INFO mapred.JobClient: Reduce input groups=33
16/03/14 05:09:24 INFO mapred.JobClient: Combine output records=33
16/03/14 05:09:24 INFO mapred.JobClient: Reduce output records=33
16/03/14 05:09:24 INFO mapred.JobClient: Map output records=37
16/03/14 05:09:24 INFO mapred.JobClient: Combine input records=37
16/03/14 05:09:24 INFO mapred.JobClient: CPU time spent (ms)=2100
16/03/14 05:09:24 INFO mapred.JobClient: Total committed heap usage (bytes)=337780736
16/03/14 05:09:24 INFO mapred.JobClient: File Input Format Counters ##计数器个数:1
16/03/14 05:09:24 INFO mapred.JobClient: Bytes Read=261
16/03/14 05:09:24 INFO mapred.JobClient: FileSystemCounters ##计数器个数:4
16/03/14 05:09:24 INFO mapred.JobClient: HDFS_BYTES_READ=505
16/03/14 05:09:24 INFO mapred.JobClient: FILE_BYTES_WRITTEN=164155
16/03/14 05:09:24 INFO mapred.JobClient: FILE_BYTES_READ=434
16/03/14 05:09:24 INFO mapred.JobClient: HDFS_BYTES_WRITTEN=296
16/03/14 05:09:24 INFO mapred.JobClient: Job Counters ##计数器个数:7
16/03/14 05:09:24 INFO mapred.JobClient: Launched map tasks=2
16/03/14 05:09:24 INFO mapred.JobClient: Launched reduce tasks=1
16/03/14 05:09:24 INFO mapred.JobClient: SLOTS_MILLIS_REDUCES=10635
16/03/14 05:09:24 INFO mapred.JobClient: Total time spent by all reduces waiting after reserving slots (ms)=0
16/03/14 05:09:24 INFO mapred.JobClient: SLOTS_MILLIS_MAPS=18759
16/03/14 05:09:24 INFO mapred.JobClient: Total time spent by all maps waiting after reserving slots (ms)=0
16/03/14 05:09:24 INFO mapred.JobClient: Data-local map tasks=2
16/03/14 05:09:24 INFO mapred.JobClient: File Output Format Counters ##计数器个数:1
16/03/14 05:09:24 INFO mapred.JobClient: Bytes Written=296
1 package org.dragon.hadoop.mapreduce.counter;
2
3 import java.io.IOException;
4 import java.util.StringTokenizer;
5
6 import org.apache.hadoop.conf.Configuration;
7 import org.apache.hadoop.fs.Path;
8 import org.apache.hadoop.io.IntWritable;
9 import org.apache.hadoop.io.LongWritable;
10 import org.apache.hadoop.io.Text;
11 import org.apache.hadoop.mapreduce.Job;
12 import org.apache.hadoop.mapreduce.Mapper;
13 import org.apache.hadoop.mapreduce.Reducer;
14 import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
15 import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
16 import org.apache.hadoop.util.GenericOptionsParser;
17
18 /**
19 * function:在wordcount中自定义计数器样例
20 * @author ZhuXY
21 * @time 2016-3-14 上午10:48:29
22 *
23 */
24 public class DIYCounter {
25 static class MyMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
26
27 private Text word = new Text();
28 private final static IntWritable one = new IntWritable(1);
29
30 // 快捷键alt+shift+s
31 // map方法每次只读取一行数据,换句话说就是每行启动一个map函数
32 @Override
33 protected void map(LongWritable key, Text value, Context context)
34 throws IOException, InterruptedException {
35
36 //###############################################################
37 context.getCounter("DIYCOUTER_MAP_REDUCE", "MAP_INPUT_KEYVALUES")
38 .increment(1L);
39 //###############################################################
40
41 // 获取每行数据的值
42 String lineValue = value.toString();
43
44 // 进行分割
45 StringTokenizer stringTokenizer = new StringTokenizer(lineValue);
46
47 // 遍历
48 while (stringTokenizer.hasMoreElements()) {
49
50 // 获取每个值
51 String worldValue = stringTokenizer.nextToken();
52
53 // 设置map, 输入的key值
54 word.set(worldValue);
55 context.write(word, one); // 如果出现就出现一次,存在每行出现几次,这时候键的值一样,多个键值对
56 }
57 }
58 }
59
60 // Reducer 区域
61 static class MyReducer extends
62 Reducer<Text, IntWritable, Text, IntWritable> {
63
64 private IntWritable resultIntWritable = new IntWritable();
65
66 // 这里key还是key。但是value变成了values
67 @Override
68 protected void reduce(Text key, Iterable<IntWritable> values,
69 Context context) throws IOException, InterruptedException {
70
71 context.getCounter("DIYCOUTER_MAP_REDUCE", "REDUCE_INPUT_KEYVALUES")
72 .increment(1L);
73
74 // 用于累加的变量
75 int sum = 0;
76 // 循环遍历Interable
77 for (IntWritable value : values) {
78 // 累加
79 sum += value.get();
80 }
81
82 // 设置总次数
83 resultIntWritable.set(sum);
84 context.getCounter("DIYCOUTER_MAP_REDUCE", "REDUCE_OUTPUT_KEYVALUES")
85 .increment(1L);
86 context.write(key, resultIntWritable);
87 }
88 }
89
90 // Client 区域
91 public static void main(String[] args) throws Exception {
92
93 // 获取配置文件信息
94 Configuration configuration = new Configuration();
95
96 // 配置输入输出文件路径
97 args = new String[] {
98 "hdfs://hadoop-master.dragon.org:9000/wc/mininput",
99 "hdfs://hadoop-master.dragon.org:9000/wc/wcoutput" };
100
101 // 当命令格式不正确的时候,提示,类似于shell中的命令提示
102 String[] otherArgs = new GenericOptionsParser(configuration, args)
103 .getRemainingArgs();
104 if (otherArgs.length != 2) {
105 System.err.println("Usage: wordcount <in> <out>");
106 System.exit(2);
107 }
108
109 // 创建job。设置配置文件信息和Job名称
110 Job job = new Job(configuration, "wc");
111
112 // 1、设置Job运行的类
113 job.setJarByClass(DIYCounter.class);
114
115 // 2、设置Mapper类和Reducer类
116 job.setMapperClass(MyMapper.class);
117 job.setReducerClass(MyReducer.class);
118
119 // 3、设置输入文件的目录和输出文件目录
120 FileInputFormat.addInputPath(job, new Path(args[0]));
121 FileOutputFormat.setOutputPath(job, new Path(args[1]));
122
123 // 4、设置输出结果的key和value的类型
124 job.setOutputKeyClass(Text.class);
125 job.setOutputValueClass(IntWritable.class);
126
127 // 5、提交Job等待运行结果,并在客户端显示运行信息
128 boolean isSuccess = job.waitForCompletion(true);
129
130 // 6、结束程序
131 System.exit(isSuccess ? 0 : 1);
132 }
133 }
View计数器 Code