一、分析運作wordcount程式螢幕上列印資訊
##運作wordcount單詞頻率統計程式,指定輸入路徑與輸出路徑。
[[email protected] hadoop-1.2.1]$ hadoop jar hadoop-examples-1.2.1.jar wordcount /wc/mininput /wc/minoutput
##告知輸入路徑下有幾個檔案需要處理
16/03/14 05:08:59 INFO input.FileInputFormat: Total input paths to process : 2
##告知本地的Hadoop原生庫已成功加載,預設情況下,在hadoop1.x中存放在$HADOOP_HOME/c++/Linux-amd64-64/lib(針對64bit Linux作業系統)
16/03/14 05:08:59 INFO util.NativeCodeLoader: Loaded the native-hadoop library
##警告本地的snappy壓縮算法的庫未加載,預設情況下,Linux是沒有相應的庫的
16/03/14 05:08:59 WARN snappy.LoadSnappy: Snappy native library not loaded
##運作Job的相關進度資訊
###加載本地的snappy壓縮算法的庫,預設情況下,Linux是沒有相應的庫的,當沒有配置時不顯示
###運作Job的ID
16/03/14 05:08:59 INFO mapred.JobClient: Running job: job_201603140438_0001
###Job運作時,Map task 和 Reduce task的運作進度
16/03/14 05:09:00 INFO mapred.JobClient: map 0% reduce 0%
16/03/14 05:09:12 INFO mapred.JobClient: map 100% reduce 0%
16/03/14 05:09:21 INFO mapred.JobClient: map 100% reduce 33%
16/03/14 05:09:23 INFO mapred.JobClient: map 100% reduce 100%
###Job運作完成
16/03/14 05:09:24 INFO mapred.JobClient: Job complete: job_201603140438_0001
##顯示整個Job運作過程中,各類電腦Counter的值
##總共有29種計數器Counter
16/03/14 05:09:24 INFO mapred.JobClient: Counters: 29
16/03/14 05:09:24 INFO mapred.JobClient: Map-Reduce Framework ##計數器個數:16
16/03/14 05:09:24 INFO mapred.JobClient: Spilled Records=66
16/03/14 05:09:24 INFO mapred.JobClient: Map output materialized bytes=440
16/03/14 05:09:24 INFO mapred.JobClient: Reduce input records=33
16/03/14 05:09:24 INFO mapred.JobClient: Virtual memory (bytes) snapshot=5798617088
16/03/14 05:09:24 INFO mapred.JobClient: Map input records=18
16/03/14 05:09:24 INFO mapred.JobClient: SPLIT_RAW_BYTES=244
16/03/14 05:09:24 INFO mapred.JobClient: Map output bytes=395
16/03/14 05:09:24 INFO mapred.JobClient: Reduce shuffle bytes=440
16/03/14 05:09:24 INFO mapred.JobClient: Physical memory (bytes) snapshot=411492352
16/03/14 05:09:24 INFO mapred.JobClient: Reduce input groups=33
16/03/14 05:09:24 INFO mapred.JobClient: Combine output records=33
16/03/14 05:09:24 INFO mapred.JobClient: Reduce output records=33
16/03/14 05:09:24 INFO mapred.JobClient: Map output records=37
16/03/14 05:09:24 INFO mapred.JobClient: Combine input records=37
16/03/14 05:09:24 INFO mapred.JobClient: CPU time spent (ms)=2100
16/03/14 05:09:24 INFO mapred.JobClient: Total committed heap usage (bytes)=337780736
16/03/14 05:09:24 INFO mapred.JobClient: File Input Format Counters ##計數器個數:1
16/03/14 05:09:24 INFO mapred.JobClient: Bytes Read=261
16/03/14 05:09:24 INFO mapred.JobClient: FileSystemCounters ##計數器個數:4
16/03/14 05:09:24 INFO mapred.JobClient: HDFS_BYTES_READ=505
16/03/14 05:09:24 INFO mapred.JobClient: FILE_BYTES_WRITTEN=164155
16/03/14 05:09:24 INFO mapred.JobClient: FILE_BYTES_READ=434
16/03/14 05:09:24 INFO mapred.JobClient: HDFS_BYTES_WRITTEN=296
16/03/14 05:09:24 INFO mapred.JobClient: Job Counters ##計數器個數:7
16/03/14 05:09:24 INFO mapred.JobClient: Launched map tasks=2
16/03/14 05:09:24 INFO mapred.JobClient: Launched reduce tasks=1
16/03/14 05:09:24 INFO mapred.JobClient: SLOTS_MILLIS_REDUCES=10635
16/03/14 05:09:24 INFO mapred.JobClient: Total time spent by all reduces waiting after reserving slots (ms)=0
16/03/14 05:09:24 INFO mapred.JobClient: SLOTS_MILLIS_MAPS=18759
16/03/14 05:09:24 INFO mapred.JobClient: Total time spent by all maps waiting after reserving slots (ms)=0
16/03/14 05:09:24 INFO mapred.JobClient: Data-local map tasks=2
16/03/14 05:09:24 INFO mapred.JobClient: File Output Format Counters ##計數器個數:1
16/03/14 05:09:24 INFO mapred.JobClient: Bytes Written=296
1 package org.dragon.hadoop.mapreduce.counter;
2
3 import java.io.IOException;
4 import java.util.StringTokenizer;
5
6 import org.apache.hadoop.conf.Configuration;
7 import org.apache.hadoop.fs.Path;
8 import org.apache.hadoop.io.IntWritable;
9 import org.apache.hadoop.io.LongWritable;
10 import org.apache.hadoop.io.Text;
11 import org.apache.hadoop.mapreduce.Job;
12 import org.apache.hadoop.mapreduce.Mapper;
13 import org.apache.hadoop.mapreduce.Reducer;
14 import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
15 import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
16 import org.apache.hadoop.util.GenericOptionsParser;
17
18 /**
19 * function:在wordcount中自定義計數器樣例
20 * @author ZhuXY
21 * @time 2016-3-14 上午10:48:29
22 *
23 */
24 public class DIYCounter {
25 static class MyMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
26
27 private Text word = new Text();
28 private final static IntWritable one = new IntWritable(1);
29
30 // 快捷鍵alt+shift+s
31 // map方法每次隻讀取一行資料,換句話說就是每行啟動一個map函數
32 @Override
33 protected void map(LongWritable key, Text value, Context context)
34 throws IOException, InterruptedException {
35
36 //###############################################################
37 context.getCounter("DIYCOUTER_MAP_REDUCE", "MAP_INPUT_KEYVALUES")
38 .increment(1L);
39 //###############################################################
40
41 // 擷取每行資料的值
42 String lineValue = value.toString();
43
44 // 進行分割
45 StringTokenizer stringTokenizer = new StringTokenizer(lineValue);
46
47 // 周遊
48 while (stringTokenizer.hasMoreElements()) {
49
50 // 擷取每個值
51 String worldValue = stringTokenizer.nextToken();
52
53 // 設定map, 輸入的key值
54 word.set(worldValue);
55 context.write(word, one); // 如果出現就出現一次,存在每行出現幾次,這時候鍵的值一樣,多個鍵值對
56 }
57 }
58 }
59
60 // Reducer 區域
61 static class MyReducer extends
62 Reducer<Text, IntWritable, Text, IntWritable> {
63
64 private IntWritable resultIntWritable = new IntWritable();
65
66 // 這裡key還是key。但是value變成了values
67 @Override
68 protected void reduce(Text key, Iterable<IntWritable> values,
69 Context context) throws IOException, InterruptedException {
70
71 context.getCounter("DIYCOUTER_MAP_REDUCE", "REDUCE_INPUT_KEYVALUES")
72 .increment(1L);
73
74 // 用于累加的變量
75 int sum = 0;
76 // 循環周遊Interable
77 for (IntWritable value : values) {
78 // 累加
79 sum += value.get();
80 }
81
82 // 設定總次數
83 resultIntWritable.set(sum);
84 context.getCounter("DIYCOUTER_MAP_REDUCE", "REDUCE_OUTPUT_KEYVALUES")
85 .increment(1L);
86 context.write(key, resultIntWritable);
87 }
88 }
89
90 // Client 區域
91 public static void main(String[] args) throws Exception {
92
93 // 擷取配置檔案資訊
94 Configuration configuration = new Configuration();
95
96 // 配置輸入輸出檔案路徑
97 args = new String[] {
98 "hdfs://hadoop-master.dragon.org:9000/wc/mininput",
99 "hdfs://hadoop-master.dragon.org:9000/wc/wcoutput" };
100
101 // 當指令格式不正确的時候,提示,類似于shell中的指令提示
102 String[] otherArgs = new GenericOptionsParser(configuration, args)
103 .getRemainingArgs();
104 if (otherArgs.length != 2) {
105 System.err.println("Usage: wordcount <in> <out>");
106 System.exit(2);
107 }
108
109 // 建立job。設定配置檔案資訊和Job名稱
110 Job job = new Job(configuration, "wc");
111
112 // 1、設定Job運作的類
113 job.setJarByClass(DIYCounter.class);
114
115 // 2、設定Mapper類和Reducer類
116 job.setMapperClass(MyMapper.class);
117 job.setReducerClass(MyReducer.class);
118
119 // 3、設定輸入檔案的目錄和輸出檔案目錄
120 FileInputFormat.addInputPath(job, new Path(args[0]));
121 FileOutputFormat.setOutputPath(job, new Path(args[1]));
122
123 // 4、設定輸出結果的key和value的類型
124 job.setOutputKeyClass(Text.class);
125 job.setOutputValueClass(IntWritable.class);
126
127 // 5、送出Job等待運作結果,并在用戶端顯示運作資訊
128 boolean isSuccess = job.waitForCompletion(true);
129
130 // 6、結束程式
131 System.exit(isSuccess ? 0 : 1);
132 }
133 }
View計數器 Code