MapReduce求TopK最大值

2023-04-11 23:49:32

package suanfa;


import java.io.IOException;
import java.net.URI;


import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.mapreduce.lib.partition.HashPartitioner;


/**
 * MapReduce driver that finds the largest number in a text file containing
 * one integer per line.
 *
 * <p>Each map task keeps a running maximum over the lines it reads and emits
 * that single value when the task finishes. A single reduce task then takes
 * the maximum of those per-task values and writes it as the only output
 * record. Despite the class name, only the top 1 value is produced.
 */
public class TopK {

    /** Input file read by the job. */
    public static final String INPUT_PATH = "hdfs://192.168.0.9:9000/seq100w.txt";

    /** Output directory of the job; it is deleted first if it already exists. */
    public static final String OUTPUT_PATH = "hdfs://192.168.0.9:9000/maxseq";

    /**
     * Configures and runs the job, blocking until it completes.
     *
     * @param args unused
     * @throws IllegalStateException if the job finishes unsuccessfully
     * @throws Exception if the job cannot be configured or submitted
     */
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, TopK.class.getSimpleName());
        // Needed so the cluster can locate the jar that contains the mapper and reducer.
        job.setJarByClass(TopK.class);

        // Input path and input format.
        FileInputFormat.addInputPath(job, new Path(INPUT_PATH));
        job.setInputFormatClass(TextInputFormat.class);

        // Custom mapper and its output types.
        job.setMapperClass(MyMapper.class);
        job.setMapOutputKeyClass(LongWritable.class);
        job.setMapOutputValueClass(NullWritable.class);

        // Partitioning: one reduce task, so every candidate reaches the same reducer.
        job.setPartitionerClass(HashPartitioner.class);
        job.setNumReduceTasks(1);

        // Custom reducer and its output types.
        job.setReducerClass(MyReduce.class);
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(NullWritable.class);

        // Remove a pre-existing output directory; the job refuses to start otherwise.
        // The file system is resolved from the output path itself, not the input path.
        Path outputPath = new Path(OUTPUT_PATH);
        FileSystem fileSystem = outputPath.getFileSystem(conf);
        if (fileSystem.exists(outputPath)) {
            fileSystem.delete(outputPath, true);
        }

        // Output path and output format.
        FileOutputFormat.setOutputPath(job, outputPath);
        job.setOutputFormatClass(TextOutputFormat.class);

        // Submit and wait; surface a failed job instead of returning as if it succeeded.
        if (!job.waitForCompletion(true)) {
            throw new IllegalStateException("Job " + job.getJobName() + " failed");
        }
    }

    /**
     * Tracks the largest value among the lines handled by this map task and
     * emits it once in {@link #cleanup}.
     */
    static class MyMapper extends Mapper<LongWritable, Text, LongWritable, NullWritable> {
        private long max = Long.MIN_VALUE;
        /** False until at least one number has been parsed by this task. */
        private boolean seen = false;

        /**
         * Parses one input line as a {@code long} and updates the running maximum.
         * Blank lines are skipped; nothing is written to the context here.
         *
         * @throws NumberFormatException if a non-blank line is not a valid long
         */
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String line = value.toString().trim();
            if (line.isEmpty()) {
                return;
            }
            long temp = Long.parseLong(line);
            if (!seen || temp > max) {
                max = temp;
                seen = true;
            }
        }

        /**
         * Emits this task's maximum, or nothing when the task parsed no numbers
         * (so an empty split does not inject {@code Long.MIN_VALUE}).
         */
        @Override
        protected void cleanup(Context context)
                throws IOException, InterruptedException {
            if (seen) {
                context.write(new LongWritable(max), NullWritable.get());
            }
        }
    }

    /**
     * Takes the maximum over all keys it receives and writes it once in
     * {@link #cleanup}.
     */
    static class MyReduce extends Reducer<LongWritable, NullWritable, LongWritable, NullWritable> {
        private long max = Long.MIN_VALUE;
        /** False until at least one key has been received. */
        private boolean seen = false;

        /** Updates the running maximum with the key; the values carry no data. */
        @Override
        protected void reduce(LongWritable k2, Iterable<NullWritable> v2s, Context context)
                throws IOException, InterruptedException {
            long temp = k2.get();
            if (!seen || temp > max) {
                max = temp;
                seen = true;
            }
        }

        /** Writes the overall maximum, or nothing when no key was received. */
        @Override
        protected void cleanup(Context context)
                throws IOException, InterruptedException {
            if (seen) {
                context.write(new LongWritable(max), NullWritable.get());
            }
        }
    }
}

MapReduce求TopK最大值

繼續閱讀

MapReduce運作Wordcount時一直卡在INFO mapreduce.Job: Running job，web檢視一直處于accepted階段

ubuntu hadoop2.6.1，terminal下運作wordcount

MapReduce(一)：入門級程式wordcount及其分析

HiveQl語句應用執行個體：WordCount具體步驟如下：

hadoop操作遇到的問題問題一：輸出檔案已存在

用mapreduce計算wordCount和手機流量統計程式運作過程WordCount統計手機流量統計

Hadoop之運作wordcount

jdk1.7+Eclipse+Maven3.5+Hadoop2.7.3建構hadoop項目

Eclipse運作WordCount（詳細版）相關連接配接Eclipse運作WordCount

專家訪談：搜尋開源力量：Lucene技術前景

hadoop 用MR實作join操作

Centos7 下 Hadoop 2.6.4 分布式叢集環境搭建摘要叢集準備安裝JDK 安裝 Hadoop 2.6.4 部署 slaver1-slaver4 啟動 hadoop 叢集成功了

MapReduce的幾個企業級經典面試案例MapReduce的幾個企業級經典面試案例

ubuntu14.04下安裝hbase1.0.1.1

User Defined Hadoop DataType

Ambari介紹和架構原理