
Daily Summary

MapReduce Example: Map-side Join

The small orders table is shipped to every map task through the distributed cache and loaded into an in-memory HashMap in setup(); each order_items record is then joined against it directly in map(), so the join itself requires no reduce-side shuffle.

Dependencies (pom.xml):

    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-common</artifactId>
      <version>3.2.0</version>
    </dependency>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-hdfs</artifactId>
      <version>3.2.0</version>
    </dependency>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-mapreduce-client-app</artifactId>
      <version>3.2.0</version>
    </dependency>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-client</artifactId>
      <version>3.2.0</version>
    </dependency>
    <dependency>
      <groupId>org.slf4j</groupId>
      <artifactId>slf4j-log4j12</artifactId>
      <version>1.7.30</version>
    </dependency>
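
Note that hadoop-client is an aggregator artifact that already pulls in hadoop-common, hadoop-hdfs, and the MapReduce client libraries transitively, so a minimal POM could get by with a single declaration (a sketch, assuming the same 3.2.0 version used above):

    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-client</artifactId>
      <version>3.2.0</version>
    </dependency>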

Experiment code:

package mapreduce;

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.HashMap;
import java.util.Map;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class MapJoin {

    public static class MyMapper extends Mapper<Object, Text, Text, Text> {

        // In-memory copy of the small (orders) table, keyed by order id.
        private Map<String, String> dict = new HashMap<>();

        // Load the cached orders file into the HashMap before any map() call runs.
        @Override
        protected void setup(Context context) throws IOException,
                InterruptedException {
            String fileName = context.getLocalCacheFiles()[0].getName();
            System.out.println(fileName);
            BufferedReader reader = new BufferedReader(new FileReader(fileName));
            String codeandname;
            while (null != (codeandname = reader.readLine())) {
                String[] str = codeandname.split("\t");
                // Key: order id (column 0); value: columns 2 and 3 of the orders record.
                dict.put(str[0], str[2] + "\t" + str[3]);
            }
            reader.close();
        }

        // Join each order_items record against the in-memory orders table.
        @Override
        protected void map(Object key, Text value, Context context)
                throws IOException, InterruptedException {
            String[] kv = value.toString().split("\t");
            if (dict.containsKey(kv[1])) {
                context.write(new Text(kv[1]), new Text(dict.get(kv[1]) + "\t" + kv[2]));
            }
        }
    }

    // Identity reducer: the join already happened on the map side.
    public static class MyReducer extends Reducer<Text, Text, Text, Text> {
        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            for (Text text : values) {
                context.write(key, text);
            }
        }
    }

    public static void main(String[] args) throws ClassNotFoundException,
            IOException, InterruptedException, URISyntaxException {
        Job job = Job.getInstance();
        job.setJobName("mapjoin");
        job.setJarByClass(MapJoin.class);
        job.setMapperClass(MyMapper.class);
        job.setReducerClass(MyReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        Path in = new Path("hdfs://hadoop102:8020/mymapreduce2/in/order_items1");
        Path out = new Path("hdfs://hadoop102:8020/mymapreduce2/out4");
        FileInputFormat.addInputPath(job, in);
        FileOutputFormat.setOutputPath(job, out);
        // Ship the small orders table to every map task via the distributed cache.
        URI uri = new URI("hdfs://hadoop102:8020/mymapreduce2/in/orders1");
        job.addCacheFile(uri);
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
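
getLocalCacheFiles() has been deprecated since Hadoop 2; getCacheFiles() is the current replacement. Below is a minimal sketch of a drop-in setup() for MyMapper using it (same imports and dict field as above). It assumes the cache file is registered with a "#orders1" fragment, which makes the framework create a symlink of that name in the task's working directory:

    // In main(), a hypothetical registration with an explicit local alias:
    // job.addCacheFile(new URI("hdfs://hadoop102:8020/mymapreduce2/in/orders1#orders1"));

    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        URI[] cacheFiles = context.getCacheFiles(); // non-deprecated replacement
        if (cacheFiles != null && cacheFiles.length > 0) {
            // The last path segment matches the symlink in the working directory.
            String link = new Path(cacheFiles[0].getPath()).getName();
            try (BufferedReader reader = new BufferedReader(new FileReader(link))) {
                String line;
                while ((line = reader.readLine()) != null) {
                    String[] cols = line.split("\t");
                    dict.put(cols[0], cols[2] + "\t" + cols[3]);
                }
            }
        }
    }

Since the join is completed entirely in the mappers, the identity reducer is optional: calling job.setNumReduceTasks(0) would write the map output directly and skip the shuffle altogether.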
