天天看點

MapReduce當中Partitioner的用法

MapReduce當中Partitioner的用法

Partitioner的用法:

馬克-to-win @ 馬克java社群:防盜版實名手機尾号:73203。假設現在我們的需求變成:輸出要分放在兩個檔案當中,按照關鍵字首個字母在26個英文字母中的位置來劃分——首字母屬於前13個字母(a~m)的放在第一個檔案當中,屬於後13個字母(n~z)的放在第二個檔案當中(檔案數更多時以此類推),這時我們就要用到Partitioner的技術。

package com;

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Partitioner;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

public class wordcountmark_to_win {

    public static class tokenizermapper extends mapper<object, text, text, intwritable> {

        private intwritable one = new intwritable(1);

        private text word = new text();

        public void map(object key, text value, context context) throws ioexception, interruptedexception {

            system.out.println("key is 馬克-to-win @ 馬克java社群:"+key.tostring()+" value is "+value.tostring());

            stringtokenizer itr = new stringtokenizer(value.tostring());

            while (itr.hasmoretokens()) {

                word.set(itr.nexttoken());

                context.write(word, one);

            }

        }

    }

    public static class partitionclass extends partitioner<text, intwritable> {

        public int getpartition(text key, intwritable value, int numpartitions)

        // numpartitions參數值從主函數中的job.setnumreducetasks()獲得馬克-to-win @ 馬克java社群:

        {

            int distancemark_to_win=26/numpartitions;

            int result ;

            if((key.charat(0) - 'a')<distancemark_to_win)

                result=0;

            else

               result = 1;

            return result;