MapReduce 當中 Partitioner 的用法
Partitioner 的用法:
馬克-to-win @ 馬克Java社群:防盜版實名手機尾號:73203。如果現在我們的需求變成:輸出要放在兩個檔案當中,按照關鍵字首字母在26個英文字母中的位置來劃分——首字母屬於前13個字母(a–m)的關鍵字放在第一個檔案當中,其餘(n–z)的放在第二個檔案當中,以此類推——這時我們就要用到 Partitioner(分區)的技術。
package com;
import java.io.IOException;
import java.util.StringTokenizer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Partitioner;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
public class wordcountmark_to_win {
public static class tokenizermapper extends mapper<object, text, text, intwritable> {
private intwritable one = new intwritable(1);
private text word = new text();
public void map(object key, text value, context context) throws ioexception, interruptedexception {
system.out.println("key is 馬克-to-win @ 馬克java社群:"+key.tostring()+" value is "+value.tostring());
stringtokenizer itr = new stringtokenizer(value.tostring());
while (itr.hasmoretokens()) {
word.set(itr.nexttoken());
context.write(word, one);
}
}
}
public static class partitionclass extends partitioner<text, intwritable> {
public int getpartition(text key, intwritable value, int numpartitions)
// numpartitions參數值從主函數中的job.setnumreducetasks()獲得馬克-to-win @ 馬克java社群:
{
int distancemark_to_win=26/numpartitions;
int result ;
if((key.charat(0) - 'a')<distancemark_to_win)
result=0;
else
result = 1;
return result;