天天看點

實驗Hbase的test中的TestTableMapReduce類--修改成簡單易懂點的

不積跬步,無以至千里!

這幾天狂看mapreduce對hbase進行操作的例子,消化吸收,熟能生巧,掌握mapreduce和hbase互動的各個細節,以及整體流程等,整體流程等年前寫一篇總結下,這幾天先狂看吧

看,複制,修改,運作,遇到各種問題,解決,慢慢的就熟了。

這個類是幹啥的呢,其實就是對hbase的某表進行簡單操作,不過用的是mapreduce,即效率高,當然,看這個,主要吸收其用mapreduce操作hbase的思路及如何程式設計。

這個類功能是:将表中所有行 列族f1的某列q1的值倒序後放在另一列族f2的列q2,所謂倒序即 abc變成cba,456變成654,其中q1和q2是可以相同的,因為不同列族嘛。

上代碼了:

Java 代碼:

package mapreduce.hbase;

import java.io.File;

import java.io.IOException;

import java.util.Map;

import java.util.NavigableMap;

import org.apache.commons.logging.Log;

import org.apache.commons.logging.LogFactory;

import org.apache.hadoop.conf.Configuration;

import org.apache.hadoop.fs.FileUtil;

import org.apache.hadoop.fs.Path;

import org.apache.hadoop.hbase.HBaseConfiguration;

import org.apache.hadoop.hbase.HConstants;

import org.apache.hadoop.hbase.client.HTable;

import org.apache.hadoop.hbase.client.Put;

import org.apache.hadoop.hbase.client.Result;

import org.apache.hadoop.hbase.client.Scan;

import org.apache.hadoop.hbase.io.ImmutableBytesWritable;

import org.apache.hadoop.hbase.mapreduce.IdentityTableReducer;

import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;

import org.apache.hadoop.hbase.mapreduce.TableMapper;

import org.apache.hadoop.hbase.util.Bytes;

import org.apache.hadoop.io.Writable;

import org.apache.hadoop.mapreduce.Job;

import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class TestMRHBase {

private static final Log LOG = LogFactory.getLog(TestMRHBase.class);  
static final String MULTI_REGION_TABLE_NAME = "mr_test"; // 表名   
static final byte[] INPUT_FAMILY = Bytes.toBytes("contents");// 原列族名   
static final byte[] OUTPUT_FAMILY = Bytes.toBytes("text");// 寫入的列族名   
static final byte[] QUALIFIER_NAME = Bytes.toBytes("data");// 列名   

  
public static class TestMap extends  
        TableMapper {  
      
    protected void map(ImmutableBytesWritable key, Result value,  
            Context context) throws IOException, InterruptedException {  
        if (value.size() != 1) {  
            throw new IOException("There should only be one column! ");  
        }  
        Map<byte[], NavigableMap<byte[], NavigableMapbyte[]>>> cf = value  
                .getMap();  
        if (!cf.containsKey(INPUT_FAMILY)) {  
            throw new IOException("Wrong input columns. Missing: '"  
                    + Bytes.toString(INPUT_FAMILY) + "'.");  
        }  
        // Get the original value and reverse it   
        String originalValue = new String(value.getValue(INPUT_FAMILY,  
                QUALIFIER_NAME), HConstants.UTF8_ENCODING);  
        StringBuilder newValue = new StringBuilder(originalValue);  
        newValue.reverse();  
        // Now set the value to be collected   
        Put outval = new Put(key.get());  
        outval.add(OUTPUT_FAMILY, QUALIFIER_NAME, Bytes.toBytes(newValue  
                .toString()));  
        context.write(key, outval);  
    }  

}  

  
public void testMultiRegionTable() throws IOException,  
        InterruptedException, ClassNotFoundException {  
    Configuration conf = HBaseConfiguration.create();  
    conf.set("hbase.zookeeper.quorum", "node2,node4,node3");  
    runTestOnTable(new HTable(new Configuration(conf),  
            MULTI_REGION_TABLE_NAME));  
}  

private void runTestOnTable(HTable table) throws IOException,  
        InterruptedException, ClassNotFoundException {  

    Job job = null;  
    try {  
        LOG.info("Before map/reduce startup");  
        job = new Job(table.getConfiguration(), "process column contents");  
        job.setNumReduceTasks(1);  
        job.setJarByClass(TestMRHBase.class);  
        Scan scan = new Scan();  
        scan.addFamily(INPUT_FAMILY);  
        TableMapReduceUtil.initTableMapperJob(Bytes.toString(table  
                .getTableName()), scan, TestMap.class,  
                ImmutableBytesWritable.class, Put.class, job);  
        TableMapReduceUtil.initTableReducerJob(Bytes.toString(table  
                .getTableName()), IdentityTableReducer.class, job);  
        FileOutputFormat.setOutputPath(job, new Path("test"));  
        LOG.info("Started " + Bytes.toString(table.getTableName()));  
        job.waitForCompletion(true);  
        LOG.info("After map/reduce completion");  

        // verify map-reduce results   
        // verify(Bytes.toString(table.getTableName()));   
    } finally {  
        // mrCluster.shutdown();   
        if (job != null) {  
            FileUtil.fullyDelete(new File(job.getConfiguration().get(  
                    "hadoop.tmp.dir")));  
        }  
    }  
}  

  
public static void main(String[] args) throws Exception {  
    TestMRHBase testMRHBase = new TestMRHBase();  
    testMRHBase.testMultiRegionTable();  

}             

}

過程中遇到一些異常,唉…… 一定要長記性啊,否則,大好青春不能總被同樣的錯誤耗費掉啊

1. ClassNotFoundException:又是這個,這次的錯誤是因為沒有調用 job.setJarByClass

2.java.lang.NoSuchMethodException: mapreduce.hbase.TestMRHBase$TestMap.&lt;init&gt;()

這是為什麼呢?

因為內部類 TestMap 沒有聲明成 static,結果執行 map 任務時框架透過反射找不到它的無參構造方法(&lt;init&gt;),只要把類中的 map 類設成 static 即可

其他的一個小問題就不說了。

在拿來别人的東西時,如果想修改下,一定要讀透了

取其精華,去其糟粕