天天看点

遍历HBase表中的数据(把hbase中存储的图片写到本地)

这段代码十分简单,我写这篇博客的唯一原因就是告诉广大像我一样的新手一句话:还是用maven吧.....

先把代码撸上:

package com.***.**;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.PrintWriter;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.TableName;
import org.apache.hadoop.hbase.util.Bytes;

import com.sun.tools.javac.comp.Todo;

public class hbaseTest {
	
	private static Configuration conf = HBaseConfiguration.create();
	static {
		conf.set("hbase.zookeeper.quorum", "192.168.4.227:2181,192.168.4.226:2181,192.168.4.225:2181");
	}
	 
	 public static void scanTable(String tableName) throws IOException{
		 String parentPath = "D:\\StreetViewData\\";
		 Connection conn = ConnectionFactory.createConnection(conf);
		 HTable table = (HTable)conn.getTable(org.apache.hadoop.hbase.TableName.valueOf(tableName));
		 
		 Scan scan = new Scan();
		 ResultScanner results = table.getScanner(scan);
		 for(Result result:results) {//遍历所有的Rowkey(行)
			 for(Cell cell:result.rawCells()) {//遍历某一行中的所有cell
				 
				 String hangjian = new String(CellUtil.cloneRow(cell));//行键
				 String liezu = new String(CellUtil.cloneFamily(cell));//列族
				 String lieming = new String(CellUtil.cloneQualifier(cell));//列名
				 
				 String value = new String(CellUtil.cloneValue(cell));//值
				 byte[] imageByte = CellUtil.cloneValue(cell);
				 byte[] lResult = Bytes.toString(imageByte).getBytes("ISO-8859-1");//一开始的时候我把二进制写到本地之后怎么也不能看,最后发现编码不对,很烦
				 
				 long timestamp = cell.getTimestamp();//时间戳
				 String geoid = hangjian.substring(0, hangjian.indexOf("_"));
				 String imageName = hangjian.substring(hangjian.indexOf("_"));
				 String dirPath = parentPath + geoid;
				 File dirFile = new File(dirPath);
				 
				 if (lieming.equals("image")) {//当列名是image时,说明这个cell中的值是我们需要的二进制影像,把它存到我们想要的位置
					 System.out.println("you image");
					 if(dirFile.exists()) {
						 String imgPath = dirPath + "\\" + imageName + ".jpg";
						 File imgFile = new File(imgPath);
						 FileOutputStream foStream = new FileOutputStream(imgFile);
						 foStream.write(lResult);
						 foStream.close();
					 }
					 else {
						 dirFile.mkdir(); 
						 String imgPath = dirPath + "\\" + imageName + ".tiff";
						 File imgFile = new File(imgPath);
						 FileOutputStream foStream = new FileOutputStream(imgFile);
						 foStream.write(lResult);
						 foStream.close();			 
					 }
				 }		 
			 }
		 }
             results.close();  
	     table.close();  
	     conn.close();  
	 }
	 
	 public static void main(String[] args) throws Exception {
		 System.out.println("start");
		 scanTable("testnamespace:testtablename");//冒号前是hbase表的namespace,后边是表名
	 }
}
           

代码很简单,但是我一开始是直接在eclipse中创建了个java project去写的,结果写完一运行发现少包。根据日志添加包之后再次运行,发现又少另外一个包,再运行再添加,耽误了一个多小时还没搞定。然后我就又弄了个Maven项目,瞬间就搞定了。我以后绝对不会在eclipse的new中选择java project了....最后附上maven的pom.xml中的依赖:

<dependencies>
    <!-- JUnit 3.8.1, available on the test classpath only. -->
    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <version>3.8.1</version>
      <scope>test</scope>
    </dependency>
    
        <!-- Hadoop HDFS artifact, version 2.7.3. -->
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-hdfs</artifactId>
            <version>2.7.3</version>
        </dependency>

        <!-- HBase client library, version 1.2.6; the example code imports
             classes from org.apache.hadoop.hbase.client. -->
        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-client</artifactId>
            <version>1.2.6</version>
        </dependency>

        <!-- System-scoped dependency: resolved from lib/tools.jar of the JDK
             that JAVA_HOME points to, not from a Maven repository.
             NOTE(review): tools.jar is not shipped with JDK 9 and later, so
             this entry presumably only resolves on JDK 8 or older; confirm
             the build JDK. -->
        <dependency>
            <groupId>jdk.tools</groupId>
            <artifactId>jdk.tools</artifactId>
            <version>1.8</version>
            <scope>system</scope>
            <systemPath>${JAVA_HOME}/lib/tools.jar</systemPath>
        </dependency>

  </dependencies>