这段代码十分简单,我写这篇博客的唯一原因就是告诉广大像我一样的新手一句话:还是用 Maven 吧……
先把代码撸上:
package com.***.**;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.PrintWriter;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.TableName;
import org.apache.hadoop.hbase.util.Bytes;
import com.sun.tools.javac.comp.Todo;
/**
 * Dumps the image cells of an HBase table to the local disk.
 *
 * <p>Every cell whose column qualifier is {@code image} is written to a file
 * below {@link #PARENT_PATH}. The row key is split at its first underscore:
 * the text before it names the sub-directory, the underscore together with
 * everything after it names the file.
 */
public class hbaseTest {
    /** Root directory that receives one sub-directory per geo id. */
    private static final String PARENT_PATH = "D:\\StreetViewData\\";

    /** Column qualifier of the cells that carry image data. */
    private static final String IMAGE_QUALIFIER = "image";

    private static final Configuration conf = HBaseConfiguration.create();
    static {
        conf.set("hbase.zookeeper.quorum", "192.168.4.227:2181,192.168.4.226:2181,192.168.4.225:2181");
    }

    /**
     * Scans the whole table and writes every {@code image} cell to disk.
     *
     * <p>The connection, table and scanner are closed even when the scan or a
     * file write fails.
     *
     * @param tableName table to scan, optionally prefixed with {@code namespace:}
     * @throws IOException if HBase cannot be reached or an image file cannot be written
     */
    public static void scanTable(String tableName) throws IOException {
        // Resources are closed in reverse order: scanner, table, connection.
        try (Connection conn = ConnectionFactory.createConnection(conf);
             HTable table = (HTable) conn.getTable(org.apache.hadoop.hbase.TableName.valueOf(tableName));
             ResultScanner results = table.getScanner(new Scan())) {
            for (Result result : results) { // one Result per row key
                for (Cell cell : result.rawCells()) { // every cell of that row
                    String qualifier = Bytes.toString(CellUtil.cloneQualifier(cell));
                    if (!IMAGE_QUALIFIER.equals(qualifier)) {
                        continue;
                    }
                    System.out.println("you image");
                    String rowKey = Bytes.toString(CellUtil.cloneRow(cell));
                    writeImage(rowKey, decodeImage(CellUtil.cloneValue(cell)));
                }
            }
        }
    }

    /**
     * Converts a stored cell value back into the bytes written to disk by
     * decoding it as UTF-8 text and re-encoding that text as ISO-8859-1.
     *
     * <p>NOTE(review): this only yields the original image if the writer stored
     * the image as the UTF-8 encoding of an ISO-8859-1 string - confirm against
     * the code that fills the table. Raw binary values would be damaged here.
     */
    private static byte[] decodeImage(byte[] storedValue) throws IOException {
        return Bytes.toString(storedValue).getBytes("ISO-8859-1");
    }

    /** Writes one image below {@link #PARENT_PATH}, creating its directory on demand. */
    private static void writeImage(String rowKey, byte[] imageBytes) throws IOException {
        int separator = rowKey.indexOf('_');
        if (separator < 0) {
            // Without the separator there is no directory/file split; skip instead of crashing the export.
            System.err.println("skipping row key without '_': " + rowKey);
            return;
        }
        String geoid = rowKey.substring(0, separator);
        String imageName = rowKey.substring(separator); // keeps the leading underscore, as before
        String dirPath = PARENT_PATH + geoid;
        File dirFile = new File(dirPath);

        // NOTE(review): the first image of a new directory gets ".tiff", later ones ".jpg".
        // This mirrors the previous behaviour but looks accidental - confirm the real format.
        String extension;
        if (dirFile.exists()) {
            extension = ".jpg";
        } else {
            // mkdirs also creates a missing parent directory.
            if (!dirFile.mkdirs() && !dirFile.isDirectory()) {
                throw new IOException("cannot create directory " + dirPath);
            }
            extension = ".tiff";
        }

        File imgFile = new File(dirPath + "\\" + imageName + extension);
        try (FileOutputStream foStream = new FileOutputStream(imgFile)) {
            foStream.write(imageBytes);
        }
    }

    public static void main(String[] args) throws Exception {
        System.out.println("start");
        // The part before the colon is the HBase namespace, the part after it is the table name.
        scanTable("testnamespace:testtablename");
    }
}
代码很简单,但是我一开始是直接在 Eclipse 中新建了一个 Java Project 来写的,结果写完一运行就发现缺少依赖包。根据日志补上之后再次运行,又发现缺另外一个包;如此反复运行、添加,耽误了一个多小时还没搞定。后来我改建了一个 Maven 项目,瞬间就搞定了。我以后绝对不会在 Eclipse 的 New 菜单里选择 Java Project 了……最后附上 Maven 的 pom.xml 中的依赖:
<!-- Dependencies of the HBase image export example. -->
<dependencies>
<!-- Test-scoped only; not packaged with the application. -->
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>3.8.1</version>
<scope>test</scope>
</dependency>
<!-- NOTE(review): the Java code shown does not reference HDFS classes directly; confirm this is still needed. -->
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-hdfs</artifactId>
<version>2.7.3</version>
</dependency>
<!-- Provides the org.apache.hadoop.hbase.* client classes imported by the Java code. -->
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-client</artifactId>
<version>1.2.6</version>
</dependency>
<!-- System-scoped: resolved from tools.jar of the local JDK instead of a repository. -->
<!-- NOTE(review): the reason for this entry is not evident here; confirm it is required before keeping it. -->
<dependency>
<groupId>jdk.tools</groupId>
<artifactId>jdk.tools</artifactId>
<version>1.8</version>
<scope>system</scope>
<systemPath>${JAVA_HOME}/lib/tools.jar</systemPath>
</dependency>
</dependencies>