1. Add the dependencies
<!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-hdfs -->
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-hdfs</artifactId>
    <version>2.7.6</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-common -->
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-common</artifactId>
    <version>2.7.6</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-client -->
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-client</artifactId>
    <version>2.7.6</version>
</dependency>
<!-- https://mvnrepository.com/artifact/junit/junit -->
<dependency>
    <groupId>junit</groupId>
    <artifactId>junit</artifactId>
    <version>4.3</version>
</dependency>
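For reference, the test class below and the later snippets assume roughly these imports (a sketch; adjust to your own package layout):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus; // only needed for the recursive-listing sketch further down
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;    // only needed for the recursive-listing sketch further down
import org.junit.Before;
import org.junit.Test;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.net.URI;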
2. Connect to Hadoop
public class HadoopConn {

    // shared handle, initialized before each test so every test method below can use it
    FileSystem fs;

    @Before
    public void getConn() throws Exception {
        Configuration configuration = new Configuration();
        // optional settings, e.g. the replication factor
        configuration.set("dfs.replication", "1");
        // URI of the NameNode to connect to
        URI uri = new URI("hdfs://master:9000");
        // obtain the FileSystem through the static factory method
        fs = FileSystem.get(uri, configuration);
    }

    // perform an operation: create a directory
    @Test
    public void getMkdirs() throws Exception {
        boolean b = fs.mkdirs(new Path("/test"));
        System.out.println(b);
    }

    @Test
    public void getDelete() throws Exception {
        // the second argument means recursive delete; true: delete non-empty directories, false: fail on them
        boolean b = fs.delete(new Path("/test"), true);
        System.out.println(b);
    }
}

(The snippets in the following sections are further test methods of this class and reuse the fs field.)
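Outside of a test class you would normally close the FileSystem when done; it implements java.io.Closeable, so try-with-resources works. A minimal standalone sketch (same master:9000 address as above; the existence check is just a defensive extra):

public class HadoopConnMain {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // try-with-resources closes the FileSystem automatically
        try (FileSystem fs = FileSystem.get(new URI("hdfs://master:9000"), conf)) {
            if (fs.exists(new Path("/test"))) {     // only delete what is actually there
                fs.delete(new Path("/test"), true); // recursive delete
            }
        }
    }
}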
3. View details of the files under a directory
// list the entries under the /data directory
@Test
public void getStatus() throws Exception {
    FileStatus[] fileStatuses = fs.listStatus(new Path("/data"));
    // walk through the entries one by one
    for (FileStatus f : fileStatuses) {
        System.out.println(f.getBlockSize()); // HDFS block size (128 MB by default)
        System.out.println(f.getLen());       // actual file size in bytes
        System.out.println(f.getGroup());     // group
        System.out.println(f.getOwner());     // owner
        System.out.println(f.getPath());      // path
        // ...
    }
}
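listStatus only looks at the directory itself. To descend into subdirectories as well, Hadoop 2.x offers FileSystem.listFiles with a recursive flag (it returns files only, not directories); a short sketch:

// recursively list every file under /data
@Test
public void getListFiles() throws Exception {
    RemoteIterator<LocatedFileStatus> it = fs.listFiles(new Path("/data"), true);
    while (it.hasNext()) {
        LocatedFileStatus f = it.next();
        System.out.println(f.getPath() + "\t" + f.getLen());
    }
}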
4. View a file's storage details (how many blocks, which nodes, etc.)
// get the detailed block placement of a file
@Test
public void getBlock() throws Exception {
    BlockLocation[] bl = fs.getFileBlockLocations(new Path("/data/student.txt"), 0, 100000000);
    for (BlockLocation b : bl) {
        // hosts storing this block; since one block holds at most 128 MB,
        // a large file is split into several blocks, possibly on several nodes
        String[] hosts = b.getHosts();
        for (String host : hosts) {
            System.out.println(host);
        }
        // size of this block's data in bytes
        System.out.println(b.getLength());
        // hostname:port of the datanodes holding this block
        String[] names = b.getNames();
        for (String name : names) {
            System.out.println(name);
        }
        // offset of this block within the file (block n starts at n * 128 MB)
        System.out.println(b.getOffset());
        // network topology paths of the datanodes
        String[] topologyPaths = b.getTopologyPaths();
        for (String s : topologyPaths) {
            System.out.println(s);
        }
    }
}
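The hard-coded range 0..100000000 only covers blocks starting within the first ~100 MB of the file. To cover the whole file whatever its size, look up its FileStatus first and pass the real length; a sketch:

@Test
public void getBlockWholeFile() throws Exception {
    FileStatus status = fs.getFileStatus(new Path("/data/student.txt"));
    // query block locations over the file's full length
    BlockLocation[] bl = fs.getFileBlockLocations(status, 0, status.getLen());
    for (BlockLocation b : bl) {
        System.out.println(b); // toString() prints offset, length and hosts
    }
}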
5. Read a file: open() returns a stream to read from
@Test
public void getOpen() throws Exception {
    // open the file with open(), which returns an FSDataInputStream
    FSDataInputStream inputStream = fs.open(new Path("/data/student.txt"));
    BufferedReader br = new BufferedReader(new InputStreamReader(inputStream));
    String line;
    while ((line = br.readLine()) != null) {
        System.out.println(line);
    }
    br.close();
    inputStream.close();
}
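For binary files, or to download a file without going line by line, Hadoop's org.apache.hadoop.io.IOUtils can pipe the stream straight into any OutputStream; a sketch with a hypothetical local target path:

@Test
public void getDownload() throws Exception {
    FSDataInputStream in = fs.open(new Path("/data/student.txt"));
    java.io.FileOutputStream out = new java.io.FileOutputStream("student_local.txt"); // hypothetical local path
    // copy in 4 KB chunks; the final 'true' closes both streams
    org.apache.hadoop.io.IOUtils.copyBytes(in, out, 4096, true);
}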
6. Upload / write a file. A file in HDFS cannot be modified in place: create() overwrites an existing file (Hadoop 2.x does provide fs.append() for appending, but random writes are not supported).
Use create() to create a file and write data into it:
@Test
public void getPut() throws Exception {
    // create() returns an FSDataOutputStream; an existing /data/test.txt is overwritten
    FSDataOutputStream os = fs.create(new Path("/data/test.txt"));
    BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(os));
    bw.write("你好");
    bw.flush();
    bw.close();
}
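To upload an existing local file instead of writing bytes by hand, FileSystem also offers copyFromLocalFile; a sketch (the local path is hypothetical):

@Test
public void getUpload() throws Exception {
    // copy a local file into HDFS
    fs.copyFromLocalFile(new Path("student_local.txt"), new Path("/data/student_upload.txt"));
}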