1. HDFS操作常用Shell指令
1.1 檢視指令使用方法
啟動Hadoop
start-dfs.sh

檢視各種指令
hdfs dfs -help
1.2 HDFS目錄操作
1.2.1 目錄操作方法
檢視HDFS中目前使用者目錄下的所有檔案與目錄(未指定路徑時,預設列出 /user/<使用者名稱>)
hdfs dfs -ls
建立一個input_test的目錄
hdfs dfs -mkdir input_test
刪除input_test目錄
hdfs dfs -rm -r input_test
1.2.2 檔案操作方法
檢視HDFS中一個檔案in0.txt的内容
hdfs dfs -cat in0.txt
把HDFS中的in0.txt檔案內容下載到本地系統/home/zqc/download
hdfs dfs -get in0.txt /home/zqc/download
檔案上傳到HDFS out檔案夾中
hdfs dfs -put /home/zqc/score.txt out
把檔案從HDFS的一個目錄複製到另外一個目錄
hdfs dfs -cp out/score.txt wordcount/input
2. 利用HDFS的Web管理界面
3. HDFS程式設計實踐
在IDEA中建立項目
為項目添加需要用到的JAR包
編寫Java應用程式
編譯並執行程式
應用程式的部署
3.1 題目1
編寫 FileUtils 類,其中包含檔案下載與上傳函數的實作,要求如下:
A. 函數UploadFile()向HDFS上傳任意文本檔案,如果指定的檔案在HDFS中已經存在,由使用者指定是追加到原有檔案末尾還是覆寫原有的檔案;
B. 函數DownloadFile()從HDFS中下載指定檔案,如果本地已存在與要下載的檔案名稱相同的檔案,則自動對下載的檔案重新命名;
C. 在本地Download檔案夾中建立文本檔案 localfile.txt ,在main函數中編寫邏輯實作將其上傳到hdfs的input檔案夾中;
import java.io.*;
import java.util.Scanner;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
/**
 * Helpers for copying text files between the local file system and HDFS.
 *
 * <p>Every public method obtains its {@link FileSystem} with {@code FileSystem.get(conf)} and
 * closes it before returning. I/O failures are printed with {@code printStackTrace()} and are
 * not propagated, so callers receive no failure signal.
 */
public class FileUtils {
    /** Buffer size, in bytes, used when streaming a local file into HDFS. */
    private static final int COPY_BUFFER_SIZE = 4096;

    /**
     * Appends the content of a local file to the end of an existing HDFS file.
     *
     * @param conf       Hadoop configuration used to obtain the file system
     * @param LocalPath  path of the local file to read
     * @param UploadPath HDFS path of the file to append to
     */
    public static void appendToFile(Configuration conf, String LocalPath, String UploadPath) {
        Path uploadpath = new Path(UploadPath);
        try (FileSystem fs = FileSystem.get(conf)) {
            appendLocalFile(fs, LocalPath, uploadpath);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Replaces an HDFS file with the content of a local file, creating the HDFS file if needed.
     *
     * @param conf       Hadoop configuration used to obtain the file system
     * @param LocalPath  path of the local file to read
     * @param UploadPath HDFS path of the file to (over)write
     */
    public static void coverFile(Configuration conf, String LocalPath, String UploadPath) {
        Path uploadpath = new Path(UploadPath);
        try (FileSystem fs = FileSystem.get(conf)) {
            overwriteWithLocalFile(fs, LocalPath, uploadpath);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Uploads a local file to HDFS.
     *
     * <p>If the target does not exist it is created. If it already exists, the user is asked on
     * standard input whether to append ({@code 1}) or overwrite ({@code 2}); any other answer,
     * or end of input, leaves the existing HDFS file untouched.
     *
     * @param conf       Hadoop configuration used to obtain the file system
     * @param LocalPath  path of the local file to upload
     * @param UploadPath HDFS path to upload to
     */
    public static void UploadFile(Configuration conf, String LocalPath, String UploadPath) {
        Path uploadpath = new Path(UploadPath);
        try (FileSystem fs = FileSystem.get(conf)) {
            if (!fs.exists(uploadpath)) {
                System.out.println("File \"" + UploadPath + "\" not exist!");
                overwriteWithLocalFile(fs, LocalPath, uploadpath);
                System.out.println("File uploaded successfully!");
                return;
            }

            System.out.println("File \"" + UploadPath + "\" exist!");
            System.out.println("1. append\t2. cover");
            // Deliberately not closed: closing the Scanner would also close System.in.
            Scanner sc = new Scanner(System.in);
            String choice = sc.hasNextLine() ? sc.nextLine().trim() : "";
            if (choice.equals("1")) {
                appendLocalFile(fs, LocalPath, uploadpath);
            } else if (choice.equals("2")) {
                overwriteWithLocalFile(fs, LocalPath, uploadpath);
            } else {
                // Overwriting is destructive, so it must be chosen explicitly.
                System.out.println("Invalid choice, upload cancelled.");
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Downloads an HDFS file to the local file system.
     *
     * <p>If {@code LocalPath} already exists locally, the download is written to the first free
     * name of the form {@code LocalPath_0}, {@code LocalPath_1}, ... instead.
     *
     * @param conf         Hadoop configuration used to obtain the file system
     * @param LocalPath    preferred local destination path
     * @param DownloadPath HDFS path of the file to download
     */
    public static void DownloadFile(Configuration conf, String LocalPath, String DownloadPath) {
        Path downloadpath = new Path(DownloadPath);
        try (FileSystem fs = FileSystem.get(conf)) {
            String target = LocalPath;
            if (new File(LocalPath).exists()) {
                System.out.println(LocalPath + " exits!");
                target = firstFreeName(LocalPath);
                System.out.println("rename: " + target);
            }
            fs.copyToLocalFile(downloadpath, new Path(target));
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Uploads {@code /home/zqc/Downloads/localfile.txt} to the HDFS input folder of user
     * {@code zqc} on the NameNode at {@code hdfs://localhost:9000}.
     */
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        conf.set("dfs.client.block.write.replace-datanode-on-failure.enable", "true");
        conf.set("dfs.client.block.write.replace-datanode-on-failure.policy", "NEVER");
        conf.set("fs.defaultFS", "hdfs://localhost:9000");
        conf.set("fs.hdfs.impl", "org.apache.hadoop.hdfs.DistributedFileSystem");
        String LocalPath = "/home/zqc/Downloads/localfile.txt";
        String UploadPath = "/user/zqc/input/localfile.txt";
        // String DownloadPath = "/user/hadoop/input/score.txt";
        UploadFile(conf, LocalPath, UploadPath);
        // DownloadFile(conf, LocalPath, DownloadPath);

        // Alternative kept from the original: create the local file first, then upload it
        // with copyFromLocalFile.
        // try {
        // String CreateDir = "/home/zqc/Downloads/";
        // String FileName = "localfile.txt";
        // String HDFSDir = "/user/hadoop/input";
        // File file = new File(CreateDir, FileName);
        // if (file.createNewFile()) {
        // FileSystem hdfs = FileSystem.get(conf);
        // Path localpath = new Path(CreateDir + FileName);
        // Path hdfspath = new Path(HDFSDir);
        // hdfs.copyFromLocalFile(localpath, hdfspath);
        // }
        // } catch (Exception e) {
        // e.printStackTrace();
        // }
    }

    /** Streams a local file onto the end of {@code target}; both streams are always closed. */
    private static void appendLocalFile(FileSystem fs, String localPath, Path target)
            throws IOException {
        try (InputStream in = new FileInputStream(localPath);
                FSDataOutputStream out = fs.append(target)) {
            IOUtils.copyBytes(in, out, COPY_BUFFER_SIZE, false);
        }
    }

    /** Streams a local file into a freshly created {@code target}; both streams are always closed. */
    private static void overwriteWithLocalFile(FileSystem fs, String localPath, Path target)
            throws IOException {
        // The local file is opened first so that a missing local file fails before
        // fs.create() is called on the HDFS target.
        try (InputStream in = new FileInputStream(localPath);
                FSDataOutputStream out = fs.create(target)) {
            IOUtils.copyBytes(in, out, COPY_BUFFER_SIZE, false);
        }
    }

    /** Returns the first of {@code basePath_0}, {@code basePath_1}, ... that does not exist locally. */
    private static String firstFreeName(String basePath) {
        int suffix = 0;
        while (new File(basePath + "_" + suffix).exists()) {
            suffix++;
        }
        return basePath + "_" + suffix;
    }
}
3.2 題目2
A. 程式設計實作一個類“MyFSDataInputStream”,該類繼承“org.apache.hadoop.fs.FSDataInputStream”,要求如下:實作按行讀取HDFS中指定檔案的方法“readLine()”,如果讀到檔案末尾,則傳回空,否則傳回檔案一行的文本。
B. 在main函數中編寫邏輯實作按行讀取input檔案夾中的file.txt (檢視附件)檔案,將長度超過15個字元的行在控制台中列印出來;
import java.io.*;
import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
/**
 * Reads a text file from HDFS line by line and prints lines to standard output.
 */
public class ReadLine {
    /** Address of the HDFS NameNode that every read in this class connects to. */
    private static final String HDFS_URI = "hdfs://localhost:9000";

    /**
     * Subclass of {@link FSDataInputStream} that only forwards the wrapped stream to the
     * superclass constructor; it adds no behavior and is not used elsewhere in this class.
     *
     * <p>NOTE(review): the exercise asks for a {@code readLine()} method here, but
     * {@code FSDataInputStream} inherits {@code java.io.DataInputStream.readLine()}, which the
     * JDK declares {@code final}, so it cannot be overridden. Line reading is therefore done
     * through a {@link BufferedReader} in {@code printLinesLongerThan}.
     *
     * <p>NOTE(review): as a non-static inner class this type needs an enclosing
     * {@code ReadLine} instance to be constructed.
     */
    public class MyFSDataInputStream extends FSDataInputStream {
        /**
         * @param in stream to wrap; presumably it must satisfy whatever
         *           {@code FSDataInputStream}'s constructor requires — verify against Hadoop docs
         */
        public MyFSDataInputStream(InputStream in) {
            super(in);
        }
    }

    /**
     * Prints every line of the given HDFS file to standard output.
     *
     * @param conf     Hadoop configuration used to obtain the file system
     * @param filepath HDFS path of the text file to read
     * @return always {@code null}
     * @throws IOException if the file cannot be opened or read
     */
    public static String readline(Configuration conf, String filepath) throws IOException {
        // Every line has length >= 0, so a threshold of -1 prints all of them.
        printLinesLongerThan(conf, filepath, -1);
        return null;
    }

    /**
     * Prints the lines of {@code /user/zqc/input/file.txt} that are longer than 15 characters.
     * Any failure is printed with {@code printStackTrace()}.
     */
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://localhost:9000");
        conf.set("fs.hdfs.impl", "org.apache.hadoop.hdfs.DistributedFileSystem");
        String filepath = "/user/zqc/input/file.txt";
        try {
            printLinesLongerThan(conf, filepath, 15);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Prints each line of an HDFS text file whose length is strictly greater than
     * {@code minLength}. The stream and reader are closed even when reading fails.
     */
    private static void printLinesLongerThan(Configuration conf, String filepath, int minLength)
            throws IOException {
        // The FileSystem handle is not closed here, matching the original code.
        // NOTE(review): FileSystem.get may return a shared, cached instance — confirm before
        // adding a close().
        FileSystem fs = FileSystem.get(URI.create(HDFS_URI), conf);
        // Decode explicitly as UTF-8 instead of relying on the platform default charset.
        // NOTE(review): assumes the HDFS text files are UTF-8 encoded — confirm.
        try (FSDataInputStream in = fs.open(new Path(filepath));
                BufferedReader reader = new BufferedReader(
                        new InputStreamReader(in, java.nio.charset.StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                if (line.length() > minLength) {
                    System.out.println(line);
                }
            }
        }
    }
}