檔案格式:SequenceFile
------------------
1.SequenceFile
Key-Value對方式。
2.不是文本檔案,是二進制檔案。
3.可切割
因為有同步點。
reader.sync(pos); //定位到pos之後的第一個同步點。
writer.sync(); //寫入同步點
4.壓縮方式
不壓縮
record壓縮 //只壓縮value
塊壓縮 //將多個record聚合成一個block後一起壓縮。
package com.it18zhang.hdfs;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.GzipCodec;
import org.junit.Test;
import java.io.IOException;
/**
 * Exercises writing and reading a Hadoop {@link SequenceFile} on the local
 * file system ({@code fs.defaultFS = file:///}).
 *
 * <p>All tests operate on the same file, {@link #SEQ_FILE}. The read tests
 * expect that one of the write tests has produced it beforehand.
 */
public class TestSeqFile {

    /** Path of the sequence file shared by every test in this class. */
    private static final String SEQ_FILE = "d:/seq/1.seq";

    /** Number of records appended by each of the two write passes. */
    private static final int RECORD_COUNT = 10;

    /**
     * Builds a configuration whose default file system is the local one.
     *
     * @return a fresh configuration with {@code fs.defaultFS} set to {@code file:///}
     */
    private static Configuration newLocalConf() {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "file:///");
        return conf;
    }

    /**
     * Opens a reader on {@link #SEQ_FILE}. The caller owns the reader and must close it.
     *
     * @return an open reader positioned at the start of the file
     * @throws IOException if the file system or the file cannot be opened
     */
    private static SequenceFile.Reader openReader() throws IOException {
        Configuration conf = newLocalConf();
        FileSystem fs = FileSystem.get(conf);
        return new SequenceFile.Reader(fs, new Path(SEQ_FILE), conf);
    }

    /**
     * Appends two passes of {@code (i, "tom" + i)} records for {@code i} in
     * {@code [0, RECORD_COUNT)}: the first pass writes a sync marker after every
     * record, the second only after records with an even key.
     *
     * @param writer the open writer to append to; it is not closed here
     * @throws IOException if appending a record or writing a sync marker fails
     */
    private static void appendSampleRecords(SequenceFile.Writer writer) throws IOException {
        for (int i = 0; i < RECORD_COUNT; i++) {
            writer.append(new IntWritable(i), new Text("tom" + i));
            // Add a sync marker. Sync markers let a reader that starts at an
            // arbitrary position in the file recognise record boundaries.
            writer.sync();
        }
        for (int i = 0; i < RECORD_COUNT; i++) {
            writer.append(new IntWritable(i), new Text("tom" + i));
            if (i % 2 == 0) {
                writer.sync();
            }
        }
    }

    /**
     * Write test: creates {@link #SEQ_FILE} with the writer's default
     * compression settings and fills it with the sample records.
     */
    @Test
    public void save() throws Exception {
        Configuration conf = newLocalConf();
        FileSystem fs = FileSystem.get(conf);
        Path p = new Path(SEQ_FILE);
        // try-with-resources closes the writer even if an append or sync fails.
        try (SequenceFile.Writer writer =
                SequenceFile.createWriter(fs, conf, p, IntWritable.class, Text.class)) {
            appendSampleRecords(writer);
        }
    }

    /**
     * Write test with compression: creates the file with
     * {@link SequenceFile.CompressionType#BLOCK} compression and a
     * {@link GzipCodec}, then fills it with the sample records.
     *
     * <p>Note that this targets the same path as {@link #save()}.
     */
    @Test
    public void zipGzip() throws Exception {
        Configuration conf = newLocalConf();
        FileSystem fs = FileSystem.get(conf);
        Path p = new Path(SEQ_FILE);
        try (SequenceFile.Writer writer = SequenceFile.createWriter(fs,
                conf,
                p,
                IntWritable.class,
                Text.class,
                SequenceFile.CompressionType.BLOCK,
                new GzipCodec())) {
            appendSampleRecords(writer);
        }
    }

    /**
     * Read test: iterates over the whole file and prints every key-value pair
     * as {@code key : value}.
     */
    @Test
    public void read() throws Exception {
        try (SequenceFile.Reader reader = openReader()) {
            IntWritable key = new IntWritable();
            Text value = new Text();
            while (reader.next(key, value)) {
                System.out.println(key.get() + " : " + value.toString());
            }
        }
    }

    /**
     * Read test: advances key by key and fetches the value of the current
     * record separately, printing only the values.
     */
    @Test
    public void read2() throws Exception {
        try (SequenceFile.Reader reader = openReader()) {
            IntWritable key = new IntWritable();
            Text value = new Text();
            while (reader.next(key)) {
                reader.getCurrentValue(value);
                System.out.println(value.toString());
            }
        }
    }

    /**
     * Read test: seeks to a fixed byte offset, reads one record and prints its value.
     */
    @Test
    public void read3() throws Exception {
        try (SequenceFile.Reader reader = openReader()) {
            IntWritable key = new IntWritable();
            Text value = new Text();
            // NOTE(review): 288 is presumably the start offset of a record in the
            // file produced by save(); confirm against the actual file layout.
            reader.seek(288);
            reader.next(key, value);
            System.out.println(value.toString());
        }
    }

    /**
     * Read test using sync points: moves to a sync point relative to a fixed
     * byte offset and prints every remaining record as
     * {@code position key-value}.
     */
    @Test
    public void read4() throws Exception {
        try (SequenceFile.Reader reader = openReader()) {
            IntWritable key = new IntWritable();
            Text value = new Text();
            // NOTE(review): 648 is an arbitrary offset chosen for the demo file;
            // sync() is expected to move to the next sync point after it - confirm.
            reader.sync(648);
            while (reader.next(key, value)) {
                System.out.println(reader.getPosition() + " " + key.get() + "-" + value.toString());
            }
        }
    }
}
檔案格式:MapFile
--------------------
1.Key-value
2.key按升序寫入(可重複)。
3.mapFile對應一個目錄,目錄下有index和data檔案,都是序列檔案。
4.index檔案劃分key區間,用於快速定位。