天天看點

使用Java實作對檔案的拆分

指定拆分後的檔案數量對檔案進行拆分

import java.io.*;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.util.ArrayList;
import java.util.List;

/**
 * Splits a text file into a fixed number of smaller files, cutting only at line breaks.
 *
 * <p>Example: {@code TxtSplit.splitFile("D:\\data.txt", 3)} writes {@code D:\data1.txt},
 * {@code D:\data2.txt} and {@code D:\data3.txt}.
 *
 * @author Saint
 * @version 1.0
 * @date 2020/7/15 14:44
 */
public class TxtSplit {

    /** Size in bytes of the window read from the file while searching for a line break. */
    private static final int BUFFER_SIZE = 200;

    /**
     * Splits {@code filePath} into {@code fileCount} files written next to the source file.
     *
     * <p>The pieces are named {@code <name>1.txt}, {@code <name>2.txt}, ... where {@code <name>}
     * is the source file name without its extension. Every piece except the last ends just before
     * the first {@code '\n'} or {@code '\r'} byte found at or after its target end offset; that
     * single byte is not written to any piece, and the next piece starts right after it. For
     * {@code "\r\n"} line endings the next piece therefore begins with the remaining {@code '\n'}.
     * The last piece always runs to the end of the source file. When the source has too little
     * content (or no further line break), the remaining pieces are created empty so that exactly
     * {@code fileCount} files are always produced.
     *
     * @param filePath  path of the file to split; both {@code '\'} and {@code '/'} are accepted
     *                  as directory separators
     * @param fileCount number of pieces to create; must be positive
     * @return the paths of the created files, in order
     * @throws IllegalArgumentException if {@code fileCount} is not positive
     * @throws IOException              if the source cannot be read or a piece cannot be written
     */
    public static List<String> splitFile(String filePath, int fileCount) throws IOException {
        if (fileCount <= 0) {
            throw new IllegalArgumentException("fileCount must be positive: " + fileCount);
        }

        // Locate the directory prefix and the bare file name (without extension).
        int separatorIndex = Math.max(filePath.lastIndexOf('\\'), filePath.lastIndexOf('/'));
        int dotIndex = filePath.lastIndexOf('.');
        if (dotIndex <= separatorIndex) {
            // No extension in the file name itself: use the whole name.
            dotIndex = filePath.length();
        }
        String fileNamePre = filePath.substring(separatorIndex + 1, dotIndex);
        String filePathPre = filePath.substring(0, separatorIndex + 1);

        List<String> list = new ArrayList<>();

        try (FileInputStream fis = new FileInputStream(filePath);
             FileChannel inputChannel = fis.getChannel()) {
            final long fileSize = inputChannel.size();
            // Target size of each piece.
            long average = fileSize / fileCount;
            ByteBuffer byteBuffer = ByteBuffer.allocate(BUFFER_SIZE);

            long startPosition = 0;
            // The first search starts one buffer before the target size.
            long endPosition = average < BUFFER_SIZE ? 0 : average - BUFFER_SIZE;

            for (int i = 0; i < fileCount; i++) {
                if (i + 1 != fileCount) {
                    // Never search before the start of the current piece; otherwise a tiny
                    // average would re-find the previous break and yield a negative length.
                    long searchFrom = Math.max(startPosition, endPosition);
                    endPosition = findLineBreak(inputChannel, byteBuffer, searchFrom, fileSize);
                } else {
                    // The last piece simply runs to the end of the source file.
                    endPosition = fileSize;
                }

                String fileName = filePathPre + fileNamePre + (i + 1) + ".txt";
                try (FileOutputStream fos = new FileOutputStream(fileName);
                     FileChannel outputChannel = fos.getChannel()) {
                    transferRange(inputChannel, startPosition, endPosition, outputChannel);
                }
                list.add(fileName);

                // Skip the line-break byte itself; clamp so later pieces never start past EOF.
                startPosition = Math.min(fileSize, endPosition + 1);
                endPosition = Math.min(fileSize, endPosition + average);
            }
        }

        return list;
    }

    /**
     * Returns the offset of the first {@code '\n'} or {@code '\r'} byte at or after {@code from},
     * or {@code fileSize} when there is none.
     */
    private static long findLineBreak(FileChannel channel, ByteBuffer buffer, long from, long fileSize)
            throws IOException {
        long position = from;
        while (position < fileSize) {
            buffer.clear();
            int read = channel.read(buffer, position);
            if (read <= 0) {
                break;
            }
            // Inspect only the bytes delivered by this read, never stale buffer content.
            for (int j = 0; j < read; j++) {
                byte b = buffer.get(j);
                if (b == '\n' || b == '\r') {
                    return position + j;
                }
            }
            position += read;
        }
        return fileSize;
    }

    /**
     * Copies the bytes in {@code [start, end)} from {@code source} to {@code target}, repeating
     * the transfer until the whole range is written or the source has no more bytes.
     */
    private static void transferRange(FileChannel source, long start, long end, FileChannel target)
            throws IOException {
        long position = start;
        while (position < end) {
            long transferred = source.transferTo(position, end - position, target);
            if (transferred <= 0) {
                break;
            }
            position += transferred;
        }
    }
}
import java.io.*;
import java.util.ArrayList;
import java.util.Collections;

/**
 * Sorts the lines of a single text file.
 *
 * <p>Example: {@code TxtSingleSort.SortedSingleTxt("D:\\sort\\data1.txt")} writes
 * {@code D:\sort\data1_sorted.txt} and removes {@code D:\sort\data1.txt}.
 *
 * @author Saint
 * @version 1.0
 * @date 2020/7/15 15:53
 */
public class TxtSingleSort {

    /** Initial capacity of the line list: one million plus one thousand entries. */
    private static final int INITIAL_CAPACITY = 1001000;

    /**
     * Sorts the non-blank lines of a file and writes them to a sibling file.
     *
     * <p>The result is written to {@code <name>_sorted.txt} in the same directory, where
     * {@code <name>} is the input file name without its extension. Lines that are empty or
     * contain only whitespace are dropped, the remaining lines are sorted in natural
     * {@link String} order, and no line separator is written after the last line. The input
     * file is deleted only after the sorted file has been written completely; if reading or
     * writing fails the input is left untouched.
     *
     * @param filePath path of the file to sort; both {@code '\'} and {@code '/'} are accepted
     *                 as directory separators
     * @return the file containing the sorted lines
     * @throws UncheckedIOException if the input cannot be read or the output cannot be written
     */
    public static File SortedSingleTxt(String filePath) {
        // Locate the directory prefix and the bare file name (without extension).
        int separatorIndex = Math.max(filePath.lastIndexOf('\\'), filePath.lastIndexOf('/'));
        int dotIndex = filePath.lastIndexOf('.');
        if (dotIndex <= separatorIndex) {
            // No extension in the file name itself: use the whole name.
            dotIndex = filePath.length();
        }
        String fileNamePre = filePath.substring(separatorIndex + 1, dotIndex);
        String filePathPre = filePath.substring(0, separatorIndex + 1);
        String outFilePath = filePathPre + fileNamePre + "_sorted.txt";

        File fileIn = new File(filePath);
        File fileOut = new File(outFilePath);

        // Read everything first so the output file is only created once the input is known good.
        ArrayList<String> lines = new ArrayList<>(INITIAL_CAPACITY);
        try (BufferedReader in = new BufferedReader(new InputStreamReader(new FileInputStream(fileIn)))) {
            String line;
            while ((line = in.readLine()) != null) {
                // Skip blank lines, e.g. the one left at the top of a piece produced by splitting.
                if (line.trim().length() > 0) {
                    lines.add(line);
                }
            }
        } catch (IOException ex) {
            throw new UncheckedIOException("Failed to read " + filePath, ex);
        }

        Collections.sort(lines);

        try (BufferedWriter out = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(fileOut)))) {
            int written = 0;
            int size = lines.size();
            for (String s : lines) {
                out.write(s);
                written++;
                // No line separator after the last line.
                if (written != size) {
                    out.newLine();
                }
            }
        } catch (IOException ex) {
            throw new UncheckedIOException("Failed to write " + outFilePath, ex);
        }

        // Remove the unsorted original only now that the sorted copy is safely on disk.
        fileIn.delete();

        return fileOut;
    }

}