天天看點

用檔案映射的方式讀取 txt 檔案點雲資料

文章目錄

      • 說明
      • 代碼
      • 結果對比

說明

參考部落格:點雲讀取速度比較——QTextStream、C++檔案流、C++檔案映射

代碼

#include <iostream>
#include <vector>
#include <string>
#include <chrono>
#include <fstream>
#include <sstream>
#include "windows.h"

// Hard-coded location of the input point-cloud text file; txt1() and txt2() both read from it.
std::string path{"G:/Data/YYElse/yy.txt"};

int txt1(std::vector<std::vector<double>> *const pVecCloud, bool delNAN) {

    // 用于測試時間差
    auto beginTime = std::chrono::high_resolution_clock::now();

    HANDLE hSrcFile = CreateFileA(path.c_str(), GENERIC_READ, 0, NULL, OPEN_ALWAYS, 0, NULL);
    if (hSrcFile == INVALID_HANDLE_VALUE) return 0;
    LARGE_INTEGER tInt2;
    GetFileSizeEx(hSrcFile, &tInt2);
    __int64 dwRemainSize = tInt2.QuadPart;
    __int64 dwFileSize = dwRemainSize;
    HANDLE hSrcFileMapping = CreateFileMapping(hSrcFile, NULL, PAGE_READONLY, tInt2.HighPart, tInt2.LowPart, NULL);
    if (hSrcFileMapping == INVALID_HANDLE_VALUE) {
        std::cout << " > Lose ...\n";
        return 0;
    }
    SYSTEM_INFO SysInfo;
    GetSystemInfo(&SysInfo);
    DWORD dwGran = SysInfo.dwAllocationGranularity;
    const int BUFFERBLOCKSIZE = dwGran * 1024;

    // 用于标記你讀取的是什麼格式的點雲資料
    const int XYZI_FC = 4;

    bool AlreadySetFiledCount = false;//是否已經設定了資料寬度
    int usefulFiledCount = 0; // 有效檔案行數

    int totalRows = 0;  //檔案總行數:
    int FieldIndex = 0; //每一個小數字的填充位置
    int FieldCount = 0; //每一行中整數字位置,用來判定資料列數究竟是XYZARGB。
    double arrXYZ_I[XYZI_FC];
    char  strLine[1024] = { 0 };

    std::cout << " > Start ...\n";
    std::vector<std::vector<double>>().swap(*pVecCloud); // 清空原始的資料
    while (dwRemainSize > 0) {
        DWORD dwBlock = dwRemainSize < BUFFERBLOCKSIZE ? dwRemainSize : BUFFERBLOCKSIZE;
        __int64 qwFileOffset = dwFileSize - dwRemainSize;
        PBYTE pSrc = (PBYTE)MapViewOfFile(hSrcFileMapping, FILE_MAP_READ, (DWORD)(qwFileOffset >> 32), (DWORD)(qwFileOffset & 0xFFFFFFFF), dwBlock);
        PBYTE pSrcBak = pSrc;
        for (int i = 0; i < dwBlock; i++) {
            //這樣的處理方式有一個很大的缺點
            //當整個檔案的最後一行不是空一行的話,整個資料會少一行。
            //但是一般預設情況下整個資料的最後一行是有一個換行的
            if (*pSrc == '\n') {
                //整行讀完了====================================================
                if (FieldIndex != 0) { //先處理一次字段。
                    strLine[FieldIndex] = '\0';//在末尾處加上符号。
                    arrXYZ_I[FieldCount++] = atof(strLine);
                    FieldIndex = 0;
                }

                usefulFiledCount = XYZI_FC;

                std::vector<double> vTemp;
                {
                    vTemp.push_back(arrXYZ_I[0]);
                    vTemp.push_back(arrXYZ_I[1]);
                    vTemp.push_back(arrXYZ_I[2]);
                    vTemp.push_back(arrXYZ_I[3]);
                }
                (*pVecCloud).push_back(vTemp);
                totalRows++;
                FieldCount = 0;//字段位置清零
                memset(strLine, 0, sizeof(strLine));//數字字元數組清空
            } else if ((*pSrc >= '0' && *pSrc <= '9') || *pSrc == '.' || *pSrc == '-' || *pSrc == 'e' || *pSrc == '+') {
                // 若果以以上内容結尾,則跳過該行
                strLine[FieldIndex++] = *pSrc;
            } else {
                //此時為行内分割===關鍵是連續幾次無用字元==============================
                if (FieldIndex != 0) {
                    //一個字段處理完畢
                    strLine[FieldIndex] = '\0';
                    arrXYZ_I[FieldCount++] = atof(strLine);
                    FieldIndex = 0;
                }
            }
            pSrc++;
        }
        UnmapViewOfFile(pSrcBak);
        dwRemainSize -= dwBlock;
    }
    CloseHandle(hSrcFileMapping);
    CloseHandle(hSrcFile);

    auto endTime = std::chrono::high_resolution_clock::now();
    auto elapsedTime = std::chrono::duration_cast<std::chrono::seconds>(endTime - beginTime);
    std::cout << "time cost:" << elapsedTime.count() << std::endl;
    std::cout << " > End ...\n";
    return totalRows;
}

/**
 * Reads the text point cloud at the file-scope `path` with std::ifstream and
 * appends one row per line to *pVecCloud (existing content is kept).
 *
 * Each line is split on whitespace and at most the first four values (X, Y, Z,
 * intensity) are converted with std::stod. Blank lines are skipped.
 *
 * @param pVecCloud  Output container that receives the parsed rows; must not be null.
 * @param delNAN     When true, a line with a NaN among its first four values is
 *                   skipped; when false, the value is stored as parsed.
 * @return Number of rows appended; -200 if the file cannot be opened; -1 as soon
 *         as a kept line has fewer than four values (rows read so far stay in
 *         *pVecCloud).
 * @throws std::invalid_argument / std::out_of_range from std::stod when a token
 *         is not a valid number.
 */
int txt2(std::vector<std::vector<double>> *const pVecCloud, bool delNAN) {
    std::ifstream ifs(path, std::ios::in);
    if (!ifs) {
        return -200;
    }

    // Start of the timed section.
    auto beginTime = std::chrono::high_resolution_clock::now();

    const std::size_t kColumns = 4; // only X, Y, Z and intensity are read
    int _pointCount = 0;
    std::string lineStr;
    while (getline(ifs, lineStr)) {
        std::stringstream ss(lineStr);
        std::string token;
        std::vector<double> lineArray;
        lineArray.reserve(kColumns);
        bool sawToken = false;
        bool keepLine = true;

        // operator>> splits on any whitespace run, so repeated spaces, tabs or a
        // trailing '\r' never yield an empty token for std::stod.
        while (lineArray.size() < kColumns && (ss >> token)) {
            sawToken = true;
            if (delNAN && (token == "NAN" || token == "nan")) {
                keepLine = false;
                break;
            }
            const double value = std::stod(token);
            // NaN is the only value that compares unequal to itself; this catches
            // other spellings that std::stod may accept (e.g. "NaN").
            if (delNAN && value != value) {
                keepLine = false;
                break;
            }
            lineArray.push_back(value);
        }

        if (!sawToken) continue;  // blank line: nothing to store
        // A line rejected for NaN must be skipped before the column-count check,
        // otherwise its shortened array would be mistaken for a missing column.
        if (!keepLine) continue;
        if (lineArray.size() < kColumns) return -1; // no intensity column

        pVecCloud->push_back(lineArray);
        _pointCount++;
    }

    auto endTime = std::chrono::high_resolution_clock::now();
    auto elapsedTime = std::chrono::duration_cast<std::chrono::seconds>(endTime - beginTime);
    std::cout << "time cost:" << elapsedTime.count() << std::endl;

    return _pointCount;
}


// Runs both readers on the same input file and prints the number of rows each
// one produced, separated by a dashed line.
int main() {
    using Cloud = std::vector<std::vector<double>>;
    using Reader = int (*)(Cloud *, bool);

    // Loads the cloud with the given reader (NaN filtering requested) and prints its size.
    const auto loadAndReport = [](Reader reader) {
        Cloud cloud;
        reader(&cloud, true);
        std::cout << cloud.size() << std::endl;
    };

    loadAndReport(txt1);

    std::cout << "\n--------------------------\n\n";

    loadAndReport(txt2);

    return 0;
}
           

結果對比

實測 2000萬+ 的點雲資料

檔案映射方式:43s

檔案流讀取:59s

繼續閱讀