天天看點

在 NVIDIA Jetson Xavier 上用 CUDA 實作圖像的 resize

具體的 API 接口見 https://github.com/cumtchw/cuda_utils

下面是調用上述接口對圖檔進行 resize 的一個 demo。

#include <unistd.h>

#include <cstdint>
#include <cstdio>
#include <cstring>
#include <fstream>
#include <iostream>
#include <string>
#include <thread>
#include <vector>

#include <cuda_runtime_api.h>
#include "opencv2/opencv.hpp"

#include "cuda_utils_sdk.h"
#include "cudaConverter.h"

using namespace cv;
using namespace std;

int main(int argc, char ** argv)
{

    int img_w_{};
    int img_h_{};
    int img_step1_{};
    vector<float> preprocess_output_data_;
    size_t convert_input_size_{};
    size_t convert_output_size_{};
    uchar* convert_input_{};
    float* convert_output_{};
    int m_maxBatchSize = 1;
    std::vector<float> mean_data_{ 0, 0, 0 };
    int w = 1024;
    int h = 1024;
    int c =3;
    int error{};

    cv::Mat src0 = cv::imread("./test0.jpeg");  //720*405
    cv::Mat src1 = cv::imread("./test1.jpeg");

    vector<cv::Mat> resize_imgs;
    resize_imgs.push_back(src0);
    //resize_imgs.push_back(src1);
    
    if (preprocess_output_data_.empty())   //變量定義為:vector<float> preprocess_output_data_;
    {
        preprocess_output_data_.resize(m_maxBatchSize * c * w * h * sizeof(float));
    }

    for(size_t i = 0; i < resize_imgs.size(); ++i)
    {
        auto &  mat = resize_imgs[i];
        img_w_ = mat.cols;
        img_h_ = mat.rows;
        img_step1_ = mat.step1();
        if (convert_input_ == nullptr)
        {
            convert_input_size_ = m_maxBatchSize * img_step1_ * img_h_ * sizeof(unsigned char);
            cout<<"m_maxBatchSize:"<<m_maxBatchSize<<",img_step1_:"<<img_step1_<<",img_h_:"<<img_h_<<endl;
            cout<<"convert_input_size_:"<<convert_input_size_<<endl;
            error = cuAllocMapped((void**)&convert_input_, convert_input_size_);
            if (error != 0) 
            {
                cout<<"cuAllocMapped failed for input buffer!"<<endl;
            }
            else
            {
                cout<<"cuAllocMapped success for input buffer!"<<endl;
            }
        }
        else
        {
            assert(mat.step1() * mat.rows <= convert_input_size_);
        }


        if (convert_output_ == nullptr) 
        {
            convert_output_size_ = m_maxBatchSize * c * w * h * sizeof(float);
            cout<<"convert_output_size_:"<<convert_output_size_<<endl;
            error = cuAllocMapped((void**)&convert_output_, convert_output_size_);
            if (error != 0) 
            {
                cout<<"cuAllocMapped failed for output buffer!"<<endl;
            }
            else
            {
                cout<<"cuAllocMapped success for output buffer!"<<endl;
            }

        }

        //transfer data from opencv mat buffer to gpu
        size_t buff_size = img_step1_ * img_h_ * sizeof(unsigned char);
        memcpy(convert_input_ + i * buff_size, mat.data, buff_size);
        cout<<"convert_input_ size:"<<buff_size<<endl;
        printf("===========function:%s,line:%d\n", __FUNCTION__, __LINE__);
        cuStreamSynchronize(nullptr);
        printf("===========function:%s,line:%d\n", __FUNCTION__, __LINE__);
    }

    if(3 == c)
    {
        printf("===========function:%s,line:%d\n", __FUNCTION__, __LINE__);
        cu::cudaResizeConvert(convert_input_, img_w_, img_h_, img_step1_, ImageFormat::IMAGE_BGR8, convert_output_, w, h, ImageFormat::IMAGE_RGB32F_PLANAR
        ,resize_imgs.size() , mean_data_.data(), 1, (cudaStream_t)nullptr);
        printf("===========function:%s,line:%d\n", __FUNCTION__, __LINE__);
        cuStreamSynchronize(nullptr);
    }

    printf("===========function:%s,line:%d\n", __FUNCTION__, __LINE__);
    uint64_t output_size = c * w * h * sizeof(float) * resize_imgs.size();
    cout<<"c:"<<c<<",w:"<<w<<",h:"<<h<<"sizeof(float):"<<sizeof(float)<<",resize_imgs.size():"<<resize_imgs.size()<<endl;

    memcpy(preprocess_output_data_.data(), convert_output_, output_size);
    cout<<"preprocess_output_data_.size():"<<preprocess_output_data_.size()<<endl;
    cv::Mat resMat = Mat(w, h, CV_32FC3);
        
    for(int i=0; i < h; i++)//i表示在第幾行.
    {
        for(int j =0;j<w;j++)//j表示在第幾列.
        {
            resMat.at<Vec3f>(i,j)[2] = preprocess_output_data_.at(0*w*h + i*w + j);
            resMat.at<Vec3f>(i,j)[1] = preprocess_output_data_.at(1*w*h + i*w + j);
            resMat.at<Vec3f>(i,j)[0] = preprocess_output_data_.at(2*w*h + i*w + j); 
        }
    }
    imwrite("./result1.jpg", resMat);

    printf("this is in the main\n");
    return 0;
}