ncnn Vulkan 运行模型的结果与 CPU 运行结果对不齐

whitzsjs  于 4个月前  发布在  其他
关注(0)|答案(2)|浏览(40)

运行代码如下:
#include <stdio.h>
#include <stdlib.h>
#include <chrono>
#include <fstream>
#include <iostream>
#include <memory>
#include <string>
#include <vector>
#include "platform.h"
#include "net.h"
#include "gpu.h"
#include "trace.h"

using namespace std;

/**
 * \brief Read the whole content of a binary file into a freshly allocated buffer.
 * \tparam T element type of the returned buffer
 * \param filename path of the file to read
 * \param [out] size receives the file size in bytes; set to 0 when the file
 *              cannot be opened or read completely
 * \return unique_ptr owning the buffer, or nullptr on failure. The buffer holds
 *         ceil(size / sizeof(T)) value-initialized elements, so a file whose
 *         size is not a multiple of sizeof(T) is zero-padded at the end.
 */
template <typename T>
std::unique_ptr<T[]> read_file(const std::string &filename, size_t &size)
{
    size = 0;

    std::ifstream is(filename, std::ios::in | std::ios::binary);
    if (!is)
    {
        return nullptr;
    }

    // get length of file:
    is.seekg(0, is.end);
    const std::streamoff length = is.tellg();
    is.seekg(0, is.beg);
    if (length < 0 || !is)
    {
        // tellg() reports -1 on failure; never turn that into a huge size_t.
        return nullptr;
    }

    const size_t byte_count = static_cast<size_t>(length);
    // Allocate in elements, not in bytes (round up for a partial last element).
    const size_t element_count = (byte_count + sizeof(T) - 1) / sizeof(T);
    std::unique_ptr<T[]> buffer(new T[element_count]());

    // read data as a block: istream::read counts bytes, so pass the byte count.
    is.read(reinterpret_cast<char *>(buffer.get()), static_cast<std::streamsize>(byte_count));
    if (is.gcount() != static_cast<std::streamsize>(byte_count))
    {
        return nullptr;
    }

    size = byte_count;
    return buffer;
}

/**
 * \brief Save a buffer to the specified file, in raw (binary) format.
 * \tparam T buffer element type
 * \param file_path destination path; the file is created or truncated
 * \param buf pointer to the first element to write
 * \param size number of elements (not bytes) to write; values <= 0 write nothing
 *
 * Failures (file cannot be opened, short write) are reported on stderr; the
 * function itself returns nothing.
 */
template <typename T>
void save_file(const std::string &file_path, const T *buf, int size)
{
    FILE *fp = fopen(file_path.c_str(), "wb");
    if (!fp)
    {
        fprintf(stderr, "save_file: cannot open %s for writing\n", file_path.c_str());
        return;
    }

    // Guard the int -> size_t conversion: a negative count would otherwise
    // become an enormous element count for fwrite.
    if (buf != nullptr && size > 0)
    {
        const size_t wanted = static_cast<size_t>(size);
        const size_t written = fwrite(buf, sizeof(T), wanted, fp);
        if (written != wanted)
        {
            fprintf(stderr, "save_file: short write to %s (%zu of %zu elements)\n",
                    file_path.c_str(), written, wanted);
        }
    }

    fclose(fp);
}

// Backend selector for demo(); main() casts the integer parsed from argv[1]
// to this enum, so the numeric values are part of the command-line contract.
enum MODE
{
    CPU_MODE = 0, // run with the default (non-Vulkan) options
    GPU_MODE = 1  // demo() enables opt.use_vulkan_compute for this value
};

/**
 * \brief Load the model, run it ten times and dump the last "add_out" blob.
 *
 * Helper for demo(): owns the ncnn::Net so that the net (and every extractor
 * and output Mat) is destroyed when this function returns, i.e. before the
 * caller tears down the GPU instance.
 *
 * \param type backend to run on; GPU_MODE enables Vulkan compute
 * \return 0 on success, -1 when loading the model or running inference fails
 */
static int run_demo_net(MODE type)
{
    SCOPED_TIMER("ncnn-Net");
    ncnn::Net Demo;

    //gpu set
    if (type == GPU_MODE)
    {
        // Disable every reduced-precision path on the Vulkan side
        // (presumably so the output is comparable with the CPU run).
        Demo.opt.use_fp16_packed = false;
        Demo.opt.use_fp16_storage = false;
        Demo.opt.use_fp16_arithmetic = false;
        Demo.opt.use_int8_storage = false;
        Demo.opt.use_int8_arithmetic = false;
        Demo.opt.use_vulkan_compute = true;
    }

    //load model
    const int param_res = Demo.load_param("./demo.param");
    printf("result of load_param_bin = %d\n", param_res);
    if (param_res != 0)
    {
        return -1;
    }
    // Report load_model's own result instead of re-printing load_param's.
    const int model_res = Demo.load_model("./demo.bin");
    printf("result of load_model = %d\n", model_res);
    if (model_res != 0)
    {
        return -1;
    }

    // print the effective options
    printf("Demo.opt.use_fp16_storage = %d\n", Demo.opt.use_fp16_storage);
    printf("Demo.opt.use_bf16_storage = %d\n", Demo.opt.use_bf16_storage);
    printf("Demo.opt.use_image_storage = %d\n", Demo.opt.use_image_storage);
    printf("Demo.opt.use_int8_inference = %d\n", Demo.opt.use_int8_inference);
    printf("Demo.opt.num_threads = %d\n", Demo.opt.num_threads);
    printf("Demo.opt.use_winograd_convolution = %d\n", Demo.opt.use_winograd_convolution);
    printf("Demo.opt.use_weight_fp16_storage = %d\n", Demo.opt.use_weight_fp16_storage);
    printf("Demo.opt.use_fp16_arithmetic = %d\n", Demo.opt.use_fp16_arithmetic);
    printf("Demo.opt.use_int8_arithmetic = %d\n", Demo.opt.use_int8_arithmetic);

    {
        ncnn::Mat out;
        for (int i = 0; i < 10; i++)
        {
            ncnn::Extractor ex = Demo.create_extractor();
            ex.set_num_threads(4);

            ncnn::Mat in_image(1072, 832, 4);
            in_image.fill(1.0f);
            if (ex.input("inp", in_image) != 0)
            {
                fprintf(stderr, "ex.input(\"inp\") failed\n");
                return -1;
            }

            const std::chrono::time_point<std::chrono::high_resolution_clock> p0 = std::chrono::high_resolution_clock::now();
            int extract_res = 0;
            {
                SCOPED_TIMER("ncnn-extract");
                extract_res = ex.extract("add_out", out);
            }
            const std::chrono::time_point<std::chrono::high_resolution_clock> p1 = std::chrono::high_resolution_clock::now();
            if (extract_res != 0)
            {
                fprintf(stderr, "ex.extract(\"add_out\") failed: %d\n", extract_res);
                return -1;
            }

            std::cout << "ncnn extract time:" << (float)std::chrono::duration_cast<std::chrono::microseconds>(p1 - p0).count() / 1000 << "ms" << std::endl;
            printf("out.w = %d out.h = %d out.d = %d out.c = %d \n", out.w, out.h, out.d, out.c);
        }

        // Flatten channel by channel: consecutive channels of an ncnn::Mat are
        // cstep elements apart, which can exceed w*h*d because of alignment
        // padding, so walking the raw data pointer linearly is not safe.
        std::vector<float> flat;
        if (!out.empty())
        {
            const size_t per_channel = (size_t)out.w * out.h * out.d;
            flat.reserve(per_channel * out.c);
            for (int q = 0; q < out.c; q++)
            {
                const float *ptr = out.channel(q);
                flat.insert(flat.end(), ptr, ptr + per_channel);
            }
        }

        // The file name now matches the backend that produced the data.
        if (type == GPU_MODE)
        {
            save_file("./add_out_gpu", flat.data(), static_cast<int>(flat.size()));
        }
        else
        {
            save_file("./add_out_cpu", flat.data(), static_cast<int>(flat.size()));
        }

        printf("destroy forloop\n");
    }
    printf("destroy Extractor\n");

    return 0;
}

/**
 * \brief Run ./demo.param + ./demo.bin on the requested backend and save the
 *        "add_out" output to ./add_out_cpu or ./add_out_gpu.
 *
 * In GPU_MODE the ncnn GPU instance is created before the net and destroyed
 * only after the net has gone out of scope (ncnn's Vulkan notes require all
 * nets/extractors to be released before destroy_gpu_instance()).
 *
 * \param type CPU_MODE or GPU_MODE
 * \return 0 on success, -1 when loading or inference fails
 */
int demo( MODE type)
{
    const bool use_gpu = (type == GPU_MODE);

    if (use_gpu)
    {
        ncnn::create_gpu_instance();
        printf("gpu count = %d\n", ncnn::get_gpu_count());
    }

    const int status = run_demo_net(type);

    //gpu set
    if (use_gpu)
    {
        ncnn::destroy_gpu_instance();
    }

    return status;
}

int main(int argc, const char **argv)
{
MODE type = (MODE)atoi(argv[1]);
printf("type = %d\n", type);
demo(type);
return 0;
}

模型文件:
test_ncnn.zip

wvyml7n5

wvyml7n51#

我觉得对齐了才有问题吧,GPU是纯硬件去算的,精度没CPU高,大概的方向对了就行

k5hmc34c

k5hmc34c2#

#include "net.h"
// Print every value of a float ncnn::Mat to stdout: one text line per row,
// a blank line between depth slices (4-D mats only) and a dashed separator
// after each channel. For mats with d == 1 the output is unchanged from the
// row-only version.
void pretty_print(const ncnn::Mat& m)
{
    for (int q=0; q<m.c; q++)
    {
        // channel(q) points at the start of channel q; rows and depth slices
        // are walked contiguously from there.
        const float* ptr = m.channel(q);
        for (int z=0; z<m.d; z++)
        {
            for (int y=0; y<m.h; y++)
            {
                for (int x=0; x<m.w; x++)
                {
                    printf("%f ", ptr[x]);
                }
                ptr += m.w;
                printf("\n");
            }
            // Separate depth slices; skipped after the last one.
            if (z + 1 < m.d)
            {
                printf("\n");
            }
        }
        printf("------------------------\n");
    }
}
// Minimal reproduction: run demo.param/demo.bin with Vulkan compute and all
// fp16/int8 paths disabled, feed an all-zero 1072x832x4 input and print the
// "add_out" blob. Returns 0 on success, -1 if loading or inference fails.
int main()
{
    ncnn::Net net;

    net.opt.use_vulkan_compute = true;

    net.opt.use_fp16_packed = false;
    net.opt.use_fp16_storage = false;
    net.opt.use_fp16_arithmetic = false;
    net.opt.use_int8_storage = false;
    net.opt.use_int8_arithmetic = false;

    // Stop early on a load failure instead of extracting from an empty net.
    if (net.load_param("demo.param") != 0)
    {
        fprintf(stderr, "load_param demo.param failed\n");
        return -1;
    }
    if (net.load_model("demo.bin") != 0)
    {
        fprintf(stderr, "load_model demo.bin failed\n");
        return -1;
    }

    {
    ncnn::Extractor ex = net.create_extractor();

    ncnn::Mat in0(1072, 832, 4);
    in0.fill(0.f);

    if (ex.input("inp", in0) != 0)
    {
        fprintf(stderr, "input inp failed\n");
        return -1;
    }

    ncnn::Mat out0;
    if (ex.extract("add_out", out0) != 0)
    {
        fprintf(stderr, "extract add_out failed\n");
        return -1;
    }

    fprintf(stderr, "out0 %d %d %d %d\n", out0.w, out0.h, out0.d, out0.c);

    pretty_print(out0);
    }

    return 0;
}

CPU 与 Vulkan 的输出只在小数点后第六位有差别,这属于 fp32 浮点运算的正常误差,是正常现象。

相关问题