运行代码如下：
#include <stdio.h>
#include <chrono>
#include <fstream>
#include <iostream>
#include <memory>
#include <string>
#include <vector>
#include "platform.h"
#include "net.h"
#include "gpu.h"
#include "trace.h"
using namespace std;
/**
 * \brief Read the whole content of a file into a newly allocated buffer.
 * \tparam T element type of the returned buffer
 * \param filename path of the file to read
 * \param [out] size on success, set to the size of the file in bytes
 * \return unique_ptr to the allocated buffer, or nullptr if the file
 *         could not be opened
 */
template <typename T>
std::unique_ptr<T[]> read_file(const std::string &filename, size_t &size)
{
    std::ifstream is(filename, std::ios::in | std::ios::binary);
    if (is)
    {
        // get length of file in bytes:
        is.seekg(0, is.end);
        size = is.tellg();
        is.seekg(0, is.beg);
        // allocate just enough elements to hold `size` bytes (rounded up).
        // BUG FIX: the original allocated `size` elements (size*sizeof(T)
        // bytes) but then read only size/sizeof(T) bytes, because
        // istream::read takes a byte count, not an element count.
        auto buffer = std::make_unique<T[]>((size + sizeof(T) - 1) / sizeof(T));
        // read data as a block (byte count):
        is.read(reinterpret_cast<char *>(buffer.get()), size);
        return buffer;
    }
    return nullptr;
}
/**
 * \brief Save a buffer to the specified file, in raw binary format.
 * \tparam T element type of the buffer
 * \param file_path destination file path
 * \param buf pointer to the data to write
 * \param size number of elements to write (at most `size` elements are saved)
 */
template <typename T>
void save_file(const std::string &file_path, const T *buf, int size)
{
    FILE *fp = fopen(file_path.c_str(), "wb");
    if (!fp)
    {
        // silently ignore open failure, matching the original best-effort behavior
        return;
    }
    // FIX: the attribute must be spelled [[maybe_unused]] (C++17);
    // bare `maybe_unused` does not compile.
    [[maybe_unused]] size_t fsize = fwrite(buf, sizeof(T), size, fp);
    fclose(fp);
}
// Execution backend selection for demo().
enum MODE
{
    CPU_MODE = 0, // run inference on the CPU
    GPU_MODE = 1  // run inference through Vulkan compute
};
/**
 * \brief Load the demo network, run it 10 times on CPU or GPU, time each
 *        extraction, and dump the raw float output tensor to disk.
 * \param type CPU_MODE or GPU_MODE backend selection
 * \return always 0
 */
int demo(MODE type)
{
    {
        SCOPED_TIMER("ncnn-Net");
        ncnn::Net Demo;
        // NOTE(review): only referenced by the commented-out
        // set_vulkan_device() call below — kept for easy re-enabling.
        ncnn::VulkanDevice vkdev;
        // GPU setup: disable reduced-precision storage/arithmetic so the
        // GPU result is numerically comparable with the CPU run
        if (type == GPU_MODE)
        {
            Demo.opt.use_fp16_packed = false;
            Demo.opt.use_fp16_storage = false;
            Demo.opt.use_fp16_arithmetic = false;
            Demo.opt.use_int8_storage = false;
            Demo.opt.use_int8_arithmetic = false;
            ncnn::create_gpu_instance();
            printf("gpu count = %d\n", ncnn::get_gpu_count());
            //Demo.set_vulkan_device(&vkdev);
            Demo.opt.use_vulkan_compute = true;
        }
        // load model
        int res = Demo.load_param("./demo.param");
        printf("result of load_param_bin = %d\n", res);
        // BUG FIX: the original discarded load_model()'s return value and
        // printed the stale load_param result instead.
        res = Demo.load_model("./demo.bin");
        printf("result of load_model = %d\n", res);
        //Demo.opt.use_int8_inference = false;
        //Demo.opt.use_winograd_convolution = false;
        //Demo.opt.use_fp16_storage = false;
        // print the effective options
        printf("Demo.opt.use_fp16_storage = %d\n", Demo.opt.use_fp16_storage);
        printf("Demo.opt.use_bf16_storage = %d\n", Demo.opt.use_bf16_storage);
        printf("Demo.opt.use_image_storage = %d\n", Demo.opt.use_image_storage);
        printf("Demo.opt.use_int8_inference = %d\n", Demo.opt.use_int8_inference);
        printf("Demo.opt.num_threads = %d\n", Demo.opt.num_threads);
        printf("Demo.opt.use_winograd_convolution = %d\n", Demo.opt.use_winograd_convolution);
        printf("Demo.opt.use_weight_fp16_storage = %d\n", Demo.opt.use_weight_fp16_storage);
        printf("Demo.opt.use_fp16_arithmetic = %d\n", Demo.opt.use_fp16_arithmetic);
        printf("Demo.opt.use_int8_arithmetic = %d\n", Demo.opt.use_int8_arithmetic);
        {
            ncnn::Mat out;
            for (int i = 0; i < 10; i++)
            {
                ncnn::Extractor ex = Demo.create_extractor();
                //ex.set_light_mode(true);
                ex.set_num_threads(4);
                ncnn::Mat in_image(1072, 832, 4);
                in_image.fill(1.0f);
                ex.input("inp", in_image);
                std::chrono::time_point<std::chrono::high_resolution_clock> p0 = std::chrono::high_resolution_clock::now();
                {
                    SCOPED_TIMER("ncnn-extract");
                    ex.extract("add_out", out);
                }
                std::chrono::time_point<std::chrono::high_resolution_clock> p1 = std::chrono::high_resolution_clock::now();
                cout << "ncnn extract time:" << (float)std::chrono::duration_cast<std::chrono::microseconds>(p1 - p0).count() / 1000 << "ms" << endl;
                printf("out.w = %d out.h = %d out.d = %d out.c = %d \n", out.w, out.h, out.d, out.c);
            }
            // copy the flat output tensor into a plain float vector
            // (renamed from `res` to stop shadowing the load-status variable)
            std::vector<float> out_data;
            out_data.reserve((size_t)out.w * out.h * out.d * out.c);
            for (int i = 0; i < (int)(out.w * out.h * out.d * out.c); i++)
                out_data.push_back(*((float *)out.data + i));
            // BUG FIX: the destination filenames were swapped — the GPU run
            // was saved as add_out_cpu and the CPU run as add_out_gpu.
            if (type == GPU_MODE)
            {
                save_file("./add_out_gpu", out_data.data(), (int)out_data.size());
            }
            else
            {
                save_file("./add_out_cpu", out_data.data(), (int)out_data.size());
            }
            printf("destroy forloop\n");
        }
        printf("destroy Extractor\n");
        // GPU teardown
        if (type == GPU_MODE)
        {
            ncnn::destroy_gpu_instance();
        }
    }
    return 0;
}
/**
 * \brief Entry point. Usage: ./demo <mode>  where mode 0 = CPU, 1 = GPU.
 */
int main(int argc, const char **argv)
{
    // BUG FIX: guard against a missing argument — the original dereferenced
    // argv[1] unconditionally and crashed when run with no arguments.
    if (argc < 2)
    {
        fprintf(stderr, "usage: %s <mode>  (0 = CPU, 1 = GPU)\n", argv[0]);
        return 1;
    }
    MODE type = (MODE)atoi(argv[1]);
    printf("type = %d\n", type);
    demo(type);
    return 0;
}
模型文件:
test_ncnn.zip
2 条答案（按热度/按时间排序）

回答 1（用户 wvyml7n5）：
我觉得对齐了才有问题吧,GPU是纯硬件去算的,精度没CPU高,大概的方向对了就行
回答 2（用户 k5hmc34c）：
在小数点后第六位是有差别的,这是正常现象