error log | 日志或报错信息 | ログ
想把RobustVideoMatting(稳健视频抠图模型)用ncnn跑起来,经过一番折腾后成功转成静态维度的onnx,并且使用了onnxsim去除了胶水op,用onnxruntime可以顺利跑通转换后的模型;然后将其转换成ncnn,在转换时报错:
onnx2ncnn rvm_mobilenetv3_fp32-480-480-sim.onnx rvm_mobilenetv3_fp32-480-480.param rvm_mobilenetv3_fp32-480-480.bin
Unsupported split axis !
Unsupported split axis !
在使用ncnn的c和python接口进行推理时,出现错误,c提示空指针,python的则是直接崩溃。
所有ex.input返回的错误码都是0,应该是正常的,但是在ex.extract("fgr",fgr)开始,返回的错误码是-100,data是一个空指针,导致cv::Mat初始化出错,程序崩溃。折腾了2天也没解决,目前依然没有头绪🤔,于是决定来这里占个坑~ 🙄
model | 模型 | モデル
- original model
onnx-model.zip
ncnn-model.zip
这是其中一个被我转换成480x480静态输入的onnx模型,以及对应的ncnn模型。其他不同输入维度模型,问题应该相同。
how to reproduce | 复现步骤 | 再現方法
- python测试代码
import ncnn
import numpy as np
import torch
def infer_ncnn_rvm():
param_path = "./checkpoint/rvm_mobilenetv3_fp32-480-480-opt.param"
bin_path = "./checkpoint/rvm_mobilenetv3_fp32-480-480-opt.bin"
net = ncnn.Net()
net.load_param(param_path)
net.load_model(bin_path)
input_names = net.input_names()
output_names = net.output_names()
print(input_names)
print(output_names)
src = torch.randn((480, 480, 3)).cpu().numpy()
r1i = np.zeros([240, 240, 16])
r2i = np.zeros([120, 120, 20])
r3i = np.zeros([60, 60, 40])
r4i = np.zeros([30, 30, 64])
print(src.shape)
print(r1i.shape)
print(r2i.shape)
print(r3i.shape)
print(r4i.shape)
src_in = ncnn.Mat(src)
r1i_in = ncnn.Mat(r1i)
r2i_in = ncnn.Mat(r2i)
r3i_in = ncnn.Mat(r3i)
r4i_in = ncnn.Mat(r4i)
ex = net.create_extractor()
ex.input("src", src_in)
ex.input("r1i", r1i_in)
ex.input("r2i", r2i_in)
ex.input("r3i", r3i_in)
ex.input("r4i", r4i_in)
print("ex.input done")
fgr = ex.extract("fgr") # 运行到这个直接崩溃
print("ex.extract fgr done")
pha = ex.extract("pha")
r1o = ex.extract("r1o")
r2o = ex.extract("r2o")
r3o = ex.extract("r3o")
r4o = ex.extract("r4o")
print("fgr".center(100, "="))
print(fgr.shape)
print(fgr)
if __name__ == "__main__":
infer_ncnn_rvm()
- C++代码
// 一些自定义的类
typedef struct MattingContentType
{
cv::Mat fgr_mat; // fore ground mat 3 channel (R,G,B) 0.~1. or 0~255
cv::Mat pha_mat; // alpha(matte) 0.~1.
cv::Mat merge_mat; // merge bg and fg according pha
bool flag;
MattingContentType(): flag(false)
{};
} MattingContent;
class BasicNCNNHandler
{
protected:
ncnn::Net *net = nullptr;
const char *log_id = nullptr;
const char *param_path = nullptr;
const char *bin_path = nullptr;
protected:
const unsigned int num_threads; // initialize at runtime.
protected:
explicit BasicNCNNHandler(const std::string &_param_path,
const std::string &_bin_path,
unsigned int _num_threads = 1);
virtual ~BasicNCNNHandler();
// un-copyable
protected:
BasicNCNNHandler(const BasicNCNNHandler &) = delete; //
BasicNCNNHandler(BasicNCNNHandler &&) = delete; //
BasicNCNNHandler &operator=(const BasicNCNNHandler &) = delete; //
BasicNCNNHandler &operator=(BasicNCNNHandler &&) = delete; //
private:
virtual void transform(const cv::Mat &mat, ncnn::Mat &in) = 0;
private:
void initialize_handler();
};
BasicNCNNHandler::BasicNCNNHandler(
const std::string &_param_path, const std::string &_bin_path, unsigned int _num_threads) :
log_id(_param_path.data()), param_path(_param_path.data()),
bin_path(_bin_path.data()), num_threads(_num_threads)
{
initialize_handler();
}
void BasicNCNNHandler::initialize_handler()
{
// init net, change this setting for better performance.
net = new ncnn::Net();
net->opt.use_vulkan_compute = false; // default
net->opt.use_fp16_arithmetic = false;
net->load_param(param_path);
net->load_model(bin_path);
}
BasicNCNNHandler::~BasicNCNNHandler()
{
if (net) delete net;
net = nullptr;
}
// 主要的代码
NCNNRobustVideoMatting::NCNNRobustVideoMatting(
const std::string &_param_path, const std::string &_bin_path,
unsigned int _num_threads, int _input_height,
int _input_width, unsigned int _variant_type
) :
BasicNCNNHandler(_param_path, _bin_path, _num_threads),
input_height(_input_height), input_width(_input_width),
variant_type(_variant_type)
{
initialize_context();
}
void NCNNRobustVideoMatting::initialize_context()
{
if (variant_type == VARIANT::MOBILENETV3)
{
if (input_width == 1920 && input_height == 1080)
{
r1i = ncnn::Mat(240, 135, 16); // w,h,c in NCNN
r2i = ncnn::Mat(120, 68, 20);
r3i = ncnn::Mat(60, 34, 40);
r4i = ncnn::Mat(30, 17, 64);
} // hxw 480x640 480x480 640x480
else
{
r1i = ncnn::Mat(input_width / 2, input_height / 2, 16);
r2i = ncnn::Mat(input_width / 4, input_height / 4, 20);
r3i = ncnn::Mat(input_width / 8, input_height / 8, 40);
r4i = ncnn::Mat(input_width / 16, input_height / 16, 64);
}
} // RESNET50
else
{
if (input_width == 1920 && input_height == 1080)
{
r1i = ncnn::Mat(240, 135, 16);
r2i = ncnn::Mat(120, 68, 32);
r3i = ncnn::Mat(60, 34, 64);
r4i = ncnn::Mat(30, 17, 128);
} // hxw 480x640 480x480 640x480
else
{
r1i = ncnn::Mat(input_width / 2, input_height / 2, 16);
r2i = ncnn::Mat(input_width / 4, input_height / 4, 20);
r3i = ncnn::Mat(input_width / 8, input_height / 8, 40);
r4i = ncnn::Mat(input_width / 16, input_height / 16, 64);
}
}
// init 0.
r1i.fill(0.f);
r2i.fill(0.f);
r3i.fill(0.f);
r4i.fill(0.f);
context_is_initialized = true;
}
void NCNNRobustVideoMatting::transform(const cv::Mat &mat, ncnn::Mat &in)
{
// BGR NHWC -> RGB NCHW & resize
int h = mat.rows;
int w = mat.cols;
in = ncnn::Mat::from_pixels_resize(
mat.data, ncnn::Mat::PIXEL_BGR2RGB,
w, h, input_width, input_height
);
in.substract_mean_normalize(mean_vals, norm_vals);
}
void NCNNRobustVideoMatting::detect_video(const std::string &video_path,
const std::string &output_path,
std::vector<MattingContent> &contents,
bool save_contents, unsigned int writer_fps)
{
// 0. init video capture
cv::VideoCapture video_capture(video_path);
const unsigned int width = video_capture.get(cv::CAP_PROP_FRAME_WIDTH);
const unsigned int height = video_capture.get(cv::CAP_PROP_FRAME_HEIGHT);
const unsigned int frame_count = video_capture.get(cv::CAP_PROP_FRAME_COUNT);
if (!video_capture.isOpened())
{
std::cout << "Can not open video: " << video_path << "\n";
return;
}
// 1. init video writer
cv::VideoWriter video_writer(output_path, cv::VideoWriter::fourcc('m', 'p', '4', 'v'),
writer_fps, cv::Size(width, height));
if (!video_writer.isOpened())
{
std::cout << "Can not open writer: " << output_path << "\n";
return;
}
// 2. matting loop
cv::Mat mat;
unsigned int i = 0;
while (video_capture.read(mat))
{
i += 1;
types::MattingContent content;
this->detect(mat, content);
// 3. save contents and writing out.
if (content.flag)
{
if (save_contents) contents.push_back(content);
if (!content.merge_mat.empty()) video_writer.write(content.merge_mat);
}
// 4. check context states.
if (!context_is_update) break;
}
// 5. release
video_capture.release();
video_writer.release();
}
void NCNNRobustVideoMatting::detect(const cv::Mat &mat, MattingContent &content)
{
if (mat.empty()) return;
int img_h = mat.rows;
int img_w = mat.cols;
if (!context_is_initialized) return;
// 1. make input tensor
ncnn::Mat src;
this->transform(mat, src);
// 2. inference & extract
auto extractor = net->create_extractor();
extractor.set_light_mode(false); // default
extractor.set_num_threads(num_threads);
extractor.input("src", src);
extractor.input("r1i", r1i);
extractor.input("r2i", r2i);
extractor.input("r3i", r3i);
extractor.input("r4i", r4i); // 运行到这里都没问题
// 3. generate matting
this->generate_matting(extractor, content, img_h, img_w);
// 4. update context (needed for video detection.)
context_is_update = false; // init state.
this->update_context(extractor);
}
void NCNNRobustVideoMatting::generate_matting(ncnn::Extractor &extractor,
MattingContent &content,
int img_h, int img_w)
{
ncnn::Mat fgr, pha;
extractor.extract("fgr", fgr);
extractor.extract("pha", pha);
float *fgr_ptr = (float *) fgr.data; // 此处data是空指针
float *pha_ptr = (float *) pha.data;
const unsigned int channel_step = input_height * input_width;
// fast assign & channel transpose(CHW->HWC).
cv::Mat rmat(input_height, input_width, CV_32FC1, fgr_ptr); // nullptr导致cv::Mat初始化错误
cv::Mat gmat(input_height, input_width, CV_32FC1, fgr_ptr + channel_step);
cv::Mat bmat(input_height, input_width, CV_32FC1, fgr_ptr + 2 * channel_step);
cv::Mat pmat(input_height, input_width, CV_32FC1, pha_ptr); // ref only, zero-copy.
rmat *= 255.f;
bmat *= 255.f;
gmat *= 255.f;
cv::Mat rest = 1.f - pmat;
cv::Mat mbmat = bmat.mul(pmat) + rest * 153.f;
cv::Mat mgmat = gmat.mul(pmat) + rest * 255.f;
cv::Mat mrmat = rmat.mul(pmat) + rest * 120.f;
std::vector<cv::Mat> fgr_channel_mats, merge_channel_mats;
fgr_channel_mats.push_back(bmat);
fgr_channel_mats.push_back(gmat);
fgr_channel_mats.push_back(rmat);
merge_channel_mats.push_back(mbmat);
merge_channel_mats.push_back(mgmat);
merge_channel_mats.push_back(mrmat);
content.pha_mat = pmat;
cv::merge(fgr_channel_mats, content.fgr_mat);
cv::merge(merge_channel_mats, content.merge_mat);
content.fgr_mat.convertTo(content.fgr_mat, CV_8UC3);
content.merge_mat.convertTo(content.merge_mat, CV_8UC3);
if (img_w != input_width || img_h != input_height)
{
cv::resize(content.pha_mat, content.pha_mat, cv::Size(img_w, img_h));
cv::resize(content.fgr_mat, content.fgr_mat, cv::Size(img_w, img_h));
cv::resize(content.merge_mat, content.merge_mat, cv::Size(img_w, img_h));
}
content.flag = true;
}
void NCNNRobustVideoMatting::update_context(ncnn::Extractor &extractor)
{
ncnn::Mat r1o, r2o, r3o, r4o;
extractor.extract("r1o", r1o);
extractor.extract("r2o", r2o);
extractor.extract("r3o", r3o);
extractor.extract("r4o", r4o);
r1i.clone_from(r1o); // deepcopy
r2i.clone_from(r2o); // deepcopy
r3i.clone_from(r3o); // deepcopy
r4i.clone_from(r4o); // deepcopy
context_is_update = true;
}
// 头文件
class NCNNRobustVideoMatting : public BasicNCNNHandler
{
public:
explicit NCNNRobustVideoMatting(const std::string &_param_path,
const std::string &_bin_path,
unsigned int _num_threads = 1,
int _input_height = 480,
int _input_width = 640,
unsigned int _variant_type = VARIANT::MOBILENETV3); //
~NCNNRobustVideoMatting() override = default;
private:
const float mean_vals[3] = {0.f, 0.f, 0.f}; // RGB
const float norm_vals[3] = {1.f / 255.f, 1.f / 255.f, 1.f / 255.f};
// hardcode input node names, hint only.
// downsample_ratio has been freeze while onnx exported
// and, the input size of each input has been freeze, also.
std::vector<const char *> input_node_names = {
"src",
"r1i",
"r2i",
"r3i",
"r4i"
};
// hardcode output node names, hint only.
std::vector<const char *> output_node_names = {
"fgr",
"pha",
"r1o",
"r2o",
"r3o",
"r4o"
};
bool context_is_update = false;
bool context_is_initialized = false;
private:
enum VARIANT
{
MOBILENETV3 = 0,
RESNET50 = 1
};
// will be update inner video matting process.
ncnn::Mat r1i, r2i, r3i, r4i;
// input size & variant_type, initialize at runtime.
const int input_height;
const int input_width;
const unsigned int variant_type;
private:
void transform(const cv::Mat &mat, ncnn::Mat &in) override;
void initialize_context();
void generate_matting(ncnn::Extractor &extractor,
MattingContent &content,
int img_h, int img_w);
void update_context(ncnn::Extractor &extractor);
public:
/**
* Image Matting Using RVM(https://github.com/PeterL1n/RobustVideoMatting)
* @param mat: cv::Mat BGR HWC
* @param content: MattingContent to catch the detected results.
* See https://github.com/PeterL1n/RobustVideoMatting/blob/master/documentation/inference_zh_Hans.md
*/
void detect(const cv::Mat &mat, MattingContent &content);
/**
* Video Matting Using RVM(https://github.com/PeterL1n/RobustVideoMatting)
* @param video_path: eg. xxx/xxx/input.mp4
* @param output_path: eg. xxx/xxx/output.mp4
* @param contents: vector of MattingContent to catch the detected results.
* @param save_contents: false by default, whether to save MattingContent.
* See https://github.com/PeterL1n/RobustVideoMatting/blob/master/documentation/inference_zh_Hans.md
* @param writer_fps: FPS for VideoWriter, 20 by default.
*/
void detect_video(const std::string &video_path,
const std::string &output_path,
std::vector<MattingContent> &contents,
bool save_contents = false,
unsigned int writer_fps = 20);
- C++测试demo
static void test_ncnn_rvm()
{
// UNLUCKY: Test Failed!
std::string param_path = "rvm_mobilenetv3_fp32-480-480-opt.param";
std::string bin_path = "rvm_mobilenetv3_fp32-480-480-opt.bin";
std::string video_path = "test_rvm_1.mp4";
std::string output_path = "test_rvm_1_ncnn.mp4";
auto *rvm = new NCNNRobustVideoMatting(param_path, bin_path, 1, 480, 480, 0); // 1 threads
td::vector<MattingContent> contents;
// 1. video matting.
rvm->detect_video(video_path, output_path, contents, false);
delete rvm;
}
int main(__unused int argc, __unused char *argv[])
{
test_ncnn_rvm();
return 0;
}
非常感谢~
3条答案
按热度按时间mrzz3bfm1#
我也遇到了相似的问题,也卡了两三天没有解决了呜呜呜呜呜,但是好像直接extract返回值都得不到,ex.input是正常的0,安卓的log里面显示
2021-10-12 22:21:43.834 19393-19393/? A/DEBUG: signal 11 (SIGSEGV), code 1 (SEGV_MAPERR), fault addr 0x0 2021-10-12 22:21:43.834 19393-19393/? A/DEBUG: Cause: null pointer dereference
希望来蹭一蹭能够得到解决!
3j86kqsm2#
同样遇到这个问题,有解决方案吗?或者解决的方向?
jv4diomz3#
遇到了同样的问题,extract出来的值是nan,而且我的模型有2个输出值,net.output_names()只能输出一个name