Paddle paddle-trt BERT model fails at larger batch sizes

jq6vz3qz · posted 2021-12-07 in Java

To get your question resolved quickly, before filing an issue please first search for similar questions via: [search issue keywords] [filter by labels] [official documentation]

If you find no similar question, please provide the following details when filing the issue:

  • Title: describe your problem concisely and precisely, e.g. "Where is the API documentation for the latest inference library?"
  • Version and environment info:

   1) PaddlePaddle version: 1.8
   2) CPU:
   3) GPU: NVIDIA T4
      Driver API Version: 10.2
      Runtime API Version: 10.2
      cuDNN: 7.6.5
      TensorRT: 6.0.1.8
   4) System environment: Ubuntu

  • Inference info:
   1) C++ inference: please provide the version of the inference library package and its version.txt file
   2) The complete CMake command including paths
   3) API info (please provide if APIs are called)
   4) Source of the inference library: official download / special environment (e.g. built with BCLOUD)

  • Reproduction info:
  • Problem description: please describe your problem in detail, and include the error message, logs, and key code snippets

I built the inference library from the 1.8 branch and ran a BERT model with paddle-trt. It errors out at batch size = 8, while bs = 1, 2, and 4 all run through fine.

How should I change this to fix it? Also, is there a demo for TensorRT dynamic shapes, or a BERT demo?
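For reference, my understanding is that with dynamic shapes every runtime input shape (including the batch dimension) must fall elementwise inside the [min, max] ranges passed to SetTRTDynamicShapeInfo below. A minimal self-contained sketch of that check (the helper ShapeInRange is purely illustrative, not a Paddle API):

#include <iostream>
#include <vector>

// Illustrative helper (not part of the Paddle API): true if `shape` lies
// elementwise within [min_shape, max_shape].
bool ShapeInRange(const std::vector<int>& shape,
                  const std::vector<int>& min_shape,
                  const std::vector<int>& max_shape) {
  if (shape.size() != min_shape.size() || shape.size() != max_shape.size()) {
    return false;
  }
  for (size_t i = 0; i < shape.size(); ++i) {
    if (shape[i] < min_shape[i] || shape[i] > max_shape[i]) return false;
  }
  return true;
}

int main() {
  // Ranges copied from my PrepareTRTConfig below.
  const std::vector<int> min_shape{1, 1, 1}, max_shape{1024, 128, 1};
  for (int bs : {1, 2, 4, 8, 16}) {
    std::cout << "bs=" << bs << " in range: " << std::boolalpha
              << ShapeInRange({bs, 128, 1}, min_shape, max_shape) << "\n";
  }
  return 0;
}

By this check, {8, 128, 1} sits well inside min {1,1,1} / max {1024,128,1}, which is why the bs = 8 failure surprises me.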

Code attached:

#include <gflags/gflags.h>
#include <glog/logging.h>
#include <chrono>
#include <cstring>
#include <iostream>
#include <map>
#include <string>
#include <vector>
#include "paddle/include/paddle_inference_api.h"

namespace paddle {
using paddle::AnalysisConfig;

DEFINE_string(dirname, "./bert", "Directory of the inference model.");
using Time = decltype(std::chrono::high_resolution_clock::now());
Time time() { return std::chrono::high_resolution_clock::now(); }
double time_diff(Time t1, Time t2) {
  typedef std::chrono::microseconds ms;
  auto diff = t2 - t1;
  ms counter = std::chrono::duration_cast<ms>(diff);
  return counter.count() / 1000.0;  // microseconds -> milliseconds
}

std::vector<std::string> split(const std::string& s, char separator) {
  std::vector<std::string> output;
  std::string::size_type prev_pos = 0, pos = 0;
  while ((pos = s.find(separator, pos)) != std::string::npos) {
    output.push_back(s.substr(prev_pos, pos - prev_pos));
    prev_pos = ++pos;
  }
  output.push_back(s.substr(prev_pos, pos - prev_pos));  // last token
  return output;
}

void PrepareTRTConfig(AnalysisConfig *config, int batch_size) {
  config->SetModel(FLAGS_dirname + "/model", FLAGS_dirname + "/params");
  config->EnableUseGpu(100, 0);
  // We use ZeroCopyTensor here, so feed/fetch ops must be disabled.
  config->SwitchUseFeedFetchOps(false);
  // Args: workspace_size, max_batch_size, min_subgraph_size, precision,
  // use_static, use_calib_mode.
  config->EnableTensorRtEngine(1 << 30, batch_size, 32,
                               AnalysisConfig::Precision::kFloat32, true, false);
  std::map<std::string, std::vector<int>> min_input_shape = {
      {"src_ids", {1, 1, 1}}, {"pos_ids", {1, 1, 1}},
      {"sent_ids", {1, 1, 1}}, {"input_mask", {1, 1, 1}}};
  std::map<std::string, std::vector<int>> max_input_shape = {
      {"src_ids", {1024, 128, 1}}, {"pos_ids", {1024, 128, 1}},
      {"sent_ids", {1024, 128, 1}}, {"input_mask", {1024, 128, 1}}};
  std::map<std::string, std::vector<int>> opt_input_shape = {
      {"src_ids", {16, 64, 1}}, {"pos_ids", {16, 64, 1}},
      {"sent_ids", {16, 64, 1}}, {"input_mask", {16, 64, 1}}};
  config->SetTRTDynamicShapeInfo(min_input_shape, max_input_shape, opt_input_shape);
}
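// Note: opt_input_shape ({16, 64, 1}) differs from the shapes actually fed
// at runtime ({batch_size, 128, 1}); as I understand it, it only tells
// TensorRT which shape to optimize kernels for and need not match, as long
// as runtime shapes stay within [min, max].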

bool test_map_cnn(int batch_size, int repeat) {
  AnalysisConfig config;
  PrepareTRTConfig(&config, batch_size);
  // A fresh predictor (and TRT engine) is built for each batch size.
  auto predictor = CreatePaddlePredictor(config);

  // Prepare inputs: batch_size sequences of length 128, zero-filled.
  int input_num = 128 * batch_size;
  int *input_src_ids = new int[input_num];
  memset(input_src_ids, 0, input_num * sizeof(int));
  int *input_pos_ids = new int[input_num];
  memset(input_pos_ids, 0, input_num * sizeof(int));
  int *input_sent_ids = new int[input_num];
  memset(input_sent_ids, 0, input_num * sizeof(int));
  float *input_input_mask = new float[input_num];
  memset(input_input_mask, 0, input_num * sizeof(float));
  // int *input_golden_label = new int[batch_size];
  // memset(input_golden_label, 0, batch_size * sizeof(int));
  auto input_names = predictor->GetInputNames();
  auto input_1 = predictor->GetInputTensor(input_names[0]);
  auto input_2 = predictor->GetInputTensor(input_names[1]);
  auto input_3 = predictor->GetInputTensor(input_names[2]);
  auto input_4 = predictor->GetInputTensor(input_names[3]);
  input_1->Reshape({batch_size, 128, 1});
  input_1->copy_from_cpu(input_src_ids);
  input_2->Reshape({batch_size, 128, 1});
  input_2->copy_from_cpu(input_pos_ids);
  input_3->Reshape({batch_size, 128, 1});
  input_3->copy_from_cpu(input_sent_ids);
  input_4->Reshape({batch_size, 128, 1});
  input_4->copy_from_cpu(input_input_mask);

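  // Note: the first ZeroCopyRun() with TensorRT typically includes engine
  // build time; a warm-up run before the timed loop would give steadier
  // numbers (not done here).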
  // run
  auto time1 = time();
  for (int i = 0; i < repeat; i++) {
    CHECK(predictor->ZeroCopyRun());
  }
  auto time2 = time();
  // Average per-run latency in ms.
  std::cout << "batch: " << batch_size << " predict cost: "
            << time_diff(time1, time2) / repeat << "ms" << std::endl;

  // get the output (assuming a single float output tensor)
  auto output_names = predictor->GetOutputNames();
  auto output_t = predictor->GetOutputTensor(output_names[0]);
  std::vector<int> output_shape = output_t->shape();
  int out_num = 1;
  for (int d : output_shape) out_num *= d;
  std::vector<float> out_data(out_num);
  output_t->copy_to_cpu(out_data.data());

  delete[] input_src_ids;
  delete[] input_pos_ids;
  delete[] input_sent_ids;
  delete[] input_input_mask;
  return true;
}
} // namespace paddle

int main(int argc, char** argv) {
  // Parse command-line flags such as --dirname.
  gflags::ParseCommandLineFlags(&argc, &argv, true);
  // Batch sizes 4, 8, 16; bs = 8 is where the failure shows up.
  for (int i = 2; i < 5; i++) {
    paddle::test_map_cnn(1 << i, 1000);
  }
  return 0;
}
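For completeness, I invoke it roughly as below (the binary name is whatever the build produces; ./bert is the directory holding the model and params files):

./test_bert_trt --dirname=./bert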

No answers yet!
