为使您的问题得到快速解决,在建立Issue前,请您先通过如下方式搜索是否有相似问题:【搜索issue关键字】【使用labels筛选】【官方文档】
如果您没有查询到相似问题,为快速解决您的提问,建立issue时请提供如下细节信息:
- 标题:简洁、精准描述您的问题,例如“最新预测库的API文档在哪儿”
- 版本、环境信息:
1)PaddlePaddle版本:1.8
2)CPU:
3)GPU:
Nvidia T4
Driver API Version: 10.2,
Runtime API Version: 10.2
CUDNN 7.6.5
TensorRT:6.0.1.8
4)系统环境:Ubuntu
- 预测信息
1)C++预测:请您提供预测库安装包的版本信息,及其中的version.txt文件
2)CMake包含路径的完整命令
3)API信息(如调用请提供)
4)预测库来源:官网下载/特殊环境(如BCLOUD编译)
- 复现信息:
- 问题描述:请详细描述您的问题,同步贴出报错信息、日志/代码关键片段
通过1.8分支编译的预测库,然后用trt跑bert的模型,在batch size =8 的时候报错,bs=1、2、4都是可以跑过的
想问一下应该怎么改,然后就是咱们有没有tensorRT动态图或者是bert的demo
附代码
#include <gflags/gflags.h>
#include <glog/logging.h>

#include <chrono>
#include <cstring>
#include <iostream>
#include <map>
#include <string>
#include <vector>

#include "paddle/include/paddle_inference_api.h"
namespace paddle {
// Bring AnalysisConfig into scope (redundant inside `namespace paddle`, but
// harmless) and declare the --dirname flag pointing at the model directory.
using paddle::AnalysisConfig;
DEFINE_string(dirname, "./bert", "Directory of the inference model.");
// Time-point type returned by the high resolution clock.
using Time = decltype(std::chrono::high_resolution_clock::now());

// Returns the current time point.
Time time() { return std::chrono::high_resolution_clock::now(); }

// Returns the elapsed time between t1 and t2 in milliseconds.
// FIX: the pasted code lost the template argument of duration_cast
// (`std::chrono::duration_cast(diff)` does not compile).
double time_diff(Time t1, Time t2) {
  using ms = std::chrono::microseconds;  // NOTE: microseconds, despite the name
  auto diff = t2 - t1;
  ms counter = std::chrono::duration_cast<ms>(diff);
  return counter.count() / 1000.0;  // microseconds -> milliseconds
}
// Splits `s` on `seperator` and returns the fields, keeping empty fields.
// E.g. split("a,,b", ',') -> {"a", "", "b"}; an empty input yields {""}.
// FIX: markdown ate the angle brackets in the paste —
// `std::vectorstd::string` is not valid C++.
std::vector<std::string> split(const std::string& s, char seperator) {
  std::vector<std::string> output;
  std::string::size_type prev_pos = 0, pos = 0;
  while ((pos = s.find(seperator, pos)) != std::string::npos) {
    output.push_back(s.substr(prev_pos, pos - prev_pos));
    prev_pos = ++pos;  // skip past the separator
  }
  output.push_back(s.substr(prev_pos, pos - prev_pos));  // last field
  return output;
}
// Configures `config` for GPU inference through the TensorRT engine in
// dynamic-shape mode.
// FIX: the pasted code lost the element type of the shape vectors —
// `std::map<std::string, std::vector>` without `<int>` does not compile.
void PrepareTRTConfig(AnalysisConfig *config, int batch_size) {
  // Combined model layout: <dirname>/model + <dirname>/params.
  config->SetModel(FLAGS_dirname + "/model",
                   FLAGS_dirname + "/params");
  config->EnableUseGpu(100, 0);  // 100 MB initial GPU pool on device 0.
  // We use ZeroCopyTensor here, so feed/fetch ops must be disabled.
  config->SwitchUseFeedFetchOps(false);
  // 1 GB workspace, max batch `batch_size`, min subgraph size 32, FP32,
  // use_static = true, use_calib_mode = false.
  config->EnableTensorRtEngine(1 << 30, batch_size, 32,
                               AnalysisConfig::Precision::kFloat32, true,
                               false);
  // Dynamic shape ranges [batch, seq_len, 1] for the four BERT inputs.
  // NOTE(review): the max range must cover every shape actually fed at
  // runtime; bs=8 with seq_len=128 fits inside {1024, 128, 1} — confirm the
  // failing case is not exceeding these bounds.
  std::map<std::string, std::vector<int>> min_input_shape = {
      {"src_ids", {1, 1, 1}},
      {"pos_ids", {1, 1, 1}},
      {"sent_ids", {1, 1, 1}},
      {"input_mask", {1, 1, 1}}};
  std::map<std::string, std::vector<int>> max_input_shape = {
      {"src_ids", {1024, 128, 1}},
      {"pos_ids", {1024, 128, 1}},
      {"sent_ids", {1024, 128, 1}},
      {"input_mask", {1024, 128, 1}}};
  std::map<std::string, std::vector<int>> opt_input_shape = {
      {"src_ids", {16, 64, 1}},
      {"pos_ids", {16, 64, 1}},
      {"sent_ids", {16, 64, 1}},
      {"input_mask", {16, 64, 1}}};
  config->SetTRTDynamicShapeInfo(min_input_shape, max_input_shape,
                                 opt_input_shape);
}
// Runs the BERT model `repeat` times at `batch_size` through the
// TRT-enabled predictor and prints the total latency. Inputs are
// zero-filled dummies of shape [batch_size, 128, 1]. Returns true.
// FIXES vs. the pasted code:
//  - the raw `new[]` buffers were never deleted (leak per call); they are
//    now owned by std::vector,
//  - `for (size_t i = 0; i < repeat; ...)` mixed signed/unsigned,
//  - `std::vector out_data` lacked its element type and was unused.
bool test_map_cnn(int batch_size, int repeat) {
  AnalysisConfig config;
  PrepareTRTConfig(&config, batch_size);
  auto predictor = CreatePaddlePredictor(config);

  const int input_num = 128 * batch_size;  // seq_len 128 tokens per sample
  // Zero-initialized dummy inputs: token ids, position ids, sentence ids
  // and the attention mask.
  std::vector<int> input_src_ids(input_num, 0);
  std::vector<int> input_pos_ids(input_num, 0);
  std::vector<int> input_sent_ids(input_num, 0);
  std::vector<float> input_input_mask(input_num, 0.0f);

  // Bind the four inputs by name order and copy the host buffers in.
  auto input_names = predictor->GetInputNames();
  auto input_1 = predictor->GetInputTensor(input_names[0]);
  auto input_2 = predictor->GetInputTensor(input_names[1]);
  auto input_3 = predictor->GetInputTensor(input_names[2]);
  auto input_4 = predictor->GetInputTensor(input_names[3]);
  input_1->Reshape({batch_size, 128, 1});
  input_1->copy_from_cpu(input_src_ids.data());
  input_2->Reshape({batch_size, 128, 1});
  input_2->copy_from_cpu(input_pos_ids.data());
  input_3->Reshape({batch_size, 128, 1});
  input_3->copy_from_cpu(input_sent_ids.data());
  input_4->Reshape({batch_size, 128, 1});
  input_4->copy_from_cpu(input_input_mask.data());

  // Timed run.
  auto time1 = time();
  for (int i = 0; i < repeat; i++) {
    CHECK(predictor->ZeroCopyRun());
  }
  auto time2 = time();
  std::cout << "batch: " << batch_size
            << " predict cost: " << time_diff(time1, time2) / 1000.0 << "ms"
            << std::endl;
  return true;
}
} // namespace paddle
// Entry point: benchmark batch sizes 4, 8 and 16 (1000 iterations each).
int main() {
  for (int batch = 4; batch <= 16; batch <<= 1) {
    paddle::test_map_cnn(batch, 1000);
  }
  return 0;
}
暂无答案!
目前还没有任何答案,快来回答吧!