Training with the Paddle 1.5.2 GPU build, the time logged every 100 iterations keeps growing, from about 70 s at the start up to 500 s
dkqlctbz1#
Could you share the exact training parameters you are using?
jum4pzuy2#
I use PyReader with batch_size=2, capacity=16, iterable=True, use_double_buffer=True. The optimizer is Adam, and I run the default program directly.
ycggw6v23#
Could you check the corresponding GPU memory usage and see whether it is growing as well?
pxq42qpu4#
GPU memory usage does not change (28807 MiB / 32480 MiB), though it is quite high; GPU utilization keeps fluctuating.
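(For reference, one way to watch these numbers over time is to poll nvidia-smi; this is a generic monitoring sketch, not part of the original post:)

import subprocess
import time

# Poll nvidia-smi once per second and print used/total memory and GPU
# utilization; run this alongside training to see how the numbers move.
while True:
    out = subprocess.check_output(
        ['nvidia-smi',
         '--query-gpu=memory.used,memory.total,utilization.gpu',
         '--format=csv,noheader'])
    print(out.decode().strip())
    time.sleep(1)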
js5cn81o5#
We would probably need to look at your code for this, to see whether something is adding computation as training goes on.
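(One quick way to test for that in fluid, sketched here as a diagnostic idea rather than a confirmed diagnosis: ops appended to the default main program at run time show up as a growing op count.)

import paddle.fluid as fluid

# Print this inside the training loop; if the count keeps increasing from
# iteration to iteration, something is appending ops to the graph at run time.
num_ops = len(fluid.default_main_program().global_block().ops)
print('ops in default main program:', num_ops)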
mpbci0fu6#
The main training code is as follows:
def main():
    # define logs...
    view_l = fluid.layers.data(name='view_l', shape=[3, 384, 768], dtype='float32')
    view_r = fluid.layers.data(name='view_r', shape=[3, 384, 768], dtype='float32')
    disp_l = fluid.layers.data(name='disp_l', shape=[1, 384, 768], dtype='float32')
    model = iResNet(args.max_disp)
    predict_final, r_res2_predict, r_res1_predict, r_res0 = model(view_l, view_r)
    avg_loss = model_loss(predict_final, r_res2_predict, r_res1_predict, r_res0,
                          disp_l, args.max_disp)
    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    data_src = DataSource(args.data_base, args.data_list, batch_size=2,
                          target_h=384, target_w=768)()
    train_reader = fluid.io.PyReader(
        feed_list=[view_l, view_r, disp_l],
        capacity=16, iterable=True, use_double_buffer=True)
    train_reader.decorate_sample_list_generator(data_src, places=[place])
    adam_optimizer = fluid.optimizer.Adam(
        learning_rate=fluid.layers.exponential_decay(
            learning_rate=1e-4, decay_steps=20000, decay_rate=0.8, staircase=True),
        beta1=0.9, beta2=0.999)
    adam_optimizer.minimize(avg_loss)
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())
    main_program = fluid.default_main_program()
    step = 0
    train_time = 0
    for epoc in range(args.num_epochs):
        for data_id, data_train in enumerate(train_reader()):
            train_time -= time.time()
            loss_val, disp_val = exe.run(
                main_program,
                feed=data_train,
                fetch_list=[avg_loss, predict_final + r_res0],
                return_numpy=True)
            train_time += time.time()
            loss_scalar.add_record(step, loss_val[0])
            step += 1
            if data_id % 100 == 0:
                log.info('epoch = {0}, id = {1}, train used time {2:-0.2f}s loss = {3}'.format(
                    epoc, data_id, train_time, loss_val[0]))
                train_time = 0
                # save tmp images...
    fluid.io.save_inference_model('./etc/models',
                                  feeded_var_names=['view_l', 'view_r'],
                                  target_vars=[predict_final, r_res2_predict,
                                               r_res1_predict, r_res0],
                                  executor=exe)
The network definition:
class iResNet(object):
    def __init__(self, max_disp):
        self.dsize = max_disp

    def __call__(self, view_l, view_r, *args, **kwargs):
        with fluid.unique_name.guard('stem/'):
            conv1a, conv2a, conv_temp4a, up_1a2a = self.stem_block(view_l)
        with fluid.unique_name.guard('stem/'):
            conv1b, conv2b, conv_temp4b, up_1b2b = self.stem_block(view_r)
        with fluid.unique_name.guard('des/'):
            pred_final = self.estimation_sub_network(conv1a, conv2a, conv_temp4a,
                                                     conv_temp4b, up_1a2a)
        with fluid.unique_name.guard('drs/'):
            r_res2_pred, r_res1_pred, r_res0 = self.refinement_sub_network(
                conv1a, conv1b, up_1a2a, up_1b2b, pred_final)
        return pred_final, r_res2_pred, r_res1_pred, r_res0

    def stem_block(self, input):
        conv1a = conv2d_leaky_relu(input, num_filters=64, filter_size=7, stride=2, padding=3, name='conv1')
        up_1a = deconv2d_leaky_relu(conv1a, num_filters=32, filter_size=4, stride=2, padding=1, name='up_1')
        conv2a = conv2d_leaky_relu(conv1a, num_filters=128, filter_size=5, stride=2, padding=2, name='conv2')
        conv_temp1a = conv2d_leaky_relu(conv2a, num_filters=256, filter_size=3, stride=2, padding=1, name='conv_temp1')
        conv_temp2a = conv2d_leaky_relu(conv_temp1a, num_filters=256, filter_size=3, stride=1, padding=1, name='conv_temp2')
        conv_temp3a = deconv2d_leaky_relu(conv_temp2a, num_filters=128, filter_size=4, stride=2, padding=1, name='conv_temp3')
        conv_temp4a = fluid.layers.concat(input=[conv_temp3a, conv2a], axis=1, name=None)
        up_2a = deconv2d_leaky_relu(conv_temp4a, num_filters=32, filter_size=8, stride=4, padding=2, name='up_2')
        conv_temp5a = fluid.layers.concat(input=[up_1a, up_2a], axis=1, name=None)
        up_1a2a = deconv2d_leaky_relu(conv_temp5a, num_filters=32, filter_size=1, stride=1, padding=0, name='up_12')
        return conv1a, conv2a, conv_temp4a, up_1a2a

    def estimation_sub_network(self, conv1a, conv2a, conv_temp4a, conv_temp4b, up_1a2a):
        corr1d = self.build_cost_volume(conv_temp4a, conv_temp4b, self.dsize // 4)
        conv_redir = conv2d_leaky_relu(conv2a, num_filters=64, filter_size=1, stride=1, padding=0)
        conv3_input = fluid.layers.concat([corr1d, conv_redir], axis=1)
        conv3 = conv2d_leaky_relu(conv3_input, num_filters=256, filter_size=5, stride=2, padding=2)
        conv3_1 = conv2d_leaky_relu(conv3, num_filters=256, filter_size=3, stride=1, padding=1)
        conv4 = conv2d_leaky_relu(conv3_1, num_filters=512, filter_size=3, stride=2, padding=1)
        conv4_1 = conv2d_leaky_relu(conv4, num_filters=512, filter_size=3, stride=1, padding=1)
        conv5 = conv2d_leaky_relu(conv4_1, num_filters=512, filter_size=3, stride=2, padding=1)
        conv5_1 = conv2d_leaky_relu(conv5, num_filters=512, filter_size=3, stride=1, padding=1)
        conv6 = conv2d_leaky_relu(conv5_1, num_filters=1024, filter_size=3, stride=2, padding=1)
        conv6_1 = conv2d_leaky_relu(conv6, num_filters=1024, filter_size=3, stride=1, padding=1)
        predict6 = conv2d(conv6_1, num_filters=1, filter_size=3, stride=1, padding=1)
        # disp6 = fluid.layers.relu(predict6)
        disp6_up = deconv2d(predict6, num_filters=1, filter_size=4, stride=2, padding=1)
        uconv5 = deconv2d_leaky_relu(conv6_1, num_filters=512, filter_size=4, stride=2, padding=1)
        iconv5_input = fluid.layers.concat([uconv5, disp6_up, conv5_1], axis=1)
        iconv5 = conv2d(iconv5_input, num_filters=512, filter_size=3, stride=1, padding=1)
        predict5 = conv2d(iconv5, num_filters=1, filter_size=3, stride=1, padding=1)
        # disp5 = fluid.layers.relu(predict5)
        disp5_up = deconv2d(predict5, num_filters=1, filter_size=4, stride=2, padding=1)
        uconv4 = deconv2d_leaky_relu(iconv5, num_filters=256, filter_size=4, stride=2, padding=1)
        iconv4_input = fluid.layers.concat([uconv4, disp5_up, conv4_1], axis=1)
        iconv4 = conv2d(iconv4_input, num_filters=256, filter_size=3, stride=1, padding=1)
        predict4 = conv2d(iconv4, num_filters=1, filter_size=3, stride=1, padding=1)
        # disp4 = fluid.layers.relu(predict4)
        disp4_up = deconv2d(predict4, num_filters=1, filter_size=4, stride=2, padding=1)
        uconv3 = deconv2d_leaky_relu(iconv4, num_filters=128, filter_size=4, stride=2, padding=1)
        iconv3_input = fluid.layers.concat([uconv3, disp4_up, conv3_1], axis=1)
        iconv3 = conv2d(iconv3_input, num_filters=128, filter_size=3, stride=1, padding=1)
        predict3 = conv2d(iconv3, num_filters=1, filter_size=3, stride=1, padding=1)
        # disp3 = fluid.layers.relu(predict3)
        disp3_up = deconv2d(predict3, num_filters=1, filter_size=4, stride=2, padding=1)
        uconv2 = deconv2d_leaky_relu(iconv3, num_filters=64, filter_size=4, stride=2, padding=1)
        # 64 + 1 + 256 = 321
        iconv2_input = fluid.layers.concat([uconv2, disp3_up, conv_temp4a], axis=1)
        iconv2 = conv2d(iconv2_input, num_filters=64, filter_size=3, stride=1, padding=1)
        predict2 = conv2d(iconv2, num_filters=1, filter_size=3, stride=1, padding=1)
        # disp2 = fluid.layers.relu(predict2)
        disp2_up = deconv2d(predict2, num_filters=1, filter_size=4, stride=2, padding=1)
        uconv1 = deconv2d_leaky_relu(iconv2, num_filters=32, filter_size=4, stride=2, padding=1)
        iconv1_input = fluid.layers.concat([uconv1, disp2_up, conv1a], axis=1)
        iconv1 = conv2d(iconv1_input, num_filters=32, filter_size=3, stride=1, padding=1)
        predict1 = conv2d(iconv1, num_filters=1, filter_size=3, stride=1, padding=1)
        # disp1 = fluid.layers.relu(predict1)
        disp1_up = deconv2d(predict1, num_filters=1, filter_size=4, stride=2, padding=1)
        uconv0 = deconv2d_leaky_relu(iconv1, num_filters=32, filter_size=4, stride=2, padding=1)
        iconv0_input = fluid.layers.concat([uconv0, disp1_up, up_1a2a], axis=1)
        iconv0 = conv2d(iconv0_input, num_filters=32, filter_size=3, stride=1, padding=1)
        predict0 = conv2d(iconv0, num_filters=1, filter_size=3, stride=1, padding=1)
        # disp0 = fluid.layers.relu(predict0)
        predict6_0 = deconv2d(predict6, num_filters=1, filter_size=128, stride=64, padding=32)
        predict5_0 = deconv2d(predict5, num_filters=1, filter_size=64, stride=32, padding=16)
        predict4_0 = deconv2d(predict4, num_filters=1, filter_size=32, stride=16, padding=8)
        predict3_0 = deconv2d(predict3, num_filters=1, filter_size=16, stride=8, padding=4)
        predict2_0 = deconv2d(predict2, num_filters=1, filter_size=8, stride=4, padding=2)
        predict1_0 = deconv2d(predict1, num_filters=1, filter_size=4, stride=2, padding=1)
        predict_concat = fluid.layers.concat(
            [predict6_0, predict5_0, predict4_0, predict3_0,
             predict2_0, predict1_0, predict0], axis=1)
        predict_final = conv2d(predict_concat, num_filters=1, filter_size=1, stride=1, padding=0)
        return predict_final

    def refinement_sub_network(self, conv1a, conv1b, up_1a2a, up_1b2b, pred_final):
        transformer = SpatialTransformer()
        minus_predict_final = -1 * pred_final
        w_up_1b2b = transformer(up_1b2b, minus_predict_final)
        r_conv0_input = fluid.layers.concat(
            [fluid.layers.abs(up_1a2a - w_up_1b2b), pred_final, up_1a2a], axis=1)  # 1
        r_conv0 = conv2d_leaky_relu(r_conv0_input, num_filters=32, filter_size=3, stride=1, padding=1)  # 1
        r_conv1 = conv2d_leaky_relu(r_conv0, num_filters=64, filter_size=3, stride=2, padding=1)  # 1 / 2
        c_conv1a = conv2d_leaky_relu(conv1a, num_filters=16, filter_size=3, stride=1, padding=1, name='c_conv1ab')  # 1 / 2
        c_conv1b = conv2d_leaky_relu(conv1b, num_filters=16, filter_size=3, stride=1, padding=1, name='c_conv1ab')  # 1 / 2
        r_corr = self.build_cost_volume(c_conv1a, c_conv1b, self.dsize // 5)
        r_conv1_1_input = fluid.layers.concat([r_conv1, r_corr], axis=1)
        r_conv1_1 = conv2d_leaky_relu(r_conv1_1_input, num_filters=64, filter_size=3, stride=1, padding=1)
        r_conv2 = conv2d_leaky_relu(r_conv1_1, num_filters=128, filter_size=3, stride=2, padding=1)
        r_conv2_1 = conv2d_leaky_relu(r_conv2, num_filters=128, filter_size=3, stride=1, padding=1)
        r_res2_predict = conv2d(r_conv2_1, num_filters=1, filter_size=3, stride=1, padding=1)
        r_res2_up = deconv2d(r_res2_predict, num_filters=1, filter_size=4, stride=2, padding=1)
        r_uconv1 = deconv2d_leaky_relu(r_conv2_1, num_filters=64, filter_size=4, stride=2, padding=1)
        r_iconv1_input = fluid.layers.concat([r_uconv1, r_res2_up, r_conv1_1], axis=1)
        r_iconv1 = conv2d(r_iconv1_input, num_filters=64, filter_size=3, stride=1, padding=1)
        r_res1_predict = conv2d(r_iconv1, num_filters=1, filter_size=3, stride=1, padding=1)
        r_res1_up = deconv2d(r_res1_predict, num_filters=1, filter_size=4, stride=2, padding=1)
        r_uconv0 = deconv2d_leaky_relu(r_iconv1, num_filters=32, filter_size=4, stride=2, padding=1)
        r_iconv0_input = fluid.layers.concat([r_uconv0, r_res1_up, r_conv0], axis=1)
        r_iconv0 = conv2d(r_iconv0_input, num_filters=32, filter_size=3, stride=1, padding=1)
        r_res0 = conv2d(r_iconv0, num_filters=1, filter_size=3, stride=1, padding=1)
        return r_res2_predict, r_res1_predict, r_res0

    def build_cost_volume(self, feat_map_l, feat_map_r, d_size):
        """
        build cost volume
        :param feat_map_l: left feature map
        :param feat_map_r: right feature map
        :param d_size: disparity size
        :return: cost volume
        """
        lyrs = list()
        corr = fluid.layers.reduce_mean(fluid.layers.elementwise_mul(feat_map_l, feat_map_r), dim=1)
        lyrs.append(corr)
        for d in range(d_size + 1):
            # moving the features by disparity d can be done by padding zeros
            shifted = fluid.layers.pad(feat_map_r[:, :, :, :-1 - d],
                                       paddings=[0, 0, 0, 0, 0, 0, d + 1, 0])
            corr = fluid.layers.reduce_mean(fluid.layers.elementwise_mul(feat_map_l, shifted), dim=1)
            lyrs.append(corr)
        volume = fluid.layers.stack(lyrs, axis=1)
        return volume
vlurs2pr7#
Hi, this code is hard to reproduce as posted. Could you provide a more complete version?
i86rm4rw8#
https://github.com/yanyq1990/iresnet-paddle.git
b1zrtrql9#
Solved: in refinement_sub_network, c_conv1a and c_conv1b were given the same name. After changing them to different names, the time no longer grows.
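(For anyone hitting the same issue, a minimal sketch of the fix in refinement_sub_network; the replacement names 'c_conv1a' and 'c_conv1b' are illustrative, any two distinct names will do:)

# Before: both layers were created with name='c_conv1ab'.
# After: give each convolution its own name so the two layers no longer collide.
c_conv1a = conv2d_leaky_relu(conv1a, num_filters=16, filter_size=3,
                             stride=1, padding=1, name='c_conv1a')
c_conv1b = conv2d_leaky_relu(conv1b, num_filters=16, filter_size=3,
                             stride=1, padding=1, name='c_conv1b')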