Paddle: training time keeps increasing during training

5kgi1eie · posted 2022-10-23 in Other
Follow (0) | Answers (9) | Views (245)

Training with the GPU build of Paddle 1.5.2, I print the time taken for every 100 iterations: it starts at about 70 seconds and keeps climbing to around 500 seconds.

dkqlctbz

dkqlctbz1#

Could you tell us how exactly your training parameters are set?

jum4pzuy

jum4pzuy2#

Could you tell us how exactly your training parameters are set?

I use PyReader, with batch_size=2, capacity=16, iterable=True, use_double_buffer=True.
The optimizer is Adam, and the program is just the default one.

ycggw6v2

ycggw6v23#

Could you check the corresponding GPU memory usage, to see whether memory is also changing over time?
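
For example (a minimal sketch, assuming nvidia-smi is available on the machine), you can poll memory usage from a separate process while training runs:

import subprocess
import time

# print used/total GPU memory once every 10 seconds
while True:
    out = subprocess.check_output(
        ['nvidia-smi', '--query-gpu=memory.used,memory.total',
         '--format=csv,noheader'])
    print(out.decode().strip())
    time.sleep(10)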

pxq42qpu

pxq42qpu4#

Could you check the corresponding GPU memory usage, to see whether memory is also changing over time?

GPU memory usage does not change (28807 MiB / 32480 MiB), though it is very high; GPU utilization keeps fluctuating.

js5cn81o

js5cn81o5#

This probably requires a look at your code, to check whether something somewhere keeps adding to the amount of computation.
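
One quick way to check (a sketch, assuming the fluid static-graph API used in this thread): if ops are accidentally being appended to the graph inside the training loop, the op count of the default main program keeps growing:

import paddle.fluid as fluid

# log this every N iterations; a steadily increasing count means new ops are
# being added to the program while training
num_ops = len(fluid.default_main_program().global_block().ops)
print('ops in main program:', num_ops)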

mpbci0fu

mpbci0fu6#

This probably requires a look at your code, to check whether something somewhere keeps adding to the amount of computation.

The main body of the training code is as follows:

import time

import paddle.fluid as fluid

# iResNet, model_loss, DataSource, the conv2d/deconv2d helper wrappers, and
# args/log/loss_scalar are defined elsewhere in the project (see the
# repository linked in answer #8)

def main():
    # define logs...

    view_l = fluid.layers.data(name='view_l', shape=[3, 384, 768], dtype='float32')
    view_r = fluid.layers.data(name='view_r', shape=[3, 384, 768], dtype='float32')
    disp_l = fluid.layers.data(name='disp_l', shape=[1, 384, 768], dtype='float32')

    model = iResNet(args.max_disp)
    predict_final, r_res2_predict, r_res1_predict, r_res0 = model(view_l, view_r)
    avg_loss = model_loss(predict_final, r_res2_predict, r_res1_predict, r_res0, disp_l, args.max_disp)

    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()

    data_src = DataSource(args.data_base, args.data_list, batch_size=2, target_h=384, target_w=768)()
    train_reader = fluid.io.PyReader(
        feed_list=[view_l, view_r, disp_l], capacity=16, iterable=True, use_double_buffer=True)
    train_reader.decorate_sample_list_generator(data_src, places=[place])

    adam_optimizer = fluid.optimizer.Adam(
        learning_rate=fluid.layers.exponential_decay(
            learning_rate=1e-4,
            decay_steps=20000,
            decay_rate=0.8,
            staircase=True),
        beta1=0.9,
        beta2=0.999
    )

    adam_optimizer.minimize(avg_loss)

    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())
    main_program = fluid.default_main_program()

    step = 0

    train_time = 0

    for epoc in range(args.num_epochs):

        for data_id, data_train in enumerate(train_reader()):

            train_time -= time.time()
            loss_val, disp_val = exe.run(
                main_program,
                feed=data_train,
                # note: this '+' is re-evaluated on every iteration; in
                # static-graph mode each evaluation appends a new
                # elementwise_add op to the default program (see the note
                # after this code block)
                fetch_list=[avg_loss, predict_final + r_res0],
                return_numpy=True)
            train_time += time.time()

            loss_scalar.add_record(step, loss_val[0])
            step += 1

            if data_id % 100 == 0:
                log.info('epoch = {0}, id = {1}, train used time {2:-0.2f}s loss = {3}'.format(
                    epoc, data_id, train_time, loss_val[0]))
                train_time = 0

            # save tmp images...

        fluid.io.save_inference_model('./etc/models',
                                      feeded_var_names=['view_l', 'view_r'],
                                      target_vars=[predict_final, r_res2_predict, r_res1_predict, r_res0],
                                      executor=exe)
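
Note: fetch_list=[avg_loss, predict_final + r_res0] evaluates the '+' inside the loop, and in fluid's static-graph mode each evaluation appends a new elementwise_add op to the default program. A sketch of hoisting it out of the loop (the name refined_disp is illustrative):

    # build the fetched expression once, before the training loop ...
    refined_disp = predict_final + r_res0

    for epoc in range(args.num_epochs):
        for data_id, data_train in enumerate(train_reader()):
            # ... and fetch the prebuilt variable instead
            loss_val, disp_val = exe.run(
                main_program,
                feed=data_train,
                fetch_list=[avg_loss, refined_disp],
                return_numpy=True)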

Network structure definition:

class iResNet(object):
    def __init__(self, max_disp):
        self.dsize = max_disp

    def __call__(self, view_l, view_r, *args, **kwargs):
        with fluid.unique_name.guard('stem/'):
            conv1a, conv2a, conv_temp4a, up_1a2a = self.stem_block(view_l)
        with fluid.unique_name.guard('stem/'):
            conv1b, conv2b, conv_temp4b, up_1b2b = self.stem_block(view_r)

        with fluid.unique_name.guard('des/'):
            pred_final = self.estimation_sub_network(conv1a, conv2a, conv_temp4a, conv_temp4b, up_1a2a)
        with fluid.unique_name.guard('drs/'):
            r_res2_pred, r_res1_pred, r_res0 = self.refinement_sub_network(conv1a, conv1b, up_1a2a, up_1b2b, pred_final)

        return pred_final, r_res2_pred, r_res1_pred, r_res0

    def stem_block(self, input):
        conv1a = conv2d_leaky_relu(input, num_filters=64, filter_size=7, stride=2, padding=3, name='conv1') 
        up_1a = deconv2d_leaky_relu(conv1a, num_filters=32, filter_size=4, stride=2, padding=1, name='up_1') 
        conv2a = conv2d_leaky_relu(conv1a, num_filters=128, filter_size=5, stride=2, padding=2, name='conv2')
        conv_temp1a = conv2d_leaky_relu(conv2a, num_filters=256, filter_size=3, stride=2, padding=1, name='conv_temp1') 
        conv_temp2a = conv2d_leaky_relu(conv_temp1a, num_filters=256, filter_size=3, stride=1, padding=1, name='conv_temp2')
        conv_temp3a = deconv2d_leaky_relu(conv_temp2a, num_filters=128, filter_size=4, stride=2, padding=1, name='conv_temp3')

        conv_temp4a = fluid.layers.concat(input=[conv_temp3a, conv2a], axis=1, name=None)
        up_2a = deconv2d_leaky_relu(conv_temp4a, num_filters=32, filter_size=8, stride=4, padding=2, name='up_2')

        conv_temp5a = fluid.layers.concat(input=[up_1a, up_2a], axis=1, name=None)
        up_1a2a = deconv2d_leaky_relu(conv_temp5a, num_filters=32, filter_size=1, stride=1, padding=0, name='up_12')

        return conv1a, conv2a, conv_temp4a, up_1a2a

    def estimation_sub_network(self, conv1a, conv2a, conv_temp4a, conv_temp4b, up_1a2a):
        corr1d = self.build_cost_volume(conv_temp4a, conv_temp4b, self.dsize // 4)

        conv_redir = conv2d_leaky_relu(conv2a, num_filters=64, filter_size=1, stride=1, padding=0)
        conv3_input = fluid.layers.concat([corr1d, conv_redir], axis=1)
        conv3 = conv2d_leaky_relu(conv3_input, num_filters=256, filter_size=5, stride=2, padding=2)
        conv3_1 = conv2d_leaky_relu(conv3, num_filters=256, filter_size=3, stride=1, padding=1)
        conv4 = conv2d_leaky_relu(conv3_1, num_filters=512, filter_size=3, stride=2, padding=1)
        conv4_1 = conv2d_leaky_relu(conv4, num_filters=512, filter_size=3, stride=1, padding=1)
        conv5 = conv2d_leaky_relu(conv4_1, num_filters=512, filter_size=3, stride=2, padding=1)
        conv5_1 = conv2d_leaky_relu(conv5, num_filters=512, filter_size=3, stride=1, padding=1)
        conv6 = conv2d_leaky_relu(conv5_1, num_filters=1024, filter_size=3, stride=2, padding=1)
        conv6_1 = conv2d_leaky_relu(conv6, num_filters=1024, filter_size=3, stride=1, padding=1)
        predict6 = conv2d(conv6_1, num_filters=1, filter_size=3, stride=1, padding=1)
        # disp6 = fluid.layers.relu(predict6)
        disp6_up = deconv2d(predict6, num_filters=1, filter_size=4, stride=2, padding=1)
        uconv5 = deconv2d_leaky_relu(conv6_1, num_filters=512, filter_size=4, stride=2, padding=1)
        iconv5_input = fluid.layers.concat([uconv5, disp6_up, conv5_1], axis=1)
        iconv5 = conv2d(iconv5_input, num_filters=512, filter_size=3, stride=1, padding=1)
        predict5 = conv2d(iconv5, num_filters=1, filter_size=3, stride=1, padding=1)
        # disp5 = fluid.layers.relu(predict5)
        disp5_up = deconv2d(predict5, num_filters=1, filter_size=4, stride=2, padding=1)
        uconv4 = deconv2d_leaky_relu(iconv5, num_filters=256, filter_size=4, stride=2, padding=1)
        iconv4_input = fluid.layers.concat([uconv4, disp5_up, conv4_1], axis=1)
        iconv4 = conv2d(iconv4_input, num_filters=256, filter_size=3, stride=1, padding=1)
        predict4 = conv2d(iconv4, num_filters=1, filter_size=3, stride=1, padding=1)
        # disp4 = fluid.layers.relu(predict4)
        disp4_up = deconv2d(predict4, num_filters=1, filter_size=4, stride=2, padding=1)
        uconv3 = deconv2d_leaky_relu(iconv4, num_filters=128, filter_size=4, stride=2, padding=1)
        iconv3_input = fluid.layers.concat([uconv3, disp4_up, conv3_1], axis=1)
        iconv3 = conv2d(iconv3_input, num_filters=128, filter_size=3, stride=1, padding=1)
        predict3 = conv2d(iconv3, num_filters=1, filter_size=3, stride=1, padding=1)
        # disp3 = fluid.layers.relu(predict3)
        disp3_up = deconv2d(predict3, num_filters=1, filter_size=4, stride=2, padding=1)
        uconv2 = deconv2d_leaky_relu(iconv3, num_filters=64, filter_size=4, stride=2, padding=1)

        # 64 + 1 + 256 = 321
        iconv2_input = fluid.layers.concat([uconv2, disp3_up, conv_temp4a], axis=1)
        iconv2 = conv2d(iconv2_input, num_filters=64, filter_size=3, stride=1, padding=1)
        predict2 = conv2d(iconv2, num_filters=1, filter_size=3, stride=1, padding=1)
        # disp2 = fluid.layers.relu(predict2)
        disp2_up = deconv2d(predict2, num_filters=1, filter_size=4, stride=2, padding=1)
        uconv1 = deconv2d_leaky_relu(iconv2, num_filters=32, filter_size=4, stride=2, padding=1)
        iconv1_input = fluid.layers.concat([uconv1, disp2_up, conv1a], axis=1)
        iconv1 = conv2d(iconv1_input, num_filters=32, filter_size=3, stride=1, padding=1)
        predict1 = conv2d(iconv1, num_filters=1, filter_size=3, stride=1, padding=1)
        # disp1 = fluid.layers.relu(predict1)
        disp1_up = deconv2d(predict1, num_filters=1, filter_size=4, stride=2, padding=1)
        uconv0 = deconv2d_leaky_relu(iconv1, num_filters=32, filter_size=4, stride=2, padding=1)
        iconv0_input = fluid.layers.concat([uconv0, disp1_up, up_1a2a], axis=1)
        iconv0 = conv2d(iconv0_input, num_filters=32, filter_size=3, stride=1, padding=1)
        predict0 = conv2d(iconv0, num_filters=1, filter_size=3, stride=1, padding=1)
        # disp0 = fluid.layers.relu(predict0)
        predict6_0 = deconv2d(predict6, num_filters=1, filter_size=128, stride=64, padding=32)
        predict5_0 = deconv2d(predict5, num_filters=1, filter_size=64, stride=32, padding=16)
        predict4_0 = deconv2d(predict4, num_filters=1, filter_size=32, stride=16, padding=8)
        predict3_0 = deconv2d(predict3, num_filters=1, filter_size=16, stride=8, padding=4)              
        predict2_0 = deconv2d(predict2, num_filters=1, filter_size=8, stride=4, padding=2)              
        predict1_0 = deconv2d(predict1, num_filters=1, filter_size=4, stride=2, padding=1)             
        predict_concat = fluid.layers.concat(
            [predict6_0, predict5_0, predict4_0, predict3_0, predict2_0, predict1_0, predict0], axis=1) 
        predict_final = conv2d(predict_concat, num_filters=1, filter_size=1, stride=1, padding=0)       

        return predict_final

    def refinement_sub_network(self, conv1a, conv1b, up_1a2a, up_1b2b, pred_final):
        transformer = SpatialTransformer()
        minus_predict_final = -1 * pred_final
        w_up_1b2b = transformer(up_1b2b, minus_predict_final)
        r_conv0_input = fluid.layers.concat([fluid.layers.abs(up_1a2a - w_up_1b2b), pred_final, up_1a2a], axis=1)           # 1
        r_conv0 = conv2d_leaky_relu(r_conv0_input, num_filters=32, filter_size=3, stride=1, padding=1)                      # 1
        r_conv1 = conv2d_leaky_relu(r_conv0, num_filters=64, filter_size=3, stride=2, padding=1)                            # 1 / 2
        # note: both layers below share name='c_conv1ab'; answer #9 identifies
        # this as the cause of the growing training time
        c_conv1a = conv2d_leaky_relu(conv1a, num_filters=16, filter_size=3, stride=1, padding=1, name='c_conv1ab')          # 1 / 2
        c_conv1b = conv2d_leaky_relu(conv1b, num_filters=16, filter_size=3, stride=1, padding=1, name='c_conv1ab')          # 1 / 2
        r_corr = self.build_cost_volume(c_conv1a, c_conv1b, self.dsize // 5)

        r_conv1_1_input = fluid.layers.concat([r_conv1, r_corr], axis=1)
        r_conv1_1 = conv2d_leaky_relu(r_conv1_1_input, num_filters=64, filter_size=3, stride=1, padding=1)
        r_conv2 = conv2d_leaky_relu(r_conv1_1, num_filters=128, filter_size=3, stride=2, padding=1)
        r_conv2_1 = conv2d_leaky_relu(r_conv2, num_filters=128, filter_size=3, stride=1, padding=1)
        r_res2_predict = conv2d(r_conv2_1, num_filters=1, filter_size=3, stride=1, padding=1)
        r_res2_up = deconv2d(r_res2_predict, num_filters=1, filter_size=4, stride=2, padding=1)
        r_uconv1 = deconv2d_leaky_relu(r_conv2_1, num_filters=64, filter_size=4, stride=2, padding=1)
        r_iconv1_input = fluid.layers.concat([r_uconv1, r_res2_up, r_conv1_1], axis=1)
        r_iconv1 = conv2d(r_iconv1_input, num_filters=64, filter_size=3, stride=1, padding=1)
        r_res1_predict = conv2d(r_iconv1, num_filters=1, filter_size=3, stride=1, padding=1)
        r_res1_up = deconv2d(r_res1_predict, num_filters=1, filter_size=4, stride=2, padding=1)
        r_uconv0 = deconv2d_leaky_relu(r_iconv1, num_filters=32, filter_size=4, stride=2, padding=1)
        r_iconv0_input = fluid.layers.concat([r_uconv0, r_res1_up, r_conv0], axis=1)
        r_iconv0 = conv2d(r_iconv0_input, num_filters=32, filter_size=3, stride=1, padding=1)
        r_res0 = conv2d(r_iconv0, num_filters=1, filter_size=3, stride=1, padding=1)

        return r_res2_predict, r_res1_predict, r_res0

    def build_cost_volume(self, feat_map_l, feat_map_r, d_size):
        """
build cost volume
:param feat_map_l: left feature map
:param feat_map_r: right feature map
:param d_size: disparity size
:return: cost volume
"""
        lyrs = list()
        corr = fluid.layers.reduce_mean(fluid.layers.elementwise_mul(feat_map_l, feat_map_r), dim=1)
        lyrs.append(corr)

        for d in range(d_size + 1):
            # moving the features by disparity d can be done by padding zeros
            shifted = fluid.layers.pad(feat_map_r[:, :, :, :-1 - d], paddings=[0, 0, 0, 0, 0, 0, d + 1, 0])
            corr = fluid.layers.reduce_mean(fluid.layers.elementwise_mul(feat_map_l, shifted), dim=1)
            lyrs.append(corr)

        volume = fluid.layers.stack(lyrs, axis=1)
        return volume
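
For reference, a small numpy sketch of what one plane of this cost volume computes: the right feature map is shifted right by d+1 pixels via zero padding, multiplied elementwise with the left map, and averaged over channels:

import numpy as np

# toy NCHW feature maps
feat_l = np.random.rand(1, 4, 3, 8).astype('float32')
feat_r = np.random.rand(1, 4, 3, 8).astype('float32')

d = 2
# drop the last d+1 columns of the right map, then zero-pad on the left,
# mirroring fluid.layers.pad(..., paddings=[0, 0, 0, 0, 0, 0, d + 1, 0])
shifted = np.pad(feat_r[:, :, :, :-1 - d],
                 ((0, 0), (0, 0), (0, 0), (d + 1, 0)))
corr = (feat_l * shifted).mean(axis=1)   # one plane of the cost volume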

vlurs2pr

vlurs2pr7#

Hi, this code is hard to reproduce as-is; could you provide more complete code?

i86rm4rw

i86rm4rw8#

Hi, this code is hard to reproduce as-is; could you provide more complete code?

https://github.com/yanyq1990/iresnet-paddle.git

b1zrtrql

b1zrtrql9#

Problem solved: c_conv1a and c_conv1b in refinement_sub_network had the same name; after changing them to different names, the training time no longer grows.
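
For reference, one way to apply that fix (any two distinct names work; these are illustrative):

        c_conv1a = conv2d_leaky_relu(conv1a, num_filters=16, filter_size=3, stride=1, padding=1, name='c_conv1a')
        c_conv1b = conv2d_leaky_relu(conv1b, num_filters=16, filter_size=3, stride=1, padding=1, name='c_conv1b')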
