Paddle paddle 1.6版本中,动态图,反向传播后,梯度无法获取

k5ifujac  于 2022-10-20  发布在  其他
关注(0)|答案(2)|浏览(158)

使用paddle1.6版本动态图,经过反向传播后,梯度获取,网络层权重无变化。
main方法内容如下:

import  paddle.fluid as fl
hr = fl.dygraph.to_variable(hr_image) # hr_image is the real image
lr = fl.dygraph.to_variable(lr_image)
hr.stop_gradient = False
lr.stop_gradient = False
sr = model.forward(lr)

# L1 loss

loss =  fl.layers.mse_loss(sr,hr)

# 计算梯度更新参数

start = print_net_params_value(model)
loss.backward()
optimizer.minimize(loss,parameter_list=model.parameters())
end = print_net_params_value(model)
a = print_net_params(model)
model.clear_gradients()

获取网络参数的方法如下:

def print_net_params(net):
    # dy_param_value= {}
    dy_param_value= []
    for param in net.parameters():
        if param.trainable:
            # if param._grad_ivar() is not None:
            same_value = param.gradient()
            same_value =  same_value.reshape(same_value.shape[0],-1)
            # gradients = fl.layers.reshape(param,[param.shape[0],-1])
            some1 = np.linalg.norm(same_value,ord=2,axis=1)
            # dy_param_value[param.name] = np.mean(some1)
            dy_param_value.append(np.mean(some1))
    a = [dy_param_value[0],dy_param_value[-2]]
    return(a)

网络内容如下:


# !/usr/bin/python

# -*- coding: utf-8 -*-

import  paddle
import paddle.fluid as fl
from paddle.fluid.dygraph import Conv2D
from   option import  opt
import numpy as np
class ResBlock(fl.dygraph.Layer):
    def __init__(self,namescope,n_feats,expansion_ratio,res_scale = 1.0):
        super(ResBlock, self).__init__(namescope)
        self.res_scale = res_scale
        self.conv1 = Conv2D(namescope,n_feats*expansion_ratio,filter_size=3,padding=1)

        self.conv2 = Conv2D(namescope,n_feats,filter_size=3,padding=1)
    def forward(self, y):

        x = self.conv1(y)
        x = fl.layers.clip_by_norm(x,max_norm=1.0)
        x = fl.layers.relu(x)
        x = self.conv2(x)
        x = fl.layers.clip_by_norm(x,max_norm=1.0)

        return(y+x*self.res_scale)
def print_net_params(net):
    # dy_param_value= {}
    dy_param_value= []
    for param in net.parameters():
        if param.trainable:

            same_value = param.gradient()
            same_value =  same_value.reshape(same_value.shape[0],-1)
            # gradients = fl.layers.reshape(param,[param.shape[0],-1])
            some1 = np.linalg.norm(same_value,ord=2,axis=1)
            # dy_param_value[param.name] = np.mean(some1)
            dy_param_value.append(np.mean(some1))
    a = [dy_param_value[0],dy_param_value[-2]]
    return(a)

class Wdsr_A(fl.dygraph.Layer):
    def __init__(self,namescope):
        super(Wdsr_A,self).__init__(namescope)
        # skip
        self.skip = Conv2D(namescope,3*(opt.scale**2),filter_size=5,padding=2)
        # head

        self.head = Conv2D(namescope,opt.n_feats,filter_size=3,padding=1)
        # body
        for i in range(opt.n_res_blocks):
            self.add_sublayer('body'+str(i+1),ResBlock(namescope,opt.n_feats,opt.expansion_ratio,opt.res_scale))
        # tail
        self.tail = Conv2D(namescope,3*(opt.scale**2),filter_size=3,padding=1)

    def forward(self, x):
        # if self.subtract_mean:
        # 第一步就开始上采样了
        # 单独增加drop层,因为通常上来说,显示中的低阶图片都是缺少关键的像素点\
        # 先看看这个效果怎么样之后在增加内容
        # skip
        x = self.skip(x)
        x = fl.layers.clip_by_norm(x,max_norm=1.0)
        s = fl.layers.pixel_shuffle((x),upscale_factor = opt.scale)
        # head
        x = self.head(x)
        x = fl.layers.clip_by_norm(x,max_norm=1.0)
        # body
        for i in range(opt.n_res_blocks):
            x = self.__getattr__('body'+str(i+1)).forward(x)
        # tail
        x = fl.layers.clip_by_norm(self.tail(x),max_norm=1.0)
        x = fl.layers.pixel_shuffle((x),upscale_factor = opt.scale)
        # residual
        x += s
        return(x)
nom7f22z

nom7f22z1#

代码格式能容易阅读的版本么,或者提供一个链接

jgovgodb

jgovgodb2#

代码格式能容易阅读的版本么,或者提供一个链接

您好,修复了粘贴代码格式的问题。经过调试我发现fl.layers.clip_by_norm 是这行代码了阻止了反向传播,把他去掉之后就可以了。

因为这里应用了很多个fl.layers.clip_by_norm这个方法,去掉不同位置时,有的时候会组织反向传播,有的时候不会

相关问题