PyTorch Swish activation function, CNN, trainable beta in torch

6tqwzwtp asked on 2023-04-12

I am using the Swish activation function, swish(x) = x * sigmoid(beta * x), with a trainable β, following the paper "Swish: a Self-Gated Activation Function" by Prajit Ramachandran, Barret Zoph and Quoc V. Le. Instead of the fixed beta = 1 used by nn.SiLU(), I train 'beta', using a LeNet-5 CNN on MNIST as a toy example. I am using PyTorch 2.0 and Python 3.10. The sample code is:

class LeNet5(nn.Module):
    def __init__(self, beta = 1.0):
        super(LeNet5, self).__init__()
        
        b = torch.tensor(data = beta, dtype = torch.float32)
        self.beta = torch.autograd.Variable(b, requires_grad = True)
        
        self.conv1 = nn.Conv2d(
            in_channels = 1, out_channels = 6, 
            kernel_size = 5, stride = 1,
            padding = 0, bias = False 
        )
        self.bn1 = nn.BatchNorm2d(num_features = 6)
        self.pool = nn.MaxPool2d(kernel_size = 2, stride = 2)
        self.conv2 = nn.Conv2d(
            in_channels = 6, out_channels = 16,
            kernel_size = 5, stride = 1,
            padding = 0, bias = False
        )
        self.bn2 = nn.BatchNorm2d(num_features = 16)
        self.fc1 = nn.Linear(
            in_features = 256, out_features = 120,
            bias = True
        )
        self.bn3 = nn.BatchNorm1d(num_features = 120)
        self.fc2 = nn.Linear(
            in_features = 120, out_features = 84,
            bias = True
        )
        self.bn4 = nn.BatchNorm1d(num_features = 84)
        self.fc3 = nn.Linear(
            in_features = 84, out_features = 10,
            bias = True
        )
        
        self.initialize_weights()

        
    def initialize_weights(self):
        for m in self.modules():
            # print(m)
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight)
                
                # Do not initialize bias (due to batchnorm)-
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
                
            elif isinstance(m, nn.BatchNorm2d):
                # Standard initialization for batch normalization-
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

            elif isinstance(m, nn.Linear):
                nn.init.kaiming_normal_(m.weight)
                nn.init.constant_(m.bias, 0)
    
    
    def swish_fn(self, x):
        return x * torch.sigmoid(x * self.beta)

    
    def forward(self, x):
        '''
        x = nn.SiLU()(self.pool1(self.bn1(self.conv1(x))))
        x = nn.SiLU()(self.pool1(self.bn2(self.conv2(x))))
        x = x.view(-1, 256)
        x = nn.SiLU()(self.bn3(self.fc1(x)))
        x = nn.SiLU()(self.bn4(self.fc2(x)))
        '''
        x = self.pool(self.bn1(self.conv1(x)))
        x = self.swish_fn(x = x)
        x = self.pool(self.bn2(self.conv2(x)))
        x = self.swish_fn(x = x)
        x = x.view(-1, 256)
        x = self.bn3(self.fc1(x))
        x = self.swish_fn(x = x)
        x = self.bn4(self.fc2(x))
        x = self.swish_fn(x = x)
        x = self.fc3(x)
        return x
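The training loop below also uses an optimizer and a scheduler that are not shown in the question; a minimal setup of the kind implied (the specific optimizer and learning-rate schedule here are assumptions, not from the original post) would be:

import torch

model = LeNet5(beta = 1.0)

# Hypothetical choices; the original optimizer/scheduler are not shown.
optimizer = torch.optim.Adam(model.parameters(), lr = 1e-3)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size = 10, gamma = 0.5)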

While training the model, I print 'beta' as follows:

for epoch in range(1, num_epochs + 1):
    
    # One epoch of training-
    train_loss, train_acc = train_one_step(
        model = model, train_loader = train_loader,
        train_dataset = train_dataset
    )
    
    # Get validation metrics after 1 epoch of training-
    val_loss, val_acc = test_one_step(
        model = model, test_loader = test_loader,
        test_dataset = test_dataset
    )
    
    scheduler.step()
    current_lr = optimizer.param_groups[0]["lr"]
    
    print(f"Epoch: {epoch}; loss = {train_loss:.4f}, acc = {train_acc:.2f}%",
          f" val loss = {val_loss:.4f}, val acc = {val_acc:.2f}%,"
          f" beta = {model.beta:.6f} & LR = {current_lr:.5f}"
         )
    
    # Save training metrics to Python3 dict-
    train_history[epoch] = {
        'train_loss': train_loss, 'val_loss': val_loss,
        'train_acc': train_acc, 'val_acc': val_acc,
        'lr': current_lr
    }
    
    # Save model with best validation accuracy-
    if (val_acc > best_val_acc):
        best_val_acc = val_acc
        print(f"Saving model with highest val_acc = {val_acc:.2f}%\n")
        torch.save(model.state_dict(), "LeNet5_MNIST_best_val_acc.pth")
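train_one_step and test_one_step are not reproduced in the question; a minimal sketch of the training helper, assuming cross-entropy loss and the module-level optimizer from above (both assumptions), is:

import torch.nn.functional as F

def train_one_step(model, train_loader, train_dataset):
    # One epoch over the training data; returns (mean loss, accuracy in %).
    # Relies on the module-level 'optimizer', mirroring how the loop above uses it.
    model.train()
    running_loss, correct = 0.0, 0
    for images, labels in train_loader:
        optimizer.zero_grad()
        logits = model(images)
        loss = F.cross_entropy(logits, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item() * images.size(0)
        correct += (logits.argmax(dim = 1) == labels).sum().item()
    return running_loss / len(train_dataset), 100.0 * correct / len(train_dataset)

test_one_step would mirror this with model.eval(), torch.no_grad(), and no optimizer step.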

What am I doing wrong? Why isn't beta being trained as expected?

Answer from 7eumitmz:

I just tried what I suggested in my own comment: replacing autograd.Variable with nn.Parameter works.
Variable has been deprecated for years; avoid it whenever you can, since it has long been merged into Tensor. nn.Parameter is a wrapper around Tensor that ensures the tensor is registered by model.parameters() (and will therefore be updated by the optimizer).
With Variable, beta is never updated; with Parameter, beta changes after backpropagation.
Here is the test code:

import torch
from torch import nn

class LeNet5(nn.Module):
    def __init__(self, beta=1.0):
        super(LeNet5, self).__init__()

        b = torch.tensor(data=beta, dtype=torch.float32)
        # self.beta = torch.autograd.Variable(b, requires_grad=True)
        self.beta = torch.nn.Parameter(b, requires_grad=True)

        self.conv1 = nn.Conv2d(
            in_channels=1, out_channels=6,
            kernel_size=5, stride=1,
            padding=0, bias=False
        )
        self.bn1 = nn.BatchNorm2d(num_features=6)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(
            in_channels=6, out_channels=16,
            kernel_size=5, stride=1,
            padding=0, bias=False
        )
        self.bn2 = nn.BatchNorm2d(num_features=16)
        self.fc1 = nn.Linear(
            in_features=256, out_features=120,
            bias=True
        )
        self.bn3 = nn.BatchNorm1d(num_features=120)
        self.fc2 = nn.Linear(
            in_features=120, out_features=84,
            bias=True
        )
        self.bn4 = nn.BatchNorm1d(num_features=84)
        self.fc3 = nn.Linear(
            in_features=84, out_features=10,
            bias=True
        )

        self.initialize_weights()

    def initialize_weights(self):
        for m in self.modules():
            # print(m)
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight)

                # Do not initialize bias (due to batchnorm)-
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)

            elif isinstance(m, nn.BatchNorm2d):
                # Standard initialization for batch normalization-
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

            elif isinstance(m, nn.Linear):
                nn.init.kaiming_normal_(m.weight)
                nn.init.constant_(m.bias, 0)

    def swish_fn(self, x):
        return x * torch.sigmoid(x * self.beta)

    def forward(self, x):
        '''
        x = nn.SiLU()(self.pool1(self.bn1(self.conv1(x))))
        x = nn.SiLU()(self.pool1(self.bn2(self.conv2(x))))
        x = x.view(-1, 256)
        x = nn.SiLU()(self.bn3(self.fc1(x)))
        x = nn.SiLU()(self.bn4(self.fc2(x)))
        '''
        x = self.pool(self.bn1(self.conv1(x)))
        x = self.swish_fn(x=x)
        x = self.pool(self.bn2(self.conv2(x)))
        x = self.swish_fn(x=x)
        x = x.view(-1, 256)
        x = self.bn3(self.fc1(x))
        x = self.swish_fn(x=x)
        x = self.bn4(self.fc2(x))
        x = self.swish_fn(x=x)
        x = self.fc3(x)
        return x

if __name__ == '__main__':
    model = LeNet5()
    print(model.beta)  # beta before any update

    optim = torch.optim.Adam(model.parameters())
    optim.zero_grad()
    # MNIST-sized input (28 x 28): fc1 expects 16 * 4 * 4 = 256 flattened features
    out = model(torch.randn(32, 1, 28, 28))
    loss = out.mean()
    loss.backward()
    optim.step()

    print(model.beta)  # beta has changed after one optimizer step
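Why the Variable version never learns: a plain requires_grad tensor attribute is not registered with the module, so the optimizer never sees it. A minimal sketch with a toy module (not the poster's code) illustrating the difference:

import torch
from torch import nn

class Toy(nn.Module):
    def __init__(self):
        super().__init__()
        # Plain tensor attribute (what autograd.Variable amounts to today):
        # it is NOT registered as a parameter of the module.
        self.a = torch.tensor(1.0, requires_grad = True)
        # nn.Parameter IS registered and appears in parameters()/state_dict().
        self.b = nn.Parameter(torch.tensor(1.0))

m = Toy()
print([name for name, _ in m.named_parameters()])  # ['b'] -- 'a' is missing

Since the optimizer is constructed from model.parameters(), only registered parameters (such as beta stored as an nn.Parameter) ever receive gradient updates.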
