PyTorch model loss and accuracy stay constant

kulphzqa · posted 2023-08-05 · in: Other

I need to create a model that takes a 351x351x11 tensor as input and outputs a 351x351x11 tensor (it is an autoencoder). Both tensors consist only of 0s and 1s.
Here is the model:

import torch
import torch.nn as nn

class AutoEncoder(nn.Module):
    def __init__(self):
        super(AutoEncoder, self).__init__()
        self.down_layers = nn.ModuleList()
        self.up_layers = nn.ModuleList()
        self.n_layers = 1
        self.down_layers.append(nn.Conv3d(5, 1, (3, 3, 1)))
        self.up_layers.append(nn.ConvTranspose3d(1, 5, (3, 3, 1)))
        for d_l in self.down_layers:
            torch.nn.init.normal_(d_l.weight, mean=0.5, std=0.7)
        for u_l in self.up_layers:
            torch.nn.init.normal_(u_l.weight, mean=0.5, std=0.7)

    def encode(self, x):
        # Encoder
        for i in range(len(self.down_layers)):
            x = self.down_layers[i](x)
            x = torch.sigmoid(x)
        return x

    def forward(self, x):
        # Decoder
        x = self.encode(x)
        for i in range(len(self.up_layers)):
            x = self.up_layers[i](x)
            x = torch.sigmoid(x)
            if i == (len(self.up_layers) - 1):
                x = torch.round(x)
        return x

Here is the training code:

max_e,max_p = 351,11 #tensor dimensions
DEVICE = get_device() #device is cpu
EPOCHS = 100
BATCHSIZE=5
try: 
    print("Start model",flush=True)
    # Generate the model.
    model = AutoEncoder().to(DEVICE)
    lr = 0.09
    optimizer = torch.optim.RMSprop(model.parameters(), lr=lr)
    # Training of the model.
    for epoch in range(EPOCHS):
        model.train()
        """
        I have to create 25 dataloaders for 50000 training samples (each of 2000 samples) to avoid memory congestion. 
        """
        for i in range(25):
            train_loader,X_train_shape=get_dataset(i)
            N_TRAIN_EXAMPLES = X_train_shape
            for batch_idx, (data, target) in enumerate(train_loader):
                if batch_idx * BATCHSIZE >= N_TRAIN_EXAMPLES:
                    break
                data, target = data[None, ...].to(DEVICE, dtype=torch.float), target[None, ...].to(DEVICE, dtype=torch.float)
                optimizer.zero_grad()
                output = model(data)
                criterion = torch.nn.BCELoss()
                loss = criterion(output, target)
                loss.backward()
                optimizer.step()
            #remove train data loader from memory
            del train_loader

        print("VALIDATION",flush=True)
        # Validation of the model.
        model.eval()
        correct = 0
        tot = 0
        
        with torch.no_grad():
            """
             Same with the training, 10 data loaders for 20000 samples
            """
            for i in range(25,35):
                valid_loader,X_valid_shape=get_dataset(i)
                N_VALID_EXAMPLES = X_valid_shape
                for batch_idx, (data, target) in enumerate(valid_loader):
                    # Limiting validation data.
                    if batch_idx * BATCHSIZE >= N_VALID_EXAMPLES:
                        break
                    data, target = data[None, ...].to(DEVICE, dtype=torch.float), target[None, ...].to(DEVICE, dtype=torch.float)
                    output = model(data)
                    # count the number of 1s and 0s predicted correctly
                    newCorrect= output(target.view_as(output)).sum().item()
                    correct += newCorrect
                    tot +=max_e*max_e*max_p*BATCHSIZE
                del valid_loader
        accuracy = correct*100 / tot
        print('Epoch: {}  Loss: {}  Accuracy: {} %'.format(epoch, loss.data, accuracy), flush=True)
except Exception as e:
    # except clause re-added so the try block parses; the original excerpt omitted it
    print("Training failed: {}".format(e), flush=True)
    raise


And here is the function that returns the data loaders:

import numpy as np
import torch

def get_dataset(i):
    X_train = []
    Y_train = []
    for j in range(i*2000, (i+1)*2000):
        t = torch.load("/home/ubuntu/data/home/ubuntu/deeplogic/el_dataset/x/scene{}.pt".format(j))
        X_train.append(t)
        t = torch.load("/home/ubuntu/data/home/ubuntu/deeplogic/el_dataset/y/scene{}.pt".format(j))
        Y_train.append(t)
    train_x = torch.from_numpy(np.array(X_train)).float()
    train_y = torch.from_numpy(np.array(Y_train)).float()

    batch_size = 1

    train = torch.utils.data.TensorDataset(train_x, train_y)

    # data loader
    train_loader = torch.utils.data.DataLoader(train, batch_size=batch_size, shuffle=True)

    return train_loader, len(X_train)


The prints I am getting are:

Epoch: 1  Loss: 99.80729675292969  Accuracy: 0.19852701903983955 %
Epoch: 2  Loss: 99.80729675292969  Accuracy: 0.19852701903983955 %
Epoch: 3  Loss: 99.80729675292969  Accuracy: 0.19852701903983955 %
Epoch: 4  Loss: 99.80729675292969  Accuracy: 0.19852701903983955 %

r9f1avp5 · Answer #1

x = torch.round(x) prevents you from updating your model because it is not differentiable. More importantly, x = torch.round(x) is redundant for BCELoss. You should just move it to the validation step. Also, newCorrect in your validation loop does not actually compare the output with the target values; the missing eq() is added in the snippet further below.
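A quick way to see the differentiability problem (a minimal demo of my own, not from the original post): the gradient of round() is zero almost everywhere, so every weight upstream of it receives a zero learning signal.

import torch

x = torch.tensor([0.3, 0.7], requires_grad=True)
y = torch.round(x).sum()
y.backward()
print(x.grad)  # tensor([0., 0.]) -- zero gradient, so the optimizer never moves the weights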

# in validation loop
preds = torch.round(output)
newCorrect = preds.eq(target.view_as(preds)).sum().item()
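Putting both fixes together, a minimal sketch based on the asker's code (not a tested drop-in implementation): forward() returns the raw sigmoid probabilities so BCELoss can backpropagate, and rounding happens only when computing the validation metric.

# forward() without the final rounding: BCELoss wants probabilities in (0, 1)
def forward(self, x):
    x = self.encode(x)
    for i in range(len(self.up_layers)):
        x = self.up_layers[i](x)
        x = torch.sigmoid(x)
    return x

# in the validation loop: binarize only for the accuracy count
output = model(data)
preds = torch.round(output)
correct += preds.eq(target.view_as(preds)).sum().item()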


s4n0splo · Answer #2

One potential cause: your learning rate is too high. There is no need to set the optimizer at the 1e-2 level; in most cases a learning rate of 1e-4 or 1e-5 is sufficient.
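For example, a minimal change (the exact value is an assumption and would need tuning on this dataset):

lr = 1e-4  # instead of 0.09; try values between 1e-5 and 1e-3
optimizer = torch.optim.RMSprop(model.parameters(), lr=lr)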
