我需要创建一个模型,输入一个351x351x11Tensor,输出一个351x351x11Tensor(它是一个自动编码器)。这两个Tensor由0和1组成。
这是模型:
class AutoEncoder(nn.Module):
def __init__(self):
super(AutoEncoder, self).__init__()
self.down_layers=nn.ModuleList()
self.up_layers=nn.ModuleList()
self.n_layers = 1
self.down_layers.append(nn.Conv3d(5,1,(3,3,1)))
self.up_layers.append(nn.ConvTranspose3d(1,5,(3,3,1)))
for d_l in self.down_layers:
torch.nn.init.normal_(d_l.weight, mean=0.5, std=0.7)
for u_l in self.up_layers:
torch.nn.init.normal_(u_l.weight, mean=0.5, std=0.7)
def encode(self, x):
# Encoder
for i in range(len(self.down_layers)):
x = self.down_layers[i](x)
x = torch.sigmoid(x)
return x
def forward(self, x):
# Decoder
x = self.encode(x)
for i in range(len(self.up_layers)):
x = self.up_layers[i](x)
x = torch.sigmoid(x)
if(i==(len(self.up_layers)-1)):
x = torch.round(x)
return x
字符串
这是训练函数:
max_e,max_p = 351,11 #tensor dimensions
DEVICE = get_device() #device is cpu
EPOCHS = 100
BATCHSIZE=5
try:
print("Start model",flush=True)
# Generate the model.
model = AutoEncoder().to(DEVICE)
lr = 0.09
optimizer = torch.optim.RMSprop(model.parameters(), lr=lr)
# Training of the model.
for epoch in range(EPOCHS):
model.train()
"""
I have to create 25 dataloaders for 50000 training samples (each of 2000 samples) to avoid memory congestion.
"""
for i in range(25):
train_loader,X_train_shape=get_dataset(i)
N_TRAIN_EXAMPLES = X_train_shape
for batch_idx, (data, target) in enumerate(train_loader):
if batch_idx * BATCHSIZE >= N_TRAIN_EXAMPLES:
break
data, target = data[None, ...].to(DEVICE, dtype=torch.float), target[None, ...].to(DEVICE, dtype=torch.float)
optimizer.zero_grad()
output = model(data)
loss = torch.nn.BCELoss()
loss = loss(output, target)
loss.backward()
optimizer.step()
#remove train data loader from memory
del train_loader
print("VALIDATION",flush=True)
# Validation of the model.
model.eval()
correct = 0
tot = 0
with torch.no_grad():
"""
Same with the training, 10 data loaders for 20000 samples
"""
for i in range(25,35):
valid_loader,X_valid_shape=get_dataset(i)
N_VALID_EXAMPLES = X_valid_shape
for batch_idx, (data, target) in enumerate(valid_loader):
# Limiting validation data.
if batch_idx * BATCHSIZE >= N_VALID_EXAMPLES:
break
data, target = data[None, ...].to(DEVICE, dtype=torch.float), target[None, ...].to(DEVICE, dtype=torch.float)
output = model(data)
# count the number of 1s and 0s predicted correctly
newCorrect= output(target.view_as(output)).sum().item()
correct += newCorrect
tot +=max_e*max_e*max_p*BATCHSIZE
del valid_loader
accuracy = correct*100 / tot
print('Epoch: {} Loss: {} Accuracy: {} %'.format(epoch, loss.data, accuracy),flush=True)
型
返回数据加载器的函数是:
def get_dataset(i):
X_train=[]
Y_train=[]
for j in range(i*2000,(i+1)*2000):
t = torch.load("/home/ubuntu/data/home/ubuntu/deeplogic/el_dataset/x/scene{}.pt".format(j))
X_train.append(t)
t = torch.load("/home/ubuntu/data/home/ubuntu/deeplogic/el_dataset/y/scene{}.pt".format(j))
Y_train.append(t)
train_x = torch.from_numpy(np.array(X_train)).float()
train_y = torch.from_numpy(np.array(Y_train)).float()
batch_size = 1
train = torch.utils.data.TensorDataset(train_x,train_y)
# data loader
train_loader = torch.utils.data.DataLoader(train, batch_size = batch_size, shuffle = True)
return train_loader,len(X_train)
型
我得到的指纹是:
Epoch: 1 Loss: 99.80729675292969 Accuracy: 0.19852701903983955 %
Epoch: 2 Loss: 99.80729675292969 Accuracy: 0.19852701903983955 %
Epoch: 3 Loss: 99.80729675292969 Accuracy: 0.19852701903983955 %
Epoch: 4 Loss: 99.80729675292969 Accuracy: 0.19852701903983955 %
型
2条答案
按热度按时间r9f1avp51#
x = torch.round(x)
会阻止您更新模型,因为它是不可微分的。更重要的是,x = torch.round(x)
对于BCELoss
而言是冗余的。您应该只移动它的验证步骤。此外,验证循环中的newCorrect
也不会与目标值进行比较。(我在代码中添加了缺少的eq()
。)字符串
s4n0splo2#
一个潜在的原因:你的学习率太高了。没有必要为优化器设置1 e-2级别。在大多数情况下,1 e-4或1 e-5的学习率就足够了。