你好!
我一直在努力解决以下错误:所有Tensor应位于同一设备上,但至少找到两个设备,cuda:0和cpu!(在方法wrapper_nll_loss_forward中检查参数目标的参数时)
我一直无法找到我需要添加模型的一部分到GPU的位置。从错误消息中我推测它应该在损失函数中,但我已经尝试了所有我能想到的地方,与损失函数有关,并无法解决它。
希望能帮上忙。
我的完整代码可以在这里找到:
https://huggingface.co/AFAD85/CNN_apples/blob/main/CNN%20paper%20clone%20pytorch.ipynb
我尝试隔离所有可能相关的代码如下:
# Preprocessing pipeline: resize every image to the network's expected
# 350x350 input, convert PIL image -> float tensor in [0, 1], then shift
# each RGB channel to roughly [-1, 1] via (x - 0.5) / 0.5.
_preprocess_steps = [
    transforms.Resize((350, 350)),
    transforms.ToTensor(),
    transforms.Normalize([0.5, 0.5, 0.5],
                         [0.5, 0.5, 0.5]),
]
transformer = transforms.Compose(_preprocess_steps)
class ConvNet(nn.Module):
    """Five conv -> batchnorm -> ReLU -> maxpool stages followed by two
    fully connected layers.

    Expects 3x350x350 inputs; with 'valid' (no) padding and five 2x2 pools
    the spatial size shrinks 350 -> 9, giving 32*9*9 = 2592 features into
    fc1. Emits raw logits of shape (batch, num_classes).
    """

    def __init__(self, num_classes=4):
        super(ConvNet, self).__init__()
        # Stage 1: 3 -> 128 channels
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=128, kernel_size=3, stride=1, padding='valid')
        self.bn1 = nn.BatchNorm2d(num_features=128)
        self.relu1 = nn.ReLU()
        self.pool1 = nn.MaxPool2d(kernel_size=2)
        # Stage 2: 128 -> 64 channels
        self.conv2 = nn.Conv2d(in_channels=128, out_channels=64, kernel_size=3, stride=1, padding='valid')
        self.bn2 = nn.BatchNorm2d(num_features=64)
        self.relu2 = nn.ReLU()
        self.pool2 = nn.MaxPool2d(kernel_size=2)
        # Stage 3: 64 -> 64 channels
        self.conv3 = nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, stride=1, padding='valid')
        self.bn3 = nn.BatchNorm2d(num_features=64)
        self.relu3 = nn.ReLU()
        self.pool3 = nn.MaxPool2d(kernel_size=2)
        # Stage 4: 64 -> 32 channels
        self.conv4 = nn.Conv2d(in_channels=64, out_channels=32, kernel_size=3, stride=1, padding='valid')
        self.bn4 = nn.BatchNorm2d(num_features=32)
        self.relu4 = nn.ReLU()
        self.pool4 = nn.MaxPool2d(kernel_size=2)
        # Stage 5: 32 -> 32 channels
        self.conv5 = nn.Conv2d(in_channels=32, out_channels=32, kernel_size=3, stride=1, padding='valid')
        self.bn5 = nn.BatchNorm2d(num_features=32)
        self.relu5 = nn.ReLU()
        self.pool5 = nn.MaxPool2d(kernel_size=2)
        # Classifier head: flatten the 32x9x9 feature map into 2592 features.
        self.flat = nn.Flatten()
        self.fc1 = nn.Linear(in_features=2592, out_features=256)
        self.fc2 = nn.Linear(in_features=256, out_features=num_classes)

    def forward(self, input):
        """Run the five convolutional stages, then the classifier head."""
        x = input
        stages = (
            (self.conv1, self.bn1, self.relu1, self.pool1),
            (self.conv2, self.bn2, self.relu2, self.pool2),
            (self.conv3, self.bn3, self.relu3, self.pool3),
            (self.conv4, self.bn4, self.relu4, self.pool4),
            (self.conv5, self.bn5, self.relu5, self.pool5),
        )
        for stage in stages:
            for layer in stage:
                x = layer(x)
        x = self.flat(x)
        x = self.fc1(x)
        return self.fc2(x)
# Build the network, then move every parameter/buffer to the active device
# (GPU when available) before the optimizer captures parameter references.
model = ConvNet(num_classes=4)
model = model.to(device)

# Cross-entropy for multi-class classification; Adam with a mild L2 penalty.
loss_function = nn.CrossEntropyLoss()
optimizer = Adam(model.parameters(), lr=0.001, weight_decay=0.0001)
# Training loop: one pass over the training data per epoch, followed by a
# test-set evaluation; the best-performing weights are checkpointed.
best_accuracy = 0.0
for epoch in range(num_epochs):
    # ---- training phase ----
    model.train()
    train_accuracy = 0.0
    train_loss = 0.0
    for i, (images, labels) in enumerate(train_loader):
        if torch.cuda.is_available():
            # Variable() is a deprecated no-op in modern PyTorch; plain
            # .cuda() moves the batch to the GPU.
            images = images.cuda()
            # BUG FIX: this line used to read `lables = ...`, which left the
            # real `labels` tensor on the CPU and caused
            # "Expected all tensors to be on the same device ... cuda:0 and
            # cpu!" inside the loss function.
            labels = labels.cuda()
        optimizer.zero_grad()
        outputs = model(images)
        loss = loss_function(outputs, labels)
        loss.backward()
        optimizer.step()
        train_loss += loss.cpu().item() * images.size(0)
        # BUG FIX: torch.max returns a (values, indices) tuple; the original
        # `_.prediction = ...` raised AttributeError instead of unpacking.
        _, prediction = torch.max(outputs.data, 1)
        train_accuracy += int(torch.sum(prediction == labels.data))
    train_accuracy = train_accuracy / train_count
    train_loss = train_loss / train_count

    # ---- evaluation phase (test set) ----
    model.eval()
    test_accuracy = 0.0
    with torch.no_grad():  # no gradients needed during evaluation
        # BUG FIX: the original iterated train_loader here; evaluation must
        # use the held-out test data (test_count is the matching count).
        for i, (images, labels) in enumerate(test_loader):
            if torch.cuda.is_available():
                images = images.cuda()
                labels = labels.cuda()  # same `lables` typo fixed here
            outputs = model(images)
            _, prediction = torch.max(outputs.data, 1)
            # BUG FIX: the original never accumulated per-batch correctness,
            # so test_accuracy was always 0.
            test_accuracy += int(torch.sum(prediction == labels.data))
    test_accuracy = test_accuracy / test_count

    # BUG FIX: the closing parenthesis of print() sat right after
    # str(train_loss), so the remaining strings were added to print()'s
    # return value (None), raising a TypeError.
    print('Epoch: ' + str(epoch) + ' Train Loss: ' + str(train_loss)
          + ' Train Accuracy: ' + str(train_accuracy)
          + ' Test Accuracy: ' + str(test_accuracy))

    if test_accuracy > best_accuracy:
        torch.save(model.state_dict(), 'best_checkpoint.model')
        # BUG FIX: best_accuracy was never updated, so every epoch
        # overwrote the checkpoint regardless of quality.
        best_accuracy = test_accuracy
我试图让模型运行一个纪元,期望它在GPU上运行。
我尝试在所有可能出现问题的地方添加.cuda()和.to_device(),但无法找到正确的方法。
1条答案
按热度按时间mu0hgdu01#
这是由于拼写错误:代码中的 `lables` 应为 `labels`。由于 `lables = Variable(labels.cuda())` 只是新建了一个变量,真正传给损失函数的 `labels` 仍留在 CPU 上,因此出现设备不一致的错误。