嘿,我用pytorch写了一段代码,用unet分割卫星图像。如果我单独测试每个块(unet,dataloader,train()),它工作得很好,它会给我正确的大小。但是如果我把它们结合起来,我会得到错误:
Traceback (most recent call last):
File "/Users/.../gesamt.py", line 300, in <module>
train_loss, train_dice = train(model, train_loader, criterion, optimizer, device)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/.../gesamt.py", line 251, in train
outputs = model(inputs)
^^^^^^^^^^^^^
File "/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/.../gesamt.py", line 193, in forward
x1 = self.down_conv_1(image) #
^^^^^^^^^^^^^^^^^^^^^^^
File "/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/torch/nn/modules/container.py", line 217, in forward
input = module(input)
^^^^^^^^^^^^^
File "/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/torch/nn/modules/conv.py", line 463, in forward
return self._conv_forward(input, self.weight, self.bias)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/torch/nn/modules/conv.py", line 459, in _conv_forward
return F.conv2d(input, weight, bias, self.stride,
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
RuntimeError: Expected 3D (unbatched) or 4D (batched) input to conv2d, but got input of size: [1, 16, 3, 572, 572]
字符串
**我知道问题是什么,但我找不到代码不会继续使用[16,3,572,572]的原因,而是添加了batch_size,因此ist在使用[16,3,572,572]时会遇到错误。
import os
from PIL import Image
from torch.utils.data import Dataset
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision.transforms import Compose, ToTensor, Normalize
from tqdm import tqdm
class SatelliteDataset(Dataset):
def __init__(self, image_dir, mask_dir, transform=None, patch_size=572):
self.image_dir = image_dir
self.mask_dir = mask_dir
self.transform = transform
self.patch_size = patch_size
# creating a Dictionary, to group masks and images with the same index
self.image_mask_pairs = self._group_image_mask_pairs()
def _group_image_mask_pairs(self):
image_files = [f for f in os.listdir(self.image_dir) if f.endswith(".png")]
mask_files = [f for f in os.listdir(self.mask_dir) if f.endswith(".png")]
image_mask_pairs = {}
for image_file in image_files:
image_number = self._extract_number_from_filename(image_file)
for mask_file in mask_files:
mask_number = self._extract_number_from_filename(mask_file)
if image_number == mask_number:
image_path = os.path.join(self.image_dir, image_file)
mask_path = os.path.join(self.mask_dir, mask_file)
image_mask_pairs[image_number] = (image_path, mask_path)
break
return image_mask_pairs
def _extract_number_from_filename(self, filename):
return int(filename.split('_')[0])
def _split_image_into_patches(self, image):
patches = []
height, width, _ = image.shape
for y in range(0, height - self.patch_size + 1, self.patch_size):
for x in range(0, width - self.patch_size + 1, self.patch_size):
patch = image[y:y + self.patch_size, x:x + self.patch_size]
patches.append(patch)
return patches
def __len__(self):
return len(self.image_mask_pairs)
def _apply_transform(self, image, mask):
if self.transform is not None:
if self.transform.transforms is not None:
for t in self.transform.transforms:
image = t(image)
mask = t(mask)
return image, mask
def __getitem__(self, index):
image_path, mask_path = self.image_mask_pairs[index]
image = np.array(Image.open(image_path))
mask = np.array(Image.open(mask_path))
image_patches = self._split_image_into_patches(image)
mask_patches = self._split_image_into_patches(mask)
#Apply transformations to each patch individually
augmented_image_patches = []
augmented_mask_patches = []
for i in range(len(image_patches)):
augmented_image, augmented_mask = self._apply_transform(image_patches[i], mask_patches[i])
augmented_image_patches.append(augmented_image)
augmented_mask_patches.append(augmented_mask)
#Combine the patches (as Tensors) without transposing channels
augmented_image = torch.stack(augmented_image_patches)
augmented_mask = torch.stack(augmented_mask_patches)
return augmented_image, augmented_mask
def double_conv(in_c, out_c):
conv = nn.Sequential(
nn.Conv2d(in_c, out_c, kernel_size=3),
nn.ReLU(inplace=True),
nn.Conv2d(out_c, out_c, kernel_size=3),
nn.ReLU(inplace=True),
)
return conv
def crop_img(tensor, target_tensor):
target_size = target_tensor.size()[2]
tensor_size = tensor.size()[2]
delta = tensor_size - target_size
delta = delta // 2
return tensor[:, :, delta:tensor_size-delta, delta:tensor_size-delta]
class UNet(nn.Module):
def __init__(self, in_channels=3, out_channels=7):
super(UNet, self).__init__()
self.max_pool_2x2 = nn.MaxPool2d(kernel_size=2, stride=2)
self.max_pool_2x2 = nn.MaxPool2d(kernel_size=2, stride=2)
self.down_conv_1 = double_conv(3, 64)
self.down_conv_2 = double_conv(64, 128)
self.down_conv_3 = double_conv(128, 256)
self.down_conv_4 = double_conv(256, 512)
self.down_conv_5 = double_conv(512, 1024)
self.up_trans_1 = nn.ConvTranspose2d(
in_channels=1024,
out_channels=512,
kernel_size=2,
stride=2)
self.up_conv_1 = double_conv(1024, 512)
self.up_trans_2 = nn.ConvTranspose2d(
in_channels=512,
out_channels=256,
kernel_size=2,
stride=2)
self.up_conv_2 = double_conv(512, 256)
self.up_trans_3 = nn.ConvTranspose2d(
in_channels=256,
out_channels=128,
kernel_size=2,
stride=2)
self.up_conv_3 = double_conv(256, 128)
self.up_trans_4 = nn.ConvTranspose2d(
in_channels=128,
out_channels=64,
kernel_size=2,
stride=2)
self.up_conv_4 = double_conv(128, 64)
self.out = nn.Conv2d(
in_channels=64,
out_channels=7, #Objects to segments
kernel_size=1
)
def forward(self, image):
#encoder
x1 = self.down_conv_1(image) #
x2 = self.max_pool_2x2(x1)
x3 = self.down_conv_2(x2) #
x4 = self.max_pool_2x2(x3)
x5 = self.down_conv_3(x4) #
x6 = self.max_pool_2x2(x5)
x7 = self.down_conv_4(x6)
x8 = self.max_pool_2x2(x7)
x9 = self.down_conv_5(x8)
#decoder
x = self.up_trans_1(x9)
y = crop_img(x7, x)
x = self.up_conv_1(torch.cat([x, y], 1))
x = self.up_trans_2(x)
y = crop_img(x5, x)
x = self.up_conv_2(torch.cat([x, y], 1))
x = self.up_trans_3(x)
y = crop_img(x3, x)
x = self.up_conv_3(torch.cat([x, y], 1))
x = self.up_trans_4(x)
y = crop_img(x1, x)
x = self.up_conv_4(torch.cat([x, y], 1))
x = self.out(x)
print(x.size())
return x
# calculates the Dice-Koeffizient
def dice_coefficient(pred, target):
smooth = 1.0
intersection = (pred * target).sum()
dice = (2.0 * intersection + smooth) / (pred.sum() + target.sum() + smooth)
return dice
# train-function
def train(model, train_loader, criterion, optimizer, device):
model.train()
running_loss = 0.0
running_dice = 0.0
for inputs, masks in tqdm(train_loader, desc="Training", leave=False):
inputs, masks = inputs.to(device), masks.to(device)
optimizer.zero_grad()
outputs = model(inputs)
print(f"outputs size: {outputs.size()}")
loss = criterion(outputs, masks)
dice = dice_coefficient(outputs.argmax(dim=1), masks.argmax(dim=1))
loss.backward()
optimizer.step()
running_loss += loss.item()
running_dice += dice.item()
epoch_loss = running_loss / len(train_loader)
epoch_dice = running_dice / len(train_loader)
return epoch_loss, epoch_dice
# Configuration for the training
image_dir = "/Users/.../img_path/"
mask_dir = "/Users/.../mask_path/"
patch_size = 572
batch_size = 16
learning_rate = 0.001
num_epochs = 1
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Instanz for Datasets and DataLoader
transform = Compose([
ToTensor(),
Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])
dataset = SatelliteDataset(image_dir, mask_dir, transform=transform, patch_size=patch_size)
train_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
# Instanz of the modell and the optimizer
model = UNet(in_channels=3, out_channels=7) # Passe out_channels an die Anzahl der Objekte an, die segmentiert werden sollen
model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
# start the training
for epoch in range(num_epochs):
print("hi")
print(f"Epoch {epoch + 1}/{num_epochs}")
train_loss, train_dice = train(model, train_loader, criterion, optimizer, device)
print(f"Train Loss: {train_loss:.4f} | Train Dice: {train_dice:.4f}")
print("hallo")
# save the model
torch.save(model.state_dict(), "trainiertes_modell.pth")
型
我在互联网和stackoverflow上寻找这个问题的解决方案。我发现一些类似的错误信息,但解决方案没有帮助,因为我找不到为什么我的代码是增加16的大小。我也问过ChatGPT,但它也没有找到一个有效的解决方案。
我希望你们中的一些人能帮助我发生了什么。提前感谢!
1条答案
按热度按时间tyg4sfes1#
您正在数据集
__get_item__
中生成一系列影像,并使用专为一个影像设计的典型训练循环我猜这个方法的输出大小是(16,3,572,572)(在debug中检查),然后你运行Dataloader,批量大小为1,你有一个五维Tensor大小:[1,16,3,572,572],不适合Conv2d。
您可以:
1.修正
get_item
以产生(C,H,W)或形式的Tensor1.将
inputs, masks
从(B,16,C,H,W)挤压到(B*16,C,H,W)