PyTorch RuntimeError: Expected 3D (unbatched) or 4D (batched) input to conv2d

axzmvihb · asked 2023-08-05 in Other
Follow (0) | Answers (1) | Views (590)

Hey, I wrote some PyTorch code to segment satellite images with a U-Net. If I test each block on its own (UNet, DataLoader, train()), it works fine and gives me the correct sizes. But when I put them together, I get this error:

Traceback (most recent call last):                                                                                                     
  File "/Users/.../gesamt.py", line 300, in <module>
    train_loss, train_dice = train(model, train_loader, criterion, optimizer, device)
                             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/.../gesamt.py", line 251, in train
    outputs = model(inputs)
              ^^^^^^^^^^^^^
  File "/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/.../gesamt.py", line 193, in forward
    x1 = self.down_conv_1(image) #
         ^^^^^^^^^^^^^^^^^^^^^^^
  File "/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/torch/nn/modules/container.py", line 217, in forward
    input = module(input)
            ^^^^^^^^^^^^^
  File "/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/torch/nn/modules/conv.py", line 463, in forward
    return self._conv_forward(input, self.weight, self.bias)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/torch/nn/modules/conv.py", line 459, in _conv_forward
    return F.conv2d(input, weight, bias, self.stride,
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
RuntimeError: Expected 3D (unbatched) or 4D (batched) input to conv2d, but got input of size: [1, 16, 3, 572, 572]


I know what the problem is, but I can't find the reason why the code doesn't simply keep working with [16, 3, 572, 572]; instead it adds the batch_size on top, so it runs into the error with an input of size [1, 16, 3, 572, 572].

import os
from PIL import Image
from torch.utils.data import Dataset
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision.transforms import Compose, ToTensor, Normalize
from tqdm import tqdm

class SatelliteDataset(Dataset):
    def __init__(self, image_dir, mask_dir, transform=None, patch_size=572):
        self.image_dir = image_dir
        self.mask_dir = mask_dir
        self.transform = transform
        self.patch_size = patch_size

        # create a dictionary that groups images and masks sharing the same index
        self.image_mask_pairs = self._group_image_mask_pairs()

    def _group_image_mask_pairs(self):
        image_files = [f for f in os.listdir(self.image_dir) if f.endswith(".png")]
        mask_files = [f for f in os.listdir(self.mask_dir) if f.endswith(".png")]
        image_mask_pairs = {}

        for image_file in image_files:
            image_number = self._extract_number_from_filename(image_file)
            for mask_file in mask_files:
                mask_number = self._extract_number_from_filename(mask_file)
                if image_number == mask_number:
                    image_path = os.path.join(self.image_dir, image_file)
                    mask_path = os.path.join(self.mask_dir, mask_file)
                    image_mask_pairs[image_number] = (image_path, mask_path)
                    break

        return image_mask_pairs

    def _extract_number_from_filename(self, filename):
        return int(filename.split('_')[0])

    def _split_image_into_patches(self, image):
        patches = []
        height, width, _ = image.shape
        for y in range(0, height - self.patch_size + 1, self.patch_size):
            for x in range(0, width - self.patch_size + 1, self.patch_size):
                patch = image[y:y + self.patch_size, x:x + self.patch_size]
                patches.append(patch)
        return patches

    def __len__(self):
        return len(self.image_mask_pairs)

    def _apply_transform(self, image, mask):
        if self.transform is not None:
            if self.transform.transforms is not None:
                for t in self.transform.transforms:
                    image = t(image)
                    mask = t(mask)
        return image, mask


    def __getitem__(self, index):
        image_path, mask_path = self.image_mask_pairs[index]

        image = np.array(Image.open(image_path))
        mask = np.array(Image.open(mask_path))

        image_patches = self._split_image_into_patches(image)
        mask_patches = self._split_image_into_patches(mask)

        #Apply transformations to each patch individually
        augmented_image_patches = []
        augmented_mask_patches = []
        for i in range(len(image_patches)):
            augmented_image, augmented_mask = self._apply_transform(image_patches[i], mask_patches[i])
            augmented_image_patches.append(augmented_image)
            augmented_mask_patches.append(augmented_mask)

        # Stack all patches into one tensor of shape (num_patches, C, H, W);
        # the DataLoader will add its own batch dimension on top of this
        augmented_image = torch.stack(augmented_image_patches)
        augmented_mask = torch.stack(augmented_mask_patches)

        return augmented_image, augmented_mask

def double_conv(in_c, out_c):
    conv = nn.Sequential(
        nn.Conv2d(in_c, out_c, kernel_size=3),
        nn.ReLU(inplace=True),
        nn.Conv2d(out_c, out_c, kernel_size=3),
        nn.ReLU(inplace=True),
    )
    return conv

def crop_img(tensor, target_tensor):
    target_size = target_tensor.size()[2]
    tensor_size = tensor.size()[2]
    delta = tensor_size - target_size
    delta = delta // 2
    return tensor[:, :, delta:tensor_size-delta, delta:tensor_size-delta]

class UNet(nn.Module):
    def __init__(self, in_channels=3, out_channels=7):

        super(UNet, self).__init__()
        self.max_pool_2x2 = nn.MaxPool2d(kernel_size=2, stride=2)

        self.down_conv_1 = double_conv(3, 64)
        self.down_conv_2 = double_conv(64, 128)
        self.down_conv_3 = double_conv(128, 256)
        self.down_conv_4 = double_conv(256, 512)
        self.down_conv_5 = double_conv(512, 1024)

        self.up_trans_1 = nn.ConvTranspose2d(
            in_channels=1024,
            out_channels=512,
            kernel_size=2,
            stride=2)
        
        self.up_conv_1 = double_conv(1024, 512)

        self.up_trans_2 = nn.ConvTranspose2d(
            in_channels=512,
            out_channels=256,
            kernel_size=2,
            stride=2)
        
        self.up_conv_2 = double_conv(512, 256)

        self.up_trans_3 = nn.ConvTranspose2d(
            in_channels=256,
            out_channels=128,
            kernel_size=2,
            stride=2)
        
        self.up_conv_3 = double_conv(256, 128)

        self.up_trans_4 = nn.ConvTranspose2d(
            in_channels=128,
            out_channels=64,
            kernel_size=2,
            stride=2)
        
        self.up_conv_4 = double_conv(128, 64)

        self.out = nn.Conv2d(
            in_channels=64,
            out_channels=7, # number of object classes to segment
            kernel_size=1
        )


    def forward(self, image):
        #encoder
        x1 = self.down_conv_1(image) #
        x2 = self.max_pool_2x2(x1)
        x3 = self.down_conv_2(x2) #
        x4 = self.max_pool_2x2(x3)
        x5 = self.down_conv_3(x4) #
        x6 = self.max_pool_2x2(x5)
        x7 = self.down_conv_4(x6)
        x8 = self.max_pool_2x2(x7)
        x9 = self.down_conv_5(x8)

        #decoder
        x = self.up_trans_1(x9)
        y = crop_img(x7, x)
        x = self.up_conv_1(torch.cat([x, y], 1))

        x = self.up_trans_2(x)
        y = crop_img(x5, x)
        x = self.up_conv_2(torch.cat([x, y], 1))

        x = self.up_trans_3(x)
        y = crop_img(x3, x)
        x = self.up_conv_3(torch.cat([x, y], 1))

        x = self.up_trans_4(x)
        y = crop_img(x1, x)
        x = self.up_conv_4(torch.cat([x, y], 1))

        x = self.out(x)
        print(x.size())
        return x

# calculates the Dice coefficient
def dice_coefficient(pred, target):
    smooth = 1.0
    intersection = (pred * target).sum()
    dice = (2.0 * intersection + smooth) / (pred.sum() + target.sum() + smooth)
    return dice

# train-function
def train(model, train_loader, criterion, optimizer, device):
    model.train()
    running_loss = 0.0
    running_dice = 0.0

    for inputs, masks in tqdm(train_loader, desc="Training", leave=False):
        inputs, masks = inputs.to(device), masks.to(device)

        
        optimizer.zero_grad()
        
        outputs = model(inputs)
        print(f"outputs size: {outputs.size()}")
        loss = criterion(outputs, masks)
        dice = dice_coefficient(outputs.argmax(dim=1), masks.argmax(dim=1))
        
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
        running_dice += dice.item()
        
    epoch_loss = running_loss / len(train_loader)
    epoch_dice = running_dice / len(train_loader)
    return epoch_loss, epoch_dice


# Configuration for the training
image_dir = "/Users/.../img_path/"
mask_dir = "/Users/.../mask_path/"
patch_size = 572
batch_size = 16
learning_rate = 0.001
num_epochs = 1
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Instantiate the dataset and DataLoader
transform = Compose([
    ToTensor(),
    Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])
dataset = SatelliteDataset(image_dir, mask_dir, transform=transform, patch_size=patch_size)
train_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

# Instantiate the model and the optimizer
model = UNet(in_channels=3, out_channels=7)  # adjust out_channels to the number of object classes to be segmented
model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# start the training
for epoch in range(num_epochs):
    print("hi")
    print(f"Epoch {epoch + 1}/{num_epochs}")
    train_loss, train_dice = train(model, train_loader, criterion, optimizer, device)
    print(f"Train Loss: {train_loss:.4f} | Train Dice: {train_dice:.4f}")
    print("hallo")

# save the model
torch.save(model.state_dict(), "trainiertes_modell.pth")


I searched the internet and Stack Overflow for a solution to this problem. I found some similar error messages, but the solutions didn't help, because I can't figure out why my code adds the extra dimension of size 16. I also asked ChatGPT, but it couldn't find a working solution either.
I hope some of you can help me understand what is going on. Thanks in advance!

tyg4sfes 1#

You are generating a whole series of images inside the dataset's __getitem__ while using a typical training loop that is designed for one image per sample.
My guess is that the output of that method has size (16, 3, 572, 572) (check it in a debugger); the DataLoader then delivers a batch of size 1 on top of it, and you end up with a five-dimensional tensor of size [1, 16, 3, 572, 572], which does not fit Conv2d.
You can either:
1. fix __getitem__ so that it yields tensors of shape (C, H, W), or
2. reshape inputs and masks from (B, 16, C, H, W) to (B*16, C, H, W) before the forward pass (a sketch follows below).
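A minimal sketch of the second option, for illustration only. It assumes inputs arrives from the loader as (B, 16, C, H, W) and masks as (B, 16, H, W) class-index maps; the mask shape is an assumption, since the poster's transform pipeline is also applied to the masks.

import torch

# Toy tensors with the shapes from the traceback: a batch of 1 sample holding 16 patches.
inputs = torch.randn(1, 16, 3, 572, 572)           # (B, P, C, H, W)
masks = torch.randint(0, 7, (1, 16, 572, 572))     # (B, P, H, W), assumed class-index maps

# Fold the patch dimension P into the batch dimension so conv2d receives a 4D tensor.
b, p = inputs.shape[:2]
inputs = inputs.reshape(b * p, *inputs.shape[2:])  # (16, 3, 572, 572)
masks = masks.reshape(b * p, *masks.shape[2:])     # (16, 572, 572)

print(inputs.shape, masks.shape)                   # torch.Size([16, 3, 572, 572]) torch.Size([16, 572, 572])

In the training loop, this reshape would go right after unpacking inputs, masks from train_loader and before the .to(device) calls. The first option would instead make __len__ count individual patches and have __getitem__ return a single (C, H, W) patch together with its mask, so that the DataLoader's default collation alone yields the (B, C, H, W) batches the model expects.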
