PyTorch: Albumentations returns "KeyError: 'labels'"

Asked by bjp0bcyl on 2023-08-05

This project is a PyTorch Mask R-CNN implementation for cell segmentation.
The base model runs fine, but to increase the number of training samples I am trying to integrate the Albumentations library.
Python: 3.10.6, Torch: 2.0.1+cu118, NumPy: 1.22.4, PIL: 9.4.0, Albumentations: 1.2.1
After loading the images from the computer's file directory as NumPy arrays, the following is placed in the Dataset class:

import os
import numpy as np
import torch
from PIL import Image
from torch.utils.data import Dataset

class CellDataset(Dataset):
# images are jpgs while masks are pngs
  def __init__(self, image_dir, mask_dir, height = 250, width = 250, transform = None):
    self.image_dir = image_dir
    self.mask_dir = mask_dir
    self.transform = transform
    self.images = os.listdir(image_dir)
    self.masks = os.listdir(mask_dir)

  def __len__(self):
    return len(self.images)

  def __getitem__(self,index):
    assert len(self.images) == len(self.masks), "Images and Masks folder sizes do not match"
    img_path = os.path.join(self.image_dir, self.images[index])
    mask_path = os.path.join(self.mask_dir, self.masks[index])

    if img_path.endswith(".npy"):
      image = np.load(img_path)
    else:
      image = np.array(Image.open(img_path).convert("RGB"))

    if mask_path.endswith(".npy"):
      mask = np.load(mask_path)
    else:
      mask = np.array(Image.open(mask_path))
    obj_ids = np.unique(mask)
    obj_ids = obj_ids[1:] # removing first item of list, which represents background
    num_objs = len(obj_ids) # finding number of objects (i.e. number of cells)

    # creates boolean mask of all objects together
    masks = np.zeros((num_objs, mask.shape[0], mask.shape[1]))
    for i in range(num_objs):
      masks[i][mask==i+1] = True

    # creates segmentation boxes based on rect coordinates
    boxes = []
    for i in range(num_objs):
      pos = np.where(masks[i]) # create new array with only data points of the object
      xmin = np.min(pos[1]) # find max/min of x & y axes
      xmax = np.max(pos[1])
      ymin = np.min(pos[0])
      ymax = np.max(pos[0])
      boxes.append([xmin, ymin, xmax, ymax])
    boxes = list(boxes) # boxes need to be in a list, according to article

    ## creating class labels
    class_labels = torch.ones((num_objs,),dtype=torch.int64)

    # Albumentation transformations
    if self.transform is not None:
      augmentations = self.transform(image=image, mask=mask, bboxes=boxes, class_labels=class_labels)
      image = augmentations["image"]
      mask = augmentations["mask"]
      boxes = augmentations["bboxes"]
      class_labels = augmentations["class_labels"]

    boxes = torch.as_tensor(boxes, dtype = torch.float32)
    masks = torch.as_tensor(masks, dtype=torch.uint8)

    target = {}
    target["boxes"] = boxes
    target["masks"] = masks
    target["class_labels"] = class_labels

    return image, target
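A note on the Albumentations call above: any per-box label arrays passed to the composed transform (here class_labels=...) must also be declared in BboxParams via label_fields, under exactly the same name; otherwise the labels are not kept in sync with boxes that get dropped by crops (and, depending on the version, the extra keyword may be rejected). The bbox_params defined later uses label_fields=[], so it would need to be declared along these lines (a minimal sketch using the pascal_voc format from the question):

import albumentations as A

# 'class_labels' must match the keyword passed to the transform call
bbox_params = A.BboxParams(format='pascal_voc', label_fields=['class_labels'])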

A separate file creates a train_loader and val_loader from the directories containing the training and validation data:

def get_loaders(
    train_img_dir,
    train_mask_dir,
    val_img_dir,
    val_mask_dir,
    batch_size,
    train_transform,
    val_transform,
    num_workers=4,
    pin_memory=True,
):
    train_ds = CellDataset(
        image_dir=train_img_dir,
        mask_dir=train_mask_dir,
        transform=train_transform,
    )

    train_loader = DataLoader(
        train_ds,
        batch_size=batch_size,
        num_workers=num_workers,
        pin_memory=pin_memory,
        shuffle=True,
        collate_fn = collate_fn
    )

    val_ds = CellDataset(
        image_dir=val_img_dir,
        mask_dir=val_mask_dir,
        transform=val_transform,
    )

    val_loader = DataLoader(
        val_ds,
        batch_size=batch_size,
        num_workers=num_workers,
        pin_memory=pin_memory,
        shuffle=False,
        collate_fn = collate_fn
    )

    return train_loader, val_loader
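collate_fn is referenced in both loaders but never defined in the code shown. Given how batches are indexed in the training loop below (dt[0][0] is the first sample's image and dt[0][1] its target), it presumably returns each batch as a plain list of (image, target) pairs instead of letting the default collation try to stack variable-sized targets. A minimal sketch under that assumption:

def collate_fn(batch):
    # Detection targets have a variable number of boxes/masks per image,
    # so the default collate (which stacks tensors) cannot be used;
    # return the list of (image, target) pairs unchanged.
    return list(batch)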


A separate file loads the dataset and runs the training loop:

model = torchvision.models.detection.maskrcnn_resnet50_fpn_v2()
bbox_params = A.BboxParams(format='pascal_voc',label_fields = [])

def main():
    # defining transformations. Will be accessed in dataloader
    train_transform = A.Compose([
        # A.Rotate(limit=35, p=1.0),
        A.RandomSizedBBoxSafeCrop(height=image_height, width=image_width,erosion_rate=0.2),
        A.Resize(height=image_height, width=image_width),
        A.Normalize(
            mean=[0.0, 0.0, 0.0],
            std=[1.0, 1.0, 1.0],
            max_pixel_value=255.0,
            ),
        ToTensorV2()
        ],bbox_params=bbox_params, p=1)

    val_transforms = A.Compose([
        A.RandomSizedBBoxSafeCrop(height=image_height, width=image_width,erosion_rate=0.2),
        A.Resize(height=image_height, width=image_width),
        A.Normalize(
            mean=[0.0, 0.0, 0.0],
            std=[1.0, 1.0, 1.0],
            max_pixel_value=255.0,
            ),
        ToTensorV2()
        ],bbox_params=bbox_params, p=1)
    loss_fn = nn.BCEWithLogitsLoss()
    # optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    optimizer = torch.optim.SGD(model.parameters(), lr = learning_rate, momentum=momentum, weight_decay=weight_decay)

    train_loader, val_loader = get_loaders(
        train_img_dir,
        train_mask_dir,
        val_img_dir,
        val_mask_dir,
        batch_size,
        train_transform,
        val_transforms,
        num_workers,
        pin_memory,
    )
    scaler = torch.cuda.amp.GradScaler()
    model.to(device)

    for epoch in range(num_epochs):
        train_epoch_loss = 0
        val_epoch_loss = 0
        model.train()

        for i, dt in enumerate(train_loader):
          img = [dt[0][0].to(device), dt[1][0].to(device)]
          targ = [dt[0][1], dt[1][1]]
          targets = [{k: v.to(device) for k, v in t.items()} for t in targ]
          # targets = [{k: v for k, v in t.items()} for t in targ]
          loss = model(img, targets)
          print(loss)
          losses = sum([l for l in loss.values()])
          train_epoch_loss += losses.cpu().detach().numpy()
          optimizer.zero_grad()
          losses.backward()
          optimizer.step()
        all_train_losses.append(train_epoch_loss)
        with torch.no_grad():
          for j, dt in enumerate(val_loader):
            img = [dt[0][0].to(device), dt[1][0].to(device)]
            targ = [dt[0][1], dt[1][1]]
            targets = [{k: v.to(device) for k, v in t.items()} for t in targ]
            # targets = [{k: v for k, v in t.items()} for t in targ]
            loss = model(img, targets)
            losses = sum([l for l in loss.values()])
            val_epoch_loss += losses.cpu().detach().numpy()
          all_val_losses.append(val_epoch_loss)
        print(epoch, " ", train_epoch_loss, " ", val_epoch_loss)


The resulting error message:

/usr/local/lib/python3.10/dist-packages/torch/cuda/amp/grad_scaler.py:120: UserWarning: torch.cuda.amp.GradScaler is enabled, but CUDA is not available.  Disabling.
  warnings.warn("torch.cuda.amp.GradScaler is enabled, but CUDA is not available.  Disabling.")

---------------------------------------------------------------------------

KeyError                                  Traceback (most recent call last)

<ipython-input-8-692fc61b0d19> in <cell line: 162>()
    161 
    162 if __name__ == "__main__":
--> 163     main()

4 frames

<ipython-input-8-692fc61b0d19> in main()
    121           targets = [{k: v.to(device) for k, v in t.items()} for t in targ]
    122           # targets = [{k: v for k, v in t.items()} for t in targ]
--> 123           loss = model(img, targets)
    124           print(loss)
    125           losses = sum([l for l in loss.values()])

/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py in _call_impl(self, *args, **kwargs)
   1499                 or _global_backward_pre_hooks or _global_backward_hooks
   1500                 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1501             return forward_call(*args, **kwargs)
   1502         # Do not call functions when jit is used
   1503         full_backward_hooks, non_full_backward_hooks = [], []

/usr/local/lib/python3.10/dist-packages/torchvision/models/detection/generalized_rcnn.py in forward(self, images, targets)
    103             features = OrderedDict([("0", features)])
    104         proposals, proposal_losses = self.rpn(images, features, targets)
--> 105         detections, detector_losses = self.roi_heads(features, proposals, images.image_sizes, targets)
    106         detections = self.transform.postprocess(detections, images.image_sizes, original_image_sizes)  # type: ignore[operator]
    107 

/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py in _call_impl(self, *args, **kwargs)
   1499                 or _global_backward_pre_hooks or _global_backward_hooks
   1500                 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1501             return forward_call(*args, **kwargs)
   1502         # Do not call functions when jit is used
   1503         full_backward_hooks, non_full_backward_hooks = [], []

/usr/local/lib/python3.10/dist-packages/torchvision/models/detection/roi_heads.py in forward(self, features, proposals, image_shapes, targets)
    746                 if not t["boxes"].dtype in floating_point_types:
    747                     raise TypeError(f"target boxes must of float type, instead got {t['boxes'].dtype}")
--> 748                 if not t["labels"].dtype == torch.int64:
    749                     raise TypeError(f"target labels must of int64 type, instead got {t['labels'].dtype}")
    750                 if self.has_keypoint():

KeyError: 'labels'


The specific line that raises the error checks whether t["labels"].dtype is torch.int64, and the labels are already formatted as int64 in the Dataset class, so it is unclear why the check fails.

Answer 1 (6yt4nkrj):

First, understand that your error is not that the dtype isn't int64; it's that the 'labels' field does not exist at all, so execution never reaches the actual comparison.
Beyond knowing that your target is clearly missing this required field, you haven't really shown enough code, since nowhere in the posted code is the content of train_loader actually shown.
My best guess from context is that

target = {}
target["boxes"] = boxes
target["masks"] = masks
target["class_labels"] = class_labels

is the target portion consumed via train_loader. If so, the field name "class_labels" is clearly wrong; the model expects the field to be named exactly "labels".
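So renaming the key when the target dict is built should resolve the KeyError. A minimal sketch of the corrected block (the Python variable can keep its name; only the dictionary key matters to torchvision):

target = {}
target["boxes"] = boxes
target["masks"] = masks
# torchvision's Mask R-CNN expects the key "labels"; the conversion is
# defensive, since Albumentations may hand the labels back as a plain list
target["labels"] = torch.as_tensor(class_labels, dtype=torch.int64)

return image, target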
