PyTorch 目标检测 - RuntimeError: stack expects each tensor to be equal size(stack 要求每个张量的大小相同)

thtygnil  于 2023-02-16  发布在  其他
关注(0)|答案(1)|浏览(352)

我创建了一个名为ReceiptDataset的目标检测自定义数据集,如下所示。

from torch.nn.utils.rnn import pad_sequence
import torch.nn.functional as F

class ReceiptDataset(torch.utils.data.Dataset):
  """Object-detection dataset pairing images in a directory with box labels.

  Each sample is ``(image, target)``. ``image`` is the RGB image resized to
  ``(height, width)`` as float32 and divided by 255. ``target`` is a dict with
  the tensors ``boxes`` (N x 4, ``[x_min, y_min, x_max, y_max]`` in resized
  coordinates), ``labels``, ``image_id``, ``area`` and ``iscrowd``.

  N differs from image to image, so samples cannot be batched with the default
  DataLoader collate function (it tries to ``torch.stack`` the targets); a
  ``collate_fn`` that keeps per-image targets in a list is required.

  Label names are mapped to integer ids through the module-level ``classes``
  sequence, which must be defined by the surrounding script.
  """

  def __init__(self, train_dir, width, height, labels, transforms=None):
    """Index the images of ``train_dir``.

    Args:
      train_dir: Directory containing the image files.
      width: Width in pixels that every image is resized to.
      height: Height in pixels that every image is resized to.
      labels: Mapping from image file name to an iterable of
        ``(x, y, box_width, box_height, label)`` annotations expressed in the
        coordinates of the original (un-resized) image.
      transforms: Optional callable invoked as
        ``transforms(image=..., bboxes=..., labels=...)`` that returns a
        mapping with at least ``"image"`` and ``"bboxes"`` entries.
    """
    # os.listdir returns entries in arbitrary order; sorting keeps the
    # index -> image mapping (and therefore image_id) reproducible.
    self.images = sorted(os.listdir(train_dir))
    self.width = width
    self.height = height
    self.train_dir = train_dir
    self.labels = labels
    self.transforms = transforms

  @staticmethod
  def _to_box_tensor(boxes):
    """Convert ``boxes`` to a float32 tensor, using shape (0, 4) when empty."""
    boxes = torch.as_tensor(boxes, dtype=torch.float32)
    if boxes.numel() == 0:
      # An empty list becomes shape (0,), which breaks column indexing below.
      boxes = boxes.reshape(0, 4)
    return boxes

  def __getitem__(self, idx):
    """Load, resize and annotate the sample at position ``idx``.

    Returns:
      ``(image, target)`` as described in the class docstring.

    Raises:
      FileNotFoundError: If the image file cannot be read by OpenCV.
      KeyError: If ``labels`` has no entry for the image file name.
      ValueError: If an annotation label is not present in ``classes``.
    """
    img_name = self.images[idx]
    img_path = os.path.join(self.train_dir, img_name)

    img = cv2.imread(img_path)
    if img is None:
      # cv2.imread reports failure by returning None instead of raising.
      raise FileNotFoundError(f"Could not read image: {img_path}")

    img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB).astype(np.float32)
    # The third positional parameter of cv2.resize is `dst`, so the
    # interpolation flag has to be passed by keyword to take effect.
    img_res = cv2.resize(img_rgb, (self.width, self.height),
                         interpolation=cv2.INTER_AREA)
    img_res /= 255.0

    # Size of the original image, needed to rescale the annotations.
    ht, wt, _ = img.shape

    boxes = []
    lbls = []
    for x, y, box_wt, box_ht, lbl in self.labels[str(img_name)]:
      # Convert (x, y, w, h) to corner format and rescale to the resized image.
      boxes.append([
        (x / wt) * self.width,
        (y / ht) * self.height,
        ((x + box_wt) / wt) * self.width,
        ((y + box_ht) / ht) * self.height,
      ])
      lbls.append(classes.index(str(lbl)))

    boxes = self._to_box_tensor(boxes)
    lbls = torch.as_tensor(lbls, dtype=torch.int64)

    if self.transforms:
      trans = self.transforms(image=img_res, bboxes=boxes, labels=lbls)
      img_res = trans["image"]
      boxes = self._to_box_tensor(trans["bboxes"])
      # A transform may drop boxes (e.g. after a crop); take its labels when
      # it returns them so labels stay aligned with the surviving boxes.
      if "labels" in trans:
        lbls = torch.as_tensor(trans["labels"], dtype=torch.int64)

    # Derived fields are computed from the final boxes so they stay consistent
    # with whatever the transform produced.
    area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
    iscrowd = torch.zeros((boxes.shape[0],), dtype=torch.int64)

    target = {
      "boxes": boxes,
      "labels": lbls,
      "image_id": torch.as_tensor(idx),
      "area": area,
      "iscrowd": iscrowd,
    }
    return img_res, target

  def __len__(self):
    """Return the number of image files found in ``train_dir``."""
    return len(self.images)

我创建了一个实例:

# Build the training dataset; `transforms` is left at its default of None.
train_dataset = ReceiptDataset(
  train_dir="label-detector/images",
  width=width,
  height=height,
  labels=plabels,
)

我的训练代码片段是:

from engine import train_one_epoch, evaluate

# Main training loop: one pass over train_loader per epoch, followed by a
# learning-rate scheduler step and an evaluation pass over test_loader.
# NOTE(review): train_one_epoch / evaluate come from the local `engine` module
# (presumably the torchvision detection reference helpers -- confirm).
for epoch in range(num_epochs):
  # Train for one epoch; print_freq=2 is forwarded to the helper.
  train_one_epoch(model,optim,train_loader,device,epoch,print_freq=2)

  # Advance the learning-rate schedule once per epoch, after training.
  lr_scheduler.step()

  # Evaluate the model on the held-out loader after every epoch.
  evaluate(model,test_loader,device)

但每当我运行训练循环时,我都会收到一个运行时错误:

RuntimeError: stack expects each tensor to be equal size, but got [11,4] at entry 0 and [9,4] at entry 1

总共有17个类别,每张图像至少有4个标注。我注意到问题似乎出在数据集类中的标签列表/张量上:它的大小会随图像中标注目标的数量而变化,但我一直找不到解决这个问题的办法。
谢谢大家!

erhoui1w

erhoui1w1#

我通过为DataLoader实现一个自定义的collate函数解决了这个问题,该函数按模型所需的格式返回一个批次的数据。

def collate_fn_seq(batch):
  """Collate detection samples into per-image lists instead of stacked tensors.

  Args:
    batch: Sequence of samples where ``sample[0]`` is a 3-D NumPy image array
      (presumably height x width x channels -- its axes are permuted with
      ``(2, 0, 1)``) and ``sample[1]`` is a dict holding at least the keys
      "boxes", "labels", "image_id", "area" and "iscrowd".

  Returns:
    A tuple ``(imgs, tars)``: ``imgs`` is a list with one permuted tensor per
    sample, and ``tars`` is a list with one new dict per sample containing
    exactly the five keys above. Nothing is stacked, so the number of boxes
    may differ between samples.
  """
  target_keys = ("boxes", "labels", "image_id", "area", "iscrowd")

  # torch.from_numpy shares memory with the array; permute only changes the view.
  imgs = [torch.from_numpy(sample[0]).permute(2, 0, 1) for sample in batch]

  # Rebuild each target with the expected keys only, dropping any extras.
  tars = [{key: sample[1][key] for key in target_keys} for sample in batch]

  return imgs, tars

并将其包含在我的数据加载器中,使用:

# Batching is delegated to collate_fn_seq so that per-image targets are kept
# in a list rather than stacked.
train_loader = torch.utils.data.DataLoader(
  dataset=train_dataset,
  batch_size=8,
  shuffle=True,
  collate_fn=collate_fn_seq,
)

相关问题