numpy ValueError:无法将输入数组从形状(3024,3024,3)广播到形状(3024,3024)

ylamdve6  于 2023-11-18  发布在  其他
关注(0)|答案(1)|浏览(96)

我有下面这段代码,它运行得很好:

import cv2
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from matplotlib import rcParams
import numpy as np
import os
from PIL import Image
from segment_anything import sam_model_registry, SamAutomaticMaskGenerator, SamPredictor

# Set directories for generation images and edit images
# NOTE(review): despite its name, base_image_dir is assigned an image file
# name ("IMG_4297.png") rather than a directory — confirm intent.
base_image_dir = os.path.join("IMG_4297.png")
mask_dir = os.path.join("masks")
edit_image_dir = os.path.join("03_edits")

# Point to your downloaded SAM model
sam_model_filepath = "../segment-anything/segment_anything/sam_vit_h_4b8939.pth"
#sam_model_filepath = "./sam_vit_h_4b8939.pth"

# Initiate SAM model
# The "default" entry of the registry is called with the checkpoint path to
# build the model object used by the predictor later on.
sam = sam_model_registry["default"](checkpoint=sam_model_filepath)

# Function to display mask using matplotlib
def show_mask(mask, ax):
    """Draw ``mask`` on ``ax`` as a tinted, semi-transparent overlay.

    Args:
        mask: Array whose last two dimensions are (height, width); it is
            reshaped to exactly that 2-D size before being tinted.
        ax: Object providing ``imshow`` (e.g. a matplotlib axes).
    """
    n_rows, n_cols = mask.shape[-2:]
    # Four-component tint; every mask value is multiplied by each component,
    # so zero-valued mask pixels produce an all-zero overlay pixel.
    tint = np.array([30 / 255, 144 / 255, 255 / 255, 0.6])
    flat_mask = mask.reshape(n_rows, n_cols)
    overlay = flat_mask[:, :, np.newaxis] * tint[np.newaxis, np.newaxis, :]
    ax.imshow(overlay)

# Function to display where we've "clicked"
def show_points(coords, labels, ax, marker_size=375):
    """Scatter the prompt points on ``ax`` as stars, coloured by label.

    Rows of ``coords`` whose label equals 1 are drawn in green, rows whose
    label equals 0 in red. Column 0 of ``coords`` is used as x and column 1
    as y.

    Args:
        coords: 2-D array of point coordinates, one row per point.
        labels: Array of labels aligned with the rows of ``coords``.
        ax: Object providing ``scatter`` (e.g. a matplotlib axes).
        marker_size: Value passed to ``scatter`` as ``s``.
    """
    # Select both groups up front, positives first, before anything is drawn.
    groups = [
        (coords[labels == wanted], colour)
        for wanted, colour in ((1, "green"), (0, "red"))
    ]
    for points, colour in groups:
        ax.scatter(
            points[:, 0],
            points[:, 1],
            color=colour,
            marker="*",
            s=marker_size,
            edgecolor="white",
            linewidth=1.25,
        )

# Load chosen image using opencv
image = cv2.imread("./IMG_4297.png")
# Convert the channel order from BGR to RGB before the image is displayed
# and handed to the predictor.
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

# Display our chosen image
plt.figure(figsize=(10, 10))
plt.imshow(image)
plt.axis("on")
plt.show()

# Set the pixel coordinates for our "click" to assign masks
# One point; show_points reads column 0 as x and column 1 as y.
input_point = np.array([[525, 325]])
# Label 1 is the value show_points draws in green (0 would be drawn in red).
input_label = np.array([1])

# Display the point we've clicked on
plt.figure(figsize=(10, 10))
plt.imshow(image)
show_points(input_point, input_label, plt.gca())
plt.axis("on")
plt.show()

# Initiate predictor with Segment Anything model
predictor = SamPredictor(sam)
predictor.set_image(image)

# Use the predictor to gather masks for the point we clicked
masks, scores, logits = predictor.predict(
    point_coords=input_point,
    point_labels=input_label,
    multimask_output=True,
)

# Check the shape - should be three masks of the same dimensions as our image
# (bare expression: it only displays a value in a notebook/REPL and has no
# effect when run as a script)
masks.shape

# Display the possible masks we can select along with their confidence
# One figure per mask: the image, the tinted mask overlay and the clicked point.
for i, (mask, score) in enumerate(zip(masks, scores)):
    plt.figure(figsize=(10, 10))
    plt.imshow(image)
    show_mask(mask, plt.gca())
    show_points(input_point, input_label, plt.gca())
    plt.title(f"Mask {i+1}, Score: {score:.3f}", fontsize=18)
    plt.axis("off")
    plt.show()

# Choose which mask you'd like to use
# (index 1 selects the second of the masks returned by predictor.predict)
chosen_mask = masks[1]

# We'll now reverse the mask so that it is clear and everything else is white
# Step by step on the uint8 array: non-zero -> 255, zero -> 1 (temporary
# placeholder), 255 -> 0, 1 -> 255. Net effect: pixels that were non-zero end
# up as 0 and pixels that were zero end up as 255.
chosen_mask = chosen_mask.astype("uint8")
chosen_mask[chosen_mask != 0] = 255
chosen_mask[chosen_mask == 0] = 1
chosen_mask[chosen_mask == 255] = 0
chosen_mask[chosen_mask == 1] = 255

# create a base blank mask
# NOTE(review): width/height are hard-coded; the assignment into pix below
# needs chosen_mask to have the matching 2-D shape — confirm they agree with
# the size of the loaded image.
width = 1512
height = 1512
mask = Image.new("RGBA", (width, height), (0, 0, 0, 1))  # RGBA canvas filled with (0, 0, 0, 1)

# Convert mask back to pixels to add our mask replacing the third dimension
pix = np.array(mask)
# Index 3 on the last axis is the A (alpha) channel of the RGBA array, so the
# fill alpha chosen above is overwritten by the inverted mask values.
pix[:, :, 3] = chosen_mask

# Convert pixels back to an RGBA image and display
new_mask = Image.fromarray(pix, "RGBA")
new_mask  # bare expression: shows the image only in a notebook/REPL

# We'll save this mask for re-use for our edit
new_mask.save(os.path.join(mask_dir, "new_mask.png"))

但我想在另一个稍有不同的程序/AI 语言模型中复用这段代码的后半部分:

import numpy as np
from lang_sam.utils import draw_image
from PIL import Image
from lang_sam import LangSAM
from heic2png import HEIC2PNG

# Only the HEIC -> PNG conversion is guarded by the __main__ check; everything
# after it is at module level and runs regardless.
if __name__ == '__main__':
    heic_img = HEIC2PNG('/Users/Downloads/IMG_4316.heic', quality=70)  # Specify the quality of the converted image
    # NOTE(review): the script later opens IMG_4316.png, so the converted file
    # is presumably written next to the source under that name — confirm.
    heic_img.save()  # The converted image will be saved as `test.png`

model = LangSAM()
image_pil = Image.open("/Users/Downloads/IMG_4316.png").convert("RGB")
text_prompt = "wall"
masks, boxes, phrases, logits = model.predict(image_pil, text_prompt)

# Bare expression: displays a value only in a notebook/REPL.
masks.shape

labels = [f"{phrase} {logit:.2f}" for phrase, logit in zip(phrases, logits)]
image_array = np.asarray(image_pil)
image = draw_image(image_array, masks, boxes, labels)
# After this line `image` is an RGB (three-channel) PIL image.
image = Image.fromarray(np.uint8(image)).convert("RGB")
image.show()

# NOTE(review): chosen_mask is built from the annotated RGB image rather than
# from `masks`, so it is a three-channel array — (3024, 3024, 3) according to
# the reported error — not a single-channel mask.
chosen_mask = np.array(image).astype("uint8")
# Same inversion as in the first script: non-zero values end up 0, zeros end up 255.
chosen_mask[chosen_mask != 0] = 255
chosen_mask[chosen_mask == 0] = 1
chosen_mask[chosen_mask == 255] = 0
chosen_mask[chosen_mask == 1] = 255

# create a base blank mask
width = 3024    
height = 3024
mask = Image.new("RGBA", (width, height), (0, 0, 0, 1))  # RGBA canvas filled with (0, 0, 0, 1)

# Convert mask back to pixels to add our mask replacing the third dimension
pix = np.array(mask)
# The left-hand side selects one channel (a 2-D slice) while chosen_mask has
# three channels; this is the assignment the question reports as raising
# "could not broadcast input array".
pix[:, :, 3] = chosen_mask

# Convert pixels back to an RGBA image and display
new_mask = Image.fromarray(pix, "RGBA")
new_mask.show()
# NOTE(review): no destination is passed to save() here, unlike the first
# script — confirm; PIL's Image.save normally requires a file name or file object.
new_mask.save()


我认为问题出在下面这一行,与转换后图像的格式有关:

pix[:, :, 3] = chosen_mask


是否需要先对 chosen_mask 做某种转换或其他处理,才能让这一行正常工作?
完整的错误信息是:

> Traceback (most recent call last):
  File "/Users/Desktop/code/lang-segment-anything/app.py", line 112, in <module>
    pix[:, :, 2] = chosen_mask
    ~~~^^^^^^^^^
ValueError: could not broadcast input array from shape (3024,3024,3) into shape (3024,3024)
    ~~~^^^^^^^^^

nr7wwzry

nr7wwzry1#

当你这样做时:

width = 3024    
height = 3024
mask = Image.new("RGBA", (width, height), (0, 0, 0, 1))  # create an opaque image mask

# Convert mask back to pixels to add our mask replacing the third dimension
pix = np.array(mask)

这样就创建了一张 3024x3024、带 4 个通道(即 RGBA)的图像,所以 NumPy 数组 pix 的形状是 [3024,3024,4]。
而当你这样做时:

image = Image.fromarray(np.uint8(image)).convert("RGB")
chosen_mask = np.array(image).astype("uint8")


你得到的是一张有 3 个通道(即 RGB)的图像,所以 NumPy 数组 chosen_mask 的形状是 [3024,3024,3]。
所以,当你执行下面这一行时,问题就出现了:

pix[:, :, 3] = chosen_mask


这相当于要求把 pix 中每个像素位置的 alpha 通道,设置为 chosen_mask 中同一位置的 3 个 RGB 通道,这是行不通的。你不能把 chosen_mask 的 R、G、B 三个通道同时放进 alpha 通道,因为 alpha 通道在每个位置只能容纳一个值。
因此,你需要以 L 模式(单通道灰度)创建 chosen_mask,使其成为单通道图像:

image = Image.fromarray(np.uint8(image)).convert("L")
chosen_mask = np.array(image).astype("uint8")


或者,你需要指定把 chosen_mask 的哪一个 RGB 通道放入 pix 的 A 通道。例如,只把 chosen_mask 的绿色通道放入 pix 的 A 通道:

pix[:, :, 3] = chosen_mask[..., 1]

相关问题