I have this code, and it works fine:
import cv2
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from matplotlib import rcParams
import numpy as np
import os
from PIL import Image
from segment_anything import sam_model_registry, SamAutomaticMaskGenerator, SamPredictor
# Set directories for generation images and edit images
base_image_dir = os.path.join("IMG_4297.png")
mask_dir = os.path.join("masks")
edit_image_dir = os.path.join("03_edits")
# Point to your downloaded SAM model
sam_model_filepath = "../segment-anything/segment_anything/sam_vit_h_4b8939.pth"
#sam_model_filepath = "./sam_vit_h_4b8939.pth"
# Initiate SAM model
sam = sam_model_registry["default"](checkpoint=sam_model_filepath)
# Function to display mask using matplotlib
def show_mask(mask, ax):
    color = np.array([30 / 255, 144 / 255, 255 / 255, 0.6])
    h, w = mask.shape[-2:]
    mask_image = mask.reshape(h, w, 1) * color.reshape(1, 1, -1)
    ax.imshow(mask_image)
# Function to display where we've "clicked"
def show_points(coords, labels, ax, marker_size=375):
    pos_points = coords[labels == 1]
    neg_points = coords[labels == 0]
    ax.scatter(
        pos_points[:, 0],
        pos_points[:, 1],
        color="green",
        marker="*",
        s=marker_size,
        edgecolor="white",
        linewidth=1.25,
    )
    ax.scatter(
        neg_points[:, 0],
        neg_points[:, 1],
        color="red",
        marker="*",
        s=marker_size,
        edgecolor="white",
        linewidth=1.25,
    )
# Load chosen image using opencv
image = cv2.imread("./IMG_4297.png")
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
# Display our chosen image
plt.figure(figsize=(10, 10))
plt.imshow(image)
plt.axis("on")
plt.show()
# Set the pixel coordinates for our "click" to assign masks
input_point = np.array([[525, 325]])
input_label = np.array([1])
# Display the point we've clicked on
plt.figure(figsize=(10, 10))
plt.imshow(image)
show_points(input_point, input_label, plt.gca())
plt.axis("on")
plt.show()
# Initiate predictor with Segment Anything model
predictor = SamPredictor(sam)
predictor.set_image(image)
# Use the predictor to gather masks for the point we clicked
masks, scores, logits = predictor.predict(
    point_coords=input_point,
    point_labels=input_label,
    multimask_output=True,
)
# Check the shape - should be three masks of the same dimensions as our image
masks.shape
# Display the possible masks we can select along with their confidence
for i, (mask, score) in enumerate(zip(masks, scores)):
plt.figure(figsize=(10, 10))
plt.imshow(image)
show_mask(mask, plt.gca())
show_points(input_point, input_label, plt.gca())
plt.title(f"Mask {i+1}, Score: {score:.3f}", fontsize=18)
plt.axis("off")
plt.show()
# Choose which mask you'd like to use
chosen_mask = masks[1]
# We'll now reverse the mask so that it is clear and everything else is white
chosen_mask = chosen_mask.astype("uint8")
chosen_mask[chosen_mask != 0] = 255
chosen_mask[chosen_mask == 0] = 1
chosen_mask[chosen_mask == 255] = 0
chosen_mask[chosen_mask == 1] = 255
# create a base blank mask
width = 1512
height = 1512
mask = Image.new("RGBA", (width, height), (0, 0, 0, 1)) # create an opaque image mask
# Convert mask back to pixels to add our mask replacing the third dimension
pix = np.array(mask)
pix[:, :, 3] = chosen_mask
# Convert pixels back to an RGBA image and display
new_mask = Image.fromarray(pix, "RGBA")
new_mask
# We'll save this mask for re-use for our edit
new_mask.save(os.path.join(mask_dir, "new_mask.png"))
But I am trying to do the bottom half of this with a slightly different program / AI language model:
import numpy as np
from lang_sam.utils import draw_image
from PIL import Image
from lang_sam import LangSAM
from heic2png import HEIC2PNG
if __name__ == '__main__':
    heic_img = HEIC2PNG('/Users/Downloads/IMG_4316.heic', quality=70)  # Specify the quality of the converted image
    heic_img.save()  # The converted image will be saved as IMG_4316.png
model = LangSAM()
image_pil = Image.open("/Users/Downloads/IMG_4316.png").convert("RGB")
text_prompt = "wall"
masks, boxes, phrases, logits = model.predict(image_pil, text_prompt)
masks.shape
labels = [f"{phrase} {logit:.2f}" for phrase, logit in zip(phrases, logits)]
image_array = np.asarray(image_pil)
image = draw_image(image_array, masks, boxes, labels)
image = Image.fromarray(np.uint8(image)).convert("RGB")
image.show()
chosen_mask = np.array(image).astype("uint8")
chosen_mask[chosen_mask != 0] = 255
chosen_mask[chosen_mask == 0] = 1
chosen_mask[chosen_mask == 255] = 0
chosen_mask[chosen_mask == 1] = 255
# create a base blank mask
width = 3024
height = 3024
mask = Image.new("RGBA", (width, height), (0, 0, 0, 1)) # create an opaque image mask
# Convert mask back to pixels to add our mask replacing the third dimension
pix = np.array(mask)
pix[:, :, 3] = chosen_mask
# Convert pixels back to an RGBA image and display
new_mask = Image.fromarray(pix, "RGBA")
new_mask.show()
new_mask.save()
I think the problem is the format of the converted image at this line:
pix[:, :, 3] = chosen_mask
Do I need to perform a conversion or some other operation on chosen_mask to make the image work here?
The full error is:
Traceback (most recent call last):
  File "/Users/Desktop/code/lang-segment-anything/app.py", line 112, in <module>
    pix[:, :, 2] = chosen_mask
    ~~~^^^^^^^^^
ValueError: could not broadcast input array from shape (3024,3024,3) into shape (3024,3024)
1 Answer
When you do this:
mask = Image.new("RGBA", (width, height), (0, 0, 0, 1))
you are creating a 3024x3024 image with 4 channels (i.e. RGBA), so your Numpy array pix will have the shape (3024, 3024, 4). When you do this:
chosen_mask = np.array(image).astype("uint8")
you are making an image with 3 channels (i.e. RGB), so your Numpy array chosen_mask will have the shape (3024, 3024, 3). So the problem is that when you do this:
pix[:, :, 3] = chosen_mask
you are saying you want to set the single alpha channel at each pixel position in pix to the 3 RGB channels at that position in chosen_mask, and that cannot work. You cannot push the R, G and B channels of chosen_mask into the alpha channel, because there is only room for one value per position in the alpha channel.
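To make the shape mismatch concrete, here is a minimal illustration with made-up toy dimensions (not the poster's actual sizes):
import numpy as np
pix = np.zeros((4, 4, 4), dtype=np.uint8)  # RGBA image -> shape (H, W, 4)
chosen_mask = np.zeros((4, 4, 3), dtype=np.uint8)  # RGB image -> shape (H, W, 3)
pix[:, :, 3] = chosen_mask  # raises ValueError: could not broadcast (4, 4, 3) into (4, 4)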
So you either need to make chosen_mask a single-channel image by creating it in "L" (greyscale) mode, as sketched below:
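A minimal sketch of that option, assuming the mask is derived from the same PIL image the script already holds (the original answer's exact code is not preserved in this copy):
# Hypothetical: convert to a single-channel greyscale ("L") image first,
# so the resulting array has shape (3024, 3024) and fits into one channel.
chosen_mask = np.array(image.convert("L")).astype("uint8")
pix[:, :, 3] = chosen_mask  # both sides are now (3024, 3024)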
Or, you need to choose which of the three RGB channels of chosen_mask you want to put into the A channel of pix, e.g. just put the green channel of chosen_mask into pix's A channel:
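A sketch of that second option, again using the variables from the question (channel index 1 is green in an RGB array):
# Hypothetical: keep only the green channel of the 3-channel mask.
pix[:, :, 3] = chosen_mask[:, :, 1]  # a (3024, 3024) slice assigned to the alpha channel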