opencv python中基于深度图的曲面法线计算

g2ieeal7  于 2023-01-31  发布在  Python
关注(0)|答案(3)|浏览(299)

我试着用python实现下面的c++代码:

depth.convertTo(depth, CV_64FC1); // I do not know why it is needed to be 
transformed to 64bit image my input is 32bit

Mat nor(depth.size(), CV_64FC3);

for(int x = 1; x < depth.cols - 1; ++x)
{
   for(int y = 1; y < depth.rows - 1; ++y)
   {
      Vec3d t(x,y-1,depth.at<double>(y-1, x)/*depth(y-1,x)*/);
      Vec3d l(x-1,y,depth.at<double>(y, x-1)/*depth(y,x-1)*/);
      Vec3d c(x,y,depth.at<double>(y, x)/*depth(y,x)*/);
      Vec3d d = (l-c).cross(t-c);
      Vec3d n = normalize(d);
      nor.at<Vec3d>(y,x) = n;
   }
}

imshow("normals", nor);

python代码:

d_im = cv2.imread("depth.jpg")
d_im = d_im.astype("float64")

normals = np.array(d_im, dtype="float32")
h,w,d = d_im.shape
for i in range(1,w-1):
  for j in range(1,h-1):
    t = np.array([i,j-1,d_im[j-1,i,0]],dtype="float64")
    f = np.array([i-1,j,d_im[j,i-1,0]],dtype="float64")
    c = np.array([i,j,d_im[j,i,0]] , dtype = "float64")
    d = np.cross(f-c,t-c)
    n = d / np.sqrt((np.sum(d**2)))
    normals[j,i,:] = n

cv2.imwrite("normal.jpg",normals*255)

输入图像:

c++代码输出:

我的python代码输出:

我找不出这些差异的原因。我怎么能得到c++代码输出与python?

x8diyxa7

x8diyxa71#

正如 user8408080 所说,您的输出似乎有jpeg格式引起的伪像。还要记住,导入8位图像作为深度图不会给予与直接使用深度图矩阵相同的结果。
关于你的python代码,我的建议是使用向量化函数,尽可能避免循环(它非常慢)。

zy, zx = np.gradient(d_im)  
# You may also consider using Sobel to get a joint Gaussian smoothing and differentation
# to reduce noise
#zx = cv2.Sobel(d_im, cv2.CV_64F, 1, 0, ksize=5)     
#zy = cv2.Sobel(d_im, cv2.CV_64F, 0, 1, ksize=5)

normal = np.dstack((-zx, -zy, np.ones_like(d_im)))
n = np.linalg.norm(normal, axis=2)
normal[:, :, 0] /= n
normal[:, :, 1] /= n
normal[:, :, 2] /= n

# offset and rescale values to be in 0-255
normal += 1
normal /= 2
normal *= 255

cv2.imwrite("normal.png", normal[:, :, ::-1])
kuhbmx9i

kuhbmx9i2#

@sowlosc的答案没有考虑摄像头的内在特性
@百川的回答有明显bug
实现了一个简洁、正确、快速、易用的get_surface_normal_by_depth函数,并与修改后的@百川方案进行了比较,验证了结果的正确性。

import cv2
import numpy as np

def get_surface_normal_by_depth(depth, K=None):
    """
    depth: (h, w) of float, the unit of depth is meter
    K: (3, 3) of float, the depth camere's intrinsic
    """
    K = [[1, 0], [0, 1]] if K is None else K
    fx, fy = K[0][0], K[1][1]

    dz_dv, dz_du = np.gradient(depth)  # u, v mean the pixel coordinate in the image
    # u*depth = fx*x + cx --> du/dx = fx / depth
    du_dx = fx / depth  # x is xyz of camera coordinate
    dv_dy = fy / depth

    dz_dx = dz_du * du_dx
    dz_dy = dz_dv * dv_dy
    # cross-product (1,0,dz_dx)X(0,1,dz_dy) = (-dz_dx, -dz_dy, 1)
    normal_cross = np.dstack((-dz_dx, -dz_dy, np.ones_like(depth)))
    # normalize to unit vector
    normal_unit = normal_cross / np.linalg.norm(normal_cross, axis=2, keepdims=True)
    # set default normal to [0, 0, 1]
    normal_unit[~np.isfinite(normal_unit).all(2)] = [0, 0, 1]
    return normal_unit

def get_normal_map_by_point_cloud(depth, K):
    height, width = depth.shape

    def normalization(data):
        mo_chang = np.sqrt(
            np.multiply(data[:, :, 0], data[:, :, 0])
            + np.multiply(data[:, :, 1], data[:, :, 1])
            + np.multiply(data[:, :, 2], data[:, :, 2])
        )
        mo_chang = np.dstack((mo_chang, mo_chang, mo_chang))
        return data / mo_chang

    x, y = np.meshgrid(np.arange(0, width), np.arange(0, height))
    x = x.reshape([-1])
    y = y.reshape([-1])
    xyz = np.vstack((x, y, np.ones_like(x)))
    pts_3d = np.dot(np.linalg.inv(K), xyz * depth.reshape([-1]))
    pts_3d_world = pts_3d.reshape((3, height, width))
    f = (
        pts_3d_world[:, 1 : height - 1, 2:width]
        - pts_3d_world[:, 1 : height - 1, 1 : width - 1]
    )
    t = (
        pts_3d_world[:, 2:height, 1 : width - 1]
        - pts_3d_world[:, 1 : height - 1, 1 : width - 1]
    )
    normal_map = np.cross(f, t, axisa=0, axisb=0)
    normal_map = normalization(normal_map)
    return normal_map

vis_normal = lambda normal: np.uint8((normal + 1) / 2 * 255)[..., ::-1]

normal1 = get_surface_normal_by_depth(depth, K)    #  spend time: 60ms
normal2 = get_normal_map_by_point_cloud(depth, K)  #  spend time: 90ms

cv2.imwrite("normal1.png", vis_normal(normal1))
cv2.imwrite("normal2.png", vis_normal(normal2))

结果:正常1.png和正常2.png
第一节第一节第一节第一节第一次

o2g1uqev

o2g1uqev3#

代码(矩阵计算)应为:

def normalization(data):
   mo_chang =np.sqrt(np.multiply(data[:,:,0],data[:,:,0])+np.multiply(data[:,:,1],data[:,:,1])+np.multiply(data[:,:,2],data[:,:,2]))
   mo_chang = np.dstack((mo_chang,mo_chang,mo_chang))
   return data/mo_chang

x,y=np.meshgrid(np.arange(0,width),np.arange(0,height))
x=x.reshape([-1])
y=y.reshape([-1])
xyz=np.vstack((x,y,np.ones_like(x)))
pts_3d=np.dot(np.linalg.inv(K),xyz*img1_depth.reshape([-1]))
pts_3d_world=pts_3d.reshape((3,height,width))
f= pts_3d_world[:,1:height-1,2:width]-pts_3d_world[:,1:height-1,1:width-1]
t= pts_3d_world[:,2:height,1:width-1]-pts_3d_world[:,1:height-1,1:width-1]
normal_map=np.cross(f,l,axisa=0,axisb=0)
normal_map=normalization(normal_map)
normal_map=normal_map*0.5+0.5
alpha = np.full((height-2,width-2,1), (1.), dtype="float32")
normal_map=np.concatenate((normal_map,alpha),axis=2)

1.我们应该使用摄像头的内部函数“K”。我认为f和t的值是基于摄像头坐标中的3D点。
1.对于法向量,(-1,-1,100)和(255,255,100)在8比特图像中是相同的颜色,但是它们是完全不同的法向量,因此我们应该通过normal_map=normal_map*0.5+0.5将法向量值Map到(0,1)。

相关问题