Paddle [GPU matrix multiplication precision loss]: comparison test against PyTorch and Numpy

yi0zb3m4 · posted 21 days ago in Other

Describe the Bug

BUG:

PaddlePaddle's matrix multiplication on the GPU shows a precision discrepancy compared with the results from PyTorch and Numpy.

Note: PyTorch and Numpy agree with each other in precision, although that comparison is not included here.

Versions

  • PaddlePaddle 2.6
  • PyTorch 1.7.1
  • Numpy 1.24.3
  • Python 3.9

Precision

  • float32
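
For context, IEEE-754 float32 stores a 24-bit significand, which corresponds to roughly 7 decimal significant digits; that is why the alignment helper below compares the first 7 significant digits. A minimal check with numpy (an illustrative addition, not part of the original repro):

import numpy as np

print(np.finfo(np.float32).nmant)   # 23 stored mantissa bits (24 with the implicit leading bit)
print(24 * np.log10(2))             # ~7.22 decimal significant digits for float32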

Test code (the calculate_alignment_num() code is attached at the end)

if __name__ == '__main__':
    import paddle
    import torch
    import numpy as np

    torch.set_printoptions(precision=9, sci_mode=False)  # disable scientific notation and print enough decimal places to make comparing significant digits easy
    paddle.set_printoptions(precision=9, sci_mode=False)

    data = [[0.1234567], [0.2345678], [0.3456789]]

    numpy_x = np.array(data, dtype=np.float32)
    torch_gpu_x = torch.tensor(data, dtype=torch.float32, device=torch.device('cuda:0'))
    paddle_cpu_x = paddle.to_tensor(data, dtype='float32', place=paddle.CPUPlace())
    paddle_gpu_x = paddle.to_tensor(data, dtype='float32', place=paddle.CUDAPlace(0))

    numpy_res = np.matmul(numpy_x, numpy_x.T)
    torch_gpu_res = torch_gpu_x.mm(torch_gpu_x.t())
    paddle_cpu_res = paddle.mm(paddle_cpu_x, paddle_cpu_x.t())
    paddle_gpu_res = paddle.mm(paddle_gpu_x, paddle_gpu_x.t())

    print(f"Numpy:\n{numpy_res}")
    print(f"PyTorch(GPU):\n{torch_gpu_res}")
    print(f"PaddlePaddle(CPU):\n{paddle_cpu_res}")
    print(f"PaddlePaddle(GPU):\n{paddle_gpu_res}")

    print("The number of significant numbers aligned(MIN & AVG):")
    print("paddle_gpu_res | torch_gpu_res :")
    calculate_alignment_num(paddle_gpu_res, torch_gpu_res)
    print("paddle_gpu_res | numpy_res :")
    calculate_alignment_num(paddle_gpu_res, numpy_res)
    print("paddle_cpu_res | torch_gpu_res :")
    calculate_alignment_num(paddle_cpu_res, torch_gpu_res)
    print("paddle_cpu_res | numpy_res :")
    calculate_alignment_num(paddle_cpu_res, numpy_res)

Output

W0808 16:32:40.628000 219632 gpu_resources.cc:119] Please NOTE: device: 0, GPU Compute Capability: 8.6, Driver API Version: 11.2, Runtime API Version: 11.1
W0808 16:32:40.633858 219632 gpu_resources.cc:164] device: 0, cuDNN Version: 8.2.
Numpy:
[[0.01524156 0.02895897 0.04267638]
 [0.02895897 0.05502206 0.08108514]
 [0.04267638 0.08108514 0.1194939 ]]
PyTorch(GPU):
tensor([[0.015241557, 0.028958967, 0.042676378],
        [0.028958967, 0.055022057, 0.081085138],
        [0.042676378, 0.081085138, 0.119493902]], device='cuda:0')
PaddlePaddle(CPU):
Tensor(shape=[3, 3], dtype=float32, place=Place(cpu), stop_gradient=True,
       [[0.015241557, 0.028958967, 0.042676378],
        [0.028958967, 0.055022057, 0.081085138],
        [0.042676378, 0.081085138, 0.119493902]])
PaddlePaddle(GPU):
Tensor(shape=[3, 3], dtype=float32, place=Place(gpu:0), stop_gradient=True,
       [[0.015245859, 0.028969392, 0.042685390],
        [0.028969392, 0.055046141, 0.081108570],
        [0.042685390, 0.081108570, 0.119510651]])
The number of significant numbers aligned(MIN & AVG):
paddle_gpu_res | torch_gpu_res :
min alignment: 2
avg alignment:2.888888888888889
paddle_gpu_res | numpy_res :
min alignment: 2
avg alignment:2.888888888888889
paddle_cpu_res | torch_gpu_res :
min alignment: 7
avg alignment:7.0
paddle_cpu_res | numpy_res :
min alignment: 7
avg alignment:7.0

Process finished with exit code 0
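
For reference, the gap can also be expressed as an element-wise relative error. The following is a minimal sketch (not part of the original report) that assumes the tensors from the test script above are still in scope and reuses their variable names:

rel_err_gpu = np.abs(paddle_gpu_res.cpu().numpy() - numpy_res) / np.abs(numpy_res)
rel_err_cpu = np.abs(paddle_cpu_res.numpy() - numpy_res) / np.abs(numpy_res)

print(rel_err_gpu.max())  # on the run printed above this is on the order of 1e-4, i.e. only ~3 shared significant digits
print(rel_err_cpu.max())  # effectively zero at float32 resolution: the CPU result matches Numpy to all 7 printed significant digits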

Appendix: the calculate_alignment_num() function used to count the number of aligned significant digits

def calculate_alignment_num(data1, data2):
    data1_np = tensor_to_numpy(data1)
    data2_np = tensor_to_numpy(data2)

    # Flatten the data to ensure we can iterate over them element-wise
    flat_data1 = data1_np.flatten()
    flat_data2 = data2_np.flatten()

    # Initialize list to keep track of all alignment counts
    alignment_counts = []

    for i in range(len(flat_data1)):
        alignment_counts.append(count_the_number_of_alignments(flat_data1[i], flat_data2[i]))

    # Calculate minimum and average alignment counts
    min_alignment = min(alignment_counts)
    avg_alignment = sum(alignment_counts) / len(alignment_counts)

    print(f"min alignment: {min_alignment}")
    print(f"avg alignment:{avg_alignment}")

    return min_alignment, avg_alignment

def tensor_to_numpy(tensor):
    if isinstance(tensor, torch.Tensor):
        return tensor.detach().cpu().numpy()
    elif isinstance(tensor, paddle.Tensor):
        return tensor.cpu().numpy()
    elif isinstance(tensor, np.ndarray):
        return tensor
    else:
        raise TypeError("Unsupported data type")

def count_the_number_of_alignments(x, y):
    x = extract_7_significant_digits(x)
    y = extract_7_significant_digits(y)

    aligned_count = 0
    for i in range(len(x)):
        if x[i] == y[i]:
            aligned_count += 1
        else:
            break

    return aligned_count

def extract_7_significant_digits(num):
    # abs() drops the sign; scientific notation puts exactly one non-zero digit before the decimal point and we keep 7 digits after it, i.e. 8 significant digits in total, so rounding cannot change the first 7 significant digits
    num_str = "{:.7e}".format(abs(num))
    e_index = num_str.find('e')
    # take the digits before 'e', drop the decimal point, then keep the first 7 significant digits
    digits = num_str[:e_index].replace('.', '')[:7]
    return digits
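
As a worked example using the top-left elements from the output above, the helpers behave as follows:

print(extract_7_significant_digits(0.015245859))                  # '1524585' (PaddlePaddle GPU, element [0][0])
print(extract_7_significant_digits(0.015241557))                  # '1524155' (PyTorch GPU / Numpy, element [0][0])
print(count_the_number_of_alignments(0.015245859, 0.015241557))   # 4 -- the first four significant digits agree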

Additional Supplementary Information

No response

esyap4oy 1#

Thanks for the feedback; we will arrange for the relevant team member to investigate.

xoshrz7s 2#

Hi, I could not reproduce this issue on the latest paddle; you could try updating your paddle version.

9njqaruj 3#

Hi, I could not reproduce this issue on the latest paddle; you could try updating your paddle version.

Thanks for your reply.
My environment is: paddlepaddle-gpu 2.6.1.post112
Do I need to update to the 3.0-beta version?

n8ghc7c1 4#

Yes, you can try testing on the 3.0 version.
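
A minimal sketch for checking the upgrade before re-running the repro above (paddle.utils.run_check() is Paddle's built-in installation check; treat the expected version string as an assumption about whichever 3.0 build gets installed):

import paddle

print(paddle.__version__)   # should report a 3.0.x build after upgrading
paddle.utils.run_check()    # verifies that the GPU build and CUDA runtime are working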
