ncnn convolution_vulkan forward时候崩溃

bpzcxfmw  于 4个月前  发布在  其他
关注(0)|答案(3)|浏览(35)

1、版本
ncnn-20220216 release版本

2、崩溃堆栈

Tombstone maker: 'xCrash 3.0.0'
Crash type: 'native'
Start time: '2022-05-12T09:29:43.007+0800'
Crash time: '2022-05-12T09:32:34.929+0800'
App ID: 'com.xxxx.xxxx'
App version: '7.69.2'
Rooted: 'No'
API level: '29'
OS version: '10'
Kernel version: 'Linux version 4.14.116 #1 SMP PREEMPT Fri Aug 13 12:39:43 CST 2021 (aarch64)'
ABI list: 'arm64-v8a,armeabi-v7a,armeabi'
Manufacturer: 'HUAWEI'
Brand: 'HUAWEI'
Model: 'OCE-AN10'
Build fingerprint: 'HUAWEI/OCE-AN10/HWOCE-L:10/HUAWEIOCE-AN10/102.0.0.165C00:user/release-keys'
ABI: 'arm64'
pid: 6530, tid: 9959, name: FrescoBackgroun >>> com.xxxx.xxxx <<<
signal 11 (SIGSEGV), code 1 (SEGV_MAPERR), fault addr 0x0
x0 0000000000000000 x1 0000007204933160 x2 0000007204933030 x3 0000007204933f38
x4 0000007204932fd0 x5 00000071a70c25a0 x6 00000071b2cafea8 x7 0000000000000001
x8 00000071d53a8e50 x9 0000000000000244 x10 000000719c9e0a08 x11 0000000000000001
x12 0000007204933058 x13 0000000000000001 x14 0000000000000002 x15 0000000000000002
x16 0000007222d97730 x17 0000000000000003 x18 00000071ccbba000 x19 0000007204932f20
x20 0000000000000001 x21 0000007204936020 x22 000000719bccc7a0 x23 0000007204933160
x24 0000000000000000 x25 0000000000000002 x26 0000007204933f38 x27 0000000000000004
x28 0000000000000002 x29 00000072049330e0
sp 0000007204932e70 lr 000000717e0c5a00 pc 000000717e151424

backtrace:
#00 pc 000000000022c424 /data/app/com.xxxx.xxxx-JiTtlniklULF9ubz2HBDfA==/lib/arm64/libncnn.so (_ZNK4ncnn18Convolution_vulkan7forwardERKNS_5VkMatERS1_RNS_9VkComputeERKNS_6OptionE+436)
#1 pc 00000000001a09fc /data/app/com.xxxx.xxxx-JiTtlniklULF9ubz2HBDfA==/lib/arm64/libncnn.so (_ZNK4ncnn10NetPrivate16do_forward_layerEPKNS_5LayerERNSt6__ndk16vectorINS_5VkMatENS4_9allocatorIS6_EEEERNS_9VkComputeERKNS_6OptionE+496)
#2 pc 00000000001a072c /data/app/com.xxxx.xxxx-JiTtlniklULF9ubz2HBDfA==/lib/arm64/libncnn.so (_ZNK4ncnn10NetPrivate13forward_layerEiRNSt6__ndk16vectorINS_3MatENS1_9allocatorIS3_EEEERNS2_INS_5VkMatENS4_IS8_EEEERNS_9VkComputeERKNS_6OptionE+1440)
#3 pc 00000000001a0754 /data/app/com.xxxx.xxxx-JiTtlniklULF9ubz2HBDfA==/lib/arm64/libncnn.so (_ZNK4ncnn10NetPrivate13forward_layerEiRNSt6__ndk16vectorINS_3MatENS1_9allocatorIS3_EEEERNS2_INS_5VkMatENS4_IS8_EEEERNS_9VkComputeERKNS_6OptionE+1480)
#4 pc 00000000001a0754 /data/app/com.xxxx.xxxx-JiTtlniklULF9ubz2HBDfA==/lib/arm64/libncnn.so (_ZNK4ncnn10NetPrivate13forward_layerEiRNSt6__ndk16vectorINS_3MatENS1_9allocatorIS3_EEEERNS2_INS_5VkMatENS4_IS8_EEEERNS_9VkComputeERKNS_6OptionE+1480)
#5 pc 00000000001a0754 /data/app/com.xxxx.xxxx-JiTtlniklULF9ubz2HBDfA==/lib/arm64/libncnn.so (_ZNK4ncnn10NetPrivate13forward_layerEiRNSt6__ndk16vectorINS_3MatENS1_9allocatorIS3_EEEERNS2_INS_5VkMatENS4_IS8_EEEERNS_9VkComputeERKNS_6OptionE+1480)
#6 pc 00000000001a0754 /data/app/com.xxxx.xxxx-JiTtlniklULF9ubz2HBDfA==/lib/arm64/libncnn.so (_ZNK4ncnn10NetPrivate13forward_layerEiRNSt6__ndk16vectorINS_3MatENS1_9allocatorIS3_EEEERNS2_INS_5VkMatENS4_IS8_EEEERNS_9VkComputeERKNS_6OptionE+1480)
#7 pc 00000000001a0754 /data/app/com.xxxx.xxxx-JiTtlniklULF9ubz2HBDfA==/lib/arm64/libncnn.so (_ZNK4ncnn10NetPrivate13forward_layerEiRNSt6__ndk16vectorINS_3MatENS1_9allocatorIS3_EEEERNS2_INS_5VkMatENS4_IS8_EEEERNS_9VkComputeERKNS_6OptionE+1480)
#8 pc 00000000001a0754 /data/app/com.xxxx.xxxx-JiTtlniklULF9ubz2HBDfA==/lib/arm64/libncnn.so (_ZNK4ncnn10NetPrivate13forward_layerEiRNSt6__ndk16vectorINS_3MatENS1_9allocatorIS3_EEEERNS2_INS_5VkMatENS4_IS8_EEEERNS_9VkComputeERKNS_6OptionE+1480)
#9 pc 00000000001a0754 /data/app/com.xxxx.xxxx-JiTtlniklULF9ubz2HBDfA==/lib/arm64/libncnn.so (_ZNK4ncnn10NetPrivate13forward_layerEiRNSt6__ndk16vectorINS_3MatENS1_9allocatorIS3_EEEERNS2_INS_5VkMatENS4_IS8_EEEERNS_9VkComputeERKNS_6OptionE+1480)
#10 pc 00000000001a0754 /data/app/com.xxxx.xxxx-JiTtlniklULF9ubz2HBDfA==/lib/arm64/libncnn.so (_ZNK4ncnn10NetPrivate13forward_layerEiRNSt6__ndk16vectorINS_3MatENS1_9allocatorIS3_EEEERNS2_INS_5VkMatENS4_IS8_EEEERNS_9VkComputeERKNS_6OptionE+1480)
#11 pc 00000000001a0754 /data/app/com.xxxx.xxxx-JiTtlniklULF9ubz2HBDfA==/lib/arm64/libncnn.so (_ZNK4ncnn10NetPrivate13forward_layerEiRNSt6__ndk16vectorINS_3MatENS1_9allocatorIS3_EEEERNS2_INS_5VkMatENS4_IS8_EEEERNS_9VkComputeERKNS_6OptionE+1480)
#12 pc 00000000001a0754 /data/app/com.xxxx.xxxx-JiTtlniklULF9ubz2HBDfA==/lib/arm64/libncnn.so (_ZNK4ncnn10NetPrivate13forward_layerEiRNSt6__ndk16vectorINS_3MatENS1_9allocatorIS3_EEEERNS2_INS_5VkMatENS4_IS8_EEEERNS_9VkComputeERKNS_6OptionE+1480)
#13 pc 00000000001a0754 /data/app/com.xxxx.xxxx-JiTtlniklULF9ubz2HBDfA==/lib/arm64/libncnn.so (_ZNK4ncnn10NetPrivate13forward_layerEiRNSt6__ndk16vectorINS_3MatENS1_9allocatorIS3_EEEERNS2_INS_5VkMatENS4_IS8_EEEERNS_9VkComputeERKNS_6OptionE+1480)
#14 pc 00000000001a958c /data/app/com.xxxx.xxxx-JiTtlniklULF9ubz2HBDfA==/lib/arm64/libncnn.so (_ZN4ncnn9Extractor7extractEiRNS_5VkMatERNS_9VkComputeE+304)
#15 pc 00000000001a8a58 /data/app/com.xxxx.xxxx-JiTtlniklULF9ubz2HBDfA==/lib/arm64/libncnn.so (_ZN4ncnn9Extractor7extractEiRNS_3MatEi+680)
#16 pc 00000000000051a8 /data/app/com.xxxx.xxxx-JiTtlniklULF9ubz2HBDfA==/lib/arm64/libvipSR.so (_ZNK5VipSR13processMatRgbERKN4ncnn3MatERS1_+188)
#17 pc 00000000000033dc /data/app/com.xxxx.xxxx-JiTtlniklULF9ubz2HBDfA==/lib/arm64/libvipSR.so (Java_com_vip_vipsr_VipSR_DoImageRgbSR+612)
#18 pc 0000000000150350 /apex/com.android.runtime/lib64/libart.so (art_quick_generic_jni_trampoline+144)
#19 pc 0000000000147334 /apex/com.android.runtime/lib64/libart.so (art_quick_invoke_stub+548)
#20 pc 00000000001561b4 /apex/com.android.runtime/lib64/libart.so (_ZN3art9ArtMethod6InvokeEPNS_6ThreadEPjjPNS_6JValueEPKc+252)
#21 pc 00000000002fd900 /apex/com.android.runtime/lib64/libart.so (_ZN3art11interpreter34ArtInterpreterToCompiledCodeBridgeEPNS_6ThreadEPNS_9ArtMethodEPNS_11ShadowFrameEtPNS_6JValueE+384)
#22 pc 00000000002f8bd0 /apex/com.android.runtime/lib64/libart.so (_ZN3art11interpreter6DoCallILb0ELb0EEEbPNS_9ArtMethodEPNS_6ThreadERNS_11ShadowFrameEPKNS_11InstructionEtPNS_6JValueE+912)
#23 pc 00000000005cb550 /apex/com.android.runtime/lib64/libart.so (MterpInvokeVirtual+648)
#24 pc 0000000000141814 /apex/com.android.runtime/lib64/libart.so (mterp_op_invoke_virtual+20)
#25 pc 00000000005cd060 /apex/com.android.runtime/lib64/libart.so (MterpInvokeInterface+1752)
#26 pc 0000000000141a14 /apex/com.android.runtime/lib64/libart.so (mterp_op_invoke_interface+20)
#27 pc 00000000005cdbfc /apex/com.android.runtime/lib64/libart.so (MterpInvokeDirect+1168)
#28 pc 0000000000141914 /apex/com.android.runtime/lib64/libart.so (mterp_op_invoke_direct+20)
#29 pc 00000000005cdbfc /apex/com.android.runtime/lib64/libart.so (MterpInvokeDirect+1168)
#30 pc 0000000000141914 /apex/com.android.runtime/lib64/libart.so (mterp_op_invoke_direct+20)
#31 pc 00000000005ce408 /apex/com.android.runtime/lib64/libart.so (MterpInvokeStatic+1136)
#32 pc 0000000000141994 /apex/com.android.runtime/lib64/libart.so (mterp_op_invoke_static+20)
#33 pc 00000000002ce22c /apex/com.android.runtime/lib64/libart.so (_ZN3art11interpreterL7ExecuteEPNS_6ThreadERKNS_20CodeItemDataAccessorERNS_11ShadowFrameENS_6JValueEbb.llvm.10887373532384510885+320)
#34 pc 00000000005bc090 /apex/com.android.runtime/lib64/libart.so (artQuickToInterpreterBridge+1012)
#35 pc 0000000000150468 /apex/com.android.runtime/lib64/libart.so (art_quick_to_interpreter_bridge+88)
#36 pc 00000000003d8d08 /system/framework/arm64/boot.oat (java.util.concurrent.ThreadPoolExecutor.runWorker+984)
#37 pc 00000000003d5880 /system/framework/arm64/boot.oat (java.util.concurrent.ThreadPoolExecutor$Worker.run+64)
#38 pc 0000000000147334 /apex/com.android.runtime/lib64/libart.so (art_quick_invoke_stub+548)
#39 pc 00000000001561b4 /apex/com.android.runtime/lib64/libart.so (_ZN3art9ArtMethod6InvokeEPNS_6ThreadEPjjPNS_6JValueEPKc+252)
#40 pc 00000000002fd900 /apex/com.android.runtime/lib64/libart.so (_ZN3art11interpreter34ArtInterpreterToCompiledCodeBridgeEPNS_6ThreadEPNS_9ArtMethodEPNS_11ShadowFrameEtPNS_6JValueE+384)
#41 pc 00000000002f8bd0 /apex/com.android.runtime/lib64/libart.so (_ZN3art11interpreter6DoCallILb0ELb0EEEbPNS_9ArtMethodEPNS_6ThreadERNS_11ShadowFrameEPKNS_11InstructionEtPNS_6JValueE+912)
#42 pc 00000000005cccfc /apex/com.android.runtime/lib64/libart.so (MterpInvokeInterface+884)
#43 pc 0000000000141a14 /apex/com.android.runtime/lib64/libart.so (mterp_op_invoke_interface+20)
#44 pc 00000000002ce22c /apex/com.android.runtime/lib64/libart.so (_ZN3art11interpreterL7ExecuteEPNS_6ThreadERKNS_20CodeItemDataAccessorERNS_11ShadowFrameENS_6JValueEbb.llvm.10887373532384510885+320)
#45 pc 00000000005bc090 /apex/com.android.runtime/lib64/libart.so (artQuickToInterpreterBridge+1012)
#46 pc 0000000000150468 /apex/com.android.runtime/lib64/libart.so (art_quick_to_interpreter_bridge+88)
#47 pc 0000000000217fc8 /system/framework/arm64/boot.oat (java.lang.Thread.run+72)
#48 pc 0000000000147334 /apex/com.android.runtime/lib64/libart.so (art_quick_invoke_stub+548)
#49 pc 00000000001561b4 /apex/com.android.runtime/lib64/libart.so (_ZN3art9ArtMethod6InvokeEPNS_6ThreadEPjjPNS_6JValueEPKc+252)
#50 pc 00000000004d8820 /apex/com.android.runtime/lib64/libart.so (_ZN3art12_GLOBAL__N_118InvokeWithArgArrayERKNS_33ScopedObjectAccessAlreadyRunnableEPNS_9ArtMethodEPNS0_8ArgArrayEPNS_6JValueEPKc+104)
#51 pc 00000000004d98b4 /apex/com.android.runtime/lib64/libart.so (_ZN3art35InvokeVirtualOrInterfaceWithJValuesERKNS_33ScopedObjectAccessAlreadyRunnableEP8_jobjectP10_jmethodIDPK6jvalue+416)
#52 pc 000000000051ca8c /apex/com.android.runtime/lib64/libart.so (_ZN3art6Thread14CreateCallbackEPv+1232)
#53 pc 00000000000cf700 /apex/com.android.runtime/lib64/bionic/libc.so (_ZL15__pthread_startPv+36)
#54 pc 00000000000720e8 /apex/com.android.runtime/lib64/bionic/libc.so (__start_thread+64)

5kgi1eie

5kgi1eie1#

从堆栈信息看,崩溃位置在:
ncnn/src/layer/vulkan/convolution_vulkan.cpp

Line 1010 in 6b2495c

padding->forward(bottom_blob, bottom_blob_bordered, cmd, opt_pad);

此处 padding 为 NULL,对空指针调用 forward 导致崩溃

azpvetkf

azpvetkf2#

找到代码行的步骤

  1. 用的是 20220216 vulkan 动态库,去下载 https://github.com/Tencent/ncnn/releases/download/20220216/ncnn-20220216-android-vulkan-shared.zip 解压
  2. 反汇编 $ANDROID_NDK/toolchains/aarch64-linux-android-4.9/prebuilt/linux-x86_64/bin/aarch64-linux-android-objdump -d libncnn.so > libncnn.so.s
  3. 根据堆栈,在 libncnn.so.s 查找 _ZNK4ncnn18Convolution_vulkan7forwardERKNS_5VkMatERS1_RNS_9VkComputeERKNS_6OptionE
  4. 找到函数入口 0000000000203dc0 <_ZNK4ncnn18Convolution_vulkan7forwardERKNS_5VkMatERS1_RNS_9VkComputeERKNS_6OptionE>:
  5. 算出 0x203dc0 +436 = 0x203f74,找到行 203f74: f9400008 ldr x8, [x0]
  6. 翻出函数源代码

ncnn/src/layer/vulkan/convolution_vulkan.cpp

Line 983 in 6b2495c

int Convolution_vulkan::forward(const VkMat& bottom_blob, VkMat& top_blob, VkCompute& cmd, const Option& opt) const

往上看,发现前面连续做了几次 > 0 的判断,只要其中任意一个成立,就会跳转到 203f40

203f14:	7100017f 	cmp	w11, #0x0
  203f18:	5400014c 	b.gt	203f40 <_ZNK4ncnn18Convolution_vulkan7forwardERKNS_5VkMatERS1_RNS_9VkComputeERKNS_6OptionE+0x180>
  203f1c:	b940e94c 	ldr	w12, [x10,#232]
  203f20:	7100019f 	cmp	w12, #0x0
  203f24:	540000ec 	b.gt	203f40 <_ZNK4ncnn18Convolution_vulkan7forwardERKNS_5VkMatERS1_RNS_9VkComputeERKNS_6OptionE+0x180>
  203f28:	b940ed4d 	ldr	w13, [x10,#236]
  203f2c:	710001bf 	cmp	w13, #0x0
  203f30:	5400008c 	b.gt	203f40 <_ZNK4ncnn18Convolution_vulkan7forwardERKNS_5VkMatERS1_RNS_9VkComputeERKNS_6OptionE+0x180>
  203f34:	b940f14e 	ldr	w14, [x10,#240]
  203f38:	710005df 	cmp	w14, #0x1
  203f3c:	5400836b 	b.lt	204fa8 <_ZNK4ncnn18Convolution_vulkan7forwardERKNS_5VkMatERS1_RNS_9VkComputeERKNS_6OptionE+0x11e8>
  203f40:	ad4002c1 	ldp	q1, q0, [x22]
  203f44:	d102c3a2 	sub	x2, x29, #0xb0
  203f48:	910583e4 	add	x4, sp, #0x160
  203f4c:	aa1703e1 	mov	x1, x23
  203f50:	ad0b03e1 	stp	q1, q0, [sp,#352]
  203f54:	ad4202c1 	ldp	q1, q0, [x22,#64]
  203f58:	ad410ac3 	ldp	q3, q2, [x22,#32]
  203f5c:	ad0d03e1 	stp	q1, q0, [sp,#416]
  203f60:	ad0c0be3 	stp	q3, q2, [sp,#384]
  203f64:	f94012c8 	ldr	x8, [x22,#32]
  203f68:	f900bfe8 	str	x8, [sp,#376]
  203f6c:	f9400460 	ldr	x0, [x3,#8]
  203f70:	aa1a03e3 	mov	x3, x26
  203f74:	f9400008 	ldr	x8, [x0]

这正好对应于条件

ncnn/src/layer/vulkan/convolution_vulkan.cpp

Line 1005 in 6b2495c

if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0)

于是这段

203f40:	ad4002c1 	ldp	q1, q0, [x22]
  203f44:	d102c3a2 	sub	x2, x29, #0xb0
  203f48:	910583e4 	add	x4, sp, #0x160
  203f4c:	aa1703e1 	mov	x1, x23
  203f50:	ad0b03e1 	stp	q1, q0, [sp,#352]
  203f54:	ad4202c1 	ldp	q1, q0, [x22,#64]
  203f58:	ad410ac3 	ldp	q3, q2, [x22,#32]
  203f5c:	ad0d03e1 	stp	q1, q0, [sp,#416]
  203f60:	ad0c0be3 	stp	q3, q2, [sp,#384]
  203f64:	f94012c8 	ldr	x8, [x22,#32]
  203f68:	f900bfe8 	str	x8, [sp,#376]
  203f6c:	f9400460 	ldr	x0, [x3,#8]
  203f70:	aa1a03e3 	mov	x3, x26
  203f74:	f9400008 	ldr	x8, [x0]
  203f78:	f9403108 	ldr	x8, [x8,#96]
  203f7c:	d63f0100 	blr	x8

对应于代码

Option opt_pad = opt;
        opt_pad.blob_vkallocator = opt.workspace_vkallocator;

        padding->forward(bottom_blob, bottom_blob_bordered, cmd, opt_pad);

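203f74 正好位于 padding->forward 的 4 个参数(x1 x2 x3 x4)准备完毕之后、实际调用(blr x8)之前的地方:ldr x8, [x0] 要通过 x0(即 padding 指针)读取虚函数表,而崩溃时寄存器 x0 为 0,所以在这里触发了 fault addr 0x0 的 SIGSEGV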

of1yzvn4

of1yzvn43#

看起来(不确定)是因为使用了 dynamic weight 导致的:create_pipeline 提前返回,padding 没有被创建,仍然是构造函数里赋的 NULL

Convolution_vulkan::Convolution_vulkan()
{
    support_vulkan = true;
    support_image_storage = true;

    padding = 0; // 构造函数, 赋予padding指针为NULL
    ...
}

int Convolution_vulkan::create_pipeline(const Option& _opt)
{
    if (dynamic_weight)
    {
        support_vulkan = false;
        support_image_storage = false;
        return 0;   // 返回, 导致 padding 仍为 0.
    }

    ...

    {
        padding = ncnn::create_layer(ncnn::LayerType::Padding);
        padding->vkdev = vkdev;

        padding->bottom_shapes.resize(1);
        padding->bottom_shapes[0] = shape;
        padding->top_shapes.resize(1);
        padding->top_shapes[0] = shape_bordered;

        ncnn::ParamDict pd;
        pd.set(0, pad_top);
        pd.set(1, pad_bottom);
        pd.set(2, pad_left);
        pd.set(3, pad_right);
        pd.set(4, 0);
        pd.set(5, pad_value);

        padding->load_param(pd);

        padding->create_pipeline(opt);
    }

相关问题