尝试仅使用numpy实现CNN时出错

hgc7kmma  于 2023-04-12  发布在  其他
关注(0)|答案(1)|浏览(180)

我正在尝试使用numpy实现CNN。我遵循Grokking的深度学习这本书的指南。我写的代码如下:

import numpy as np, sys
np.random.seed(1)

from keras.datasets import mnist

(x_train, y_train), (x_test, y_test) = mnist.load_data()

images, labels = (x_train[0:1000].reshape(1000, 28*28)/255, y_train[0:1000])

one_hot_labels = np.zeros((len(labels), 10))

for i, l in enumerate(labels):
    one_hot_labels[i][l] = 1
labels = one_hot_labels

test_images = x_test.reshape(len(x_test), 28*28) / 255
test_labels = np.zeros((len(y_test), 10))
for i, l in enumerate(y_test):
    test_labels[i][l] = 1

def tanh(x):
    return np.tanh(x)

def tanh2deriv(output):
    return 1 - (output ** 2)

def softmax(x):
    temp = np.exp(x)
    return temp/np.sum(temp, axis=1, keepdims=True)

alpha, iterations = (2, 300)
pixels_per_image, num_labels = (784, 10)
batch_size = 128

input_rows = 28
input_cols = 28

kernel_rows = 3
kernel_cols = 3
num_kernels = 16

hidden_size = ((input_rows - kernel_rows)*(input_cols - kernel_cols))*num_kernels

kernels = 0.02*np.random.random((kernel_rows*kernel_cols, num_kernels))-0.01

weights_1_2 = 0.02*np.random.random((hidden_size, num_labels))-0.1

def get_image_section(layer, row_from, row_to, col_from, col_to):
    section = layer[:, row_from:row_to, col_to:col_from]
    return section.reshape(-1,1, row_to-row_from,col_to-col_from)

for j in range(iterations):
    correct_cnt = 0
    for i in range(int(len(images)/batch_size)):
        batch_start, batch_end = ((i*batch_size), ((i+1)*batch_size))
        layer_0 = images[batch_start:batch_end]
        layer_0 = layer_0.reshape(layer_0.shape[0], 28, 28)
        print(layer_0.shape)

        sects = list()
        for row_start in range(layer_0.shape[1]-kernel_rows):
            for col_start in range(layer_0.shape[2]-kernel_cols):
                sect = get_image_section(layer_0, row_start, row_start+kernel_rows, col_start, col_start+kernel_cols)
                sects.append(sect)
        
        expanded_input = np.concatenate(sects, axis=1)
        es = expanded_input.shape
        print(f"The shape of the expanded input {es}")
        flattened_input = expanded_input.reshape(es[0]*es[1], -1)

        kernel_output = flattened_input.dot(kernels)
        layer_1 = tanh(kernel_output.reshape(es[0], -1))
        dropout_mask = np.random.randint(2, size=layer_1.shape)
        layer_1 *= dropout_mask*2
        layer_2 = softmax(np.dot(layer_1, weights_1_2))

        for k in range(batch_size):
            labelset = labels[batch_start+k:batch_start+k+1]
            _inc = int(np.argmax(layer_2[k:k+1]) == np.argmax(labelset))
            correct_cnt += _inc

        layer_2_delta = (labels[batch_start:batch_end]-layer_2)/(batch_size*layer_2.shape[0])
        layer_1_delta = layer_2_delta.dot(weights_1_2.T)*tanh2deriv(layer_1)
        layer_1_delta *= dropout_mask

        weights_1_2 += alpha*layer_1.T.dot(layer_2_delta)

        l1d_reshape = layer_1_delta.reshape(kernel_output.shape)
        k_update = flattened_input.T.dot(l1d_reshape)
        kernels -= alpha*k_update

    test_correct_cnt = 0

    for i in range(len(test_images)):
        layer_0 = test_images[i:i+1]
        layer_0 = layer_0.reshape(layer_0.shape[0], 28, 28)
        print(layer_0.shape)

        sects = list()

        for row_start in range(layer_0.shape[1]-kernel_rows):
            for col_start in range(layer_0.shape[2]-kernel_cols):
                sect = get_image_section(layer_0, row_start, row_start+kernel_rows, col_start, col_start+kernel_rows)
                sects.append(sect)
        
        expanded_input = np.concatenate(sects, axis=1)
        es = expanded_input.shape
        flattened_input = expanded_input.reshape(es[0]*es[1], -1)

        kernel_output = flattened_input.dot(kernels)
        layer_1 = tanh(kernel_output.reshape(es[0], -1))
        layer_2 = np.dot(layer_1, weights_1_2)

        test_correct_cnt += int(np.argmax(layer_2) == np.argmax(test_labels[i:i+1]))

    if(j%1 == 0):
        print(f"I:{j} Test-Acc:{test_correct_cnt/float(len(test_images))} Train-Acc:{correct_cnt/float(len(images))}")

但我得到了以下错误。

(128, 28, 28)
The shape of the expanded input (0, 625, 3, 3)
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-5-3ba2437a604d> in <cell line: 52>()
     67         es = expanded_input.shape
     68         print(f"The shape of the expanded input {es}")
---> 69         flattened_input = expanded_input.reshape(es[0]*es[1], -1)
     70 
     71         kernel_output = flattened_input.dot(kernels)

ValueError: cannot reshape array of size 0 into shape (0,newaxis)

我做错了什么?

slsn1g29

slsn1g291#

看来我在 get_image_section 函数中错误地编写了以下代码

section = layer[:, row_from:row_to, col_to:col_from]

应该是

section = layer[:, row_from:row_to, col_from:col_to]

相关问题