I am trying to train a VGG-like model to recognize captcha images:
import os
import random
import numpy as np
from keras.models import Model
from keras.utils import to_categorical
from keras.layers import Conv2D, Dense, Dropout, Flatten, Input, MaxPooling2D
from keras_preprocessing.image import load_img, img_to_array
# Map each of the 36 captcha characters ('0'-'9', 'A'-'Z') to a class index
mapping = {char: idx for idx, char in enumerate('0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ')}
def split_dataset(directory, train_ratio=0.85, val_ratio=0.1, test_ratio=0.05):
    # Get the list of all files in the directory
    files = os.listdir(directory)
    # Shuffle the list of files
    random.shuffle(files)
    # Calculate the number of samples for each split
    train_size = int(len(files) * train_ratio)
    val_size = int(len(files) * val_ratio)
    # Split the list of files into train, validation and test sets;
    # whatever is left after the train and validation slices becomes the test set
    train_files = files[:train_size]
    val_files = files[train_size:train_size + val_size]
    test_files = files[train_size + val_size:]
    return train_files, val_files, test_files
def read_captcha_images(directory, file_list):
    images = []
    labels = []
    for filename in file_list:
        # Read the image (height 25, width 80)
        img = load_img(os.path.join(directory, filename), target_size=(25, 80))
        # Convert the image to an array
        img_arr = img_to_array(img)
        # Normalize the pixel values
        img_arr = img_arr / 255.
        # Append the image to the list of images
        images.append(img_arr)
        # Extract the label from the filename and append it to the list of labels
        label = filename[:filename.index('.')].split('_')[0]
        labels.append([mapping[c.upper()] for c in label])
    # Convert the list of images to a numpy array
    images = np.array(images)
    # One-hot encode the labels
    labels = to_categorical(labels)
    return images, labels
if __name__ == "__main__":
    # Define the directory where the captcha images are stored
    directory = 'train_dataset'
    # Split the dataset into train, validation, and test sets
    train_files, val_files, test_files = split_dataset(directory)
    # Read the training data
    X_train, y_train = read_captcha_images(directory, train_files)
    # Read the validation data
    X_val, y_val = read_captcha_images(directory, val_files)
    # Read the test data
    X_test, y_test = read_captcha_images(directory, test_files)

    input_tensor = Input(shape=(25, 80, 3))
    # Block 1
    x = Conv2D(64, (3, 3), activation='relu', padding='same')(input_tensor)
    x = Conv2D(64, (3, 3), activation='relu', padding='same')(x)
    x = MaxPooling2D((2, 2), strides=(2, 2))(x)
    # Block 2
    x = Conv2D(128, (3, 3), activation='relu', padding='same')(x)
    x = Conv2D(128, (3, 3), activation='relu', padding='same')(x)
    x = MaxPooling2D((2, 2), strides=(2, 2))(x)
    # Block 3
    x = Conv2D(256, (3, 3), activation='relu', padding='same')(x)
    x = Conv2D(256, (3, 3), activation='relu', padding='same')(x)
    x = Conv2D(256, (3, 3), activation='relu', padding='same')(x)
    x = MaxPooling2D((2, 2), strides=(2, 2))(x)
    # Block 4
    x = Conv2D(512, (3, 3), activation='relu', padding='same')(x)
    x = Conv2D(512, (3, 3), activation='relu', padding='same')(x)
    x = Conv2D(512, (3, 3), activation='relu', padding='same')(x)
    x = MaxPooling2D((2, 2), strides=(2, 2))(x)
    # Classification block
    x = Flatten(name='flatten')(x)
    x = Dropout(0.25)(x)
    x = Dense(36, activation='softmax')(x)
    x = Dense(36, activation='softmax')(x)
    x = Dense(36, activation='softmax')(x)
    x = Dense(36, activation='softmax')(x)
    # Create the model
    model = Model(input_tensor, x)
    # Compile the model
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    # Train the model
    model.fit(X_train, y_train, epochs=10, batch_size=64, validation_data=(X_val, y_val))
    # Evaluate the model on the test data
    test_loss, test_acc = model.evaluate(X_test, y_test)
    print(f'[*] Test accuracy: {test_acc}')
Each captcha image has a fixed-length, 4-character solution consisting of A-Z and 0-9 (uppercase letters and digits only).
I managed to convert the images so they can be fed into the model, but the labels are not right at all. I tried using to_categorical, but it turns the labels into a 3D array instead of a 2D one.
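For example, encoding the label of a single image already shows the extra dimension (the indices below are just the characters of a hypothetical captcha "A2Z5" under my mapping):

from keras.utils import to_categorical
# One image's label "A2Z5" mapped to class indices via the mapping above
encoded = to_categorical([[10, 2, 35, 5]], num_classes=36)
print(encoded.shape)  # (1, 4, 36): one image, four one-hot rows of length 36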
What is wrong with my code?
1 Answer
So how many labels does one image correspond to? In your code I can see that for every image you append an array of labels: labels.append([mapping[c.upper()] for c in label]). The shape you get during training is therefore (batch_size, labels_per_image, 36), which corresponds to your (None, 4, 36). But if you now look at the last Dense layer, its dimension is 36, so the vector you get out of the model has shape (batch_size, 36), i.e. (None, 36), which means the model predicts only a single label per input image. So when the loss is computed, the shape of the tensor predicted by the model does not match the shape of the ground-truth label tensor. If you need to predict multiple labels per image, you can increase the dimension of the Dense layer to 4 * 36 (assuming 4 labels per image) and then reshape the tensor to (None, 4, 36) before computing the loss.
Feel free to add more information and I will update my answer.