我正在尝试建立一个Tensorflow/ Keras CNN图像分类模型。我使用了大约21 k的训练图像和6 k的测试图像,并对这些相同的6 k测试图像进行预测。创建模型的代码如下:
stage = "TEST"
if stage == "DEV":
epochs = 1
elif stage == "DEV2":
epochs = 50
elif stage == "TEST":
epochs = 250
else:
epochs = 1000
# Input the size of your sample images
img_width, img_height = 240,172
# Enter the number of samples, training + validation
#nb_train_samples = 13204
#nb_validation_samples = 1412
nb_filters1 = 32
nb_filters2 = 32
nb_filters3 = 64
conv1_size = 3
conv2_size = 2
conv3_size = 5
pool_size = 2
# We have 2 classes, buy and sell
classes_num = 2
batch_size = 128
lr = 0.001
chanDim =3
model = Sequential()
model.add(Convolution2D(nb_filters1, conv1_size, conv1_size, padding ='same', input_shape=(img_height, img_width , 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(pool_size, pool_size)))
model.add(Convolution2D(nb_filters2, conv2_size, conv2_size, padding ="same"))
model.add(Activation('relu'))
#model.add(MaxPooling2D(pool_size=(pool_size, pool_size), data_format="channels_first")) #for GPU
model.add(MaxPooling2D(pool_size=(pool_size, pool_size), data_format="channels_last")) #for CPU
model.add(Convolution2D(nb_filters3, conv3_size, conv3_size, padding ='same'))
model.add(Activation('relu'))
#model.add(MaxPooling2D(pool_size=(pool_size, pool_size), data_format="channels_first")) #for GPU
model.add(MaxPooling2D(pool_size=(pool_size, pool_size), data_format="channels_last")) #for CPU
model.add(Flatten())
model.add(Dense(1024))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(classes_num, activation='softmax'))
model.summary()
model.compile(loss='categorical_crossentropy',
optimizer= optimizers.RMSprop(),
metrics=['accuracy'])
#get class names
train_dir = pathlib.Path(folder_path + "/train/")
class_names = list([item.name for item in train_dir.glob('*')])
print("We have the following classes:", class_names)
#get number of images
no_trainbuy = len([name for name in os.listdir(folder_path + "/train/buy") if os.path.isfile(os.path.join(folder_path + "/train/buy", name))])
no_trainsell = len([name for name in os.listdir(folder_path + "/train/sell") if os.path.isfile(os.path.join(folder_path + "/train/sell", name))])
no_train = no_trainbuy + no_trainsell
no_testbuy = len([name for name in os.listdir(folder_path + "/test/buy") if os.path.isfile(os.path.join(folder_path + "/test/buy", name))])
no_testsell = len([name for name in os.listdir(folder_path + "/test/sell") if os.path.isfile(os.path.join(folder_path + "/test/sell", name))])
no_test = no_testbuy + no_testsell
nb_train_samples = no_train
nb_validation_samples = no_test
print ("number of images. no_train: " + str(no_train) + " no_test: " + str(no_test) )
train_datagen = ImageDataGenerator( rescale=1./255,
shear_range=0.2,
zoom_range=0.2,
horizontal_flip=False)
test_datagen = ImageDataGenerator( rescale=1./255,
shear_range=0.2,
zoom_range=0.2,
horizontal_flip=False)
train_generator = train_datagen.flow_from_directory(
folder_path + "/train",
target_size = (img_height, img_width),
batch_size=batch_size,
shuffle=True,
class_mode='categorical' )
test_generator = test_datagen.flow_from_directory(
folder_path + "/train",
target_size = (img_height, img_width),
batch_size=batch_size,
shuffle=True,
class_mode='categorical' )
#early stopping, prevents additional runtime when not needed
early_stopping = EarlyStopping(monitor='val_loss', patience=3)
model.fit_generator(
train_generator,
steps_per_epoch=nb_train_samples//batch_size,
epochs=epochs,
shuffle=True,
validation_data=test_generator,
#callbacks=callbacks_list,
validation_steps=nb_validation_samples//batch_size)
===时段250之后的结果如下:
时间点250/250 165/165 [============================================] -179秒1秒/阶跃-损耗:0.6427 -精度:0.6245 -瓦尔_损失:0.6383 -瓦尔_准确度:0.6297
===我尝试了两种预测方法-第一种方法:
img_width, img_height = 240,172
images = []
image_file_names= []
for img in os.listdir(folder_path + '/pred/samples/' ):
image_file_names.append ( img)
img = os.path.join(folder_path + '/pred/samples/', img)
img = image.load_img(img )
img = image.img_to_array(img)/255
img = np.expand_dims(img, axis=0)
images.append(img)
image.array_to_img(img[0]).show()
#print ("image file names: ", image_file_names)
images = np.vstack(images)
pred = model.predict(images, verbose=1)
#print ("predictions:", pred)
class_names = ['buy', 'sell']
class_list = []
prob_list = []
for x in pred:
#print ("array: " + str(x) )
#print("This image most likely belongs to {} with a {:.2f} percent confidence." .format( class_names[np.argmax(x)], 100 * np.max(x)) )
class_list.append( class_names[np.argmax(x)] )
prob_list.append( 100 * np.max(x) )
#correct indicator
correct = []
for a in range (0, len(image_file_names)):
if image_file_names[a].find("buy") != -1:
if class_list[a] == "buy":
correct.append(1)
else:
correct.append(0)
if image_file_names[a].find("sell") != -1:
if class_list[a] == "sell":
correct.append(1)
else:
correct.append(0)
#create df from lists
df = pd.DataFrame({'image_file_names': image_file_names, 'class_list': class_list, 'prob_list': prob_list, 'correct': correct})
print (df)
total_correct = df['correct'].sum()
correct_pc = ( total_correct / len(df) ) * 100
print ("the number of correct predictions is: " + str(total_correct) + " and percentage correct is " + str(correct_pc) )
===这提供了大约50%的预测准确度
我使用的第二种方法如下:
train_dir = pathlib.Path(folder_path + "/train/")
class_names = list([item.name for item in train_dir.glob('*')])
print("We have the following classes:", class_names)
batch_size=1
img_width, img_height = 240,172
pred_datagen = ImageDataGenerator( rescale=1./255,
shear_range=0.2,
zoom_range=0.2,
horizontal_flip=False)
pred_generator = pred_datagen.flow_from_directory(
folder_path + "/pred/",
target_size=(img_height, img_width),
batch_size=batch_size,
shuffle=True,
class_mode='categorical' )
#pred generator classes
#print ( pred_generator.class_indices )
image_file_names = pred_generator.filenames
#print (image_file_names)
nb_samples = len(image_file_names)
image_batch, label_batch = next(pred_generator)
plt.imshow(image_batch[0])
plt.show()
#pred_generator.reset()
pred = model.predict_generator(pred_generator, steps = nb_samples // batch_size)
score = tf.nn.softmax(pred)
#print (score)
class_list = []
prob_list = []
v=0
for x in score:
#print("-----------------------------------------")
#print ("array: " + str(x) )
#print ("image file: " + str(image_file_names[v]) )
v=v+1
#print("This image most likely belongs to {} with a {:.2f} percent confidence." .format( class_names[np.argmax(x)], 100 * np.max(x)) )
class_list.append( class_names[np.argmax(x)] )
prob_list.append( 100 * np.max(x) )
#correct indicator
correct = []
for a in range (0, len(image_file_names)):
if image_file_names[a].find("buy") != -1:
if class_list[a] == "buy":
correct.append(1)
else:
correct.append(0)
if image_file_names[a].find("sell") != -1:
if class_list[a] == "sell":
correct.append(1)
else:
correct.append(0)
#print prediction list results
print ( image_file_names)
print ( class_list )
print ( prob_list )
print ( correct)
#create df from lists
print ("\n\nall results df: ")
df = pd.DataFrame({'image_file_names': image_file_names, 'class_list': class_list, 'prob_list': prob_list, 'correct': correct})
print (df)
total_correct = df['correct'].sum()
correct_pc = ( total_correct / len(df) ) * 100
print ("the number of correct predictions is: " + str(total_correct) + " and percentage correct is " + str(correct_pc) )
第二种方法也给出了大约50%的预测精度。
我做错了什么?
1条答案
按热度按时间0dxa2lsx1#
如果你的目录folder_path + '/pred/samples/中的图像不是240 X172的形状,那么在你第一次尝试预测的代码中,你需要调整图像的大小。在你第二次预测的代码中,你不应该在生成器中使用扩增,只是重新缩放图像。2请定义目录folder_path + '/pred/samples的结构。3其中是否有2个子目录(买和卖)?