我试图训练一个1D-CNN处理表格数据,然后在其上使用LRP(就像here那样)。我在实现的model.fit()
部分卡住了。到那时为止,一切似乎都工作正常。
下面是我得到的错误:
model_history = model.fit(X_train_smote, y_train_smote, batch_size=100, epochs=100, validation_split = 0.2)
Epoch 1/100
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
Input In [60], in <cell line: 1>()
----> 1 model_history = model.fit(X_train_smote, y_train_smote, batch_size=100, epochs=100, validation_split = 0.2)
File D:\Programme\Anaconda\envs\LRP_innvestigate\lib\site-packages\keras\utils\traceback_utils.py:67, in filter_traceback.<locals>.error_handler(*args, **kwargs)
65 except Exception as e: # pylint: disable=broad-except
66 filtered_tb = _process_traceback_frames(e.__traceback__)
---> 67 raise e.with_traceback(filtered_tb) from None
68 finally:
69 del filtered_tb
File ~\AppData\Local\Temp\__autograph_generated_filelswqetod.py:15, in outer_factory.<locals>.inner_factory.<locals>.tf__train_function(iterator)
13 try:
14 do_return = True
---> 15 retval_ = ag__.converted_call(ag__.ld(step_function), (ag__.ld(self), ag__.ld(iterator)), None, fscope)
16 except:
17 do_return = False
ValueError: in user code:
File "D:\Programme\Anaconda\envs\LRP_innvestigate\lib\site-packages\keras\engine\training.py", line 1051, in train_function *
return step_function(self, iterator)
File "D:\Programme\Anaconda\envs\LRP_innvestigate\lib\site-packages\keras\engine\training.py", line 1040, in step_function **
outputs = model.distribute_strategy.run(run_step, args=(data,))
File "D:\Programme\Anaconda\envs\LRP_innvestigate\lib\site-packages\keras\engine\training.py", line 1030, in run_step **
outputs = model.train_step(data)
File "D:\Programme\Anaconda\envs\LRP_innvestigate\lib\site-packages\keras\engine\training.py", line 890, in train_step
loss = self.compute_loss(x, y, y_pred, sample_weight)
File "D:\Programme\Anaconda\envs\LRP_innvestigate\lib\site-packages\keras\engine\training.py", line 948, in compute_loss
return self.compiled_loss(
File "D:\Programme\Anaconda\envs\LRP_innvestigate\lib\site-packages\keras\engine\compile_utils.py", line 201, in __call__
loss_value = loss_obj(y_t, y_p, sample_weight=sw)
File "D:\Programme\Anaconda\envs\LRP_innvestigate\lib\site-packages\keras\losses.py", line 139, in __call__
losses = call_fn(y_true, y_pred)
File "D:\Programme\Anaconda\envs\LRP_innvestigate\lib\site-packages\keras\losses.py", line 243, in call **
return ag_fn(y_true, y_pred, **self._fn_kwargs)
File "D:\Programme\Anaconda\envs\LRP_innvestigate\lib\site-packages\keras\losses.py", line 1787, in categorical_crossentropy
return backend.categorical_crossentropy(
File "D:\Programme\Anaconda\envs\LRP_innvestigate\lib\site-packages\keras\backend.py", line 5119, in categorical_crossentropy
target.shape.assert_is_compatible_with(output.shape)
ValueError: Shapes (None,) and (None, 6, 2) are incompatible
然而,为了清楚起见,这里是我当前实现的其余部分(改编自Kaggle笔记本)。模型结构基于my previous question here on SO的反馈。
import matplotlib.pyplot as plt
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib.colors import ListedColormap
from sklearn.metrics import plot_confusion_matrix
from scipy.stats import norm, boxcox
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
from collections import Counter
from scipy import stats
import tensorflow as tf
import warnings
warnings.simplefilter(action='ignore', category=Warning)
dataset = pd.read_csv('F:/Programmieren/this_one/data/Churn_Modelling.csv')
# split into variables and target
X = dataset.iloc[:, 3:-1].values
y = dataset.iloc[:, -1].values
# here gender is encoded
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
X[:, 2] = le.fit_transform(X[:, 2])
# one hot encoding the country (as explained in Ullah et al.)
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
ct = ColumnTransformer(transformers=[('encoder', OneHotEncoder(), [1])], remainder='passthrough')
X = np.array(ct.fit_transform(X))
# split the data
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size= .2)
# upsample minority class with SMOTE
import imblearn
from imblearn.over_sampling import SMOTENC
#get original class distribution
counter = Counter(y)
print(counter)
#SMOTENC is used instead of SMOTE because there are multiple categorical features in the dataset
oversample = SMOTENC(categorical_features=[0, 1, 2, 4, 9, 10])
X_train_smote, y_train_smote = oversample.fit_resample(X_train, y_train)
#get new class distribution
counter = Counter(y_train_smote)
print(counter)
# normalize values to range 0-1 as explained in Ullah et al.
from sklearn.preprocessing import MinMaxScaler
mms = MinMaxScaler()
X_train_smote = mms.fit_transform(X_train_smote)
X_test = mms.transform(X_test)
#record-wise normalization for relative value comparison as stated by one of the authors I was in contact with
from sklearn.preprocessing import normalize
X_train_smote = normalize(X_train_smote, axis=1, norm='l1')
X_test = normalize(X_test, axis=1, norm='l1')
#reshape data for CNN
sample_size = X_train_smote.shape[0] # number of samples in train set
time_steps = X_train_smote.shape[1] # number of features in train set
input_dimension = 1
train_data_reshaped = X_train_smote.reshape(sample_size,time_steps,input_dimension)
print("After reshape train data set shape:\n", train_data_reshaped.shape)
print("1 Sample shape:\n",train_data_reshaped[0].shape)
print("An example sample:\n", train_data_reshaped[0])
#reshape test data as well
test_data_reshaped = X_test.reshape(X_test.shape[0],X_test.shape[1],1)
import keras
from keras.models import Sequential
from keras.layers import Dense, Conv1D
#create model as explained in the paper
model = Sequential()
model.add(Conv1D(filters=25, kernel_size=3, activation='relu', input_shape=(12,1)))
model.add(Conv1D(50, 3))
model.add(Conv1D(100, 3))
model.add(Dense(2200, activation='relu'))
model.add(Dense(2, activation='relu'))
model.add(Dense(2, activation='softmax'))
model.compile(optimizer = 'adam', loss = 'categorical_crossentropy', metrics = ['accuracy'])
model.summary()
#output of model.summary()
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv1d (Conv1D) (None, 10, 25) 100
conv1d_1 (Conv1D) (None, 8, 50) 3800
conv1d_2 (Conv1D) (None, 6, 100) 15100
dense (Dense) (None, 6, 2200) 222200
dense_1 (Dense) (None, 6, 2) 4402
dense_2 (Dense) (None, 6, 2) 6
=================================================================
Total params: 245,608
Trainable params: 245,608
Non-trainable params: 0
_________________________________________________________________
在我目前的方法中,是否有我不知道的重大缺陷?
1条答案
按热度按时间piwo6bdm1#
您应该在输出图层之前添加一个
Flatten()
图层,因为输出形状是(6,2)而不是(2,)。另外,请将loss更改为
sparse_categorical_crossentopy
,因为您使用整数(0,1)作为标签。工作代码请参考gist。谢谢!