我尝试用时间序列data训练LSTM模型。
如果我加载它,因为他们是一切都好。模型工作与良好的准确性和损失函数。
但是如果我重新采样它,例如1H
或24H
,它停止工作。精度下降非常低,丢失的功能是nan
Epoch 5/5
110/110 [==============================] - 4s 32ms/step - loss: nan - accuracy: 0.4437 - val_loss: nan - val_accuracy: 0.3453
我做错了什么?
我的加载功能:
def readCSV(path, candleTime):
# Load data
#print("Loading: " + path)
data = pd.read_csv(path, names=['Date_Time', 'open', 'high', 'low', 'close', 'volume'], sep=";", index_col=0)
# Convert the index to datetime
data.index = pd.to_datetime(data.index, format='%Y%m%d %H%M%S%f')
if(candleTime == CandleTime.hour):
data = data.resample('1H').agg({'open': 'first',
'high': 'max',
'low': 'min',
'close': 'last'})
if(candleTime == CandleTime.day):
data = data.resample('24H').agg({'open': 'first',
'high': 'max',
'low': 'min',
'close': 'last'})
return data
编辑
整车功能。注:现在我只有一次迭代。
def trainModel(trainCandles, prediction_minutes = 60, model_name = 'lstm_1m_10_model'):
tf.keras.backend.clear_session()
#Prepare Data
print("Preparing data..")
x_train = []
y_train = []
normalizedCandles = trainCandles[['open', 'high', 'low', 'close']].to_numpy(copy=True)
for x in range(prediction_minutes, len(normalizedCandles)):
xdata = normalizedCandles[x-prediction_minutes:x]
predictionData = []
for candleX in xdata:
predictionData.append([candleX[0], candleX[1], candleX[2], candleX[3]])
candleY = normalizedCandles[x]
x_train.append(predictionData)
y_train.append([candleY[0], candleY[1], candleY[2], candleY[3]])
print("Spliting..")
# split train and test
x_toSplit, y_toSplit = x_train, y_train
sizeOf70percentage = int(len(x_toSplit)/100*70)
x_test = np.array(x_toSplit[sizeOf70percentage:len(x_toSplit)])
y_test = np.array(y_toSplit[sizeOf70percentage:len(x_toSplit)])
x_train = np.array(x_toSplit[0: sizeOf70percentage])
y_train = np.array(y_toSplit[0: sizeOf70percentage])
print("Total size of samples: " + str(len(x_train)))
model=None
if (os.path.isdir(model_name)): # you won't have a model for first iteration
print("Loading model..")
model = load_model(model_name)
else:
print("Creatng model..")
model = Sequential()
model.add(LSTM(units=50, return_sequences=True, input_shape=(x_train.shape[1], x_train.shape[2])))
model.add(Dropout(0.2))
model.add(LSTM(units=50, return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(units=50))
model.add(Dropout(0.2))
model.add(Dense(units=4))
model.compile(optimizer='Adam', loss='mean_squared_error', metrics=["accuracy"])
history = model.fit(
x_train,
y_train,
validation_data=(x_test, y_test),
epochs=5,
batch_size=32)
model.save(model_name)
1条答案
按热度按时间euoag5mw1#
我的问题是输入数据包含空数据。
dropna()
是我的解决方案。