Input problem with an LSTM-based model: shape mismatch in TensorFlow data

Asked by o2g1uqev on 2023-05-17

So I am trying to create a GAN model called TAnoGAN (Time series Anomaly detection GAN). Its official PyTorch implementation is here.
I am trying to convert it into a TensorFlow implementation for my own use. I have created a data loader for my purposes and subclassed models for the generator and the discriminator. Both are ultimately LSTM-based models.
Here is the code I wrote:

import logging

import numpy as np
import pandas as pd
import tensorflow as tf
from mlprimitives.utils import import_object
from numpy import ndarray
from tensorflow.keras import Model
import os

from loader import TadGANDataLoader

LOGGER = logging.getLogger(__name__)

WORKERS=4
BATCH_SIZE=32
EPOCHS=20
LR=0.0002
SEED=2
NOISE_DIM=100

class LSTMGenerator(Model):
    """An LSTM based generator. It expects a sequence of noise vectors as input.

    Args:
        in_dim: Input noise dimensionality
        out_dim: Output dimensionality
        n_layers: number of lstm layers
        hidden_dim: dimensionality of the hidden layer of lstms

    Input: noise of shape (batch_size, seq_len, in_dim)
    Output: sequence of shape (batch_size, seq_len, out_dim)
    """

    def __init__(self, in_dim, out_dim):
        super().__init__()
        self.out_dim = out_dim
        self.in_dim = in_dim

        self.lstm0 = tf.keras.layers.LSTM(units=32, return_sequences=True, input_shape=(None, in_dim))
        self.lstm1 = tf.keras.layers.LSTM(units=64, return_sequences=True)
        self.lstm2 = tf.keras.layers.LSTM(units=128, return_sequences=True)
        
        self.linear = tf.keras.Sequential([
            tf.keras.layers.Dense(units=out_dim),
            tf.keras.layers.Activation('tanh')
        ])

    def call(self, input):
        print(input.shape)
        batch_size, seq_len = input.shape[0], input.shape[1]
        h_0 = tf.zeros(shape=(batch_size, 32))
        c_0 = tf.zeros(shape=(batch_size, 32))

        recurrent_features = self.lstm0(input, initial_state=[h_0, c_0])
        recurrent_features = self.lstm1(recurrent_features)
        recurrent_features = self.lstm2(recurrent_features)
        
        outputs = self.linear(tf.reshape(recurrent_features, shape=(batch_size*seq_len, 128)))
        outputs = tf.reshape(outputs, shape=(batch_size, seq_len, self.out_dim))
        return outputs, recurrent_features

class LSTMDiscriminator(tf.keras.Model):
    """An LSTM based discriminator. It expects a sequence as input and outputs a probability for each element.

    Args:
        in_dim: Input noise dimensionality
        n_layers: number of lstm layers
        hidden_dim: dimensionality of the hidden layer of lstms

    Inputs: sequence of shape (batch_size, seq_len, in_dim)
    Output: sequence of shape (batch_size, seq_len, 1)
    """

    def __init__(self, in_dim):
        super(LSTMDiscriminator, self).__init__()
        self.lstm = tf.keras.layers.LSTM(units=100, return_sequences=True, input_shape=(None, in_dim))
        self.linear = tf.keras.Sequential([
            tf.keras.layers.Dense(units=1, activation='sigmoid')
        ])

    def call(self, input):
        batch_size, seq_len = input.shape[0], input.shape[1]
        h_0 = tf.zeros((batch_size, 100))
        c_0 = tf.zeros((batch_size, 100))

        recurrent_features = self.lstm(input, initial_state=[h_0, c_0])
        outputs = self.linear(tf.reshape(recurrent_features, [batch_size*seq_len, 100]))
        outputs = tf.reshape(outputs, [batch_size, seq_len, 1])
        return outputs, recurrent_features

generator = LSTMGenerator(in_dim=NOISE_DIM, out_dim=1)
discriminator = LSTMDiscriminator(in_dim=1)
loss_fn = tf.keras.losses.BinaryCrossentropy(from_logits=True)
optimizerD = tf.keras.optimizers.Adam(learning_rate=LR)
optimizerG = tf.keras.optimizers.Adam(learning_rate=LR)

def discriminator_loss(real_output, fake_output):
    real_loss = loss_fn(tf.ones_like(real_output), real_output)
    fake_loss = loss_fn(tf.zeros_like(fake_output), fake_output)
    total_loss = real_loss + fake_loss
    return total_loss

def generator_loss(fake_output):
    return loss_fn(tf.ones_like(fake_output), fake_output)

@tf.function
def train_step(batch):
    noise = tf.random.normal([BATCH_SIZE, noise_dim])

    with tf.GradientTape() as gen_tape, tf.GradientTape() as disc_tape:
        generated_batch = generator(noise, training=True)

        real_output = discriminator(batch, training=True)
        fake_output = discriminator(generated_batch, training=True)
        gen_loss = generator_loss(fake_output)
        disc_loss = discriminator_loss(real_output, fake_output)

    D_x = tf.reduce_mean(real_output).numpy()
    D_G_z1 = tf.reduce_mean(fake_output).numpy()
    D_G_z2 = tf.reduce_mean(real_output).numpy()
    
    gradients_of_generator = gen_tape.gradient(gen_loss, generator.trainable_variables)
    gradients_of_discriminator = disc_tape.gradient(disc_loss, discriminator.trainable_variables)

    optimizerG.apply_gradients(zip(gradients_of_generator, generator.trainable_variables))
    optimizerD.apply_gradients(zip(gradients_of_discriminator, discriminator.trainable_variables))
    return D_x, D_G_z1, D_G_z2

def train(dataset, epochs):
    for epoch in range(epochs):
        for image_batch in dataset:
            D_x, D_G_z1, D_G_z2 = train_step(image_batch)
        print("Epoch: ", epoch, " D_x: ", D_x, " D_G_z1: ", D_G_z1, " D_G_z2: ", D_G_z2)


if __name__ == "__main__":
    tf.random.set_seed(SEED)
    batch_size = 16
    seq_len = 32
    noise_dim = 100
    seq_dim = 4

    # generator = LSTMGenerator(noise_dim, seq_dim)
    # discriminator = LSTMDiscriminator(seq_dim)
    # noise = tf.random.normal(shape=(8, 16, noise_dim))
    # gen_out, _ = generator(noise)
    # dis_out, _ = discriminator(gen_out)

    # print("Noise: ", noise.shape)
    # print("Generator output: ", gen_out.shape)
    # print("Discriminator output: ", dis_out.shape)
    data = pd.read_csv('540821.csv', usecols=['Date', 'No. of Trades'], parse_dates=['Date'])
    data.rename(columns={'No. of Trades': 'value', 'Date': 'timestamp'}, inplace=True)
    data.timestamp = (data.timestamp - pd.Timestamp("1970-01-01")) // pd.Timedelta("1s")
    data.head()
    data_loader = TadGANDataLoader(data, shuffle=True)
    dataset = data_loader.get_tfdataset()
    train(dataset, 100)

And here is the code for the custom data loader I wrote:

import tensorflow as tf
import numpy as np
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import MinMaxScaler
from mlprimitives.custom.timeseries_preprocessing import rolling_window_sequences
from orion.primitives.timeseries_preprocessing import slice_array_by_dims

import math

class TadGANDataLoader(tf.keras.utils.Sequence):
    def __init__(self, data, batch_size=16, shuffle=True, window_size=60, target_size=1, step_size=1, target_column=0):
        self.data = data
        self.shuffle = shuffle
        self.batch_size = batch_size
        self.indexes = np.arange(data.shape[0])
        if self.shuffle:
            np.random.shuffle(self.indexes)
        self.X, self.index = data.value.values, data.timestamp.values
        self.X = self.X.reshape(-1, 1)
        imp = SimpleImputer()
        self.X = imp.fit_transform(self.X)
        scaler = MinMaxScaler(feature_range=(-1, 1))
        self.X = scaler.fit_transform(self.X)
        self.X, self.X_index, self.y, self.y_index = rolling_window_sequences(
            self.X, self.index, window_size=window_size, target_size=target_size, step_size=step_size, target_column=target_column)

    def __len__(self):
        return math.ceil(len(self.X) / self.batch_size)

    def __getitem__(self, idx):
        low = idx * self.batch_size
        high = min(low + self.batch_size, len(self.X))
        batch_x = self.X[low:high]
        batch_y = self.y[low:high].reshape(-1, 1)
        return batch_x, batch_y
    
    def on_epoch_end(self):
        if self.shuffle:
            np.random.shuffle(self.indexes)
            self.X = self.X[self.indexes]
            self.y = self.y[self.indexes]
            self.X_index = self.X_index[self.indexes]
            self.y_index = self.y_index[self.indexes]

    def get_tfdataset(self):
        return tf.data.Dataset.from_generator(
            lambda: self,
            output_types=(tf.float32, tf.float32),
            output_shapes=((None, self.X.shape[1], self.X.shape[2]), (None, 1)),
        )

if __name__ == "__main__":
    import pandas as pd
    data = pd.read_csv('540821.csv', usecols=['Date', 'No. of Trades'], parse_dates=['Date'])
    data.rename(columns={'No. of Trades': 'value', 'Date': 'timestamp'}, inplace=True)
    data.timestamp = (data.timestamp - pd.Timestamp("1970-01-01")) // pd.Timedelta("1s")
    data.head()
    data_loader = TadGANDataLoader(data, shuffle=True)
    # print(data_loader[0].shape)
    datagen = data_loader.get_tfdataset()
    for i in iter(datagen):
        X, y = i
        print(X.shape, y.shape)

Because of an input shape mismatch, I am getting the following error; here is the stack trace:

Traceback (most recent call last):
  File "/home/opc/src/orion/tanogan.py", line 162, in <module>
    train(dataset, 100)
  File "/home/opc/src/orion/tanogan.py", line 135, in train
    D_x, D_G_z1, D_G_z2 = train_step(image_batch)
  File "/home/opc/.conda/envs/orion/lib/python3.8/site-packages/tensorflow/python/eager/def_function.py", line 780, in __call__
    result = self._call(*args, **kwds)
  File "/home/opc/.conda/envs/orion/lib/python3.8/site-packages/tensorflow/python/eager/def_function.py", line 823, in _call
    self._initialize(args, kwds, add_initializers_to=initializers)
  File "/home/opc/.conda/envs/orion/lib/python3.8/site-packages/tensorflow/python/eager/def_function.py", line 696, in _initialize
    self._stateful_fn._get_concrete_function_internal_garbage_collected(  # pylint: disable=protected-access
  File "/home/opc/.conda/envs/orion/lib/python3.8/site-packages/tensorflow/python/eager/function.py", line 2855, in _get_concrete_function_internal_garbage_collected
    graph_function, _, _ = self._maybe_define_function(args, kwargs)
  File "/home/opc/.conda/envs/orion/lib/python3.8/site-packages/tensorflow/python/eager/function.py", line 3213, in _maybe_define_function
    graph_function = self._create_graph_function(args, kwargs)
  File "/home/opc/.conda/envs/orion/lib/python3.8/site-packages/tensorflow/python/eager/function.py", line 3065, in _create_graph_function
    func_graph_module.func_graph_from_py_func(
  File "/home/opc/.conda/envs/orion/lib/python3.8/site-packages/tensorflow/python/framework/func_graph.py", line 986, in func_graph_from_py_func
    func_outputs = python_func(*func_args, **func_kwargs)
  File "/home/opc/.conda/envs/orion/lib/python3.8/site-packages/tensorflow/python/eager/def_function.py", line 600, in wrapped_fn
    return weak_wrapped_fn().__wrapped__(*args, **kwds)
  File "/home/opc/.conda/envs/orion/lib/python3.8/site-packages/tensorflow/python/framework/func_graph.py", line 973, in wrapper
    raise e.ag_error_metadata.to_exception(e)
ValueError: in user code:

    /home/opc/src/orion/tanogan.py:114 train_step  *
        generated_batch = generator(noise, training=True)
    /home/opc/src/orion/tanogan.py:56 call  *
        recurrent_features = self.lstm0(input, initial_state=[h_0, c_0])
    /home/opc/.conda/envs/orion/lib/python3.8/site-packages/tensorflow/python/keras/layers/recurrent.py:716 __call__  **
        return super(RNN, self).__call__(inputs, **kwargs)
    /home/opc/.conda/envs/orion/lib/python3.8/site-packages/tensorflow/python/keras/engine/base_layer.py:975 __call__
        input_spec.assert_input_compatibility(self.input_spec, inputs,
    /home/opc/.conda/envs/orion/lib/python3.8/site-packages/tensorflow/python/keras/engine/input_spec.py:176 assert_input_compatibility
        raise ValueError('Input ' + str(input_index) + ' of layer ' +

    ValueError: Input 0 of layer lstm is incompatible with the layer: expected ndim=3, found ndim=2. Full shape received: [32, 100]

The dataset has the following structure:

Date,Open Price,High Price,Low Price,Close Price,WAP,No.of Shares,No. of Trades,Total Turnover (Rs.),Deliverable Quantity,% Deli. Qty to Traded Qty,Spread High-Low,Spread Close-Open
24-March-2023,6.06,6.68,6.06,6.31,6.181378548613391444,33499,160,207070.00,25058,74.80,0.62,0.25
Answer #1, by mf98qq94:

The error is telling you that the LSTM layer is not getting the shape it expects: it wants (batch_size, timesteps, features), but you gave it (batch_size, features), with no timesteps dimension.
Let's reshape the noise so that it has a timesteps dimension.
Replace noise = tf.random.normal([BATCH_SIZE, noise_dim]) with:

SEQ_LEN = 1 # or whatever sequence length you want
noise = tf.random.normal([BATCH_SIZE, SEQ_LEN, noise_dim])
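
As a quick sanity check (a minimal sketch, assuming the LSTMGenerator class from the question, with in_dim=100 and out_dim=1), you can confirm outside the training loop that three-dimensional noise flows through the generator:

# Hypothetical shape check; LSTMGenerator is the class defined in the question.
import tensorflow as tf

generator = LSTMGenerator(in_dim=100, out_dim=1)
noise = tf.random.normal([32, 1, 100])  # (BATCH_SIZE, SEQ_LEN, noise_dim): ndim=3, as the LSTM expects
outputs, features = generator(noise)    # call() returns a tuple (outputs, recurrent_features)
print(outputs.shape)                    # (32, 1, 1)
print(features.shape)                   # (32, 1, 128)

Note that call() returns a tuple, so in train_step the generator output will likely also need to be unpacked (e.g. generated_batch, _ = generator(noise, training=True)) before it is passed to the discriminator.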
