I have a deep-learning TensorFlow model, and for training I use 10-fold cross-validation. The model trains fine in the first fold, but as soon as the second fold starts, the output of the first layer becomes NaN and the model no longer trains. The input data is correct (I checked it), but I don't know what is going wrong inside the model. You can see the model at https://github.com/chang-li-hfut/acrnn.
The first layer is created by a function that defines two variables. In the first fold these two variables start out freshly initialized, but in the second fold they already contain NaN values. How can I reset these variables inside the function?
The channel-wise attention function:
import tensorflow as tf

def channel_wise_attention(feature_map, H, W, C, weight_decay=0.00004, scope='', reuse=None):
    """This method is used to add spatial attention to model.
    Parameters
    ---------------
    @feature_map: Which visual feature map as branch to use.
    @K: Map `H*W` units to K units. Now unused.
    @reuse: reuse variables if use multi gpus.
    Return
    ---------------
    @attended_fm: Feature map with Channel-Wise Attention.
    """
    with tf.variable_scope(scope, 'ChannelWiseAttention', reuse=reuse):
        # Tensorflow's tensor is in BHWC format. H for row split while W for column split.
        # _, H, W, C = tuple([int(x) for x in feature_map.get_shape()])
        weight = tf.get_variable("weight", [C, C],
                                 dtype=tf.float32,
                                 initializer=tf.initializers.orthogonal,
                                 regularizer=tf.contrib.layers.l2_regularizer(weight_decay))
        bias = tf.get_variable("bias", [C],
                               dtype=tf.float32,
                               initializer=tf.initializers.zeros)
        print(weight)
        print(bias)
        print('**************************************************************')
        transpose_feature_map = tf.transpose(tf.reduce_mean(feature_map, [1, 2], keep_dims=True),
                                             perm=[0, 3, 1, 2])
        channel_wise_attention_fm = tf.matmul(tf.reshape(transpose_feature_map,
                                                         [-1, C]), weight) + bias
        channel_wise_attention_fm = tf.nn.sigmoid(channel_wise_attention_fm)
        # channel_wise_attention_fm = tf.clip_by_value(tf.nn.relu(channel_wise_attention_fm),
        #                                              clip_value_min=0,
        #                                              clip_value_max=1)
        attention = tf.reshape(tf.concat([channel_wise_attention_fm] * (H * W),
                                         axis=1), [-1, H, W, C])
        attended_fm = attention * feature_map
        return attended_fm, weight, bias
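This is how the NaNs show up: if I fetch the two variables at the start of each fold, they are fine in the first fold but contain NaN from the second fold onward. A minimal sketch of such a check (it assumes a live tf.Session named sess and the weight/bias tensors returned by the function above; the fold index is just for printing):

import numpy as np

def report_attention_vars(sess, weight, bias, fold):
    # Fetch the current values of the channel-wise attention variables.
    w_val, b_val = sess.run([weight, bias])
    # In the first fold both checks are False; once the second fold starts, both become True.
    print("fold %d -> weight contains NaN: %s, bias contains NaN: %s"
          % (fold, np.isnan(w_val).any(), np.isnan(b_val).any()))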
The model is:
# input placeholder
X = tf.placeholder(tf.float32, shape=[None, input_height, input_width, input_channel_num], name='X')
Y = tf.placeholder(tf.float32, shape=[None, num_labels], name='Y')
train_phase = tf.placeholder(tf.bool, name='train_phase')
keep_prob = tf.placeholder(tf.float32, name='keep_prob')

# channel-wise attention layer
X_1 = tf.transpose(X, [0, 3, 2, 1])
# print(X_1)
conv, weight, bias = channel_wise_attention(X_1, 1, window_size, n_channel, weight_decay=0.00004, scope='', reuse=None)
conv_1 = tf.transpose(conv, [0, 3, 2, 1])
# print(conv_1)

# CNN layer
conv_1 = cnn_2d.apply_conv2d(conv_1, kernel_height_1st, kernel_width_1st, input_channel_num, conv_channel_num, kernel_stride, train_phase)
print("conv 1 shape: ", conv_1.get_shape().as_list())
pool_1 = cnn_2d.apply_max_pooling(conv_1, pooling_height_1st, pooling_width_1st, pooling_stride_1st)
print("pool 1 shape: ", pool_1.get_shape().as_list())
pool_1_shape = pool_1.get_shape().as_list()
pool1_flat = tf.reshape(pool_1, [-1, pool_1_shape[1] * pool_1_shape[2] * pool_1_shape[3]])
fc_drop = tf.nn.dropout(pool1_flat, keep_prob)

# LSTM layers
lstm_in = tf.reshape(fc_drop, [-1, num_timestep, pool_1_shape[1] * pool_1_shape[2] * pool_1_shape[3]])
cells = []
for _ in range(2):
    cell = tf.contrib.rnn.BasicLSTMCell(n_hidden_state, forget_bias=1.0, state_is_tuple=True)
    cell = tf.contrib.rnn.DropoutWrapper(cell, output_keep_prob=keep_prob)
    cells.append(cell)
lstm_cell = tf.contrib.rnn.MultiRNNCell(cells)
init_state = lstm_cell.zero_state(batch_size, dtype=tf.float32)
# output ==> [batch, step, n_hidden_state]
rnn_op, states = tf.nn.dynamic_rnn(lstm_cell, lstm_in, initial_state=init_state, time_major=False)

# self-attention layer
with tf.name_scope('Attention_layer'):
    attention_op = multi_dimensional_attention(rnn_op, 64, scope=None,
                                               keep_prob=1., is_train=None, wd=0., activation='elu',
                                               tensor_dict=None, name=None)
    attention_drop = tf.nn.dropout(attention_op, keep_prob)
    y_ = cnn_2d.apply_readout(attention_drop, rnn_op.shape[2].value, num_labels)

# softmax layer: probability prediction
y_prob = tf.nn.softmax(y_, name="y_prob")
# class prediction
y_pred = tf.argmax(y_prob, 1, name="y_pred")
# y_pred = tf.cast(y_pred, tf.float32)

# cross entropy cost function
# crossE = tf.nn.softmax_cross_entropy_with_logits(logits=y_, labels=Y)
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=y_, labels=Y), name='loss')
# cost = tf.losses.softmax_cross_entropy(onehot_labels=Y, logits=y_pred)
# cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=tf.cast(tf.argmax(tf.nn.softmax(y_), 1), tf.float32), labels=Y), name='loss')

update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
    # set training SGD optimizer
    optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost)

# get correctly predicted object
correct_prediction = tf.equal(tf.argmax(tf.nn.softmax(y_), 1), tf.argmax(Y, 1))
# calculate prediction accuracy
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32), name='accuracy')
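What I am hoping for is something like the following per-fold reset (a hypothetical sketch, not my current code; sess is the training session, and weight/bias are the tensors returned by channel_wise_attention above):

# Re-initialization ops, built once after the graph above is constructed.
reset_attention = tf.variables_initializer([weight, bias])  # only the attention layer
reset_all = tf.global_variables_initializer()               # every variable in the graph

with tf.Session() as sess:
    for fold in range(10):
        # Start each fold from freshly initialized weights instead of the
        # (possibly NaN) values left over from the previous fold.
        sess.run(reset_all)
        # sess.run(reset_attention)  # alternative: reset only weight and bias
        # ... run the training loop for this fold here ...

Is this the right way to reset the variables between folds, or does the reset have to happen inside the channel_wise_attention function itself?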