Neural network always outputting 1
I'm trying to develop a small network for the Aerial Cactus Identification challenge.
This is a binary classification problem (0 = no cactus, 1 = cactus), but my network always outputs 1.
I have identified that the network always outputs values >= 0.5, so the sigmoid output always gets classified as 1, but I can't understand why. The same simple network works when implemented with Keras; I'm rebuilding it with tf.nn for learning purposes and can't make it work.
My network architecture:
2 Conv 64x3 + Maxpooling
2 Conv 128x3 + Maxpooling
Flatten
Dense 1024
Dense 512
Dense 1
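For reference, the Keras model that does work is equivalent to something like this (a reconstruction from the architecture above; details such as the 32x32x3 input shape and 'valid' padding are assumptions chosen to match the tf.nn code below):

import tensorflow as tf

# Assumed Keras baseline: 32x32 RGB cactus images, 'valid' padding, 2x2 pooling.
model = tf.keras.Sequential([
    tf.keras.layers.Conv2D(64, 3, activation='relu', input_shape=(32, 32, 3)),
    tf.keras.layers.Conv2D(64, 3, activation='relu'),
    tf.keras.layers.MaxPooling2D(2),
    tf.keras.layers.Conv2D(128, 3, activation='relu'),
    tf.keras.layers.Conv2D(128, 3, activation='relu'),
    tf.keras.layers.MaxPooling2D(2),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(1024, activation='relu'),
    tf.keras.layers.Dense(512, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid'),
])
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])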
I create the layers this way:
initializer = tf.keras.initializers.glorot_uniform()
def new_weights(shape, name='W', glorot=False):
    content = tf.random.normal(shape, stddev=0.03)
    if glorot:
        content = initializer(shape)
    return tf.Variable(content)
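Under this helper, conv kernels get the Glorot initializer while dense weights and all biases are drawn from a normal with stddev 0.03 (the name argument is accepted but never used). For example:

w_conv = new_weights([3, 3, 3, 64], glorot=True)  # Glorot-uniform conv kernel
w_dense = new_weights([3200, 1024])               # normal(stddev=0.03) dense weights
b = new_weights([64])                             # biases also get normal noise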
def conv_block(inputs, n_filters, n_size, weights, stage=0):
    shape = inputs.shape
    weights1 = weights[f'W_{stage}_1']
    bias = weights[f'b_{stage}_1']
    layer = tf.nn.conv2d(
        inputs,
        filters=weights1,
        strides=[1, 1, 1, 1],
        padding='VALID',
        name=f'W_{stage}_1'
    )
    layer += bias
    activated = tf.nn.relu(layer)
    weights2 = weights[f'W_{stage}_2']
    bias2 = weights[f'b_{stage}_2']
    layer = tf.nn.conv2d(
        activated,
        filters=weights2,
        strides=[1, 1, 1, 1],
        padding='VALID',
        name=f'W_{stage}_2'
    )
    layer += bias2
    activated = tf.nn.relu(layer)
    activated = tf.nn.max_pool(activated, ksize=2, strides=2, padding='VALID')
    print(stage, activated.shape)
    return activated, weights
def flatten(conv):
    flat = tf.reshape(conv, [-1, 3200])
    print('flat', flat.shape)
    return flat
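The hard-coded 3200 follows from a shape trace, assuming 32x32 RGB inputs as in the cactus dataset: each 'VALID' 3x3 conv trims 2 pixels per dimension and each 2x2 pool halves the size, so 32 → 30 → 28 → 14 → 12 → 10 → 5, i.e. 5 * 5 * 128 = 3200 features. A quick check:

size = 32
for block in range(2):   # two conv blocks
    size = size - 2 - 2  # two VALID 3x3 convs, each trims 2 pixels
    size = size // 2     # 2x2 max-pool halves the spatial size
print(size * size * 128)  # 5 * 5 * 128 = 3200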
def dense_block(flat, nb, weights, stage=0, relu=True):
    weights1 = weights[f'Wd_{stage}_1']
    bias = weights[f'bd_{stage}_1']
    res = tf.matmul(flat, weights1) + bias
    if relu:
        res = tf.nn.relu(res)
    print('d', stage, res.shape)
    return res
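In other words, dense_block(flat, 1024, weights, 0) computes relu(flat @ Wd_0_1 + bd_0_1); the nb argument is only informational, since the output width actually comes from the shape of the stored weight matrix.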
Here is how I assemble the layers:
weights = {
    'W_0_1': new_weights([3, 3, 3, 64], 'W_0_1', glorot=True),
    'b_0_1': new_weights([64], 'b_0_1'),
    'W_0_2': new_weights([3, 3, 64, 64], 'W_0_2', glorot=True),
    'b_0_2': new_weights([64], 'b_0_2'),
    'W_1_1': new_weights([3, 3, 64, 128], 'W_1_1', glorot=True),
    'b_1_1': new_weights([128], 'b_1_1'),
    'W_1_2': new_weights([3, 3, 128, 128], 'W_1_2', glorot=True),
    'b_1_2': new_weights([128], 'b_1_2'),
    'Wd_0_1': new_weights([3200, 1024], 'Wd_0_1'),
    'bd_0_1': new_weights([1024], 'bd_0_1'),
    'Wd_1_1': new_weights([1024, 512], 'Wd_1_1'),
    'bd_1_1': new_weights([512], 'bd_1_1'),
    'Wd_2_1': new_weights([512, 1], 'Wd_2_1'),
    'bd_2_1': new_weights([1], 'bd_2_1'),
}
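A quick way to sanity-check these shapes (an optional snippet, not part of the original model):

# Optional sanity check: conv kernels are [height, width, in_ch, out_ch],
# dense weights are [in_features, out_features].
for name, var in weights.items():
    print(name, var.shape)
assert weights['W_0_1'].shape == (3, 3, 3, 64)
assert weights['Wd_0_1'].shape == (3200, 1024)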
def process_one_batch(x, y):
    block1 = conv_block(x, 64, 3, weights, 0)
    block2, _ = conv_block(block1[0], 128, 3, weights, 1)
    flat = flatten(block2)
    dense1 = dense_block(flat, 1024, weights, 0)
    dense2 = dense_block(dense1, 512, weights, 1)
    dense2 = dense_block(dense2, 1, weights, 2, False)
    res = dense2
    print_op = tf.print("dense2:", dense2, tf.nn.sigmoid(dense2), output_stream=sys.stdout)
    #with tf.control_dependencies([print_op]):
    res = tf.nn.sigmoid(dense2)
    return res
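When reading the dense2 debug print, note that a float32 sigmoid saturates to exactly 1.0 once its input exceeds roughly 17, so any consistently large dense2 value makes every prediction round to 1. A standalone illustration in plain NumPy (same formula as tf.nn.sigmoid):

import numpy as np

logits = np.array([0.5, 5.0, 17.0, 300.0], dtype=np.float32)
probs = 1.0 / (1.0 + np.exp(-logits))  # sigmoid
print(probs)  # ~[0.62, 0.99, 1.0, 1.0] -- everything >= 0.5 rounds to class 1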
NB_EPOCHS = 5

def create_dataset(X, y, batch_size=BATCH_SIZE, nb_epochs=NB_EPOCHS, batch=True):
    dataset = tf.data.Dataset.from_tensor_slices((X, y))
    dataset = dataset.map(my_process_path)
    if batch:
        dataset = dataset.batch(batch_size)
    dataset = dataset.repeat(nb_epochs)
    dataset = dataset.prefetch(buffer_size=2)
    iterator = tf.data.make_one_shot_iterator(dataset)
    #iterator = dataset.make_one_shot_iterator()
    next_element = iterator.get_next()
    y_ = process_one_batch(next_element[0], next_element[1])
    return dataset, next_element, y_

train_ds, (train_x, train_y), prediction = create_dataset(X_train.values, y_train.values)
test_ds, (test_x, test_y), test_prediction = create_dataset(X_test.values, y_test.values, batch=True)
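my_process_path isn't shown here; if X holds image file paths, a typical implementation for this dataset would be something like the sketch below (a hypothetical reconstruction; only the tf.io/tf.image calls are standard API):

# Hypothetical sketch of my_process_path (not shown in the original post):
# read one image file and scale its pixels to [0, 1].
def my_process_path(path, label):
    raw = tf.io.read_file(path)
    img = tf.image.decode_jpeg(raw, channels=3)
    img = tf.image.convert_image_dtype(img, tf.float32)  # uint8 -> [0, 1] floats
    img.set_shape([32, 32, 3])  # cactus images are 32x32 RGB
    return img, label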
And here is the loss and training-loop part:
cross_entropy
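The cross_entropy definition is truncated above; since prediction has already been passed through tf.nn.sigmoid, a definition consistent with the rest of the code would be a probability-based binary cross-entropy, roughly (an assumed reconstruction, not the exact original):

# Assumed reconstruction of the truncated loss definition.
# `prediction` is already a sigmoid probability, so use a plain binary
# cross-entropy (not tf.nn.sigmoid_cross_entropy_with_logits, which
# expects raw logits and would apply the sigmoid a second time).
labels = tf.cast(tf.reshape(train_y, [-1]), tf.float32)
probs = tf.reshape(prediction, [-1])
cross_entropy = tf.reduce_mean(tf.keras.backend.binary_crossentropy(labels, probs))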
optimiser = tf.compat.v1.train.AdamOptimizer(learning_rate=0.001).minimize(cross_entropy)
def get_acc(y_true, y_pred, threshold=0.5):
    to_check = tf.cast(tf.round(y_pred), tf.int64)
    correct_prediction = tf.equal(y_true, to_check)
    print_op = tf.print("tensors:", y_true, to_check, y_pred, correct_prediction, output_stream=sys.stdout)
    #with tf.control_dependencies([print_op]):
    accuracy_ = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    return accuracy_
accuracy = get_acc(test_y, tf.reshape(test_prediction, [-1]))
accuracy_train = get_acc(train_y, tf.reshape(prediction, [-1]))
# setup the initialisation operator
init_op = tf.global_variables_initializer()

with tf.Session() as sess:
    train_steps = int(len(X_train.values) / BATCH_SIZE)
    val_steps = int(len(X_test.values) / BATCH_SIZE)
    # initialise the variables
    sess.run(init_op)
    print('Init')
    for epoch in range(NB_EPOCHS):
        avg_cost = 0
        train_acc = 0
        for i in range(train_steps):
            if i % 100 == 0:
                pass
                #print(epoch, i)
            #dbg = sess.run([next_element[0], next_element[1], y_])
            _, c, ac = sess.run([optimiser, cross_entropy, accuracy_train])
            avg_cost += c
            train_acc += ac
        avg_acc = 0
        for i in range(val_steps):
            acc = sess.run(accuracy)
            avg_acc += acc
        print('train_acc: ', train_acc / train_steps)
        #print(sess.run([accuracy]))
        print(train_steps, val_steps)
        print("Epoch:", (epoch + 1), "cost =", "{:.3f}, acc: {:.3f}".format(avg_cost / train_steps, avg_acc / val_steps))

    print("\nTraining complete!")
I ran my previous Keras model on a dataset created exactly this way and it trains fine, so I suspect the mistake is in the network construction itself, but I can't figure out why my output values are so high. Any help would be really appreciated.
Thanks!