Neural network always outputting 1

I'm trying to develop a small network for the Aerial Cactus Identification challenge.

This is a binary classification challenge (0 = no cactus, 1 = cactus), but my network always outputs 1.

I have identified that the network always outputs values >= 0.5, so the sigmoid output always rounds to 1. However, I can't understand why. I built a simple network that works when implemented with Keras, but I'm reimplementing it with tf.nn for learning purposes and can't make it work.

My network architecture:

2 Conv 64x3 + Maxpooling

2 Conv 128x3 + Maxpooling

Flatten

Dense 1024

Dense 512

Dense 1
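
For reference, the Keras version that works looks roughly like this (a sketch, assuming 32x32 RGB inputs as in the cactus dataset; that input size is also where the 3200 flattened features below come from, since 5 * 5 * 128 = 3200):

    # Rough Keras equivalent of the architecture above (a sketch; the exact
    # original model is not shown in this post).
    from tensorflow import keras

    model = keras.Sequential([
        keras.layers.Conv2D(64, 3, activation='relu', input_shape=(32, 32, 3)),
        keras.layers.Conv2D(64, 3, activation='relu'),
        keras.layers.MaxPooling2D(2),
        keras.layers.Conv2D(128, 3, activation='relu'),
        keras.layers.Conv2D(128, 3, activation='relu'),
        keras.layers.MaxPooling2D(2),
        keras.layers.Flatten(),  # 5 * 5 * 128 = 3200 features
        keras.layers.Dense(1024, activation='relu'),
        keras.layers.Dense(512, activation='relu'),
        keras.layers.Dense(1, activation='sigmoid'),
    ])
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])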

I create the layers this way:

    import sys

    import tensorflow as tf

    initializer = tf.keras.initializers.glorot_uniform()

    def new_weights(shape, name='W', glorot=False):
        # Glorot uniform for conv kernels, small random normal for the rest.
        content = tf.random.normal(shape, stddev=0.03)
        if glorot:
            content = initializer(shape)
        return tf.Variable(content, name=name)

    def conv_block(inputs, n_filters, n_size, weights, stage=0):
        # Two 3x3 VALID convolutions with ReLU, followed by 2x2 max-pooling.
        weights1 = weights[f'W_{stage}_1']
        bias = weights[f'b_{stage}_1']

        layer = tf.nn.conv2d(
            inputs,
            filters=weights1,
            strides=[1, 1, 1, 1],
            padding='VALID',
            name=f'W_{stage}_1'
        )
        layer += bias
        activated = tf.nn.relu(layer)

        weights2 = weights[f'W_{stage}_2']
        bias2 = weights[f'b_{stage}_2']

        layer = tf.nn.conv2d(
            activated,
            filters=weights2,
            strides=[1, 1, 1, 1],
            padding='VALID',
            name=f'W_{stage}_2'
        )
        layer += bias2
        activated = tf.nn.relu(layer)
        activated = tf.nn.max_pool(activated, ksize=2, strides=2, padding='VALID')
        print(stage, activated.shape)
        return activated

    def flatten(conv):
        # 3200 = 5 * 5 * 128: the spatial size left after two VALID conv
        # blocks with 2x2 pooling on a 32x32 input.
        flat = tf.reshape(conv, [-1, 3200])
        print('flat', flat.shape)
        return flat

    def dense_block(flat, nb, weights, stage=0, relu=True):
        weights1 = weights[f'Wd_{stage}_1']
        bias = weights[f'bd_{stage}_1']

        res = tf.matmul(flat, weights1) + bias
        if relu:
            res = tf.nn.relu(res)
        print('d', stage, res.shape)
        return res

Here is my assembling of the layers:

    weights = {
        'W_0_1': new_weights([3, 3, 3, 64], 'W_0_1', glorot=True),
        'b_0_1': new_weights([64], 'b_0_1'),
        'W_0_2': new_weights([3, 3, 64, 64], 'W_0_2', glorot=True),
        'b_0_2': new_weights([64], 'b_0_2'),
        'W_1_1': new_weights([3, 3, 64, 128], 'W_1_1', glorot=True),
        'b_1_1': new_weights([128], 'b_1_1'),
        'W_1_2': new_weights([3, 3, 128, 128], 'W_1_2', glorot=True),
        'b_1_2': new_weights([128], 'b_1_2'),
        'Wd_0_1': new_weights([3200, 1024], 'Wd_0_1'),
        'bd_0_1': new_weights([1024], 'bd_0_1'),
        'Wd_1_1': new_weights([1024, 512], 'Wd_1_1'),
        'bd_1_1': new_weights([512], 'bd_1_1'),
        'Wd_2_1': new_weights([512, 1], 'Wd_2_1'),
        'bd_2_1': new_weights([1], 'bd_2_1'),
    }

    def process_one_batch(x, y):
        block1 = conv_block(x, 64, 3, weights, 0)
        block2 = conv_block(block1, 128, 3, weights, 1)
        flat = flatten(block2)

        dense1 = dense_block(flat, 1024, weights, 0)
        dense2 = dense_block(dense1, 512, weights, 1)
        dense2 = dense_block(dense2, 1, weights, 2, False)  # raw logits (no ReLU)

        print_op = tf.print("dense2:", dense2, tf.nn.sigmoid(dense2), output_stream=sys.stdout)

        # with tf.control_dependencies([print_op]):
        res = tf.nn.sigmoid(dense2)
        return res

    NB_EPOCHS = 5

    def create_dataset(X, y, batch_size=BATCH_SIZE, nb_epochs=NB_EPOCHS, batch=True):
        dataset = tf.data.Dataset.from_tensor_slices((X, y))
        dataset = dataset.map(my_process_path)  # defined elsewhere; loads each image
        if batch:
            dataset = dataset.batch(batch_size)
        dataset = dataset.repeat(nb_epochs)
        dataset = dataset.prefetch(buffer_size=2)
        iterator = tf.data.make_one_shot_iterator(dataset)
        next_element = iterator.get_next()
        y_ = process_one_batch(next_element[0], next_element[1])
        return dataset, next_element, y_

    train_ds, (train_x, train_y), prediction = create_dataset(X_train.values, y_train.values)
    test_ds, (test_x, test_y), test_prediction = create_dataset(X_test.values, y_test.values, batch=True)

And the loss + training loop part:

    cross_entropy
    optimiser = tf.compat.v1.train.AdamOptimizer(learning_rate=0.001).minimize(cross_entropy)
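    # NOTE: the definition of cross_entropy is cut off above. A typical TF1
    # choice (an assumption, not necessarily what was used here) is
    #
    #   cross_entropy = tf.reduce_mean(
    #       tf.nn.sigmoid_cross_entropy_with_logits(
    #           labels=tf.cast(train_y, tf.float32),
    #           logits=dense2_logits))  # the dense2 value *before* tf.nn.sigmoid
    #
    # This loss applies the sigmoid internally, so it must receive raw logits;
    # passing an already-sigmoided output applies the sigmoid twice and badly
    # distorts the training signal.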
    def get_acc(y_true, y_pred, threshold=0.5):
        to_check = tf.cast(tf.round(y_pred), tf.int64)
        correct_prediction = tf.equal(y_true, to_check)
        print_op = tf.print("tensors:", y_true, to_check, y_pred, correct_prediction, output_stream=sys.stdout)

        #with tf.control_dependencies([print_op]):
        accuracy_ = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
        return accuracy_

    accuracy = get_acc(test_y, tf.reshape(test_prediction, [-1]))
    accuracy_train = get_acc(train_y, tf.reshape(prediction, [-1]))
    # setup the initialisation operator
    init_op = tf.global_variables_initializer()

    with tf.Session() as sess:
        train_steps = int(len(X_train.values) / BATCH_SIZE)
        val_steps = int(len(X_test.values) / BATCH_SIZE)
        # initialise the variables
        sess.run(init_op)
        print('Init')
        for epoch in range(NB_EPOCHS):
            avg_cost = 0
            train_acc = 0
            for i in range(train_steps):
                _, c, ac = sess.run([optimiser, cross_entropy, accuracy_train])
                avg_cost += c
                train_acc += ac
            avg_acc = 0
            for i in range(val_steps):
                acc = sess.run(accuracy)
                avg_acc += acc
            print('train_acc: ', train_acc / train_steps)
            print(train_steps, val_steps)
            print("Epoch:", (epoch + 1), "cost =", "{:.3f}, acc: {:.3f}".format(avg_cost / train_steps, avg_acc / val_steps))
        print("\nTraining complete!")

I ran my previous Keras model on a dataset created the same way and it trains fine, so I suspect the mistake is in the network construction, but I can't figure out why the values come out so high. Any help would be really appreciated.

Thanks!


Most likely there is a bug in your implementation.

Your code could be revised to make bugs easier to find:

  • You are reusing the same variables over and over (e.g., layer and activated). Unique variable names make it easier to track state.

  • The helper functions may not be pulling their weight. If you inline the code, it is easier to follow what happens to each tensor.

  • You reimplement functionality unnecessarily. For example, TensorFlow ships a built-in accuracy metric, tf.keras.metrics.Accuracy. Replacing your hand-rolled implementations with library code might eliminate the bug; see the sketch below.
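
As a rough sketch of that last point, assuming TF 1.x graph mode as in your code: in a session the convenient stateful built-in is tf.compat.v1.metrics.accuracy (the graph-mode counterpart of tf.keras.metrics.Accuracy), and the built-in binary loss is tf.nn.sigmoid_cross_entropy_with_logits. The names logits and labels below are placeholders for your dense2 output and ground-truth tensor:

    # Sketch only: `logits` is the raw dense2 output (before tf.nn.sigmoid)
    # and `labels` is the integer ground-truth tensor from the dataset.
    logits = tf.reshape(logits, [-1])

    # Built-in loss: it applies the sigmoid internally, so it must receive
    # raw logits, never sigmoid outputs.
    cross_entropy = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(
            labels=tf.cast(labels, tf.float32),
            logits=logits))

    # Built-in streaming accuracy: returns (value, update_op).
    accuracy, accuracy_update = tf.compat.v1.metrics.accuracy(
        labels=labels,
        predictions=tf.round(tf.sigmoid(logits)))

    # Metric state lives in local variables, so initialise those as well.
    init_op = tf.group(tf.compat.v1.global_variables_initializer(),
                       tf.compat.v1.local_variables_initializer())

During training you would sess.run(accuracy_update) on each batch and read accuracy at the end of the epoch. The key detail is that the loss consumes raw logits: if your (not shown) cross_entropy uses sigmoid_cross_entropy_with_logits but receives the output of tf.nn.sigmoid, the sigmoid is applied twice, which is a classic source of exactly this kind of symptom.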
