Keras loss object and shapes

I'm at a loss. I've been staring at this problem for a while and I'm unsure how to proceed. I've been constructing a script to train a model for object detection based on a dataset I've compiled. I've been going along with some example scripts and modifying some code.

Here is my code:

import os
from tempfile import gettempdir

import tensorflow as tf
from tensorflow.keras import layers, Model, Sequential

import numpy as np

from clearml import Task, Dataset, TaskTypes

def parse_tfrecord_fn(example):
        feature_description = {
            image:[], tf.string),
            image_width:[], tf.int64),
            image_height:[], tf.int64),
            cat_id:[], tf.int64)
        example =, feature_description)
        example[image] =[image], channels=3)
        example[bbox] = tf.sparse.to_dense(example[bbox])

        return example

def prepare_sample(features):
    image_size = (tf.cast(512, dtype=tf.float32), tf.cast(512, dtype=tf.float32))
    image = tf.image.resize(features[image], size=(512, 512))
    bbox = features[bbox]

    image_width = tf.cast(features[image_width], dtype=tf.float32)
    image_height = tf.cast(features[image_height], dtype=tf.float32)

    xmin = bbox[0] * image_size[0] / image_width
    ymin = bbox[1] * image_size[1] / image_height
    xmax = bbox[2] * image_size[0] / image_width
    ymax = bbox[3] * image_size[1] / image_height

    return image, tf.convert_to_tensor([xmin, ymin, xmax, ymax], dtype=tf.float32)

def get_dataset(filenames, batch_size):
    dataset = (, num_parallel_reads=AUTOTUNE)
        .map(parse_tfrecord_fn, num_parallel_calls=AUTOTUNE)
        .map(prepare_sample, num_parallel_calls=AUTOTUNE)
        .shuffle(batch_size * 10)
    return dataset

task = Task.init(project_name=HazardSymbols, task_name=Train Object-detection model, reuse_last_task_id=True,

params = {
    number_of_epochs: 20,
    batch_size: 64,
    dropout: 0.25,
    base_lr: 0.001,
    momentum: 0.9,
    loss_report: 100,
    dataset_split: 0.90
params = task.connect(params)

dataset_path = Dataset.get(
    dataset_name=hazardsymbol-objectdetection, dataset_project=HazardSymbols

full_dataset = get_dataset([os.path.join(dataset_path, dataset.tfrecord)], params[batch_size])

ds_len = 0
for r in full_dataset:
    ds_len = ds_len + 1

train_size = int(params[dataset_split] * ds_len)
test_size = int((1 - int(params[dataset_split])) * ds_len)

train_dataset = full_dataset.take(train_size)
test_dataset = full_dataset.skip(train_size)

input_shape = (512, 512, 3)

model = Sequential([
    layers.experimental.preprocessing.Rescaling(1./255, name=bl_1),
    layers.Conv2D(16, 3, padding=same, activation=relu, name=bl_2),
    layers.Conv2D(32, 3, padding=same, activation=relu, name=bl_4),
    layers.Conv2D(64, 3, padding=same, activation=relu, name=bl_6),
    layers.Dense(128, activation=relu, name=bb_1),
    layers.Dense(64, activation=relu, name=bb_2),
    layers.Dense(32, activation=relu, name=bb_3),
    layers.Dense(4, activation=sigmoid, name=bb_head)

loss_obj = tf.keras.losses.SparseCategoricalCrossentropy(
optimizer = tf.keras.optimizers.Adam()
model.compile(loss=loss_obj, optimizer=optimizer)

train_loss = tf.keras.metrics.Mean(name=train_loss, dtype=tf.float32)
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name=train_accuracy)

test_loss = tf.keras.metrics.Mean(name=test_loss, dtype=tf.float32)
test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name=test_accuracy)

def train_step(images, labels):
    with tf.GradientTape() as tape:
        predictions = model(images)
        loss = loss_obj(labels, predictions)
    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))

    train_accuracy(labels, predictions)

def test_step(images, labels):
    predictions = model(images)
    t_loss = loss_obj(labels, predictions)

    test_accuracy(labels, predictions)

train_log_dir = os.path.join(gettempdir(), 'logs', 'gradient_tape', 'train')
test_log_dir = os.path.join(gettempdir(), 'logs', 'gradient_tape', 'test')
train_summary_writer = tf.summary.create_file_writer(train_log_dir)
test_summary_writer = tf.summary.create_file_writer(test_log_dir)

ckpt = tf.train.Checkpoint(step=tf.Variable(1), optimizer=optimizer, net=model)
manager = tf.train.CheckpointManager(ckpt, os.path.join(gettempdir(), 'tf_ckpts'), max_to_keep=3)
if manager.latest_checkpoint:
    print(Restored from {}.format(manager.latest_checkpoint))
    print(Initializing from scratch.)

# Start training
for epoch in range(params[number_of_epochs]):
    for images, labels in train_dataset:
        train_step(images, labels)
        with train_summary_writer.as_default():
            tf.summary.scalar('loss', train_loss.result(), step=epoch)
            tf.summary.scalar('accuracy', train_accuracy.result(), step=epoch)

    if int(ckpt.step) % 1 == 0:
        save_path =
        print(Saved checkpoint for step {}: {}.format(int(ckpt.step), save_path))

    for test_images, test_labels in test_dataset:
        test_step(test_images, test_labels)
        with test_summary_writer.as_default():
            tf.summary.scalar('loss', test_loss.result(), step=epoch)
            tf.summary.scalar('accuracy', test_accuracy.result(), step=epoch)

    template = 'Epoch {}, Loss: {}, Accuracy: {}, Test Loss: {}, Test Accuracy: {}'

    # Reset the metrics for the next epoch

The error I'm getting looks like this:

2022-05-26 11:24:18.347184: I tensorflow/core/common_runtime/gpu/] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 7446 MB memory:  - device: 0, name: NVIDIA GeForce RTX 3080, pci bus id: 0000:01:00.0, compute capability: 8.6
2022-05-26 11:24:18.348946: I tensorflow/core/common_runtime/gpu/] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 2153 MB memory:  - device: 1, name: NVIDIA GeForce GTX 1650, pci bus id: 0000:02:00.0, compute capability: 7.5
Initializing from scratch.
(64, 4)
(64, 4)
C:\Users\Hiromi\AppData\Local\Programs\Python\Python310\lib\site-packages\tensorflow\python\util\ UserWarning: `sparse_categorical_crossentropy` received `from_logits=True`, but the `output` argument was produced by a sigmoid or softmax activation and thus does not represent logits. Was this intended?
  return dispatch_target(*args, **kwargs)
Traceback (most recent call last):
  File c:\Users\Hiromi\Dropbox\Development\Python\HazardSymbols\, line 157, in module
    train_step(images, labels)
  File C:\Users\Hiromi\AppData\Local\Programs\Python\Python310\lib\site-packages\tensorflow\python\util\, line 153, in error_handler
    raise e.with_traceback(filtered_tb) from None
  File C:\Users\Hiromi\AppData\Local\Temp\, line 13, in tf__train_step
    loss = ag__.converted_call(ag__.ld(loss_obj), (ag__.ld(labels), ag__.ld(predictions)), None, fscope)
  File C:\Users\Hiromi\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\, line 139, in __call__
    losses = call_fn(y_true, y_pred)
  File C:\Users\Hiromi\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\, line 243, in call
    return ag_fn(y_true, y_pred, **self._fn_kwargs)
  File C:\Users\Hiromi\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\, line 1860, in sparse_categorical_crossentropy
    return backend.sparse_categorical_crossentropy(
  File C:\Users\Hiromi\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\, line 5238, in sparse_categorical_crossentropy
    res = tf.nn.sparse_softmax_cross_entropy_with_logits(
ValueError: in user code:

    File c:\Users\Hiromi\Dropbox\Development\Python\HazardSymbols\, line 124, in train_step  *
        loss = loss_obj(labels, predictions)
    File C:\Users\Hiromi\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\, line 139, in __call__  **
        losses = call_fn(y_true, y_pred)
    File C:\Users\Hiromi\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\, line 243, in call  **
        return ag_fn(y_true, y_pred, **self._fn_kwargs)
    File C:\Users\Hiromi\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\, line 1860, in sparse_categorical_crossentropy
        return backend.sparse_categorical_crossentropy(
    File C:\Users\Hiromi\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\, line 5238, in sparse_categorical_crossentropy
        res = tf.nn.sparse_softmax_cross_entropy_with_logits(

    ValueError: `labels.shape` must equal `logits.shape` except for the last dimension. Received: labels.shape=(256,) and logits.shape=(64, 4)

As you can see, I print the shapes of both the label tensor and the predictions and they are identical, however, when passed to the loss object, it seems the shape of the label tensor has changed. I'm not sure what's going on. The passage of the error log with the shapes is shown below:

Initializing from scratch.
(64, 4)
(64, 4)

The first is the label tensor shape and the second is the shape of the predictions. If anyone can point me in the right direction I would be very grateful.

Have a look at the documentation of the loss function you're using, SparseCategoricalCrossentropy expects the values of y_true to be of shape batch_size, whereas you are providing it with a shape of batch_size, num_classes. The documentation also shows you examples of what the y_true array should look like.


