Overfitting problem: high training accuracy and low validation accuracy for image classification
I want to define a model to predict 3 categories of images. I'm still learning the field :-) I have 1500 images (500 for each category) in 3 directories. I've read many suggestions on this blog:
- use a simple loss function
- use dropout
- use shuffle
I've applied these tricks but the model still overfits ... This is the code I'm using, any suggestion?
# Image dimensions and convolution kernel size shared by the generators and the model.
dim_x, dim_y = 500, 200
dim_kernel = (3, 3)

# A single generator rescales pixel values by 1/255 and holds out 30% of the
# images as the validation subset.
data_gen = ImageDataGenerator(rescale=1 / 255, validation_split=0.3)
data_dir = image_path

# Options common to both subsets, so training and validation are read identically.
flow_options = dict(
    target_size=(dim_x, dim_y),
    batch_size=16,
    class_mode='categorical',
)

train_data_generator = data_gen.flow_from_directory(
    data_dir, subset='training', **flow_options
)
validation_data_generator = data_gen.flow_from_directory(
    data_dir, subset='validation', **flow_options
)
# Two convolution / max-pool / dropout stages followed by a small dense head
# that ends in a 3-way softmax (one output per image category).
model = Sequential([
    Conv2D(
        filters=32,
        kernel_size=dim_kernel,
        input_shape=(dim_x, dim_y, 3),
        activation='relu',
    ),
    MaxPool2D(pool_size=(2, 2)),
    Dropout(0.40),
    Conv2D(filters=128, kernel_size=dim_kernel, activation='relu'),
    MaxPool2D(pool_size=(2, 2)),
    Dropout(0.20),
    Flatten(),
    Dense(64, activation='relu'),
    Dense(16, activation='relu'),
    Dense(3, activation='softmax'),
])

# Categorical cross-entropy matches the one-hot labels produced by
# class_mode='categorical'; accuracy is tracked for the stopping callback.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
class MyThresholdCallback(Callback):
    """Stop training once training and validation accuracy both reach a threshold.

    Args:
        threshold: Value that both the ``accuracy`` and ``val_accuracy``
            metrics must reach before training is stopped.
    """

    def __init__(self, threshold):
        super().__init__()
        self.threshold = threshold

    def on_epoch_end(self, epoch, logs=None):
        """Check the epoch's metrics and request a stop when the threshold is met.

        Args:
            epoch: Index of the epoch that just finished (unused).
            logs: Mapping of metric names to values for this epoch; may be None.
        """
        # ``logs`` defaults to None, so it must not be subscripted blindly.
        logs = logs or {}
        accuracy = logs.get("accuracy")
        val_accuracy = logs.get("val_accuracy")

        # Without both metrics there is nothing to compare against; keep training.
        if accuracy is None or val_accuracy is None:
            return

        # NOTE(review): the comparison operators were lost in the posted snippet;
        # ">=" on both metrics is assumed -- confirm against the original intent.
        if accuracy >= self.threshold and val_accuracy >= self.threshold:
            self.model.stop_training = True
# Stopping callback constructed with a threshold of 0.8.
mtc = MyThresholdCallback(0.8)

# Up to 40 epochs of 10 training steps each, validating on 10 batches per epoch.
model.fit(
    train_data_generator,
    validation_data=validation_data_generator,
    epochs=40,
    steps_per_epoch=10,
    validation_steps=10,
    shuffle=True,
    callbacks=[mtc],
)
Below is the training history:
loss accuracy val_loss val_accuracy
0 35.930771 0.300000 1.096471 0.3875
1 1.510384 0.337500 1.099458 0.2750
2 1.104813 0.362500 1.098945 0.3000
3 1.104424 0.475000 1.098440 0.3125
4 1.110834 0.325000 1.099410 0.2750
5 1.086059 0.500000 1.092703 0.4125
6 1.019856 0.575000 1.098669 0.3125
7 0.970792 0.575000 1.081574 0.4125
8 0.939478 0.625000 1.062269 0.3750
9 0.758962 0.675000 1.197168 0.4125
10 0.624016 0.775000 1.015282 0.3750
11 0.508740 0.862500 1.300160 0.3500
12 0.430987 0.851351 1.648522 0.2875
37 0.003358 1.000000 3.144989 0.2875
38 0.011525 1.000000 3.506971 0.2125
39 0.010525 1.000000 3.989878 0.3125
Topic overfitting tensorflow image-classification
Category Data Science