How to properly save and load an intermediate model in Keras?
I'm working with a model that involves 3 stages of 'nesting' of models in Keras.
Conceptually the first is a transfer learning CNN model, for example MobileNetV2. (Model 1) This is then wrapped by a model that consists of a small DNN. (Model 2) Finally during training these are all wrapped by a model that concatenates multiple outputs from model 2, calculates loss, and then backpropagates into model 2 and in the future model 1. (Model 3)
For inference later I simply want to save the weights of models 1 and 2. I have had multiple issues with this: some appear to be caused by bugs in certain versions of Keras (I'm using 2.2.2), and loading the weights more explicitly appears to result in randomized weights, so that isn't working correctly either. Rather than troubleshooting each failing scenario, I'm simply trying to determine the best practice for saving intermediate nested models.
def create_model_2(IN_DIM=(224, 224, 3), OUT_DIM=128):
    """Build the "tower" model (Model 2): a frozen MobileNetV2 wrapped by a small dense head.

    Args:
        IN_DIM: Input image shape, passed to both ``Input`` and ``MobileNetV2``.
        OUT_DIM: Number of units in the last Dense layer (width of the output vector).

    Returns:
        A ``Model`` mapping an image input to a vector of width ``OUT_DIM``
        that has been L2-normalized along axis 1.
    """
    # First define the transfer learning model (Model 1).
    initial_img = Input(shape=IN_DIM)
    black_box = MobileNetV2(include_top=False, input_shape=IN_DIM,
                            weights="imagenet", pooling="avg")(initial_img)
    # Wrap using the Input created above; the previous code passed the
    # undefined name `b_img` here, which raises NameError.
    bb_model = Model(initial_img, black_box)

    # Freeze every layer of the transfer learning model.
    for layer in bb_model.layers:
        layer.trainable = False

    #########################
    ###### TOWER BLOCK ######
    #########################
    img = Input(shape=IN_DIM)
    x = bb_model(img)

    # Add some layers to try to learn.
    x = Dense(64, activation='relu', name='new_fc0')(x)
    x = Dense(OUT_DIM, activation='relu', name='new_fc1')(x)

    # L2 norm to project to unit sphere.
    out = Lambda(lambda tensor: K.l2_normalize(tensor, axis=1), name='final_l2_norm')(x)

    return Model(img, out)
Then the structure of Model 3:
# Input shape depends on the backbone: mobilenetv2=(224, 224, 3) Iv3=(299, 299, 3)
IN_DIM = (224, 224, 3)
OUT_DIM = 32

# A single tower instance is reused for all three images of a triplet.
model_2 = create_model_2(IN_DIM, OUT_DIM)

# Image inputs for the triplet, created in anchor / positive / negative order.
anchor_img, pos_img, neg_img = [Input(shape=IN_DIM) for _ in range(3)]

# Run each image through the same model_2 to get its vector.
anchor_in, positive_in, negative_in = [
    model_2(image) for image in (anchor_img, pos_img, neg_img)
]

# Join the three vectors along the last axis so the triplet loss
# "processor" receives them as one large vector.
merged_vector = concatenate([anchor_in, positive_in, negative_in], axis=-1)

# The training-time wrapper: three image inputs in, one merged vector out.
model_3 = Model(inputs=[anchor_img, pos_img, neg_img], outputs=merged_vector)
The model seems to run and train just fine:
# Compile and train the triplet wrapper. The model built for training in this
# file is `model_3`; the previous code called an undefined `final_model`.
OPTIMIZER = SGD(lr=learning_rate, momentum=0.9)
model_3.compile(optimizer=OPTIMIZER, loss=triplet_loss, metrics=[avg_AP_dist, avg_AN_dist])
history = model_3.fit_generator(
    generator=training_generator,
    epochs=5,  # short for debugging
    use_multiprocessing=True,
    workers=4,
)
But saving the model after training is unclear:
# Option A: write only the weights of the tower (model_2); the triplet
# "booster" wrapper around it is not saved.
weights_path_template = "../../models/{:}_epoch_{:}_weights.h5"
out_file = weights_path_template.format(MODEL_DESC, 5)
model_2.save_weights(out_file)
print("Saved: {:}".format(out_file))
Or:
# Option B: call save() on the tower (model_2) instead of save_weights();
# the triplet "booster" wrapper around it is not saved. Note the target
# file name still ends in "_weights.h5".
model_path_template = "../../models/{:}_epoch_{:}_weights.h5"
out_file = model_path_template.format(MODEL_DESC, 5)
model_2.save(out_file)
print("Saved: {:}".format(out_file))
Or something else?
There currently seem to be two failure modes. If I try to load just the weights into a newly instantiated model_2 instance, I get:
ValueError: axes don't match array
From searching, this may be related to a bug in Keras. If I instead save the whole model (.save() rather than .save_weights()), it loads without complaint, but the inference is not stable and appears to be horrible/random.
Thank you.
Still getting the following traceback:
snip/src/notebooks/vectorizer.py in load_model()
65
66 # load the weights
--- 67 loaded_model.load_weights(weights_path)
68
69 print("Model ready")
/opt/conda/lib/python3.6/site-packages/keras/engine/network.py in load_weights(self, filepath, by_name, skip_mismatch, reshape)
1164 else:
1165 saving.load_weights_from_hdf5_group(
- 1166 f, self.layers, reshape=reshape)
1167
1168 def _updated_config(self):
/opt/conda/lib/python3.6/site-packages/keras/engine/saving.py in load_weights_from_hdf5_group(f, layers, reshape)
1043 original_keras_version,
1044 original_backend,
- 1045 reshape=reshape)
1046 if len(weight_values) != len(symbolic_weights):
1047 raise ValueError('Layer #' + str(k) +
/opt/conda/lib/python3.6/site-packages/keras/engine/saving.py in preprocess_weights_for_loading(layer, weights, original_keras_version, original_backend, reshape)
680 weights = convert_nested_time_distributed(weights)
681 elif layer.__class__.__name__ in ['Model', 'Sequential']:
-- 682 weights = convert_nested_model(weights)
683
684 if original_keras_version == '1':
/opt/conda/lib/python3.6/site-packages/keras/engine/saving.py in convert_nested_model(weights)
668 weights=weights[:num_weights],
669 original_keras_version=original_keras_version,
-- 670 original_backend=original_backend))
671 weights = weights[num_weights:]
672 return new_weights
/opt/conda/lib/python3.6/site-packages/keras/engine/saving.py in preprocess_weights_for_loading(layer, weights, original_keras_version, original_backend, reshape)
680 weights = convert_nested_time_distributed(weights)
681 elif layer.__class__.__name__ in ['Model', 'Sequential']:
-- 682 weights = convert_nested_model(weights)
683
684 if original_keras_version == '1':
/opt/conda/lib/python3.6/site-packages/keras/engine/saving.py in convert_nested_model(weights)
656 weights=weights[:num_weights],
657 original_keras_version=original_keras_version,
-- 658 original_backend=original_backend))
659 weights = weights[num_weights:]
660
/opt/conda/lib/python3.6/site-packages/keras/engine/saving.py in preprocess_weights_for_loading(layer, weights, original_keras_version, original_backend, reshape)
799 weights[0] = np.reshape(weights[0], layer_weights_shape)
800 elif layer_weights_shape != weights[0].shape:
-- 801 weights[0] = np.transpose(weights[0], (3, 2, 0, 1))
802 if layer.__class__.__name__ == 'ConvLSTM2D':
803 weights[1] = np.transpose(weights[1], (3, 2, 0, 1))
/opt/conda/lib/python3.6/site-packages/numpy/core/fromnumeric.py in transpose(a, axes)
596
597 """
-- 598 return _wrapfunc(a, 'transpose', axes)
599
600
/opt/conda/lib/python3.6/site-packages/numpy/core/fromnumeric.py in _wrapfunc(obj, method, *args, **kwds)
49 def _wrapfunc(obj, method, *args, **kwds):
50 try:
--- 51 return getattr(obj, method)(*args, **kwds)
52
53 # An AttributeError occurs if the object does not have
ValueError: axes don't match array