How can I export the best-performing classifier from my code as a saved model for real future use?

import pandas as pd
import matplotlib.pyplot as plt
import joblib
from sklearn import model_selection
from sklearn.linear_model import LogisticRegression
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC

# Read the CSV file
df = pd.read_csv('processed.csv', header=0, engine='python')

# Pre-processing the data
# Define X,Y features
X = df.drop('Class', axis=1)
Y = df['Class']

# prepare configuration for cross validation test harness
seed = 3
# prepare models
models = [('LR', LogisticRegression()), ('LDA', LinearDiscriminantAnalysis()), ('KNN', KNeighborsClassifier()),
          ('CART', DecisionTreeClassifier()), ('NB', GaussianNB()), ('SVM', SVC())]
# evaluate each model in turn
results = []
names = []
scoring = 'accuracy'
for name, model in models:
    kfold = model_selection.KFold(n_splits=10, shuffle=True, random_state=seed)
    cv_results = model_selection.cross_val_score(model, X, Y, cv=kfold, scoring=scoring)
    results.append(cv_results)
    names.append(name)
    msg = "%s: %f (%f)" % (name, cv_results.mean(), cv_results.std())
    print(msg)

# Saving the model for usage in the Heroku app
# ('bestmodel' is the part I don't know how to obtain -- that is the question)
joblib.dump(bestmodel, 'model.pkl')
print("Model Saved.")

# boxplot algorithm comparison
fig = plt.figure()
fig.suptitle('Algorithm Comparison')
ax = fig.add_subplot(111)
plt.boxplot(results)
ax.set_xticklabels(names)
plt.show()

Working Update

# Pre-processing the data
# Define X, Y features
X = df.drop('Class', axis=1)
Y = df['Class']
# prepare configuration for cross validation test harness
seed = 7
# prepare models
models = [('LR', LogisticRegression()), ('LDA', LinearDiscriminantAnalysis()), ('KNN', KNeighborsClassifier()),
          ('CART', DecisionTreeClassifier()), ('NB', GaussianNB()), ('SVM', SVC())]
# evaluate each model in turn
results = []
names = []
scoring = 'accuracy'
current_score = 0
best_score = 0
best_model = None
for name, model in models:
    kfold = model_selection.KFold(n_splits=10, shuffle=True, random_state=seed)
    cv_results = model_selection.cross_val_score(model, X, Y, cv=kfold, scoring=scoring)
    results.append(cv_results)
    names.append(name)
    msg = "%s: %f (%f)" % (name, cv_results.mean(), cv_results.std())
    print(msg)
    current_score = cv_results.mean()
    if current_score > best_score:
        best_score = current_score
        best_model = model

# boxplot algorithm comparison
fig = plt.figure()
fig.suptitle('Algorithm Comparison')
ax = fig.add_subplot(111)
plt.boxplot(results)
ax.set_xticklabels(names)
plt.show()

# Saving the model for usage in the Heroku app
# cross_val_score fits clones internally, so fit the winning model on the full data first
best_model.fit(X, Y)
joblib.dump(best_model, 'model.pkl')
print("Model Saved.")
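
To confirm the export round-trips, the saved file can be loaded back and smoke-tested (a minimal sketch; it only assumes the dump above succeeded):

# Reload the persisted classifier and sanity-check it on the training data.
# Training accuracy is optimistic compared to the CV scores printed above,
# so this is a smoke test, not an evaluation.
loaded_model = joblib.load('model.pkl')
print(loaded_model.score(X, Y))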

Topic: model-selection, scikit-learn, classification, predictive-modeling, machine-learning

Category: Data Science


The logic looks fine.

In your place, I would store every model's performance in a dictionary, so you keep track of all of them and can retrieve any one later if you need it.

import numpy as np

seed = 7
# prepare models
models = [('LR', LogisticRegression()), ('LDA', LinearDiscriminantAnalysis()), ('KNN', KNeighborsClassifier()),
          ('CART', DecisionTreeClassifier()), ('NB', GaussianNB()), ('SVM', SVC())]
# evaluate each model in turn
results = []
names = []
scoring = 'accuracy'
models_dict = {}
for name, model in models:
    kfold = model_selection.KFold(n_splits=10, shuffle=True, random_state=seed)
    cv_results = model_selection.cross_val_score(model, X, Y, cv=kfold, scoring=scoring)
    results.append(cv_results)
    names.append(name)
    msg = "%s: %f (%f)" % (name, cv_results.mean(), cv_results.std())
    print(msg)
    models_dict[name] = {"binary": model, "avg_performance": cv_results.mean(), "std_performance": cv_results.std()}

# Pick the entry with the highest mean CV accuracy
best_model_dict = list(models_dict.values())[np.argmax([x["avg_performance"] for x in models_dict.values()])]

# cross_val_score only fits clones, so refit the winner on the full data before exporting
best_model = best_model_dict["binary"].fit(X, Y)

# boxplot algorithm comparison
fig = plt.figure()
fig.suptitle('Algorithm Comparison')
ax = fig.add_subplot(111)
plt.boxplot(results)
ax.set_xticklabels(names)
plt.show()

# Saving the model for usage in the Heroku app
joblib.dump(best_model, 'model.pkl')
print("Model Saved.")
