How can I export the best-performing classifier from my code as a saved model for future use?
# Compare several classifiers with 10-fold cross-validation on 'processed.csv',
# print each model's mean/std accuracy, try to save the best one, and draw a
# box plot of the per-fold scores.

# Read the CSV file
df = pd.read_csv('processed.csv', header=0, engine='python')

# Pre-processing the data
# Define X,Y features: every column except 'Class' is an input, 'Class' is the target
X = df.drop('Class', axis=1)
Y = df['Class']

# prepare configuration for cross validation test harness
seed = 3

# prepare models as (short name, estimator) pairs
models = [
    ('LR', LogisticRegression()),
    ('LDA', LinearDiscriminantAnalysis()),
    ('KNN', KNeighborsClassifier()),
    ('CART', DecisionTreeClassifier()),
    ('NB', GaussianNB()),
    ('SVM', SVC()),
]

# evaluate each model in turn
results = []
names = []
scoring = 'accuracy'
# The splitter is configured identically for every model, so build it once.
kfold = model_selection.KFold(n_splits=10, shuffle=True, random_state=seed)
for name, model in models:
    cv_results = model_selection.cross_val_score(model, X, Y, cv=kfold, scoring=scoring)
    results.append(cv_results)
    names.append(name)
    # name: mean accuracy (standard deviation across folds)
    msg = '%s: %f (%f)' % (name, cv_results.mean(), cv_results.std())
    print(msg)

# Saving the model for usage in the Heroku app
# NOTE(review): 'bestmodel' is never assigned anywhere in this snippet, so this
# line raises NameError as written -- choosing which model to save is exactly
# the open question here.
joblib.dump(bestmodel, 'model.pkl')
print('Model Saved.')

# boxplot algorithm comparison: one box per model, built from its per-fold scores
fig = plt.figure()
fig.suptitle('Algorithm Comparison')
ax = fig.add_subplot(111)
plt.boxplot(results)
ax.set_xticklabels(names)
plt.show()
Working update: keep track of the best-scoring model inside the evaluation loop and save that one.
# Compare several classifiers with 10-fold cross-validation, remember the one
# with the highest mean accuracy, plot the per-fold scores, then fit that best
# model on the full data and save it to 'model.pkl'.

# Pre-processing the data
# Define X,y features: every column except 'Class' is an input, 'Class' is the target
X = df.drop('Class', axis=1)
Y = df['Class']

# prepare configuration for cross validation test harness
seed = 7

# prepare models as (short name, estimator) pairs
models = [
    ('LR', LogisticRegression()),
    ('LDA', LinearDiscriminantAnalysis()),
    ('KNN', KNeighborsClassifier()),
    ('CART', DecisionTreeClassifier()),
    ('NB', GaussianNB()),
    ('SVM', SVC()),
]

# evaluate each model in turn
results = []
names = []
scoring = 'accuracy'
# Start below any reachable score so the first evaluated model always wins,
# and use None (not a list) as the "nothing selected yet" marker.
best_score = float('-inf')
best_model = None
# The splitter is configured identically for every model, so build it once.
kfold = model_selection.KFold(n_splits=10, shuffle=True, random_state=seed)
for name, model in models:
    cv_results = model_selection.cross_val_score(model, X, Y, cv=kfold, scoring=scoring)
    results.append(cv_results)
    names.append(name)
    # name: mean accuracy (standard deviation across folds)
    msg = '%s: %f (%f)' % (name, cv_results.mean(), cv_results.std())
    print(msg)
    current_score = cv_results.mean()
    # Strict '>' keeps the earliest model when two means tie.
    if current_score > best_score:
        best_score = current_score
        best_model = model

# boxplot algorithm comparison: one box per model, built from its per-fold scores
fig = plt.figure()
fig.suptitle('Algorithm Comparison')
ax = fig.add_subplot(111)
plt.boxplot(results)
ax.set_xticklabels(names)
plt.show()

# Saving the model for usage in the Heroku app
# cross_val_score fits clones of the estimator, so best_model itself is still
# unfitted at this point; train it on the full data set so the saved pickle
# can actually predict.
best_model.fit(X, Y)
joblib.dump(best_model, 'model.pkl')
print('Model Saved.')