How to plot the bar charts of precision, recall, and f-measure?

I have trained 4 machine learning models on a task, and now I am struggling to plot bar charts of their metrics like the one shown in the image below. I print the classification report to get precision, recall, etc. My code is shown below:

def Statistics(data):
    """Print the classification report and draw the confusion-matrix heatmap.

    data: object indexable by 'actual labels' and 'predicted labels'
          (the true and the predicted class labels respectively).
    """
    actual = data['actual labels']
    predicted = data['predicted labels']

    # Classification Report
    # The messages must be string literals; unquoted text is a SyntaxError.
    print("Classification Report is shown below")
    print(classification_report(actual, predicted))

    # Confusion matrix
    print("Confusion matrix is shown below")
    cm = confusion_matrix(actual, predicted)

    # Heatmap of the confusion matrix with each cell annotated by its value.
    plt.figure(figsize=(10, 7))
    sn.heatmap(cm, annot=True, cmap='Blues', fmt='g')
    plt.xlabel('Predicted')
    plt.ylabel('Truth')
Statistics(data)

How can I plot this type of chart in Python?

I have saved my actual and predicted labels to a CSV file. Four models were applied to the data, so I saved their actual and predicted labels to CSV files, as shown below:

path='/content/drive/MyDrive/CSVs/OriginalCensusRFmodel.csv'
def read_csv(path):
    """Read a CSV into a DataFrame, discarding the 'Unnamed: 0' column if present.

    path: file path (or file-like object) accepted by ``pd.read_csv``.

    Returns the loaded DataFrame without the 'Unnamed: 0' column.
    """
    data = pd.read_csv(path)
    # pandas names a header-less first column 'Unnamed: 0' (typically an index
    # written out by ``to_csv``). errors='ignore' keeps this from raising
    # KeyError on files that were saved without such a column.
    return data.drop(columns='Unnamed: 0', errors='ignore')
data=read_csv(path)

    def Statistics(data):
        """Print the classification report and draw the confusion-matrix heatmap.

        data: object indexable by 'actual labels' and 'predicted labels'
              (the true and the predicted class labels respectively).
        """
        actual = data['actual labels']
        predicted = data['predicted labels']

        # Classification Report
        # The messages must be string literals; unquoted text is a SyntaxError.
        print("Classification Report is shown below")
        print(classification_report(actual, predicted))

        # Confusion matrix
        print("Confusion matrix is shown below")
        cm = confusion_matrix(actual, predicted)

        # Heatmap of the confusion matrix with each cell annotated by its value.
        plt.figure(figsize=(10, 7))
        sn.heatmap(cm, annot=True, cmap='Blues', fmt='g')
        plt.xlabel('Predicted')
        plt.ylabel('Truth')
    Statistics(data)

Topic plotly matplotlib plotting visualization python

Category Data Science


Try running the function below, which uses a cross-validation strategy to evaluate the models' performance across different metrics.

Of course, it could be improved, for example by changing the plot type to a box plot, so that you see not only the mean score for each estimator but also its distribution.

from functools import reduce

def _get_model_name(model):
    """
            Returns a string with the name of a sklearn model
                model: Sklearn estimator instance (or a Pipeline)

            The name is the text of ``str(estimator)`` before the first "(".
            For a Pipeline, the estimator of the last step is used and the
            name is prefixed with "Pipeline_".
    """
    if isinstance(model, Pipeline):
        # steps is a list of (step_name, estimator) pairs; take the last estimator.
        estimator = model.steps[-1][1]
        prefix = "Pipeline_"
    else:
        estimator = model
        prefix = ""

    # partition() returns the whole string when there is no "(", whereas
    # slicing with str.find() (which yields -1) would drop the last character.
    return prefix + str(estimator).partition("(")[0]
    
    
def plot_cv_score(X, y, models_list, cv = 5, scoring_list = None, refit = True, return_scores = False):
    """
            Plots the mean k-fold cross-validation score of each model, for
            each requested metric, as a grouped bar chart.

            X: numpy_array/pandas dataframe n_rows, m_features
            y: numpy_array/pandas dataframe n_rows
            models_list: iterable of estimators (or Pipelines) to benchmark
            cv: forwarded to cross_val_score as its ``cv`` argument
            scoring_list: list of scoring names forwarded one by one to
                cross_val_score; defaults to ["accuracy"] when None
            refit: if True, each model is fitted on the full X, y before
                being cross-validated
            return_scores: if True, return the score table (one row per
                model, one column per metric); otherwise return None
    """
    # Use the caller-supplied metrics rather than a module-level global.
    if scoring_list is None:
        scoring_list = ["accuracy"]
    scoring_list = list(scoring_list)

    per_model_frames = []

    for model in models_list:
        name = _get_model_name(model)

        if refit:
            model.fit(X, y)

        # One mean score per metric, in the same order as scoring_list.
        mean_score = [
            np.mean(cross_val_score(model, X, y, cv = cv, scoring = metric, n_jobs = -1))
            for metric in scoring_list
        ]

        # Single-column frame: rows are metrics, the column is this model.
        per_model_frames.append(pd.DataFrame({name: mean_score}, index = scoring_list))

    # Join all per-model columns on the metric index, then transpose so that
    # rows are models and columns are metrics.
    frame_scores = reduce(
        lambda left, right: pd.merge(left, right, left_index = True, right_index = True),
        per_model_frames,
    ).T

    fig, ax = plt.subplots(1, 1, figsize = (10, 5))

    frame_scores.plot.bar(ax = ax, cmap = 'RdYlBu', edgecolor = "black")
    ax.legend(loc = 'best')
    # After the transpose the x axis holds the models; bar height is the score.
    ax.set_xlabel("Model")
    ax.set_ylabel("Score")
    ax.set_title("Cross validation model benchmark")

    if return_scores:
        return frame_scores

Example:

from sklearn.metrics import accuracy_score, f1_score, roc_auc_score
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import cross_val_score
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.pipeline import Pipeline

# Load the breast-cancer dataset as a (features, target) pair.
X, y = load_breast_cancer(return_X_y= True)

# Models to benchmark against each other.
models_list =[LogisticRegression(random_state= 42),
              SVC(probability= True),
              RandomForestClassifier(random_state = 42),
              GaussianNB()]

# Scoring names handed to cross_val_score, one bar group entry per metric.
score_list = ["roc_auc", "accuracy", "f1", "precision", "recall"]

# return_scores=True so that `t` holds the score table; with the default
# (False) plot_cv_score returns None and `t` would be useless.
t = plot_cv_score(X = X, y = y, models_list = models_list, cv = 5, scoring_list = score_list, refit = True, return_scores = True)

Outputs:

enter image description here

About

Geeks Mental is a community that publishes articles and tutorials about Web, Android, Data Science, new techniques and Linux security.