How to calculate true positive, true negative, false positive, negative and postive with Bayes Classifer from scratch
I am working on implementing a Naive Bayes Classification algorithm. I have a method def prob_continous_value
which is supposed to return the probability density function for an attribute given a class attribute. The problem requires classifying the following datasets:
Venue,color,Model,Category,Location,weight,Veriety,Material,Volume
1,6,4,4,4,1,1,1,6
2,5,4,4,4,2,6,1,1
1,6,2,1,4,1,4,2,4
1,6,2,1,4,1,2,1,2
2,6,5,5,5,2,2,1,2
1,5,4,4,4,1,6,2,2
1,3,3,3,3,1,6,2,2
1,5,2,1,1,1,2,1,2
1,4,4,4,1,1,5,3,6
1,4,4,4,4,1,6,4,6
2,5,4,4,4,2,4,4,1
2,4,3,3,3,2,1,1,1
Venue,color,Model,Category,Location,weight,Veriety,Material,Volume
2,6,4,4,4,2,2,1,1
1,2,4,4,4,1,6,2,6
1,5,4,4,4,1,2,1,6
2,4,4,4,4,2,6,1,4
1,4,4,4,4,1,2,2,2
2,4,3,3,3,2,1,1,1
1,5,2,1,4,1,6,2,6
1,2,3,3,3,1,2,1,6
2,6,4,4,4,2,3,1,1
1,4,4,4,4,1,2,1,6
1,5,4,4,4,1,2,1,4
1,4,5,5,5,1,6,2,4
2,5,4,4,4,2,3,1,1
The code for this is written like so:
from numpy.core.defchararray import count, index
import pandas as pd
import numpy as np
import math
from sklearn.decomposition import PCA
from numpy import linalg as LA
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
test_set_Bayes = pd.read_csv(Assignment 2--Training set for Bayes.csv)
training_set_Bayes = pd.read_csv(Assignment 2--Test set for Bayes.csv)
def calculate_metrics(tp, tn, fn, fp, p, n):
# calculate the accuracy, error rate, sensitivity, specificity, and precision for the selected classifier in reference to the corresponding test set.
accuracy = tp + tn /(p+n)
error_rate = fp + fn /(p + n)
sensitivity = tp/ p
precision = tp/ (tp+fp)
specificity = tn/n
display_metrics(accuracy, error_rate, sensitivity, precision, specificity)
def display_metrics(accuracy, error_rate, sensitivity, precision, specificity):
print(f'Accuracy: {accuracy}, Error_rate:{error_rate}, Sensitivity:{sensitivity}, Precision:{precision}, specificity:{specificity}')
def prob_continous_value(A, v, classAttribute, dataset, x):
# calcuate the average for all values of A in dataset with class = x
a = dataset[dataset[classAttribute] == x][A].mean()
# calculate the standard deviation for all values A in dataset with class = x
stdev = 1
stdev = dataset[dataset[classAttribute] == x][A].std()
v = dataset[A].iloc[0]
if stdev == 0.0:
stdev = 0.00000000000001
return (1/(math.sqrt(2*math.pi)*stdev))*math.exp(-((v-a)*(v-a))/(2*stdev*stdev))
def BayesClassifier(training_set,test_set):
classAttribute = 'Volume'
products = []
max = -math.inf
classWithMaxValue =
for x in training_set[classAttribute].unique():
D = len(training_set[classAttribute].index)
d = len(training_set[training_set[classAttribute] == x].index)
pClassAttribute = d/D
print(********)
print(f'Step 1 calculate p({classAttribute}={x})={pClassAttribute}')
p = 0
probabilitiesProduct = 1
print(********)
print(Step 2 calculate product of probabilities)
for A, values in training_set.iteritems():
if not A == classAttribute:
v = training_set[A].iloc[0]
p = prob_continous_value(A, v, classAttribute, training_set, x)
print(f'p({A}={v}|{classAttribute}={x})={p}')
probabilitiesProduct *= p
print(fprobabilitiesProduct={probabilitiesProduct})
print(********)
# products.append(probabilitiesProduct)
ptotal = pClassAttribute*probabilitiesProduct
print(f'p({classAttribute}={x}|x)={ptotal}')
if ptotal max:
max = ptotal
classWithMaxValue = x
print(fwinner is {classAttribute}={classWithMaxValue})
tp = len(test_set[test_set[classAttribute] == classWithMaxValue].index)
tn = len(test_set[test_set[classAttribute] != classWithMaxValue].index)
p = len(test_set[classAttribute].index)
n = len(test_set[classAttribute].index)
fp = len(test_set[classAttribute].index)
fn = len(test_set[classAttribute].index)
calculate_metrics(tp, tn, fn, fp, p, n)
# prompt user to select either ID3 or Bayes classifier.
selection = Bayes #= input(Please enter your selection for either ID3 or Bayes classification: )
if(selection == Bayes):
BayesClassifier(training_set_Bayes,test_set_Bayes)
Expected:
A total displaying the following metrics accuracy, error rate, sensitivity, specificity, and precision for the selected classifier in reference to the corresponding test set.
Actual:
Accuracy: 56.42328767123288, Error_rate:365.5, Sensitivity:0.15342465753424658, Precision:0.1330166270783848, specificity:0.8465753424657534
The last iteration prints out:
Step 1 calculate p(Volume=5)=0.06818181818181818
********
Step 2 calculate product of probabilities
p(Venue=1|Volume=5)=0.5849089671682236
p(color=6|Volume=5)=0.00019621509920999636
p(Model=4|Volume=5)=0.04484934763369217
p(Category=4|Volume=5)=0.0
p(Location=4|Volume=5)=0.0
p(Weight=1.5|Volume=5)=0.46792717373457876
p(Variety=1|Volume=5)=0.0003021925272993778
p(Material=1.1|Volume=5)=0.31395152365343143
probabilitiesProduct=0.0
********
p(Volume=5|x)=0.0
winner is Volume=2
This block of code is where I need help
tp = len(test_set[test_set[classAttribute] == classWithMaxValue].index)
tn = len(test_set[test_set[classAttribute] != classWithMaxValue].index)
p = len(test_set[classAttribute].index)
n = len(test_set[classAttribute].index)
fp = len(test_set[classAttribute].index)
fn = len(test_set[classAttribute].index)
calculate_metrics(tp, tn, fn, fp, p, n)
If someone could explain how to determine these parameters
p = len(test_set[classAttribute].index)
n = len(test_set[classAttribute].index)
fp = len(test_set[classAttribute].index)
fn = len(test_set[classAttribute].index)
I would be greatly appreciated. Thank you.
Topic naive-bayes-algorithim implementation naive-bayes-classifier
Category Data Science