The Algorithms logo
The Algorithms
About | Donate
library(e1071)
# Combine predictors and class labels into a single data frame.
# cbind() returns a matrix when x_train is a matrix, and the formula
# interface of naiveBayes() interprets a non-data-frame array as a
# contingency table, so build the data frame explicitly instead.
x <- as.data.frame(x_train)
# Store the response as a factor so it is treated as a set of classes.
x$y_train <- as.factor(y_train)
# Fitting model
fit <- naiveBayes(y_train ~ ., data = x)
summary(fit)
# Predict output
predicted <- predict(fit, x_test)

Naive Bayes

C
H
E
from sklearn import datasets
import pandas as pd
# Load the iris dataset and wrap its measurements in a data frame whose
# four columns are named sl, sw, pl and pw.
iris = datasets.load_iris()
df = pd.DataFrame(iris.data, columns=["sl", "sw", "pl", "pw"])
def abc(k, *val):
    """Return 0 when ``k`` is below the first extra argument, otherwise 1.

    k   -- the value to test.
    val -- extra positional arguments; only ``val[0]`` (the threshold) is
           read, so at least one must be supplied.
    """
    # The comparison operator had been left as the HTML entity "&lt;",
    # which is not valid Python; it is restored to "<" here.
    if k < val[0]:
        return 0
    else:
        return 1
# Apply abc to every value of the sl column, using 5 as the threshold.
df["sl"].apply(abc, args=(5,))
0      1
1      0
2      0
3      0
4      1
5      1
6      0
7      1
8      0
9      0
10     1
11     0
12     0
13     0
14     1
15     1
16     1
17     1
18     1
19     1
20     1
21     1
22     0
23     1
24     0
25     1
26     1
27     1
28     1
29     0
      ..
120    1
121    1
122    1
123    1
124    1
125    1
126    1
127    1
128    1
129    1
130    1
131    1
132    1
133    1
134    1
135    1
136    1
137    1
138    1
139    1
140    1
141    1
142    1
143    1
144    1
145    1
146    1
147    1
148    1
149    1
Name: sl, dtype: int64
def label(val, *boundaries):
    """Map ``val`` to one of the labels 'a', 'b', 'c' or 'd'.

    val        -- the value to classify.
    boundaries -- thresholds checked in order; the first three are read.

    Returns 'a' if ``val`` is below ``boundaries[0]``, 'b' if below
    ``boundaries[1]``, 'c' if below ``boundaries[2]``, and 'd' otherwise.
    """
    # The comparisons had been left as the HTML entity "&lt;", which is
    # not valid Python; they are restored to "<" here.
    if (val < boundaries[0]):
        return 'a'
    elif (val < boundaries[1]):
        return 'b'
    elif (val < boundaries[2]):
        return 'c'
    else:
        return 'd'

def toLabel(df, old_feature_name):
    """Return the column ``old_feature_name`` of ``df`` converted to labels.

    Three cut points are derived from the column: the midpoint between its
    minimum and its mean, the mean itself, and the midpoint between its
    maximum and its mean. Each value is then passed to ``label`` together
    with those cut points.
    """
    column = df[old_feature_name]
    low, mid, high = column.min(), column.mean(), column.max()
    cut_points = ((low + mid)/2, mid, (high + mid)/2)
    return column.apply(label, args=cut_points)
# Add a labeled counterpart for each of the four raw feature columns.
for feature in ('sl', 'sw', 'pl', 'pw'):
    df[feature + '_labeled'] = toLabel(df, feature)
df
sl sw pl pw sl_labeled sw_labeled pl_labeled pw_labeled
0 5.1 3.5 1.4 0.2 b c a a
1 4.9 3.0 1.4 0.2 a b a a
2 4.7 3.2 1.3 0.2 a c a a
3 4.6 3.1 1.5 0.2 a c a a
4 5.0 3.6 1.4 0.2 a c a a
5 5.4 3.9 1.7 0.4 b d a a
6 4.6 3.4 1.4 0.3 a c a a
7 5.0 3.4 1.5 0.2 a c a a
8 4.4 2.9 1.4 0.2 a b a a
9 4.9 3.1 1.5 0.1 a c a a
10 5.4 3.7 1.5 0.2 b c a a
11 4.8 3.4 1.6 0.2 a c a a
12 4.8 3.0 1.4 0.1 a b a a
13 4.3 3.0 1.1 0.1 a b a a
14 5.8 4.0 1.2 0.2 b d a a
15 5.7 4.4 1.5 0.4 b d a a
16 5.4 3.9 1.3 0.4 b d a a
17 5.1 3.5 1.4 0.3 b c a a
18 5.7 3.8 1.7 0.3 b d a a
19 5.1 3.8 1.5 0.3 b d a a
20 5.4 3.4 1.7 0.2 b c a a
21 5.1 3.7 1.5 0.4 b c a a
22 4.6 3.6 1.0 0.2 a c a a
23 5.1 3.3 1.7 0.5 b c a a
24 4.8 3.4 1.9 0.2 a c a a
25 5.0 3.0 1.6 0.2 a b a a
26 5.0 3.4 1.6 0.4 a c a a
27 5.2 3.5 1.5 0.2 b c a a
28 5.2 3.4 1.4 0.2 b c a a
29 4.7 3.2 1.6 0.2 a c a a
... ... ... ... ... ... ... ... ...
120 6.9 3.2 5.7 2.3 d c d d
121 5.6 2.8 4.9 2.0 b b c d
122 7.7 2.8 6.7 2.0 d b d d
123 6.3 2.7 4.9 1.8 c b c c
124 6.7 3.3 5.7 2.1 c c d d
125 7.2 3.2 6.0 1.8 d c d c
126 6.2 2.8 4.8 1.8 c b c c
127 6.1 3.0 4.9 1.8 c b c c
128 6.4 2.8 5.6 2.1 c b d d
129 7.2 3.0 5.8 1.6 d b d c
130 7.4 2.8 6.1 1.9 d b d d
131 7.9 3.8 6.4 2.0 d d d d
132 6.4 2.8 5.6 2.2 c b d d
133 6.3 2.8 5.1 1.5 c b c c
134 6.1 2.6 5.6 1.4 c b d c
135 7.7 3.0 6.1 2.3 d b d d
136 6.3 3.4 5.6 2.4 c c d d
137 6.4 3.1 5.5 1.8 c c d c
138 6.0 3.0 4.8 1.8 c b c c
139 6.9 3.1 5.4 2.1 d c d d
140 6.7 3.1 5.6 2.4 c c d d
141 6.9 3.1 5.1 2.3 d c c d
142 5.8 2.7 5.1 1.9 b b c d
143 6.8 3.2 5.9 2.3 c c d d
144 6.7 3.3 5.7 2.5 c c d d
145 6.7 3.0 5.2 2.3 c b c d
146 6.3 2.5 5.0 1.9 c a c d
147 6.5 3.0 5.2 2.0 c b c d
148 6.2 3.4 5.4 2.3 c c d d
149 5.9 3.0 5.1 1.8 c b c c

150 rows × 8 columns

# Remove the raw numeric columns in place, then list the distinct labels
# present in sl_labeled.
df.drop(columns=['sl', 'sw', 'pl', 'pw'], inplace=True)
set(df.sl_labeled)
{'a', 'b', 'c', 'd'}
# Append the iris target values as the last column, named "output".
df["output"] = iris["target"]
df
sl_labeled sw_labeled pl_labeled pw_labeled output
0 b c a a 0
1 a b a a 0
2 a c a a 0
3 a c a a 0
4 a c a a 0
5 b d a a 0
6 a c a a 0
7 a c a a 0
8 a b a a 0
9 a c a a 0
10 b c a a 0
11 a c a a 0
12 a b a a 0
13 a b a a 0
14 b d a a 0
15 b d a a 0
16 b d a a 0
17 b c a a 0
18 b d a a 0
19 b d a a 0
20 b c a a 0
21 b c a a 0
22 a c a a 0
23 b c a a 0
24 a c a a 0
25 a b a a 0
26 a c a a 0
27 b c a a 0
28 b c a a 0
29 a c a a 0
... ... ... ... ... ...
120 d c d d 2
121 b b c d 2
122 d b d d 2
123 c b c c 2
124 c c d d 2
125 d c d c 2
126 c b c c 2
127 c b c c 2
128 c b d d 2
129 d b d c 2
130 d b d d 2
131 d d d d 2
132 c b d d 2
133 c b c c 2
134 c b d c 2
135 d b d d 2
136 c c d d 2
137 c c d c 2
138 c b c c 2
139 d c d d 2
140 c c d d 2
141 d c c d 2
142 b b c d 2
143 c c d d 2
144 c c d d 2
145 c b c d 2
146 c a c d 2
147 c b c d 2
148 c c d d 2
149 c b c c 2

150 rows × 5 columns

def fit(data):
    """Build a nested count table from ``data``.

    The last column of ``data`` is taken as the output; every other column
    is a feature. The result maps each distinct output value to a dict
    holding "total_count" (the number of rows with that output) and, for
    each feature, a dict mapping every value seen in those rows to the
    number of rows in which it occurs.
    """
    output_name = data.columns[-1]
    features = data.columns[:-1]
    counts = {}
    for output in set(data[output_name]):
        # Rows belonging to the current output value only.
        subset = data[data[output_name] == output]
        per_output = {"total_count": len(subset)}
        for f in features:
            column = subset[f]
            per_output[f] = {
                value: len(subset[column == value]) for value in set(column)
            }
        counts[output] = per_output
    return counts
# Compute the nested count table for df.
fit(df)
{0: {'pl_labeled': {'a': 50},
  'pw_labeled': {'a': 50},
  'sl_labeled': {'a': 28, 'b': 22},
  'sw_labeled': {'a': 1, 'b': 7, 'c': 32, 'd': 10},
  'total_count': 50},
 1: {'pl_labeled': {'b': 7, 'c': 43},
  'pw_labeled': {'b': 10, 'c': 40},
  'sl_labeled': {'a': 3, 'b': 21, 'c': 24, 'd': 2},
  'sw_labeled': {'a': 13, 'b': 29, 'c': 8},
  'total_count': 50},
 2: {'pl_labeled': {'c': 20, 'd': 30},
  'pw_labeled': {'c': 16, 'd': 34},
  'sl_labeled': {'a': 1, 'b': 5, 'c': 29, 'd': 15},
  'sw_labeled': {'a': 5, 'b': 28, 'c': 15, 'd': 2},
  'total_count': 50}}