The Algorithms logo
The Algorithms
AboutDonate

Similarity

H
import pandas as pd
from math import sqrt, pow
import matplotlib.pyplot as plt
 
def euclidean_distance(x, y):
    """Return the Euclidean (straight-line) distance between x and y.

    The two iterables are walked in lockstep with ``zip``, so pairing stops
    at the end of the shorter one. Two empty inputs give ``0.0``.
    """
    squared_total = 0
    for left, right in zip(x, y):
        # math.pow always yields a float, so the running total is a float
        # as soon as the first pair has been processed.
        squared_total += pow(left - right, 2)
    return sqrt(squared_total)

def manhattan_distance(x, y):
    """Return the Manhattan (city-block) distance between x and y.

    This is the sum of the absolute element-wise differences. The inputs
    are paired with ``zip``, so pairing stops at the end of the shorter
    one. Two empty inputs give ``0``.
    """
    total = 0
    for left, right in zip(x, y):
        total += abs(left - right)
    return total

# Load the dataset; the code below reads its 'Gender', 'Height' and 'Weight' columns.
data = pd.read_csv('data.csv')

# Scatter plot of Height against Weight, one colour per gender.
scatter_options = {'kind': 'scatter', 'x': 'Height', 'y': 'Weight', 'figsize': (10, 7)}
male_rows = data[data['Gender'] == 'Male']
female_rows = data[data['Gender'] == 'Female']

# The first call returns the axes object, which is kept in `x`; the second
# call passes it back through `ax=` so both groups are drawn on the same axes.
x = male_rows.plot(color='blue', **scatter_options)
female_rows.plot(color='pink', ax=x, **scatter_options)

# Label the figure and display it.
plt.title('Analyze Height and Weight of men and women')
plt.xlabel('Height')
plt.ylabel('weight')
plt.legend(labels=['Males', 'Females'])
plt.show()
# Pull the full height and weight columns out of the dataset.
heights, weights = data['Height'], data['Weight']

# Turn each distance d into a similarity score 1 / (1 + d): a distance of
# zero gives 1, and the score shrinks towards 0 as the distance grows.
euclidean_result = 1 / (1 + euclidean_distance(heights, weights))
manhattan_result = 1 / (1 + manhattan_distance(heights, weights))

# Report whether each similarity score rounds to 0 ("no similarity") or to
# a non-zero value ("similarity"), using the built-in round().

# Euclidean distance
if round(euclidean_result) != 0:
    print(f"This is similarity using Euclidean Distance between heights and weights for this dataset with similarity value = {euclidean_result}")
else:
    print(f"This is no similarity using Euclidean Distance between heights and weights for this dataset with similarity value = {euclidean_result}")

print('------------------------------------------------------')

# Manhattan distance
if round(manhattan_result) != 0:
    print(f"This is similarity using Manhattan Distance between heights and weights for this dataset with similarity value = {manhattan_result}")
else:
    print(f"This is no similarity using Manhattan Distance between heights and weights for this dataset with similarity value = {manhattan_result}")

This is no similarity using Euclidean Distance between heights and weights for this dataset with similarity value = 0.00010071759753209523
------------------------------------------------------
This is no similarity using Manhattan Distance between heights and weights for this dataset with similarity value = 1.0518244744130195e-06