#MachineLearning #UnsupervisedLearning #Clustering
By Billy Gustave
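Goal: cluster the animals in zoo.csv with agglomerative clustering and compare the clusters with the known class_type labels.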
import numpy as np, pandas as pd, matplotlib.pyplot as plt, seaborn as sns
# Read the zoo dataset from the working directory into a DataFrame.
df = pd.read_csv("zoo.csv")
# Preview the first rows, then print the column summary (dtypes, non-null counts).
df.head()
df.info()
No missing values
Checking Distribution
# Number of rows per class_type, first as a table and then as a bar chart.
df.groupby('class_type').size()
# Name the column via keywords: recent seaborn releases take `data` as the
# first positional parameter and accept x/y only as keywords, so passing the
# Series positionally no longer plots the intended per-class counts.
sns.countplot(x='class_type', data=df)
# Build the feature matrix X and the target vector y.
# X: every column from 'hair' through 'catsize' (label slice, both ends included).
X = df.loc[:, 'hair':'catsize']
# y: class_type shifted down by one (presumably to make 1-based labels 0-based -- confirm).
y = df['class_type'] - 1
from sklearn.preprocessing import StandardScaler
# Standardize the features: learn the per-column statistics from X first,
# then apply them to the same data to obtain the scaled matrix.
scaler = StandardScaler()
scaler.fit(X)
X_scaled = scaler.transform(X)
With fine tuning
# Agglomerative (hierarchical) clustering with average linkage, n_clusters = 7.
from sklearn.cluster import AgglomerativeClustering
k = 7
# The `affinity` keyword was deprecated in scikit-learn 1.2 and removed in 1.4.
# Euclidean distance is the default, so leaving it out yields the same
# clustering on old and new versions alike.
agglo = AgglomerativeClustering(n_clusters=k, linkage='average')
# NOTE(review): the raw X is clustered here although X_scaled is computed
# earlier in the file -- confirm that skipping the scaled features is intended.
y_pred = agglo.fit_predict(X)
from sklearn.metrics import adjusted_rand_score, mean_squared_error
# Cluster ids are arbitrary: numbering the same clusters differently changes
# the RMSE against y, so it is not a reliable quality measure on its own.
# Also report the adjusted Rand index, which ignores how clusters are numbered.
print('Adjusted Rand index:', adjusted_rand_score(y, y_pred))
# Root mean squared error between the class labels and the cluster ids
# (kept as the final displayed value; read it with the caveat above).
np.sqrt(mean_squared_error(y, y_pred))