#MachineLearning #SupervisedLearning #Classification
By Billy Gustave
Goal:
import numpy as np, pandas as pd, matplotlib.pyplot as plt, seaborn as sns
# Load the dataset from 'run_or_walk.csv' (path is relative to the working
# directory) into a DataFrame.
df = pd.read_csv('run_or_walk.csv')
# Quick inspection of the loaded frame: dimensions, first rows, summary
# statistics, and the column overview.
# NOTE(review): the bare expressions below only display a value in an
# interactive session (e.g. one per notebook cell); confirm whether this file
# is meant to run as a plain script.
df.shape
df.head()
df.describe()
df.info()
No Missing Values
# Features and target: every column except the ones listed below is kept as a
# model input; 'activity' is the label to predict.
non_feature_cols = ['date', 'time', 'username', 'wrist', 'activity']
X = df.drop(columns=non_feature_cols)
y = df['activity']
X.shape
from sklearn.model_selection import train_test_split
# Train/test split: hold out 20% of the rows for testing; the fixed
# random_state makes the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=1
)

# Annotated correlation heatmap of the training features only.
fig, ax = plt.subplots(figsize=(16, 14))
train_corr = x_train.corr()
sns.heatmap(train_corr, annot=True, cmap='Reds', linewidths=.5, ax=ax)
No highly correlated features
Using K-fold cross-validation:
from sklearn.model_selection import cross_val_score, KFold
from sklearn.naive_bayes import GaussianNB

# 10 shuffled folds with a fixed seed, shared by every evaluation below.
kfold = KFold(n_splits=10, shuffle=True, random_state=1)
nbc = GaussianNB()

# Column groups used to compare the two sensor types.
acc_features = ['acceleration_x', 'acceleration_y', 'acceleration_z']
gyro_features = ['gyro_x', 'gyro_y', 'gyro_z']


def _mean_cv_accuracy(features):
    """Return the mean cross-validated accuracy of ``nbc`` on ``features``.

    ``features`` holds the training columns to evaluate; the labels are
    always ``y_train`` and the folds come from ``kfold``.
    """
    fold_scores = cross_val_score(
        nbc, features, y_train, cv=kfold, scoring='accuracy'
    )
    return fold_scores.mean()


# Calculate accuracy scores: all features, acceleration only, gyro only.
all_score = _mean_cv_accuracy(x_train)
print("Accuracy : {} ".format(all_score))

acc_score = _mean_cv_accuracy(x_train[acc_features])
print("Acceleration accuracy : {} ".format(acc_score))

gyro_score = _mean_cv_accuracy(x_train[gyro_features])
print("Gyro accuracy : {} ".format(gyro_score))
Testing
# Fresh classifier for the hold-out evaluation. The same estimator is re-fitted
# for each feature subset and used to predict the test rows right after each
# fit, so each prediction array comes from the fit immediately preceding it.
nbc = GaussianNB()

# All features.
nbc.fit(x_train, y_train)
all_pred_y = nbc.predict(x_test)

# Acceleration features only.
nbc.fit(x_train[acc_features], y_train)
acc_pred_y = nbc.predict(x_test[acc_features])

# Gyro features only.
nbc.fit(x_train[gyro_features], y_train)
gyro_pred_y = nbc.predict(x_test[gyro_features])
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
All
# Test-set metrics for the model fitted on all features.
# The metric functions take (y_true, y_pred) in that order; accuracy is
# symmetric, but the documented order is used for consistency with the
# confusion matrix and classification report below.
accuracy_score(y_test, all_pred_y)
confusion_matrix(y_test, all_pred_y)
print(classification_report(y_test, all_pred_y))
Acceleration
# Test-set metrics for the model fitted on the acceleration features only.
# The metric functions take (y_true, y_pred) in that order; accuracy is
# symmetric, but the documented order is used for consistency with the
# confusion matrix and classification report below.
accuracy_score(y_test, acc_pred_y)
confusion_matrix(y_test, acc_pred_y)
print(classification_report(y_test, acc_pred_y))
Gyro
# Test-set metrics for the model fitted on the gyro features only.
# The metric functions take (y_true, y_pred) in that order; accuracy is
# symmetric, but the documented order is used for consistency with the
# confusion matrix and classification report below.
accuracy_score(y_test, gyro_pred_y)
confusion_matrix(y_test, gyro_pred_y)
print(classification_report(y_test, gyro_pred_y))
Acceleration is the determining factor for classifying running versus walking.