In [28]:
import seaborn as sns
import pandas as pd
import numpy as np

from sklearn.tree import DecisionTreeRegressor
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.neural_network import MLPRegressor

from sklearn.model_selection import train_test_split

from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
In [18]:
df = sns.load_dataset("mpg").dropna()
df
Out[18]:
      mpg  cylinders  displacement  horsepower  weight  acceleration  model_year  origin                       name
0    18.0          8         307.0       130.0    3504          12.0          70     usa  chevrolet chevelle malibu
1    15.0          8         350.0       165.0    3693          11.5          70     usa          buick skylark 320
2    18.0          8         318.0       150.0    3436          11.0          70     usa         plymouth satellite
3    16.0          8         304.0       150.0    3433          12.0          70     usa              amc rebel sst
4    17.0          8         302.0       140.0    3449          10.5          70     usa                ford torino
..    ...        ...           ...         ...     ...           ...         ...     ...                        ...
393  27.0          4         140.0        86.0    2790          15.6          82     usa            ford mustang gl
394  44.0          4          97.0        52.0    2130          24.6          82  europe                  vw pickup
395  32.0          4         135.0        84.0    2295          11.6          82     usa              dodge rampage
396  28.0          4         120.0        79.0    2625          18.6          82     usa                ford ranger
397  31.0          4         119.0        82.0    2720          19.4          82     usa                 chevy s-10

392 rows × 9 columns
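dropna() removes rows silently, so it is worth checking what was lost. In this dataset the missing values are confined to horsepower; a quick sanity check on a fresh copy (raw is a name introduced here):

raw = sns.load_dataset("mpg")
raw.isna().sum()       # only `horsepower` has NaNs (6 of them)
raw.shape, df.shape    # (398, 9) -> (392, 9): six rows dropped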

In [43]:
zoo = {
    "DT": DecisionTreeRegressor(),              # axis-aligned splits; can fit the training set perfectly
    "SVR": SVR(kernel='linear'),                # linear support vector regression
    "KNN": KNeighborsRegressor(n_neighbors=3),  # mean of the 3 nearest training points
    "NN": MLPRegressor((200, 200))              # MLP with two hidden layers of 200 units
}
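SVR, KNN and the MLP are all sensitive to feature scale, and the features here range from tens (acceleration) to thousands (weight), so in practice one would usually standardize first. A minimal sketch, wrapping each model in a scaling pipeline (scaled_zoo is a name introduced here; pipelines expose the same fit/predict interface, so they would drop straight into the loop below):

from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# Each entry standardizes the features before passing them to the underlying model.
scaled_zoo = {name: make_pipeline(StandardScaler(), reg) for name, reg in zoo.items()}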
In [20]:
features = ["cylinders", "displacement", "horsepower", "weight", "acceleration", "model_year"]
X = df[features].to_numpy()
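The categorical origin column is left out because these estimators need numeric inputs. If we wanted to keep it, one-hot encoding would suffice; a sketch using pd.get_dummies (X_full is a name introduced here):

# Turn origin (usa/europe/japan) into three indicator columns alongside the numeric features.
X_full = pd.get_dummies(df[features + ["origin"]], columns=["origin"]).to_numpy(dtype=float)
X_full.shape  # (392, 9): six numeric features plus three origin indicators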
In [21]:
y = df["mpg"].to_numpy()
y.shape
Out[21]:
(392,)
In [22]:
Xtr, Xva, ytr, yva = train_test_split(X, y, test_size=.2)
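Without a random_state the split, and hence every score below, changes on each run. For reproducible numbers one would pin the seed, e.g. (42 is an arbitrary choice):

Xtr, Xva, ytr, yva = train_test_split(X, y, test_size=0.2, random_state=42)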
In [44]:
print("model", "R2(tr)", "R2(va)", "MAE(tr)", "MAE(va)")
for name, reg in zoo.items():
    reg.fit(Xtr, ytr)
    ytr_pred = reg.predict(Xtr)
    yva_pred = reg.predict(Xva)
    print(name, round(r2_score(ytr, ytr_pred), 2), round(r2_score(yva, yva_pred), 2),
          round(mean_absolute_error(ytr, ytr_pred), 2),
          round(mean_absolute_error(yva, yva_pred), 2))
model R2(tr) R2(va) MAE(tr) MAE(va)
DT 1.0 0.84 0.0 2.15
SVR 0.57 0.56 4.02 3.96
KNN 0.84 0.75 2.29 2.92
NN 0.74 0.79 3.06 2.72
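The decision tree reproduces the training set perfectly (R2 = 1.0, MAE 0.0), a classic sign of overfitting, and all of these numbers depend on one particular 80/20 split. Cross-validation averages over several splits and gives a steadier comparison; a minimal sketch using 5-fold R2:

from sklearn.model_selection import cross_val_score

for name, reg in zoo.items():
    scores = cross_val_score(reg, X, y, cv=5, scoring="r2")  # one R2 score per fold
    print(name, round(scores.mean(), 2), "+/-", round(scores.std(), 2))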