-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcarMPGNeuralNet.py
More file actions
128 lines (107 loc) · 3.43 KB
/
carMPGNeuralNet.py
File metadata and controls
128 lines (107 loc) · 3.43 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
#IMPORTS
# Standard library
import io
import time

# Third-party: data handling and plotting
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

# Third-party: modeling
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

# Third-party: Google Drive access from Colab
from google.colab import auth
from oauth2client.client import GoogleCredentials
from pydrive2.auth import GoogleAuth
from pydrive2.drive import GoogleDrive
#VARIABLE DECLARATION
# Feature column names taken from the fuel-economy CSV loaded below.
Var1 = 'tailpipe_co2_in_grams_mile_ft1'
Var2 = 'year'
Var3 = 'fuel_economy_score'
Var4 = 'engine_cylinders'
# Training hyperparameters.
NUM_EPOCHS = 20
LEARNING_RATE = 0.0005
VARS = [Var1, Var2, Var3, Var4] #MUST BE UPDATED FOR NUM OF VARS-----------
# Name of the label column (created later as the mean of city/highway MPG).
TESTING_SET = 'avg_mpg'
# Number of candidate feature columns; also reused as the hidden-layer width.
NUM_VAR = len(VARS)
#function declaration for error loss plot
def plot_loss(history):
    """Plot training and validation loss curves from a Keras History object."""
    for series in ('loss', 'val_loss'):
        plt.plot(history.history[series], label=series)
    plt.xlabel('Epoch')
    plt.ylabel('Error ')
    plt.legend()
    plt.grid(True)
#GOOGLE AUTH
# Authenticate the Colab user, then wrap the default application credentials
# in a PyDrive2 client. Call order matters: authenticate_user() must run
# before get_application_default() can succeed.
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)
#Streaming CSV file from google drive
# The id below is the Drive file id of the vehicle-MPG CSV; the file content
# is fetched as a string and parsed in memory (no local temp file).
downloaded = drive.CreateFile({'id': '1yC29V2fDnIy1465Qe4WtjI77FBjR2eR8'}) # String in the URL when view/downloading the file
df = pd.read_csv(io.StringIO(downloaded.GetContentString()), delimiter=',', on_bad_lines='warn')
df.describe()
#Visualization test
print(df['year'])
print(df.columns)
#Creating Avg_mpg and simple data conversion
# Label column: simple mean of the city and highway MPG for the primary fuel.
df['avg_mpg'] = ((df['city_mpg_ft1'] + df['highway_mpg_ft1'])/2)
print(df['avg_mpg'])
# Fix: removed `df.convert_dtypes().dtypes` — convert_dtypes() returns a NEW
# DataFrame and the result was discarded, so the statement was a no-op in a
# plain .py script (it only displayed output in a notebook cell).
#Dropping non numbers of Variables from data set
print(f"Pre drop-na {len(df)} rows")
# Fix: removed the dead `df2 = df` assignment that was immediately overwritten.
# NOTE(review): only Var1/Var2 are filtered for NaN here, so Var3/Var4 may
# still contain missing values after the drop — confirm this is intended.
df2 = df.dropna(subset=[Var1, Var2])
print(f"After drop {len(df2)} rows")
#Verify column types are numeric
# Fix: one astype call with a combined mapping instead of four chained calls
# (each astype returns a full DataFrame copy).
df2 = df2.astype({Var1: float, Var2: float, Var3: float, Var4: float})
print(df2.dtypes)
#Data set training ratios, split into different sets
# 80/20 split; random_state pins the sample so the split is reproducible.
train_dataset = df2.sample(frac=0.8, random_state=0)
test_dataset = df2.drop(train_dataset.index)
#Split data visualization
# Bug fix: DataFrame.size is rows * columns (total element count), so the
# previous prints labeled element counts as "rows"; len() gives row counts.
print("Full dataset size: ", df.size)
print("Training rows", len(train_dataset))
print("Testing rows", len(test_dataset))
#Create features and label dataframes
# Separate the label column from the feature frames; drop(columns=...) leaves
# the original split frames untouched, exactly like copy()+pop().
train_labels = train_dataset['avg_mpg'].copy()
test_labels = test_dataset['avg_mpg'].copy()
train_features_df = train_dataset.drop(columns=['avg_mpg'])
test_features_df = test_dataset.drop(columns=['avg_mpg'])
print(train_labels)
#Choosing features that will be used for the Neural Network
train_features = train_features_df[[Var1, Var2]]
print(train_features)
#Running normalizer on training features
# Bug fix: input_shape must match the number of feature columns actually
# selected. The previous hard-coded [NUM_VAR,] (= 4) disagreed with the two
# columns in train_features; derive the width from the data instead so the
# layer stays correct if the feature list changes.
# NOTE(review): axis=None pools ALL values into one scalar mean/variance
# rather than normalizing per feature — confirm that is intended.
normalizer = layers.Normalization(input_shape=[train_features.shape[1],], axis=None)
normalizer.adapt(np.array(train_features))
print(normalizer.mean.numpy())
print(normalizer.get_weights())
#Normalized model inspection
# Linear stack: normalization -> hidden linear layer -> scalar regression head.
model = tf.keras.Sequential()
model.add(normalizer)
model.add(layers.Dense(units=NUM_VAR))
model.add(layers.Dense(units=1))
model.summary()
#Pre-prediction test
model.predict(train_features[:10])
#Compiling Model
# Adam with the file-level LEARNING_RATE; MSE loss for this regression task.
# Fix: removed commented-out alternative optimizer/loss lines (dead code).
model.compile(
    optimizer=tf.optimizers.Adam(learning_rate=LEARNING_RATE),
    loss='mean_squared_error')
#Fitting Model and running neural network
# Bug fix: '%%time' is an IPython cell magic and is a SyntaxError in a plain
# .py script; equivalent wall-clock timing is done with time.perf_counter.
_t0 = time.perf_counter()
history = model.fit(
    train_features,
    train_labels,
    epochs=NUM_EPOCHS,
    verbose=1,
    # Calculate validation results on 20% of the training data.
    validation_split=0.2)
print(f"Training wall time: {time.perf_counter() - _t0:.2f}s")
#Plotting Data
plot_loss(history)