Artificial Neural Networks: Supriya A Jadhav
Instructions:
Please share your answers filled in-line in the Word document. Submit code
separately wherever applicable.
Grading Guidelines:
1. An assignment submission is considered complete only when correct and executable
code(s) are submitted along with the documentation explaining the method and
results. Failing to submit either of those will be considered an invalid submission and
will not be considered for evaluation.
2. Assignments submitted after the deadline will affect your grades.
Grading:
Grade | Score | Ans (On time)        | Ans (Late)
A     | 100   | Correct              | -
B     | 85    | 80% & above          | Correct
C     | 75    | 50% & above          | 80% & above
D     | 65    | 50% & below          | 50% & above
E     | 55    | -                    | 50% & below
F     | 45    | Copied/No Submission | -
● Grade A: (>= 90): When all assignments are submitted on or before the given
deadline.
● Grade B: (>= 80 and < 90):
o When assignments are submitted on time but less than 80% of problems are
completed.
(OR)
o All assignments are submitted after the deadline.
1. Business Problem
1.1. What is the business objective?
1.2. Are there any constraints?
2. Data Dictionary
2.1 Make a table as shown above and provide information about the features, such as
their data types and their relevance to model building. If a feature is not relevant,
provide reasons and a description of the feature.
3. Data Pre-processing
3.1 Data Cleaning, Feature Engineering, etc.
3.2 Outlier Treatment, if applicable.
4. Exploratory Data Analysis (EDA):
4.1. Summary.
4.2. Univariate analysis.
4.3. Bivariate analysis.
5. Model Building:
5.1 Build an Artificial Neural Network model on the given datasets.
5.2 Use TensorFlow and Keras packages.
5.3 Briefly explain the output in the documentation for each step in your own words.
5.4 Use different activation functions to get the best model.
6. Write about the benefits/impact of the solution - in what way does the business
(client) benefit from the solution provided?
Business Problem
What is the business objective?
To find out which factors affect a startup company and whether it will be profitable.
Data Dictionaries:
import pandas as pd
#details of startup
startup.info()
#data types
startup.dtypes
EDA
Summary statistics of the numeric features:

Feature          Std. deviation   Variance       Skewness    Kurtosis
rd_spend          45902.256482    2.107017e+09    0.164002   -0.761465
Administration    28017.802755    7.849973e+08   -0.489025    0.225071
m_spend          122290.310726    1.495492e+10   -0.046472   -0.671701
Profit            40306.180338    1.624588e+09    0.023291   -0.063859
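These figures can be reproduced with pandas' built-in moment functions; a minimal sketch, assuming startup is the DataFrame loaded above:

# compute the four summary statistics for every numeric column
num_cols = startup.select_dtypes(include="number")
summary = {
    "standard deviation": num_cols.std(),
    "variance": num_cols.var(),
    "skewness": num_cols.skew(),
    "kurtosis": num_cols.kurt(),
}
print(summary)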
# Correlation matrix
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

co = startup.corr()
co

# rd_spend
plt.bar(height = startup.rd_spend, x = np.arange(1, 51, 1))  # one bar per record
plt.hist(startup.rd_spend)     # histogram
plt.boxplot(startup.rd_spend)  # boxplot

# m_spend
plt.bar(height = startup.m_spend, x = np.arange(1, 51, 1))
plt.hist(startup.m_spend)      # histogram
plt.boxplot(startup.m_spend)   # boxplot

# Jointplot
sns.jointplot(x=startup['Profit'], y=startup['rd_spend'])
# the distributions look roughly normal
"""
from sklearn.preprocessing import OneHotEncoder
# creating instance of one-hot-encoder
enc = OneHotEncoder(handle_unknown='ignore')
sta=startup.iloc[:,[3]]
enc_df = pd.DataFrame(enc.fit_transform(sta).toarray())"""
enc_df = pd.get_dummies(startup.iloc[:,[3]])
enc_df.columns
enc_df.rename(columns={"State_New York":'State_New_York'},inplace= True)
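model_df, sliced in the model code below, is never assembled above; a plausible construction (the column order is an assumption, chosen so that Profit sits in column 0 and the six inputs follow):

# assumption: target first, then the three spend columns and the three state dummies
model_df = pd.concat([startup["Profit"],
                      startup[["rd_spend", "Administration", "m_spend"]],
                      enc_df], axis=1)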
##################################
### Artificial Neural Network MODEL ###
##################################

from tensorflow import keras
import numpy as np

np.random.seed(10)

X = model_df.iloc[:, 1:]
Y = model_df.iloc[:, 0]

model = keras.models.Sequential()
model.add(keras.layers.Dense(5000, activation='relu', input_dim=6))
model.add(keras.layers.BatchNormalization())
model.add(keras.layers.Dense(500, activation='relu'))
model.add(keras.layers.BatchNormalization())
model.add(keras.layers.Dense(50, activation='relu'))
model.add(keras.layers.BatchNormalization())
model.add(keras.layers.Dense(1, kernel_initializer='uniform'))

model.compile(loss=keras.losses.MeanSquaredError(),
              optimizer=keras.optimizers.Nadam(learning_rate=0.009,
                                               beta_1=0.8,
                                               beta_2=0.999),
              metrics=["mse"])

# stop training once the validation loss stops improving
early_stopping = keras.callbacks.EarlyStopping(monitor='val_loss',
                                               verbose=1,
                                               patience=20,
                                               mode='auto',
                                               restore_best_weights=True)

# lower the learning rate when the validation loss plateaus
reduce_lr = keras.callbacks.ReduceLROnPlateau(monitor='val_loss',
                                              factor=0.2,
                                              patience=10,
                                              verbose=1,
                                              mode='auto',
                                              min_delta=0.0005,
                                              cooldown=0,
                                              min_lr=1e-6)
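The split, fit, and prediction steps are not shown above; a minimal sketch (split ratio, epoch count, and batch size are assumptions):

from sklearn.model_selection import train_test_split

# assumption: 80/20 train/test split, 500 epochs, batch size 8
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=10)
history = model.fit(x_train, y_train,
                    validation_split=0.2,
                    epochs=500, batch_size=8,
                    callbacks=[early_stopping, reduce_lr],
                    verbose=0)
predict_y = model.predict(x_test)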
# R2-score on the test set
from sklearn import metrics as skl_mtc

result = skl_mtc.r2_score(y_test, predict_y)
print(f'R2-score in test set: {np.round(result, 4)}')
print(history.history.keys())
Business Objective:
To predict the size of the area burnt in forest fires each year, so that the
authorities can be better prepared for future calamities.
Data Dictionaries:
import pandas as pd
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Activation, Layer, Lambda

forestfires = pd.read_csv("C:\\Users\\ankush\\Desktop\\DataSets\\ANN\\fireforests.csv")

# as dummy variables are already created, we will remove the month and day columns
forestfires.drop(["month", "day"], axis=1, inplace=True)
forestfires["area"].value_counts()
predictors1 = norm_func(predictors)
#data = pd.concat([predictors1,target],axis=1)
def prep_model(hidden_dim):
    # hidden_dim: [input_dim, hidden_1, ..., hidden_n, output_dim]
    model = Sequential()
    for i in range(1, len(hidden_dim) - 1):
        if (i == 1):
            model.add(Dense(hidden_dim[i], input_dim=hidden_dim[0], activation="relu"))
        else:
            model.add(Dense(hidden_dim[i], activation="relu"))
    model.add(Dense(hidden_dim[-1], kernel_initializer="normal", activation="sigmoid"))
    model.compile(loss="binary_crossentropy", optimizer="rmsprop", metrics=["accuracy"])
    return model
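The split, training, and prediction steps feeding the confusion matrices below are not shown; a sketch (split ratio, layer sizes, epochs, and the small/large encoding are all assumptions):

from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix

# assumption: the label column is "size_category" with values "small"/"large"
target = forestfires["size_category"].map({"small": 0, "large": 1})
data = pd.concat([predictors1, target.rename("original_class")], axis=1)
train, test = train_test_split(data, test_size=0.2, random_state=42)

first_model = prep_model([predictors1.shape[1], 50, 40, 1])
first_model.fit(np.array(train.drop(columns="original_class")),
                np.array(train["original_class"]),
                epochs=500, verbose=0)

# threshold the sigmoid outputs at 0.5 to get class predictions
pred_train_class = (first_model.predict(np.array(train.drop(columns="original_class"))).flatten() > 0.5).astype(int)
pred_test_class = (first_model.predict(np.array(test.drop(columns="original_class"))).flatten() > 0.5).astype(int)
confusion_matrix(pred_train_class, train["original_class"])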
By observing the above confusion matrix, we can see that large fires are predicted
10 times and small fires 392 times on the training data.

np.mean(pred_train_class == pd.Series(train["original_class"]).reset_index(drop=True))
np.mean(pred_test_class == pd.Series(test["original_class"]).reset_index(drop=True))
Accuracy is 93.36%.

confusion_matrix(pred_test_class, test["original_class"])
By observing the above confusion matrix, we can see that large fires are predicted
0 times and small fires 97 times on the test data.
Business Problem
What is the business objective?
To predict the compressive strength of concrete.
Data Dictionaries:

Name of Feature | Description                           | Type       | Relevance
Cement          | One of the ingredients in concrete.   | Continuous | Relevant (input, i.e. independent variable)
Slag            | One of the ingredients in concrete.   | Continuous | Relevant (input, i.e. independent variable)
Ash             | One of the ingredients in concrete.   | Continuous | Relevant (input, i.e. independent variable)
Water           | One of the ingredients in concrete.   | Continuous | Relevant (input, i.e. independent variable)
Superplastic    | One of the ingredients in concrete.   | Continuous | Relevant (input, i.e. independent variable)
Coarseagg       | One of the ingredients in concrete.   | Continuous | Relevant (input, i.e. independent variable)
Fineagg         | One of the ingredients in concrete.   | Continuous | Relevant (input, i.e. independent variable)
age             | Age of the concrete mix.              | Continuous | Relevant (input, i.e. independent variable)
Strength        | Compressive strength of the concrete. | Continuous | Relevant (target, i.e. dependent variable)
import pandas as pd
from pathlib import Path
from sklearn import model_selection
from sklearn import preprocessing
import matplotlib.pyplot as plt
from keras import models, layers, metrics
import numpy as np

np.random.seed(22)

# load the dataset (file name is assumed; adjust the path to your copy)
concrete_data = pd.read_csv("C:\\Users\\ankush\\Desktop\\DataSets\\ANN\\concrete.csv")
print(concrete_data.head())

predictors = concrete_data.iloc[:, 0:8].values
outcomes = concrete_data.iloc[:, 8].values

# scale all predictors to the [0, 1] range
min_max_scaler = preprocessing.MinMaxScaler()
predictors_scaled = min_max_scaler.fit_transform(predictors)
predictors_scaled[:5, ]

# assumption: 80/20 train/test split
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    predictors_scaled, outcomes, test_size=0.2, random_state=22)

network = models.Sequential()
network.add(layers.Dense(10, activation='relu', input_shape=(X_train.shape[1], )))
network.add(layers.Dense(5, activation='relu'))
network.add(layers.Dense(1))
network.compile(optimizer='adam', loss='mean_squared_error')
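The fit and evaluation steps are not shown above; a sketch (epoch count and batch size are assumptions):

# train on the scaled predictors and score on the held-out set
history = network.fit(X_train, y_train,
                      validation_split=0.2,
                      epochs=100, batch_size=32, verbose=0)
test_mse = network.evaluate(X_test, y_test, verbose=0)
print(f"Test MSE: {test_mse:.2f}")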
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import time
ann_data = pd.read_csv("C:\\Users\\ankush\\Desktop\\DataSets\\ANN\\RPL.csv")
ann_data.head()
ann_data.info()
ann_data.describe()
corr = ann_data.corr()
ax = sns.heatmap(corr, annot=True, cmap='RdYlGn', linewidths=0.1, annot_kws={'size': 12})
bottom, top = ax.get_ylim()
ax.set_ylim(bottom + 0.5, top - 0.5)
fig = plt.gcf()
fig.set_size_inches(12, 10)
plt.xticks(fontsize=12)
plt.yticks(fontsize=12)
plt.show()
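ann1, sliced below, is never created above; presumably it is ann_data with its categorical columns numerically encoded. A minimal sketch (the encoding scheme is an assumption):

# assumption: integer-encode every object (string) column of ann_data
ann1 = ann_data.copy()
for col in ann1.select_dtypes(include="object").columns:
    ann1[col] = ann1[col].astype("category").cat.codes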
X = ann1.iloc[:, 3:13].values
y = ann1.iloc[:, 13].values
X, y
Pre-processing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# assumption: 80/20 train/test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)
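The classifier whose confusion matrix cm is plotted below does not appear above; a sketch (layer sizes, epochs, and batch size are assumptions):

from tensorflow import keras
from sklearn.metrics import confusion_matrix, accuracy_score

# a small feed-forward classifier with a sigmoid output for the binary target
classifier = keras.models.Sequential([
    keras.layers.Dense(6, activation="relu", input_dim=X_train.shape[1]),
    keras.layers.Dense(6, activation="relu"),
    keras.layers.Dense(1, activation="sigmoid"),
])
classifier.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])
classifier.fit(X_train, y_train, epochs=100, batch_size=10, verbose=0)

# threshold the predicted probabilities at 0.5 and tabulate the errors
y_pred = (classifier.predict(X_test) > 0.5).astype(int)
cm = confusion_matrix(y_test, y_pred)
acc = accuracy_score(y_test, y_pred)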
ax = sns.heatmap(cm, annot=True, cmap='RdYlGn', linewidths=0.1, annot_kws={'size': 12})
bottom, top = ax.get_ylim()
ax.set_ylim(bottom + 0.5, top - 0.5)
fig = plt.gcf()
fig.set_size_inches(12, 10)
plt.xticks(fontsize=12)
plt.yticks(fontsize=12)
plt.show()
print(f"Accuracy: {1*100}%")
Accuracy: 100%