import numpy as np


# Linear hypothesis: h(X) = X . theta
def hypothesis(X, theta):
    return np.dot(X, theta)

# Gradient of the squared-error cost with respect to theta
def gradient(X, y, theta):
    h = hypothesis(X, theta)
    grad = np.dot(X.transpose(), (h - y))
    return grad

# Squared-error cost for the current theta
def cost(X, y, theta):
    h = hypothesis(X, theta)
    J = np.dot((h - y).transpose(), (h - y))
    J /= 2
    return J[0]
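
# A quick sanity check of the three helpers above on a hand-made example.
# The values below are illustrative assumptions, not part of the original code:
# theta_toy fits the toy data exactly, so the gradient and cost should both be zero.
X_toy = np.array([[1.0, 2.0], [1.0, 3.0]])    # two examples: bias column plus one feature
y_toy = np.array([[5.0], [7.0]])
theta_toy = np.array([[1.0], [2.0]])          # X_toy . theta_toy == y_toy

print(hypothesis(X_toy, theta_toy))           # predictions 5 and 7, matching y_toy
print(gradient(X_toy, y_toy, theta_toy))      # zero gradient at the minimum
print(cost(X_toy, y_toy, theta_toy))          # zero cost at the minimum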

# Shuffle the data and split it into mini-batches of size batch_size
def create_mini_batches(X, y, batch_size):
    mini_batches = []
    data = np.hstack((X, y))
    np.random.shuffle(data)
    n_minibatches = data.shape[0] // batch_size

    for i in range(n_minibatches):
        mini_batch = data[i * batch_size:(i + 1) * batch_size, :]
        X_mini = mini_batch[:, :-1]
        Y_mini = mini_batch[:, -1].reshape((-1, 1))
        mini_batches.append((X_mini, Y_mini))

    # Collect the leftover rows when the data size is not a multiple of batch_size
    if data.shape[0] % batch_size != 0:
        mini_batch = data[n_minibatches * batch_size:, :]
        X_mini = mini_batch[:, :-1]
        Y_mini = mini_batch[:, -1].reshape((-1, 1))
        mini_batches.append((X_mini, Y_mini))

    return mini_batches
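
# Rough illustration of the batching helper; the sizes below are made-up assumptions.
# Ten random examples with batch_size=4 should give two full batches and one leftover of 2.
X_demo = np.random.rand(10, 2)
y_demo = np.random.rand(10, 1)
for X_b, y_b in create_mini_batches(X_demo, y_demo, batch_size=4):
    print(X_b.shape, y_b.shape)               # (4, 2) (4, 1) twice, then (2, 2) (2, 1)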

# Mini-batch gradient descent: one parameter update per mini-batch
def gradientDescent(X, y, learning_rate=0.001, batch_size=32):
    theta = np.zeros((X.shape[1], 1))
    error_list = []
    max_iters = 3
    for itr in range(max_iters):
        mini_batches = create_mini_batches(X, y, batch_size)
        for mini_batch in mini_batches:
            X_mini, y_mini = mini_batch
            theta = theta - learning_rate * gradient(X_mini, y_mini, theta)
            error_list.append(cost(X_mini, y_mini, theta))
    return theta, error_list
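
# Hedged end-to-end sketch on synthetic data; the data-generating process, sample size,
# learning rate, and batch size below are illustrative assumptions, not part of the original.
np.random.seed(0)
n_samples = 200
x = 2 * np.random.rand(n_samples, 1)
noise = 0.5 * np.random.randn(n_samples, 1)
y_syn = 4 + 3 * x + noise                        # true relation: y = 4 + 3x + noise
X_syn = np.hstack((np.ones((n_samples, 1)), x))  # prepend a bias column of ones

theta, error_list = gradientDescent(X_syn, y_syn, learning_rate=0.01, batch_size=32)
print("estimated theta:", theta.ravel())         # should move toward roughly [4, 3]
print("final mini-batch cost:", error_list[-1])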