笔记来源于B站up主,@刘二大人
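目标函数依旧是均方误差(MSE),训练的目标是找到使其最小的权重:$w^{*} = \arg\min_{w} \mathrm{cost}(w)$,其中 $\mathrm{cost}(w) = \frac{1}{N}\sum_{n=1}^{N}(\hat{y}_n - y_n)^2$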
假设函数为 $\hat{y} = x \cdot w$
梯度下降法公式:$\frac{\partial\,\mathrm{cost}(w)}{\partial w} = \frac{1}{N}\sum_{n=1}^{N} 2x_n(x_n w - y_n)$
寻找负梯度方向,是收敛的方向
利用梯度更新权重:$w \leftarrow w - \alpha\,\frac{\partial\,\mathrm{cost}(w)}{\partial w}$,其中 $\alpha$ 是学习率
import matplotlib.pyplot as plt
# Training set: three samples that satisfy y = 2 * x exactly.
x_data = [1.0, 2.0, 3.0]
y_data = [2.0 * sample for sample in x_data]
# Initial guess for the single weight of the model y = w * x.
w = float(1)
# Forward pass of the linear model y = w * x.
def forward(x):
    """Return the model prediction ``x * w``.

    ``w`` is the module-level weight and is looked up on every call, so the
    result always reflects the most recent weight update.
    """
    prediction = x * w
    return prediction
# Loss function: mean squared error (MSE) over the whole training set.
def cost(xs, ys):
    """Return the mean squared error of ``forward`` over the paired samples.

    ``xs`` and ``ys`` are paired with ``zip``, so surplus elements of the
    longer sequence are ignored; the sum is nevertheless divided by
    ``len(xs)``. An empty ``xs`` raises ``ZeroDivisionError``.
    """
    squared_error_sum = 0
    for sample_x, target in zip(xs, ys):
        residual = forward(sample_x) - target
        squared_error_sum += residual ** 2
    return squared_error_sum / len(xs)
# Batch gradient: derivative of the MSE cost with respect to w.
def gradient(xs, ys):
    """Return the average of ``2 * x * (x * w - y)`` over the paired samples.

    Uses the module-level weight ``w``. Samples are paired with ``zip`` and
    the sum is divided by ``len(xs)``; an empty ``xs`` raises
    ``ZeroDivisionError``.
    """
    grad_sum = 0
    for sample_x, target in zip(xs, ys):
        residual = sample_x * w - target
        grad_sum += 2 * sample_x * residual
    return grad_sum / len(xs)
# Step size applied to the gradient on every update (0.01).
learning_rate = 0.01
# Per-epoch history, used for the cost curve plotted below.
epoch_list, cost_list = [], []

# Prediction for x = 4 before training.
print('predict (before training)', 4, forward(4))

for epoch in range(100):
    # The cost is measured with the weight as it was BEFORE this epoch's update.
    current_cost = cost(x_data, y_data)
    current_grad = gradient(x_data, y_data)
    # Move w one step against the gradient.
    w = w - learning_rate * current_grad
    print('epoch:', epoch, 'w=', w, 'loss=', current_cost)
    epoch_list.append(epoch)
    cost_list.append(current_cost)

# Prediction for x = 4 after training.
print('predict (after training)', 4, forward(4))

# Plot the cost against the epoch number.
plt.plot(epoch_list, cost_list)
plt.ylabel('cost')
plt.xlabel('epoch')
plt.show()
输出结果:
随机梯度下降法SGD
每次只用单个样本的梯度来更新权重,由此引入的随机噪声有助于跨越鞍点(梯度为0但并非极值的点)
SGD中每次权重的更新与上一次计算的权重相关,无法并行计算
import matplotlib.pyplot as plt
# Training set given as (x, y) pairs, then split into the two parallel lists.
x_data, y_data = map(list, zip((1.0, 2.0), (2.0, 4.0), (3.0, 6.0)))
# Initial guess for the single weight of the model y = w * x.
w = float(1)
def forward(x):
    """Return the model prediction ``x * w``.

    ``w`` is the module-level weight and is looked up on every call, so the
    result always reflects the most recent weight update.
    """
    prediction = x * w
    return prediction
# Loss function: squared error of a single sample.
def loss(x, y):
    """Return the squared difference between ``forward(x)`` and ``y``."""
    residual = forward(x) - y
    return residual ** 2
# Per-sample gradient used by SGD.
def gradient(x, y):
    """Return ``2 * x * (x * w - y)`` for one sample.

    This is the derivative of the single-sample squared error with respect
    to the module-level weight ``w``.
    """
    residual = x * w - y
    return 2 * x * residual
# Step size applied to the gradient on every update (0.01).
learning_rate = 0.01
# Per-epoch history, used for the loss curve plotted below.
epoch_list, loss_list = [], []

# Prediction for x = 4 before training.
print('predict (before training)', 4, forward(4))

for epoch in range(100):
    for sample_x, target in zip(x_data, y_data):
        sample_grad = gradient(sample_x, target)
        # Update the weight after every single sample of the training set.
        w -= learning_rate * sample_grad
        print("\tgrad:", sample_x, target, sample_grad)
        # Loss of this sample, measured with the freshly updated weight.
        sample_loss = loss(sample_x, target)
    # Only the loss of the last sample in the epoch is reported and recorded.
    print("progress:", epoch, "w=", w, "loss=", sample_loss)
    epoch_list.append(epoch)
    loss_list.append(sample_loss)

# Prediction for x = 4 after training.
print('predict (after training)', 4, forward(4))

# Plot the recorded loss against the epoch number.
plt.plot(epoch_list, loss_list)
plt.ylabel('loss')
plt.xlabel('epoch')
plt.show()
输出结果: