对数几率回归（Logistic Regression）的实现
import numpy as np
from numpy import linalg as LA
import matplotlib.pyplot as plt
def get_Data():
    """Return the hard-coded training set as a design matrix and labels.

    Each of the 17 samples holds two feature values followed by a 0/1 class
    label.  A constant column of ones is appended after the two features so
    that the last fitted parameter acts as the threshold (bias) term.

    :return: ``(x, y)`` where ``x`` is a 17x3 ``np.matrix`` (two features
        plus the constant column) and ``y`` is a 17x1 ``np.matrix`` of labels.
    """
    data = np.array([
        [0.697, 0.460, 1], [0.774, 0.376, 1], [0.634, 0.264, 1],
        [0.608, 0.318, 1], [0.556, 0.215, 1], [0.403, 0.237, 1],
        [0.481, 0.149, 1], [0.437, 0.211, 1], [0.666, 0.091, 0],
        [0.243, 0.267, 0], [0.245, 0.057, 0], [0.343, 0.099, 0],
        [0.639, 0.161, 0], [0.657, 0.198, 0], [0.360, 0.370, 0],
        [0.593, 0.042, 0], [0.719, 0.103, 0],
    ])
    features = data[:, :2]
    # Constant column that multiplies the threshold term.
    bias = np.ones((features.shape[0], 1))
    x = np.hstack((features, bias))
    # Slice (not index) so the labels stay a column rather than a 1-D array.
    y = data[:, 2:]
    # np.mat was removed in NumPy 2.0; np.asmatrix is the surviving spelling
    # and still yields the np.matrix type that callers index as m[i, j].
    return np.asmatrix(x), np.asmatrix(y)
def init_FitParams(x_width, acc=1e-8, seed=None):
    """Create a random starting point and the convergence tolerance.

    :param x_width: number of parameters to fit (columns of the design
        matrix, including the constant column).
    :param acc: tolerance handed back to the caller; defaults to the
        previously hard-coded value of 1e-8.
    :param seed: optional seed for a private generator, for reproducible
        starts.  When ``None`` the global NumPy random state is used, exactly
        as before.
    :return: ``(params, acc)`` where ``params`` is an ``(x_width, 1)`` array
        of uniform samples from [0, 1).
    """
    if seed is None:
        params = np.random.rand(x_width, 1)
    else:
        # A private generator keeps a seeded call from disturbing global state.
        params = np.random.RandomState(seed).rand(x_width, 1)
    return params, acc
def get_P1(x, params):
    """Return, for every sample, the modelled probability of the first class.

    The probability is the logistic function of the linear score
    ``z = x_i . params``, i.e. ``e**z / (1 + e**z)``.

    :param x: design matrix with one sample per row.
    :param params: parameter vector with one entry per column of ``x``
        (a column vector or a flat array).
    :return: ``1 x n_samples`` ``np.matrix`` of probabilities, so entry
        ``[0, i]`` belongs to sample ``i``.
    """
    features = np.asarray(x, dtype=float)
    weights = np.asarray(params, dtype=float).reshape(-1)
    z = features.dot(weights)
    # Evaluate through exp(-|z|), which never overflows.  The direct form
    # e**z / (1 + e**z) turns into inf/inf = nan once z is large.
    decay = np.exp(-np.abs(z))
    p1 = np.where(z >= 0, 1.0, decay) / (1.0 + decay)
    return np.asmatrix(p1.reshape(1, -1))
def get_D1Params(x, y, params):
    """Return the first derivative of the objective w.r.t. the parameters.

    Computes ``-sum_i x_i^T * (y_i - p1_i)``, where ``p1_i`` is the
    first-class probability that ``get_P1`` reports for sample ``i``.

    :param x: design matrix with one sample per row.
    :param y: label column; only its first column is read.
    :param params: current parameter vector.
    :return: ``x_width x 1`` ``np.matrix`` holding the gradient.
    """
    features = np.asarray(x, dtype=float)
    labels = np.asarray(y, dtype=float)[:, 0]
    p1 = np.asarray(get_P1(x, params), dtype=float).reshape(-1)
    # One matrix-vector product replaces the per-sample accumulation loop.
    gradient = -features.T.dot(labels - p1)
    # np.mat was removed in NumPy 2.0; np.asmatrix returns the same type.
    return np.asmatrix(gradient.reshape(-1, 1))
def get_D2Params(x, params):
    """Return the second derivative (Hessian) of the objective.

    Computes ``sum_i x_i^T x_i * p1_i * (1 - p1_i)``, where ``p1_i`` is the
    first-class probability that ``get_P1`` reports for sample ``i``.

    :param x: design matrix with one sample per row.
    :param params: current parameter vector.
    :return: ``x_width x x_width`` ``np.matrix`` (one row and column per
        parameter, not per sample).
    """
    features = np.asarray(x, dtype=float)
    p1 = np.asarray(get_P1(x, params), dtype=float).reshape(-1)
    sample_weights = p1 * (1.0 - p1)
    # Scale each sample's column of features.T by its weight, then multiply:
    # this equals the sum of the weighted outer products x_i^T x_i.
    hessian = (features.T * sample_weights).dot(features)
    # np.mat was removed in NumPy 2.0; np.asmatrix returns the same type.
    return np.asmatrix(hessian)
def main(max_iter=100):
    """Fit the model with Newton's method, plot the step history, print result.

    Starting from random parameters, each iteration subtracts the Newton
    step (Hessian^-1 * gradient).  Iteration stops once the 2-norm of the
    step is no longer greater than the tolerance from ``init_FitParams``.

    :param max_iter: upper bound on the number of Newton steps, so a run
        that fails to converge cannot loop forever.
    :return: None
    """
    x, y = get_Data()
    params, acc = init_FitParams(x.shape[1])
    err_List = []
    for _ in range(max_iter):
        D1 = np.asarray(get_D1Params(x, y, params), dtype=float)
        D2 = np.asarray(get_D2Params(x, params), dtype=float)
        # Solve D2 * step = D1 rather than forming the explicit inverse.
        step = LA.solve(D2, D1)
        params = params - step  # Newton update
        step_norm = LA.norm(step)
        err_List.append(abs(acc - step_norm))
        # Written as "not (acc < norm)" so a NaN norm also ends the loop,
        # as the original while-condition did.
        if not (acc < step_norm):
            break
    plt.plot(range(len(err_List)), err_List, "bo-")
    plt.title("Accuracy variation diagram")
    plt.show()
    temp = [params[i, 0] for i in range(params.shape[0])]
    print("最终所求的参数为:", temp)
# Run the fit only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
运行结果（参数顺序与特征列一致，最后一项为阈值项；由于参数为随机初始化，末几位小数可能略有不同）：
最终所求的参数为: [3.1583296622658614, 12.52119579193884, -4.428864510162988]
线性判别分析
import numpy as np
from numpy import linalg as LA
import matplotlib.pyplot as plt
def get_Data():
    """Return the two hard-coded classes of two-dimensional samples.

    :return: ``(x0, x1)`` where ``x0`` is an 8x2 ``np.matrix`` holding the
        first class and ``x1`` is a 9x2 ``np.matrix`` holding the second
        class; each row is one sample.
    """
    # np.mat was removed in NumPy 2.0; np.asmatrix builds the same np.matrix.
    # Samples of the first class.
    x0 = np.asmatrix([
        [0.697, 0.460], [0.774, 0.376], [0.634, 0.264], [0.608, 0.318],
        [0.556, 0.215], [0.403, 0.237], [0.481, 0.149], [0.437, 0.211],
    ])
    # Samples of the second class.
    x1 = np.asmatrix([
        [0.666, 0.091], [0.243, 0.267], [0.245, 0.057], [0.343, 0.099],
        [0.639, 0.161], [0.657, 0.198], [0.360, 0.370], [0.593, 0.042],
        [0.719, 0.103],
    ])
    return x0, x1
def get_WithinClass(**kwargs):
    """Return the within-class scatter matrix of two classes.

    Computes ``(x0 - mean0)^T (x0 - mean0) + (x1 - mean1)^T (x1 - mean1)``.

    :param kwargs: must contain ``x0`` and ``x1`` (sample matrices with one
        sample per row) and ``mean0`` and ``mean1`` (the per-class means,
        broadcastable against the rows).
    :return: square ``np.matrix`` with one row and column per feature.
    :raises KeyError: if any of the four keywords is missing.
    """
    dev0 = (np.asarray(kwargs["x0"], dtype=float)
            - np.asarray(kwargs["mean0"], dtype=float))
    dev1 = (np.asarray(kwargs["x1"], dtype=float)
            - np.asarray(kwargs["mean1"], dtype=float))
    scatter = dev0.T.dot(dev0) + dev1.T.dot(dev1)
    # np.mat was removed in NumPy 2.0; np.asmatrix keeps the np.matrix
    # return type, so callers can still use attributes such as ``.I``.
    return np.asmatrix(scatter)
def main():
    """Compute the LDA direction for the two classes and plot the result.

    The direction is ``W = Sw^-1 (mean0 - mean1)``, where ``Sw`` is the
    within-class scatter matrix.  The plot shows both classes as scatter
    points plus a straight line through the origin.

    :return: None
    """
    x0, x1 = get_Data()
    mean0 = np.mean(x0, axis=0)  # mean of the first class
    mean1 = np.mean(x1, axis=0)  # mean of the second class
    Sw = get_WithinClass(x0=x0, x1=x1, mean0=mean0, mean1=mean1)
    # Solve Sw * W = (mean0 - mean1) instead of forming the explicit inverse.
    # W is kept as a flat array so W[0] and W[1] are plain scalars.
    mean_gap = np.asarray(mean0 - mean1, dtype=float).reshape(-1)
    W = LA.solve(np.asarray(Sw, dtype=float), mean_gap)
    # Scatter plot of the raw samples.
    pts0 = np.asarray(x0)
    pts1 = np.asarray(x1)
    plt.plot(pts0[:, 0], pts0[:, 1], 'b*')  # first class
    plt.plot(pts1[:, 0], pts1[:, 1], 'r+')  # second class
    # End points of the line along the first coordinate.
    left_x = 0
    right_x = 1
    # Matching second coordinates: the line runs from the origin to
    # (right_x, -right_x * W[0] / W[1]), i.e. the points with
    # W[0]*x + W[1]*y = 0.
    # NOTE(review): this line is perpendicular to W; confirm whether the
    # projection direction itself (slope W[1]/W[0]) was intended.
    left_y = 0
    right_y = -(right_x * W[0]) / W[1]
    plt.plot([left_x, right_x], [left_y, right_y], 'r-')
    plt.xlabel('X')
    plt.ylabel('Y')
    plt.title('LDA')
    plt.show()
# Run the LDA demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
详细内容请参见上面的代码，如有疑问，欢迎留言。