# 采用 iris 数据集，抽取 sepal length 和 sepal width 两个特征，选取 0 和 1 两个类别，运用 logistic 回归对其进行分类；数据集如图所示。
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
import matplotlib.pyplot as plt
def basic_sigmoid(x):
    """Return the logistic sigmoid ``1 / (1 + exp(-x))`` of ``x``, element-wise.

    The value is computed from ``exp(-|x|)``, which never exceeds 1, so large
    negative inputs do not overflow inside ``np.exp`` (the naive formula
    evaluates ``exp(+large)`` and triggers an overflow RuntimeWarning).

    Args:
        x: A real scalar or array-like of real numbers.

    Returns:
        A NumPy float scalar for scalar input, otherwise a float ndarray with
        the same shape as ``x``. Every value lies in ``[0, 1]``.
    """
    x = np.asarray(x, dtype=float)
    decay = np.exp(-np.abs(x))  # always within [0, 1], so it cannot overflow
    # For x >= 0 this is the usual 1 / (1 + e^-x). For x < 0 the equivalent
    # form e^x / (1 + e^x) is used, where e^x == decay.
    sigmoid = np.where(x >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    # Indexing with () unwraps a 0-d result to a scalar and leaves arrays as-is.
    return sigmoid[()]
def sigmoid_derivative(x):
    """Return the derivative of the sigmoid evaluated at ``x``.

    Uses the identity ``sigmoid'(x) = sigmoid(x) * (1 - sigmoid(x))``.
    """
    activation = basic_sigmoid(x)
    return activation * (1 - activation)
def initialize(dim):
    """Create zero-valued starting parameters.

    Args:
        dim: Number of weights to allocate.

    Returns:
        A tuple ``(W, b)``: ``W`` is a ``(dim, 1)`` array of zeros and ``b``
        is the integer ``0``.
    """
    weights = np.zeros(shape=(dim, 1))
    bias = 0
    return weights, bias
def propagate(X, Y, W, b, learn):
    """Run one gradient-descent step of logistic regression.

    Args:
        X: Feature matrix of shape (n, m), one column per sample.
        Y: Label row of shape (1, m); expected to hold 0/1 values.
        W: Weight column vector of shape (n, 1).
        b: Scalar bias.
        learn: Learning rate applied to both gradients.

    Returns:
        A tuple ``(W, b, A, cost)``: the updated weights and bias, plus the
        activations ``A`` and the cross-entropy ``cost`` that were computed
        with the parameters passed in (i.e. before the update).
    """
    m = X.shape[1]  # number of samples

    # Forward pass.
    Z = np.dot(W.T, X) + b
    A = basic_sigmoid(Z)

    # Keep the probabilities strictly inside (0, 1) for the logarithms only:
    # an activation saturated to exactly 0 or 1 would otherwise make the cost
    # inf/nan. The gradients below still use the unclipped activations.
    eps = 1e-15
    A_safe = np.clip(A, eps, 1 - eps)
    cost = -1 / m * np.sum(Y * np.log(A_safe) + (1 - Y) * np.log(1 - A_safe))

    # Backward pass: gradients with respect to W and b.
    dw = 1 / m * np.dot(X, (A - Y).T)
    db = 1 / m * np.sum(A - Y)

    # Gradient-descent update.
    W = W - learn * dw
    b = b - learn * db
    return W, b, A, cost
def accuray(A, Y, cost):
    """Print the classification accuracy and the current cost.

    A prediction counts as correct when it lies within 0.5 of its label.

    Args:
        A: Array of predicted values.
        Y: Array of labels, broadcastable against ``A``.
        cost: Value printed on the ``error:`` line.
    """
    hits = np.count_nonzero(np.abs(A - Y) < 0.5)
    print("accuray:", hits / A.size)
    print("error:", cost)
def train(X, Y, learn, limits):
    """Fit logistic-regression parameters with batch gradient descent.

    For every epoch a header line is printed, one gradient step is taken, and
    the accuracy and cost reported by that step are printed.

    Args:
        X: Feature matrix; ``X.shape[0]`` is the number of features.
        Y: Labels matching the columns of ``X``.
        learn: Learning rate passed to every gradient step.
        limits: Number of epochs to run.

    Returns:
        The final ``(W, b)`` pair, so callers can reuse the learned
        parameters instead of losing them when the function returns.
    """
    W, b = initialize(X.shape[0])
    for i in range(limits):
        print("======epoch:%d=======" % i)
        W, b, A, cost = propagate(X, Y, W, b, learn)
        accuray(A, Y, cost)
    return W, b
#def load_data():
# X=np.array([[4,5,8,9],[1,0,2,1]]).T
# Y=np.array([[0],[1]]).T
# return X,Y
def load_data():
    """Load a two-feature, two-class subset of the iris dataset.

    The first 100 rows are kept, with the two sepal measurements as features.
    As a side effect, rows 0-49 and 50-99 are drawn as two scatter series
    (legend labels '0' and '1') on the current matplotlib figure; the figure
    is not shown here.

    Returns:
        A tuple ``(X, Y)``: ``X`` has shape (2, 100) with one column per
        sample, and ``Y`` has shape (1, 100) holding the labels.
    """
    iris = load_iris()
    frame = pd.DataFrame(iris.data, columns=iris.feature_names)
    frame['label'] = iris.target
    frame.columns = ['sepal length', 'sepal width', 'petal length', 'petal width', 'label']

    # One scatter series per 50-row slice.
    for start, stop, tag in ((0, 50, '0'), (50, 100, '1')):
        rows = frame[start:stop]
        plt.scatter(rows['sepal length'], rows['sepal width'], label=tag)
    plt.xlabel('sepal length')
    plt.ylabel('sepal width')
    plt.legend()

    # Columns 0 and 1 are the features; the last column is the label.
    data = np.array(frame.iloc[:100, [0, 1, -1]])
    X = data[:, :-1].T
    Y = data[:, -1].reshape(1, 100)
    return X, Y
if __name__ == "__main__":
    # Script entry point: load the iris subset and train for 100 epochs.
    X, Y = load_data()
    train(X, Y, learn=0.05, limits=100)