天天看点

吴恩达logistic回归实现

采用 iris 数据集，抽取 sepal length 和 sepal width 两个特征，并只保留类别 0 和 1 的样本，运用 logistic 回归对其进行二分类。数据集如图所示。

吴恩达logistic回归实现
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
import matplotlib.pyplot as plt

# Sigmoid function
def basic_sigmoid(x):
    """Return the logistic sigmoid ``1 / (1 + exp(-x))`` of *x*.

    The exponential is evaluated with ``np.exp``, so array inputs are
    transformed element-wise.
    """
    denominator = 1 + np.exp(-x)
    return 1.0 / denominator
 
# Derivative of the sigmoid function
def sigmoid_derivative(x):
    """Return the sigmoid's derivative at *x*: ``s * (1 - s)`` with ``s = basic_sigmoid(x)``."""
    activation = basic_sigmoid(x)
    return activation * (1 - activation)

# Initialise the weights
def initialize(dim):
    """Return the starting parameters: a ``(dim, 1)`` zero weight column and a bias of 0."""
    return np.zeros((dim, 1)), 0

def propagate(X, Y, W, b, learn):
    """Run one gradient-descent step of logistic regression.

    Args:
        X: feature matrix of shape (n, m), one column per sample.
        Y: label row of shape (1, m).
        W: weight column of shape (n, 1).
        b: bias term.
        learn: learning rate applied to both gradients.

    Returns:
        Tuple ``(W, b, A, cost)``: the updated weights and bias, plus the
        activations and cross-entropy cost that were computed with the
        parameters passed in (i.e. before this step's update).
    """
    m = X.shape[1]

    # Forward pass.
    Z = np.dot(W.T, X) + b
    A = basic_sigmoid(Z)

    # The sigmoid saturates to exactly 0.0 / 1.0 for large |Z|; log(0) would
    # then turn the cost into nan/inf. Clip only the values fed to the loss so
    # the returned activations and the gradients are unaffected.
    eps = 1e-15
    A_safe = np.clip(A, eps, 1 - eps)
    cost = -1 / m * np.sum(Y * np.log(A_safe) + (1 - Y) * np.log(1 - A_safe))

    # Backward pass: gradients of the cost w.r.t. W and b.
    error = A - Y
    dw = 1 / m * np.dot(X, error.T)
    db = 1 / m * np.sum(error)

    # Gradient-descent update.
    W = W - learn * dw
    b = b - learn * db
    return W, b, A, cost

def accuray(A, Y, cost):
    """Print the accuracy of activations *A* against labels *Y*, then the cost.

    A prediction is counted as correct when it lies within 0.5 of its label.
    """
    correct = np.count_nonzero(np.abs(A - Y) < 0.5)
    print("accuray:", correct / A.size)
    print("error:", cost)
    
def train(X, Y, learn, limits):
    """Fit logistic regression by batch gradient descent, reporting every epoch.

    Args:
        X: feature matrix handed to ``propagate``; its first dimension sizes
            the weight vector.
        Y: labels handed to ``propagate`` and ``accuray``.
        learn: learning rate.
        limits: number of epochs to run.

    Returns:
        Tuple ``(W, b)`` holding the parameters after the last epoch (the
        zero initialisation when ``limits`` is 0).
    """
    W, b = initialize(X.shape[0])
    for epoch in range(limits):
        print("======epoch:%d=======" % epoch)

        W, b, A, cost = propagate(X, Y, W, b, learn)
        accuray(A, Y, cost)

    # Hand the learned parameters back; previously they were discarded, so
    # the trained model could not be used after this call.
    return W, b
    
#def load_data():
#    X=np.array([[4,5,8,9],[1,0,2,1]]).T
#    Y=np.array([[0],[1]]).T
#    return X,Y
        
def load_data():
    """Load the first 100 iris rows as a two-feature training set.

    Side effect: draws a scatter plot of sepal length against sepal width on
    the current matplotlib figure (rows 0-49 labelled '0', rows 50-99
    labelled '1'); the figure is not shown here.

    Returns:
        Tuple ``(X, Y)``: ``X`` is the transposed feature matrix (one row for
        sepal length, one for sepal width, one column per sample) and ``Y``
        is the label row reshaped to ``(1, 100)``.
    """
    iris = load_iris()
    frame = pd.DataFrame(iris.data, columns=iris.feature_names)
    frame['label'] = iris.target
    frame.columns = ['sepal length', 'sepal width', 'petal length', 'petal width', 'label']

    # One scatter series per block of 50 rows.
    for start, stop, tag in ((0, 50, '0'), (50, 100, '1')):
        rows = frame[start:stop]
        plt.scatter(rows['sepal length'], rows['sepal width'], label=tag)
    plt.xlabel('sepal length')
    plt.ylabel('sepal width')
    plt.legend()

    # Keep the two sepal columns plus the label for the first 100 rows.
    subset = np.array(frame.iloc[:100, [0, 1, -1]])
    X = subset[:, :-1].T
    Y = subset[:, -1].reshape(1, 100)
    return X, Y
if __name__ == "__main__":
    # Script entry point: load the iris subset and train with a learning
    # rate of 0.05 for 100 epochs.
    X, Y = load_data()
    train(X, Y, learn=0.05, limits=100)
           

继续阅读