推薦觀看Jupyter版本:https://github.com/Momentum9/ML

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
%matplotlib inline 

# Load the iris data set into a DataFrame and append the class label column.
iris = load_iris()
df = pd.DataFrame(iris.data, columns=iris.feature_names)
df['label'] = iris.target
df.label.value_counts()  # label distribution (only shown when evaluated interactively)
# Replace the column names with shorter ones.
df.columns = ['sepal length', 'sepal width', 'petal length', 'petal width', 'label']

# Plot rows 0-49 and rows 50-99 as two series (legend labels '0' and '1'),
# sepal length on the x axis and sepal width on the y axis.
for rows, colour, mark, tag in (
    (slice(0, 50), 'b', 'o', '0'),
    (slice(50, 100), 'r', 'x', '1'),
):
    subset = df[rows]
    plt.scatter(subset['sepal length'], subset['sepal width'],
                c=colour, marker=mark, label=tag)
plt.legend()
plt.xlabel('sepal length')
plt.ylabel('sepal width')

# Keep the first 100 rows and only the first two features plus the label.
data = np.array(df.iloc[:100, [0, 1, -1]])
X = data[:, :-1]
# Re-encode the labels for the perceptron: label 0 becomes +1, anything else -1.
y = [1 if target == 0 else -1 for target in data[:, -1]]

Perceptron 實作

class Model:
    """Binary perceptron (primal form) trained by sequential gradient descent.

    Labels are expected to be encoded as +1 / -1. The decision function is
    ``w . x + b``; a sample counts as misclassified when
    ``y * (w . x + b) <= 0``.

    Attributes:
        w: weight vector, one entry per feature. ``None`` until ``fit`` is
            called, because its length is taken from the training data.
        b: bias term.
        learning_rate: step size applied to every update.
        max_iter: maximum number of passes over the training set per
            ``fit`` call; ``None`` means no limit.
        fit_num: total number of passes over the training set so far.
        converged: whether the last ``fit`` call ended with a pass that had
            no misclassified sample.
    """

    def __init__(self, learning_rate=0.1, max_iter=10000):
        # The weights are created in fit() from the training data itself, so
        # the class does not depend on a module-level variable being defined.
        self.w = None
        self.b = 0
        self.learning_rate = learning_rate
        self.max_iter = max_iter
        self.fit_num = 0
        self.converged = False

    def sign(self, x, w, b):
        """Return the raw linear score ``x . w + b``.

        Despite the name the value is not thresholded; callers compare the
        sign of ``y * score`` themselves.
        """
        return np.dot(x, w) + b

    def fit(self, X_train, y_train):
        """Train on ``X_train`` / ``y_train`` until a full pass has no errors.

        Samples are visited in order. Every misclassified sample immediately
        updates ``w`` and ``b``; because an update can break samples that were
        already checked, passes are repeated until one finishes with no
        update, or until ``max_iter`` passes have been made.

        Args:
            X_train: 2-D array-like of shape (n_samples, n_features).
            y_train: sequence of n_samples labels in {+1, -1}.

        Returns:
            The string ``'Perceptron Model!'``.

        Raises:
            ValueError: if ``X_train`` is not 2-D or the number of labels
                does not match the number of samples.
        """
        import warnings

        X_train = np.asarray(X_train)
        if X_train.ndim != 2:
            raise ValueError('X_train must be 2-D (n_samples, n_features)')
        if len(X_train) != len(y_train):
            raise ValueError('X_train and y_train must have the same length')

        if self.w is None:
            # Same starting point as before: all weights set to one.
            self.w = np.ones(X_train.shape[1])

        self.converged = False
        epochs = 0
        while self.max_iter is None or epochs < self.max_iter:
            epochs += 1
            self.fit_num += 1
            mistakes = 0
            for xi, yi in zip(X_train, y_train):
                if yi * self.sign(xi, self.w, self.b) <= 0:
                    self.w = self.w + self.learning_rate * (yi * xi)
                    self.b = self.b + self.learning_rate * yi
                    mistakes += 1
            if mistakes == 0:
                self.converged = True
                break

        if not self.converged:
            # Data that is not linearly separable would otherwise loop forever.
            warnings.warn(
                f'Perceptron did not converge within {epochs} passes; '
                'the data may not be linearly separable.',
                RuntimeWarning,
                stacklevel=2,
            )
        return 'Perceptron Model!'

# Train the hand-written perceptron and report the number of passes and the
# learned parameters.
perceptron = Model()
perceptron.fit(X, y)
print(f'資料集被完整訓練了{perceptron.fit_num}次')
print(perceptron.w, perceptron.b)

# Draw the samples together with the decision boundary (separating hyperplane).
plt.figure(figsize=(12, 8))
for rows, colour, mark, tag in (
    (slice(0, 50), 'b', 'o', '0'),
    (slice(50, 100), 'r', 'x', '1'),
):
    subset = df[rows]
    plt.scatter(subset['sepal length'], subset['sepal width'],
                c=colour, marker=mark, label=tag)
plt.xlabel('sepal length')
plt.ylabel('sepal width')
plt.title('鸢尾花線性資料示例', fontproperties='SimHei', fontsize=20)
plt.legend()

# The boundary is w0*x1 + w1*x2 + b = 0, solved here for x2.
x1 = np.linspace(4, 7, 100)
weight_x = perceptron.w[0]
weight_y = perceptron.w[1]
bias = perceptron.b
x2 = -(bias + weight_x * x1) / weight_y
plt.plot(x1, x2, 'r')

scikit-learn 方法

import sklearn
from sklearn.linear_model import Perceptron

# %%
# Perceptron parameters used here:
#   fit_intercept - whether the intercept b is estimated
#   max_iter      - maximum number of iterations, even if the loss is still
#                   above zero
#   shuffle       - whether the data is shuffled after each training pass
#   tol           - stopping criterion; when not None, training stops once
#                   the loss improvement of an iteration becomes very small
clf = Perceptron(
    fit_intercept=True,
    max_iter=1000,
    shuffle=True,
    tol=None,
)
clf.fit(X, y)

# %%
# Show the learned weights, the intercept, and the intercept's type.
print(clf.coef_)
print(clf.intercept_)
print(type(clf.intercept_))

# %%
# Draw the samples together with the decision boundary (separating hyperplane).
plt.figure(figsize=(12, 8))
for rows, colour, mark, tag in (
    (slice(0, 50), 'b', 'o', '0'),
    (slice(50, 100), 'r', 'x', '1'),
):
    subset = df[rows]
    plt.scatter(subset['sepal length'], subset['sepal width'],
                c=colour, marker=mark, label=tag)
plt.xlabel('sepal length')
plt.ylabel('sepal width')
plt.legend()

# The boundary is w0*x1 + w1*x2 + b = 0, solved here for x2.
x1 = np.linspace(4, 7, 100)
weight_x = clf.coef_[0][0]
weight_y = clf.coef_[0][1]
bias = clf.intercept_[0]
x2 = -(bias + weight_x * x1) / weight_y
plt.plot(x1, x2, 'r')

參考：https://github.com/fengdu78/lihang-code

感知機(Perceptron) Python實作 scikit-learn 方法

Perceptron 實作

scikit-learn 方法

繼續閱讀

libsvm for python 安裝

學習軟體測試基礎測試第七天

Zeppelin 配置通路 REST APIApache Zeppelin Configuration REST API

【Torch】最簡潔logging使用指南

筆試面試題目：滑動視窗(二)

27. Remove Element(清單)題目代碼

資料結構與算法（27）——排序（二）

Dijkstra--簡易版（最短路徑）

GitHub連夜封殺！這份阿裡 10W 字内部 Java 字面試手冊到底有多強？

Cloud Studio初體驗

使用 ctypes 進行 Python 和 C 的混合程式設計

【python】【資料處理】畫多元資料分布圖

【python】netconf協定對接管理裝置

「Python 網絡自動化」NETCONF —— Python 使用 NETCONF 管理配置 H3C 網絡裝置

在python中建立excel并寫入

hdu7108哈希