import numpy as np
import numpy.random as nr
import matplotlib.pyplot as pl
%matplotlib inline
# This notebook is based on an excellent tutorial by Kostis Gourgoulias (http://kgourgou.me/)
# Specify size of plot: (width, height) in inches for all figures below.
pl.rcParams['figure.figsize'] = (8, 6)
認識一下感知器算法
線性感知器算法可以根據預先標明的特征對資料點進行分類。我們的想法是找到一條直線(或超平面),分離不同特征的點。一旦我們得到直線,我們就可以根據點相對直線的位置(在上方或下方),來判斷它是屬于哪一個類别。
現在,讓我們生成一個點集,然後根據一條線繪制它們。如果點在線的上面,它們是藍色的;如果它們在下面,則是綠色的。
# Generate some points uniformly at random in the unit square.
N = 100
xn = nr.rand(N, 2)          # N points; column 0 is x, column 1 is y
x = np.linspace(0, 1)       # grid for drawing the separating line

# Pick a line y = a*x + b that will define the two classes.
# a, b = nr.rand(), nr.rand()   # random line, if preferred
a, b = 0.6, 0.2
f = lambda x: a*x + b

fig = pl.figure()
figa = pl.gca()
pl.plot(xn[:, 0], xn[:, 1], 'bo')
pl.plot(x, f(x), 'r')

# Linearly separate the points by the line:
# label +1 when the point lies below the line, -1 when above.
yn = np.zeros([N, 1])
for i in range(N):
    if f(xn[i, 0]) > xn[i, 1]:
        # Point is below line: mark it and re-plot it in green.
        yn[i] = 1
        pl.plot(xn[i, 0], xn[i, 1], 'go')
    else:
        # Point is above line.
        yn[i] = -1

pl.legend(['Above', 'Separator', 'Below'], loc=0)
pl.title('Selected points with their separating line.')
直線自然把空間分成兩個區域,一個是綠色點區域,一個是藍色點區域。所以,如果給我一個新的點,我可以根據它相對直線的位置給它分配一個顔色。真的很簡單。
不那麼簡單的是找到分離給定點集的直線。然而,如果給定點集是線性可分離的,我可以不斷移動一條直線,直到得到正确的位置。這就是感知器算法所做的。
def perceptron(xn, yn, max_iter=1000, w=None):
    '''
    A very simple implementation of the perceptron algorithm for two
    dimensional data.

    Given points (x, y) with x in R^2 and y in {-1, 1}, the perceptron
    learning algorithm searches for the best line that separates the data
    points according to the different classes defined in y.

    Input:
        xn       : Data points, an Nx2 vector.
        yn       : Classification of the previous data points, an Nx1 vector.
        max_iter : Maximum number of iterations (optional).
        w        : Initial weight vector [bias, w1, w2] (optional).
                   Defaults to a fresh zero vector each call.

    Output:
        w : Parameters of the separating line w[0] + w[1]*x + w[2]*y = 0
            (equivalently y = a*x + b) that linearly separates the data.

    Note:
        Convergence will be slower than expected, since this implementation
        picks points to update without a specific plan (randomly). This is
        enough for a demonstration, not so good for actual work.
    '''
    # Allocate a fresh weight vector per call: a shared mutable default
    # would accumulate updates across calls.
    if w is None:
        w = np.zeros(3)
    N = xn.shape[0]
    # Current classification rule: sign of the affine function at the point.
    f = lambda x: np.sign(w[0] + w[1]*x[0] + w[2]*x[1])
    for _ in range(max_iter):
        i = nr.randint(N)  # try a random sample from the dataset
        if yn[i] != f(xn[i, :]):
            # Misclassified: nudge the line toward classifying this point.
            w[0] = w[0] + yn[i]            # the first weight is the bias
            w[1] = w[1] + yn[i] * xn[i, 0]
            w[2] = w[2] + yn[i] * xn[i, 1]
    return w
我們已經寫好了感知器算法,我們來看看它的效果。
w = perceptron(xn, yn)

# Convert weights w = [bias, w1, w2] of the line w[0] + w[1]*x + w[2]*y = 0
# into slope/intercept form y = a*x + b.
bnew = -w[0] / w[2]
anew = -w[1] / w[2]
y = lambda x: anew * x + bnew

# Map labels {-1, 1} to {0, 1} for coloring the scatter plot;
# ravel() because scatter expects a 1-D color array, yn is Nx1.
sep_color = ((yn + 1) / 2).ravel()

pl.figure()
figa = pl.gca()
pl.scatter(xn[:, 0], xn[:, 1], c=sep_color, s=30)
pl.plot(x, y(x), 'b--', label='Line from perceptron implementation.')
pl.plot(x, f(x), 'r', label='Original line.')
pl.legend()
pl.title('Comparison between the linear separator and the perceptron approximation.')
不錯,對吧?該算法應該能夠收斂到分離線的良好近似。如果沒有,請再次運作最後一段代碼。因為該算法随機選擇點進行優化,所以在某些情況下收斂得不會很好。
若資料線性可分而且疊代次數足夠,則該算法一定可以得到正确劃分資料的直線。
如果資料集不是線性可分的該怎麼辦?
如果資料不能用一條直線分開,那麼在大多數情況下,這個過程将不能很好地工作。有些點將被正確分類,有些則不會。
# Change this function to select points with respect to a different curve.
f = lambda x: x**2
x = np.linspace(0, 1)

# Generate some data points to play with.
N = 100
xn = nr.rand(N, 2)

# Classify based on f(x): +1 below the curve, -1 above (sign of f(x) - y).
yn = np.sign(f(xn[:, 0]) - xn[:, 1])
colors = (yn + 1) / 2

# Try perceptron with that data (more iterations, since it cannot converge
# on data that is not linearly separable).
w = perceptron(xn, yn, max_iter=1000)

# Re-scale the weights to construct the line representation y = a*x + b.
bnew = -w[0] / w[2]
anew = -w[1] / w[2]
y = lambda x: anew * x + bnew

figa = pl.gca()
pl.scatter(xn[:, 0], xn[:, 1], c=colors, s=40)
pl.title('Classification based on f(x)')
pl.plot(x, f(x), 'r', label='Separating curve.')
pl.plot(x, y(x), 'b--', label='Curve from perceptron algorithm.')
pl.legend()
在這種情況下,我們的分類器不能正确地分類所有的點(藍點應該在藍線上方,黃點在下面)。當我們增加更多的資料時,情況可能變得更糟。
感知器算法原理可參考:http://blog.csdn.net/castle_cc/article/details/78842170
本文内容基本來自:https://github.com/lexfridman/deepcars/blob/master/1_python_perceptron.ipynb