最速下降法是優化方法中很重要的一類基礎搜素算法,其具體算法如下:
在接下來的算法實作中,我們使用二階微分的方法求解最優步長(當然也可以用一維搜尋算法進行實作),直接取函數的一階偏導作為方向向量.
具體實作代碼如下:
from sympy import*
import math
import matplotlib.pyplot as plt
import numpy as np
def ObjFunction(x1,x2):
value =2*x1*x1+x2*x2
return value
def Square(x1,x2):
return math.sqrt(math.pow(x1,2)+math.pow(x2,2))
x1,x2=sy.symbols('x1,x2')
Obj=2*x1*x1+x2*x2
z=diff(Obj,x1,x1)
print(z)
result=z.subs({x1:1,x2:3})#替換資料
print(result)
#指定初始值
Epsilon=0.1
x=np.array([1,1])
df1=diff(Obj,x1)
df2=diff(Obj,x2)
Jacobian=np.array([df1,df2])
Hessian=([[0,0],[0,0]])
for i in range (0,2,1): #求解Hessian矩陣
for j in range(0,2,1):
if i==0:
Hessian[i][j]=diff(Jacobian[i],x1)
else:
Hessian[i][j]=diff(Jacobian[i],x2)
Direction=np.array([-Jacobian[0].subs({x1:x[0],x2:x[1]}),-Jacobian[1].subs({x1:x[0],x2:x[1]})]) #求解方向向量的初始向量
print(Direction)
#計算Lambda的初始值
Temp_Jacobian=np.array([0,0])
for i in range(0,2,1):
Temp_Jacobian[i]=Jacobian[i].subs({x1:x[0],x2:x[1]})
Temp_Hessian=np.array(([0,0],[0,0]))
for i in range(0,2,1): #求解Hessian矩陣的執行個體化
for j in range(0,2,1):
Temp_Hessian[i][j]=Hessian[i][j].subs({x1:x[0],x2:x[1]})
Temp=np.array([0,0])
Temp[0]=Temp_Jacobian[0]*Hessian[0][0]+Temp_Jacobian[1]*Hessian[1][0]
Temp[1]=Temp_Jacobian[1]*Hessian[0][1]+Temp_Jacobian[1]*Hessian[1][1]
Lambda=np.dot(Temp_Jacobian.T,Temp_Jacobian)/np.dot(Temp,Temp_Jacobian) #求解Lambda示例的初始化
#使用最速下降法求解目标函數最優值
result=[] #存儲計算的函數值
Minimum=[0,0] #求解最優值
while Square(Direction[0],Direction[1])<Epsilon:
#更新x的值
result.append(ObjFunction(x[0],x[1]))
Minimum=x[0]
Minimum=x[1]
print(Minimum)
x[0]=x[0]+Direction[0]*Lambda
x[1]=x[1]+Direction[1]*Lambda
#更新Direction的值
Direction=np.array([-Jacobian[0].subs({x1:x[0],x2:x[1]}),-Jacobian[1].subs({x1:x[0],x2:x[1]})])
#更新Lambda
for i in range(0,2,1):
Temp_Jacobian[i]=Jacobian[i].subs({x1:x[0],x2:x[1]})
Temp_Hessian=np.array(([0,0],[0,0]))
for i in range(0,2,1): #求解Hessian矩陣的執行個體化
for j in range(0,2,1):
Temp_Hessian[i][j]=Hessian[i][j].subs({x1:x[0],x2:x[1]})
Temp=np.array([0,0])
Temp[0]=Temp_Jacobian[0]*Hessian[0][0]+Temp_Jacobian[1]*Hessian[1][0]
Temp[1]=Temp_Jacobian[1]*Hessian[0][1]+Temp_Jacobian[1]*Hessian[1][1]
temp1=(Temp_Jacobian[0]*Temp_Jacobian[0]+Temp_Jacobian[1]*Temp_Jacobian[1])
temp2=(Temp[0]*Temp_Jacobian[0]+Temp[1]*Temp_Jacobian[1])
Lambda=temp1/temp2
print(Lambda)
print(ObjFunction(x[0],x[1]))
運作結果如上所示。