# Minimax algorithm and alpha-beta pruning
# python3
# author :李先生
# time :2018.10.28
# email :[email protected]
class TicTacToe(object):
    """Tic-tac-toe game driven by minimax / negamax / alpha-beta search.

    Attributes:
        depth:    maximum search depth; because the evaluation scores only
                  finished games, depth >= 8 is needed for sound play.
        player:   side to move; -1 is the AI ("X"), 1 is the human ("O").
        board:    flat list of 9 cells in row-major order; 0 = empty.
        bestmove: root move chosen by the last search, -1 before any search.
    """

    def __init__(self, depth, first=-1):
        """Create a game with search depth `depth`; `first` is the side
        that moves first (defaults to the AI, -1)."""
        self.depth = depth
        self.restart(first)

    def restart(self, player):
        """Reset the board to empty and give the move to `player`."""
        self.board = [0] * 9
        self.player = player
        self.bestmove = -1

    def go(self, pos):
        """Place the current player's mark at cell `pos` and switch sides."""
        self.board[pos] = self.player
        self.player = -self.player

    def cancel_go(self, pos):
        """Undo a move at cell `pos` and switch sides back (search unwind)."""
        self.board[pos] = 0
        self.player = -self.player

    def print_board(self):
        """Print the 3x3 board: X = AI (-1), O = human (1), _ = empty."""
        print("-" * 20)
        for i in range(3):
            for j in range(3):
                if self.board[i * 3 + j] == -1:
                    print("X", end=" ")
                elif self.board[i * 3 + j] == 1:
                    print("O", end=" ")
                else:
                    print("_", end=" ")
            print()
        print("-" * 20)

    def get_winner(self):
        """Return 0 while the game is running, -1 if the AI won,
        1 if the human won, 2 on a draw.

        A line summing to +-3 holds three equal marks. `go` has already
        switched sides after the winning move, so the winner is -self.player.
        """
        rows = [sum(self.board[i:i + 3]) for i in range(0, 9, 3)]
        cols = [sum(self.board[i:7 + i:3]) for i in range(0, 3)]
        diags = [sum(self.board[:9:4]), sum(self.board[2:7:2])]
        if any(abs(line) == 3 for line in rows + cols + diags):
            return -self.player
        return 0 if self.board.count(0) > 0 else 2

    def evaluate_minmax(self):
        """Leaf score from the AI's fixed viewpoint: AI win -> -1000000,
        human win -> 1000000, draw or unfinished position -> 0."""
        winner = self.get_winner()
        if winner == 2:
            return 0
        return 1000000 * winner

    def evaluate_nega_max(self):
        """Leaf score from the side-to-move's viewpoint (negamax convention):
        positive means the player now to move is winning."""
        winner = self.get_winner()
        if winner == 2:
            return 0
        return 1000000 * winner * self.player

    def min_max(self, depth):
        """Plain minimax: the AI (-1) minimizes, the human (1) maximizes.
        Records the chosen root move in self.bestmove; returns the value."""
        # Stop on a decided game or when the depth budget is exhausted.
        winner = self.get_winner()
        if winner != 0 or depth == 0:
            return self.evaluate_minmax()
        # Move generation: every empty square is a candidate.
        moves = [x for x in range(9) if self.board[x] == 0]
        # Start from the worst possible value for the side to move.
        bestvalue = -1000000 * self.player
        for pos in moves:
            self.go(pos)
            value = self.min_max(depth - 1)
            self.cancel_go(pos)
            if self.player == -1:
                # AI minimizes; <= keeps the last of equally good moves.
                if value <= bestvalue:
                    bestvalue = value
                    if depth == self.depth:  # record only at the root
                        self.bestmove = pos
            else:
                # Human maximizes.
                if value >= bestvalue:
                    bestvalue = value
                    if depth == self.depth:
                        self.bestmove = pos
        return bestvalue

    def nega_max(self, depth):
        """Negamax: both sides maximize their own score; the child value is
        negated on the way up. Records the root move in self.bestmove."""
        winner = self.get_winner()
        if winner != 0 or depth == 0:
            return self.evaluate_nega_max()
        moves = [x for x in range(9) if self.board[x] == 0]
        bestvalue = -1000000
        for pos in moves:
            self.go(pos)
            value = -self.nega_max(depth - 1)
            self.cancel_go(pos)
            if value >= bestvalue:
                bestvalue = value
                if depth == self.depth:  # record only at the root
                    self.bestmove = pos
        return bestvalue

    def alpha_beta(self, depth, alpha, beta):
        """Negamax with alpha-beta pruning over the window [alpha, beta].
        Records the root move in self.bestmove; returns the bounded value."""
        winner = self.get_winner()
        if winner != 0 or depth == 0:
            return self.evaluate_nega_max()
        moves = [x for x in range(9) if self.board[x] == 0]
        for pos in moves:
            self.go(pos)
            # Swap and negate the window for the opponent's turn.
            value = -self.alpha_beta(depth - 1, -beta, -alpha)
            self.cancel_go(pos)
            if value >= beta:
                # Fail-high: opponent will never allow this line; prune.
                if self.depth == depth:
                    self.bestmove = pos
                return beta
            if value > alpha:
                # New best move inside the window; raise the lower bound.
                if self.depth == depth:
                    self.bestmove = pos
                alpha = value
        return alpha
# Depth 8: because the evaluation function only scores finished games,
# the search must reach the end of the game; depth >= 8 is required for
# the AI to play correctly.
def main():
    """Run an AI-vs-AI game with alpha-beta search until it ends."""
    game = TicTacToe(8)
    while game.get_winner() == 0:
        # Alternative searches (same result, slower):
        # game.min_max(game.depth)
        # game.nega_max(game.depth)
        game.alpha_beta(game.depth, -1000000, 1000000)
        game.go(game.bestmove)
        game.print_board()
    print("result:", game.get_winner())


if __name__ == "__main__":
    main()