天天看點

基礎博弈算法之——極大極小以及AB剪枝極大極小算法以及Alpha_beta剪枝

極大極小算法以及Alpha_beta剪枝

# python3
# author  :李先生
# time    :2018.10.28
# email   :[email protected]

class TicTacToe(object):
    '''
    井字棋遊戲:
        player:目前落子玩家,-1代表AI,1代表人類
        board:棋盤
    '''
    # 傳入誰先手,不傳入預設AI先手
    def __init__(self,depth,first = -1):
        self.depth = depth
        self.restart(first)

    # 重新開始遊戲,player先手
    def restart(self,player):
        self.board = [0]*9
        self.player = player
        self.bestmove = -1

    # 落子函數
    def go(self,pos):
        self.board[pos]=self.player
        self.player = -self.player

    # 撤銷落子函數
    def cancel_go(self,pos):
        self.board[pos]=0
        self.player = -self.player

    def print_board(self):
        print("-"*20)
        for i in range(3):
            for j in range(3):
                if self.board[i*3+j]==-1:
                    print("",end="X ")
                elif self.board[i*3+j]==1:
                    print("",end="O ")
                else:
                    print("",end="_ ")
            print()
        print("-"*20)

    # 遊戲未結束傳回0,AI勝利傳回-1,人類獲勝傳回1,平局傳回2
    def get_winner(self):
        if any([var==3 or var ==-3 for var in [sum(self.board[i:i+3]) for i in range(0,9,3)]]):
            return -self.player
        if any([var==3 or var ==-3 for var in [sum(self.board[i:7+i:3]) for i in range(0,3)]]):
            return -self.player
        if any([var==3 or var ==-3 for var in [sum(self.board[:9:4]),sum(self.board[2:7:2])]]):
            return -self.player

        return 0 if self.board.count(0)>0 else 2

    def evaluate_minmax(self):
        winner = self.get_winner()
        if winner == 2:
            return 0
        return 1000000*winner

    def evaluate_nega_max(self):
        winner = self.get_winner()
        if winner == 2:
            return 0
        return 1000000*winner*self.player

    # 極大極小算法
    def min_max(self,depth):
        # 搜尋深度耗盡或者某一方獲勝
        winner = self.get_winner()
        if winner!=0 or depth==0:
            return self.evaluate_minmax()

        # 繼續搜尋
        # 走步生成
        moves = [x for x in range(9) if self.board[x]==0]
        # 初始化最優值
        bestvalue = -1000000*self.player

        for pos in moves:
            self.go(pos)
            value = self.min_max(depth-1)
            self.cancel_go(pos)

            if self.player==-1:
                if value<=bestvalue:
                    bestvalue = value
                    if depth==self.depth:
                        self.bestmove = pos
            else:
                if value>=bestvalue:
                    bestvalue = value
                    if depth==self.depth:
                        self.bestmove = pos
        return bestvalue

    # 負極大算法
    def nega_max(self,depth):
        # 搜尋深度耗盡或者某一方獲勝
        winner = self.get_winner()
        if winner!=0 or depth==0:
            return self.evaluate_nega_max()

        # 繼續搜尋
        # 走步生成
        moves = [x for x in range(9) if self.board[x]==0]
        # 初始化最優值
        bestvalue = -1000000

        for pos in moves:
            self.go(pos)
            value = -self.nega_max(depth-1)
            self.cancel_go(pos)
            if value>=bestvalue:
                bestvalue = value
                if depth == self.depth:
                    self.bestmove = pos

        return bestvalue

    # alpha_beta算法
    def alpha_beta(self,depth,alpha,beta):
        # 搜尋深度耗盡或者某一方獲勝
        winner = self.get_winner()
        if winner!=0 or depth==0:
            return self.evaluate_nega_max()

        # 繼續搜尋
        # 走步生成
        moves = [x for x in range(9) if self.board[x]==0]

        for pos in moves:
            self.go(pos)
            value = -self.alpha_beta(depth-1,-beta,-alpha)
            self.cancel_go(pos)
            if value>=beta:
                if self.depth==depth:
                    self.bestmove = pos
                return beta
            if value>alpha:
                if self.depth==depth:
                    self.bestmove = pos
                alpha = value
            
        return alpha


# 8是深度,因為估值函數的問題,這裡深度要大于等于8才能算正常落子
game = TicTacToe(8)
while game.get_winner()==0:
    # game.min_max(game.depth)
    # game.nega_max(game.depth)
    game.alpha_beta(game.depth,-1000000,1000000)
    game.go(game.bestmove)
    game.print_board()

print("result:",game.get_winner())