reinforcement-learning #1
@@ -4,7 +4,7 @@
    <content url="file://$MODULE_DIR$">
      <excludeFolder url="file://$MODULE_DIR$/venv" />
    </content>
    <orderEntry type="jdk" jdkName="Python 3.11 (draughts)" jdkType="Python SDK" />
    <orderEntry type="jdk" jdkName="$USER_HOME$/anaconda3" jdkType="Python SDK" />
    <orderEntry type="sourceFolder" forTests="false" />
  </component>
</module>
Report.pdf (BIN)
Binary file not shown.
main.py (45 changed lines)
@@ -1,6 +1,7 @@
import sys

import pygame
import numpy as np
from matplotlib import pyplot as plt

from reinforcementLearning.ReinforcementLearning import ReinforcementLearning
@@ -178,23 +179,38 @@ def game(difficulty):
    clock = pygame.time.Clock()
    gameManager = GameManager(WIN, GREEN)
    rl = ReinforcementLearning(gameManager.board.getAllMoves(WHITE), gameManager.board, WHITE, gameManager)
    model = rl.buildMainModel()
    model.load_weights("./modelWeights/model_final.h5")
    mm = MiniMax()
    totalReward = []
    for i in range(2000):
    winners = []
    for i in range(100):
        score = 0
        for j in range(200):
            print(j)
            clock.tick(FPS)
            reward = 0
            if gameManager.turn == WHITE:
                mm = MiniMax()
                value, newBoard = mm.AI(difficulty, WHITE, gameManager)
                # mm = MiniMax()
                # value, newBoard = mm.AI(difficulty, WHITE, gameManager)
                # gameManager.aiMove(newBoard)
                # reward, newBoard = rl.AI(gameManager.board)
                if newBoard is None:
                actionSpace = rl.encodeMoves(WHITE, gameManager.board)
                if len(actionSpace) == 0:
                    print("Cannot make move")
                    continue
                totalMoves = len(actionSpace)
                # moves = np.squeeze(moves)
                moves = np.pad(actionSpace, (0, rl.maxSize - totalMoves), 'constant', constant_values=(1, 1))
                act_values = model.predict(rl.normalise(moves))
                val = np.argmax(act_values[0])
                val = val if val < totalMoves else totalMoves - 1
                reward, newBoard, done = gameManager.board.step(actionSpace[val], WHITE)

                # if newBoard is None:
                #     print("Cannot make move")
                #     continue
                gameManager.aiMove(newBoard)
                #

            gameManager.update()
            pygame.display.update()
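For reference, the White-turn branch above pads the encoded action space to the network's fixed width of 32 (rl.maxSize), scores it in one model.predict call, and clamps the argmax so an out-of-range padding slot can never be chosen. A minimal standalone sketch of that selection step, with a hypothetical selectMove helper; it assumes a Keras model taking a 32-wide row and the constant 1/10000 scaling from the updated normalise:

import numpy as np

def selectMove(model, actionSpace, maxSize=32):
    # Pad the variable-length list of encoded moves to the network's fixed input width.
    moves = np.pad(actionSpace, (0, maxSize - len(actionSpace)), 'constant', constant_values=(1, 1))
    # Score all slots in one forward pass; inputs are scaled the same way as in training.
    act_values = model.predict(moves.reshape(1, -1) / 10000)
    # Clamp the argmax so a padding slot can never be selected as a real move.
    val = min(int(np.argmax(act_values[0])), len(actionSpace) - 1)
    return actionSpace[val]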
@@ -206,7 +222,10 @@ def game(difficulty):
            score += reward

            if gameManager.winner() is not None:
                print(gameManager.winner())
                print("Green" if gameManager.winner() == GREEN else "White", " wins")
                with open("winners.txt", "a+") as f:
                    f.write(str(gameManager.winner()) + "\n")
                winners.append(gameManager.winner())
                break

            # for event in pygame.event.get():
@@ -221,9 +240,16 @@ def game(difficulty):
            gameManager.update()
            pygame.display.update()

        if gameManager.winner() is None:
            with open("winners.txt", "a+") as f:
                f.write(str(0) + "\n")
            winners.append(0)
        gameManager.reset()
        rl.resetScore()
        print("Game: ", i, " Reward: ", score)
        with open("rewards.txt", "a+") as f:
            f.write(str(score) + "\n")

        totalReward.append(score)
        # save model weights every 25 games
        if i % 250 == 0 and i != 0:
@@ -237,5 +263,12 @@ def game(difficulty):
    plt.ylabel("Reward")
    plt.show()

    fig, ax = plt.subplots()
    bar = ax.bar(["Draw", "White", "Green"], [winners.count(0), winners.count(WHITE), winners.count(GREEN)])
    ax.set(xlabel='Winner', ylabel='Frequency', ylim=[0, 500])
    ax.set_title("Winners")
    ax.bar_label(bar)
    plt.show()


main(3)
reinforcementLearning/ReinforcementLearning.py
@@ -37,9 +37,10 @@ class ReinforcementLearning():
        self.maxSize = 32
        self.epsilonMin = .01
        self.epsilonDecay = .995
        self.learningRate = 0.001
        self.learningRate = 0.0001
        self.memory = deque(maxlen=10000000)
        self.model = self._buildMainModel()
        self.model = self.buildMainModel()
        print(self.model.summary())

    def AI(self, board: Board) -> tuple:
        """
@@ -48,7 +49,7 @@ class ReinforcementLearning():
        """
        self.board = board
        self.state = self._convertState(self.board.board)
        self.actionSpace = self._encodeMoves(self.colour, self.board)
        self.actionSpace = self.encodeMoves(self.colour, self.board)
        if len(self.actionSpace) == 0:
            return self.score, None
@@ -61,7 +62,7 @@ class ReinforcementLearning():

        return self.score, nextState

    def _buildMainModel(self) -> Sequential:
    def buildMainModel(self) -> Sequential:
        """
        Build the model for the AI
        :return: the model
@@ -69,26 +70,24 @@ class ReinforcementLearning():
        # Board model
        modelLayers = [
            Lambda(lambda x: tf.reshape(x, [-1, 32])),
            Dense(256, activation='relu'),
            Dense(512, activation='relu', kernel_regularizer=regularizers.l2(0.01)),
            Dropout(0.2),
            Dense(128, activation='relu'),
            Dense(256, activation='relu', kernel_regularizer=regularizers.l2(0.01)),
            Dropout(0.2),
            Dense(64, activation='relu'),
            Dense(128, activation='relu', kernel_regularizer=regularizers.l2(0.01)),
            Dropout(0.2),
            Dense(64, activation='relu', kernel_regularizer=regularizers.l2(0.01)),
            Dropout(0.2),
            Dense(32, activation='relu', kernel_regularizer=regularizers.l2(0.01)),
            Dropout(0.2),
            Dense(16, activation='relu', kernel_regularizer=regularizers.l2(0.01)),
            Dropout(0.2),
            Dense(1, activation='linear', kernel_regularizer=regularizers.l2(0.01))
            Dense(16, activation='linear', kernel_regularizer=regularizers.l2(0.01))
        ]
        boardModel = Sequential(modelLayers)

        # boardModel.add(BatchNormalization())
        boardModel.compile(optimizer=Adam(learning_rate=0.0001), loss='mean_squared_error')
        boardModel.compile(optimizer=Adam(learning_rate=self.learningRate), loss='mean_squared_error')
        boardModel.build(input_shape=(None, None))

        print(boardModel.summary())

        return boardModel

    def _replay(self) -> None:
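The revised architecture above is a deeper, L2-regularised MLP over the 32-slot padded move vector, ending in a 16-unit linear head and compiled with the class learning rate. A condensed, standalone sketch of the same construction pattern (fewer layers, illustrative sizes; assumes TensorFlow/Keras is installed):

import tensorflow as tf
from tensorflow.keras import Sequential, regularizers
from tensorflow.keras.layers import Dense, Dropout, Lambda
from tensorflow.keras.optimizers import Adam

def buildModelSketch(inputWidth=32, learningRate=0.0001):
    model = Sequential([
        # Reshape whatever batch layout arrives into rows of 32 encoded moves.
        Lambda(lambda x: tf.reshape(x, [-1, inputWidth])),
        Dense(512, activation='relu', kernel_regularizer=regularizers.l2(0.01)),
        Dropout(0.2),
        Dense(128, activation='relu', kernel_regularizer=regularizers.l2(0.01)),
        Dropout(0.2),
        # Linear head: one regression-style score per unit, trained with mean squared error.
        Dense(16, activation='linear', kernel_regularizer=regularizers.l2(0.01)),
    ])
    model.compile(optimizer=Adam(learning_rate=learningRate), loss='mean_squared_error')
    model.build(input_shape=(None, inputWidth))
    return model

The rename from _buildMainModel to buildMainModel is what lets main.py rebuild this network and load model_final.h5 for evaluation.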
@@ -111,7 +110,7 @@ class ReinforcementLearning():
        # Encoded moves
        encodedMoves = []
        for state in states:
            encodedMoves.append(self._encodeMoves(self.colour, state))
            encodedMoves.append(self.encodeMoves(self.colour, state))

        # Calculate targets
        targets = []
@@ -126,7 +125,7 @@ class ReinforcementLearning():
        encodedMoves = np.array([np.pad(m, (0, self.maxSize - len(m)), 'constant', constant_values=(1, 1))
                                 for m in encodedMoves])
        targets = np.array(targets)
        self.model.fit(self._normalise(encodedMoves), self._normalise(targets), epochs=20)
        self.model.fit(self.normalise(encodedMoves), self.normalise(targets), epochs=20)
        if self.epsilon > self.epsilonMin:
            self.epsilon *= self.epsilonDecay
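The hunks above only show the tail of _replay (padding the remembered move encodings, fitting on the normalised targets, and decaying epsilon); the target construction itself is outside this diff. A generic sketch of the kind of Q-target update such a replay step typically performs, with the tuple layout, gamma, and batch size as assumptions rather than the repository's code:

import random
import numpy as np

def replaySketch(model, memory, maxSize=32, gamma=0.95, batchSize=32):
    # memory entries assumed to be (encodedMoves, actionIndex, reward, nextMaxQ, done) tuples.
    batch = random.sample(memory, min(batchSize, len(memory)))
    states, targets = [], []
    for moves, action, reward, nextMaxQ, done in batch:
        padded = np.pad(moves, (0, maxSize - len(moves)), 'constant', constant_values=(1, 1))
        target = model.predict(padded.reshape(1, -1) / 10000)[0]
        # Bellman update for the action that was actually taken.
        target[action] = reward if done else reward + gamma * nextMaxQ
        states.append(padded / 10000)
        targets.append(target)
    model.fit(np.array(states), np.array(targets), epochs=1, verbose=0)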
@@ -160,8 +159,10 @@ class ReinforcementLearning():
            return self.actionSpace[0]
        encodedMoves = np.squeeze(self.actionSpace)
        encodedMoves = np.pad(encodedMoves, (0, self.maxSize - len(encodedMoves)), 'constant', constant_values=(1, 1))
        act_values = self.model.predict(self._normalise(encodedMoves))
        return self.actionSpace[np.argmax(act_values[0])]
        act_values = self.model.predict(self.normalise(encodedMoves))
        val = np.argmax(act_values[0])
        val = val if val < len(self.actionSpace) else len(self.actionSpace) - 1
        return self.actionSpace[val]

    def resetScore(self):
        self.score = 0
@@ -209,20 +210,14 @@ class ReinforcementLearning():

    def _maxNextQ(self) -> float:
        colour = WHITE if self.colour == GREEN else GREEN
        encodedMoves = self._encodeMoves(colour, self.board)
        encodedMoves = self.encodeMoves(colour, self.board)
        if len(encodedMoves) == 0:
            return -1
        paddedMoves = np.array(np.pad(encodedMoves, (0, self.maxSize - len(encodedMoves)), 'constant', constant_values=(1, 1)))
        # paddedMoves = np.reshape(paddedMoves, (32, 8, 8))
        # paddedMoves = paddedMoves / np.max(paddedMoved
        # paddedMoves = paddedMoves.reshape(32,)
        # pm = tf.convert_to_tensor(paddedMoves, dtype=tf.float32)
        # pm = tf.reshape(pm, [32])
        print(paddedMoves.shape)
        nextQValues = self.model.predict_on_batch(self._normalise(paddedMoves))
        nextQValues = self.model.predict_on_batch(self.normalise(paddedMoves))
        return np.max(nextQValues)

    def _encodeMoves(self, colour: int, board: Board) -> list:
    def encodeMoves(self, colour: int, board: Board) -> list:
        """
        Encodes the moves into a list encoded moves
        :param colour: colour of the player
@@ -243,10 +238,8 @@ class ReinforcementLearning():
        diff = np.nonzero(diff)
        return diff

    def _normalise(self, data):
    def normalise(self, data):
        """
        Normalise the data
        """
        for i in range(len(data)):
            data[i] = data[i] / np.linalg.norm(data[i])
        return data
        return data / 10000
results.py (new file, 27 lines)
@@ -0,0 +1,27 @@
import matplotlib.pyplot as plt

from utilities.constants import GREEN, WHITE

# winners = []
with open("winners.txt") as f:
    winners = f.readlines()

winners = [int(x.strip()) for x in winners]


fig, ax = plt.subplots()
bar = ax.bar(["Draw", "White", "Green"], [winners.count(0), winners.count(WHITE), winners.count(GREEN)])
ax.set(xlabel='Winner', ylabel='Frequency', ylim=[0, 500])
ax.set_title("Winners")
ax.bar_label(bar)
plt.show()


with open("rewardsA.txt") as f:
    totalReward = f.readlines()


plt.plot([i for i in range(len(totalReward))], totalReward)
plt.xlabel("Games")
plt.ylabel("Reward")
plt.show()
@@ -63,19 +63,44 @@ class Board:
            if piece != 0:
                if piece.colour == GREEN:
                    self.greenLeft -= 1
                    return
                    continue
                self.whiteLeft -= 1

    def getAllMoves(self, colour):
        moves = []
        possibleMoves = []
        possiblePieces = []
        pieces = self.getAllPieces(colour)
        hasForcedCapture = False

        for piece in self.getAllPieces(colour):
        for piece in pieces:
            validMoves = self.getValidMoves(piece)
            for move, skip in validMoves.items():
                tempBoard = deepcopy(self)
                tempPiece = tempBoard.getPiece(piece.row, piece.col)
                newBoard = self._simulateMove(tempPiece, move, tempBoard, skip)
                moves.append(newBoard)

            # Check if there are forced capture moves for this piece
            forcedCaptureMoves = [move for move, skip in validMoves.items() if skip]
            if forcedCaptureMoves:
                hasForcedCapture = True
                possiblePieces.append(piece)
                possibleMoves.append({move: skip for move, skip in validMoves.items() if skip})

        if hasForcedCapture:
            # If there are forced capture moves, consider only those
            for i in range(len(possibleMoves)):
                for move, skip in possibleMoves[i].items():
                    tempBoard = deepcopy(self)
                    tempPiece = tempBoard.getPiece(possiblePieces[i].row, possiblePieces[i].col)
                    newBoard = self._simulateMove(tempPiece, move, tempBoard, skip)
                    moves.append(newBoard)
        else:
            # If no forced capture moves, consider all valid moves
            for piece in pieces:
                validMoves = self.getValidMoves(piece)
                for move, skip in validMoves.items():
                    tempBoard = deepcopy(self)
                    tempPiece = tempBoard.getPiece(piece.row, piece.col)
                    newBoard = self._simulateMove(tempPiece, move, tempBoard, skip)
                    moves.append(newBoard)

        return moves

    def _simulateMove(self, piece, move, board, skip):
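The rewritten getAllMoves above applies the draughts forced-capture rule: capture moves are gathered per piece first, and if any exist only those boards are simulated; otherwise every valid move is generated as before. The core filtering rule, as a small standalone sketch with hypothetical names:

def filterForcedCaptures(validMovesByPiece):
    # validMovesByPiece: {piece: {move: skip}}, where a truthy skip marks a capture.
    captures = {piece: {m: s for m, s in moves.items() if s}
                for piece, moves in validMovesByPiece.items()}
    captures = {piece: ms for piece, ms in captures.items() if ms}
    # If any capture exists, only captures are legal; otherwise all moves stand.
    return captures if captures else validMovesByPiece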
@@ -134,6 +159,7 @@ class Board:
                forcedCapture = forced
        else:
            forcedCapture = forced

        return forcedCapture

    def scoreOfTheBoard(self):
@@ -241,7 +267,7 @@ class Board:
    def _decode(self, move):
        # Split digits back out
        str_code = str(move)
        print(str_code)
        # print(str_code)
        start_row = int(str_code[0])
        start_col = int(str_code[1])
        end_row = int(str_code[2])