Final code version before submitting
@@ -22,8 +22,8 @@ class ReinforcementLearning():
     def __init__(self, actionSpace: list, board: Board, colour: int, gameManager: GameManager) -> None:
         """
         Constructor for the ReinforcementLearning class
-        :param actionSpace: the number of possible actions
-        :param board: the game board
+        :param actionSpace: The number of possible actions
+        :param board: The game board
         """
         self.gameManager = gameManager
         self.actionSpace = actionSpace
@@ -33,7 +33,7 @@ class ReinforcementLearning():
         self.score = 0
         self.epsilon = 1
         self.gamma = .95
-        self.batchSize = 256
+        self.batchSize = 512
         self.maxSize = 32
         self.epsilonMin = .01
         self.epsilonDecay = .995
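These values drive a conventional epsilon-greedy schedule: exploration starts fully random (epsilon = 1), decays multiplicatively, and bottoms out at epsilonMin. A minimal sketch of that schedule, assuming the decay is applied once per training step (where the commit actually performs the decay is not visible in this diff):

# Hedged sketch of the decay implied by the hyperparameters above.
epsilon, epsilonMin, epsilonDecay = 1.0, .01, .995

def decayEpsilon(eps: float) -> float:
    # Multiplicative decay, floored so some exploration always remains
    return max(epsilonMin, eps * epsilonDecay)

# 0.995 ** 920 is roughly 0.0099, so epsilon hits its floor after ~920 steps
for step in range(1000):
    epsilon = decayEpsilon(epsilon)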
@@ -42,10 +42,10 @@ class ReinforcementLearning():
         self.model = self.buildMainModel()
         print(self.model.summary())

-    def AI(self, board: Board) -> tuple:
+    def AITrain(self, board: Board) -> tuple:
         """
         Learns to play the draughts game
-        :return: the loss
+        :return: The loss
         """
         self.board = board
         self.state = self._convertState(self.board.board)
@@ -62,10 +62,29 @@ class ReinforcementLearning():

         return self.score, nextState

+    def AITest(self, board: Board) -> Board:
+        """
+        Runs the AI
+        :param board: The board
+        :return: The new board
+        """
+        actionSpace = self.encodeMoves(WHITE, board)
+        if len(actionSpace) == 0:
+            print("Cannot make move")
+            return None
+        totalMoves = len(actionSpace)
+        # moves = np.squeeze(moves)
+        moves = np.pad(actionSpace, (0, self.maxSize - totalMoves), 'constant', constant_values=(1, 1))
+        act_values = self.model.predict(self.normalise(moves))
+        val = np.argmax(act_values[0])
+        val = val if val < totalMoves else totalMoves - 1
+        reward, newBoard, done = board.step(actionSpace[val], WHITE)
+        return newBoard
+
     def buildMainModel(self) -> Sequential:
         """
         Build the model for the AI
-        :return: the model
+        :return: The model
         """
         # Board model
         modelLayers = [
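The new AITest path pads the variable-length list of encoded moves out to the fixed input width maxSize before calling model.predict, then clamps the argmax back into the legal range in case the network scores a padding slot highest. A self-contained sketch of just that padding-and-clamping step (the move values and the fake argmax result are illustrative assumptions):

import numpy as np

maxSize = 32
actionSpace = [1223, 3445, 5667]  # hypothetical encoded moves
totalMoves = len(actionSpace)

# Pad with the constant 1 so the input always has maxSize entries
moves = np.pad(actionSpace, (0, maxSize - totalMoves), 'constant', constant_values=(1, 1))

val = 17  # pretend np.argmax landed on a padding slot
val = val if val < totalMoves else totalMoves - 1
print(actionSpace[val])  # falls back to the last legal move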
@@ -93,7 +112,7 @@ class ReinforcementLearning():
     def _replay(self) -> None:
         """
        trains the model
-        :return: None (void)
+        :return: None
         """
         if len(self.memory) < self.batchSize:
             # Not enough data to replay and test the model
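The diff truncates _replay's body, but the surrounding pieces (gamma, _remember's transition tuples, _maxNextQ) point at the standard DQN update: fit the network toward r + gamma * max Q(next state). A hedged sketch of that conventional recipe, not the commit's exact code:

import random
import numpy as np

def replaySketch(model, memory, batchSize=512, gamma=.95):
    # Skip training until enough transitions have been collected
    if len(memory) < batchSize:
        return
    for state, action, reward, nextState, done in random.sample(list(memory), batchSize):
        # Terminal transitions keep the raw reward; otherwise bootstrap ahead
        target = reward if done else reward + gamma * np.max(model.predict(nextState)[0])
        qValues = model.predict(state)   # states assumed already batched
        qValues[0][action] = target      # action treated as an index, per the generic recipe
        model.fit(state, qValues, epochs=1, verbose=0)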
@@ -132,19 +151,19 @@ class ReinforcementLearning():
     def _remember(self, state: np.array, action: int, reward: float, nextState: np.array, done: bool) -> None:
         """
         Remembers what it has learnt
-        :param state: the current state
-        :param action: the action taken
-        :param reward: the reward for the action
-        :param nextState: the next state
-        :param done: whether the game is finished
-        :return: None (void)
+        :param state: The current state
+        :param action: The action taken
+        :param reward: The reward for the action
+        :param nextState: The next state
+        :param done: Whether the game is finished
+        :return: None
         """
         self.memory.append((state, action, reward, nextState, done))

     def _act(self) -> Any:
         """
         Chooses an action based on the available moves
-        :return: the action
+        :return: The action
         """
         if np.random.rand() <= self.epsilon:
             # choose a random action from the action spaces list
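self.memory only ever grows in the code shown; a bounded deque is the usual way to keep a replay buffer fresh, evicting the oldest transitions automatically. A small sketch of that pattern (the maxlen value is an assumption, not taken from this commit):

from collections import deque

memory = deque(maxlen=2000)  # assumed capacity, purely illustrative
for step in range(2500):
    # Dummy tuple standing in for (state, action, reward, nextState, done)
    memory.append((step, 0, 0.0, step + 1, False))
print(len(memory))  # 2000: the oldest 500 transitions were dropped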
@@ -159,12 +178,16 @@ class ReinforcementLearning():
             return self.actionSpace[0]
         encodedMoves = np.squeeze(self.actionSpace)
         encodedMoves = np.pad(encodedMoves, (0, self.maxSize - len(encodedMoves)), 'constant', constant_values=(1, 1))
-        act_values = self.model.predict(self.normalise(encodedMoves))
-        val = np.argmax(act_values[0])
+        actValues = self.model.predict(self.normalise(encodedMoves))
+        val = np.argmax(actValues[0])
         val = val if val < len(self.actionSpace) else len(self.actionSpace) - 1
         return self.actionSpace[val]

-    def resetScore(self):
+    def resetScore(self) -> None:
+        """
+        Resets the score
+        :return: None
+        """
         self.score = 0

     def _convertState(self, board: list) -> list:
@@ -195,9 +218,9 @@ class ReinforcementLearning():
     def _encode(self, start: tuple, end: tuple) -> int:
         """
         Encodes the move into an integer
-        :param start: tuple of start position
-        :param end: tuple of end position
-        :return: encoded move
+        :param start: Tuple of start position
+        :param end: Tuple of end position
+        :return: Encoded move
         """
         start_row = start[0]
         start_col = end[0]
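_encode packs the four board coordinates into a single integer by string concatenation, so a move from (3, 4) to (5, 6) encodes as 3456. A standalone sketch with conventional tuple indexing (note that the hunk's own start_col = end[0] reads the end tuple, which looks unintended):

def encode(start: tuple, end: tuple) -> int:
    # Conventional indexing assumed: row = t[0], col = t[1]
    startRow, startCol = start
    endRow, endCol = end
    return int(str(startRow) + str(startCol) + str(endRow) + str(endCol))

print(encode((3, 4), (5, 6)))  # 3456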
@@ -209,6 +232,10 @@ class ReinforcementLearning():
         return int(str(start_row) + str(start_col) + str(end_row) + str(end_col))

     def _maxNextQ(self) -> float:
+        """
+        Calculates the max Q value for the next state
+        :return: the max Q value
+        """
         colour = WHITE if self.colour == GREEN else GREEN
         encodedMoves = self.encodeMoves(colour, self.board)
         if len(encodedMoves) == 0:
@@ -220,9 +247,9 @@ class ReinforcementLearning():
     def encodeMoves(self, colour: int, board: Board) -> list:
         """
         Encodes the moves into a list of encoded moves
-        :param colour: colour of the player
-        :param board: the board
-        :return: list of encoded moves
+        :param colour: Colour of the player
+        :param board: The board
+        :return: List of encoded moves
         """
         encodedMoves = []
         moves = board.getAllMoves(colour)
@@ -231,15 +258,23 @@ class ReinforcementLearning():
             encodedMoves.append(self._encode(where[0]+1, where[1]+1))
         return encodedMoves

-    def _boardDiff(self, board, move):
+    def _boardDiff(self, board: Board, move: Board) -> np.array:
+        """
+        Finds the difference between the two boards
+        :param board: The current board
+        :param move: The new board
+        :return: the difference between the two boards
+        """
         cnvState = np.array(self._convertState(board.board))
         cnvMove = np.array(self._convertState(move.board))
         diff = np.subtract(cnvMove, cnvState)
         diff = np.nonzero(diff)
         return diff

-    def normalise(self, data):
+    def normalise(self, data: np.array) -> np.array:
+        """
+        Normalise the data
+        :param data: the data to normalise
+        :return: normalised data
+        """
         return data / 10000
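The 10000 divisor lines up with the move encoding: encodeMoves shifts coordinates to 1-based before calling _encode, so encoded moves come out as four-digit integers, and dividing by 10000 maps them into (0, 1) for the network. A quick illustration (move values assumed):

import numpy as np

encodedMoves = np.array([1223, 3445, 8876])  # hypothetical encoded moves
print(encodedMoves / 10000)  # [0.1223 0.3445 0.8876]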