Merge pull request 'reinforcement-learning' (#1) from reinforcement-learning into master

Reviewed-on: #1
Rohit Pai 2023-09-28 23:59:04 +01:00
commit 7e9b0a475e
24 changed files with 2514 additions and 401 deletions


@ -4,7 +4,7 @@
<content url="file://$MODULE_DIR$">
<excludeFolder url="file://$MODULE_DIR$/venv" />
</content>
<orderEntry type="jdk" jdkName="Python 3.11 (draughts)" jdkType="Python SDK" />
<orderEntry type="jdk" jdkName="$USER_HOME$/anaconda3" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
</module>


@ -1,4 +1,4 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.11 (draughts)" project-jdk-type="Python SDK" />
<component name="ProjectRootManager" version="2" project-jdk-name="$USER_HOME$/anaconda3" project-jdk-type="Python SDK" />
</project>

6 .idea/other.xml Normal file

@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="PySciProjectComponent">
<option name="PY_SCI_VIEW_SUGGESTED" value="true" />
</component>
</project>

Binary file not shown.

100 changeInRewards-3.txt Normal file

@ -0,0 +1,100 @@
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0

100 changeInRewards-5.txt Normal file

@ -0,0 +1,100 @@
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0

289 main.py

@ -1,7 +1,10 @@
import sys
import pygame
import numpy as np
from matplotlib import pyplot as plt
from reinforcementLearning.ReinforcementLearning import ReinforcementLearning
from utilities.constants import WIDTH, HEIGHT, SQUARE_SIZE, WHITE, GREEN
from utilities.gameManager import GameManager
from minimax.minimaxAlgo import MiniMax
@ -11,21 +14,45 @@ WIN = pygame.display.set_mode((WIDTH, HEIGHT))
pygame.display.set_caption("Draughts")
def getRowColFromMouse(pos):
def getRowColFromMouse(pos: tuple) -> tuple:
"""
Gets the row and column from the mouse position
:param pos: X and Y position of the mouse
:return: Row and column
"""
x, y = pos
row = y // SQUARE_SIZE
col = x // SQUARE_SIZE
return row, col
def drawText(text, font, color, surface, x, y):
textobj = font.render(text, 1, color)
def drawText(text: str, font: pygame.font.Font, colour: tuple, surface: pygame.Surface, x: float, y: float) -> None:
"""
Draws text on the screen
:param text: Text to draw
:param font: System font
:param colour: Colour of the text
:param surface: The display surface
:param x: X position of the text
:param y: Y position of the text
:return: None
"""
textobj = font.render(text, 1, colour)
textrect = textobj.get_rect()
textrect.topleft = (x, y)
surface.blit(textobj, textrect)
def drawMultiLineText(surface, text, pos, font, color=pygame.Color('black')):
def drawMultiLineText(surface: pygame.Surface, text: str, pos: tuple, font: pygame.font.Font, colour: pygame.Color = pygame.Color('black')) -> None:
"""
Draws multiline text on the screen
:param surface: The display surface
:param text: Text to draw
:param pos: X and Y position of the text
:param font: System font
:param colour: Colour of the text
:return: None
"""
words = [word.split(' ') for word in text.splitlines()] # 2D array where each row is a list of words.
space = font.size(' ')[0] # The width of a space.
max_width, max_height = surface.get_size()
@ -33,7 +60,7 @@ def drawMultiLineText(surface, text, pos, font, color=pygame.Color('black')):
word_height = None
for line in words:
for word in line:
word_surface = font.render(word, 0, color)
word_surface = font.render(word, 0, colour)
word_width, word_height = word_surface.get_size()
if x + word_width >= max_width:
x = pos[0] # Reset the x.
@ -44,80 +71,89 @@ def drawMultiLineText(surface, text, pos, font, color=pygame.Color('black')):
y += word_height # Start on new row.
def main():
def main(difficulty: int = 0) -> None:
"""
Main function, that shows the menu before running the game
:param difficulty: Difficulty of the minimax algorithm
:return: None
"""
pygame.init()
screen = pygame.display.set_mode((WIDTH, HEIGHT))
menuClock = pygame.time.Clock()
click = False
width = screen.get_width()
font = pygame.font.SysFont(None, 25)
difficulty = 0
font = pygame.font.SysFont("", 25)
while True:
# menu
screen.fill((128, 128, 128))
drawText('Main Menu', font, (255, 255, 255), screen, width / 2, 20)
if difficulty == 0:
while True:
# menu
screen.fill((128, 128, 128))
drawText('Main Menu', font, (255, 255, 255), screen, width / 2, 20)
mx, my = pygame.mouse.get_pos()
mx, my = pygame.mouse.get_pos()
easy = pygame.Rect(width / 2 - 50, 100, 200, 50)
pygame.draw.rect(screen, (0, 255, 0), easy)
drawText("easy", font, (255, 255, 255), screen, width / 2, 100)
medium = pygame.Rect(width / 2 - 50, 200, 200, 50)
pygame.draw.rect(screen, (255, 125, 0), medium)
drawText("medium", font, (255, 255, 255), screen, width / 2, 200)
hard = pygame.Rect(width / 2 - 50, 300, 200, 50)
pygame.draw.rect(screen, (255, 0, 0), hard)
drawText("hard", font, (255, 255, 255), screen, width / 2, 300)
rules = pygame.Rect(width / 2 - 50, 400, 200, 50)
pygame.draw.rect(screen, (0, 0, 255), rules)
drawText("rules", font, (255, 255, 255), screen, width / 2, 400)
quitGame = pygame.Rect(width / 2 - 50, 500, 200, 50)
pygame.draw.rect(screen, (0, 0, 0), quitGame)
drawText("quit", font, (255, 255, 255), screen, width / 2, 500)
easy = pygame.Rect(width / 2 - 50, 100, 200, 50)
pygame.draw.rect(screen, (0, 255, 0), easy)
drawText("easy", font, (255, 255, 255), screen, width / 2, 100)
medium = pygame.Rect(width / 2 - 50, 200, 200, 50)
pygame.draw.rect(screen, (255, 125, 0), medium)
drawText("medium", font, (255, 255, 255), screen, width / 2, 200)
hard = pygame.Rect(width / 2 - 50, 300, 200, 50)
pygame.draw.rect(screen, (255, 0, 0), hard)
drawText("hard", font, (255, 255, 255), screen, width / 2, 300)
rules = pygame.Rect(width / 2 - 50, 400, 200, 50)
pygame.draw.rect(screen, (0, 0, 255), rules)
drawText("rules", font, (255, 255, 255), screen, width / 2, 400)
quitGame = pygame.Rect(width / 2 - 50, 500, 200, 50)
pygame.draw.rect(screen, (0, 0, 0), quitGame)
drawText("quit", font, (255, 255, 255), screen, width / 2, 500)
if easy.collidepoint((mx, my)):
if click:
difficulty = 1
break
if medium.collidepoint((mx, my)):
if click:
difficulty = 3
break
if hard.collidepoint((mx, my)):
if click:
difficulty = 5
break
if rules.collidepoint((mx, my)):
if click:
rulesGUI()
break
if quitGame.collidepoint((mx, my)):
if click:
pygame.quit()
sys.exit()
click = False
for event in pygame.event.get():
if event.type == pygame.QUIT:
pygame.quit()
sys.exit()
if event.type == pygame.MOUSEBUTTONDOWN:
if event.button == 1:
click = True
if easy.collidepoint((mx, my)):
if click:
difficulty = 1
break
if medium.collidepoint((mx, my)):
if click:
difficulty = 3
break
if hard.collidepoint((mx, my)):
if click:
difficulty = 5
break
if rules.collidepoint((mx, my)):
if click:
rulesGUI()
break
if quitGame.collidepoint((mx, my)):
if click:
pygame.quit()
sys.exit()
click = False
for event in pygame.event.get():
if event.type == pygame.QUIT:
pygame.quit()
sys.exit()
if event.type == pygame.MOUSEBUTTONDOWN:
if event.button == 1:
click = True
pygame.display.update()
menuClock.tick(60)
if difficulty != 0:
game(difficulty)
pygame.display.update()
menuClock.tick(60)
game(difficulty)
def rulesGUI():
def rulesGUI() -> None:
"""
Shows the rules of the game
:return: None
"""
screen = pygame.display.set_mode((WIDTH, HEIGHT))
menuClock = pygame.time.Clock()
click = False
width = screen.get_width()
titleFont = pygame.font.SysFont(None, 48)
font = pygame.font.SysFont(None, 21)
titleFont = pygame.font.SysFont("", 48)
font = pygame.font.SysFont("", 21)
while True:
screen.fill((128, 128, 128))
drawText("Rules", titleFont, (255, 255, 255), screen, width / 2, 20)
@ -171,43 +207,116 @@ multi-jump until the next move.""", (50, 50), font)
menuClock.tick(60)
def game(difficulty):
def game(difficulty: int) -> None:
"""
Runs the game with the given difficulty. Used for training and testing the RL algorithm
:param difficulty: The difficulty of the minimax algorithm
"""
run = True
clock = pygame.time.Clock()
gameManager = GameManager(WIN, GREEN)
rl = ReinforcementLearning(gameManager.board.getAllMoves(WHITE), gameManager.board, WHITE, gameManager)
# model = rl.buildMainModel()
rl.model.load_weights("./modelWeights/model_final.h5")
mm = MiniMax()
totalReward = []
winners = []
for i in range(50):
score = 0
for j in range(200):
print(j)
clock.tick(FPS)
reward = 0
if gameManager.turn == WHITE:
# mm = MiniMax()
# value, newBoard = mm.AI(difficulty, WHITE, gameManager)
# gameManager.aiMove(newBoard)
# reward, newBoard = rl.AITrain(gameManager.board)
newBoard = rl.AITest(gameManager.board)
while run:
clock.tick(FPS)
if newBoard is None:
print("Cannot make move")
continue
gameManager.aiMove(newBoard)
if gameManager.turn == WHITE:
mm = MiniMax()
value, newBoard = mm.AI(gameManager.getBoard(), difficulty, WHITE, gameManager)
gameManager.aiMove(newBoard)
# time.sleep(0.15)
gameManager.update()
pygame.display.update()
if gameManager.turn == GREEN:
mm = MiniMax()
value, newBoard = mm.AI(gameManager.getBoard(), difficulty, GREEN, gameManager)
gameManager.aiMove(newBoard)
# time.sleep(0.15)
if gameManager.turn == GREEN:
value, newBoard = mm.AI(difficulty, GREEN, gameManager)
gameManager.aiMove(newBoard)
if gameManager.winner() != None:
print(gameManager.winner())
run = False
score += reward
for event in pygame.event.get():
if event.type == pygame.QUIT:
run = False
if event.type == pygame.MOUSEBUTTONDOWN:
pos = pygame.mouse.get_pos()
row, col = getRowColFromMouse(pos)
# if gameManager.turn == GREEN:
gameManager.select(row, col)
if gameManager.winner() is not None:
print("Green" if gameManager.winner() == GREEN else "White", " wins")
# with open(f"winners-{difficulty}.txt", "a+") as f:
# f.write(str(gameManager.winner()) + "\n")
winners.append(gameManager.winner())
break
gameManager.update()
pygame.display.update()
# for event in pygame.event.get():
# if event.type == pygame.QUIT:
# break
# if event.type == pygame.MOUSEBUTTONDOWN:
# pos = pygame.mouse.get_pos()
# row, col = getRowColFromMouse(pos)
# # if gameManager.turn == GREEN:
# gameManager.select(row, col)
gameManager.update()
pygame.display.update()
if gameManager.winner() is None:
# with open(f"winners-{difficulty}.txt", "a+") as f:
# f.write(str(0) + "\n")
winners.append(0)
gameManager.reset()
rl.resetScore()
print("Game: ", i, " Reward: ", score)
# with open(f"rewards-{difficulty}.txt", "a+") as f:
# f.write(str(score) + "\n")
totalReward.append(score)
# save model weights every 250 games
# if i % 250 == 0 and i != 0:
# rl.model.save("./modelWeights/model_" + str(i) + ".h5")
# pygame.quit()
# rl.model.save("./modelWeights/model_final.h5")
change_in_rewards = [0] # Initialize with 0 for the first episode
for i in range(1, len(totalReward)):
change_in_reward = totalReward[i] - totalReward[i - 1]
change_in_rewards.append(change_in_reward)
main()
# with open(f"changeInRewards-{difficulty}.txt", "a+") as f:
# for i in change_in_rewards:
# f.write(str(i) + "\n")
# episodes = list(range(1, len(totalReward) + 1))
#
# plt.plot(episodes, change_in_rewards)
# plt.xlabel('Training Games')
# plt.ylabel('Change in Game Reward')
# plt.title('Change in Game Reward vs. Training Games')
# plt.grid(True)
# plt.show()
#
# plt.plot([i for i in range(len(totalReward))], totalReward)
# plt.xlabel("Games")
# plt.ylabel("Reward")
# plt.show()
fig, ax = plt.subplots()
bar = ax.bar(["Draw", "White", "Green"], [winners.count(0), winners.count(WHITE), winners.count(GREEN)])
ax.set(xlabel='Winner', ylabel='Frequency', ylim=[0, 500])
ax.set_title(f"Winners for difficulty — {difficulty}")
ax.bar_label(bar)
plt.show()
# difficulties = [3, 5, 7, 9]
#
# for diff in difficulties:
# main(diff)
main(3)
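
The commented-out logging in game() appends one reward per game to rewards-<difficulty>.txt and derives from it the changeInRewards-<difficulty>.txt series added in this commit. A minimal sketch of rebuilding and plotting that delta series from such a log (one value per line, as in the files in this diff; the exact file name is only an example):

from matplotlib import pyplot as plt

def loadSeries(path: str) -> list:
    """Read one float per line, skipping blanks."""
    with open(path) as f:
        return [float(line.strip()) for line in f if line.strip()]

def changeInRewards(rewards: list) -> list:
    """First entry is 0, then the per-game difference, mirroring game()."""
    deltas = [0]
    for i in range(1, len(rewards)):
        deltas.append(rewards[i] - rewards[i - 1])
    return deltas

deltas = changeInRewards(loadSeries("rewards-5.txt"))  # log written by game()
plt.plot(range(1, len(deltas) + 1), deltas)
plt.xlabel("Training Games")
plt.ylabel("Change in Game Reward")
plt.show()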


@ -1,56 +1,46 @@
import random
from copy import deepcopy
from math import inf
from utilities.constants import GREEN, WHITE
from utilities.gameManager import GameManager
class MiniMax():
class MiniMax:
def AI(self, board, depth, maxPlayer, gameManager):
if depth == 0 or board.winner() is not None:
return board.scoreOfTheBoard(), board
def AI(self, depth: int, maxPlayer: int, gameManager: GameManager) -> tuple:
"""
The minimax algorithm
:param depth: How deep the algorithm should go
:param maxPlayer: The current player
:param gameManager: The game manager
:return: The best evaluation and board
"""
if depth == 0 or gameManager.board.winner() is not None:
return gameManager.board.scoreOfTheBoard(), gameManager.board
if maxPlayer:
if type(maxPlayer) == int:
maxEval = -inf
bestMove = None
for move in self.getAllMoves(board, maxPlayer):
evaluation = self.AI(move, depth - 1, False, gameManager)[0]
for move in gameManager.board.getAllMoves(maxPlayer):
evaluation = self.AI(depth - 1, False, gameManager)[0]
maxEval = max(maxEval, evaluation)
if maxEval > evaluation:
bestMove = move
if maxEval == evaluation:
# bestMove = move
bestMove = bestMove if random.choice([True, False]) else move
return maxEval, bestMove
else:
minEval = inf
bestMove = None
colour = WHITE if gameManager.turn == GREEN else GREEN
for move in self.getAllMoves(board, colour):
evaluation = self.AI(move, depth - 1, True, gameManager)[0]
for move in gameManager.board.getAllMoves(colour):
evaluation = self.AI(depth - 1, True, gameManager)[0]
minEval = min(minEval, evaluation)
if minEval < evaluation:
bestMove = move
if minEval == evaluation:
# bestMove = move
bestMove = bestMove if random.choice([True, False]) else move
return minEval, bestMove
def _simulateMove(self, piece, move, board, skip):
board.move(piece, move[0], move[1])
if skip:
board.remove(skip)
return board
def getAllMoves(self, board, colour):
moves = []
for piece in board.getAllPieces(colour):
validMoves = board.getValidMoves(piece)
for move, skip in validMoves.items():
tempBoard = deepcopy(board)
tempPiece = tempBoard.getPiece(piece.row, piece.col)
newBoard = self._simulateMove(tempPiece, move, tempBoard, skip)
moves.append(newBoard)
return moves
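
For reference, a self-contained sketch of the depth-limited minimax recursion this class implements, including the same random tie-breaking between equally scored moves. The board interface (getAllMoves(colour) returning successor boards, scoreOfTheBoard(), winner()) is assumed from its use elsewhere in this diff, so treat this as an illustration rather than a drop-in replacement:

import random
from math import inf

def minimax(board, depth: int, maximising: bool, maxColour: int, minColour: int) -> tuple:
    """Plain minimax over successor boards, returning (score, best board)."""
    if depth == 0 or board.winner() is not None:
        return board.scoreOfTheBoard(), board
    colour = maxColour if maximising else minColour
    bestEval = -inf if maximising else inf
    bestMove = None
    for child in board.getAllMoves(colour):
        evaluation, _ = minimax(child, depth - 1, not maximising, maxColour, minColour)
        better = evaluation > bestEval if maximising else evaluation < bestEval
        if better:
            bestEval, bestMove = evaluation, child
        elif evaluation == bestEval and random.choice([True, False]):
            bestMove = child  # break ties at random, as the class above does
    return bestEval, bestMove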

BIN modelWeights/model_250.h5 Normal file

Binary file not shown.

BIN modelWeights/model_final.h5 Normal file

Binary file not shown.


@ -1,96 +1,280 @@
import random
from collections import deque
from typing import Any
from copy import deepcopy
import numpy as np
import tensorflow as tf
from tensorflow.python.keras import Sequential, regularizers
from tensorflow.python.keras.layers import Dense
from keras.engine.input_layer import InputLayer
from keras.layers import BatchNormalization
from tensorflow.python.keras import Sequential, regularizers, Input
from tensorflow.python.keras.layers import Dense, Lambda, Dropout
from tensorflow.python.keras.optimizer_v2.adam import Adam
from minimax.minimaxAlgo import MiniMax
from utilities import Board
from utilities.constants import WHITE, GREEN
from utilities.gameManager import GameManager
class ReinforcementLearning():
def __init__(self, action_space, state_space, env):
self.action_space = action_space
self.state_space = state_space
self.env = env
def __init__(self, actionSpace: list, board: Board, colour: int, gameManager: GameManager) -> None:
"""
Constructor for the ReinforcementLearning class
:param actionSpace: The list of available actions for the agent
:param board: The game board
:param colour: The colour the agent plays as
:param gameManager: The game manager
"""
self.gameManager = gameManager
self.actionSpace = actionSpace
self.board = board
self.state = self.board.board
self.colour = colour
self.score = 0
self.epsilon = 1
self.gamma = .95
self.batch_size = 64
self.epsilon_min = .01
self.epsilon_decay = .995
self.learning_rate = 0.001
self.memory = deque(maxlen=100000)
self.model = self._buildModel()
self.batchSize = 512
self.maxSize = 32
self.epsilonMin = .01
self.epsilonDecay = .995
self.learningRate = 0.0001
self.memory = deque(maxlen=10000000)
self.model = self.buildMainModel()
print(self.model.summary())
def AI(self, episode):
loss = []
def AITrain(self, board: Board) -> tuple:
"""
Learns to play the draughts game (one training step per call)
:param board: The current board
:return: The running score and the resulting board state
"""
self.board = board
self.state = self._convertState(self.board.board)
self.actionSpace = self.encodeMoves(self.colour, self.board)
if len(self.actionSpace) == 0:
return self.score, None
max_steps = 1000
action = self._act()
reward, nextState, done = self.board.step(action, self.colour)
self.score += reward
self.state = self._convertState(nextState.board)
self._remember(deepcopy(self.board), action, reward, self.state, done)
self._replay()
for e in range(episode):
state = self.env.reset()
state = np.reshape(state, (1, self.state_space))
score = 0
for i in range(max_steps):
action = self.act(state)
reward, next_state, done = self.env.step(action)
score += reward
next_state = np.reshape(next_state, (1, self.state_space))
self.remember(state, action, reward, next_state, done)
state = next_state
self.replay()
if done:
print("episode: {}/{}, score: {}".format(e, episode, score))
break
loss.append(score)
return self.score, nextState
def _buildModel(self):
def AITest(self, board: Board) -> Board:
"""
Runs the AI
:param board: The board
:return: The new board
"""
actionSpace = self.encodeMoves(WHITE, board)
if len(actionSpace) == 0:
print("Cannot make move")
return None
totalMoves = len(actionSpace)
# moves = np.squeeze(moves)
moves = np.pad(actionSpace, (0, self.maxSize - totalMoves), 'constant', constant_values=(1, 1))
act_values = self.model.predict(self.normalise(moves))
val = np.argmax(act_values[0])
val = val if val < totalMoves else totalMoves - 1
reward, newBoard, done = board.step(actionSpace[val], WHITE)
return newBoard
def buildMainModel(self) -> Sequential:
"""
Builds the model for the AI
:return: The model
"""
# Board model
board_model = Sequential()
modelLayers = [
Lambda(lambda x: tf.reshape(x, [-1, 32])),
Dense(512, activation='relu', kernel_regularizer=regularizers.l2(0.01)),
Dropout(0.2),
Dense(256, activation='relu', kernel_regularizer=regularizers.l2(0.01)),
Dropout(0.2),
Dense(128, activation='relu', kernel_regularizer=regularizers.l2(0.01)),
Dropout(0.2),
Dense(64, activation='relu', kernel_regularizer=regularizers.l2(0.01)),
Dropout(0.2),
Dense(32, activation='relu', kernel_regularizer=regularizers.l2(0.01)),
Dropout(0.2),
Dense(16, activation='linear', kernel_regularizer=regularizers.l2(0.01))
]
boardModel = Sequential(modelLayers)
# input dimensions is 32 board position values
board_model.add(Dense(64, activation='relu', input_dim=32))
# boardModel.add(BatchNormalization())
boardModel.compile(optimizer=Adam(learning_rate=self.learningRate), loss='mean_squared_error')
boardModel.build(input_shape=(None, None))
# use regularizers, to prevent fitting noisy labels
board_model.add(Dense(32, activation='relu', kernel_regularizer=regularizers.l2(0.01)))
board_model.add(Dense(16, activation='relu', kernel_regularizer=regularizers.l2(0.01))) # 16
board_model.add(Dense(8, activation='relu', kernel_regularizer=regularizers.l2(0.01))) # 8
return boardModel
# output isn't squashed, because it might lose information
board_model.add(Dense(1, activation='linear', kernel_regularizer=regularizers.l2(0.01)))
board_model.compile(optimizer='nadam', loss='binary_crossentropy')
return board_model
def remember(self, state, action, reward, next_state, done):
self.memory.append((state, action, reward, next_state, done))
def replay(self):
if len(self.memory) < self.batch_size:
def _replay(self) -> None:
"""
Trains the model on a random sample of the replay memory
:return: None
"""
if len(self.memory) < self.batchSize:
# Not enough data to replay and test the model
return
minibatch = random.sample(self.memory, self.batch_size)
states = np.array([i[0] for i in minibatch])
actions = np.array([i[1] for i in minibatch])
rewards = np.array([i[2] for i in minibatch])
next_states = np.array([i[3] for i in minibatch])
dones = np.array([i[4] for i in minibatch])
# Get a random sample from the memory
minibatch = random.sample(self.memory, int(self.maxSize))
states = np.squeeze(states)
next_states = np.squeeze(next_states)
# Extract states, rewards, dones
states = [m[0] for m in minibatch]
rewards = [m[2] for m in minibatch]
dones = [m[4] for m in minibatch]
targets = rewards + self.gamma * (np.amax(self.model.predict_on_batch(next_states), axis=1)) * (1 - dones)
targets_full = self.model.predict_on_batch(states)
# Encoded moves
encodedMoves = []
for state in states:
encodedMoves.append(self.encodeMoves(self.colour, state))
ind = np.array([i for i in range(self.batch_size)])
targets_full[[ind], [actions]] = targets
# Calculate targets
targets = []
for i, moves in enumerate(encodedMoves):
if dones[i]:
target = rewards[i]
else:
target = rewards[i] + self.gamma * self._maxNextQ()
self.model.fit(states, targets_full, epochs=1, verbose=0)
if self.epsilon > self.epsilon_min:
self.epsilon *= self.epsilon_decay
targets.append(target)
def act(self, state):
encodedMoves = np.array([np.pad(m, (0, self.maxSize - len(m)), 'constant', constant_values=(1, 1))
for m in encodedMoves])
targets = np.array(targets)
self.model.fit(self.normalise(encodedMoves), self.normalise(targets), epochs=20)
if self.epsilon > self.epsilonMin:
self.epsilon *= self.epsilonDecay
def _remember(self, state: np.array, action: int, reward: float, nextState: np.array, done: bool) -> None:
"""
Remembers what it has learnt
:param state: The current state
:param action: The action taken
:param reward: The reward for the action
:param nextState: The next state
:param done: Whether the game is finished
:return: None
"""
self.memory.append((state, action, reward, nextState, done))
def _act(self) -> Any:
"""
Chooses an action based on the available moves
:return: The action
"""
if np.random.rand() <= self.epsilon:
return random.randrange(self.action_space)
act_values = self.model.predict(state)
return np.argmax(act_values[0])
# explore: let minimax suggest a move, falling back to a random action from the action space
mm = MiniMax()
value, newBoard = mm.AI(3, self.colour, self.gameManager)
if newBoard is None:
return random.choice(self.actionSpace)
where = self._boardDiff(self.board, newBoard)
return self._encode(where[0]+1, where[1]+1)
if len(self.actionSpace) == 1:
return self.actionSpace[0]
encodedMoves = np.squeeze(self.actionSpace)
encodedMoves = np.pad(encodedMoves, (0, self.maxSize - len(encodedMoves)), 'constant', constant_values=(1, 1))
actValues = self.model.predict(self.normalise(encodedMoves))
val = np.argmax(actValues[0])
val = val if val < len(self.actionSpace) else len(self.actionSpace) - 1
return self.actionSpace[val]
def resetScore(self) -> None:
"""
Resets the score
:return: None
"""
self.score = 0
def _convertState(self, board: list) -> list:
"""
Converts the board into a 2D list of numbers
:param board: 2D list of pieces
:return: new 2D list of numbers
"""
num_board = []
for row in board:
num_row = []
for piece in row:
if piece == 0:
num_row.append(0)
continue
if piece.colour == 1:
num_row.append(1)
continue
num_row.append(2)
num_board.append(num_row)
return num_board
def _encode(self, start: tuple, end: tuple) -> int:
"""
Encodes the move into an integer
:param start: Tuple of start position
:param end: Tuple of end position
:return: Encoded move
"""
start_row = start[0]
start_col = end[0]
end_row = start[-1]
end_col = end[-1]
# Concatenate into integer
return int(str(start_row) + str(start_col) + str(end_row) + str(end_col))
def _maxNextQ(self) -> float:
"""
Calculates the max Q value for the next state
:return: the max Q value
"""
colour = WHITE if self.colour == GREEN else GREEN
encodedMoves = self.encodeMoves(colour, self.board)
if len(encodedMoves) == 0:
return -1
paddedMoves = np.array(np.pad(encodedMoves, (0, self.maxSize - len(encodedMoves)), 'constant', constant_values=(1, 1)))
nextQValues = self.model.predict_on_batch(self.normalise(paddedMoves))
return np.max(nextQValues)
def encodeMoves(self, colour: int, board: Board) -> list:
"""
Encodes the moves into a list of encoded moves
:param colour: Colour of the player
:param board: The board
:return: List of encoded moves
"""
encodedMoves = []
moves = board.getAllMoves(colour)
for move in moves:
where = self._boardDiff(board, move)
encodedMoves.append(self._encode(where[0]+1, where[1]+1))
return encodedMoves
def _boardDiff(self, board: Board, move: Board) -> np.array:
"""
Finds the difference between the two boards
:param board: The current board
:param move: The new board
:return: The difference between the two boards
"""
cnvState = np.array(self._convertState(board.board))
cnvMove = np.array(self._convertState(move.board))
diff = np.subtract(cnvMove, cnvState)
diff = np.nonzero(diff)
return diff
def normalise(self, data: np.array) -> np.array:
"""
Normalise the data
:param data: the data to normalise
:return: normalised data
"""
return data / 10000
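
The _replay() method above builds its training targets with the standard Q-learning update: the reward alone for terminal transitions, otherwise the reward plus the discounted best next-state value from _maxNextQ(). A small numpy-only sketch of that target rule in isolation (gamma matches the 0.95 set in the constructor; the arrays are illustrative):

import numpy as np

def qTargets(rewards: np.ndarray, maxNextQ: np.ndarray, dones: np.ndarray,
             gamma: float = 0.95) -> np.ndarray:
    """Bellman targets: r where the game ended, r + gamma * max_a' Q(s', a') otherwise."""
    return rewards + gamma * maxNextQ * (1 - dones.astype(float))

# Illustrative minibatch of three transitions, the last one terminal.
rewards = np.array([1.0, -0.5, 10.0])
maxNextQ = np.array([2.0, 0.5, 0.0])
dones = np.array([False, False, True])
print(qTargets(rewards, maxNextQ, dones))  # -> 2.9, -0.025, 10.0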

80 results.py Normal file

@ -0,0 +1,80 @@
import matplotlib.pyplot as plt
import numpy as np
from utilities.constants import GREEN, WHITE
# winners = []
with open("winners-5.txt", "r") as f:
winners = f.readlines()
winners = [int(x.strip()) for x in winners]
# lavg = []
# for i in range(0, len(winners), 25):
# lavg.append(winners[i:i+25].count(2) / 25)
#
# x = np.arange(0, len(lavg))
# y = np.array(lavg) * 100
#
# a, b = np.polyfit(x, y, 1)
#
# fig, ax = plt.subplots(figsize=(10, 5))
# ax.plot(y)
# ax.set_xticks(np.arange(0, len(lavg), 2))
# ax.minorticks_on()
# ax.plot(x, a*x+b, color='red', linestyle='--', linewidth=2)
# ax.set_ylim([0, 100])
# ax.set_title("Winners Average")
# ax.grid(which='major', linestyle='-', linewidth='0.5', color='black')
# ax.grid(which='minor', linestyle=':', linewidth='0.5')
# ax.set_xlabel("Average Set")
# ax.set_ylabel("Percentage of Wins")
# ax.tick_params(which="minor", bottom=False, left=False)
# plt.show()
fig, ax = plt.subplots()
bar = ax.bar(["Draw", "White", "Green"], [winners.count(0), winners.count(WHITE), winners.count(GREEN)])
ax.set(xlabel='Winner', ylabel='Frequency', ylim=[0, 100])
ax.set_title("Winners at Depth 5")
ax.grid(which='major', linestyle='-', linewidth='0.5', color='grey', axis='y')
ax.bar_label(bar)
plt.show()
# with open("trainedRewards.txt", "r") as f:
# totalReward = f.readlines()
#
# totalReward = [float(x.strip()) for x in totalReward]
# filteredReward = list(filter(lambda x: x > -1500, totalReward))
# change_in_rewards = [0] # Initialize with 0 for the first episode
# for i in range(1, len(totalReward)):
# change_in_reward = totalReward[i] - totalReward[i - 1]
# change_in_rewards.append(change_in_reward)
#
# games = list(range(1, len(totalReward) + 1))
# plt.plot(games, change_in_rewards)
# plt.xlabel('Training Games')
# plt.ylabel('Change in Game Reward')
# plt.title('Change in Game Reward vs. Training Games')
# plt.grid(True)
# plt.show()
# major_ticks = np.arange(0, 101, 20)
# minor_ticks = np.arange(0, 101, 5)
#
# plt.plot([i for i in range(len(totalReward))], totalReward)
# plt.title("Rewards to Games")
# plt.xlabel("Games")
# plt.ylabel("Reward")
# plt.xticks(major_ticks)
# plt.xticks(minor_ticks, minor=True)
# plt.yticks(major_ticks)
# plt.yticks(minor_ticks, minor=True)
# plt.grid(which='both')
# plt.show()
#
# plt.plot([i for i in range(len(filteredReward))], filteredReward)
# plt.title("Filtered Rewards to Games")
# plt.xlabel("Games")
# plt.ylabel("Reward")
# plt.grid(which='both')
# plt.show()
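
The commented-out block near the top of results.py averages winner values over 25-game windows (counting occurrences of the value 2) to get a win percentage and fits a trend line to it. A compact sketch of that rolling win-rate calculation, with the window size and counted value taken from the commented code and an illustrative input list:

import numpy as np

def rollingWinRate(winners: list, value: int = 2, window: int = 25) -> np.ndarray:
    """Percentage of games in each consecutive window whose winner equals `value`."""
    rates = []
    for i in range(0, len(winners), window):
        chunk = winners[i:i + window]
        rates.append(100.0 * chunk.count(value) / len(chunk))
    return np.array(rates)

winners = [2, 0, 1, 2, 2] * 10  # illustrative results only
print(rollingWinRate(winners))  # -> [60. 60.]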

56 rewards-5.txt Normal file

@ -0,0 +1,56 @@
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0

BIN rewardsA.png Normal file

Binary file not shown.

Size: 148 KiB

2 run.sh Executable file

@ -0,0 +1,2 @@
conda activate
python main.py

500 trainedRewards.txt Normal file

@ -0,0 +1,500 @@
180.5
115.19999999999999
-155.39999999999998
-5169.4000000000015
100.0
-3354.2999999999956
123.79999999999998
-1738.0
261.40000000000015
120.89999999999999
147.80000000000004
108.0
113.50000000000001
110.5000000000002
-1048.3000000000006
75.8
232.70000000000016
89.10000000000001
279.9000000000002
165.40000000000003
85.4
34.20000000000016
266.20000000000016
101.69999999999999
283.0
-264.5
225.0
328.0
215.5
150.0
-217.5
-2920.0
82.5
-208.5
150.5
196.5
223.0
265.5
-282.5
175.5
206.5
221.5
127.5
-6337.5
147.5
231.5
137.5
-180.5
108.0
-339.5
190.0
-69.0
52.5
58.0
-5575.0
-159.5
197.5
177.5
-5547.5
-65.5
136.5
292.5
-169.5
185.0
115.5
198.0
30.0
162.5
95.5
170.0
113.0
-1405.0
-27.0
-4832.199999999999
147.5
228.0
59.0
262.5
-220.0
150.5
177.5
140.0
123.0
119.0
137.5
134.0
175.5
-5598.5
46.5
135.0
205.0
186.5
177.5
120.1
332.5
162.5
122.5
262.5
-70.0
159.0
138.0
240.5
215.0
147.5
-118.0
260.5
199.0
130.0
265.0
142.5
230.0
135.0
197.5
-179.5
198.0
288.0
200.5
-222.5
165.5
139.0
228.0
211.5
197.5
102.5
233.0
95.5
-129.0
187.5
158.0
295.0
240.5
-222.5
-1841.5
198.0
113.0
305.0
-482.5
125.5
215.0
110.0
-180.0
170.0
-62.5
215.5
132.5
187.5
135.0
-65.0
138.0
-1972.0
240.5
-237.5
610.0
267.5
52.5
-211.5
217.5
88.0
305.5
165.5
115.0
182.5
-69.5
333.0
363.0
112.5
-15.5
150.5
118.0
-52.5
318.0
174.0
198.0
-5705.0
160.5
155.0
125.0
165.0
259.0
165.5
155.0
-236.0
220.5
-15.5
117.5
367.5
237.5
255.0
85.0
-5342.5
141.5
-3582.5
-600.0
915.5
179.0
190.0
-47.5
275.5
-5.0
195.0
128.0
146.5
750.5
153.0
-5157.5
-279.5
219.0
154.0
153.0
-234.5
248.0
182.5
122.5
155.5
1078.0
102.5
358.0
152.5
261.5
239.0
128.0
111.5
93.0
310.5
-87.0
158.0
113.0
165.5
120.0
256.5
90.5
245.0
159.0
160.0
-5272.0
-88.5
159.0
169.0
147.5
-1149.5
-372.0
-270.0
95.0
142.5
212.5
154.0
425.0
153.0
213.0
280.5
-80.5
-45.90000000000003
-2250.5
123.50000000000003
149.40000000000006
219.0
108.0
180.0
271.19999999999993
202.5
121.8000000000001
47.599999999999966
-35.0
281.5
307.5
99.80000000000001
154.0
166.30000000000004
271.5
205.5
145.5
265.0
113.0
144.0
88.0
-204.5
204.0
215.0
177.5
168.0
263.0
66.5
258.0
-5477.5
94.5
-139.0
190.5
160.0
-35.5
149.0
100.5
130.0
-40.0
175.0
132.5
107.5
143.0
-5097.5
97.5
-1880.0
-15.0
213.0
-601.0
282.5
276.5
113.0
106.5
-1011.5
128.0
150.0
145.5
233.0
209.0
136.5
240.0
7.5
-1535.0
238.0
185.0
157.5
-1660.0
-15.5
-145.0
178.0
-4997.5
182.5
197.5
355.5
130.0
232.5
-5420.0
190.0
128.0
115.0
2.5
149.0
220.0
-87.0
-447.5
-4122.5
-67.5
-425.0
283.0
925.0
49.5
-15.0
233.0
215.5
234.0
154.0
141.5
226.5
220.0
110.5
270.0
253.0
-1944.0
215.0
250.5
155.0
260.5
185.0
261.5
232.5
177.5