diff --git a/.idea/draughts.iml b/.idea/draughts.iml index 3c96bee..3858d78 100644 --- a/.idea/draughts.iml +++ b/.idea/draughts.iml @@ -4,7 +4,7 @@ - + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml index d22234a..582a97a 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -1,4 +1,4 @@ - + \ No newline at end of file diff --git a/.idea/other.xml b/.idea/other.xml new file mode 100644 index 0000000..a708ec7 --- /dev/null +++ b/.idea/other.xml @@ -0,0 +1,6 @@ + + + + + \ No newline at end of file diff --git a/Report.pdf b/Report.pdf deleted file mode 100644 index 3f309d9..0000000 Binary files a/Report.pdf and /dev/null differ diff --git a/changeInRewards-3.txt b/changeInRewards-3.txt new file mode 100644 index 0000000..47ba1a2 --- /dev/null +++ b/changeInRewards-3.txt @@ -0,0 +1,100 @@ +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 diff --git a/changeInRewards-5.txt b/changeInRewards-5.txt new file mode 100644 index 0000000..47ba1a2 --- /dev/null +++ b/changeInRewards-5.txt @@ -0,0 +1,100 @@ +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 diff --git a/main.py b/main.py index 921efef..7df3bca 100644 --- a/main.py +++ b/main.py @@ -1,7 +1,10 @@ import sys import pygame +import numpy as np +from matplotlib import pyplot as plt +from reinforcementLearning.ReinforcementLearning import ReinforcementLearning from utilities.constants import WIDTH, HEIGHT, SQUARE_SIZE, WHITE, GREEN from utilities.gameManager import GameManager from minimax.minimaxAlgo import MiniMax @@ -11,21 +14,45 @@ WIN = pygame.display.set_mode((WIDTH, HEIGHT)) pygame.display.set_caption("Draughts") -def getRowColFromMouse(pos): +def getRowColFromMouse(pos: dict) -> tuple: + """ + Gets the row and column from the mouse position + :param pos: X and Y position of the mouse + :return: Row and column + """ x, y = pos row = y // SQUARE_SIZE col = x // SQUARE_SIZE return row, col -def drawText(text, font, color, surface, x, y): - textobj = font.render(text, 1, color) +def drawText(text: str, font: pygame.font.SysFont, colour: tuple, surface: pygame.display, x: float, y: int) -> None: + """ + Draws text on the screen + :param text: Text to draw + :param font: System font + :param colour: Colour of the text + :param surface: The display surface + :param x: X position of the text + :param y: Y position of the text + :return None + """ + textobj = font.render(text, 1, colour) textrect = textobj.get_rect() textrect.topleft = (x, y) surface.blit(textobj, textrect) -def drawMultiLineText(surface, text, pos, font, color=pygame.Color('black')): +def drawMultiLineText(surface: pygame.display, text: str, pos: dict, font: pygame.font.SysFont, colour: tuple = pygame.Color('black')) -> None: + """ + Draws multiline text on the screen + :param surface: the display surface + :param text: text to draw + :param pos: X and Y position of the text + :param font: System font + :param colour: colour of the text + :return None + """ words = [word.split(' ') for word in text.splitlines()] # 2D array where each row is a list of words. space = font.size(' ')[0] # The width of a space. max_width, max_height = surface.get_size() @@ -33,7 +60,7 @@ def drawMultiLineText(surface, text, pos, font, color=pygame.Color('black')): word_height = None for line in words: for word in line: - word_surface = font.render(word, 0, color) + word_surface = font.render(word, 0, colour) word_width, word_height = word_surface.get_size() if x + word_width >= max_width: x = pos[0] # Reset the x. @@ -44,80 +71,89 @@ def drawMultiLineText(surface, text, pos, font, color=pygame.Color('black')): y += word_height # Start on new row. -def main(): +def main(difficulty: int = 0) -> None: + """ + Main function, that shows the menu before running the game + :param difficulty: difficulty of minimax + :return: None + """ pygame.init() screen = pygame.display.set_mode((WIDTH, HEIGHT)) menuClock = pygame.time.Clock() click = False width = screen.get_width() - font = pygame.font.SysFont(None, 25) - difficulty = 0 + font = pygame.font.SysFont("", 25) - while True: - # menu - screen.fill((128, 128, 128)) - drawText('Main Menu', font, (255, 255, 255), screen, width / 2, 20) + if difficulty == 0: + while True: + # menu + screen.fill((128, 128, 128)) + drawText('Main Menu', font, (255, 255, 255), screen, width / 2, 20) - mx, my = pygame.mouse.get_pos() + mx, my = pygame.mouse.get_pos() - easy = pygame.Rect(width / 2 - 50, 100, 200, 50) - pygame.draw.rect(screen, (0, 255, 0), easy) - drawText("easy", font, (255, 255, 255), screen, width / 2, 100) - medium = pygame.Rect(width / 2 - 50, 200, 200, 50) - pygame.draw.rect(screen, (255, 125, 0), medium) - drawText("medium", font, (255, 255, 255), screen, width / 2, 200) - hard = pygame.Rect(width / 2 - 50, 300, 200, 50) - pygame.draw.rect(screen, (255, 0, 0), hard) - drawText("hard", font, (255, 255, 255), screen, width / 2, 300) - rules = pygame.Rect(width / 2 - 50, 400, 200, 50) - pygame.draw.rect(screen, (0, 0, 255), rules) - drawText("rules", font, (255, 255, 255), screen, width / 2, 400) - quitGame = pygame.Rect(width / 2 - 50, 500, 200, 50) - pygame.draw.rect(screen, (0, 0, 0), quitGame) - drawText("quit", font, (255, 255, 255), screen, width / 2, 500) + easy = pygame.Rect(width / 2 - 50, 100, 200, 50) + pygame.draw.rect(screen, (0, 255, 0), easy) + drawText("easy", font, (255, 255, 255), screen, width / 2, 100) + medium = pygame.Rect(width / 2 - 50, 200, 200, 50) + pygame.draw.rect(screen, (255, 125, 0), medium) + drawText("medium", font, (255, 255, 255), screen, width / 2, 200) + hard = pygame.Rect(width / 2 - 50, 300, 200, 50) + pygame.draw.rect(screen, (255, 0, 0), hard) + drawText("hard", font, (255, 255, 255), screen, width / 2, 300) + rules = pygame.Rect(width / 2 - 50, 400, 200, 50) + pygame.draw.rect(screen, (0, 0, 255), rules) + drawText("rules", font, (255, 255, 255), screen, width / 2, 400) + quitGame = pygame.Rect(width / 2 - 50, 500, 200, 50) + pygame.draw.rect(screen, (0, 0, 0), quitGame) + drawText("quit", font, (255, 255, 255), screen, width / 2, 500) - if easy.collidepoint((mx, my)): - if click: - difficulty = 1 - break - if medium.collidepoint((mx, my)): - if click: - difficulty = 3 - break - if hard.collidepoint((mx, my)): - if click: - difficulty = 5 - break - if rules.collidepoint((mx, my)): - if click: - rulesGUI() - break - if quitGame.collidepoint((mx, my)): - if click: - pygame.quit() - sys.exit() - click = False - for event in pygame.event.get(): - if event.type == pygame.QUIT: - pygame.quit() - sys.exit() - if event.type == pygame.MOUSEBUTTONDOWN: - if event.button == 1: - click = True + if easy.collidepoint((mx, my)): + if click: + difficulty = 1 + break + if medium.collidepoint((mx, my)): + if click: + difficulty = 3 + break + if hard.collidepoint((mx, my)): + if click: + difficulty = 5 + break + if rules.collidepoint((mx, my)): + if click: + rulesGUI() + break + if quitGame.collidepoint((mx, my)): + if click: + pygame.quit() + sys.exit() + click = False + for event in pygame.event.get(): + if event.type == pygame.QUIT: + pygame.quit() + sys.exit() + if event.type == pygame.MOUSEBUTTONDOWN: + if event.button == 1: + click = True - pygame.display.update() - menuClock.tick(60) - if difficulty != 0: - game(difficulty) + pygame.display.update() + menuClock.tick(60) + + game(difficulty) -def rulesGUI(): +def rulesGUI() -> None: + """ + Shows the rules of the game + :return: None + """ screen = pygame.display.set_mode((WIDTH, HEIGHT)) menuClock = pygame.time.Clock() click = False width = screen.get_width() - titleFont = pygame.font.SysFont(None, 48) - font = pygame.font.SysFont(None, 21) + titleFont = pygame.font.SysFont("", 48) + font = pygame.font.SysFont("", 21) while True: screen.fill((128, 128, 128)) drawText("Rules", titleFont, (255, 255, 255), screen, width / 2, 20) @@ -171,43 +207,116 @@ multi-jump until the next move.""", (50, 50), font) menuClock.tick(60) -def game(difficulty): +def game(difficulty: int) -> None: + """ + Runs the game with the given difficulty. Used for training and testing the RL algorithm + :param difficulty: The difficulty of the minimax algorithm + """ run = True clock = pygame.time.Clock() gameManager = GameManager(WIN, GREEN) + rl = ReinforcementLearning(gameManager.board.getAllMoves(WHITE), gameManager.board, WHITE, gameManager) + # model = rl.buildMainModel() + rl.model.load_weights("./modelWeights/model_final.h5") + mm = MiniMax() + totalReward = [] + winners = [] + for i in range(50): + score = 0 + for j in range(200): + print(j) + clock.tick(FPS) + reward = 0 + if gameManager.turn == WHITE: + # mm = MiniMax() + # value, newBoard = mm.AI(difficulty, WHITE, gameManager) + # gameManager.aiMove(newBoard) + # reward, newBoard = rl.AITrain(gameManager.board) + newBoard = rl.AITest(gameManager.board) - while run: - clock.tick(FPS) + if newBoard is None: + print("Cannot make move") + continue + gameManager.aiMove(newBoard) - if gameManager.turn == WHITE: - mm = MiniMax() - value, newBoard = mm.AI(gameManager.getBoard(), difficulty, WHITE, gameManager) - gameManager.aiMove(newBoard) - # time.sleep(0.15) + gameManager.update() + pygame.display.update() - if gameManager.turn == GREEN: - mm = MiniMax() - value, newBoard = mm.AI(gameManager.getBoard(), difficulty, GREEN, gameManager) - gameManager.aiMove(newBoard) - # time.sleep(0.15) + if gameManager.turn == GREEN: + value, newBoard = mm.AI(difficulty, GREEN, gameManager) + gameManager.aiMove(newBoard) - if gameManager.winner() != None: - print(gameManager.winner()) - run = False + score += reward - for event in pygame.event.get(): - if event.type == pygame.QUIT: - run = False - if event.type == pygame.MOUSEBUTTONDOWN: - pos = pygame.mouse.get_pos() - row, col = getRowColFromMouse(pos) - # if gameManager.turn == GREEN: - gameManager.select(row, col) + if gameManager.winner() is not None: + print("Green" if gameManager.winner() == GREEN else "White", " wins") + # with open(f"winners-{difficulty}.txt", "a+") as f: + # f.write(str(gameManager.winner()) + "\n") + winners.append(gameManager.winner()) + break - gameManager.update() - pygame.display.update() + # for event in pygame.event.get(): + # if event.type == pygame.QUIT: + # break + # if event.type == pygame.MOUSEBUTTONDOWN: + # pos = pygame.mouse.get_pos() + # row, col = getRowColFromMouse(pos) + # # if gameManager.turn == GREEN: + # gameManager.select(row, col) + gameManager.update() + pygame.display.update() + + if gameManager.winner() is None: + # with open(f"winners-{difficulty}.txt", "a+") as f: + # f.write(str(0) + "\n") + winners.append(0) + gameManager.reset() + rl.resetScore() + print("Game: ", i, " Reward: ", score) + # with open(f"rewards-{difficulty}.txt", "a+") as f: + # f.write(str(score) + "\n") + + totalReward.append(score) + # save model weights every 25 games + # if i % 250 == 0 and i != 0: + # rl.model.save("./modelWeights/model_" + str(i) + ".h5") # pygame.quit() + # rl.model.save("./modelWeights/model_final.h5") + change_in_rewards = [0] # Initialize with 0 for the first episode + for i in range(1, len(totalReward)): + change_in_reward = totalReward[i] - totalReward[i - 1] + change_in_rewards.append(change_in_reward) -main() + # with open(f"changeInRewards-{difficulty}.txt", "a+") as f: + # for i in change_in_rewards: + # f.write(str(i) + "\n") + + # episodes = list(range(1, len(totalReward) + 1)) + # + # plt.plot(episodes, change_in_rewards) + # plt.xlabel('Training Games') + # plt.ylabel('Change in Game Reward') + # plt.title('Change in Game Reward vs. Training Games') + # plt.grid(True) + # plt.show() + # + # plt.plot([i for i in range(len(totalReward))], totalReward) + # plt.xlabel("Games") + # plt.ylabel("Reward") + # plt.show() + + fig, ax = plt.subplots() + bar = ax.bar(["Draw", "White", "Green"], [winners.count(0), winners.count(WHITE), winners.count(GREEN)]) + ax.set(xlabel='Winner', ylabel='Frequency', ylim=[0, 500]) + ax.set_title(f"Winners for difficulty — {difficulty}") + ax.bar_label(bar) + plt.show() + + +# difficulties = [3, 5, 7, 9] +# +# for diff in difficulties: +# main(diff) +main(3) diff --git a/minimax/minimaxAlgo.py b/minimax/minimaxAlgo.py index 063b8cd..d6d2af0 100644 --- a/minimax/minimaxAlgo.py +++ b/minimax/minimaxAlgo.py @@ -1,56 +1,46 @@ import random -from copy import deepcopy from math import inf from utilities.constants import GREEN, WHITE +from utilities.gameManager import GameManager -class MiniMax(): +class MiniMax: - def AI(self, board, depth, maxPlayer, gameManager): - if depth == 0 or board.winner() is not None: - return board.scoreOfTheBoard(), board + def AI(self, depth: int, maxPlayer: int, gameManager: GameManager) -> tuple: + """ + The minimax algorithm + :param depth: How deep the algorithm should go + :param maxPlayer: The current player + :param gameManager: The game manager + :return: the best evaluation and board + """ + if depth == 0 or gameManager.board.winner() is not None: + return gameManager.board.scoreOfTheBoard(), gameManager.board - if maxPlayer: + if type(maxPlayer) == int: maxEval = -inf bestMove = None - for move in self.getAllMoves(board, maxPlayer): - evaluation = self.AI(move, depth - 1, False, gameManager)[0] + for move in gameManager.board.getAllMoves(maxPlayer): + evaluation = self.AI(depth - 1, False, gameManager)[0] maxEval = max(maxEval, evaluation) if maxEval > evaluation: bestMove = move if maxEval == evaluation: + # bestMove = move bestMove = bestMove if random.choice([True, False]) else move return maxEval, bestMove else: minEval = inf bestMove = None colour = WHITE if gameManager.turn == GREEN else GREEN - for move in self.getAllMoves(board, colour): - evaluation = self.AI(move, depth - 1, True, gameManager)[0] + for move in gameManager.board.getAllMoves(colour): + evaluation = self.AI(depth - 1, True, gameManager)[0] minEval = min(minEval, evaluation) if minEval < evaluation: bestMove = move if minEval == evaluation: + # bestMove = move bestMove = bestMove if random.choice([True, False]) else move return minEval, bestMove - - def _simulateMove(self, piece, move, board, skip): - board.move(piece, move[0], move[1]) - if skip: - board.remove(skip) - - return board - - def getAllMoves(self, board, colour): - moves = [] - - for piece in board.getAllPieces(colour): - validMoves = board.getValidMoves(piece) - for move, skip in validMoves.items(): - tempBoard = deepcopy(board) - tempPiece = tempBoard.getPiece(piece.row, piece.col) - newBoard = self._simulateMove(tempPiece, move, tempBoard, skip) - moves.append(newBoard) - return moves diff --git a/modelWeights/model_250.h5 b/modelWeights/model_250.h5 new file mode 100644 index 0000000..dc44e7a Binary files /dev/null and b/modelWeights/model_250.h5 differ diff --git a/modelWeights/model_final.h5 b/modelWeights/model_final.h5 new file mode 100644 index 0000000..f7dfd74 Binary files /dev/null and b/modelWeights/model_final.h5 differ diff --git a/reinforcementLearning/ReinforcementLearning.py b/reinforcementLearning/ReinforcementLearning.py index e554162..1addec2 100644 --- a/reinforcementLearning/ReinforcementLearning.py +++ b/reinforcementLearning/ReinforcementLearning.py @@ -1,96 +1,280 @@ import random from collections import deque +from typing import Any +from copy import deepcopy import numpy as np import tensorflow as tf -from tensorflow.python.keras import Sequential, regularizers -from tensorflow.python.keras.layers import Dense +from keras.engine.input_layer import InputLayer +from keras.layers import BatchNormalization +from tensorflow.python.keras import Sequential, regularizers, Input +from tensorflow.python.keras.layers import Dense, Lambda, Dropout +from tensorflow.python.keras.optimizer_v2.adam import Adam + +from minimax.minimaxAlgo import MiniMax +from utilities import Board +from utilities.constants import WHITE, GREEN +from utilities.gameManager import GameManager class ReinforcementLearning(): - def __init__(self, action_space, state_space, env): - self.action_space = action_space - self.state_space = state_space - self.env = env + def __init__(self, actionSpace: list, board: Board, colour: int, gameManager: GameManager) -> None: + """ + Constructor for the ReinforcementLearning class + :param actionSpace: The number of possible actions + :param board: The game board + """ + self.gameManager = gameManager + self.actionSpace = actionSpace + self.board = board + self.state = self.board.board + self.colour = colour + self.score = 0 self.epsilon = 1 self.gamma = .95 - self.batch_size = 64 - self.epsilon_min = .01 - self.epsilon_decay = .995 - self.learning_rate = 0.001 - self.memory = deque(maxlen=100000) - self.model = self._buildModel() + self.batchSize = 512 + self.maxSize = 32 + self.epsilonMin = .01 + self.epsilonDecay = .995 + self.learningRate = 0.0001 + self.memory = deque(maxlen=10000000) + self.model = self.buildMainModel() + print(self.model.summary()) - def AI(self, episode): - loss = [] + def AITrain(self, board: Board) -> tuple: + """ + Learns to play the draughts game + :return: The loss + """ + self.board = board + self.state = self._convertState(self.board.board) + self.actionSpace = self.encodeMoves(self.colour, self.board) + if len(self.actionSpace) == 0: + return self.score, None - max_steps = 1000 + action = self._act() + reward, nextState, done = self.board.step(action, self.colour) + self.score += reward + self.state = self._convertState(nextState.board) + self._remember(deepcopy(self.board), action, reward, self.state, done) + self._replay() - for e in range(episode): - state = self.env.reset() - state = np.reshape(state, (1, self.state_space)) - score = 0 - for i in range(max_steps): - action = self.act(state) - reward, next_state, done = self.env.step(action) - score += reward - next_state = np.reshape(next_state, (1, self.state_space)) - self.remember(state, action, reward, next_state, done) - state = next_state - self.replay() - if done: - print("episode: {}/{}, score: {}".format(e, episode, score)) - break - loss.append(score) + return self.score, nextState - def _buildModel(self): + def AITest(self, board: Board) -> Board: + """ + Runs the AI + :param board: The board + :return: The new board + """ + actionSpace = self.encodeMoves(WHITE, board) + if len(actionSpace) == 0: + print("Cannot make move") + return None + totalMoves = len(actionSpace) + # moves = np.squeeze(moves) + moves = np.pad(actionSpace, (0, self.maxSize - totalMoves), 'constant', constant_values=(1, 1)) + act_values = self.model.predict(self.normalise(moves)) + val = np.argmax(act_values[0]) + val = val if val < totalMoves else totalMoves - 1 + reward, newBoard, done = board.step(actionSpace[val], WHITE) + return newBoard + + def buildMainModel(self) -> Sequential: + """ + Build the model for the AI + :return: The model + """ # Board model - board_model = Sequential() + modelLayers = [ + Lambda(lambda x: tf.reshape(x, [-1, 32])), + Dense(512, activation='relu', kernel_regularizer=regularizers.l2(0.01)), + Dropout(0.2), + Dense(256, activation='relu', kernel_regularizer=regularizers.l2(0.01)), + Dropout(0.2), + Dense(128, activation='relu', kernel_regularizer=regularizers.l2(0.01)), + Dropout(0.2), + Dense(64, activation='relu', kernel_regularizer=regularizers.l2(0.01)), + Dropout(0.2), + Dense(32, activation='relu', kernel_regularizer=regularizers.l2(0.01)), + Dropout(0.2), + Dense(16, activation='linear', kernel_regularizer=regularizers.l2(0.01)) + ] + boardModel = Sequential(modelLayers) - # input dimensions is 32 board position values - board_model.add(Dense(64, activation='relu', input_dim=32)) + # boardModel.add(BatchNormalization()) + boardModel.compile(optimizer=Adam(learning_rate=self.learningRate), loss='mean_squared_error') + boardModel.build(input_shape=(None, None)) - # use regularizers, to prevent fitting noisy labels - board_model.add(Dense(32, activation='relu', kernel_regularizer=regularizers.l2(0.01))) - board_model.add(Dense(16, activation='relu', kernel_regularizer=regularizers.l2(0.01))) # 16 - board_model.add(Dense(8, activation='relu', kernel_regularizer=regularizers.l2(0.01))) # 8 + return boardModel - # output isn't squashed, because it might lose information - board_model.add(Dense(1, activation='linear', kernel_regularizer=regularizers.l2(0.01))) - board_model.compile(optimizer='nadam', loss='binary_crossentropy') - - return board_model - - def remember(self, state, action, reward, next_state, done): - self.memory.append((state, action, reward, next_state, done)) - - def replay(self): - if len(self.memory) < self.batch_size: + def _replay(self) -> None: + """ + trains the model + :return: None + """ + if len(self.memory) < self.batchSize: + # Not enough data to replay and test the model return - minibatch = random.sample(self.memory, self.batch_size) - states = np.array([i[0] for i in minibatch]) - actions = np.array([i[1] for i in minibatch]) - rewards = np.array([i[2] for i in minibatch]) - next_states = np.array([i[3] for i in minibatch]) - dones = np.array([i[4] for i in minibatch]) + # Get a random sample from the memory + minibatch = random.sample(self.memory, int(self.maxSize)) - states = np.squeeze(states) - next_states = np.squeeze(next_states) + # Extract states, rewards, dones + states = [m[0] for m in minibatch] + rewards = [m[2] for m in minibatch] + dones = [m[4] for m in minibatch] - targets = rewards + self.gamma * (np.amax(self.model.predict_on_batch(next_states), axis=1)) * (1 - dones) - targets_full = self.model.predict_on_batch(states) + # Encoded moves + encodedMoves = [] + for state in states: + encodedMoves.append(self.encodeMoves(self.colour, state)) - ind = np.array([i for i in range(self.batch_size)]) - targets_full[[ind], [actions]] = targets + # Calculate targets + targets = [] + for i, moves in enumerate(encodedMoves): + if dones[i]: + target = rewards[i] + else: + target = rewards[i] + self.gamma * self._maxNextQ() - self.model.fit(states, targets_full, epochs=1, verbose=0) - if self.epsilon > self.epsilon_min: - self.epsilon *= self.epsilon_decay + targets.append(target) - def act(self, state): + encodedMoves = np.array([np.pad(m, (0, self.maxSize - len(m)), 'constant', constant_values=(1, 1)) + for m in encodedMoves]) + targets = np.array(targets) + self.model.fit(self.normalise(encodedMoves), self.normalise(targets), epochs=20) + if self.epsilon > self.epsilonMin: + self.epsilon *= self.epsilonDecay + + def _remember(self, state: np.array, action: int, reward: float, nextState: np.array, done: bool) -> None: + """ + Remembers what it has learnt + :param state: The current state + :param action: The action taken + :param reward: The reward for the action + :param nextState: The next state + :param done: Whether the game is finished + :return: None + """ + self.memory.append((state, action, reward, nextState, done)) + + def _act(self) -> Any: + """ + Chooses an action based on the available moves + :return: The action + """ if np.random.rand() <= self.epsilon: - return random.randrange(self.action_space) - act_values = self.model.predict(state) - return np.argmax(act_values[0]) + # choose a random action from the action spaces list + mm = MiniMax() + value, newBoard = mm.AI(3, self.colour, self.gameManager) + if newBoard is None: + return random.choice(self.actionSpace) + where = self._boardDiff(self.board, newBoard) + return self._encode(where[0]+1, where[1]+1) + + if len(self.actionSpace) == 1: + return self.actionSpace[0] + encodedMoves = np.squeeze(self.actionSpace) + encodedMoves = np.pad(encodedMoves, (0, self.maxSize - len(encodedMoves)), 'constant', constant_values=(1, 1)) + actValues = self.model.predict(self.normalise(encodedMoves)) + val = np.argmax(actValues[0]) + val = val if val < len(self.actionSpace) else len(self.actionSpace) - 1 + return self.actionSpace[val] + + def resetScore(self) -> None: + """ + Resets the score + :return: None + """ + self.score = 0 + + def _convertState(self, board: list) -> list: + """ + Converts the board into a 2D list of numbers + :param board: 2D list of pieces + :return: new 2D list of numbers + """ + num_board = [] + + for row in board: + num_row = [] + for piece in row: + if piece == 0: + num_row.append(0) + continue + + if piece.colour == 1: + num_row.append(1) + continue + + num_row.append(2) + + num_board.append(num_row) + + return num_board + + def _encode(self, start: tuple, end: tuple) -> int: + """ + Encodes the move into an integer + :param start: Tuple of start position + :param end: Tuple of end position + :return: Encoded move + """ + start_row = start[0] + start_col = end[0] + + end_row = start[-1] + end_col = end[-1] + + # Concatenate into integer + return int(str(start_row) + str(start_col) + str(end_row) + str(end_col)) + + def _maxNextQ(self) -> float: + """ + Calculates the max Q value for the next state + :return: the max Q value + """ + colour = WHITE if self.colour == GREEN else GREEN + encodedMoves = self.encodeMoves(colour, self.board) + if len(encodedMoves) == 0: + return -1 + paddedMoves = np.array(np.pad(encodedMoves, (0, self.maxSize - len(encodedMoves)), 'constant', constant_values=(1, 1))) + nextQValues = self.model.predict_on_batch(self.normalise(paddedMoves)) + return np.max(nextQValues) + + def encodeMoves(self, colour: int, board: Board) -> list: + """ + Encodes the moves into a list encoded moves + :param colour: Colour of the player + :param board: The board + :return: list Of encoded moves + """ + encodedMoves = [] + moves = board.getAllMoves(colour) + for move in moves: + where = self._boardDiff(board, move) + encodedMoves.append(self._encode(where[0]+1, where[1]+1)) + return encodedMoves + + def _boardDiff(self, board: Board, move: Board) -> np.array: + """ + Finds the difference between the two boards + :param board: The current board + :param move: The new board + :return: the difference between the two boards + """ + cnvState = np.array(self._convertState(board.board)) + cnvMove = np.array(self._convertState(move.board)) + diff = np.subtract(cnvMove, cnvState) + diff = np.nonzero(diff) + return diff + + def normalise(self, data: np.array) -> np.array: + """ + Normalise the data + :param data: the data to normalise + :return: normalised data + """ + return data / 10000 \ No newline at end of file diff --git a/results.py b/results.py new file mode 100644 index 0000000..7d0b17d --- /dev/null +++ b/results.py @@ -0,0 +1,80 @@ +import matplotlib.pyplot as plt +import numpy as np +from utilities.constants import GREEN, WHITE + +# winners = [] +with open("winners-5.txt", "r") as f: + winners = f.readlines() + +winners = [int(x.strip()) for x in winners] + +# lavg = [] +# for i in range(0, len(winners), 25): +# lavg.append(winners[i:i+25].count(2) / 25) +# +# x = np.arange(0, len(lavg)) +# y = np.array(lavg) * 100 +# +# a, b = np.polyfit(x, y, 1) +# +# fig, ax = plt.subplots(figsize=(10, 5)) +# ax.plot(y) +# ax.set_xticks(np.arange(0, len(lavg), 2)) +# ax.minorticks_on() +# ax.plot(x, a*x+b, color='red', linestyle='--', linewidth=2) +# ax.set_ylim([0, 100]) +# ax.set_title("Winners Average") +# ax.grid(which='major', linestyle='-', linewidth='0.5', color='black') +# ax.grid(which='minor', linestyle=':', linewidth='0.5') +# ax.set_xlabel("Average Set") +# ax.set_ylabel("Percentage of Wins") +# ax.tick_params(which="minor", bottom=False, left=False) +# plt.show() + +fig, ax = plt.subplots() +bar = ax.bar(["Draw", "White", "Green"], [winners.count(0), winners.count(WHITE), winners.count(GREEN)]) +ax.set(xlabel='Winner', ylabel='Frequency', ylim=[0, 100]) +ax.set_title("Winners at Depth 5") +ax.grid(which='major', linestyle='-', linewidth='0.5', color='grey', axis='y') +ax.bar_label(bar) +plt.show() + +# with open("trainedRewards.txt", "r") as f: +# totalReward = f.readlines() +# +# totalReward = [float(x.strip()) for x in totalReward] +# filteredReward = list(filter(lambda x: x > -1500, totalReward)) + +# change_in_rewards = [0] # Initialize with 0 for the first episode +# for i in range(1, len(totalReward)): +# change_in_reward = totalReward[i] - totalReward[i - 1] +# change_in_rewards.append(change_in_reward) +# +# games = list(range(1, len(totalReward) + 1)) + +# plt.plot(games, change_in_rewards) +# plt.xlabel('Training Games') +# plt.ylabel('Change in Game Reward') +# plt.title('Change in Game Reward vs. Training Games') +# plt.grid(True) +# plt.show() +# major_ticks = np.arange(0, 101, 20) +# minor_ticks = np.arange(0, 101, 5) +# +# plt.plot([i for i in range(len(totalReward))], totalReward) +# plt.title("Rewards to Games") +# plt.xlabel("Games") +# plt.ylabel("Reward") +# plt.xticks(major_ticks) +# plt.xticks(minor_ticks, minor=True) +# plt.yticks(major_ticks) +# plt.yticks(minor_ticks, minor=True) +# plt.grid(which='both') +# plt.show() +# +# plt.plot([i for i in range(len(filteredReward))], filteredReward) +# plt.title("Filtered Rewards to Games") +# plt.xlabel("Games") +# plt.ylabel("Reward") +# plt.grid(which='both') +# plt.show() diff --git a/rewards-5.txt b/rewards-5.txt new file mode 100644 index 0000000..5688a80 --- /dev/null +++ b/rewards-5.txt @@ -0,0 +1,56 @@ +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 diff --git a/rewardsA.png b/rewardsA.png new file mode 100644 index 0000000..f29c5f3 Binary files /dev/null and b/rewardsA.png differ diff --git a/run.sh b/run.sh new file mode 100755 index 0000000..14d2031 --- /dev/null +++ b/run.sh @@ -0,0 +1,2 @@ +conda activate +python main.py diff --git a/trainedRewards.txt b/trainedRewards.txt new file mode 100644 index 0000000..9a20a5d --- /dev/null +++ b/trainedRewards.txt @@ -0,0 +1,500 @@ +180.5 +115.19999999999999 +-155.39999999999998 +-5169.4000000000015 +100.0 +-3354.2999999999956 +123.79999999999998 +-1738.0 +261.40000000000015 +120.89999999999999 +147.80000000000004 +108.0 +113.50000000000001 +110.5000000000002 +-1048.3000000000006 +75.8 +232.70000000000016 +89.10000000000001 +279.9000000000002 +165.40000000000003 +85.4 +34.20000000000016 +266.20000000000016 +101.69999999999999 +283.0 +-264.5 +225.0 +328.0 +215.5 +150.0 +-217.5 +-2920.0 +82.5 +-208.5 +150.5 +196.5 +223.0 +265.5 +-282.5 +175.5 +206.5 +221.5 +127.5 +-6337.5 +147.5 +231.5 +137.5 +-180.5 +108.0 +-339.5 +190.0 +-69.0 +52.5 +58.0 +-5575.0 +-159.5 +197.5 +177.5 +-5547.5 +-65.5 +136.5 +292.5 +-169.5 +185.0 +115.5 +198.0 +30.0 +162.5 +95.5 +170.0 +113.0 +-1405.0 +-27.0 +-4832.199999999999 +147.5 +228.0 +59.0 +262.5 +-220.0 +150.5 +177.5 +140.0 +123.0 +119.0 +137.5 +134.0 +175.5 +-5598.5 +46.5 +135.0 +205.0 +186.5 +177.5 +120.1 +332.5 +162.5 +122.5 +262.5 +-70.0 +159.0 +138.0 +240.5 +215.0 +147.5 +-118.0 +260.5 +199.0 +130.0 +265.0 +142.5 +230.0 +135.0 +197.5 +-179.5 +198.0 +288.0 +200.5 +-222.5 +165.5 +139.0 +228.0 +211.5 +197.5 +102.5 +233.0 +95.5 +-129.0 +187.5 +158.0 +295.0 +240.5 +-222.5 +-1841.5 +198.0 +113.0 +305.0 +-482.5 +125.5 +215.0 +110.0 +-180.0 +170.0 +-62.5 +215.5 +132.5 +187.5 +135.0 +-65.0 +138.0 +-1972.0 +240.5 +-237.5 +610.0 +267.5 +52.5 +-211.5 +217.5 +88.0 +305.5 +165.5 +115.0 +182.5 +-69.5 +333.0 +363.0 +112.5 +-15.5 +150.5 +118.0 +-52.5 +318.0 +174.0 +198.0 +-5705.0 +160.5 +155.0 +125.0 +165.0 +259.0 +165.5 +155.0 +-236.0 +220.5 +-15.5 +117.5 +367.5 +237.5 +255.0 +85.0 +-5342.5 +141.5 +-3582.5 +-600.0 +915.5 +179.0 +190.0 +-47.5 +275.5 +-5.0 +195.0 +128.0 +146.5 +750.5 +153.0 +-5157.5 +-279.5 +219.0 +154.0 +153.0 +-234.5 +248.0 +182.5 +122.5 +155.5 +1078.0 +102.5 +358.0 +152.5 +261.5 +239.0 +128.0 +111.5 +93.0 +310.5 +-87.0 +158.0 +113.0 +165.5 +120.0 +256.5 +90.5 +245.0 +159.0 +160.0 +-5272.0 +-88.5 +159.0 +169.0 +147.5 +-1149.5 +-372.0 +-270.0 +95.0 +142.5 +212.5 +154.0 +425.0 +153.0 +213.0 +280.5 +-80.5 +-45.90000000000003 +-2250.5 +123.50000000000003 +149.40000000000006 +219.0 +108.0 +180.0 +271.19999999999993 +202.5 +121.8000000000001 +47.599999999999966 +-35.0 +281.5 +307.5 +99.80000000000001 +154.0 +166.30000000000004 +271.5 +205.5 +145.5 +265.0 +113.0 +144.0 +88.0 +-204.5 +204.0 +215.0 +177.5 +168.0 +263.0 +66.5 +258.0 +-5477.5 +94.5 +-139.0 +190.5 +160.0 +-35.5 +149.0 +100.5 +130.0 +-40.0 +175.0 +132.5 +107.5 +143.0 +-5097.5 +97.5 +-1880.0 +-15.0 +213.0 +-601.0 +282.5 +276.5 +113.0 +106.5 +-1011.5 +128.0 +150.0 +145.5 +233.0 +209.0 +136.5 +240.0 +7.5 +-1535.0 +238.0 +185.0 +157.5 +-1660.0 +-15.5 +-145.0 +178.0 +-4997.5 +182.5 +197.5 +355.5 +130.0 +232.5 +-5420.0 +190.0 +128.0 +115.0 +2.5 +149.0 +220.0 +-87.0 +-447.5 +-4122.5 +-67.5 +-425.0 +283.0 +925.0 +49.5 +-15.0 +233.0 +215.5 +234.0 +154.0 +141.5 +226.5 +220.0 +110.5 +270.0 +253.0 +-1944.0 +215.0 +250.5 +155.0 +260.5 +185.0 +261.5 +232.5 +177.5 +-97.5 +-196.0 +230.0 +205.5 +-367.0 +265.5 +180.0 +135.5 +139.0 +103.0 +314.0 +192.5 +179.0 +97.5 +52.5 +135.0 +184.0 +-305.0 +147.5 +206.5 +157.5 +243.0 +-6125.0 +257.5 +125.60000000000002 +190.0 +-6225.0 +96.5 +350.0 +193.0 +185.5 +206.5 +223.0 +-225.0 +117.5 +170.0 +223.0 +175.5 +210.0 +-222.0 +148.60000000000002 +-133.0 +-193.5 +152.5 +-152.0 +-6245.0 +-2.0 +50.5 +-140.5 +185.5 +125.5 +208.0 +-200.0 +202.5 +112.5 +119.0 +210.5 +-1199.5 +-2.5 +102.5 +0.5 +275.5 +135.5 +-32.5 +235.5 +-617.0 +110.0 +222.5 +-372.0 +-53.0 +306.5 +117.5 +-5095.0 +223.0 +-257.0 +-5760.0 +11.5 +182.5 +160.0 +325.5 +151.5 +-327.5 +-5655.0 +62.5 +-5550.0 +195.0 +92.5 +-5290.0 +215.0 +59.0 +179.0 +188.0 +-2103.5 +253.0 +118.0 +335.0 +85.0 +207.5 +229.0 +152.5 +-188.5 +-177.5 +274.0 +220.0 +-5169.5 +128.0 +-1260.0 +140.0 +147.5 +140.0 +-505.0 +155.0 +225.5 +188.0 +131.5 +1111.5 +180.0 +-4897.5 +-687.5 +125.0 +180.0 +111.5 +-5582.5 +232.5 +153.0 +-130.5 +102.5 +189.0 +157.5 +-5685.0 +325.0 +-6870.0 +-520.5 +-3027.0 +32.5 diff --git a/trainedWinners.txt b/trainedWinners.txt new file mode 100644 index 0000000..8105b8b --- /dev/null +++ b/trainedWinners.txt @@ -0,0 +1,500 @@ +2 +0 +2 +2 +2 +1 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +0 +2 +2 +2 +1 +2 +2 +2 +1 +2 +2 +0 +2 +2 +2 +0 +1 +2 +2 +2 +2 +2 +2 +1 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +1 +2 +2 +2 +2 +2 +1 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +1 +2 +2 +2 +2 +1 +1 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +0 +2 +1 +1 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +1 +2 +2 +1 +2 +2 +2 +0 +2 +2 +2 +2 +2 +2 +2 +2 +2 +1 +2 +2 +2 +2 +2 +0 +2 +2 +2 +0 +2 +2 +1 +2 +2 +2 +2 +2 +0 +2 +0 +2 +2 +2 +2 +2 +2 +2 +2 +2 +0 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +0 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +22 +2 +2 +1 +2 +1 +2 +2 +2 +2 +2 +2 +2 +2 +2 +1 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +0 +1 +1 +2 +2 +1 +2 +2 +2 +1 +2 +2 +2 +2 +0 +2 +2 +2 +2 +1 +2 +2 +2 +2 +1 +2 +2 +2 +2 +2 +2 +2 +2 +1 +2 +2 +2 +1 +1 +2 +2 +0 +2 +2 +2 +2 +2 +0 +2 +2 +2 +1 +2 +2 +1 +2 +2 +2 +1 +2 +0 +1 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +1 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +0 +2 +2 +2 +0 +2 +2 +2 +2 +2 +2 +2 +2 +3 +2 +2 +2 +2 +2 +1 +2 +2 +2 +0 +2 +2 +1 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +1 +2 +2 +0 +2 +2 +0 +2 +2 +2 +2 +2 +2 +0 +2 +0 +2 +2 +0 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +1 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +0 +2 +0 +2 +2 +2 +2 +0 +2 +2 +1 +2 +0 +2 +0 +2 +0 +1 +2 +2 diff --git a/utilities/Board.py b/utilities/Board.py new file mode 100644 index 0000000..ebd53c5 --- /dev/null +++ b/utilities/Board.py @@ -0,0 +1,390 @@ +from __future__ import annotations + +import pygame +from copy import deepcopy +from .constants import BLACK, ROWS, GREEN, SQUARE_SIZE, COLS, WHITE +from .piece import Piece + + +class Board: + def __init__(self) -> None: + """ + Constructor for the Board class + :return: None + """ + self.board = [] + self.greenLeft = self.whiteLeft = 12 + self.greenKings = self.whiteKings = 0 + self.green = (144, 184, 59) + self._createBoard() + + def _drawSquares(self, win: pygame.display) -> None: + """ + Draws the squares on the board + :param win: The window + """ + win.fill(BLACK) + for row in range(ROWS): + for col in range(row % 2, ROWS, 2): + pygame.draw.rect(win, self.green, (row * SQUARE_SIZE, col * SQUARE_SIZE, SQUARE_SIZE, SQUARE_SIZE)) + + def _createBoard(self) -> None: + """ + Creates a board representation of the game + :return: None + """ + for row in range(ROWS): + self.board.append([]) + for col in range(COLS): + if col % 2 == ((row + 1) % 2): + if row < 3: + self.board[row].append(Piece(row, col, WHITE)) + continue + + if row > 4: + self.board[row].append(Piece(row, col, GREEN)) + continue + + self.board[row].append(0) + continue + + self.board[row].append(0) + + def draw(self, win: pygame.display) -> None: + """ + Draws the pieces on the board + :param win: The window + :return: None + """ + self._drawSquares(win) + for row in range(ROWS): + for col in range(COLS): + piece = self.board[row][col] + if piece != 0: + piece.draw(win) + + def move(self, piece: Piece, row: int, col: int) -> None: + """ + Moves a piece and make it a king if it reaches the end of the board + :param piece: Piece to move + :param row: Row to move to + :param col: Column to move to + :return: None + """ + self.board[piece.row][piece.col], self.board[row][col] = self.board[row][col], self.board[piece.row][piece.col] + piece.move(row, col) + + if row == ROWS - 1 or row == 0: + piece.makeKing() + + if piece.colour == WHITE: + self.whiteKings += 1 + + if piece.colour == GREEN: + self.greenKings += 1 + + def remove(self, skipped: tuple) -> None: + """ + Removes a piece from the board + :param skipped: A tuple of the piece to remove + """ + for piece in skipped: + self.board[piece.row][piece.col] = 0 + if piece != 0: + if piece.colour == GREEN: + self.greenLeft -= 1 + continue + self.whiteLeft -= 1 + + def getAllMoves(self, colour: int) -> list: + """ + Gets all the possible moves for a player + :param colour: colour of the player + :return: + """ + moves = [] + possibleMoves = [] + possiblePieces = [] + pieces = self.getAllPieces(colour) + hasForcedCapture = False + + for piece in pieces: + validMoves = self.getValidMoves(piece) + + # Check if there are forced capture moves for this piece + forcedCaptureMoves = [move for move, skip in validMoves.items() if skip] + if forcedCaptureMoves: + hasForcedCapture = True + possiblePieces.append(piece) + possibleMoves.append({move: skip for move, skip in validMoves.items() if skip}) + + if hasForcedCapture: + # If there are forced capture moves, consider only those + for i in range(len(possibleMoves)): + for move, skip in possibleMoves[i].items(): + tempBoard = deepcopy(self) + tempPiece = tempBoard.getPiece(possiblePieces[i].row, possiblePieces[i].col) + newBoard = self._simulateMove(tempPiece, move, tempBoard, skip) + moves.append(newBoard) + else: + # If no forced capture moves, consider all valid moves + for piece in pieces: + validMoves = self.getValidMoves(piece) + for move, skip in validMoves.items(): + tempBoard = deepcopy(self) + tempPiece = tempBoard.getPiece(piece.row, piece.col) + newBoard = self._simulateMove(tempPiece, move, tempBoard, skip) + moves.append(newBoard) + + return moves + + def _simulateMove(self, piece: Piece, move: list, board: Board, skip: tuple) -> Board: + """ + Simulates a move on the board + :param piece: Piece to move + :param move: Move to make + :param board: Board to make the move on + :param skip: Tuple of pieces to skip + :return: Board after the move + """ + board.move(piece, move[0], move[1]) + if skip: + board.remove(skip) + + return board + + def getPiece(self, row: int, col: int) -> Piece: + """ + Gets a piece from the board + :param row: Row of the piece + :param col: Column of the piece + :return: Piece + """ + return self.board[row][col] + + def winner(self): + if self.greenLeft <= 0: + return WHITE + + if self.whiteLeft <= 0: + return GREEN + + return None + + def getValidMoves(self, piece: Piece) -> dict: + """ + Gets all the valid moves for a piece + :param piece: Piece to get the moves for + :return: dictionary of moves + """ + moves = {} + forcedCapture = {} + left = piece.col - 1 + right = piece.col + 1 + row = piece.row + if piece.colour == GREEN or piece.king: + moves.update(self._traverseLeft(row - 1, max(row - 3, -1), -1, piece.colour, left)) + moves.update(self._traverseRight(row - 1, max(row - 3, -1), -1, piece.colour, right)) + if piece.colour == WHITE or piece.king: + moves.update(self._traverseLeft(row + 1, min(row + 3, ROWS), 1, piece.colour, left)) + moves.update(self._traverseRight(row + 1, min(row + 3, ROWS), 1, piece.colour, right)) + + if len(moves.values()) <= 1: + return moves + + movesValues = list(moves.values()) + movesKeys = list(moves.keys()) + + forced = {} + + for i in range(len(movesKeys)): + if not movesValues[i]: + forced[movesKeys[i]] = moves[movesKeys[i]] + if len(forced) != len(moves): + forced.clear() + for i in range(len(movesKeys)): + if movesValues[i]: + forced[movesKeys[i]] = moves[movesKeys[i]] + if len(forced) != len(moves): + for i in range(len(movesKeys)): + if movesValues[i]: + forcedCapture[movesKeys[i]] = moves[movesKeys[i]] + else: + forcedCapture = forced + else: + forcedCapture = forced + + return forcedCapture + + def scoreOfTheBoard(self) -> int: + """ + Calculates the score of the board + :return: score of the board + """ + return self.whiteLeft - self.greenLeft + + def getAllPieces(self, colour): + """ + Gets all the pieces of a player + :param colour: Piece colour + :return: Pieces of the player + """ + pieces = [] + for row in self.board: + for piece in row: + if piece != 0 and piece.colour == colour: + pieces.append(piece) + return pieces + + def _traverseLeft(self, start: int, stop: int, step: int, colour: int, left: int, skipped: list = []) -> dict: + """ + Traverses the left side of the board + :param start: Start position + :param stop: Stop position + :param step: Step size + :param colour: colour of the player + :param left: Left position + :param skipped: List of pieces to skip + :return: dictionary of moves + """ + moves = {} + last = [] + for row in range(start, stop, step): + if left < 0: + break + mvs = self._traverse(row, left, skipped, moves, step, last, colour) + if mvs is None: + break + elif isinstance(mvs, list): + last = mvs + else: + moves.update(mvs) + left -= 1 + return moves + + def _traverseRight(self, start: int, stop: int, step: int, colour: int, right: int, skipped: list = []) -> dict: + """ + Traverses the left side of the board + :param start: Start position + :param stop: Stop position + :param step: Step size + :param colour: colour of the player + :param right: Right position + :param skipped: List of pieces to skip + :return: dictionary of moves + """ + moves = {} + last = [] + for row in range(start, stop, step): + if right >= COLS: + break + + mvs = self._traverse(row, right, skipped, moves, step, last, colour) + if mvs is None: + break + elif isinstance(mvs, list): + last = mvs + else: + moves.update(mvs) + + right += 1 + return moves + + def _traverse(self, row: int, col: int, skipped: list, moves: dict, step: int, last: list, colour: int) -> list or None: + """ + Traverses the board + :param row: Row to traverse + :param col: Column to traverse + :param skipped: List of pieces to jump + :param moves: Dictionary of moves + :param step: Step size + :param last: List of last pieces + :param colour: Colour of the player + :return: list of last pieces or None + """ + current = self.board[row][col] + if current == 0: + if skipped and not last: + return None + elif skipped: + moves[(row, col)] = last + skipped + else: + moves[(row, col)] = last + + if last: + if step == -1: + rowCalc = max(row - 3, 0) + else: + rowCalc = min(row + 3, ROWS) + moves.update(self._traverseLeft(row + step, rowCalc, step, colour, col - 1, skipped=last)) + moves.update(self._traverseRight(row + step, rowCalc, step, colour, col + 1, skipped=last)) + return None + elif current.colour == colour: + return None + else: + last = [current] + return last + + def step(self, move: int, colour: int) -> None: + """ + Takes a move and executes it + :param move: The move to execute + :param colour: The colour of the player + :return: None + """ + start, end = self._decode(move) + start[0] = start[0] - 1 + start[1] = start[1] - 1 + end[0] = end[0] - 1 + end[1] = end[1] - 1 + reward = 0 + done = False + piece = self.getPiece(start[0], start[1]) + if piece == 0: + newStart = end + end = start + start = newStart + piece = self.getPiece(start[0], start[1]) + moves = self.getValidMoves(piece) + for move, skip in moves.items(): + if tuple(end) == move: + self._simulateMove(piece, move, self, skip) + if len(skip) == 1: + reward = 2 + break + if len(skip) > 1: + reward = 3 + len(skip) * 0.2 + break + reward = -0.5 + break + + if self.winner() == colour: + done = True + reward = 10 + return reward, self, done + + + def _decode(self, move: int) -> tuple: + """ + Decodes the move from a integer to a start and end tuple + :param move: The move to decode + :return: Start and end tuple + """ + # Split digits back out + str_code = str(move) + # print(str_code) + start_row = int(str_code[0]) + start_col = int(str_code[1]) + end_row = int(str_code[2]) + end_col = int(str_code[3]) + # Reconstruct positions + start = [start_row, start_col] + end = [end_row, end_col] + return start, end + + # def reset(self): + # self.board = [] + # self.whiteLeft = self.greenLeft = 12 + # self.whiteKings = self.greenKings = 0 + # self._createBoard() + # return self.board diff --git a/utilities/board.py b/utilities/board.py deleted file mode 100644 index a1bf435..0000000 --- a/utilities/board.py +++ /dev/null @@ -1,185 +0,0 @@ -import pygame - -from .constants import BLACK, ROWS, GREEN, SQUARE_SIZE, COLS, WHITE -from .piece import Piece - - -class Board: - def __init__(self): - self.board = [] - self.greenLeft = self.whiteLeft = 12 - self.greenKings = self.whiteKings = 0 - self.createBoard() - - def drawSquares(self, win): - win.fill(BLACK) - for row in range(ROWS): - for col in range(row % 2, ROWS, 2): - pygame.draw.rect(win, GREEN, (row * SQUARE_SIZE, col * SQUARE_SIZE, SQUARE_SIZE, SQUARE_SIZE)) - - def createBoard(self): - for row in range(ROWS): - self.board.append([]) - for col in range(COLS): - if col % 2 == ((row + 1) % 2): - if row < 3: - self.board[row].append(Piece(row, col, WHITE)) - elif row > 4: - self.board[row].append(Piece(row, col, GREEN)) - else: - self.board[row].append(None) - else: - self.board[row].append(None) - - def draw(self, win): - self.drawSquares(win) - for row in range(ROWS): - for col in range(COLS): - piece = self.board[row][col] - if piece is not None: - piece.draw(win) - - def move(self, piece, row, col): - self.board[piece.row][piece.col], self.board[row][col] = self.board[row][col], self.board[piece.row][piece.col] - piece.move(row, col) - - if row == ROWS - 1 or row == 0: - piece.makeKing() - if piece.colour == WHITE: - self.whiteKings += 1 - else: - self.greenKings += 1 - - def remove(self, skipped): - for piece in skipped: - self.board[piece.row][piece.col] = None - if piece is not None: - if piece.colour == GREEN: - self.greenLeft -= 1 - else: - self.whiteLeft -= 1 - - def getPiece(self, row, col): - return self.board[row][col] - - def winner(self): - if self.greenLeft <= 0: - return WHITE - elif self.whiteLeft <= 0: - return GREEN - - return None - - def getValidMoves(self, piece): - moves = {} - forcedCapture = {} - left = piece.col - 1 - right = piece.col + 1 - row = piece.row - if piece.colour == GREEN: - moves.update(self._traverseLeft(row - 1, max(row - 3, -1), -1, piece.colour, left)) - moves.update(self._traverseRight(row - 1, max(row - 3, -1), -1, piece.colour, right)) - if piece.colour == WHITE: - moves.update(self._traverseLeft(row + 1, min(row + 3, ROWS), 1, piece.colour, left)) - moves.update(self._traverseRight(row + 1, min(row + 3, ROWS), 1, piece.colour, right)) - - if piece.king: - moves.update(self._traverseLeft(row - 1, max(row - 3, -1), -1, piece.colour, left)) - moves.update(self._traverseRight(row - 1, max(row - 3, -1), -1, piece.colour, right)) - moves.update(self._traverseLeft(row + 1, min(row + 3, ROWS), 1, piece.colour, left)) - moves.update(self._traverseRight(row + 1, min(row + 3, ROWS), 1, piece.colour, right)) - - if len(moves.values()) <= 1: - return moves - - movesValues = list(moves.values()) - movesKeys = list(moves.keys()) - - forced = {} - - for i in range(len(movesKeys)): - if not movesValues[i]: - forced[movesKeys[i]] = moves[movesKeys[i]] - if len(forced) != len(moves): - forced.clear() - for i in range(len(movesKeys)): - if movesValues[i]: - forced[movesKeys[i]] = moves[movesKeys[i]] - if len(forced) != len(moves): - for i in range(len(movesKeys)): - if movesValues[i]: - forcedCapture[movesKeys[i]] = moves[movesKeys[i]] - else: - forcedCapture = forced - else: - forcedCapture = forced - return forcedCapture - - def scoreOfTheBoard(self): - return self.whiteLeft - self.greenLeft - - def getAllPieces(self, colour): - pieces = [] - for row in self.board: - for piece in row: - if piece is not None and piece.colour == colour: - pieces.append(piece) - return pieces - - def _traverseLeft(self, start, stop, step, colour, left, skipped=[]): - moves = {} - last = [] - for row in range(start, stop, step): - if left < 0: - break - mvs = self._traverse(row, left, skipped, moves, step, last, colour) - if mvs is None: - break - elif isinstance(mvs, list): - last = mvs - else: - moves.update(mvs) - left -= 1 - return moves - - def _traverseRight(self, start, stop, step, colour, right, skipped=[]): - moves = {} - last = [] - for row in range(start, stop, step): - if right >= COLS: - break - - mvs = self._traverse(row, right, skipped, moves, step, last, colour) - if mvs is None: - break - elif isinstance(mvs, list): - last = mvs - else: - moves.update(mvs) - - right += 1 - return moves - - def _traverse(self, row, col, skipped, moves, step, last, colour): - current = self.board[row][col] - if current is None: - if skipped and not last: - return None - elif skipped: - moves[(row, col)] = last + skipped - else: - moves[(row, col)] = last - - if last: - if step == -1: - rowCalc = max(row - 3, 0) - else: - rowCalc = min(row + 3, ROWS) - moves.update(self._traverseLeft(row + step, rowCalc, step, colour, col - 1, skipped=last)) - moves.update(self._traverseRight(row + step, rowCalc, step, colour, col + 1, skipped=last)) - return None - elif current.colour == colour: - return None - else: - last = [current] - return last diff --git a/utilities/constants.py b/utilities/constants.py index 526c64e..be0f817 100644 --- a/utilities/constants.py +++ b/utilities/constants.py @@ -4,10 +4,10 @@ WIDTH, HEIGHT = 800, 800 ROWS, COLS = 8, 8 SQUARE_SIZE = WIDTH // COLS -# RGB color +# RGB colour -GREEN = (144, 184, 59) -WHITE = (255, 255, 255) +GREEN = 1 +WHITE = 2 BLACK = (0, 0, 0) BLUE = (0, 0, 255) GREY = (128, 128, 128) diff --git a/utilities/gameManager.py b/utilities/gameManager.py index 93db2a1..46289b7 100644 --- a/utilities/gameManager.py +++ b/utilities/gameManager.py @@ -1,42 +1,74 @@ +from __future__ import annotations + import pygame -from utilities.board import Board +from utilities.Board import Board from utilities.constants import GREEN, WHITE, BLUE, SQUARE_SIZE + class GameManager: - def __init__(self, win, colour): + def __init__(self, win: pygame.display, colour: int) -> None: + """ + Constructor for the GameManager class + :param win: The window + :param colour: The colour of the player + """ self._init(colour) self.win = win - def _init(self, colour): + def _init(self, colour: int) -> None: + """ + Initializes the game + :param colour: the colour of the player + """ self.selected = None self.board = Board() self.turn = colour self.validMoves = {} self.legCount = 0 - def update(self): + def update(self) -> None: + """ + Updates the GUI + return: None + """ self.board.draw(self.win) self.drawValidMoves(self.validMoves) pygame.display.update() - def reset(self): + def reset(self) -> None: + """ + Resets the game + :return: None + """ self._init(self.turn) - def select(self, row, col): + def select(self, row: int, col: int) -> bool: + """ + Selects a piece + :param row: Row of the piece + :param col: Column of the piece + :return: True + """ if self.selected: result = self._move(row, col) if not result: self.selected = None self.select(row, col) piece = self.board.getPiece(row, col) - if piece is not None and piece.colour == self.turn: + if piece != 0 and piece.colour == self.turn: self.selected = piece self.validMoves = self.board.getValidMoves(piece) return True - def _move(self, row, col): + def _move(self, row: int, col: int) -> bool: + """ + Moves a piece + :param row: Row of the piece + :param col: Column of the piece + :return: True if the move was successful, False otherwise + """ piece = self.board.getPiece(row, col) - if self.selected and piece is None and (row, col) in self.validMoves: + if self.selected and piece == 0 and (row, col) in self.validMoves: self.board.move(self.selected, row, col) skipped = self.validMoves[row, col] if self.validMoves[list(self.validMoves.keys())[0]]: @@ -58,21 +90,39 @@ class GameManager: self.validMoves = {} if self.turn == GREEN: self.turn = WHITE - else: - self.turn = GREEN + return + self.turn = GREEN - def drawValidMoves(self, moves): + def drawValidMoves(self, moves: list) -> None: + """ + Draws the valid moves + :param moves: list of valid moves + :return: None + """ for row, col in moves: pygame.draw.circle(self.win, BLUE, (col * SQUARE_SIZE + SQUARE_SIZE // 2, row * SQUARE_SIZE + SQUARE_SIZE // 2), 15) - def winner(self): + def winner(self) -> int or None: + """ + Gets the winner + :return: The winner + """ return self.board.winner() - def getBoard(self): + def getBoard(self) -> Board: + """ + Gets the board + :return: The board + """ return self.board - def aiMove(self, board): + def aiMove(self, board: Board) -> None: + """ + Makes a move for the AI + :param board: The new board + :return: None + """ if board is None: # colour = "green" if self.turn == GREEN else "white" # print("no move left for " + colour + " to make") diff --git a/utilities/piece.py b/utilities/piece.py index c808fd9..4d033d0 100644 --- a/utilities/piece.py +++ b/utilities/piece.py @@ -1,10 +1,16 @@ import pygame.draw -from utilities.constants import SQUARE_SIZE, GREY, CROWN +from utilities.constants import SQUARE_SIZE, GREY, CROWN, GREEN class Piece: - def __init__(self, row, col, colour): + def __init__(self, row: int, col: int, colour: int) -> None: + """ + Initialises the piece class, which represents a piece on the board. Constructor for the piece class + :param row: Row of the piece + :param col: Column of the piece + :param colour: Colour of the piece + """ self.row = row self.col = col self.colour = colour @@ -14,25 +20,50 @@ class Piece: self.calcPosition() self.padding = 20 self.border = 2 + self.green = (144, 184, 59) + self.white = (255, 255, 255) - def calcPosition(self): + def calcPosition(self) -> None: + """ + Calculates the position of the piece + :return: None + """ self.x = SQUARE_SIZE * self.col + SQUARE_SIZE // 2 self.y = SQUARE_SIZE * self.row + SQUARE_SIZE // 2 - def makeKing(self): + def makeKing(self) -> None: + """ + Makes the piece a king + :return: None + """ self.king = True - def draw(self, win): + def draw(self, win) -> None: + """ + Draws the piece + :param win: The window to draw the piece on + :return: None + """ radius = SQUARE_SIZE // 2 - self.padding pygame.draw.circle(win, GREY, (self.x, self.y), radius + self.border) - pygame.draw.circle(win, self.colour, (self.x, self.y), radius) + pygame.draw.circle(win, self.green if self.colour == GREEN else self.white, (self.x, self.y), radius) if self.king: win.blit(CROWN, (self.x - CROWN.get_width() // 2, self.y - CROWN.get_height() // 2)) - def move(self, row, col): + def move(self, row: int, col: int) -> None: + """ + Moves the piece to a new position + :param row: Row to move to + :param col: Column to move to + :return: None + """ self.row = row self.col = col self.calcPosition() - def __repr__(self): + def __repr__(self) -> str: + """ + String representation of the piece + :return: String representation of the colour + """ return str(self.colour) diff --git a/winners-3.txt b/winners-3.txt new file mode 100644 index 0000000..c771e8b --- /dev/null +++ b/winners-3.txt @@ -0,0 +1,100 @@ +2 +2 +2 +2 +0 +2 +2 +2 +2 +2 +2 +0 +0 +2 +1 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +0 +2 +2 +2 +2 +0 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +1 +0 +2 +2 +2 +2 +2 +2 +2 +2 +2 +1 +2 +1 +0 +2 +0 +0 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +1 +2 +2 +2 +0 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 diff --git a/winners-5.txt b/winners-5.txt new file mode 100644 index 0000000..94e3b68 --- /dev/null +++ b/winners-5.txt @@ -0,0 +1,100 @@ +2 +2 +1 +2 +1 +1 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +0 +2 +2 +2 +1 +2 +2 +0 +2 +2 +0 +2 +2 +0 +0 +2 +2 +2 +2 +2 +0 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +0 +2 +2 +2 +0 +2 +0 +2 +2 +2 +2 +1 +0 +2 +2 +2 +2 +2 +2 +1 +2 +2 +2 +2 +0 +2 +0 +2 +2 +2 +2 +2 +1 +2 +2 +1 +2 +2 +2 +2 +2 +2 +2 +2