From 1aa8ffa8fc4f407f9a3aa0fc26e11943452ff369 Mon Sep 17 00:00:00 2001
From: rodude123
Date: Tue, 22 Aug 2023 16:31:16 +0100
Subject: [PATCH] created working reinforcement learning model

---
 .idea/misc.xml                      |   2 +-
 main.py                             | 194 +++++++-----
 minimax/minimaxAlgo.py              |  40 +--
 .../ReinforcementLearning.py        | 292 ++++++++++++++----
 run.sh                              |   2 +
 utilities/{board.py => Board.py}    | 136 ++++++--
 utilities/constants.py              |   4 +-
 utilities/gameManager.py            |  11 +-
 utilities/piece.py                  |   6 +-
 9 files changed, 466 insertions(+), 221 deletions(-)
 create mode 100755 run.sh
 rename utilities/{board.py => Board.py} (60%)

diff --git a/.idea/misc.xml b/.idea/misc.xml
index d22234a..582a97a 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -1,4 +1,4 @@
-
+
\ No newline at end of file
diff --git a/main.py b/main.py
index 921efef..40c2f98 100644
--- a/main.py
+++ b/main.py
@@ -1,7 +1,9 @@
 import sys
 
 import pygame
+from matplotlib import pyplot as plt
 
+from reinforcementLearning.ReinforcementLearning import ReinforcementLearning
 from utilities.constants import WIDTH, HEIGHT, SQUARE_SIZE, WHITE, GREEN
 from utilities.gameManager import GameManager
 from minimax.minimaxAlgo import MiniMax
@@ -44,71 +46,71 @@ def drawMultiLineText(surface, text, pos, font, color=pygame.Color('black')):
             y += word_height  # Start on new row.
 
 
-def main():
+def main(difficulty=0):
     pygame.init()
     screen = pygame.display.set_mode((WIDTH, HEIGHT))
     menuClock = pygame.time.Clock()
     click = False
     width = screen.get_width()
-    font = pygame.font.SysFont(None, 25)
-    difficulty = 0
+    font = pygame.font.SysFont("", 25)
 
-    while True:
-        # menu
-        screen.fill((128, 128, 128))
-        drawText('Main Menu', font, (255, 255, 255), screen, width / 2, 20)
+    if difficulty == 0:
+        while True:
+            # menu
+            screen.fill((128, 128, 128))
+            drawText('Main Menu', font, (255, 255, 255), screen, width / 2, 20)
 
-        mx, my = pygame.mouse.get_pos()
+            mx, my = pygame.mouse.get_pos()
 
-        easy = pygame.Rect(width / 2 - 50, 100, 200, 50)
-        pygame.draw.rect(screen, (0, 255, 0), easy)
-        drawText("easy", font, (255, 255, 255), screen, width / 2, 100)
-        medium = pygame.Rect(width / 2 - 50, 200, 200, 50)
-        pygame.draw.rect(screen, (255, 125, 0), medium)
-        drawText("medium", font, (255, 255, 255), screen, width / 2, 200)
-        hard = pygame.Rect(width / 2 - 50, 300, 200, 50)
-        pygame.draw.rect(screen, (255, 0, 0), hard)
-        drawText("hard", font, (255, 255, 255), screen, width / 2, 300)
-        rules = pygame.Rect(width / 2 - 50, 400, 200, 50)
-        pygame.draw.rect(screen, (0, 0, 255), rules)
-        drawText("rules", font, (255, 255, 255), screen, width / 2, 400)
-        quitGame = pygame.Rect(width / 2 - 50, 500, 200, 50)
-        pygame.draw.rect(screen, (0, 0, 0), quitGame)
-        drawText("quit", font, (255, 255, 255), screen, width / 2, 500)
+            easy = pygame.Rect(width / 2 - 50, 100, 200, 50)
+            pygame.draw.rect(screen, (0, 255, 0), easy)
+            drawText("easy", font, (255, 255, 255), screen, width / 2, 100)
+            medium = pygame.Rect(width / 2 - 50, 200, 200, 50)
+            pygame.draw.rect(screen, (255, 125, 0), medium)
+            drawText("medium", font, (255, 255, 255), screen, width / 2, 200)
+            hard = pygame.Rect(width / 2 - 50, 300, 200, 50)
+            pygame.draw.rect(screen, (255, 0, 0), hard)
+            drawText("hard", font, (255, 255, 255), screen, width / 2, 300)
+            rules = pygame.Rect(width / 2 - 50, 400, 200, 50)
+            pygame.draw.rect(screen, (0, 0, 255), rules)
+            drawText("rules", font, (255, 255, 255), screen, width / 2, 400)
+            quitGame = pygame.Rect(width / 2 - 50, 500, 200, 50)
+            pygame.draw.rect(screen, (0, 0, 0), quitGame)
+            drawText("quit", font, (255, 255, 255), screen, width / 2, 500)
 
-        if easy.collidepoint((mx, my)):
-            if click:
-                difficulty = 1
-                break
-        if medium.collidepoint((mx, my)):
-            if click:
-                difficulty = 3
-                break
-        if hard.collidepoint((mx, my)):
-            if click:
-                difficulty = 5
-                break
-        if rules.collidepoint((mx, my)):
-            if click:
-                rulesGUI()
-                break
-        if quitGame.collidepoint((mx, my)):
-            if click:
-                pygame.quit()
-                sys.exit()
-        click = False
-        for event in pygame.event.get():
-            if event.type == pygame.QUIT:
-                pygame.quit()
-                sys.exit()
-            if event.type == pygame.MOUSEBUTTONDOWN:
-                if event.button == 1:
-                    click = True
+            if easy.collidepoint((mx, my)):
+                if click:
+                    difficulty = 1
+                    break
+            if medium.collidepoint((mx, my)):
+                if click:
+                    difficulty = 3
+                    break
+            if hard.collidepoint((mx, my)):
+                if click:
+                    difficulty = 5
+                    break
+            if rules.collidepoint((mx, my)):
+                if click:
+                    rulesGUI()
+                    break
+            if quitGame.collidepoint((mx, my)):
+                if click:
+                    pygame.quit()
+                    sys.exit()
+            click = False
+            for event in pygame.event.get():
+                if event.type == pygame.QUIT:
+                    pygame.quit()
+                    sys.exit()
+                if event.type == pygame.MOUSEBUTTONDOWN:
+                    if event.button == 1:
+                        click = True
 
-        pygame.display.update()
-        menuClock.tick(60)
-    if difficulty != 0:
-        game(difficulty)
+            pygame.display.update()
+            menuClock.tick(60)
+
+    game(difficulty)
 
 
 def rulesGUI():
@@ -116,8 +118,8 @@ def rulesGUI():
     menuClock = pygame.time.Clock()
     click = False
     width = screen.get_width()
-    titleFont = pygame.font.SysFont(None, 48)
-    font = pygame.font.SysFont(None, 21)
+    titleFont = pygame.font.SysFont("", 48)
+    font = pygame.font.SysFont("", 21)
     while True:
         screen.fill((128, 128, 128))
         drawText("Rules", titleFont, (255, 255, 255), screen, width / 2, 20)
@@ -175,39 +177,65 @@ def game(difficulty):
     run = True
     clock = pygame.time.Clock()
     gameManager = GameManager(WIN, GREEN)
+    rl = ReinforcementLearning(gameManager.board.getAllMoves(WHITE), gameManager.board, WHITE, gameManager)
+    mm = MiniMax()
+    totalReward = []
+    for i in range(2000):
+        score = 0
+        for j in range(200):
+            clock.tick(FPS)
+            reward = 0
+            if gameManager.turn == WHITE:
+                mm = MiniMax()
+                value, newBoard = mm.AI(difficulty, WHITE, gameManager)
+                # gameManager.aiMove(newBoard)
+                # reward, newBoard = rl.AI(gameManager.board)
+                if newBoard is None:
+                    print("Cannot make move")
+                    continue
+                gameManager.aiMove(newBoard)
+                #
 
-    while run:
-        clock.tick(FPS)
+            gameManager.update()
+            pygame.display.update()
 
-        if gameManager.turn == WHITE:
-            mm = MiniMax()
-            value, newBoard = mm.AI(gameManager.getBoard(), difficulty, WHITE, gameManager)
-            gameManager.aiMove(newBoard)
-            # time.sleep(0.15)
+            if gameManager.turn == GREEN:
+                value, newBoard = mm.AI(difficulty, GREEN, gameManager)
+                gameManager.aiMove(newBoard)
 
-        if gameManager.turn == GREEN:
-            mm = MiniMax()
-            value, newBoard = mm.AI(gameManager.getBoard(), difficulty, GREEN, gameManager)
-            gameManager.aiMove(newBoard)
-            # time.sleep(0.15)
+            score += reward
 
-        if gameManager.winner() != None:
-            print(gameManager.winner())
-            run = False
+            if gameManager.winner() is not None:
+                print(gameManager.winner())
+                break
 
-        for event in pygame.event.get():
-            if event.type == pygame.QUIT:
-                run = False
-            if event.type == pygame.MOUSEBUTTONDOWN:
-                pos = pygame.mouse.get_pos()
-                row, col = getRowColFromMouse(pos)
-                # if gameManager.turn == GREEN:
-                gameManager.select(row, col)
+            # for event in pygame.event.get():
+            #     if event.type == pygame.QUIT:
+            #         break
+            #     if event.type == pygame.MOUSEBUTTONDOWN:
+            #         pos = pygame.mouse.get_pos()
+            #         row, col = getRowColFromMouse(pos)
+            #         # if gameManager.turn == GREEN:
+            #         gameManager.select(row, col)
 
-        gameManager.update()
-        pygame.display.update()
+            gameManager.update()
+            pygame.display.update()
+        gameManager.reset()
+        rl.resetScore()
+        print("Game: ", i, " Reward: ", score)
+        totalReward.append(score)
+        # save model weights every 25 games
+        if i % 250 == 0 and i != 0:
+            rl.model.save("./modelWeights/model_" + str(i) + ".h5")
 
     # pygame.quit()
+    rl.model.save("./modelWeights/model_final.h5")
 
-main()
+    plt.plot([i for i in range(len(totalReward))], totalReward)
+    plt.xlabel("Games")
+    plt.ylabel("Reward")
+    plt.show()
+
+
+main(3)
diff --git a/minimax/minimaxAlgo.py b/minimax/minimaxAlgo.py
index 063b8cd..d4816d4 100644
--- a/minimax/minimaxAlgo.py
+++ b/minimax/minimaxAlgo.py
@@ -1,56 +1,38 @@
 import random
-from copy import deepcopy
 from math import inf
 
 from utilities.constants import GREEN, WHITE
 
 
-class MiniMax():
+class MiniMax:
 
-    def AI(self, board, depth, maxPlayer, gameManager):
-        if depth == 0 or board.winner() is not None:
-            return board.scoreOfTheBoard(), board
+    def AI(self, depth, maxPlayer, gameManager):
+        if depth == 0 or gameManager.board.winner() is not None:
+            return gameManager.board.scoreOfTheBoard(), gameManager.board
 
-        if maxPlayer:
+        if type(maxPlayer) == int:
             maxEval = -inf
             bestMove = None
-            for move in self.getAllMoves(board, maxPlayer):
-                evaluation = self.AI(move, depth - 1, False, gameManager)[0]
+            for move in gameManager.board.getAllMoves(maxPlayer):
+                evaluation = self.AI(depth - 1, False, gameManager)[0]
                 maxEval = max(maxEval, evaluation)
                 if maxEval > evaluation:
                     bestMove = move
                 if maxEval == evaluation:
+                    # bestMove = move
                     bestMove = bestMove if random.choice([True, False]) else move
             return maxEval, bestMove
         else:
            minEval = inf
            bestMove = None
            colour = WHITE if gameManager.turn == GREEN else GREEN
-            for move in self.getAllMoves(board, colour):
-                evaluation = self.AI(move, depth - 1, True, gameManager)[0]
+            for move in gameManager.board.getAllMoves(colour):
+                evaluation = self.AI(depth - 1, True, gameManager)[0]
                 minEval = min(minEval, evaluation)
                 if minEval < evaluation:
                     bestMove = move
                 if minEval == evaluation:
+                    # bestMove = move
                     bestMove = bestMove if random.choice([True, False]) else move
            return minEval, bestMove
-
-    def _simulateMove(self, piece, move, board, skip):
-        board.move(piece, move[0], move[1])
-        if skip:
-            board.remove(skip)
-
-        return board
-
-    def getAllMoves(self, board, colour):
-        moves = []
-
-        for piece in board.getAllPieces(colour):
-            validMoves = board.getValidMoves(piece)
-            for move, skip in validMoves.items():
-                tempBoard = deepcopy(board)
-                tempPiece = tempBoard.getPiece(piece.row, piece.col)
-                newBoard = self._simulateMove(tempPiece, move, tempBoard, skip)
-                moves.append(newBoard)
-        return moves
diff --git a/reinforcementLearning/ReinforcementLearning.py b/reinforcementLearning/ReinforcementLearning.py
index e554162..59c8661 100644
--- a/reinforcementLearning/ReinforcementLearning.py
+++ b/reinforcementLearning/ReinforcementLearning.py
@@ -1,96 +1,252 @@
 import random
 from collections import deque
+from typing import Any
+from copy import deepcopy
 
 import numpy as np
 import tensorflow as tf
-from tensorflow.python.keras import Sequential, regularizers
-from tensorflow.python.keras.layers import Dense
+from keras.engine.input_layer import InputLayer
+from keras.layers import BatchNormalization
+from tensorflow.python.keras import Sequential, regularizers, Input
+from tensorflow.python.keras.layers import Dense, Lambda, Dropout
+from tensorflow.python.keras.optimizer_v2.adam import Adam
+
+from minimax.minimaxAlgo import MiniMax
+from utilities import Board
+from utilities.constants import WHITE, GREEN
+from utilities.gameManager import GameManager
 
 
 class ReinforcementLearning():
 
-    def __init__(self, action_space, state_space, env):
-        self.action_space = action_space
-        self.state_space = state_space
-        self.env = env
+    def __init__(self, actionSpace: list, board: Board, colour: int, gameManager: GameManager) -> None:
+        """
+        Constructor for the ReinforcementLearning class
+        :param actionSpace: the number of possible actions
+        :param board: the game board
+        """
+        self.gameManager = gameManager
+        self.actionSpace = actionSpace
+        self.board = board
+        self.state = self.board.board
+        self.colour = colour
+        self.score = 0
         self.epsilon = 1
         self.gamma = .95
-        self.batch_size = 64
-        self.epsilon_min = .01
-        self.epsilon_decay = .995
-        self.learning_rate = 0.001
-        self.memory = deque(maxlen=100000)
-        self.model = self._buildModel()
+        self.batchSize = 256
+        self.maxSize = 32
+        self.epsilonMin = .01
+        self.epsilonDecay = .995
+        self.learningRate = 0.001
+        self.memory = deque(maxlen=10000000)
+        self.model = self._buildMainModel()
 
-    def AI(self, episode):
-        loss = []
+    def AI(self, board: Board) -> tuple:
+        """
+        Learns to play the draughts game
+        :return: the loss
+        """
+        self.board = board
+        self.state = self._convertState(self.board.board)
+        self.actionSpace = self._encodeMoves(self.colour, self.board)
+        if len(self.actionSpace) == 0:
+            return self.score, None
 
-        max_steps = 1000
+        action = self._act()
+        reward, nextState, done = self.board.step(action, self.colour)
+        self.score += reward
+        self.state = self._convertState(nextState.board)
+        self._remember(deepcopy(self.board), action, reward, self.state, done)
+        self._replay()
 
-        for e in range(episode):
-            state = self.env.reset()
-            state = np.reshape(state, (1, self.state_space))
-            score = 0
-            for i in range(max_steps):
-                action = self.act(state)
-                reward, next_state, done = self.env.step(action)
-                score += reward
-                next_state = np.reshape(next_state, (1, self.state_space))
-                self.remember(state, action, reward, next_state, done)
-                state = next_state
-                self.replay()
-                if done:
-                    print("episode: {}/{}, score: {}".format(e, episode, score))
-                    break
-            loss.append(score)
+        return self.score, nextState
 
-    def _buildModel(self):
+    def _buildMainModel(self) -> Sequential:
+        """
+        Build the model for the AI
+        :return: the model
+        """
         # Board model
-        board_model = Sequential()
+        modelLayers = [
+            Lambda(lambda x: tf.reshape(x, [-1, 32])),
+            Dense(256, activation='relu'),
+            Dropout(0.2),
+            Dense(128, activation='relu'),
+            Dropout(0.2),
+            Dense(64, activation='relu'),
+            Dropout(0.2),
+            Dense(32, activation='relu', kernel_regularizer=regularizers.l2(0.01)),
+            Dropout(0.2),
+            Dense(16, activation='relu', kernel_regularizer=regularizers.l2(0.01)),
+            Dropout(0.2),
+            Dense(1, activation='linear', kernel_regularizer=regularizers.l2(0.01))
+        ]
+        boardModel = Sequential(modelLayers)
 
-        # input dimensions is 32 board position values
-        board_model.add(Dense(64, activation='relu', input_dim=32))
+        # boardModel.add(BatchNormalization())
+        boardModel.compile(optimizer=Adam(learning_rate=0.0001), loss='mean_squared_error')
+        boardModel.build(input_shape=(None, None))
 
-        # use regularizers, to prevent fitting noisy labels
-        board_model.add(Dense(32, activation='relu', kernel_regularizer=regularizers.l2(0.01)))
-        board_model.add(Dense(16, activation='relu', kernel_regularizer=regularizers.l2(0.01)))  # 16
-        board_model.add(Dense(8, activation='relu', kernel_regularizer=regularizers.l2(0.01)))  # 8
+        print(boardModel.summary())
 
-        # output isn't squashed, because it might lose information
-        board_model.add(Dense(1, activation='linear', kernel_regularizer=regularizers.l2(0.01)))
-        board_model.compile(optimizer='nadam', loss='binary_crossentropy')
+        return boardModel
 
-        return board_model
-
-    def remember(self, state, action, reward, next_state, done):
-        self.memory.append((state, action, reward, next_state, done))
-
-    def replay(self):
-        if len(self.memory) < self.batch_size:
+    def _replay(self) -> None:
+        """
+        trains the model
+        :return: None (void)
+        """
+        if len(self.memory) < self.batchSize:
+            # Not enough data to replay and test the model
             return
 
-        minibatch = random.sample(self.memory, self.batch_size)
-        states = np.array([i[0] for i in minibatch])
-        actions = np.array([i[1] for i in minibatch])
-        rewards = np.array([i[2] for i in minibatch])
-        next_states = np.array([i[3] for i in minibatch])
-        dones = np.array([i[4] for i in minibatch])
+        # Get a random sample from the memory
+        minibatch = random.sample(self.memory, int(self.maxSize))
 
-        states = np.squeeze(states)
-        next_states = np.squeeze(next_states)
+        # Extract states, rewards, dones
+        states = [m[0] for m in minibatch]
+        rewards = [m[2] for m in minibatch]
+        dones = [m[4] for m in minibatch]
 
-        targets = rewards + self.gamma * (np.amax(self.model.predict_on_batch(next_states), axis=1)) * (1 - dones)
-        targets_full = self.model.predict_on_batch(states)
+        # Encoded moves
+        encodedMoves = []
+        for state in states:
+            encodedMoves.append(self._encodeMoves(self.colour, state))
 
-        ind = np.array([i for i in range(self.batch_size)])
-        targets_full[[ind], [actions]] = targets
+        # Calculate targets
+        targets = []
+        for i, moves in enumerate(encodedMoves):
+            if dones[i]:
+                target = rewards[i]
+            else:
+                target = rewards[i] + self.gamma * self._maxNextQ()
 
-        self.model.fit(states, targets_full, epochs=1, verbose=0)
-        if self.epsilon > self.epsilon_min:
-            self.epsilon *= self.epsilon_decay
+            targets.append(target)
 
-    def act(self, state):
+        encodedMoves = np.array([np.pad(m, (0, self.maxSize - len(m)), 'constant', constant_values=(1, 1))
+                                 for m in encodedMoves])
+        targets = np.array(targets)
+        self.model.fit(self._normalise(encodedMoves), self._normalise(targets), epochs=20)
+        if self.epsilon > self.epsilonMin:
+            self.epsilon *= self.epsilonDecay
+
+    def _remember(self, state: np.array, action: int, reward: float, nextState: np.array, done: bool) -> None:
+        """
+        Remembers what it has learnt
+        :param state: the current state
+        :param action: the action taken
+        :param reward: the reward for the action
+        :param nextState: the next state
+        :param done: whether the game is finished
+        :return: None (void)
+        """
+        self.memory.append((state, action, reward, nextState, done))
+
+    def _act(self) -> Any:
+        """
+        Chooses an action based on the available moves
+        :return: the action
+        """
         if np.random.rand() <= self.epsilon:
-            return random.randrange(self.action_space)
-        act_values = self.model.predict(state)
-        return np.argmax(act_values[0])
+            # choose a random action from the action spaces list
+            mm = MiniMax()
+            value, newBoard = mm.AI(3, self.colour, self.gameManager)
+            if newBoard is None:
+                return random.choice(self.actionSpace)
+            where = self._boardDiff(self.board, newBoard)
+            return self._encode(where[0]+1, where[1]+1)
+
+        if len(self.actionSpace) == 1:
+            return self.actionSpace[0]
+        encodedMoves = np.squeeze(self.actionSpace)
+        encodedMoves = np.pad(encodedMoves, (0, self.maxSize - len(encodedMoves)), 'constant', constant_values=(1, 1))
+        act_values = self.model.predict(self._normalise(encodedMoves))
+        return self.actionSpace[np.argmax(act_values[0])]
+
+    def resetScore(self):
+        self.score = 0
+
+    def _convertState(self, board: list) -> list:
+        """
+        Converts the board into a 2D list of numbers
+        :param board: 2D list of pieces
+        :return: new 2D list of numbers
+        """
+        num_board = []
+
+        for row in board:
+            num_row = []
+            for piece in row:
+                if piece == 0:
+                    num_row.append(0)
+                    continue
+
+                if piece.colour == 1:
+                    num_row.append(1)
+                    continue
+
+                num_row.append(2)
+
+            num_board.append(num_row)
+
+        return num_board
+
+    def _encode(self, start: tuple, end: tuple) -> int:
+        """
+        Encodes the move into an integer
+        :param start: tuple of start position
+        :param end: tuple of end position
+        :return: encoded move
+        """
+        start_row = start[0]
+        start_col = end[0]
+
+        end_row = start[-1]
+        end_col = end[-1]
+
+        # Concatenate into integer
+        return int(str(start_row) + str(start_col) + str(end_row) + str(end_col))
+
+    def _maxNextQ(self) -> float:
+        colour = WHITE if self.colour == GREEN else GREEN
+        encodedMoves = self._encodeMoves(colour, self.board)
+        if len(encodedMoves) == 0:
+            return -1
+        paddedMoves = np.array(np.pad(encodedMoves, (0, self.maxSize - len(encodedMoves)), 'constant', constant_values=(1, 1)))
+        # paddedMoves = np.reshape(paddedMoves, (32, 8, 8))
+        # paddedMoves = paddedMoves / np.max(paddedMoved
+        # paddedMoves = paddedMoves.reshape(32,)
+        # pm = tf.convert_to_tensor(paddedMoves, dtype=tf.float32)
+        # pm = tf.reshape(pm, [32])
+        print(paddedMoves.shape)
+        nextQValues = self.model.predict_on_batch(self._normalise(paddedMoves))
+        return np.max(nextQValues)
+
+    def _encodeMoves(self, colour: int, board: Board) -> list:
+        """
+        Encodes the moves into a list encoded moves
+        :param colour: colour of the player
+        :param board: the board
+        :return: list of encoded moves
+        """
+        encodedMoves = []
+        moves = board.getAllMoves(colour)
+        for move in moves:
+            where = self._boardDiff(board, move)
+            encodedMoves.append(self._encode(where[0]+1, where[1]+1))
+        return encodedMoves
+
+    def _boardDiff(self, board, move):
+        cnvState = np.array(self._convertState(board.board))
+        cnvMove = np.array(self._convertState(move.board))
+        diff = np.subtract(cnvMove, cnvState)
+        diff = np.nonzero(diff)
+        return diff
+
+    def _normalise(self, data):
+        """
+        Normalise the data
+        """
+        for i in range(len(data)):
+            data[i] = data[i] / np.linalg.norm(data[i])
+        return data
diff --git a/run.sh b/run.sh
new file mode 100755
index 0000000..14d2031
--- /dev/null
+++ b/run.sh
@@ -0,0 +1,2 @@
+conda activate
+python main.py
diff --git a/utilities/board.py b/utilities/Board.py
similarity index 60%
rename from utilities/board.py
rename to utilities/Board.py
index a1bf435..b4387a3 100644
--- a/utilities/board.py
+++ b/utilities/Board.py
@@ -1,5 +1,5 @@
 import pygame
-
+from copy import deepcopy
 from .constants import BLACK, ROWS, GREEN, SQUARE_SIZE, COLS, WHITE
 from .piece import Piece
 
@@ -9,34 +9,39 @@
         self.board = []
         self.greenLeft = self.whiteLeft = 12
         self.greenKings = self.whiteKings = 0
-        self.createBoard()
+        self.green = (144, 184, 59)
+        self._createBoard()
 
-    def drawSquares(self, win):
+    def _drawSquares(self, win):
         win.fill(BLACK)
         for row in range(ROWS):
             for col in range(row % 2, ROWS, 2):
-                pygame.draw.rect(win, GREEN, (row * SQUARE_SIZE, col * SQUARE_SIZE, SQUARE_SIZE, SQUARE_SIZE))
+                pygame.draw.rect(win, self.green, (row * SQUARE_SIZE, col * SQUARE_SIZE, SQUARE_SIZE, SQUARE_SIZE))
 
-    def createBoard(self):
+    def _createBoard(self):
         for row in range(ROWS):
             self.board.append([])
             for col in range(COLS):
                 if col % 2 == ((row + 1) % 2):
                     if row < 3:
                         self.board[row].append(Piece(row, col, WHITE))
-                    elif row > 4:
+                        continue
+
+                    if row > 4:
                         self.board[row].append(Piece(row, col, GREEN))
-                    else:
-                        self.board[row].append(None)
-                else:
-                    self.board[row].append(None)
+                        continue
+
+                    self.board[row].append(0)
+                    continue
+
+                self.board[row].append(0)
 
     def draw(self, win):
-        self.drawSquares(win)
+        self._drawSquares(win)
         for row in range(ROWS):
             for col in range(COLS):
                 piece = self.board[row][col]
-                if piece is not None:
+                if piece != 0:
                     piece.draw(win)
 
     def move(self, piece, row, col):
@@ -45,19 +50,40 @@
 
         if row == ROWS - 1 or row == 0:
             piece.makeKing()
-            if piece.colour == WHITE:
-                self.whiteKings += 1
-            else:
-                self.greenKings += 1
+
+            if piece.colour == WHITE:
+                self.whiteKings += 1
+
+            if piece.colour == GREEN:
+                self.greenKings += 1
 
     def remove(self, skipped):
         for piece in skipped:
-            self.board[piece.row][piece.col] = None
-            if piece is not None:
+            self.board[piece.row][piece.col] = 0
+            if piece != 0:
                 if piece.colour == GREEN:
                     self.greenLeft -= 1
-                else:
-                    self.whiteLeft -= 1
+                    return
+                self.whiteLeft -= 1
+
+    def getAllMoves(self, colour):
+        moves = []
+
+        for piece in self.getAllPieces(colour):
+            validMoves = self.getValidMoves(piece)
+            for move, skip in validMoves.items():
+                tempBoard = deepcopy(self)
+                tempPiece = tempBoard.getPiece(piece.row, piece.col)
+                newBoard = self._simulateMove(tempPiece, move, tempBoard, skip)
+                moves.append(newBoard)
+        return moves
+
+    def _simulateMove(self, piece, move, board, skip):
+        board.move(piece, move[0], move[1])
+        if skip:
+            board.remove(skip)
+
+        return board
 
     def getPiece(self, row, col):
         return self.board[row][col]
@@ -65,7 +91,8 @@
     def winner(self):
         if self.greenLeft <= 0:
             return WHITE
-        elif self.whiteLeft <= 0:
+
+        if self.whiteLeft <= 0:
             return GREEN
 
         return None
@@ -76,16 +103,10 @@
         left = piece.col - 1
         right = piece.col + 1
         row = piece.row
-        if piece.colour == GREEN:
-            moves.update(self._traverseLeft(row - 1, max(row - 3, -1), -1, piece.colour, left))
-            moves.update(self._traverseRight(row - 1, max(row - 3, -1), -1, piece.colour, right))
-        if piece.colour == WHITE:
-            moves.update(self._traverseLeft(row + 1, min(row + 3, ROWS), 1, piece.colour, left))
-            moves.update(self._traverseRight(row + 1, min(row + 3, ROWS), 1, piece.colour, right))
-
-        if piece.king:
+        if piece.colour == GREEN or piece.king:
             moves.update(self._traverseLeft(row - 1, max(row - 3, -1), -1, piece.colour, left))
             moves.update(self._traverseRight(row - 1, max(row - 3, -1), -1, piece.colour, right))
+        if piece.colour == WHITE or piece.king:
             moves.update(self._traverseLeft(row + 1, min(row + 3, ROWS), 1, piece.colour, left))
             moves.update(self._traverseRight(row + 1, min(row + 3, ROWS), 1, piece.colour, right))
 
@@ -122,7 +143,7 @@
         pieces = []
         for row in self.board:
             for piece in row:
-                if piece is not None and piece.colour == colour:
+                if piece != 0 and piece.colour == colour:
                     pieces.append(piece)
         return pieces
 
@@ -162,7 +183,7 @@ def _traverse(self, row, col, skipped, moves, step, last, colour):
         current = self.board[row][col]
-        if current is None:
+        if current == 0:
             if skipped and not last:
                 return None
             elif skipped:
@@ -183,3 +204,56 @@
             else:
                 last = [current]
         return last
+
+    def step(self, move, colour):
+        start, end = self._decode(move)
+        start[0] = start[0] - 1
+        start[1] = start[1] - 1
+        end[0] = end[0] - 1
+        end[1] = end[1] - 1
+        reward = 0
+        done = False
+        piece = self.getPiece(start[0], start[1])
+        if piece == 0:
+            newStart = end
+            end = start
+            start = newStart
+            piece = self.getPiece(start[0], start[1])
+        moves = self.getValidMoves(piece)
+        for move, skip in moves.items():
+            if tuple(end) == move:
+                self._simulateMove(piece, move, self, skip)
+                if len(skip) == 1:
+                    reward = 2
+                    break
+                if len(skip) > 1:
+                    reward = 3 + len(skip) * 0.2
+                    break
+                reward = -0.5
+                break
+
+        if self.winner() == colour:
+            done = True
+            reward = 10
+        return reward, self, done
+
+
+    def _decode(self, move):
+        # Split digits back out
+        str_code = str(move)
+        print(str_code)
+        start_row = int(str_code[0])
+        start_col = int(str_code[1])
+        end_row = int(str_code[2])
+        end_col = int(str_code[3])
+        # Reconstruct positions
+        start = [start_row, start_col]
+        end = [end_row, end_col]
+        return start, end
+
+    # def reset(self):
+    #     self.board = []
+    #     self.whiteLeft = self.greenLeft = 12
+    #     self.whiteKings = self.greenKings = 0
+    #     self._createBoard()
+    #     return self.board
diff --git a/utilities/constants.py b/utilities/constants.py
index 526c64e..11bb3ce 100644
--- a/utilities/constants.py
+++ b/utilities/constants.py
@@ -6,8 +6,8 @@ SQUARE_SIZE = WIDTH // COLS
 
 # RGB color
-GREEN = (144, 184, 59)
-WHITE = (255, 255, 255)
+GREEN = 1
+WHITE = 2
 BLACK = (0, 0, 0)
 BLUE = (0, 0, 255)
 GREY = (128, 128, 128)
diff --git a/utilities/gameManager.py b/utilities/gameManager.py
index 93db2a1..5eabd1e 100644
--- a/utilities/gameManager.py
+++ b/utilities/gameManager.py
@@ -1,7 +1,8 @@
 import pygame
-from utilities.board import Board
+from utilities.Board import Board
 from utilities.constants import GREEN, WHITE, BLUE, SQUARE_SIZE
 
+
 class GameManager:
     def __init__(self, win, colour):
         self._init(colour)
@@ -29,14 +30,14 @@
             self.selected = None
             self.select(row, col)
         piece = self.board.getPiece(row, col)
-        if piece is not None and piece.colour == self.turn:
+        if piece != 0 and piece.colour == self.turn:
             self.selected = piece
             self.validMoves = self.board.getValidMoves(piece)
             return True
 
     def _move(self, row, col):
         piece = self.board.getPiece(row, col)
-        if self.selected and piece is None and (row, col) in self.validMoves:
+        if self.selected and piece == 0 and (row, col) in self.validMoves:
             self.board.move(self.selected, row, col)
             skipped = self.validMoves[row, col]
             if self.validMoves[list(self.validMoves.keys())[0]]:
@@ -58,8 +59,8 @@
         self.validMoves = {}
         if self.turn == GREEN:
             self.turn = WHITE
-        else:
-            self.turn = GREEN
+            return
+        self.turn = GREEN
 
     def drawValidMoves(self, moves):
         for row, col in moves:
diff --git a/utilities/piece.py b/utilities/piece.py
index c808fd9..9326024 100644
--- a/utilities/piece.py
+++ b/utilities/piece.py
@@ -1,6 +1,6 @@
 import pygame.draw
 
-from utilities.constants import SQUARE_SIZE, GREY, CROWN
+from utilities.constants import SQUARE_SIZE, GREY, CROWN, GREEN
 
 
 class Piece:
@@ -14,6 +14,8 @@
         self.calcPosition()
         self.padding = 20
         self.border = 2
+        self.green = (144, 184, 59)
+        self.white = (255, 255, 255)
 
     def calcPosition(self):
         self.x = SQUARE_SIZE * self.col + SQUARE_SIZE // 2
@@ -25,7 +27,7 @@
     def draw(self, win):
         radius = SQUARE_SIZE // 2 - self.padding
         pygame.draw.circle(win, GREY, (self.x, self.y), radius + self.border)
-        pygame.draw.circle(win, self.colour, (self.x, self.y), radius)
+        pygame.draw.circle(win, self.green if self.colour == GREEN else self.white, (self.x, self.y), radius)
         if self.king:
             win.blit(CROWN, (self.x - CROWN.get_width() // 2, self.y - CROWN.get_height() // 2))