diff --git a/.idea/draughts.iml b/.idea/draughts.iml
index 3c96bee..3858d78 100644
--- a/.idea/draughts.iml
+++ b/.idea/draughts.iml
@@ -4,7 +4,7 @@
-
+
\ No newline at end of file
diff --git a/.idea/misc.xml b/.idea/misc.xml
index d22234a..582a97a 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -1,4 +1,4 @@
-
+
\ No newline at end of file
diff --git a/.idea/other.xml b/.idea/other.xml
new file mode 100644
index 0000000..a708ec7
--- /dev/null
+++ b/.idea/other.xml
@@ -0,0 +1,6 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/Report.pdf b/Report.pdf
deleted file mode 100644
index 3f309d9..0000000
Binary files a/Report.pdf and /dev/null differ
diff --git a/changeInRewards-3.txt b/changeInRewards-3.txt
new file mode 100644
index 0000000..47ba1a2
--- /dev/null
+++ b/changeInRewards-3.txt
@@ -0,0 +1,100 @@
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
diff --git a/changeInRewards-5.txt b/changeInRewards-5.txt
new file mode 100644
index 0000000..47ba1a2
--- /dev/null
+++ b/changeInRewards-5.txt
@@ -0,0 +1,100 @@
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
diff --git a/main.py b/main.py
index 921efef..7df3bca 100644
--- a/main.py
+++ b/main.py
@@ -1,7 +1,10 @@
import sys
import pygame
+import numpy as np
+from matplotlib import pyplot as plt
+from reinforcementLearning.ReinforcementLearning import ReinforcementLearning
from utilities.constants import WIDTH, HEIGHT, SQUARE_SIZE, WHITE, GREEN
from utilities.gameManager import GameManager
from minimax.minimaxAlgo import MiniMax
@@ -11,21 +14,45 @@ WIN = pygame.display.set_mode((WIDTH, HEIGHT))
pygame.display.set_caption("Draughts")
-def getRowColFromMouse(pos):
+def getRowColFromMouse(pos: tuple) -> tuple:
+ """
+ Gets the row and column from the mouse position
+ :param pos: X and Y position of the mouse
+ :return: Row and column
+ """
x, y = pos
row = y // SQUARE_SIZE
col = x // SQUARE_SIZE
return row, col
-def drawText(text, font, color, surface, x, y):
- textobj = font.render(text, 1, color)
+def drawText(text: str, font: pygame.font.Font, colour: tuple, surface: pygame.Surface, x: float, y: int) -> None:
+ """
+ Draws text on the screen
+ :param text: Text to draw
+ :param font: System font
+ :param colour: Colour of the text
+ :param surface: The display surface
+ :param x: X position of the text
+ :param y: Y position of the text
+    :return: None
+ """
+ textobj = font.render(text, 1, colour)
textrect = textobj.get_rect()
textrect.topleft = (x, y)
surface.blit(textobj, textrect)
-def drawMultiLineText(surface, text, pos, font, color=pygame.Color('black')):
+def drawMultiLineText(surface: pygame.Surface, text: str, pos: tuple, font: pygame.font.Font, colour: tuple = pygame.Color('black')) -> None:
+ """
+ Draws multiline text on the screen
+ :param surface: the display surface
+ :param text: text to draw
+ :param pos: X and Y position of the text
+ :param font: System font
+ :param colour: colour of the text
+    :return: None
+ """
words = [word.split(' ') for word in text.splitlines()] # 2D array where each row is a list of words.
space = font.size(' ')[0] # The width of a space.
max_width, max_height = surface.get_size()
@@ -33,7 +60,7 @@ def drawMultiLineText(surface, text, pos, font, color=pygame.Color('black')):
word_height = None
for line in words:
for word in line:
- word_surface = font.render(word, 0, color)
+ word_surface = font.render(word, 0, colour)
word_width, word_height = word_surface.get_size()
if x + word_width >= max_width:
x = pos[0] # Reset the x.
@@ -44,80 +71,89 @@ def drawMultiLineText(surface, text, pos, font, color=pygame.Color('black')):
y += word_height # Start on new row.
-def main():
+def main(difficulty: int = 0) -> None:
+ """
+ Main function, that shows the menu before running the game
+ :param difficulty: difficulty of minimax
+ :return: None
+ """
pygame.init()
screen = pygame.display.set_mode((WIDTH, HEIGHT))
menuClock = pygame.time.Clock()
click = False
width = screen.get_width()
- font = pygame.font.SysFont(None, 25)
- difficulty = 0
+ font = pygame.font.SysFont("", 25)
- while True:
- # menu
- screen.fill((128, 128, 128))
- drawText('Main Menu', font, (255, 255, 255), screen, width / 2, 20)
+ if difficulty == 0:
+ while True:
+ # menu
+ screen.fill((128, 128, 128))
+ drawText('Main Menu', font, (255, 255, 255), screen, width / 2, 20)
- mx, my = pygame.mouse.get_pos()
+ mx, my = pygame.mouse.get_pos()
- easy = pygame.Rect(width / 2 - 50, 100, 200, 50)
- pygame.draw.rect(screen, (0, 255, 0), easy)
- drawText("easy", font, (255, 255, 255), screen, width / 2, 100)
- medium = pygame.Rect(width / 2 - 50, 200, 200, 50)
- pygame.draw.rect(screen, (255, 125, 0), medium)
- drawText("medium", font, (255, 255, 255), screen, width / 2, 200)
- hard = pygame.Rect(width / 2 - 50, 300, 200, 50)
- pygame.draw.rect(screen, (255, 0, 0), hard)
- drawText("hard", font, (255, 255, 255), screen, width / 2, 300)
- rules = pygame.Rect(width / 2 - 50, 400, 200, 50)
- pygame.draw.rect(screen, (0, 0, 255), rules)
- drawText("rules", font, (255, 255, 255), screen, width / 2, 400)
- quitGame = pygame.Rect(width / 2 - 50, 500, 200, 50)
- pygame.draw.rect(screen, (0, 0, 0), quitGame)
- drawText("quit", font, (255, 255, 255), screen, width / 2, 500)
+ easy = pygame.Rect(width / 2 - 50, 100, 200, 50)
+ pygame.draw.rect(screen, (0, 255, 0), easy)
+ drawText("easy", font, (255, 255, 255), screen, width / 2, 100)
+ medium = pygame.Rect(width / 2 - 50, 200, 200, 50)
+ pygame.draw.rect(screen, (255, 125, 0), medium)
+ drawText("medium", font, (255, 255, 255), screen, width / 2, 200)
+ hard = pygame.Rect(width / 2 - 50, 300, 200, 50)
+ pygame.draw.rect(screen, (255, 0, 0), hard)
+ drawText("hard", font, (255, 255, 255), screen, width / 2, 300)
+ rules = pygame.Rect(width / 2 - 50, 400, 200, 50)
+ pygame.draw.rect(screen, (0, 0, 255), rules)
+ drawText("rules", font, (255, 255, 255), screen, width / 2, 400)
+ quitGame = pygame.Rect(width / 2 - 50, 500, 200, 50)
+ pygame.draw.rect(screen, (0, 0, 0), quitGame)
+ drawText("quit", font, (255, 255, 255), screen, width / 2, 500)
- if easy.collidepoint((mx, my)):
- if click:
- difficulty = 1
- break
- if medium.collidepoint((mx, my)):
- if click:
- difficulty = 3
- break
- if hard.collidepoint((mx, my)):
- if click:
- difficulty = 5
- break
- if rules.collidepoint((mx, my)):
- if click:
- rulesGUI()
- break
- if quitGame.collidepoint((mx, my)):
- if click:
- pygame.quit()
- sys.exit()
- click = False
- for event in pygame.event.get():
- if event.type == pygame.QUIT:
- pygame.quit()
- sys.exit()
- if event.type == pygame.MOUSEBUTTONDOWN:
- if event.button == 1:
- click = True
+ if easy.collidepoint((mx, my)):
+ if click:
+ difficulty = 1
+ break
+ if medium.collidepoint((mx, my)):
+ if click:
+ difficulty = 3
+ break
+ if hard.collidepoint((mx, my)):
+ if click:
+ difficulty = 5
+ break
+ if rules.collidepoint((mx, my)):
+ if click:
+ rulesGUI()
+ break
+ if quitGame.collidepoint((mx, my)):
+ if click:
+ pygame.quit()
+ sys.exit()
+ click = False
+ for event in pygame.event.get():
+ if event.type == pygame.QUIT:
+ pygame.quit()
+ sys.exit()
+ if event.type == pygame.MOUSEBUTTONDOWN:
+ if event.button == 1:
+ click = True
- pygame.display.update()
- menuClock.tick(60)
- if difficulty != 0:
- game(difficulty)
+ pygame.display.update()
+ menuClock.tick(60)
+
+ game(difficulty)
-def rulesGUI():
+def rulesGUI() -> None:
+ """
+ Shows the rules of the game
+ :return: None
+ """
screen = pygame.display.set_mode((WIDTH, HEIGHT))
menuClock = pygame.time.Clock()
click = False
width = screen.get_width()
- titleFont = pygame.font.SysFont(None, 48)
- font = pygame.font.SysFont(None, 21)
+ titleFont = pygame.font.SysFont("", 48)
+ font = pygame.font.SysFont("", 21)
while True:
screen.fill((128, 128, 128))
drawText("Rules", titleFont, (255, 255, 255), screen, width / 2, 20)
@@ -171,43 +207,116 @@ multi-jump until the next move.""", (50, 50), font)
menuClock.tick(60)
-def game(difficulty):
+def game(difficulty: int) -> None:
+ """
+ Runs the game with the given difficulty. Used for training and testing the RL algorithm
+ :param difficulty: The difficulty of the minimax algorithm
+ """
run = True
clock = pygame.time.Clock()
gameManager = GameManager(WIN, GREEN)
+ rl = ReinforcementLearning(gameManager.board.getAllMoves(WHITE), gameManager.board, WHITE, gameManager)
+ # model = rl.buildMainModel()
+ rl.model.load_weights("./modelWeights/model_final.h5")
+ mm = MiniMax()
+ totalReward = []
+ winners = []
+ for i in range(50):
+ score = 0
+ for j in range(200):
+ print(j)
+ clock.tick(FPS)
+ reward = 0
+ if gameManager.turn == WHITE:
+ # mm = MiniMax()
+ # value, newBoard = mm.AI(difficulty, WHITE, gameManager)
+ # gameManager.aiMove(newBoard)
+ # reward, newBoard = rl.AITrain(gameManager.board)
+ newBoard = rl.AITest(gameManager.board)
- while run:
- clock.tick(FPS)
+ if newBoard is None:
+ print("Cannot make move")
+ continue
+ gameManager.aiMove(newBoard)
- if gameManager.turn == WHITE:
- mm = MiniMax()
- value, newBoard = mm.AI(gameManager.getBoard(), difficulty, WHITE, gameManager)
- gameManager.aiMove(newBoard)
- # time.sleep(0.15)
+ gameManager.update()
+ pygame.display.update()
- if gameManager.turn == GREEN:
- mm = MiniMax()
- value, newBoard = mm.AI(gameManager.getBoard(), difficulty, GREEN, gameManager)
- gameManager.aiMove(newBoard)
- # time.sleep(0.15)
+ if gameManager.turn == GREEN:
+ value, newBoard = mm.AI(difficulty, GREEN, gameManager)
+ gameManager.aiMove(newBoard)
- if gameManager.winner() != None:
- print(gameManager.winner())
- run = False
+ score += reward
- for event in pygame.event.get():
- if event.type == pygame.QUIT:
- run = False
- if event.type == pygame.MOUSEBUTTONDOWN:
- pos = pygame.mouse.get_pos()
- row, col = getRowColFromMouse(pos)
- # if gameManager.turn == GREEN:
- gameManager.select(row, col)
+ if gameManager.winner() is not None:
+ print("Green" if gameManager.winner() == GREEN else "White", " wins")
+ # with open(f"winners-{difficulty}.txt", "a+") as f:
+ # f.write(str(gameManager.winner()) + "\n")
+ winners.append(gameManager.winner())
+ break
- gameManager.update()
- pygame.display.update()
+ # for event in pygame.event.get():
+ # if event.type == pygame.QUIT:
+ # break
+ # if event.type == pygame.MOUSEBUTTONDOWN:
+ # pos = pygame.mouse.get_pos()
+ # row, col = getRowColFromMouse(pos)
+ # # if gameManager.turn == GREEN:
+ # gameManager.select(row, col)
+ gameManager.update()
+ pygame.display.update()
+
+ if gameManager.winner() is None:
+ # with open(f"winners-{difficulty}.txt", "a+") as f:
+ # f.write(str(0) + "\n")
+ winners.append(0)
+ gameManager.reset()
+ rl.resetScore()
+ print("Game: ", i, " Reward: ", score)
+ # with open(f"rewards-{difficulty}.txt", "a+") as f:
+ # f.write(str(score) + "\n")
+
+ totalReward.append(score)
+ # save model weights every 25 games
+ # if i % 250 == 0 and i != 0:
+ # rl.model.save("./modelWeights/model_" + str(i) + ".h5")
# pygame.quit()
+ # rl.model.save("./modelWeights/model_final.h5")
+ change_in_rewards = [0] # Initialize with 0 for the first episode
+ for i in range(1, len(totalReward)):
+ change_in_reward = totalReward[i] - totalReward[i - 1]
+ change_in_rewards.append(change_in_reward)
-main()
+ # with open(f"changeInRewards-{difficulty}.txt", "a+") as f:
+ # for i in change_in_rewards:
+ # f.write(str(i) + "\n")
+
+ # episodes = list(range(1, len(totalReward) + 1))
+ #
+ # plt.plot(episodes, change_in_rewards)
+ # plt.xlabel('Training Games')
+ # plt.ylabel('Change in Game Reward')
+ # plt.title('Change in Game Reward vs. Training Games')
+ # plt.grid(True)
+ # plt.show()
+ #
+ # plt.plot([i for i in range(len(totalReward))], totalReward)
+ # plt.xlabel("Games")
+ # plt.ylabel("Reward")
+ # plt.show()
+
+ fig, ax = plt.subplots()
+ bar = ax.bar(["Draw", "White", "Green"], [winners.count(0), winners.count(WHITE), winners.count(GREEN)])
+ ax.set(xlabel='Winner', ylabel='Frequency', ylim=[0, 500])
+ ax.set_title(f"Winners for difficulty — {difficulty}")
+ ax.bar_label(bar)
+ plt.show()
+
+
+# difficulties = [3, 5, 7, 9]
+#
+# for diff in difficulties:
+# main(diff)
+main(3)
diff --git a/minimax/minimaxAlgo.py b/minimax/minimaxAlgo.py
index 063b8cd..d6d2af0 100644
--- a/minimax/minimaxAlgo.py
+++ b/minimax/minimaxAlgo.py
@@ -1,56 +1,46 @@
import random
-from copy import deepcopy
from math import inf
from utilities.constants import GREEN, WHITE
+from utilities.gameManager import GameManager
-class MiniMax():
+class MiniMax:
- def AI(self, board, depth, maxPlayer, gameManager):
- if depth == 0 or board.winner() is not None:
- return board.scoreOfTheBoard(), board
+ def AI(self, depth: int, maxPlayer: int, gameManager: GameManager) -> tuple:
+ """
+ The minimax algorithm
+ :param depth: How deep the algorithm should go
+ :param maxPlayer: The current player
+ :param gameManager: The game manager
+ :return: the best evaluation and board
+ """
+ if depth == 0 or gameManager.board.winner() is not None:
+ return gameManager.board.scoreOfTheBoard(), gameManager.board
- if maxPlayer:
+ if type(maxPlayer) == int:
maxEval = -inf
bestMove = None
- for move in self.getAllMoves(board, maxPlayer):
- evaluation = self.AI(move, depth - 1, False, gameManager)[0]
+ for move in gameManager.board.getAllMoves(maxPlayer):
+ evaluation = self.AI(depth - 1, False, gameManager)[0]
maxEval = max(maxEval, evaluation)
if maxEval > evaluation:
bestMove = move
if maxEval == evaluation:
+ # bestMove = move
bestMove = bestMove if random.choice([True, False]) else move
return maxEval, bestMove
else:
minEval = inf
bestMove = None
colour = WHITE if gameManager.turn == GREEN else GREEN
- for move in self.getAllMoves(board, colour):
- evaluation = self.AI(move, depth - 1, True, gameManager)[0]
+ for move in gameManager.board.getAllMoves(colour):
+ evaluation = self.AI(depth - 1, True, gameManager)[0]
minEval = min(minEval, evaluation)
if minEval < evaluation:
bestMove = move
if minEval == evaluation:
+ # bestMove = move
bestMove = bestMove if random.choice([True, False]) else move
return minEval, bestMove
-
- def _simulateMove(self, piece, move, board, skip):
- board.move(piece, move[0], move[1])
- if skip:
- board.remove(skip)
-
- return board
-
- def getAllMoves(self, board, colour):
- moves = []
-
- for piece in board.getAllPieces(colour):
- validMoves = board.getValidMoves(piece)
- for move, skip in validMoves.items():
- tempBoard = deepcopy(board)
- tempPiece = tempBoard.getPiece(piece.row, piece.col)
- newBoard = self._simulateMove(tempPiece, move, tempBoard, skip)
- moves.append(newBoard)
- return moves
diff --git a/modelWeights/model_250.h5 b/modelWeights/model_250.h5
new file mode 100644
index 0000000..dc44e7a
Binary files /dev/null and b/modelWeights/model_250.h5 differ
diff --git a/modelWeights/model_final.h5 b/modelWeights/model_final.h5
new file mode 100644
index 0000000..f7dfd74
Binary files /dev/null and b/modelWeights/model_final.h5 differ
diff --git a/reinforcementLearning/ReinforcementLearning.py b/reinforcementLearning/ReinforcementLearning.py
index e554162..1addec2 100644
--- a/reinforcementLearning/ReinforcementLearning.py
+++ b/reinforcementLearning/ReinforcementLearning.py
@@ -1,96 +1,280 @@
import random
from collections import deque
+from typing import Any
+from copy import deepcopy
import numpy as np
import tensorflow as tf
-from tensorflow.python.keras import Sequential, regularizers
-from tensorflow.python.keras.layers import Dense
+from keras.engine.input_layer import InputLayer
+from keras.layers import BatchNormalization
+from tensorflow.python.keras import Sequential, regularizers, Input
+from tensorflow.python.keras.layers import Dense, Lambda, Dropout
+from tensorflow.python.keras.optimizer_v2.adam import Adam
+
+from minimax.minimaxAlgo import MiniMax
+from utilities import Board
+from utilities.constants import WHITE, GREEN
+from utilities.gameManager import GameManager
class ReinforcementLearning():
- def __init__(self, action_space, state_space, env):
- self.action_space = action_space
- self.state_space = state_space
- self.env = env
+ def __init__(self, actionSpace: list, board: Board, colour: int, gameManager: GameManager) -> None:
+        """
+        Constructor for the ReinforcementLearning class
+        :param actionSpace: list of currently available (encoded) moves; :param board: the game board
+        :param colour: the colour this agent plays as; :param gameManager: the game manager
+        """
+ self.gameManager = gameManager
+ self.actionSpace = actionSpace
+ self.board = board
+ self.state = self.board.board
+ self.colour = colour
+ self.score = 0
self.epsilon = 1
self.gamma = .95
- self.batch_size = 64
- self.epsilon_min = .01
- self.epsilon_decay = .995
- self.learning_rate = 0.001
- self.memory = deque(maxlen=100000)
- self.model = self._buildModel()
+ self.batchSize = 512
+ self.maxSize = 32
+ self.epsilonMin = .01
+ self.epsilonDecay = .995
+ self.learningRate = 0.0001
+ self.memory = deque(maxlen=10000000)
+ self.model = self.buildMainModel()
+ print(self.model.summary())
- def AI(self, episode):
- loss = []
+ def AITrain(self, board: Board) -> tuple:
+ """
+ Learns to play the draughts game
+ :return: The loss
+ """
+ self.board = board
+ self.state = self._convertState(self.board.board)
+ self.actionSpace = self.encodeMoves(self.colour, self.board)
+ if len(self.actionSpace) == 0:
+ return self.score, None
- max_steps = 1000
+ action = self._act()
+ reward, nextState, done = self.board.step(action, self.colour)
+ self.score += reward
+ self.state = self._convertState(nextState.board)
+ self._remember(deepcopy(self.board), action, reward, self.state, done)
+ self._replay()
- for e in range(episode):
- state = self.env.reset()
- state = np.reshape(state, (1, self.state_space))
- score = 0
- for i in range(max_steps):
- action = self.act(state)
- reward, next_state, done = self.env.step(action)
- score += reward
- next_state = np.reshape(next_state, (1, self.state_space))
- self.remember(state, action, reward, next_state, done)
- state = next_state
- self.replay()
- if done:
- print("episode: {}/{}, score: {}".format(e, episode, score))
- break
- loss.append(score)
+ return self.score, nextState
- def _buildModel(self):
+ def AITest(self, board: Board) -> Board:
+ """
+ Runs the AI
+ :param board: The board
+ :return: The new board
+ """
+ actionSpace = self.encodeMoves(WHITE, board)
+ if len(actionSpace) == 0:
+ print("Cannot make move")
+ return None
+ totalMoves = len(actionSpace)
+ # moves = np.squeeze(moves)
+ moves = np.pad(actionSpace, (0, self.maxSize - totalMoves), 'constant', constant_values=(1, 1))
+ act_values = self.model.predict(self.normalise(moves))
+ val = np.argmax(act_values[0])
+ val = val if val < totalMoves else totalMoves - 1
+ reward, newBoard, done = board.step(actionSpace[val], WHITE)
+ return newBoard
+
+ def buildMainModel(self) -> Sequential:
+ """
+ Build the model for the AI
+ :return: The model
+ """
# Board model
- board_model = Sequential()
+ modelLayers = [
+ Lambda(lambda x: tf.reshape(x, [-1, 32])),
+ Dense(512, activation='relu', kernel_regularizer=regularizers.l2(0.01)),
+ Dropout(0.2),
+ Dense(256, activation='relu', kernel_regularizer=regularizers.l2(0.01)),
+ Dropout(0.2),
+ Dense(128, activation='relu', kernel_regularizer=regularizers.l2(0.01)),
+ Dropout(0.2),
+ Dense(64, activation='relu', kernel_regularizer=regularizers.l2(0.01)),
+ Dropout(0.2),
+ Dense(32, activation='relu', kernel_regularizer=regularizers.l2(0.01)),
+ Dropout(0.2),
+ Dense(16, activation='linear', kernel_regularizer=regularizers.l2(0.01))
+ ]
+ boardModel = Sequential(modelLayers)
- # input dimensions is 32 board position values
- board_model.add(Dense(64, activation='relu', input_dim=32))
+ # boardModel.add(BatchNormalization())
+ boardModel.compile(optimizer=Adam(learning_rate=self.learningRate), loss='mean_squared_error')
+ boardModel.build(input_shape=(None, None))
- # use regularizers, to prevent fitting noisy labels
- board_model.add(Dense(32, activation='relu', kernel_regularizer=regularizers.l2(0.01)))
- board_model.add(Dense(16, activation='relu', kernel_regularizer=regularizers.l2(0.01))) # 16
- board_model.add(Dense(8, activation='relu', kernel_regularizer=regularizers.l2(0.01))) # 8
+ return boardModel
- # output isn't squashed, because it might lose information
- board_model.add(Dense(1, activation='linear', kernel_regularizer=regularizers.l2(0.01)))
- board_model.compile(optimizer='nadam', loss='binary_crossentropy')
-
- return board_model
-
- def remember(self, state, action, reward, next_state, done):
- self.memory.append((state, action, reward, next_state, done))
-
- def replay(self):
- if len(self.memory) < self.batch_size:
+ def _replay(self) -> None:
+ """
+ trains the model
+ :return: None
+ """
+ if len(self.memory) < self.batchSize:
+ # Not enough data to replay and test the model
return
- minibatch = random.sample(self.memory, self.batch_size)
- states = np.array([i[0] for i in minibatch])
- actions = np.array([i[1] for i in minibatch])
- rewards = np.array([i[2] for i in minibatch])
- next_states = np.array([i[3] for i in minibatch])
- dones = np.array([i[4] for i in minibatch])
+ # Get a random sample from the memory
+ minibatch = random.sample(self.memory, int(self.maxSize))
- states = np.squeeze(states)
- next_states = np.squeeze(next_states)
+ # Extract states, rewards, dones
+ states = [m[0] for m in minibatch]
+ rewards = [m[2] for m in minibatch]
+ dones = [m[4] for m in minibatch]
- targets = rewards + self.gamma * (np.amax(self.model.predict_on_batch(next_states), axis=1)) * (1 - dones)
- targets_full = self.model.predict_on_batch(states)
+ # Encoded moves
+ encodedMoves = []
+ for state in states:
+ encodedMoves.append(self.encodeMoves(self.colour, state))
- ind = np.array([i for i in range(self.batch_size)])
- targets_full[[ind], [actions]] = targets
+ # Calculate targets
+ targets = []
+ for i, moves in enumerate(encodedMoves):
+ if dones[i]:
+ target = rewards[i]
+ else:
+ target = rewards[i] + self.gamma * self._maxNextQ()
- self.model.fit(states, targets_full, epochs=1, verbose=0)
- if self.epsilon > self.epsilon_min:
- self.epsilon *= self.epsilon_decay
+ targets.append(target)
- def act(self, state):
+ encodedMoves = np.array([np.pad(m, (0, self.maxSize - len(m)), 'constant', constant_values=(1, 1))
+ for m in encodedMoves])
+ targets = np.array(targets)
+ self.model.fit(self.normalise(encodedMoves), self.normalise(targets), epochs=20)
+ if self.epsilon > self.epsilonMin:
+ self.epsilon *= self.epsilonDecay
+
+ def _remember(self, state: np.array, action: int, reward: float, nextState: np.array, done: bool) -> None:
+ """
+ Remembers what it has learnt
+ :param state: The current state
+ :param action: The action taken
+ :param reward: The reward for the action
+ :param nextState: The next state
+ :param done: Whether the game is finished
+ :return: None
+ """
+ self.memory.append((state, action, reward, nextState, done))
+
+ def _act(self) -> Any:
+ """
+ Chooses an action based on the available moves
+ :return: The action
+ """
if np.random.rand() <= self.epsilon:
- return random.randrange(self.action_space)
- act_values = self.model.predict(state)
- return np.argmax(act_values[0])
+ # choose a random action from the action spaces list
+ mm = MiniMax()
+ value, newBoard = mm.AI(3, self.colour, self.gameManager)
+ if newBoard is None:
+ return random.choice(self.actionSpace)
+ where = self._boardDiff(self.board, newBoard)
+ return self._encode(where[0]+1, where[1]+1)
+
+ if len(self.actionSpace) == 1:
+ return self.actionSpace[0]
+ encodedMoves = np.squeeze(self.actionSpace)
+ encodedMoves = np.pad(encodedMoves, (0, self.maxSize - len(encodedMoves)), 'constant', constant_values=(1, 1))
+ actValues = self.model.predict(self.normalise(encodedMoves))
+ val = np.argmax(actValues[0])
+ val = val if val < len(self.actionSpace) else len(self.actionSpace) - 1
+ return self.actionSpace[val]
+
+ def resetScore(self) -> None:
+ """
+ Resets the score
+ :return: None
+ """
+ self.score = 0
+
+ def _convertState(self, board: list) -> list:
+ """
+ Converts the board into a 2D list of numbers
+ :param board: 2D list of pieces
+ :return: new 2D list of numbers
+ """
+ num_board = []
+
+ for row in board:
+ num_row = []
+ for piece in row:
+ if piece == 0:
+ num_row.append(0)
+ continue
+
+ if piece.colour == 1:
+ num_row.append(1)
+ continue
+
+ num_row.append(2)
+
+ num_board.append(num_row)
+
+ return num_board
+
+ def _encode(self, start: tuple, end: tuple) -> int:
+ """
+ Encodes the move into an integer
+ :param start: Tuple of start position
+ :param end: Tuple of end position
+ :return: Encoded move
+ """
+ start_row = start[0]
+ start_col = end[0]
+
+ end_row = start[-1]
+ end_col = end[-1]
+
+ # Concatenate into integer
+ return int(str(start_row) + str(start_col) + str(end_row) + str(end_col))
+
+ def _maxNextQ(self) -> float:
+ """
+ Calculates the max Q value for the next state
+ :return: the max Q value
+ """
+ colour = WHITE if self.colour == GREEN else GREEN
+ encodedMoves = self.encodeMoves(colour, self.board)
+ if len(encodedMoves) == 0:
+ return -1
+ paddedMoves = np.array(np.pad(encodedMoves, (0, self.maxSize - len(encodedMoves)), 'constant', constant_values=(1, 1)))
+ nextQValues = self.model.predict_on_batch(self.normalise(paddedMoves))
+ return np.max(nextQValues)
+
+ def encodeMoves(self, colour: int, board: Board) -> list:
+ """
+ Encodes the moves into a list encoded moves
+ :param colour: Colour of the player
+ :param board: The board
+ :return: list Of encoded moves
+ """
+ encodedMoves = []
+ moves = board.getAllMoves(colour)
+ for move in moves:
+ where = self._boardDiff(board, move)
+ encodedMoves.append(self._encode(where[0]+1, where[1]+1))
+ return encodedMoves
+
+ def _boardDiff(self, board: Board, move: Board) -> np.array:
+ """
+ Finds the difference between the two boards
+ :param board: The current board
+ :param move: The new board
+ :return: the difference between the two boards
+ """
+ cnvState = np.array(self._convertState(board.board))
+ cnvMove = np.array(self._convertState(move.board))
+ diff = np.subtract(cnvMove, cnvState)
+ diff = np.nonzero(diff)
+ return diff
+
+ def normalise(self, data: np.array) -> np.array:
+ """
+ Normalise the data
+ :param data: the data to normalise
+ :return: normalised data
+ """
+ return data / 10000
\ No newline at end of file
diff --git a/results.py b/results.py
new file mode 100644
index 0000000..7d0b17d
--- /dev/null
+++ b/results.py
@@ -0,0 +1,80 @@
+import matplotlib.pyplot as plt
+import numpy as np
+from utilities.constants import GREEN, WHITE
+
+# winners = []
+with open("winners-5.txt", "r") as f:
+ winners = f.readlines()
+
+winners = [int(x.strip()) for x in winners]
+
+# lavg = []
+# for i in range(0, len(winners), 25):
+# lavg.append(winners[i:i+25].count(2) / 25)
+#
+# x = np.arange(0, len(lavg))
+# y = np.array(lavg) * 100
+#
+# a, b = np.polyfit(x, y, 1)
+#
+# fig, ax = plt.subplots(figsize=(10, 5))
+# ax.plot(y)
+# ax.set_xticks(np.arange(0, len(lavg), 2))
+# ax.minorticks_on()
+# ax.plot(x, a*x+b, color='red', linestyle='--', linewidth=2)
+# ax.set_ylim([0, 100])
+# ax.set_title("Winners Average")
+# ax.grid(which='major', linestyle='-', linewidth='0.5', color='black')
+# ax.grid(which='minor', linestyle=':', linewidth='0.5')
+# ax.set_xlabel("Average Set")
+# ax.set_ylabel("Percentage of Wins")
+# ax.tick_params(which="minor", bottom=False, left=False)
+# plt.show()
+
+fig, ax = plt.subplots()
+bar = ax.bar(["Draw", "White", "Green"], [winners.count(0), winners.count(WHITE), winners.count(GREEN)])
+ax.set(xlabel='Winner', ylabel='Frequency', ylim=[0, 100])
+ax.set_title("Winners at Depth 5")
+ax.grid(which='major', linestyle='-', linewidth='0.5', color='grey', axis='y')
+ax.bar_label(bar)
+plt.show()
+
+# with open("trainedRewards.txt", "r") as f:
+# totalReward = f.readlines()
+#
+# totalReward = [float(x.strip()) for x in totalReward]
+# filteredReward = list(filter(lambda x: x > -1500, totalReward))
+
+# change_in_rewards = [0] # Initialize with 0 for the first episode
+# for i in range(1, len(totalReward)):
+# change_in_reward = totalReward[i] - totalReward[i - 1]
+# change_in_rewards.append(change_in_reward)
+#
+# games = list(range(1, len(totalReward) + 1))
+
+# plt.plot(games, change_in_rewards)
+# plt.xlabel('Training Games')
+# plt.ylabel('Change in Game Reward')
+# plt.title('Change in Game Reward vs. Training Games')
+# plt.grid(True)
+# plt.show()
+# major_ticks = np.arange(0, 101, 20)
+# minor_ticks = np.arange(0, 101, 5)
+#
+# plt.plot([i for i in range(len(totalReward))], totalReward)
+# plt.title("Rewards to Games")
+# plt.xlabel("Games")
+# plt.ylabel("Reward")
+# plt.xticks(major_ticks)
+# plt.xticks(minor_ticks, minor=True)
+# plt.yticks(major_ticks)
+# plt.yticks(minor_ticks, minor=True)
+# plt.grid(which='both')
+# plt.show()
+#
+# plt.plot([i for i in range(len(filteredReward))], filteredReward)
+# plt.title("Filtered Rewards to Games")
+# plt.xlabel("Games")
+# plt.ylabel("Reward")
+# plt.grid(which='both')
+# plt.show()
diff --git a/rewards-5.txt b/rewards-5.txt
new file mode 100644
index 0000000..5688a80
--- /dev/null
+++ b/rewards-5.txt
@@ -0,0 +1,56 @@
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
diff --git a/rewardsA.png b/rewardsA.png
new file mode 100644
index 0000000..f29c5f3
Binary files /dev/null and b/rewardsA.png differ
diff --git a/run.sh b/run.sh
new file mode 100755
index 0000000..14d2031
--- /dev/null
+++ b/run.sh
@@ -0,0 +1,2 @@
+conda activate
+python main.py
diff --git a/trainedRewards.txt b/trainedRewards.txt
new file mode 100644
index 0000000..9a20a5d
--- /dev/null
+++ b/trainedRewards.txt
@@ -0,0 +1,500 @@
+180.5
+115.19999999999999
+-155.39999999999998
+-5169.4000000000015
+100.0
+-3354.2999999999956
+123.79999999999998
+-1738.0
+261.40000000000015
+120.89999999999999
+147.80000000000004
+108.0
+113.50000000000001
+110.5000000000002
+-1048.3000000000006
+75.8
+232.70000000000016
+89.10000000000001
+279.9000000000002
+165.40000000000003
+85.4
+34.20000000000016
+266.20000000000016
+101.69999999999999
+283.0
+-264.5
+225.0
+328.0
+215.5
+150.0
+-217.5
+-2920.0
+82.5
+-208.5
+150.5
+196.5
+223.0
+265.5
+-282.5
+175.5
+206.5
+221.5
+127.5
+-6337.5
+147.5
+231.5
+137.5
+-180.5
+108.0
+-339.5
+190.0
+-69.0
+52.5
+58.0
+-5575.0
+-159.5
+197.5
+177.5
+-5547.5
+-65.5
+136.5
+292.5
+-169.5
+185.0
+115.5
+198.0
+30.0
+162.5
+95.5
+170.0
+113.0
+-1405.0
+-27.0
+-4832.199999999999
+147.5
+228.0
+59.0
+262.5
+-220.0
+150.5
+177.5
+140.0
+123.0
+119.0
+137.5
+134.0
+175.5
+-5598.5
+46.5
+135.0
+205.0
+186.5
+177.5
+120.1
+332.5
+162.5
+122.5
+262.5
+-70.0
+159.0
+138.0
+240.5
+215.0
+147.5
+-118.0
+260.5
+199.0
+130.0
+265.0
+142.5
+230.0
+135.0
+197.5
+-179.5
+198.0
+288.0
+200.5
+-222.5
+165.5
+139.0
+228.0
+211.5
+197.5
+102.5
+233.0
+95.5
+-129.0
+187.5
+158.0
+295.0
+240.5
+-222.5
+-1841.5
+198.0
+113.0
+305.0
+-482.5
+125.5
+215.0
+110.0
+-180.0
+170.0
+-62.5
+215.5
+132.5
+187.5
+135.0
+-65.0
+138.0
+-1972.0
+240.5
+-237.5
+610.0
+267.5
+52.5
+-211.5
+217.5
+88.0
+305.5
+165.5
+115.0
+182.5
+-69.5
+333.0
+363.0
+112.5
+-15.5
+150.5
+118.0
+-52.5
+318.0
+174.0
+198.0
+-5705.0
+160.5
+155.0
+125.0
+165.0
+259.0
+165.5
+155.0
+-236.0
+220.5
+-15.5
+117.5
+367.5
+237.5
+255.0
+85.0
+-5342.5
+141.5
+-3582.5
+-600.0
+915.5
+179.0
+190.0
+-47.5
+275.5
+-5.0
+195.0
+128.0
+146.5
+750.5
+153.0
+-5157.5
+-279.5
+219.0
+154.0
+153.0
+-234.5
+248.0
+182.5
+122.5
+155.5
+1078.0
+102.5
+358.0
+152.5
+261.5
+239.0
+128.0
+111.5
+93.0
+310.5
+-87.0
+158.0
+113.0
+165.5
+120.0
+256.5
+90.5
+245.0
+159.0
+160.0
+-5272.0
+-88.5
+159.0
+169.0
+147.5
+-1149.5
+-372.0
+-270.0
+95.0
+142.5
+212.5
+154.0
+425.0
+153.0
+213.0
+280.5
+-80.5
+-45.90000000000003
+-2250.5
+123.50000000000003
+149.40000000000006
+219.0
+108.0
+180.0
+271.19999999999993
+202.5
+121.8000000000001
+47.599999999999966
+-35.0
+281.5
+307.5
+99.80000000000001
+154.0
+166.30000000000004
+271.5
+205.5
+145.5
+265.0
+113.0
+144.0
+88.0
+-204.5
+204.0
+215.0
+177.5
+168.0
+263.0
+66.5
+258.0
+-5477.5
+94.5
+-139.0
+190.5
+160.0
+-35.5
+149.0
+100.5
+130.0
+-40.0
+175.0
+132.5
+107.5
+143.0
+-5097.5
+97.5
+-1880.0
+-15.0
+213.0
+-601.0
+282.5
+276.5
+113.0
+106.5
+-1011.5
+128.0
+150.0
+145.5
+233.0
+209.0
+136.5
+240.0
+7.5
+-1535.0
+238.0
+185.0
+157.5
+-1660.0
+-15.5
+-145.0
+178.0
+-4997.5
+182.5
+197.5
+355.5
+130.0
+232.5
+-5420.0
+190.0
+128.0
+115.0
+2.5
+149.0
+220.0
+-87.0
+-447.5
+-4122.5
+-67.5
+-425.0
+283.0
+925.0
+49.5
+-15.0
+233.0
+215.5
+234.0
+154.0
+141.5
+226.5
+220.0
+110.5
+270.0
+253.0
+-1944.0
+215.0
+250.5
+155.0
+260.5
+185.0
+261.5
+232.5
+177.5
+-97.5
+-196.0
+230.0
+205.5
+-367.0
+265.5
+180.0
+135.5
+139.0
+103.0
+314.0
+192.5
+179.0
+97.5
+52.5
+135.0
+184.0
+-305.0
+147.5
+206.5
+157.5
+243.0
+-6125.0
+257.5
+125.60000000000002
+190.0
+-6225.0
+96.5
+350.0
+193.0
+185.5
+206.5
+223.0
+-225.0
+117.5
+170.0
+223.0
+175.5
+210.0
+-222.0
+148.60000000000002
+-133.0
+-193.5
+152.5
+-152.0
+-6245.0
+-2.0
+50.5
+-140.5
+185.5
+125.5
+208.0
+-200.0
+202.5
+112.5
+119.0
+210.5
+-1199.5
+-2.5
+102.5
+0.5
+275.5
+135.5
+-32.5
+235.5
+-617.0
+110.0
+222.5
+-372.0
+-53.0
+306.5
+117.5
+-5095.0
+223.0
+-257.0
+-5760.0
+11.5
+182.5
+160.0
+325.5
+151.5
+-327.5
+-5655.0
+62.5
+-5550.0
+195.0
+92.5
+-5290.0
+215.0
+59.0
+179.0
+188.0
+-2103.5
+253.0
+118.0
+335.0
+85.0
+207.5
+229.0
+152.5
+-188.5
+-177.5
+274.0
+220.0
+-5169.5
+128.0
+-1260.0
+140.0
+147.5
+140.0
+-505.0
+155.0
+225.5
+188.0
+131.5
+1111.5
+180.0
+-4897.5
+-687.5
+125.0
+180.0
+111.5
+-5582.5
+232.5
+153.0
+-130.5
+102.5
+189.0
+157.5
+-5685.0
+325.0
+-6870.0
+-520.5
+-3027.0
+32.5
diff --git a/trainedWinners.txt b/trainedWinners.txt
new file mode 100644
index 0000000..8105b8b
--- /dev/null
+++ b/trainedWinners.txt
@@ -0,0 +1,500 @@
+2
+0
+2
+2
+2
+1
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+0
+2
+2
+2
+1
+2
+2
+2
+1
+2
+2
+0
+2
+2
+2
+0
+1
+2
+2
+2
+2
+2
+2
+1
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+1
+2
+2
+2
+2
+2
+1
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+1
+2
+2
+2
+2
+1
+1
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+0
+2
+1
+1
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+1
+2
+2
+1
+2
+2
+2
+0
+2
+2
+2
+2
+2
+2
+2
+2
+2
+1
+2
+2
+2
+2
+2
+0
+2
+2
+2
+0
+2
+2
+1
+2
+2
+2
+2
+2
+0
+2
+0
+2
+2
+2
+2
+2
+2
+2
+2
+2
+0
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+0
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+22
+2
+2
+1
+2
+1
+2
+2
+2
+2
+2
+2
+2
+2
+2
+1
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+0
+1
+1
+2
+2
+1
+2
+2
+2
+1
+2
+2
+2
+2
+0
+2
+2
+2
+2
+1
+2
+2
+2
+2
+1
+2
+2
+2
+2
+2
+2
+2
+2
+1
+2
+2
+2
+1
+1
+2
+2
+0
+2
+2
+2
+2
+2
+0
+2
+2
+2
+1
+2
+2
+1
+2
+2
+2
+1
+2
+0
+1
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+1
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+0
+2
+2
+2
+0
+2
+2
+2
+2
+2
+2
+2
+2
+3
+2
+2
+2
+2
+2
+1
+2
+2
+2
+0
+2
+2
+1
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+1
+2
+2
+0
+2
+2
+0
+2
+2
+2
+2
+2
+2
+0
+2
+0
+2
+2
+0
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+1
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+0
+2
+0
+2
+2
+2
+2
+0
+2
+2
+1
+2
+0
+2
+0
+2
+0
+1
+2
+2
diff --git a/utilities/Board.py b/utilities/Board.py
new file mode 100644
index 0000000..ebd53c5
--- /dev/null
+++ b/utilities/Board.py
@@ -0,0 +1,390 @@
+from __future__ import annotations
+
+import pygame
+from copy import deepcopy
+from .constants import BLACK, ROWS, GREEN, SQUARE_SIZE, COLS, WHITE
+from .piece import Piece
+
+
+class Board:
+ def __init__(self) -> None:
+ """
+ Constructor for the Board class
+ :return: None
+ """
+ self.board = []
+ self.greenLeft = self.whiteLeft = 12
+ self.greenKings = self.whiteKings = 0
+ self.green = (144, 184, 59)
+ self._createBoard()
+
+ def _drawSquares(self, win: pygame.display) -> None:
+ """
+ Draws the squares on the board
+ :param win: The window
+ """
+ win.fill(BLACK)
+ for row in range(ROWS):
+ for col in range(row % 2, ROWS, 2):
+ pygame.draw.rect(win, self.green, (row * SQUARE_SIZE, col * SQUARE_SIZE, SQUARE_SIZE, SQUARE_SIZE))
+
+ def _createBoard(self) -> None:
+ """
+ Creates a board representation of the game
+ :return: None
+ """
+ for row in range(ROWS):
+ self.board.append([])
+ for col in range(COLS):
+ if col % 2 == ((row + 1) % 2):
+ if row < 3:
+ self.board[row].append(Piece(row, col, WHITE))
+ continue
+
+ if row > 4:
+ self.board[row].append(Piece(row, col, GREEN))
+ continue
+
+ self.board[row].append(0)
+ continue
+
+ self.board[row].append(0)
+
+ def draw(self, win: pygame.display) -> None:
+ """
+ Draws the pieces on the board
+ :param win: The window
+ :return: None
+ """
+ self._drawSquares(win)
+ for row in range(ROWS):
+ for col in range(COLS):
+ piece = self.board[row][col]
+ if piece != 0:
+ piece.draw(win)
+
+ def move(self, piece: Piece, row: int, col: int) -> None:
+ """
+        Moves a piece and makes it a king if it reaches the end of the board
+ :param piece: Piece to move
+ :param row: Row to move to
+ :param col: Column to move to
+ :return: None
+ """
+ self.board[piece.row][piece.col], self.board[row][col] = self.board[row][col], self.board[piece.row][piece.col]
+ piece.move(row, col)
+
+ if row == ROWS - 1 or row == 0:
+ piece.makeKing()
+
+ if piece.colour == WHITE:
+ self.whiteKings += 1
+
+ if piece.colour == GREEN:
+ self.greenKings += 1
+
+ def remove(self, skipped: tuple) -> None:
+ """
+ Removes a piece from the board
+        :param skipped: Tuple of the pieces to remove
+ """
+ for piece in skipped:
+ self.board[piece.row][piece.col] = 0
+ if piece != 0:
+ if piece.colour == GREEN:
+ self.greenLeft -= 1
+ continue
+ self.whiteLeft -= 1
+
+ def getAllMoves(self, colour: int) -> list:
+ """
+ Gets all the possible moves for a player
+ :param colour: colour of the player
+        :return: List of Board states, one for each possible move
+ """
+ moves = []
+ possibleMoves = []
+ possiblePieces = []
+ pieces = self.getAllPieces(colour)
+ hasForcedCapture = False
+
+ for piece in pieces:
+ validMoves = self.getValidMoves(piece)
+
+ # Check if there are forced capture moves for this piece
+ forcedCaptureMoves = [move for move, skip in validMoves.items() if skip]
+ if forcedCaptureMoves:
+ hasForcedCapture = True
+ possiblePieces.append(piece)
+ possibleMoves.append({move: skip for move, skip in validMoves.items() if skip})
+
+ if hasForcedCapture:
+ # If there are forced capture moves, consider only those
+ for i in range(len(possibleMoves)):
+ for move, skip in possibleMoves[i].items():
+ tempBoard = deepcopy(self)
+ tempPiece = tempBoard.getPiece(possiblePieces[i].row, possiblePieces[i].col)
+ newBoard = self._simulateMove(tempPiece, move, tempBoard, skip)
+ moves.append(newBoard)
+ else:
+ # If no forced capture moves, consider all valid moves
+ for piece in pieces:
+ validMoves = self.getValidMoves(piece)
+ for move, skip in validMoves.items():
+ tempBoard = deepcopy(self)
+ tempPiece = tempBoard.getPiece(piece.row, piece.col)
+ newBoard = self._simulateMove(tempPiece, move, tempBoard, skip)
+ moves.append(newBoard)
+
+ return moves
+
+ def _simulateMove(self, piece: Piece, move: list, board: Board, skip: tuple) -> Board:
+ """
+ Simulates a move on the board
+ :param piece: Piece to move
+ :param move: Move to make
+ :param board: Board to make the move on
+ :param skip: Tuple of pieces to skip
+ :return: Board after the move
+ """
+ board.move(piece, move[0], move[1])
+ if skip:
+ board.remove(skip)
+
+ return board
+
+ def getPiece(self, row: int, col: int) -> Piece:
+ """
+ Gets a piece from the board
+ :param row: Row of the piece
+ :param col: Column of the piece
+ :return: Piece
+ """
+ return self.board[row][col]
+
+ def winner(self):
+ if self.greenLeft <= 0:
+ return WHITE
+
+ if self.whiteLeft <= 0:
+ return GREEN
+
+ return None
+
+ def getValidMoves(self, piece: Piece) -> dict:
+ """
+ Gets all the valid moves for a piece
+ :param piece: Piece to get the moves for
+ :return: dictionary of moves
+ """
+ moves = {}
+ forcedCapture = {}
+ left = piece.col - 1
+ right = piece.col + 1
+ row = piece.row
+ if piece.colour == GREEN or piece.king:
+ moves.update(self._traverseLeft(row - 1, max(row - 3, -1), -1, piece.colour, left))
+ moves.update(self._traverseRight(row - 1, max(row - 3, -1), -1, piece.colour, right))
+ if piece.colour == WHITE or piece.king:
+ moves.update(self._traverseLeft(row + 1, min(row + 3, ROWS), 1, piece.colour, left))
+ moves.update(self._traverseRight(row + 1, min(row + 3, ROWS), 1, piece.colour, right))
+
+ if len(moves.values()) <= 1:
+ return moves
+
+ movesValues = list(moves.values())
+ movesKeys = list(moves.keys())
+
+ forced = {}
+
+ for i in range(len(movesKeys)):
+ if not movesValues[i]:
+ forced[movesKeys[i]] = moves[movesKeys[i]]
+ if len(forced) != len(moves):
+ forced.clear()
+ for i in range(len(movesKeys)):
+ if movesValues[i]:
+ forced[movesKeys[i]] = moves[movesKeys[i]]
+ if len(forced) != len(moves):
+ for i in range(len(movesKeys)):
+ if movesValues[i]:
+ forcedCapture[movesKeys[i]] = moves[movesKeys[i]]
+ else:
+ forcedCapture = forced
+ else:
+ forcedCapture = forced
+
+ return forcedCapture
+
+ def scoreOfTheBoard(self) -> int:
+ """
+ Calculates the score of the board
+ :return: score of the board
+ """
+ return self.whiteLeft - self.greenLeft
+
+ def getAllPieces(self, colour):
+ """
+ Gets all the pieces of a player
+ :param colour: Piece colour
+ :return: Pieces of the player
+ """
+ pieces = []
+ for row in self.board:
+ for piece in row:
+ if piece != 0 and piece.colour == colour:
+ pieces.append(piece)
+ return pieces
+
+ def _traverseLeft(self, start: int, stop: int, step: int, colour: int, left: int, skipped: list = []) -> dict:
+ """
+ Traverses the left side of the board
+ :param start: Start position
+ :param stop: Stop position
+ :param step: Step size
+ :param colour: colour of the player
+ :param left: Left position
+ :param skipped: List of pieces to skip
+ :return: dictionary of moves
+ """
+ moves = {}
+ last = []
+ for row in range(start, stop, step):
+ if left < 0:
+ break
+ mvs = self._traverse(row, left, skipped, moves, step, last, colour)
+ if mvs is None:
+ break
+ elif isinstance(mvs, list):
+ last = mvs
+ else:
+ moves.update(mvs)
+ left -= 1
+ return moves
+
+ def _traverseRight(self, start: int, stop: int, step: int, colour: int, right: int, skipped: list = []) -> dict:
+ """
+        Traverses the right side of the board
+ :param start: Start position
+ :param stop: Stop position
+ :param step: Step size
+ :param colour: colour of the player
+ :param right: Right position
+ :param skipped: List of pieces to skip
+ :return: dictionary of moves
+ """
+ moves = {}
+ last = []
+ for row in range(start, stop, step):
+ if right >= COLS:
+ break
+
+ mvs = self._traverse(row, right, skipped, moves, step, last, colour)
+ if mvs is None:
+ break
+ elif isinstance(mvs, list):
+ last = mvs
+ else:
+ moves.update(mvs)
+
+ right += 1
+ return moves
+
+ def _traverse(self, row: int, col: int, skipped: list, moves: dict, step: int, last: list, colour: int) -> list or None:
+ """
+ Traverses the board
+ :param row: Row to traverse
+ :param col: Column to traverse
+ :param skipped: List of pieces to jump
+ :param moves: Dictionary of moves
+ :param step: Step size
+ :param last: List of last pieces
+ :param colour: Colour of the player
+ :return: list of last pieces or None
+ """
+ current = self.board[row][col]
+ if current == 0:
+ if skipped and not last:
+ return None
+ elif skipped:
+ moves[(row, col)] = last + skipped
+ else:
+ moves[(row, col)] = last
+
+ if last:
+ if step == -1:
+ rowCalc = max(row - 3, 0)
+ else:
+ rowCalc = min(row + 3, ROWS)
+ moves.update(self._traverseLeft(row + step, rowCalc, step, colour, col - 1, skipped=last))
+ moves.update(self._traverseRight(row + step, rowCalc, step, colour, col + 1, skipped=last))
+ return None
+ elif current.colour == colour:
+ return None
+ else:
+ last = [current]
+ return last
+
+ def step(self, move: int, colour: int) -> None:
+ """
+ Takes a move and executes it
+ :param move: The move to execute
+ :param colour: The colour of the player
+ :return: None
+ """
+ start, end = self._decode(move)
+ start[0] = start[0] - 1
+ start[1] = start[1] - 1
+ end[0] = end[0] - 1
+ end[1] = end[1] - 1
+ reward = 0
+ done = False
+ piece = self.getPiece(start[0], start[1])
+ if piece == 0:
+ newStart = end
+ end = start
+ start = newStart
+ piece = self.getPiece(start[0], start[1])
+ moves = self.getValidMoves(piece)
+ for move, skip in moves.items():
+ if tuple(end) == move:
+ self._simulateMove(piece, move, self, skip)
+ if len(skip) == 1:
+ reward = 2
+ break
+ if len(skip) > 1:
+ reward = 3 + len(skip) * 0.2
+ break
+ reward = -0.5
+ break
+
+ if self.winner() == colour:
+ done = True
+ reward = 10
+ return reward, self, done
+
+
+ def _decode(self, move: int) -> tuple:
+ """
+        Decodes the move from an integer to a start and end tuple
+ :param move: The move to decode
+ :return: Start and end tuple
+ """
+ # Split digits back out
+ str_code = str(move)
+ # print(str_code)
+ start_row = int(str_code[0])
+ start_col = int(str_code[1])
+ end_row = int(str_code[2])
+ end_col = int(str_code[3])
+ # Reconstruct positions
+ start = [start_row, start_col]
+ end = [end_row, end_col]
+ return start, end
+
+ # def reset(self):
+ # self.board = []
+ # self.whiteLeft = self.greenLeft = 12
+ # self.whiteKings = self.greenKings = 0
+ # self._createBoard()
+ # return self.board
diff --git a/utilities/board.py b/utilities/board.py
deleted file mode 100644
index a1bf435..0000000
--- a/utilities/board.py
+++ /dev/null
@@ -1,185 +0,0 @@
-import pygame
-
-from .constants import BLACK, ROWS, GREEN, SQUARE_SIZE, COLS, WHITE
-from .piece import Piece
-
-
-class Board:
- def __init__(self):
- self.board = []
- self.greenLeft = self.whiteLeft = 12
- self.greenKings = self.whiteKings = 0
- self.createBoard()
-
- def drawSquares(self, win):
- win.fill(BLACK)
- for row in range(ROWS):
- for col in range(row % 2, ROWS, 2):
- pygame.draw.rect(win, GREEN, (row * SQUARE_SIZE, col * SQUARE_SIZE, SQUARE_SIZE, SQUARE_SIZE))
-
- def createBoard(self):
- for row in range(ROWS):
- self.board.append([])
- for col in range(COLS):
- if col % 2 == ((row + 1) % 2):
- if row < 3:
- self.board[row].append(Piece(row, col, WHITE))
- elif row > 4:
- self.board[row].append(Piece(row, col, GREEN))
- else:
- self.board[row].append(None)
- else:
- self.board[row].append(None)
-
- def draw(self, win):
- self.drawSquares(win)
- for row in range(ROWS):
- for col in range(COLS):
- piece = self.board[row][col]
- if piece is not None:
- piece.draw(win)
-
- def move(self, piece, row, col):
- self.board[piece.row][piece.col], self.board[row][col] = self.board[row][col], self.board[piece.row][piece.col]
- piece.move(row, col)
-
- if row == ROWS - 1 or row == 0:
- piece.makeKing()
- if piece.colour == WHITE:
- self.whiteKings += 1
- else:
- self.greenKings += 1
-
- def remove(self, skipped):
- for piece in skipped:
- self.board[piece.row][piece.col] = None
- if piece is not None:
- if piece.colour == GREEN:
- self.greenLeft -= 1
- else:
- self.whiteLeft -= 1
-
- def getPiece(self, row, col):
- return self.board[row][col]
-
- def winner(self):
- if self.greenLeft <= 0:
- return WHITE
- elif self.whiteLeft <= 0:
- return GREEN
-
- return None
-
- def getValidMoves(self, piece):
- moves = {}
- forcedCapture = {}
- left = piece.col - 1
- right = piece.col + 1
- row = piece.row
- if piece.colour == GREEN:
- moves.update(self._traverseLeft(row - 1, max(row - 3, -1), -1, piece.colour, left))
- moves.update(self._traverseRight(row - 1, max(row - 3, -1), -1, piece.colour, right))
- if piece.colour == WHITE:
- moves.update(self._traverseLeft(row + 1, min(row + 3, ROWS), 1, piece.colour, left))
- moves.update(self._traverseRight(row + 1, min(row + 3, ROWS), 1, piece.colour, right))
-
- if piece.king:
- moves.update(self._traverseLeft(row - 1, max(row - 3, -1), -1, piece.colour, left))
- moves.update(self._traverseRight(row - 1, max(row - 3, -1), -1, piece.colour, right))
- moves.update(self._traverseLeft(row + 1, min(row + 3, ROWS), 1, piece.colour, left))
- moves.update(self._traverseRight(row + 1, min(row + 3, ROWS), 1, piece.colour, right))
-
- if len(moves.values()) <= 1:
- return moves
-
- movesValues = list(moves.values())
- movesKeys = list(moves.keys())
-
- forced = {}
-
- for i in range(len(movesKeys)):
- if not movesValues[i]:
- forced[movesKeys[i]] = moves[movesKeys[i]]
- if len(forced) != len(moves):
- forced.clear()
- for i in range(len(movesKeys)):
- if movesValues[i]:
- forced[movesKeys[i]] = moves[movesKeys[i]]
- if len(forced) != len(moves):
- for i in range(len(movesKeys)):
- if movesValues[i]:
- forcedCapture[movesKeys[i]] = moves[movesKeys[i]]
- else:
- forcedCapture = forced
- else:
- forcedCapture = forced
- return forcedCapture
-
- def scoreOfTheBoard(self):
- return self.whiteLeft - self.greenLeft
-
- def getAllPieces(self, colour):
- pieces = []
- for row in self.board:
- for piece in row:
- if piece is not None and piece.colour == colour:
- pieces.append(piece)
- return pieces
-
- def _traverseLeft(self, start, stop, step, colour, left, skipped=[]):
- moves = {}
- last = []
- for row in range(start, stop, step):
- if left < 0:
- break
- mvs = self._traverse(row, left, skipped, moves, step, last, colour)
- if mvs is None:
- break
- elif isinstance(mvs, list):
- last = mvs
- else:
- moves.update(mvs)
- left -= 1
- return moves
-
- def _traverseRight(self, start, stop, step, colour, right, skipped=[]):
- moves = {}
- last = []
- for row in range(start, stop, step):
- if right >= COLS:
- break
-
- mvs = self._traverse(row, right, skipped, moves, step, last, colour)
- if mvs is None:
- break
- elif isinstance(mvs, list):
- last = mvs
- else:
- moves.update(mvs)
-
- right += 1
- return moves
-
- def _traverse(self, row, col, skipped, moves, step, last, colour):
- current = self.board[row][col]
- if current is None:
- if skipped and not last:
- return None
- elif skipped:
- moves[(row, col)] = last + skipped
- else:
- moves[(row, col)] = last
-
- if last:
- if step == -1:
- rowCalc = max(row - 3, 0)
- else:
- rowCalc = min(row + 3, ROWS)
- moves.update(self._traverseLeft(row + step, rowCalc, step, colour, col - 1, skipped=last))
- moves.update(self._traverseRight(row + step, rowCalc, step, colour, col + 1, skipped=last))
- return None
- elif current.colour == colour:
- return None
- else:
- last = [current]
- return last
diff --git a/utilities/constants.py b/utilities/constants.py
index 526c64e..be0f817 100644
--- a/utilities/constants.py
+++ b/utilities/constants.py
@@ -4,10 +4,10 @@ WIDTH, HEIGHT = 800, 800
ROWS, COLS = 8, 8
SQUARE_SIZE = WIDTH // COLS
-# RGB color
+# RGB colour
-GREEN = (144, 184, 59)
-WHITE = (255, 255, 255)
+GREEN = 1
+WHITE = 2
BLACK = (0, 0, 0)
BLUE = (0, 0, 255)
GREY = (128, 128, 128)
diff --git a/utilities/gameManager.py b/utilities/gameManager.py
index 93db2a1..46289b7 100644
--- a/utilities/gameManager.py
+++ b/utilities/gameManager.py
@@ -1,42 +1,74 @@
+from __future__ import annotations
+
import pygame
-from utilities.board import Board
+from utilities.Board import Board
from utilities.constants import GREEN, WHITE, BLUE, SQUARE_SIZE
+
class GameManager:
- def __init__(self, win, colour):
+ def __init__(self, win: pygame.display, colour: int) -> None:
+ """
+ Constructor for the GameManager class
+ :param win: The window
+ :param colour: The colour of the player
+ """
self._init(colour)
self.win = win
- def _init(self, colour):
+ def _init(self, colour: int) -> None:
+ """
+ Initializes the game
+ :param colour: the colour of the player
+ """
self.selected = None
self.board = Board()
self.turn = colour
self.validMoves = {}
self.legCount = 0
- def update(self):
+ def update(self) -> None:
+ """
+ Updates the GUI
+        :return: None
+ """
self.board.draw(self.win)
self.drawValidMoves(self.validMoves)
pygame.display.update()
- def reset(self):
+ def reset(self) -> None:
+ """
+ Resets the game
+ :return: None
+ """
self._init(self.turn)
- def select(self, row, col):
+ def select(self, row: int, col: int) -> bool:
+ """
+ Selects a piece
+ :param row: Row of the piece
+ :param col: Column of the piece
+        :return: True if a piece was selected
+ """
if self.selected:
result = self._move(row, col)
if not result:
self.selected = None
self.select(row, col)
piece = self.board.getPiece(row, col)
- if piece is not None and piece.colour == self.turn:
+ if piece != 0 and piece.colour == self.turn:
self.selected = piece
self.validMoves = self.board.getValidMoves(piece)
return True
- def _move(self, row, col):
+ def _move(self, row: int, col: int) -> bool:
+ """
+ Moves a piece
+ :param row: Row of the piece
+ :param col: Column of the piece
+ :return: True if the move was successful, False otherwise
+ """
piece = self.board.getPiece(row, col)
- if self.selected and piece is None and (row, col) in self.validMoves:
+ if self.selected and piece == 0 and (row, col) in self.validMoves:
self.board.move(self.selected, row, col)
skipped = self.validMoves[row, col]
if self.validMoves[list(self.validMoves.keys())[0]]:
@@ -58,21 +90,39 @@ class GameManager:
self.validMoves = {}
if self.turn == GREEN:
self.turn = WHITE
- else:
- self.turn = GREEN
+ return
+ self.turn = GREEN
- def drawValidMoves(self, moves):
+ def drawValidMoves(self, moves: list) -> None:
+ """
+ Draws the valid moves
+ :param moves: list of valid moves
+ :return: None
+ """
for row, col in moves:
pygame.draw.circle(self.win, BLUE,
(col * SQUARE_SIZE + SQUARE_SIZE // 2, row * SQUARE_SIZE + SQUARE_SIZE // 2), 15)
- def winner(self):
+ def winner(self) -> int or None:
+ """
+ Gets the winner
+ :return: The winner
+ """
return self.board.winner()
- def getBoard(self):
+ def getBoard(self) -> Board:
+ """
+ Gets the board
+ :return: The board
+ """
return self.board
- def aiMove(self, board):
+ def aiMove(self, board: Board) -> None:
+ """
+ Makes a move for the AI
+ :param board: The new board
+ :return: None
+ """
if board is None:
# colour = "green" if self.turn == GREEN else "white"
# print("no move left for " + colour + " to make")
diff --git a/utilities/piece.py b/utilities/piece.py
index c808fd9..4d033d0 100644
--- a/utilities/piece.py
+++ b/utilities/piece.py
@@ -1,10 +1,16 @@
import pygame.draw
-from utilities.constants import SQUARE_SIZE, GREY, CROWN
+from utilities.constants import SQUARE_SIZE, GREY, CROWN, GREEN
class Piece:
- def __init__(self, row, col, colour):
+ def __init__(self, row: int, col: int, colour: int) -> None:
+ """
+        Constructor for the Piece class, which represents a single piece on the board
+ :param row: Row of the piece
+ :param col: Column of the piece
+ :param colour: Colour of the piece
+ """
self.row = row
self.col = col
self.colour = colour
@@ -14,25 +20,50 @@ class Piece:
self.calcPosition()
self.padding = 20
self.border = 2
+ self.green = (144, 184, 59)
+ self.white = (255, 255, 255)
- def calcPosition(self):
+ def calcPosition(self) -> None:
+ """
+ Calculates the position of the piece
+ :return: None
+ """
self.x = SQUARE_SIZE * self.col + SQUARE_SIZE // 2
self.y = SQUARE_SIZE * self.row + SQUARE_SIZE // 2
- def makeKing(self):
+ def makeKing(self) -> None:
+ """
+ Makes the piece a king
+ :return: None
+ """
self.king = True
- def draw(self, win):
+ def draw(self, win) -> None:
+ """
+ Draws the piece
+ :param win: The window to draw the piece on
+ :return: None
+ """
radius = SQUARE_SIZE // 2 - self.padding
pygame.draw.circle(win, GREY, (self.x, self.y), radius + self.border)
- pygame.draw.circle(win, self.colour, (self.x, self.y), radius)
+ pygame.draw.circle(win, self.green if self.colour == GREEN else self.white, (self.x, self.y), radius)
if self.king:
win.blit(CROWN, (self.x - CROWN.get_width() // 2, self.y - CROWN.get_height() // 2))
- def move(self, row, col):
+ def move(self, row: int, col: int) -> None:
+ """
+ Moves the piece to a new position
+ :param row: Row to move to
+ :param col: Column to move to
+ :return: None
+ """
self.row = row
self.col = col
self.calcPosition()
- def __repr__(self):
+ def __repr__(self) -> str:
+ """
+ String representation of the piece
+ :return: String representation of the colour
+ """
return str(self.colour)
diff --git a/winners-3.txt b/winners-3.txt
new file mode 100644
index 0000000..c771e8b
--- /dev/null
+++ b/winners-3.txt
@@ -0,0 +1,100 @@
+2
+2
+2
+2
+0
+2
+2
+2
+2
+2
+2
+0
+0
+2
+1
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+0
+2
+2
+2
+2
+0
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+1
+0
+2
+2
+2
+2
+2
+2
+2
+2
+2
+1
+2
+1
+0
+2
+0
+0
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+1
+2
+2
+2
+0
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
diff --git a/winners-5.txt b/winners-5.txt
new file mode 100644
index 0000000..94e3b68
--- /dev/null
+++ b/winners-5.txt
@@ -0,0 +1,100 @@
+2
+2
+1
+2
+1
+1
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+0
+2
+2
+2
+1
+2
+2
+0
+2
+2
+0
+2
+2
+0
+0
+2
+2
+2
+2
+2
+0
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+0
+2
+2
+2
+0
+2
+0
+2
+2
+2
+2
+1
+0
+2
+2
+2
+2
+2
+2
+1
+2
+2
+2
+2
+0
+2
+0
+2
+2
+2
+2
+2
+1
+2
+2
+1
+2
+2
+2
+2
+2
+2
+2
+2