diff --git a/.idea/draughts.iml b/.idea/draughts.iml
index 3c96bee..3858d78 100644
--- a/.idea/draughts.iml
+++ b/.idea/draughts.iml
@@ -4,7 +4,7 @@
-
+
\ No newline at end of file
diff --git a/Report.pdf b/Report.pdf
deleted file mode 100644
index 3f309d9..0000000
Binary files a/Report.pdf and /dev/null differ
diff --git a/main.py b/main.py
index 40c2f98..a793849 100644
--- a/main.py
+++ b/main.py
@@ -1,6 +1,7 @@
 import sys

 import pygame
+import numpy as np
 from matplotlib import pyplot as plt

 from reinforcementLearning.ReinforcementLearning import ReinforcementLearning
@@ -178,23 +179,38 @@ def game(difficulty):
     clock = pygame.time.Clock()
     gameManager = GameManager(WIN, GREEN)
     rl = ReinforcementLearning(gameManager.board.getAllMoves(WHITE), gameManager.board, WHITE, gameManager)
+    model = rl.buildMainModel()
+    model.load_weights("./modelWeights/model_final.h5")
     mm = MiniMax()

     totalReward = []
-    for i in range(2000):
+    winners = []
+    for i in range(100):
         score = 0
         for j in range(200):
+            print(j)
             clock.tick(FPS)
             reward = 0
             if gameManager.turn == WHITE:
-                mm = MiniMax()
-                value, newBoard = mm.AI(difficulty, WHITE, gameManager)
-                # gameManager.aiMove(newBoard)
-                # reward, newBoard = rl.AI(gameManager.board)
-                if newBoard is None:
+                # mm = MiniMax()
+                # value, newBoard = mm.AI(difficulty, WHITE, gameManager)
+                # gameManager.aiMove(newBoard)
+                # reward, newBoard = rl.AI(gameManager.board)
+                actionSpace = rl.encodeMoves(WHITE, gameManager.board)
+                if len(actionSpace) == 0:
                     print("Cannot make move")
                     continue
+                totalMoves = len(actionSpace)
+                # moves = np.squeeze(moves)
+                moves = np.pad(actionSpace, (0, rl.maxSize - totalMoves), 'constant', constant_values=(1, 1))
+                act_values = model.predict(rl.normalise(moves))
+                val = np.argmax(act_values[0])
+                val = val if val < totalMoves else totalMoves - 1
+                reward, newBoard, done = gameManager.board.step(actionSpace[val], WHITE)
+
+                # if newBoard is None:
+                #     print("Cannot make move")
+                #     continue
                 gameManager.aiMove(newBoard)
-                # gameManager.update()

                 pygame.display.update()
@@ -206,7 +222,10 @@ def game(difficulty):
             score += reward

             if gameManager.winner() is not None:
-                print(gameManager.winner())
+                print("Green" if gameManager.winner() == GREEN else "White", " wins")
+                with open("winners.txt", "a+") as f:
+                    f.write(str(gameManager.winner()) + "\n")
+                winners.append(gameManager.winner())
                 break

             # for event in pygame.event.get():
@@ -221,9 +240,16 @@ def game(difficulty):
             gameManager.update()
             pygame.display.update()

+        if gameManager.winner() is None:
+            with open("winners.txt", "a+") as f:
+                f.write(str(0) + "\n")
+            winners.append(0)
         gameManager.reset()
         rl.resetScore()
         print("Game: ", i, " Reward: ", score)
+        with open("rewards.txt", "a+") as f:
+            f.write(str(score) + "\n")
+        totalReward.append(score)

         # save model weights every 25 games
         if i % 250 == 0 and i != 0:
@@ -237,5 +263,12 @@ def game(difficulty):
     plt.ylabel("Reward")
     plt.show()

+    fig, ax = plt.subplots()
+    bar = ax.bar(["Draw", "White", "Green"], [winners.count(0), winners.count(WHITE), winners.count(GREEN)])
+    ax.set(xlabel='Winner', ylabel='Frequency', ylim=[0, 500])
+    ax.set_title("Winners")
+    ax.bar_label(bar)
+    plt.show()
+

 main(3)
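Note on the rewritten loop in main.py above: white's move is now chosen by encoding the legal moves, padding them out to rl.maxSize, scoring the padded vector with the loaded network, and clamping the argmax back into the legal range. A minimal self-contained sketch of that selection step, with a stub standing in for the loaded Keras model (MAX_SIZE, select_move and _StubModel are illustrative names, not part of the repository):

import numpy as np

MAX_SIZE = 32  # mirrors rl.maxSize

def select_move(model, action_space):
    """Pad the encoded moves, score them, and clamp the argmax to a legal index."""
    total_moves = len(action_space)
    if total_moves == 0:
        return None  # mirrors the "Cannot make move" branch in the loop
    moves = np.pad(action_space, (0, MAX_SIZE - total_moves), 'constant', constant_values=(1, 1))
    act_values = model.predict(moves / 10000)  # normalise() in this diff divides by 10000
    val = int(np.argmax(act_values[0]))
    # the argmax can point at a padding slot, so fall back to the last real move
    return action_space[min(val, total_moves - 1)]

class _StubModel:
    """Placeholder for the model loaded from ./modelWeights/model_final.h5."""
    def predict(self, x):
        return np.random.rand(1, MAX_SIZE)

print(select_move(_StubModel(), [1234, 2345, 3456]))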
diff --git a/reinforcementLearning/ReinforcementLearning.py b/reinforcementLearning/ReinforcementLearning.py
index 59c8661..1e8f57d 100644
--- a/reinforcementLearning/ReinforcementLearning.py
+++ b/reinforcementLearning/ReinforcementLearning.py
@@ -37,9 +37,10 @@ class ReinforcementLearning():
         self.maxSize = 32
         self.epsilonMin = .01
         self.epsilonDecay = .995
-        self.learningRate = 0.001
+        self.learningRate = 0.0001
         self.memory = deque(maxlen=10000000)
-        self.model = self._buildMainModel()
+        self.model = self.buildMainModel()
+        print(self.model.summary())

     def AI(self, board: Board) -> tuple:
         """
@@ -48,7 +49,7 @@
         """
         self.board = board
         self.state = self._convertState(self.board.board)
-        self.actionSpace = self._encodeMoves(self.colour, self.board)
+        self.actionSpace = self.encodeMoves(self.colour, self.board)

         if len(self.actionSpace) == 0:
             return self.score, None
@@ -61,7 +62,7 @@

         return self.score, nextState

-    def _buildMainModel(self) -> Sequential:
+    def buildMainModel(self) -> Sequential:
         """
         Build the model for the AI
         :return: the model
         """
         # Board model
         modelLayers = [
             Lambda(lambda x: tf.reshape(x, [-1, 32])),
-            Dense(256, activation='relu'),
+            Dense(512, activation='relu', kernel_regularizer=regularizers.l2(0.01)),
             Dropout(0.2),
-            Dense(128, activation='relu'),
+            Dense(256, activation='relu', kernel_regularizer=regularizers.l2(0.01)),
             Dropout(0.2),
-            Dense(64, activation='relu'),
+            Dense(128, activation='relu', kernel_regularizer=regularizers.l2(0.01)),
+            Dropout(0.2),
+            Dense(64, activation='relu', kernel_regularizer=regularizers.l2(0.01)),
             Dropout(0.2),
             Dense(32, activation='relu', kernel_regularizer=regularizers.l2(0.01)),
             Dropout(0.2),
-            Dense(16, activation='relu', kernel_regularizer=regularizers.l2(0.01)),
-            Dropout(0.2),
-            Dense(1, activation='linear', kernel_regularizer=regularizers.l2(0.01))
+            Dense(16, activation='linear', kernel_regularizer=regularizers.l2(0.01))
         ]

         boardModel = Sequential(modelLayers)
         # boardModel.add(BatchNormalization())
-        boardModel.compile(optimizer=Adam(learning_rate=0.0001), loss='mean_squared_error')
+        boardModel.compile(optimizer=Adam(learning_rate=self.learningRate), loss='mean_squared_error')
         boardModel.build(input_shape=(None, None))

-        print(boardModel.summary())
-
         return boardModel

     def _replay(self) -> None:
         """
@@ -111,7 +110,7 @@
         # Encoded moves
         encodedMoves = []
         for state in states:
-            encodedMoves.append(self._encodeMoves(self.colour, state))
+            encodedMoves.append(self.encodeMoves(self.colour, state))

         # Calculate targets
         targets = []
@@ -126,7 +125,7 @@
         encodedMoves = np.array([np.pad(m, (0, self.maxSize - len(m)), 'constant', constant_values=(1, 1)) for m in encodedMoves])
         targets = np.array(targets)

-        self.model.fit(self._normalise(encodedMoves), self._normalise(targets), epochs=20)
+        self.model.fit(self.normalise(encodedMoves), self.normalise(targets), epochs=20)

         if self.epsilon > self.epsilonMin:
             self.epsilon *= self.epsilonDecay
@@ -160,8 +159,10 @@
             return self.actionSpace[0]
         encodedMoves = np.squeeze(self.actionSpace)
         encodedMoves = np.pad(encodedMoves, (0, self.maxSize - len(encodedMoves)), 'constant', constant_values=(1, 1))
-        act_values = self.model.predict(self._normalise(encodedMoves))
-        return self.actionSpace[np.argmax(act_values[0])]
+        act_values = self.model.predict(self.normalise(encodedMoves))
+        val = np.argmax(act_values[0])
+        val = val if val < len(self.actionSpace) else len(self.actionSpace) - 1
+        return self.actionSpace[val]

     def resetScore(self):
         self.score = 0
@@ -209,20 +210,14 @@

     def _maxNextQ(self) -> float:
         colour = WHITE if self.colour == GREEN else GREEN
-        encodedMoves = self._encodeMoves(colour, self.board)
+        encodedMoves = self.encodeMoves(colour, self.board)
         if len(encodedMoves) == 0:
             return -1
         paddedMoves = np.array(np.pad(encodedMoves, (0, self.maxSize - len(encodedMoves)), 'constant', constant_values=(1, 1)))
-        # paddedMoves = np.reshape(paddedMoves, (32, 8, 8))
-        # paddedMoves = paddedMoves / np.max(paddedMoved
-        # paddedMoves = paddedMoves.reshape(32,)
-        # pm = tf.convert_to_tensor(paddedMoves, dtype=tf.float32)
-        # pm = tf.reshape(pm, [32])
-        print(paddedMoves.shape)
-        nextQValues = self.model.predict_on_batch(self._normalise(paddedMoves))
+        nextQValues = self.model.predict_on_batch(self.normalise(paddedMoves))
         return np.max(nextQValues)

-    def _encodeMoves(self, colour: int, board: Board) -> list:
+    def encodeMoves(self, colour: int, board: Board) -> list:
         """
         Encodes the moves into a list encoded moves
         :param colour: colour of the player
@@ -243,10 +238,8 @@
         diff = np.nonzero(diff)
         return diff

-    def _normalise(self, data):
+    def normalise(self, data):
         """
         Normalise the data
         """
-        for i in range(len(data)):
-            data[i] = data[i] / np.linalg.norm(data[i])
-        return data
+        return data / 10000
\ No newline at end of file
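On the new normalise() at the end of ReinforcementLearning.py above: the per-row np.linalg.norm scaling is replaced by a flat division by 10000. Assuming moves keep the four-digit start-row/start-col/end-row/end-col encoding that Board._decode unpacks, that constant maps every code into [0, 1) while preserving their ordering. A tiny worked example (the encode helper is hypothetical, shown only to mirror the digit layout):

def encode(start_row, start_col, end_row, end_col):
    # hypothetical helper mirroring the digit layout Board._decode reads back
    return start_row * 1000 + start_col * 100 + end_row * 10 + end_col

print(encode(2, 3, 3, 4) / 10000)  # 0.2334
print(encode(7, 6, 5, 4) / 10000)  # 0.7654, so even the largest codes stay below 1.0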
diff --git a/results.py b/results.py
new file mode 100644
index 0000000..3573d0a
--- /dev/null
+++ b/results.py
@@ -0,0 +1,27 @@
+import matplotlib.pyplot as plt
+
+from utilities.constants import GREEN, WHITE
+
+# winners = []
+with open("winners.txt") as f:
+    winners = f.readlines()
+
+winners = [int(x.strip()) for x in winners]
+
+
+fig, ax = plt.subplots()
+bar = ax.bar(["Draw", "White", "Green"], [winners.count(0), winners.count(WHITE), winners.count(GREEN)])
+ax.set(xlabel='Winner', ylabel='Frequency', ylim=[0, 500])
+ax.set_title("Winners")
+ax.bar_label(bar)
+plt.show()
+
+
+with open("rewardsA.txt") as f:
+    totalReward = f.readlines()
+
+
+plt.plot([i for i in range(len(totalReward))], totalReward)
+plt.xlabel("Games")
+plt.ylabel("Reward")
+plt.show()
\ No newline at end of file
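A small caveat on results.py as added above: f.readlines() returns strings, so totalReward is plotted as text categories rather than numbers. A sketch of the same plot with the values cast first (same file name as the new script):

import matplotlib.pyplot as plt

with open("rewardsA.txt") as f:
    totalReward = [float(line.strip()) for line in f]  # cast before plotting

plt.plot(range(len(totalReward)), totalReward)
plt.xlabel("Games")
plt.ylabel("Reward")
plt.show()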
diff --git a/utilities/Board.py b/utilities/Board.py
index b4387a3..d01954b 100644
--- a/utilities/Board.py
+++ b/utilities/Board.py
@@ -63,19 +63,44 @@
             if piece != 0:
                 if piece.colour == GREEN:
                     self.greenLeft -= 1
-                    return
+                    continue
                 self.whiteLeft -= 1

     def getAllMoves(self, colour):
         moves = []
+        possibleMoves = []
+        possiblePieces = []
+        pieces = self.getAllPieces(colour)
+        hasForcedCapture = False

-        for piece in self.getAllPieces(colour):
+        for piece in pieces:
             validMoves = self.getValidMoves(piece)
-            for move, skip in validMoves.items():
-                tempBoard = deepcopy(self)
-                tempPiece = tempBoard.getPiece(piece.row, piece.col)
-                newBoard = self._simulateMove(tempPiece, move, tempBoard, skip)
-                moves.append(newBoard)
+
+            # Check if there are forced capture moves for this piece
+            forcedCaptureMoves = [move for move, skip in validMoves.items() if skip]
+            if forcedCaptureMoves:
+                hasForcedCapture = True
+                possiblePieces.append(piece)
+                possibleMoves.append({move: skip for move, skip in validMoves.items() if skip})
+
+        if hasForcedCapture:
+            # If there are forced capture moves, consider only those
+            for i in range(len(possibleMoves)):
+                for move, skip in possibleMoves[i].items():
+                    tempBoard = deepcopy(self)
+                    tempPiece = tempBoard.getPiece(possiblePieces[i].row, possiblePieces[i].col)
+                    newBoard = self._simulateMove(tempPiece, move, tempBoard, skip)
+                    moves.append(newBoard)
+        else:
+            # If no forced capture moves, consider all valid moves
+            for piece in pieces:
+                validMoves = self.getValidMoves(piece)
+                for move, skip in validMoves.items():
+                    tempBoard = deepcopy(self)
+                    tempPiece = tempBoard.getPiece(piece.row, piece.col)
+                    newBoard = self._simulateMove(tempPiece, move, tempBoard, skip)
+                    moves.append(newBoard)
+
         return moves

     def _simulateMove(self, piece, move, board, skip):
@@ -134,6 +159,7 @@
                 forcedCapture = forced
             else:
                 forcedCapture = forced
+
         return forcedCapture

     def scoreOfTheBoard(self):
@@ -241,7 +267,7 @@
     def _decode(self, move):
         # Split digits back out
         str_code = str(move)
-        print(str_code)
+        # print(str_code)
         start_row = int(str_code[0])
         start_col = int(str_code[1])
         end_row = int(str_code[2])
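The getAllMoves rewrite in utilities/Board.py above enforces the forced-capture rule: if any piece has a capture available, only capturing boards are generated; otherwise every valid move is simulated. A stripped-down sketch of the same filtering idea on plain dictionaries (the move codes and piece names are made up for illustration):

def filter_forced_captures(moves_per_piece):
    """moves_per_piece maps a piece to {move: skipped_pieces}; a truthy skip marks a capture."""
    captures = {piece: {m: s for m, s in moves.items() if s}
                for piece, moves in moves_per_piece.items()}
    captures = {piece: moves for piece, moves in captures.items() if moves}
    # if any piece can capture, only capture moves are legal; otherwise everything is
    return captures if captures else moves_per_piece

example = {"a1": {2334: []}, "b2": {4523: ["piece at (3, 4)"]}}
print(filter_forced_captures(example))  # only b2's capture survives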