Merge pull request 'reinforcement-learning' (#1) from reinforcement-learning into master

Reviewed-on: #1
This commit is contained in:
Rohit Pai 2023-09-28 23:59:04 +01:00
commit 7e9b0a475e
24 changed files with 2514 additions and 401 deletions

View File

@ -4,7 +4,7 @@
<content url="file://$MODULE_DIR$"> <content url="file://$MODULE_DIR$">
<excludeFolder url="file://$MODULE_DIR$/venv" /> <excludeFolder url="file://$MODULE_DIR$/venv" />
</content> </content>
<orderEntry type="jdk" jdkName="Python 3.11 (draughts)" jdkType="Python SDK" /> <orderEntry type="jdk" jdkName="$USER_HOME$/anaconda3" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" /> <orderEntry type="sourceFolder" forTests="false" />
</component> </component>
</module> </module>

View File

@ -1,4 +1,4 @@
<?xml version="1.0" encoding="UTF-8"?> <?xml version="1.0" encoding="UTF-8"?>
<project version="4"> <project version="4">
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.11 (draughts)" project-jdk-type="Python SDK" /> <component name="ProjectRootManager" version="2" project-jdk-name="$USER_HOME$/anaconda3" project-jdk-type="Python SDK" />
</project> </project>

6
.idea/other.xml Normal file
View File

@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="PySciProjectComponent">
<option name="PY_SCI_VIEW_SUGGESTED" value="true" />
</component>
</project>

Binary file not shown.

100
changeInRewards-3.txt Normal file
View File

@ -0,0 +1,100 @@
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0

100
changeInRewards-5.txt Normal file
View File

@ -0,0 +1,100 @@
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0

187
main.py
View File

@ -1,7 +1,10 @@
import sys import sys
import pygame import pygame
import numpy as np
from matplotlib import pyplot as plt
from reinforcementLearning.ReinforcementLearning import ReinforcementLearning
from utilities.constants import WIDTH, HEIGHT, SQUARE_SIZE, WHITE, GREEN from utilities.constants import WIDTH, HEIGHT, SQUARE_SIZE, WHITE, GREEN
from utilities.gameManager import GameManager from utilities.gameManager import GameManager
from minimax.minimaxAlgo import MiniMax from minimax.minimaxAlgo import MiniMax
@ -11,21 +14,45 @@ WIN = pygame.display.set_mode((WIDTH, HEIGHT))
pygame.display.set_caption("Draughts") pygame.display.set_caption("Draughts")
def getRowColFromMouse(pos): def getRowColFromMouse(pos: dict) -> tuple:
"""
Gets the row and column from the mouse position
:param pos: X and Y position of the mouse
:return: Row and column
"""
x, y = pos x, y = pos
row = y // SQUARE_SIZE row = y // SQUARE_SIZE
col = x // SQUARE_SIZE col = x // SQUARE_SIZE
return row, col return row, col
def drawText(text, font, color, surface, x, y): def drawText(text: str, font: pygame.font.SysFont, colour: tuple, surface: pygame.display, x: float, y: int) -> None:
textobj = font.render(text, 1, color) """
Draws text on the screen
:param text: Text to draw
:param font: System font
:param colour: Colour of the text
:param surface: The display surface
:param x: X position of the text
:param y: Y position of the text
:return None
"""
textobj = font.render(text, 1, colour)
textrect = textobj.get_rect() textrect = textobj.get_rect()
textrect.topleft = (x, y) textrect.topleft = (x, y)
surface.blit(textobj, textrect) surface.blit(textobj, textrect)
def drawMultiLineText(surface, text, pos, font, color=pygame.Color('black')): def drawMultiLineText(surface: pygame.display, text: str, pos: dict, font: pygame.font.SysFont, colour: tuple = pygame.Color('black')) -> None:
"""
Draws multiline text on the screen
:param surface: the display surface
:param text: text to draw
:param pos: X and Y position of the text
:param font: System font
:param colour: colour of the text
:return None
"""
words = [word.split(' ') for word in text.splitlines()] # 2D array where each row is a list of words. words = [word.split(' ') for word in text.splitlines()] # 2D array where each row is a list of words.
space = font.size(' ')[0] # The width of a space. space = font.size(' ')[0] # The width of a space.
max_width, max_height = surface.get_size() max_width, max_height = surface.get_size()
@ -33,7 +60,7 @@ def drawMultiLineText(surface, text, pos, font, color=pygame.Color('black')):
word_height = None word_height = None
for line in words: for line in words:
for word in line: for word in line:
word_surface = font.render(word, 0, color) word_surface = font.render(word, 0, colour)
word_width, word_height = word_surface.get_size() word_width, word_height = word_surface.get_size()
if x + word_width >= max_width: if x + word_width >= max_width:
x = pos[0] # Reset the x. x = pos[0] # Reset the x.
@ -44,15 +71,20 @@ def drawMultiLineText(surface, text, pos, font, color=pygame.Color('black')):
y += word_height # Start on new row. y += word_height # Start on new row.
def main(): def main(difficulty: int = 0) -> None:
"""
Main function, that shows the menu before running the game
:param difficulty: difficulty of minimax
:return: None
"""
pygame.init() pygame.init()
screen = pygame.display.set_mode((WIDTH, HEIGHT)) screen = pygame.display.set_mode((WIDTH, HEIGHT))
menuClock = pygame.time.Clock() menuClock = pygame.time.Clock()
click = False click = False
width = screen.get_width() width = screen.get_width()
font = pygame.font.SysFont(None, 25) font = pygame.font.SysFont("", 25)
difficulty = 0
if difficulty == 0:
while True: while True:
# menu # menu
screen.fill((128, 128, 128)) screen.fill((128, 128, 128))
@ -107,17 +139,21 @@ def main():
pygame.display.update() pygame.display.update()
menuClock.tick(60) menuClock.tick(60)
if difficulty != 0:
game(difficulty) game(difficulty)
def rulesGUI(): def rulesGUI() -> None:
"""
Shows the rules of the game
:return: None
"""
screen = pygame.display.set_mode((WIDTH, HEIGHT)) screen = pygame.display.set_mode((WIDTH, HEIGHT))
menuClock = pygame.time.Clock() menuClock = pygame.time.Clock()
click = False click = False
width = screen.get_width() width = screen.get_width()
titleFont = pygame.font.SysFont(None, 48) titleFont = pygame.font.SysFont("", 48)
font = pygame.font.SysFont(None, 21) font = pygame.font.SysFont("", 21)
while True: while True:
screen.fill((128, 128, 128)) screen.fill((128, 128, 128))
drawText("Rules", titleFont, (255, 255, 255), screen, width / 2, 20) drawText("Rules", titleFont, (255, 255, 255), screen, width / 2, 20)
@ -171,43 +207,116 @@ multi-jump until the next move.""", (50, 50), font)
menuClock.tick(60) menuClock.tick(60)
def game(difficulty): def game(difficulty: int) -> None:
"""
Runs the game with the given difficulty. Used for training and testing the RL algorithm
:param difficulty: The difficulty of the minimax algorithm
"""
run = True run = True
clock = pygame.time.Clock() clock = pygame.time.Clock()
gameManager = GameManager(WIN, GREEN) gameManager = GameManager(WIN, GREEN)
rl = ReinforcementLearning(gameManager.board.getAllMoves(WHITE), gameManager.board, WHITE, gameManager)
while run: # model = rl.buildMainModel()
rl.model.load_weights("./modelWeights/model_final.h5")
mm = MiniMax()
totalReward = []
winners = []
for i in range(50):
score = 0
for j in range(200):
print(j)
clock.tick(FPS) clock.tick(FPS)
reward = 0
if gameManager.turn == WHITE: if gameManager.turn == WHITE:
mm = MiniMax() # mm = MiniMax()
value, newBoard = mm.AI(gameManager.getBoard(), difficulty, WHITE, gameManager) # value, newBoard = mm.AI(difficulty, WHITE, gameManager)
# gameManager.aiMove(newBoard)
# reward, newBoard = rl.AITrain(gameManager.board)
newBoard = rl.AITest(gameManager.board)
if newBoard is None:
print("Cannot make move")
continue
gameManager.aiMove(newBoard) gameManager.aiMove(newBoard)
# time.sleep(0.15)
if gameManager.turn == GREEN:
mm = MiniMax()
value, newBoard = mm.AI(gameManager.getBoard(), difficulty, GREEN, gameManager)
gameManager.aiMove(newBoard)
# time.sleep(0.15)
if gameManager.winner() != None:
print(gameManager.winner())
run = False
for event in pygame.event.get():
if event.type == pygame.QUIT:
run = False
if event.type == pygame.MOUSEBUTTONDOWN:
pos = pygame.mouse.get_pos()
row, col = getRowColFromMouse(pos)
# if gameManager.turn == GREEN:
gameManager.select(row, col)
gameManager.update() gameManager.update()
pygame.display.update() pygame.display.update()
if gameManager.turn == GREEN:
value, newBoard = mm.AI(difficulty, GREEN, gameManager)
gameManager.aiMove(newBoard)
score += reward
if gameManager.winner() is not None:
print("Green" if gameManager.winner() == GREEN else "White", " wins")
# with open(f"winners-{difficulty}.txt", "a+") as f:
# f.write(str(gameManager.winner()) + "\n")
winners.append(gameManager.winner())
break
# for event in pygame.event.get():
# if event.type == pygame.QUIT:
# break
# if event.type == pygame.MOUSEBUTTONDOWN:
# pos = pygame.mouse.get_pos()
# row, col = getRowColFromMouse(pos)
# # if gameManager.turn == GREEN:
# gameManager.select(row, col)
gameManager.update()
pygame.display.update()
if gameManager.winner() is None:
# with open(f"winners-{difficulty}.txt", "a+") as f:
# f.write(str(0) + "\n")
winners.append(0)
gameManager.reset()
rl.resetScore()
print("Game: ", i, " Reward: ", score)
# with open(f"rewards-{difficulty}.txt", "a+") as f:
# f.write(str(score) + "\n")
totalReward.append(score)
# save model weights every 25 games
# if i % 250 == 0 and i != 0:
# rl.model.save("./modelWeights/model_" + str(i) + ".h5")
# pygame.quit() # pygame.quit()
# rl.model.save("./modelWeights/model_final.h5")
change_in_rewards = [0] # Initialize with 0 for the first episode
for i in range(1, len(totalReward)):
change_in_reward = totalReward[i] - totalReward[i - 1]
change_in_rewards.append(change_in_reward)
main() # with open(f"changeInRewards-{difficulty}.txt", "a+") as f:
# for i in change_in_rewards:
# f.write(str(i) + "\n")
# episodes = list(range(1, len(totalReward) + 1))
#
# plt.plot(episodes, change_in_rewards)
# plt.xlabel('Training Games')
# plt.ylabel('Change in Game Reward')
# plt.title('Change in Game Reward vs. Training Games')
# plt.grid(True)
# plt.show()
#
# plt.plot([i for i in range(len(totalReward))], totalReward)
# plt.xlabel("Games")
# plt.ylabel("Reward")
# plt.show()
fig, ax = plt.subplots()
bar = ax.bar(["Draw", "White", "Green"], [winners.count(0), winners.count(WHITE), winners.count(GREEN)])
ax.set(xlabel='Winner', ylabel='Frequency', ylim=[0, 500])
ax.set_title(f"Winners for difficulty — {difficulty}")
ax.bar_label(bar)
plt.show()
# difficulties = [3, 5, 7, 9]
#
# for diff in difficulties:
# main(diff)
main(3)

View File

@ -1,56 +1,46 @@
import random import random
from copy import deepcopy
from math import inf from math import inf
from utilities.constants import GREEN, WHITE from utilities.constants import GREEN, WHITE
from utilities.gameManager import GameManager
class MiniMax(): class MiniMax:
def AI(self, board, depth, maxPlayer, gameManager): def AI(self, depth: int, maxPlayer: int, gameManager: GameManager) -> tuple:
if depth == 0 or board.winner() is not None: """
return board.scoreOfTheBoard(), board The minimax algorithm
:param depth: How deep the algorithm should go
:param maxPlayer: The current player
:param gameManager: The game manager
:return: the best evaluation and board
"""
if depth == 0 or gameManager.board.winner() is not None:
return gameManager.board.scoreOfTheBoard(), gameManager.board
if maxPlayer: if type(maxPlayer) == int:
maxEval = -inf maxEval = -inf
bestMove = None bestMove = None
for move in self.getAllMoves(board, maxPlayer): for move in gameManager.board.getAllMoves(maxPlayer):
evaluation = self.AI(move, depth - 1, False, gameManager)[0] evaluation = self.AI(depth - 1, False, gameManager)[0]
maxEval = max(maxEval, evaluation) maxEval = max(maxEval, evaluation)
if maxEval > evaluation: if maxEval > evaluation:
bestMove = move bestMove = move
if maxEval == evaluation: if maxEval == evaluation:
# bestMove = move
bestMove = bestMove if random.choice([True, False]) else move bestMove = bestMove if random.choice([True, False]) else move
return maxEval, bestMove return maxEval, bestMove
else: else:
minEval = inf minEval = inf
bestMove = None bestMove = None
colour = WHITE if gameManager.turn == GREEN else GREEN colour = WHITE if gameManager.turn == GREEN else GREEN
for move in self.getAllMoves(board, colour): for move in gameManager.board.getAllMoves(colour):
evaluation = self.AI(move, depth - 1, True, gameManager)[0] evaluation = self.AI(depth - 1, True, gameManager)[0]
minEval = min(minEval, evaluation) minEval = min(minEval, evaluation)
if minEval < evaluation: if minEval < evaluation:
bestMove = move bestMove = move
if minEval == evaluation: if minEval == evaluation:
# bestMove = move
bestMove = bestMove if random.choice([True, False]) else move bestMove = bestMove if random.choice([True, False]) else move
return minEval, bestMove return minEval, bestMove
def _simulateMove(self, piece, move, board, skip):
board.move(piece, move[0], move[1])
if skip:
board.remove(skip)
return board
def getAllMoves(self, board, colour):
moves = []
for piece in board.getAllPieces(colour):
validMoves = board.getValidMoves(piece)
for move, skip in validMoves.items():
tempBoard = deepcopy(board)
tempPiece = tempBoard.getPiece(piece.row, piece.col)
newBoard = self._simulateMove(tempPiece, move, tempBoard, skip)
moves.append(newBoard)
return moves

BIN
modelWeights/model_250.h5 Normal file

Binary file not shown.

BIN
modelWeights/model_final.h5 Normal file

Binary file not shown.

View File

@ -1,96 +1,280 @@
import random import random
from collections import deque from collections import deque
from typing import Any
from copy import deepcopy
import numpy as np import numpy as np
import tensorflow as tf import tensorflow as tf
from tensorflow.python.keras import Sequential, regularizers from keras.engine.input_layer import InputLayer
from tensorflow.python.keras.layers import Dense from keras.layers import BatchNormalization
from tensorflow.python.keras import Sequential, regularizers, Input
from tensorflow.python.keras.layers import Dense, Lambda, Dropout
from tensorflow.python.keras.optimizer_v2.adam import Adam
from minimax.minimaxAlgo import MiniMax
from utilities import Board
from utilities.constants import WHITE, GREEN
from utilities.gameManager import GameManager
class ReinforcementLearning(): class ReinforcementLearning():
def __init__(self, action_space, state_space, env): def __init__(self, actionSpace: list, board: Board, colour: int, gameManager: GameManager) -> None:
self.action_space = action_space """
self.state_space = state_space Constructor for the ReinforcementLearning class
self.env = env :param actionSpace: The number of possible actions
:param board: The game board
"""
self.gameManager = gameManager
self.actionSpace = actionSpace
self.board = board
self.state = self.board.board
self.colour = colour
self.score = 0
self.epsilon = 1 self.epsilon = 1
self.gamma = .95 self.gamma = .95
self.batch_size = 64 self.batchSize = 512
self.epsilon_min = .01 self.maxSize = 32
self.epsilon_decay = .995 self.epsilonMin = .01
self.learning_rate = 0.001 self.epsilonDecay = .995
self.memory = deque(maxlen=100000) self.learningRate = 0.0001
self.model = self._buildModel() self.memory = deque(maxlen=10000000)
self.model = self.buildMainModel()
print(self.model.summary())
def AI(self, episode): def AITrain(self, board: Board) -> tuple:
loss = [] """
Learns to play the draughts game
:return: The loss
"""
self.board = board
self.state = self._convertState(self.board.board)
self.actionSpace = self.encodeMoves(self.colour, self.board)
if len(self.actionSpace) == 0:
return self.score, None
max_steps = 1000 action = self._act()
reward, nextState, done = self.board.step(action, self.colour)
self.score += reward
self.state = self._convertState(nextState.board)
self._remember(deepcopy(self.board), action, reward, self.state, done)
self._replay()
for e in range(episode): return self.score, nextState
state = self.env.reset()
state = np.reshape(state, (1, self.state_space))
score = 0
for i in range(max_steps):
action = self.act(state)
reward, next_state, done = self.env.step(action)
score += reward
next_state = np.reshape(next_state, (1, self.state_space))
self.remember(state, action, reward, next_state, done)
state = next_state
self.replay()
if done:
print("episode: {}/{}, score: {}".format(e, episode, score))
break
loss.append(score)
def _buildModel(self): def AITest(self, board: Board) -> Board:
"""
Runs the AI
:param board: The board
:return: The new board
"""
actionSpace = self.encodeMoves(WHITE, board)
if len(actionSpace) == 0:
print("Cannot make move")
return None
totalMoves = len(actionSpace)
# moves = np.squeeze(moves)
moves = np.pad(actionSpace, (0, self.maxSize - totalMoves), 'constant', constant_values=(1, 1))
act_values = self.model.predict(self.normalise(moves))
val = np.argmax(act_values[0])
val = val if val < totalMoves else totalMoves - 1
reward, newBoard, done = board.step(actionSpace[val], WHITE)
return newBoard
def buildMainModel(self) -> Sequential:
"""
Build the model for the AI
:return: The model
"""
# Board model # Board model
board_model = Sequential() modelLayers = [
Lambda(lambda x: tf.reshape(x, [-1, 32])),
Dense(512, activation='relu', kernel_regularizer=regularizers.l2(0.01)),
Dropout(0.2),
Dense(256, activation='relu', kernel_regularizer=regularizers.l2(0.01)),
Dropout(0.2),
Dense(128, activation='relu', kernel_regularizer=regularizers.l2(0.01)),
Dropout(0.2),
Dense(64, activation='relu', kernel_regularizer=regularizers.l2(0.01)),
Dropout(0.2),
Dense(32, activation='relu', kernel_regularizer=regularizers.l2(0.01)),
Dropout(0.2),
Dense(16, activation='linear', kernel_regularizer=regularizers.l2(0.01))
]
boardModel = Sequential(modelLayers)
# input dimensions is 32 board position values # boardModel.add(BatchNormalization())
board_model.add(Dense(64, activation='relu', input_dim=32)) boardModel.compile(optimizer=Adam(learning_rate=self.learningRate), loss='mean_squared_error')
boardModel.build(input_shape=(None, None))
# use regularizers, to prevent fitting noisy labels return boardModel
board_model.add(Dense(32, activation='relu', kernel_regularizer=regularizers.l2(0.01)))
board_model.add(Dense(16, activation='relu', kernel_regularizer=regularizers.l2(0.01))) # 16
board_model.add(Dense(8, activation='relu', kernel_regularizer=regularizers.l2(0.01))) # 8
# output isn't squashed, because it might lose information def _replay(self) -> None:
board_model.add(Dense(1, activation='linear', kernel_regularizer=regularizers.l2(0.01))) """
board_model.compile(optimizer='nadam', loss='binary_crossentropy') trains the model
:return: None
return board_model """
if len(self.memory) < self.batchSize:
def remember(self, state, action, reward, next_state, done): # Not enough data to replay and test the model
self.memory.append((state, action, reward, next_state, done))
def replay(self):
if len(self.memory) < self.batch_size:
return return
minibatch = random.sample(self.memory, self.batch_size) # Get a random sample from the memory
states = np.array([i[0] for i in minibatch]) minibatch = random.sample(self.memory, int(self.maxSize))
actions = np.array([i[1] for i in minibatch])
rewards = np.array([i[2] for i in minibatch])
next_states = np.array([i[3] for i in minibatch])
dones = np.array([i[4] for i in minibatch])
states = np.squeeze(states) # Extract states, rewards, dones
next_states = np.squeeze(next_states) states = [m[0] for m in minibatch]
rewards = [m[2] for m in minibatch]
dones = [m[4] for m in minibatch]
targets = rewards + self.gamma * (np.amax(self.model.predict_on_batch(next_states), axis=1)) * (1 - dones) # Encoded moves
targets_full = self.model.predict_on_batch(states) encodedMoves = []
for state in states:
encodedMoves.append(self.encodeMoves(self.colour, state))
ind = np.array([i for i in range(self.batch_size)]) # Calculate targets
targets_full[[ind], [actions]] = targets targets = []
for i, moves in enumerate(encodedMoves):
if dones[i]:
target = rewards[i]
else:
target = rewards[i] + self.gamma * self._maxNextQ()
self.model.fit(states, targets_full, epochs=1, verbose=0) targets.append(target)
if self.epsilon > self.epsilon_min:
self.epsilon *= self.epsilon_decay
def act(self, state): encodedMoves = np.array([np.pad(m, (0, self.maxSize - len(m)), 'constant', constant_values=(1, 1))
for m in encodedMoves])
targets = np.array(targets)
self.model.fit(self.normalise(encodedMoves), self.normalise(targets), epochs=20)
if self.epsilon > self.epsilonMin:
self.epsilon *= self.epsilonDecay
def _remember(self, state: np.array, action: int, reward: float, nextState: np.array, done: bool) -> None:
"""
Remembers what it has learnt
:param state: The current state
:param action: The action taken
:param reward: The reward for the action
:param nextState: The next state
:param done: Whether the game is finished
:return: None
"""
self.memory.append((state, action, reward, nextState, done))
def _act(self) -> Any:
"""
Chooses an action based on the available moves
:return: The action
"""
if np.random.rand() <= self.epsilon: if np.random.rand() <= self.epsilon:
return random.randrange(self.action_space) # choose a random action from the action spaces list
act_values = self.model.predict(state) mm = MiniMax()
return np.argmax(act_values[0]) value, newBoard = mm.AI(3, self.colour, self.gameManager)
if newBoard is None:
return random.choice(self.actionSpace)
where = self._boardDiff(self.board, newBoard)
return self._encode(where[0]+1, where[1]+1)
if len(self.actionSpace) == 1:
return self.actionSpace[0]
encodedMoves = np.squeeze(self.actionSpace)
encodedMoves = np.pad(encodedMoves, (0, self.maxSize - len(encodedMoves)), 'constant', constant_values=(1, 1))
actValues = self.model.predict(self.normalise(encodedMoves))
val = np.argmax(actValues[0])
val = val if val < len(self.actionSpace) else len(self.actionSpace) - 1
return self.actionSpace[val]
def resetScore(self) -> None:
"""
Sets the running score (the total that AITrain adds each reward to) back to 0
:return: None
"""
self.score = 0
def _convertState(self, board: list) -> list:
"""
Converts the board into a 2D list of numbers
:param board: 2D list of pieces
:return: new 2D list of numbers
"""
num_board = []
for row in board:
num_row = []
for piece in row:
if piece == 0:
num_row.append(0)
continue
if piece.colour == 1:
num_row.append(1)
continue
num_row.append(2)
num_board.append(num_row)
return num_board
def _encode(self, start: tuple, end: tuple) -> int:
"""
Encodes the move into an integer
:param start: Tuple of start position
:param end: Tuple of end position
:return: Encoded move
"""
start_row = start[0]
start_col = end[0]
end_row = start[-1]
end_col = end[-1]
# Concatenate into integer
return int(str(start_row) + str(start_col) + str(end_row) + str(end_col))
def _maxNextQ(self) -> float:
    """
    Gets the largest model output for the other colour's encoded moves on self.board
    :return: the largest predicted value, or -1 when the other colour has no moves
    """
    otherColour = WHITE if self.colour == GREEN else GREEN
    moves = self.encodeMoves(otherColour, self.board)

    if len(moves) == 0:
        return -1

    # Right-pad with 1s up to maxSize entries
    # NOTE(review): np.pad is given a negative width if there are more than maxSize moves - confirm that cannot happen
    padWidth = (0, self.maxSize - len(moves))
    padded = np.array(np.pad(moves, padWidth, 'constant', constant_values=(1, 1)))
    predictions = self.model.predict_on_batch(self.normalise(padded))
    return np.max(predictions)
def encodeMoves(self, colour: int, board: Board) -> list:
"""
Encodes the moves into a list encoded moves
:param colour: Colour of the player
:param board: The board
:return: list Of encoded moves
"""
encodedMoves = []
moves = board.getAllMoves(colour)
for move in moves:
where = self._boardDiff(board, move)
encodedMoves.append(self._encode(where[0]+1, where[1]+1))
return encodedMoves
def _boardDiff(self, board: Board, move: Board) -> np.array:
"""
Finds the difference between the two boards
:param board: The current board
:param move: The new board
:return: the difference between the two boards
"""
cnvState = np.array(self._convertState(board.board))
cnvMove = np.array(self._convertState(move.board))
diff = np.subtract(cnvMove, cnvState)
diff = np.nonzero(diff)
return diff
def normalise(self, data: np.array) -> np.array:
    """
    Scales the data down by a fixed factor of 10000
    :param data: the data to normalise
    :return: the data divided by 10000
    """
    scale = 10000
    return data / scale

80
results.py Normal file
View File

@ -0,0 +1,80 @@
import matplotlib.pyplot as plt
import numpy as np
from utilities.constants import GREEN, WHITE
# winners = []
# Load one integer result per line. Blank lines (for example a trailing
# newline at the end of the file) are skipped instead of crashing int().
with open("winners-5.txt", "r", encoding="utf-8") as f:
    winners = [int(line) for line in f if line.strip()]
# lavg = []
# for i in range(0, len(winners), 25):
# lavg.append(winners[i:i+25].count(2) / 25)
#
# x = np.arange(0, len(lavg))
# y = np.array(lavg) * 100
#
# a, b = np.polyfit(x, y, 1)
#
# fig, ax = plt.subplots(figsize=(10, 5))
# ax.plot(y)
# ax.set_xticks(np.arange(0, len(lavg), 2))
# ax.minorticks_on()
# ax.plot(x, a*x+b, color='red', linestyle='--', linewidth=2)
# ax.set_ylim([0, 100])
# ax.set_title("Winners Average")
# ax.grid(which='major', linestyle='-', linewidth='0.5', color='black')
# ax.grid(which='minor', linestyle=':', linewidth='0.5')
# ax.set_xlabel("Average Set")
# ax.set_ylabel("Percentage of Wins")
# ax.tick_params(which="minor", bottom=False, left=False)
# plt.show()
# Bar chart of how often each result occurs in winners: 0 is counted as a draw,
# WHITE and GREEN as wins for the respective colour.
fig, ax = plt.subplots()
bar = ax.bar(["Draw", "White", "Green"], [winners.count(0), winners.count(WHITE), winners.count(GREEN)])
# The y-axis is pinned to 0-100, so a count above 100 would run off the top of the chart.
ax.set(xlabel='Winner', ylabel='Frequency', ylim=[0, 100])
ax.set_title("Winners at Depth 5")
# Horizontal grid lines only (axis='y').
ax.grid(which='major', linestyle='-', linewidth='0.5', color='grey', axis='y')
# Print each bar's count above it.
ax.bar_label(bar)
plt.show()
# with open("trainedRewards.txt", "r") as f:
# totalReward = f.readlines()
#
# totalReward = [float(x.strip()) for x in totalReward]
# filteredReward = list(filter(lambda x: x > -1500, totalReward))
# change_in_rewards = [0] # Initialize with 0 for the first episode
# for i in range(1, len(totalReward)):
# change_in_reward = totalReward[i] - totalReward[i - 1]
# change_in_rewards.append(change_in_reward)
#
# games = list(range(1, len(totalReward) + 1))
# plt.plot(games, change_in_rewards)
# plt.xlabel('Training Games')
# plt.ylabel('Change in Game Reward')
# plt.title('Change in Game Reward vs. Training Games')
# plt.grid(True)
# plt.show()
# major_ticks = np.arange(0, 101, 20)
# minor_ticks = np.arange(0, 101, 5)
#
# plt.plot([i for i in range(len(totalReward))], totalReward)
# plt.title("Rewards to Games")
# plt.xlabel("Games")
# plt.ylabel("Reward")
# plt.xticks(major_ticks)
# plt.xticks(minor_ticks, minor=True)
# plt.yticks(major_ticks)
# plt.yticks(minor_ticks, minor=True)
# plt.grid(which='both')
# plt.show()
#
# plt.plot([i for i in range(len(filteredReward))], filteredReward)
# plt.title("Filtered Rewards to Games")
# plt.xlabel("Games")
# plt.ylabel("Reward")
# plt.grid(which='both')
# plt.show()

56
rewards-5.txt Normal file
View File

@ -0,0 +1,56 @@
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0

BIN
rewardsA.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 148 KiB

2
run.sh Executable file
View File

@ -0,0 +1,2 @@
#!/usr/bin/env bash
# Runs the game inside the default conda environment.
# `conda activate` is a shell function that only exists once conda's shell hook
# has been loaded; a non-interactive script does not get it from the user's
# shell start-up files, so load it explicitly first.
eval "$(conda shell.bash hook)"
conda activate
python main.py

500
trainedRewards.txt Normal file
View File

@ -0,0 +1,500 @@
180.5
115.19999999999999
-155.39999999999998
-5169.4000000000015
100.0
-3354.2999999999956
123.79999999999998
-1738.0
261.40000000000015
120.89999999999999
147.80000000000004
108.0
113.50000000000001
110.5000000000002
-1048.3000000000006
75.8
232.70000000000016
89.10000000000001
279.9000000000002
165.40000000000003
85.4
34.20000000000016
266.20000000000016
101.69999999999999
283.0
-264.5
225.0
328.0
215.5
150.0
-217.5
-2920.0
82.5
-208.5
150.5
196.5
223.0
265.5
-282.5
175.5
206.5
221.5
127.5
-6337.5
147.5
231.5
137.5
-180.5
108.0
-339.5
190.0
-69.0
52.5
58.0
-5575.0
-159.5
197.5
177.5
-5547.5
-65.5
136.5
292.5
-169.5
185.0
115.5
198.0
30.0
162.5
95.5
170.0
113.0
-1405.0
-27.0
-4832.199999999999
147.5
228.0
59.0
262.5
-220.0
150.5
177.5
140.0
123.0
119.0
137.5
134.0
175.5
-5598.5
46.5
135.0
205.0
186.5
177.5
120.1
332.5
162.5
122.5
262.5
-70.0
159.0
138.0
240.5
215.0
147.5
-118.0
260.5
199.0
130.0
265.0
142.5
230.0
135.0
197.5
-179.5
198.0
288.0
200.5
-222.5
165.5
139.0
228.0
211.5
197.5
102.5
233.0
95.5
-129.0
187.5
158.0
295.0
240.5
-222.5
-1841.5
198.0
113.0
305.0
-482.5
125.5
215.0
110.0
-180.0
170.0
-62.5
215.5
132.5
187.5
135.0
-65.0
138.0
-1972.0
240.5
-237.5
610.0
267.5
52.5
-211.5
217.5
88.0
305.5
165.5
115.0
182.5
-69.5
333.0
363.0
112.5
-15.5
150.5
118.0
-52.5
318.0
174.0
198.0
-5705.0
160.5
155.0
125.0
165.0
259.0
165.5
155.0
-236.0
220.5
-15.5
117.5
367.5
237.5
255.0
85.0
-5342.5
141.5
-3582.5
-600.0
915.5
179.0
190.0
-47.5
275.5
-5.0
195.0
128.0
146.5
750.5
153.0
-5157.5
-279.5
219.0
154.0
153.0
-234.5
248.0
182.5
122.5
155.5
1078.0
102.5
358.0
152.5
261.5
239.0
128.0
111.5
93.0
310.5
-87.0
158.0
113.0
165.5
120.0
256.5
90.5
245.0
159.0
160.0
-5272.0
-88.5
159.0
169.0
147.5
-1149.5
-372.0
-270.0
95.0
142.5
212.5
154.0
425.0
153.0
213.0
280.5
-80.5
-45.90000000000003
-2250.5
123.50000000000003
149.40000000000006
219.0
108.0
180.0
271.19999999999993
202.5
121.8000000000001
47.599999999999966
-35.0
281.5
307.5
99.80000000000001
154.0
166.30000000000004
271.5
205.5
145.5
265.0
113.0
144.0
88.0
-204.5
204.0
215.0
177.5
168.0
263.0
66.5
258.0
-5477.5
94.5
-139.0
190.5
160.0
-35.5
149.0
100.5
130.0
-40.0
175.0
132.5
107.5
143.0
-5097.5
97.5
-1880.0
-15.0
213.0
-601.0
282.5
276.5
113.0
106.5
-1011.5
128.0
150.0
145.5
233.0
209.0
136.5
240.0
7.5
-1535.0
238.0
185.0
157.5
-1660.0
-15.5
-145.0
178.0
-4997.5
182.5
197.5
355.5
130.0
232.5
-5420.0
190.0
128.0
115.0
2.5
149.0
220.0
-87.0
-447.5
-4122.5
-67.5
-425.0
283.0
925.0
49.5
-15.0
233.0
215.5
234.0
154.0
141.5
226.5
220.0
110.5
270.0
253.0
-1944.0
215.0
250.5
155.0
260.5
185.0
261.5
232.5
177.5
-97.5
-196.0
230.0
205.5
-367.0
265.5
180.0
135.5
139.0
103.0
314.0
192.5
179.0
97.5
52.5
135.0
184.0
-305.0
147.5
206.5
157.5
243.0
-6125.0
257.5
125.60000000000002
190.0
-6225.0
96.5
350.0
193.0
185.5
206.5
223.0
-225.0
117.5
170.0
223.0
175.5
210.0
-222.0
148.60000000000002
-133.0
-193.5
152.5
-152.0
-6245.0
-2.0
50.5
-140.5
185.5
125.5
208.0
-200.0
202.5
112.5
119.0
210.5
-1199.5
-2.5
102.5
0.5
275.5
135.5
-32.5
235.5
-617.0
110.0
222.5
-372.0
-53.0
306.5
117.5
-5095.0
223.0
-257.0
-5760.0
11.5
182.5
160.0
325.5
151.5
-327.5
-5655.0
62.5
-5550.0
195.0
92.5
-5290.0
215.0
59.0
179.0
188.0
-2103.5
253.0
118.0
335.0
85.0
207.5
229.0
152.5
-188.5
-177.5
274.0
220.0
-5169.5
128.0
-1260.0
140.0
147.5
140.0
-505.0
155.0
225.5
188.0
131.5
1111.5
180.0
-4897.5
-687.5
125.0
180.0
111.5
-5582.5
232.5
153.0
-130.5
102.5
189.0
157.5
-5685.0
325.0
-6870.0
-520.5
-3027.0
32.5

500
trainedWinners.txt Normal file
View File

@ -0,0 +1,500 @@
2
0
2
2
2
1
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
0
2
2
2
1
2
2
2
1
2
2
0
2
2
2
0
1
2
2
2
2
2
2
1
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
1
2
2
2
2
2
1
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
1
2
2
2
2
1
1
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
0
2
1
1
2
2
2
2
2
2
2
2
2
2
1
2
2
1
2
2
2
0
2
2
2
2
2
2
2
2
2
1
2
2
2
2
2
0
2
2
2
0
2
2
1
2
2
2
2
2
0
2
0
2
2
2
2
2
2
2
2
2
0
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
0
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
22
2
2
1
2
1
2
2
2
2
2
2
2
2
2
1
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
0
1
1
2
2
1
2
2
2
1
2
2
2
2
0
2
2
2
2
1
2
2
2
2
1
2
2
2
2
2
2
2
2
1
2
2
2
1
1
2
2
0
2
2
2
2
2
0
2
2
2
1
2
2
1
2
2
2
1
2
0
1
2
2
2
2
2
2
2
2
2
2
2
1
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
0
2
2
2
0
2
2
2
2
2
2
2
2
3
2
2
2
2
2
1
2
2
2
0
2
2
1
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
1
2
2
0
2
2
0
2
2
2
2
2
2
0
2
0
2
2
0
2
2
2
2
2
2
2
2
2
2
2
2
1
2
2
2
2
2
2
2
2
2
2
2
2
2
0
2
0
2
2
2
2
0
2
2
1
2
0
2
0
2
0
1
2
2

390
utilities/Board.py Normal file
View File

@ -0,0 +1,390 @@
from __future__ import annotations
import pygame
from copy import deepcopy
from .constants import BLACK, ROWS, GREEN, SQUARE_SIZE, COLS, WHITE
from .piece import Piece
class Board:
    """
    Draughts board: the grid of squares, the per-colour piece and king counters and the move rules.

    Empty squares are stored as ``0``; occupied squares hold a ``Piece``.
    WHITE starts on rows 0-2 and moves towards higher rows, GREEN starts on rows 5-7
    and moves towards row 0.
    """

    def __init__(self) -> None:
        """
        Constructor for the Board class
        :return: None
        """
        self.board = []
        self.greenLeft = self.whiteLeft = 12
        self.greenKings = self.whiteKings = 0
        # RGB value used to paint the squares; GREEN itself is compared against piece colours.
        self.green = (144, 184, 59)
        self._createBoard()

    def _drawSquares(self, win: "pygame.Surface") -> None:
        """
        Draws the squares on the board
        :param win: The window
        :return: None
        """
        win.fill(BLACK)
        for row in range(ROWS):
            for col in range(row % 2, ROWS, 2):
                pygame.draw.rect(win, self.green, (row * SQUARE_SIZE, col * SQUARE_SIZE, SQUARE_SIZE, SQUARE_SIZE))

    def _createBoard(self) -> None:
        """
        Creates a board representation of the game
        :return: None
        """
        for row in range(ROWS):
            squares = []
            for col in range(COLS):
                # Pieces only ever stand on squares where column and row have opposite parity.
                playable = col % 2 == ((row + 1) % 2)
                if playable and row < 3:
                    squares.append(Piece(row, col, WHITE))
                elif playable and row > 4:
                    squares.append(Piece(row, col, GREEN))
                else:
                    squares.append(0)
            self.board.append(squares)

    def draw(self, win: "pygame.Surface") -> None:
        """
        Draws the squares and then every piece on the board
        :param win: The window
        :return: None
        """
        self._drawSquares(win)
        for row in self.board:
            for piece in row:
                if piece != 0:
                    piece.draw(win)

    def move(self, piece: "Piece", row: int, col: int) -> None:
        """
        Moves a piece and make it a king if it reaches the end of the board
        :param piece: Piece to move
        :param row: Row to move to
        :param col: Column to move to
        :return: None
        """
        self.board[piece.row][piece.col], self.board[row][col] = self.board[row][col], self.board[piece.row][piece.col]
        piece.move(row, col)
        # Crown only once: an existing king re-entering an end row must not be counted again.
        if (row == ROWS - 1 or row == 0) and not piece.king:
            piece.makeKing()
            if piece.colour == WHITE:
                self.whiteKings += 1
            elif piece.colour == GREEN:
                self.greenKings += 1

    def remove(self, skipped: list) -> None:
        """
        Removes captured pieces from the board and updates the piece counters
        :param skipped: The pieces to remove (empty markers are ignored)
        :return: None
        """
        for piece in skipped:
            # Check for the empty marker before touching piece attributes.
            if piece == 0:
                continue
            self.board[piece.row][piece.col] = 0
            if piece.colour == GREEN:
                self.greenLeft -= 1
            else:
                self.whiteLeft -= 1

    def getAllMoves(self, colour: int) -> list:
        """
        Gets all the possible moves for a player
        :param colour: colour of the player
        :return: list of boards, one deep copy per playable move with that move applied
        """
        captures = []
        quiet = []
        for piece in self.getAllPieces(colour):
            for target, skip in self.getValidMoves(piece).items():
                (captures if skip else quiet).append((piece, target, skip))
        # Captures are forced: quiet moves are only considered when no piece can capture.
        boards = []
        for piece, target, skip in captures or quiet:
            tempBoard = deepcopy(self)
            tempPiece = tempBoard.getPiece(piece.row, piece.col)
            boards.append(self._simulateMove(tempPiece, target, tempBoard, skip))
        return boards

    def _simulateMove(self, piece: "Piece", move: tuple, board: "Board", skip: list) -> "Board":
        """
        Simulates a move on the board
        :param piece: Piece to move
        :param move: (row, col) square to move to
        :param board: Board to make the move on
        :param skip: Pieces captured by the move
        :return: Board after the move
        """
        board.move(piece, move[0], move[1])
        if skip:
            board.remove(skip)
        return board

    def getPiece(self, row: int, col: int) -> "Piece | int":
        """
        Gets a piece from the board
        :param row: Row of the piece
        :param col: Column of the piece
        :return: The piece on that square, or 0 if the square is empty
        """
        return self.board[row][col]

    def winner(self) -> "int | None":
        """
        Gets the winner of the game
        :return: WHITE if green has no pieces left, GREEN if white has none, otherwise None
        """
        if self.greenLeft <= 0:
            return WHITE
        if self.whiteLeft <= 0:
            return GREEN
        return None

    def getValidMoves(self, piece: "Piece") -> dict:
        """
        Gets all the valid moves for a piece
        :param piece: Piece to get the moves for
        :return: dictionary mapping (row, col) targets to the list of pieces captured on the way
        """
        moves = {}
        left = piece.col - 1
        right = piece.col + 1
        row = piece.row
        if piece.colour == GREEN or piece.king:
            moves.update(self._traverseLeft(row - 1, max(row - 3, -1), -1, piece.colour, left))
            moves.update(self._traverseRight(row - 1, max(row - 3, -1), -1, piece.colour, right))
        if piece.colour == WHITE or piece.king:
            moves.update(self._traverseLeft(row + 1, min(row + 3, ROWS), 1, piece.colour, left))
            moves.update(self._traverseRight(row + 1, min(row + 3, ROWS), 1, piece.colour, right))
        # Forced capture: if this piece can capture, only its capturing moves are valid.
        captures = {target: skip for target, skip in moves.items() if skip}
        return captures or moves

    def scoreOfTheBoard(self) -> int:
        """
        Calculates the score of the board
        :return: number of white pieces left minus number of green pieces left
        """
        return self.whiteLeft - self.greenLeft

    def getAllPieces(self, colour: int) -> list:
        """
        Gets all the pieces of a player
        :param colour: Piece colour
        :return: Pieces of the player
        """
        return [piece for row in self.board for piece in row if piece != 0 and piece.colour == colour]

    def _traverseLeft(self, start: int, stop: int, step: int, colour: int, left: int, skipped: list = None) -> dict:
        """
        Traverses the left diagonal of the board
        :param start: Start row
        :param stop: Stop row (exclusive)
        :param step: Row step (-1 towards row 0, 1 towards the last row)
        :param colour: colour of the player
        :param left: Column to start from
        :param skipped: Pieces already captured earlier in this move
        :return: dictionary of moves
        """
        return self._traverseDiagonal(start, stop, step, colour, left, -1, skipped)

    def _traverseRight(self, start: int, stop: int, step: int, colour: int, right: int, skipped: list = None) -> dict:
        """
        Traverses the right diagonal of the board
        :param start: Start row
        :param stop: Stop row (exclusive)
        :param step: Row step (-1 towards row 0, 1 towards the last row)
        :param colour: colour of the player
        :param right: Column to start from
        :param skipped: Pieces already captured earlier in this move
        :return: dictionary of moves
        """
        return self._traverseDiagonal(start, stop, step, colour, right, 1, skipped)

    def _traverseDiagonal(self, start: int, stop: int, step: int, colour: int, col: int, colStep: int,
                          skipped: list = None) -> dict:
        """
        Shared walk behind _traverseLeft and _traverseRight
        :param start: Start row
        :param stop: Stop row (exclusive)
        :param step: Row step
        :param colour: colour of the player
        :param col: Column to start from
        :param colStep: Column step (-1 for left, 1 for right)
        :param skipped: Pieces already captured earlier in this move
        :return: dictionary of moves
        """
        # A fresh list per call instead of a shared mutable default argument.
        skipped = [] if skipped is None else skipped
        moves = {}
        last = []
        for row in range(start, stop, step):
            if col < 0 or col >= COLS:
                break
            found = self._traverse(row, col, skipped, moves, step, last, colour)
            if found is None:
                break
            last = found
            col += colStep
        return moves

    def _traverse(self, row: int, col: int, skipped: list, moves: dict, step: int, last: list,
                  colour: int) -> "list | None":
        """
        Inspects one square of a diagonal walk and records any move ending on it in ``moves``
        :param row: Row to traverse
        :param col: Column to traverse
        :param skipped: List of pieces already jumped
        :param moves: Dictionary of moves (updated in place)
        :param step: Row step
        :param last: The opposing piece passed on the previous square, if any
        :param colour: Colour of the player
        :return: the opposing piece found on this square as a list, or None when the walk must stop
        """
        current = self.board[row][col]
        if current == 0:
            if skipped and not last:
                # After a capture the move may only continue with another capture.
                return None
            moves[(row, col)] = last + skipped
            if last:
                # Same bounds as getValidMoves: -1 so that a follow-up jump may land on row 0.
                if step == -1:
                    rowCalc = max(row - 3, -1)
                else:
                    rowCalc = min(row + 3, ROWS)
                # Carry every piece captured so far, otherwise a third jump loses the first capture.
                captured = last + skipped
                moves.update(self._traverseLeft(row + step, rowCalc, step, colour, col - 1, skipped=captured))
                moves.update(self._traverseRight(row + step, rowCalc, step, colour, col + 1, skipped=captured))
            return None
        if current.colour == colour:
            return None
        return [current]

    def step(self, move: int, colour: int) -> tuple:
        """
        Takes a move and executes it
        :param move: The encoded move: four digits giving start row, start column, end row and
                     end column, each counted from 1
        :param colour: The colour of the player
        :return: tuple (reward, board, done). The reward is 2 for a single capture,
                 3 + 0.2 per captured piece for a multiple capture, -0.5 for a move without
                 capture, 0 when the move cannot be played and 10 when ``colour`` has won
        """
        start, end = self._decode(move)
        start = [start[0] - 1, start[1] - 1]
        end = [end[0] - 1, end[1] - 1]
        reward = 0
        done = False
        piece = 0
        # Squares outside the board are treated like any other unplayable move.
        if all(0 <= r < ROWS and 0 <= c < COLS for r, c in (start, end)):
            piece = self.getPiece(start[0], start[1])
            if piece == 0:
                # The encoding may list the squares in reverse order; try the other end.
                start, end = end, start
                piece = self.getPiece(start[0], start[1])
        if piece != 0:
            moves = self.getValidMoves(piece)
            target = tuple(end)
            if target in moves:
                captured = moves[target]
                self._simulateMove(piece, target, self, captured)
                if len(captured) == 1:
                    reward = 2
                elif len(captured) > 1:
                    reward = 3 + len(captured) * 0.2
                else:
                    reward = -0.5
        if self.winner() == colour:
            done = True
            reward = 10
        return reward, self, done

    def _decode(self, move: int) -> tuple:
        """
        Decodes the move from a integer to a start and end tuple
        :param move: The move to decode
        :return: Start and end positions as [row, col] lists, read from the first four digits
        """
        digits = str(move)
        start = [int(digits[0]), int(digits[1])]
        end = [int(digits[2]), int(digits[3])]
        return start, end
# def reset(self):
# self.board = []
# self.whiteLeft = self.greenLeft = 12
# self.whiteKings = self.greenKings = 0
# self._createBoard()
# return self.board

View File

@ -1,185 +0,0 @@
import pygame
from .constants import BLACK, ROWS, GREEN, SQUARE_SIZE, COLS, WHITE
from .piece import Piece
class Board:
    """
    Draughts board: the grid of squares, the per-colour piece and king counters and the move rules.

    Empty squares are stored as ``None``; occupied squares hold a ``Piece``.
    WHITE starts on rows 0-2 and moves towards higher rows, GREEN starts on rows 5-7
    and moves towards row 0.
    """

    def __init__(self):
        """
        Constructor for the Board class
        """
        self.board = []
        self.greenLeft = self.whiteLeft = 12
        self.greenKings = self.whiteKings = 0
        self.createBoard()

    def drawSquares(self, win):
        """
        Draws the squares on the board
        :param win: The window
        """
        win.fill(BLACK)
        for row in range(ROWS):
            for col in range(row % 2, ROWS, 2):
                pygame.draw.rect(win, GREEN, (row * SQUARE_SIZE, col * SQUARE_SIZE, SQUARE_SIZE, SQUARE_SIZE))

    def createBoard(self):
        """
        Fills the board with the starting position
        """
        for row in range(ROWS):
            squares = []
            for col in range(COLS):
                # Pieces only ever stand on squares where column and row have opposite parity.
                playable = col % 2 == ((row + 1) % 2)
                if playable and row < 3:
                    squares.append(Piece(row, col, WHITE))
                elif playable and row > 4:
                    squares.append(Piece(row, col, GREEN))
                else:
                    squares.append(None)
            self.board.append(squares)

    def draw(self, win):
        """
        Draws the squares and then every piece on the board
        :param win: The window
        """
        self.drawSquares(win)
        for row in self.board:
            for piece in row:
                if piece is not None:
                    piece.draw(win)

    def move(self, piece, row, col):
        """
        Moves a piece and makes it a king if it reaches the end of the board
        :param piece: Piece to move
        :param row: Row to move to
        :param col: Column to move to
        """
        self.board[piece.row][piece.col], self.board[row][col] = self.board[row][col], self.board[piece.row][piece.col]
        piece.move(row, col)
        # Crown only once: an existing king re-entering an end row must not be counted again.
        if (row == ROWS - 1 or row == 0) and not piece.king:
            piece.makeKing()
            if piece.colour == WHITE:
                self.whiteKings += 1
            else:
                self.greenKings += 1

    def remove(self, skipped):
        """
        Removes captured pieces from the board and updates the piece counters
        :param skipped: The pieces to remove (None entries are ignored)
        """
        for piece in skipped:
            # Check for None before touching piece attributes.
            if piece is None:
                continue
            self.board[piece.row][piece.col] = None
            if piece.colour == GREEN:
                self.greenLeft -= 1
            else:
                self.whiteLeft -= 1

    def getPiece(self, row, col):
        """
        Gets a piece from the board
        :param row: Row of the piece
        :param col: Column of the piece
        :return: The piece on that square, or None if the square is empty
        """
        return self.board[row][col]

    def winner(self):
        """
        Gets the winner of the game
        :return: WHITE if green has no pieces left, GREEN if white has none, otherwise None
        """
        if self.greenLeft <= 0:
            return WHITE
        if self.whiteLeft <= 0:
            return GREEN
        return None

    def getValidMoves(self, piece):
        """
        Gets all the valid moves for a piece
        :param piece: Piece to get the moves for
        :return: dictionary mapping (row, col) targets to the list of pieces captured on the way
        """
        moves = {}
        left = piece.col - 1
        right = piece.col + 1
        row = piece.row
        # Each direction is scanned once; kings scan both directions.
        if piece.colour == GREEN or piece.king:
            moves.update(self._traverseLeft(row - 1, max(row - 3, -1), -1, piece.colour, left))
            moves.update(self._traverseRight(row - 1, max(row - 3, -1), -1, piece.colour, right))
        if piece.colour == WHITE or piece.king:
            moves.update(self._traverseLeft(row + 1, min(row + 3, ROWS), 1, piece.colour, left))
            moves.update(self._traverseRight(row + 1, min(row + 3, ROWS), 1, piece.colour, right))
        # Forced capture: if this piece can capture, only its capturing moves are valid.
        captures = {target: skip for target, skip in moves.items() if skip}
        return captures or moves

    def scoreOfTheBoard(self):
        """
        Calculates the score of the board
        :return: number of white pieces left minus number of green pieces left
        """
        return self.whiteLeft - self.greenLeft

    def getAllPieces(self, colour):
        """
        Gets all the pieces of a player
        :param colour: Piece colour
        :return: Pieces of the player
        """
        return [piece for row in self.board for piece in row if piece is not None and piece.colour == colour]

    def _traverseLeft(self, start, stop, step, colour, left, skipped=None):
        """
        Traverses the left diagonal of the board
        :param start: Start row
        :param stop: Stop row (exclusive)
        :param step: Row step (-1 towards row 0, 1 towards the last row)
        :param colour: colour of the player
        :param left: Column to start from
        :param skipped: Pieces already captured earlier in this move
        :return: dictionary of moves
        """
        return self._traverseDiagonal(start, stop, step, colour, left, -1, skipped)

    def _traverseRight(self, start, stop, step, colour, right, skipped=None):
        """
        Traverses the right diagonal of the board
        :param start: Start row
        :param stop: Stop row (exclusive)
        :param step: Row step (-1 towards row 0, 1 towards the last row)
        :param colour: colour of the player
        :param right: Column to start from
        :param skipped: Pieces already captured earlier in this move
        :return: dictionary of moves
        """
        return self._traverseDiagonal(start, stop, step, colour, right, 1, skipped)

    def _traverseDiagonal(self, start, stop, step, colour, col, colStep, skipped=None):
        """
        Shared walk behind _traverseLeft and _traverseRight
        :param start: Start row
        :param stop: Stop row (exclusive)
        :param step: Row step
        :param colour: colour of the player
        :param col: Column to start from
        :param colStep: Column step (-1 for left, 1 for right)
        :param skipped: Pieces already captured earlier in this move
        :return: dictionary of moves
        """
        # A fresh list per call instead of a shared mutable default argument.
        skipped = [] if skipped is None else skipped
        moves = {}
        last = []
        for row in range(start, stop, step):
            if col < 0 or col >= COLS:
                break
            found = self._traverse(row, col, skipped, moves, step, last, colour)
            if found is None:
                break
            last = found
            col += colStep
        return moves

    def _traverse(self, row, col, skipped, moves, step, last, colour):
        """
        Inspects one square of a diagonal walk and records any move ending on it in ``moves``
        :param row: Row to traverse
        :param col: Column to traverse
        :param skipped: List of pieces already jumped
        :param moves: Dictionary of moves (updated in place)
        :param step: Row step
        :param last: The opposing piece passed on the previous square, if any
        :param colour: Colour of the player
        :return: the opposing piece found on this square as a list, or None when the walk must stop
        """
        current = self.board[row][col]
        if current is None:
            if skipped and not last:
                # After a capture the move may only continue with another capture.
                return None
            moves[(row, col)] = last + skipped
            if last:
                # Same bounds as getValidMoves: -1 so that a follow-up jump may land on row 0.
                if step == -1:
                    rowCalc = max(row - 3, -1)
                else:
                    rowCalc = min(row + 3, ROWS)
                # Carry every piece captured so far, otherwise a third jump loses the first capture.
                captured = last + skipped
                moves.update(self._traverseLeft(row + step, rowCalc, step, colour, col - 1, skipped=captured))
                moves.update(self._traverseRight(row + step, rowCalc, step, colour, col + 1, skipped=captured))
            return None
        if current.colour == colour:
            return None
        return [current]

View File

@ -4,10 +4,10 @@ WIDTH, HEIGHT = 800, 800
ROWS, COLS = 8, 8 ROWS, COLS = 8, 8
SQUARE_SIZE = WIDTH // COLS SQUARE_SIZE = WIDTH // COLS
# RGB color # RGB colour
GREEN = (144, 184, 59) GREEN = 1
WHITE = (255, 255, 255) WHITE = 2
BLACK = (0, 0, 0) BLACK = (0, 0, 0)
BLUE = (0, 0, 255) BLUE = (0, 0, 255)
GREY = (128, 128, 128) GREY = (128, 128, 128)

View File

@ -1,42 +1,74 @@
from __future__ import annotations
import pygame import pygame
from utilities.board import Board from utilities.Board import Board
from utilities.constants import GREEN, WHITE, BLUE, SQUARE_SIZE from utilities.constants import GREEN, WHITE, BLUE, SQUARE_SIZE
class GameManager: class GameManager:
def __init__(self, win, colour): def __init__(self, win: pygame.display, colour: int) -> None:
"""
Constructor for the GameManager class
:param win: The window
:param colour: The colour of the player
"""
self._init(colour) self._init(colour)
self.win = win self.win = win
def _init(self, colour): def _init(self, colour: int) -> None:
"""
Initializes the game
:param colour: the colour of the player
"""
self.selected = None self.selected = None
self.board = Board() self.board = Board()
self.turn = colour self.turn = colour
self.validMoves = {} self.validMoves = {}
self.legCount = 0 self.legCount = 0
def update(self): def update(self) -> None:
"""
Updates the GUI
return: None
"""
self.board.draw(self.win) self.board.draw(self.win)
self.drawValidMoves(self.validMoves) self.drawValidMoves(self.validMoves)
pygame.display.update() pygame.display.update()
def reset(self): def reset(self) -> None:
"""
Resets the game
:return: None
"""
self._init(self.turn) self._init(self.turn)
def select(self, row, col): def select(self, row: int, col: int) -> bool:
"""
Selects a piece
:param row: Row of the piece
:param col: Column of the piece
:return: True
"""
if self.selected: if self.selected:
result = self._move(row, col) result = self._move(row, col)
if not result: if not result:
self.selected = None self.selected = None
self.select(row, col) self.select(row, col)
piece = self.board.getPiece(row, col) piece = self.board.getPiece(row, col)
if piece is not None and piece.colour == self.turn: if piece != 0 and piece.colour == self.turn:
self.selected = piece self.selected = piece
self.validMoves = self.board.getValidMoves(piece) self.validMoves = self.board.getValidMoves(piece)
return True return True
def _move(self, row, col): def _move(self, row: int, col: int) -> bool:
"""
Moves a piece
:param row: Row of the piece
:param col: Column of the piece
:return: True if the move was successful, False otherwise
"""
piece = self.board.getPiece(row, col) piece = self.board.getPiece(row, col)
if self.selected and piece is None and (row, col) in self.validMoves: if self.selected and piece == 0 and (row, col) in self.validMoves:
self.board.move(self.selected, row, col) self.board.move(self.selected, row, col)
skipped = self.validMoves[row, col] skipped = self.validMoves[row, col]
if self.validMoves[list(self.validMoves.keys())[0]]: if self.validMoves[list(self.validMoves.keys())[0]]:
@ -58,21 +90,39 @@ class GameManager:
self.validMoves = {} self.validMoves = {}
if self.turn == GREEN: if self.turn == GREEN:
self.turn = WHITE self.turn = WHITE
else: return
self.turn = GREEN self.turn = GREEN
def drawValidMoves(self, moves): def drawValidMoves(self, moves: list) -> None:
"""
Draws the valid moves
:param moves: list of valid moves
:return: None
"""
for row, col in moves: for row, col in moves:
pygame.draw.circle(self.win, BLUE, pygame.draw.circle(self.win, BLUE,
(col * SQUARE_SIZE + SQUARE_SIZE // 2, row * SQUARE_SIZE + SQUARE_SIZE // 2), 15) (col * SQUARE_SIZE + SQUARE_SIZE // 2, row * SQUARE_SIZE + SQUARE_SIZE // 2), 15)
def winner(self): def winner(self) -> int or None:
"""
Gets the winner
:return: The winner
"""
return self.board.winner() return self.board.winner()
def getBoard(self): def getBoard(self) -> Board:
"""
Gets the board
:return: The board
"""
return self.board return self.board
def aiMove(self, board): def aiMove(self, board: Board) -> None:
"""
Makes a move for the AI
:param board: The new board
:return: None
"""
if board is None: if board is None:
# colour = "green" if self.turn == GREEN else "white" # colour = "green" if self.turn == GREEN else "white"
# print("no move left for " + colour + " to make") # print("no move left for " + colour + " to make")

View File

@ -1,10 +1,16 @@
import pygame.draw import pygame.draw
from utilities.constants import SQUARE_SIZE, GREY, CROWN from utilities.constants import SQUARE_SIZE, GREY, CROWN, GREEN
class Piece: class Piece:
def __init__(self, row, col, colour): def __init__(self, row: int, col: int, colour: int) -> None:
"""
Initialises the piece class, which represents a piece on the board. Constructor for the piece class
:param row: Row of the piece
:param col: Column of the piece
:param colour: Colour of the piece
"""
self.row = row self.row = row
self.col = col self.col = col
self.colour = colour self.colour = colour
@ -14,25 +20,50 @@ class Piece:
self.calcPosition() self.calcPosition()
self.padding = 20 self.padding = 20
self.border = 2 self.border = 2
self.green = (144, 184, 59)
self.white = (255, 255, 255)
def calcPosition(self): def calcPosition(self) -> None:
"""
Calculates the position of the piece
:return: None
"""
self.x = SQUARE_SIZE * self.col + SQUARE_SIZE // 2 self.x = SQUARE_SIZE * self.col + SQUARE_SIZE // 2
self.y = SQUARE_SIZE * self.row + SQUARE_SIZE // 2 self.y = SQUARE_SIZE * self.row + SQUARE_SIZE // 2
def makeKing(self): def makeKing(self) -> None:
"""
Makes the piece a king
:return: None
"""
self.king = True self.king = True
def draw(self, win): def draw(self, win) -> None:
"""
Draws the piece
:param win: The window to draw the piece on
:return: None
"""
radius = SQUARE_SIZE // 2 - self.padding radius = SQUARE_SIZE // 2 - self.padding
pygame.draw.circle(win, GREY, (self.x, self.y), radius + self.border) pygame.draw.circle(win, GREY, (self.x, self.y), radius + self.border)
pygame.draw.circle(win, self.colour, (self.x, self.y), radius) pygame.draw.circle(win, self.green if self.colour == GREEN else self.white, (self.x, self.y), radius)
if self.king: if self.king:
win.blit(CROWN, (self.x - CROWN.get_width() // 2, self.y - CROWN.get_height() // 2)) win.blit(CROWN, (self.x - CROWN.get_width() // 2, self.y - CROWN.get_height() // 2))
def move(self, row, col): def move(self, row: int, col: int) -> None:
"""
Moves the piece to a new position
:param row: Row to move to
:param col: Column to move to
:return: None
"""
self.row = row self.row = row
self.col = col self.col = col
self.calcPosition() self.calcPosition()
def __repr__(self): def __repr__(self) -> str:
"""
String representation of the piece
:return: String representation of the colour
"""
return str(self.colour) return str(self.colour)

100
winners-3.txt Normal file
View File

@ -0,0 +1,100 @@
2
2
2
2
0
2
2
2
2
2
2
0
0
2
1
2
2
2
2
2
2
2
2
2
2
0
2
2
2
2
0
2
2
2
2
2
2
2
2
2
2
2
2
1
0
2
2
2
2
2
2
2
2
2
1
2
1
0
2
0
0
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
1
2
2
2
0
2
2
2
2
2
2
2
2
2
2
2
2

100
winners-5.txt Normal file
View File

@ -0,0 +1,100 @@
2
2
1
2
1
1
2
2
2
2
2
2
2
2
2
2
0
2
2
2
1
2
2
0
2
2
0
2
2
0
0
2
2
2
2
2
0
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
0
2
2
2
0
2
0
2
2
2
2
1
0
2
2
2
2
2
2
1
2
2
2
2
0
2
0
2
2
2
2
2
1
2
2
1
2
2
2
2
2
2
2
2