Compare commits


No commits in common. "6d4e364f8d9fef9cc6ab34bac15853ffe9535d49" and "1eb0a04f30fddc4bcccbe913bd4574729539b346" have entirely different histories.

12 changed files with 223 additions and 547 deletions


@@ -4,7 +4,7 @@
 <content url="file://$MODULE_DIR$">
 <excludeFolder url="file://$MODULE_DIR$/venv" />
 </content>
-<orderEntry type="jdk" jdkName="$USER_HOME$/anaconda3" jdkType="Python SDK" />
+<orderEntry type="jdk" jdkName="Python 3.11 (draughts)" jdkType="Python SDK" />
 <orderEntry type="sourceFolder" forTests="false" />
 </component>
 </module>


@@ -1,4 +1,4 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <project version="4">
-<component name="ProjectRootManager" version="2" project-jdk-name="$USER_HOME$/anaconda3" project-jdk-type="Python SDK" />
+<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.11 (draughts)" project-jdk-type="Python SDK" />
 </project>

Report.pdf (binary file, not shown)

main.py

@@ -1,10 +1,7 @@
 import sys
 import pygame
-import numpy as np
-from matplotlib import pyplot as plt
-from reinforcementLearning.ReinforcementLearning import ReinforcementLearning
 from utilities.constants import WIDTH, HEIGHT, SQUARE_SIZE, WHITE, GREEN
 from utilities.gameManager import GameManager
 from minimax.minimaxAlgo import MiniMax
@@ -47,15 +44,15 @@ def drawMultiLineText(surface, text, pos, font, color=pygame.Color('black')):
 y += word_height  # Start on new row.
-def main(difficulty=0):
+def main():
 pygame.init()
 screen = pygame.display.set_mode((WIDTH, HEIGHT))
 menuClock = pygame.time.Clock()
 click = False
 width = screen.get_width()
-font = pygame.font.SysFont("", 25)
-if difficulty == 0:
+font = pygame.font.SysFont(None, 25)
+difficulty = 0
 while True:
 # menu
 screen.fill((128, 128, 128))
@@ -110,7 +107,7 @@ def main(difficulty=0):
 pygame.display.update()
 menuClock.tick(60)
+if difficulty != 0:
 game(difficulty)
@@ -119,8 +116,8 @@ def rulesGUI():
 menuClock = pygame.time.Clock()
 click = False
 width = screen.get_width()
-titleFont = pygame.font.SysFont("", 48)
-font = pygame.font.SysFont("", 21)
+titleFont = pygame.font.SysFont(None, 48)
+font = pygame.font.SysFont(None, 21)
 while True:
 screen.fill((128, 128, 128))
 drawText("Rules", titleFont, (255, 255, 255), screen, width / 2, 20)
@@ -178,97 +175,39 @@ def game(difficulty):
 run = True
 clock = pygame.time.Clock()
 gameManager = GameManager(WIN, GREEN)
-rl = ReinforcementLearning(gameManager.board.getAllMoves(WHITE), gameManager.board, WHITE, gameManager)
-model = rl.buildMainModel()
-model.load_weights("./modelWeights/model_final.h5")
-mm = MiniMax()
-totalReward = []
-winners = []
-for i in range(100):
-score = 0
-for j in range(200):
-print(j)
+while run:
 clock.tick(FPS)
-reward = 0
 if gameManager.turn == WHITE:
-# mm = MiniMax()
-# value, newBoard = mm.AI(difficulty, WHITE, gameManager)
-# gameManager.aiMove(newBoard)
-# reward, newBoard = rl.AI(gameManager.board)
-actionSpace = rl.encodeMoves(WHITE, gameManager.board)
-if len(actionSpace) == 0:
-print("Cannot make move")
-continue
-totalMoves = len(actionSpace)
-# moves = np.squeeze(moves)
-moves = np.pad(actionSpace, (0, rl.maxSize - totalMoves), 'constant', constant_values=(1, 1))
-act_values = model.predict(rl.normalise(moves))
-val = np.argmax(act_values[0])
-val = val if val < totalMoves else totalMoves - 1
-reward, newBoard, done = gameManager.board.step(actionSpace[val], WHITE)
-# if newBoard is None:
-# print("Cannot make move")
-# continue
+mm = MiniMax()
+value, newBoard = mm.AI(gameManager.getBoard(), difficulty, WHITE, gameManager)
 gameManager.aiMove(newBoard)
-# time.sleep(0.15)
-gameManager.update()
-pygame.display.update()
 if gameManager.turn == GREEN:
-value, newBoard = mm.AI(difficulty, GREEN, gameManager)
+mm = MiniMax()
+value, newBoard = mm.AI(gameManager.getBoard(), difficulty, GREEN, gameManager)
 gameManager.aiMove(newBoard)
-# time.sleep(0.15)
-score += reward
-if gameManager.winner() is not None:
-print("Green" if gameManager.winner() == GREEN else "White", " wins")
-with open("winners.txt", "a+") as f:
-f.write(str(gameManager.winner()) + "\n")
-winners.append(gameManager.winner())
-break
-# if gameManager.turn == GREEN:
-# for event in pygame.event.get():
-# if event.type == pygame.QUIT:
-# break
-# if event.type == pygame.MOUSEBUTTONDOWN:
-# pos = pygame.mouse.get_pos()
-# row, col = getRowColFromMouse(pos)
-# # if gameManager.turn == GREEN:
-# gameManager.select(row, col)
+if gameManager.winner() != None:
+print(gameManager.winner())
+run = False
+for event in pygame.event.get():
+if event.type == pygame.QUIT:
+run = False
+if event.type == pygame.MOUSEBUTTONDOWN:
+pos = pygame.mouse.get_pos()
+row, col = getRowColFromMouse(pos)
+gameManager.select(row, col)
 gameManager.update()
 pygame.display.update()
-if gameManager.winner() is None:
-with open("winners.txt", "a+") as f:
-f.write(str(0) + "\n")
-winners.append(0)
-gameManager.reset()
-rl.resetScore()
-print("Game: ", i, " Reward: ", score)
-with open("rewards.txt", "a+") as f:
-f.write(str(score) + "\n")
-totalReward.append(score)
-# save model weights every 25 games
-if i % 250 == 0 and i != 0:
-rl.model.save("./modelWeights/model_" + str(i) + ".h5")
 # pygame.quit()
-rl.model.save("./modelWeights/model_final.h5")
-plt.plot([i for i in range(len(totalReward))], totalReward)
-plt.xlabel("Games")
-plt.ylabel("Reward")
-plt.show()
-fig, ax = plt.subplots()
-bar = ax.bar(["Draw", "White", "Green"], [winners.count(0), winners.count(WHITE), winners.count(GREEN)])
-ax.set(xlabel='Winner', ylabel='Frequency', ylim=[0, 500])
-ax.set_title("Winners")
-ax.bar_label(bar)
-plt.show()
-main(3)
+main()


@@ -1,38 +1,56 @@
 import random
+from copy import deepcopy
 from math import inf
 from utilities.constants import GREEN, WHITE
-class MiniMax:
-def AI(self, depth, maxPlayer, gameManager):
-if depth == 0 or gameManager.board.winner() is not None:
-return gameManager.board.scoreOfTheBoard(), gameManager.board
-if type(maxPlayer) == int:
+class MiniMax():
+def AI(self, board, depth, maxPlayer, gameManager):
+if depth == 0 or board.winner() is not None:
+return board.scoreOfTheBoard(), board
+if maxPlayer:
 maxEval = -inf
 bestMove = None
-for move in gameManager.board.getAllMoves(maxPlayer):
-evaluation = self.AI(depth - 1, False, gameManager)[0]
+for move in self.getAllMoves(board, maxPlayer):
+evaluation = self.AI(move, depth - 1, False, gameManager)[0]
 maxEval = max(maxEval, evaluation)
 if maxEval > evaluation:
 bestMove = move
 if maxEval == evaluation:
-# bestMove = move
 bestMove = bestMove if random.choice([True, False]) else move
 return maxEval, bestMove
 else:
 minEval = inf
 bestMove = None
 colour = WHITE if gameManager.turn == GREEN else GREEN
-for move in gameManager.board.getAllMoves(colour):
-evaluation = self.AI(depth - 1, True, gameManager)[0]
+for move in self.getAllMoves(board, colour):
+evaluation = self.AI(move, depth - 1, True, gameManager)[0]
 minEval = min(minEval, evaluation)
 if minEval < evaluation:
 bestMove = move
 if minEval == evaluation:
-# bestMove = move
 bestMove = bestMove if random.choice([True, False]) else move
 return minEval, bestMove
+def _simulateMove(self, piece, move, board, skip):
+board.move(piece, move[0], move[1])
+if skip:
+board.remove(skip)
+return board
+def getAllMoves(self, board, colour):
+moves = []
+for piece in board.getAllPieces(colour):
+validMoves = board.getValidMoves(piece)
+for move, skip in validMoves.items():
+tempBoard = deepcopy(board)
+tempPiece = tempBoard.getPiece(piece.row, piece.col)
+newBoard = self._simulateMove(tempPiece, move, tempBoard, skip)
+moves.append(newBoard)
+return moves


@@ -1,245 +1,96 @@
 import random
 from collections import deque
-from typing import Any
-from copy import deepcopy
 import numpy as np
 import tensorflow as tf
-from keras.engine.input_layer import InputLayer
-from keras.layers import BatchNormalization
-from tensorflow.python.keras import Sequential, regularizers, Input
-from tensorflow.python.keras.layers import Dense, Lambda, Dropout
-from tensorflow.python.keras.optimizer_v2.adam import Adam
-from minimax.minimaxAlgo import MiniMax
-from utilities import Board
-from utilities.constants import WHITE, GREEN
-from utilities.gameManager import GameManager
+from tensorflow.python.keras import Sequential, regularizers
+from tensorflow.python.keras.layers import Dense
 class ReinforcementLearning():
-def __init__(self, actionSpace: list, board: Board, colour: int, gameManager: GameManager) -> None:
-"""
-Constructor for the ReinforcementLearning class
-:param actionSpace: the number of possible actions
-:param board: the game board
-"""
-self.gameManager = gameManager
-self.actionSpace = actionSpace
-self.board = board
-self.state = self.board.board
-self.colour = colour
-self.score = 0
+def __init__(self, action_space, state_space, env):
+self.action_space = action_space
+self.state_space = state_space
+self.env = env
 self.epsilon = 1
 self.gamma = .95
-self.batchSize = 256
-self.maxSize = 32
-self.epsilonMin = .01
-self.epsilonDecay = .995
-self.learningRate = 0.0001
-self.memory = deque(maxlen=10000000)
-self.model = self.buildMainModel()
-print(self.model.summary())
+self.batch_size = 64
+self.epsilon_min = .01
+self.epsilon_decay = .995
+self.learning_rate = 0.001
+self.memory = deque(maxlen=100000)
+self.model = self._buildModel()
-def AI(self, board: Board) -> tuple:
-"""
-Learns to play the draughts game
-:return: the loss
-"""
-self.board = board
-self.state = self._convertState(self.board.board)
-self.actionSpace = self.encodeMoves(self.colour, self.board)
-if len(self.actionSpace) == 0:
-return self.score, None
-action = self._act()
-reward, nextState, done = self.board.step(action, self.colour)
-self.score += reward
-self.state = self._convertState(nextState.board)
-self._remember(deepcopy(self.board), action, reward, self.state, done)
-self._replay()
-return self.score, nextState
+def AI(self, episode):
+loss = []
+max_steps = 1000
+for e in range(episode):
+state = self.env.reset()
+state = np.reshape(state, (1, self.state_space))
+score = 0
+for i in range(max_steps):
+action = self.act(state)
+reward, next_state, done = self.env.step(action)
+score += reward
+next_state = np.reshape(next_state, (1, self.state_space))
+self.remember(state, action, reward, next_state, done)
+state = next_state
+self.replay()
+if done:
+print("episode: {}/{}, score: {}".format(e, episode, score))
+break
+loss.append(score)
-def buildMainModel(self) -> Sequential:
-"""
-Build the model for the AI
-:return: the model
-"""
+def _buildModel(self):
 # Board model
-modelLayers = [
-Lambda(lambda x: tf.reshape(x, [-1, 32])),
-Dense(512, activation='relu', kernel_regularizer=regularizers.l2(0.01)),
-Dropout(0.2),
-Dense(256, activation='relu', kernel_regularizer=regularizers.l2(0.01)),
-Dropout(0.2),
-Dense(128, activation='relu', kernel_regularizer=regularizers.l2(0.01)),
-Dropout(0.2),
-Dense(64, activation='relu', kernel_regularizer=regularizers.l2(0.01)),
-Dropout(0.2),
-Dense(32, activation='relu', kernel_regularizer=regularizers.l2(0.01)),
-Dropout(0.2),
-Dense(16, activation='linear', kernel_regularizer=regularizers.l2(0.01))
-]
-boardModel = Sequential(modelLayers)
-# boardModel.add(BatchNormalization())
-boardModel.compile(optimizer=Adam(learning_rate=self.learningRate), loss='mean_squared_error')
-boardModel.build(input_shape=(None, None))
-return boardModel
+board_model = Sequential()
+# input dimensions is 32 board position values
+board_model.add(Dense(64, activation='relu', input_dim=32))
+# use regularizers, to prevent fitting noisy labels
+board_model.add(Dense(32, activation='relu', kernel_regularizer=regularizers.l2(0.01)))
+board_model.add(Dense(16, activation='relu', kernel_regularizer=regularizers.l2(0.01))) # 16
+board_model.add(Dense(8, activation='relu', kernel_regularizer=regularizers.l2(0.01))) # 8
+# output isn't squashed, because it might lose information
+board_model.add(Dense(1, activation='linear', kernel_regularizer=regularizers.l2(0.01)))
+board_model.compile(optimizer='nadam', loss='binary_crossentropy')
+return board_model
-def _replay(self) -> None:
-"""
-trains the model
-:return: None (void)
-"""
-if len(self.memory) < self.batchSize:
-# Not enough data to replay and test the model
-return
-# Get a random sample from the memory
-minibatch = random.sample(self.memory, int(self.maxSize))
-# Extract states, rewards, dones
-states = [m[0] for m in minibatch]
-rewards = [m[2] for m in minibatch]
-dones = [m[4] for m in minibatch]
-# Encoded moves
-encodedMoves = []
-for state in states:
-encodedMoves.append(self.encodeMoves(self.colour, state))
-# Calculate targets
-targets = []
-for i, moves in enumerate(encodedMoves):
-if dones[i]:
-target = rewards[i]
-else:
-target = rewards[i] + self.gamma * self._maxNextQ()
-targets.append(target)
-encodedMoves = np.array([np.pad(m, (0, self.maxSize - len(m)), 'constant', constant_values=(1, 1))
-for m in encodedMoves])
-targets = np.array(targets)
-self.model.fit(self.normalise(encodedMoves), self.normalise(targets), epochs=20)
-if self.epsilon > self.epsilonMin:
-self.epsilon *= self.epsilonDecay
+def remember(self, state, action, reward, next_state, done):
+self.memory.append((state, action, reward, next_state, done))
+def replay(self):
+if len(self.memory) < self.batch_size:
+return
+minibatch = random.sample(self.memory, self.batch_size)
+states = np.array([i[0] for i in minibatch])
+actions = np.array([i[1] for i in minibatch])
+rewards = np.array([i[2] for i in minibatch])
+next_states = np.array([i[3] for i in minibatch])
+dones = np.array([i[4] for i in minibatch])
+states = np.squeeze(states)
+next_states = np.squeeze(next_states)
+targets = rewards + self.gamma * (np.amax(self.model.predict_on_batch(next_states), axis=1)) * (1 - dones)
+targets_full = self.model.predict_on_batch(states)
+ind = np.array([i for i in range(self.batch_size)])
+targets_full[[ind], [actions]] = targets
+self.model.fit(states, targets_full, epochs=1, verbose=0)
+if self.epsilon > self.epsilon_min:
+self.epsilon *= self.epsilon_decay
-def _remember(self, state: np.array, action: int, reward: float, nextState: np.array, done: bool) -> None:
-"""
-Remembers what it has learnt
-:param state: the current state
-:param action: the action taken
-:param reward: the reward for the action
-:param nextState: the next state
-:param done: whether the game is finished
-:return: None (void)
-"""
-self.memory.append((state, action, reward, nextState, done))
-def _act(self) -> Any:
-"""
-Chooses an action based on the available moves
-:return: the action
-"""
+def act(self, state):
 if np.random.rand() <= self.epsilon:
-# choose a random action from the action spaces list
-mm = MiniMax()
-value, newBoard = mm.AI(3, self.colour, self.gameManager)
-if newBoard is None:
-return random.choice(self.actionSpace)
-where = self._boardDiff(self.board, newBoard)
-return self._encode(where[0]+1, where[1]+1)
-if len(self.actionSpace) == 1:
-return self.actionSpace[0]
-encodedMoves = np.squeeze(self.actionSpace)
-encodedMoves = np.pad(encodedMoves, (0, self.maxSize - len(encodedMoves)), 'constant', constant_values=(1, 1))
-act_values = self.model.predict(self.normalise(encodedMoves))
-val = np.argmax(act_values[0])
-val = val if val < len(self.actionSpace) else len(self.actionSpace) - 1
-return self.actionSpace[val]
+return random.randrange(self.action_space)
+act_values = self.model.predict(state)
+return np.argmax(act_values[0])
-def resetScore(self):
-self.score = 0
-def _convertState(self, board: list) -> list:
-"""
-Converts the board into a 2D list of numbers
-:param board: 2D list of pieces
-:return: new 2D list of numbers
-"""
-num_board = []
-for row in board:
-num_row = []
-for piece in row:
-if piece == 0:
-num_row.append(0)
-continue
-if piece.colour == 1:
-num_row.append(1)
-continue
-num_row.append(2)
-num_board.append(num_row)
-return num_board
-def _encode(self, start: tuple, end: tuple) -> int:
-"""
-Encodes the move into an integer
-:param start: tuple of start position
-:param end: tuple of end position
-:return: encoded move
-"""
-start_row = start[0]
-start_col = end[0]
-end_row = start[-1]
-end_col = end[-1]
-# Concatenate into integer
-return int(str(start_row) + str(start_col) + str(end_row) + str(end_col))
-def _maxNextQ(self) -> float:
-colour = WHITE if self.colour == GREEN else GREEN
-encodedMoves = self.encodeMoves(colour, self.board)
-if len(encodedMoves) == 0:
-return -1
-paddedMoves = np.array(np.pad(encodedMoves, (0, self.maxSize - len(encodedMoves)), 'constant', constant_values=(1, 1)))
-nextQValues = self.model.predict_on_batch(self.normalise(paddedMoves))
-return np.max(nextQValues)
-def encodeMoves(self, colour: int, board: Board) -> list:
-"""
-Encodes the moves into a list encoded moves
-:param colour: colour of the player
-:param board: the board
-:return: list of encoded moves
-"""
-encodedMoves = []
-moves = board.getAllMoves(colour)
-for move in moves:
-where = self._boardDiff(board, move)
-encodedMoves.append(self._encode(where[0]+1, where[1]+1))
-return encodedMoves
-def _boardDiff(self, board, move):
-cnvState = np.array(self._convertState(board.board))
-cnvMove = np.array(self._convertState(move.board))
-diff = np.subtract(cnvMove, cnvState)
-diff = np.nonzero(diff)
-return diff
-def normalise(self, data):
-"""
-Normalise the data
-"""
-return data / 10000


@@ -1,27 +0,0 @@
-import matplotlib.pyplot as plt
-from utilities.constants import GREEN, WHITE
-# winners = []
-with open("winners.txt") as f:
-winners = f.readlines()
-winners = [int(x.strip()) for x in winners]
-fig, ax = plt.subplots()
-bar = ax.bar(["Draw", "White", "Green"], [winners.count(0), winners.count(WHITE), winners.count(GREEN)])
-ax.set(xlabel='Winner', ylabel='Frequency', ylim=[0, 500])
-ax.set_title("Winners")
-ax.bar_label(bar)
-plt.show()
-with open("rewardsA.txt") as f:
-totalReward = f.readlines()
-plt.plot([i for i in range(len(totalReward))], totalReward)
-plt.xlabel("Games")
-plt.ylabel("Reward")
-plt.show()

run.sh

@@ -1,2 +0,0 @@
-conda activate
-python main.py


@@ -1,5 +1,5 @@
 import pygame
-from copy import deepcopy
 from .constants import BLACK, ROWS, GREEN, SQUARE_SIZE, COLS, WHITE
 from .piece import Piece
@@ -9,39 +9,34 @@ class Board:
 self.board = []
 self.greenLeft = self.whiteLeft = 12
 self.greenKings = self.whiteKings = 0
-self.green = (144, 184, 59)
-self._createBoard()
+self.createBoard()
-def _drawSquares(self, win):
+def drawSquares(self, win):
 win.fill(BLACK)
 for row in range(ROWS):
 for col in range(row % 2, ROWS, 2):
-pygame.draw.rect(win, self.green, (row * SQUARE_SIZE, col * SQUARE_SIZE, SQUARE_SIZE, SQUARE_SIZE))
+pygame.draw.rect(win, GREEN, (row * SQUARE_SIZE, col * SQUARE_SIZE, SQUARE_SIZE, SQUARE_SIZE))
-def _createBoard(self):
+def createBoard(self):
 for row in range(ROWS):
 self.board.append([])
 for col in range(COLS):
 if col % 2 == ((row + 1) % 2):
 if row < 3:
 self.board[row].append(Piece(row, col, WHITE))
-continue
-if row > 4:
+elif row > 4:
 self.board[row].append(Piece(row, col, GREEN))
-continue
-self.board[row].append(0)
-continue
-self.board[row].append(0)
+else:
+self.board[row].append(None)
+else:
+self.board[row].append(None)
 def draw(self, win):
-self._drawSquares(win)
+self.drawSquares(win)
 for row in range(ROWS):
 for col in range(COLS):
 piece = self.board[row][col]
-if piece != 0:
+if piece is not None:
 piece.draw(win)
 def move(self, piece, row, col):
@@ -50,65 +45,19 @@ class Board:
 if row == ROWS - 1 or row == 0:
 piece.makeKing()
 if piece.colour == WHITE:
 self.whiteKings += 1
-if piece.colour == GREEN:
+else:
 self.greenKings += 1
 def remove(self, skipped):
 for piece in skipped:
-self.board[piece.row][piece.col] = 0
-if piece != 0:
+self.board[piece.row][piece.col] = None
+if piece is not None:
 if piece.colour == GREEN:
 self.greenLeft -= 1
-continue
-self.whiteLeft -= 1
-def getAllMoves(self, colour):
-moves = []
-possibleMoves = []
-possiblePieces = []
-pieces = self.getAllPieces(colour)
-hasForcedCapture = False
-for piece in pieces:
-validMoves = self.getValidMoves(piece)
-# Check if there are forced capture moves for this piece
-forcedCaptureMoves = [move for move, skip in validMoves.items() if skip]
-if forcedCaptureMoves:
-hasForcedCapture = True
-possiblePieces.append(piece)
-possibleMoves.append({move: skip for move, skip in validMoves.items() if skip})
-if hasForcedCapture:
-# If there are forced capture moves, consider only those
-for i in range(len(possibleMoves)):
-for move, skip in possibleMoves[i].items():
-tempBoard = deepcopy(self)
-tempPiece = tempBoard.getPiece(possiblePieces[i].row, possiblePieces[i].col)
-newBoard = self._simulateMove(tempPiece, move, tempBoard, skip)
-moves.append(newBoard)
-else:
-# If no forced capture moves, consider all valid moves
-for piece in pieces:
-validMoves = self.getValidMoves(piece)
-for move, skip in validMoves.items():
-tempBoard = deepcopy(self)
-tempPiece = tempBoard.getPiece(piece.row, piece.col)
-newBoard = self._simulateMove(tempPiece, move, tempBoard, skip)
-moves.append(newBoard)
-return moves
-def _simulateMove(self, piece, move, board, skip):
-board.move(piece, move[0], move[1])
-if skip:
-board.remove(skip)
-return board
+else:
+self.whiteLeft -= 1
 def getPiece(self, row, col):
 return self.board[row][col]
@@ -116,8 +65,7 @@ class Board:
 def winner(self):
 if self.greenLeft <= 0:
 return WHITE
-if self.whiteLeft <= 0:
+elif self.whiteLeft <= 0:
 return GREEN
 return None
@@ -128,10 +76,16 @@ class Board:
 left = piece.col - 1
 right = piece.col + 1
 row = piece.row
-if piece.colour == GREEN or piece.king:
+if piece.colour == GREEN:
+moves.update(self._traverseLeft(row - 1, max(row - 3, -1), -1, piece.colour, left))
+moves.update(self._traverseRight(row - 1, max(row - 3, -1), -1, piece.colour, right))
+if piece.colour == WHITE:
+moves.update(self._traverseLeft(row + 1, min(row + 3, ROWS), 1, piece.colour, left))
+moves.update(self._traverseRight(row + 1, min(row + 3, ROWS), 1, piece.colour, right))
+if piece.king:
 moves.update(self._traverseLeft(row - 1, max(row - 3, -1), -1, piece.colour, left))
 moves.update(self._traverseRight(row - 1, max(row - 3, -1), -1, piece.colour, right))
-if piece.colour == WHITE or piece.king:
 moves.update(self._traverseLeft(row + 1, min(row + 3, ROWS), 1, piece.colour, left))
 moves.update(self._traverseRight(row + 1, min(row + 3, ROWS), 1, piece.colour, right))
@@ -159,7 +113,6 @@ class Board:
 forcedCapture = forced
 else:
 forcedCapture = forced
 return forcedCapture
 def scoreOfTheBoard(self):
@@ -169,7 +122,7 @@
 pieces = []
 for row in self.board:
 for piece in row:
-if piece != 0 and piece.colour == colour:
+if piece is not None and piece.colour == colour:
 pieces.append(piece)
 return pieces
@@ -209,7 +162,7 @@
 def _traverse(self, row, col, skipped, moves, step, last, colour):
 current = self.board[row][col]
-if current == 0:
+if current is None:
 if skipped and not last:
 return None
 elif skipped:
@@ -230,56 +183,3 @@
 else:
 last = [current]
 return last
-def step(self, move, colour):
-start, end = self._decode(move)
-start[0] = start[0] - 1
-start[1] = start[1] - 1
-end[0] = end[0] - 1
-end[1] = end[1] - 1
-reward = 0
-done = False
-piece = self.getPiece(start[0], start[1])
-if piece == 0:
-newStart = end
-end = start
-start = newStart
-piece = self.getPiece(start[0], start[1])
-moves = self.getValidMoves(piece)
-for move, skip in moves.items():
-if tuple(end) == move:
-self._simulateMove(piece, move, self, skip)
-if len(skip) == 1:
-reward = 2
-break
-if len(skip) > 1:
-reward = 3 + len(skip) * 0.2
-break
-reward = -0.5
-break
-if self.winner() == colour:
-done = True
-reward = 10
-return reward, self, done
-def _decode(self, move):
-# Split digits back out
-str_code = str(move)
-# print(str_code)
-start_row = int(str_code[0])
-start_col = int(str_code[1])
-end_row = int(str_code[2])
-end_col = int(str_code[3])
-# Reconstruct positions
-start = [start_row, start_col]
-end = [end_row, end_col]
-return start, end
-# def reset(self):
-# self.board = []
-# self.whiteLeft = self.greenLeft = 12
-# self.whiteKings = self.greenKings = 0
-# self._createBoard()
-# return self.board


@@ -6,8 +6,8 @@ SQUARE_SIZE = WIDTH // COLS
 # RGB color
-GREEN = 1
-WHITE = 2
+GREEN = (144, 184, 59)
+WHITE = (255, 255, 255)
 BLACK = (0, 0, 0)
 BLUE = (0, 0, 255)
 GREY = (128, 128, 128)


@@ -1,8 +1,7 @@
 import pygame
-from utilities.Board import Board
+from utilities.board import Board
 from utilities.constants import GREEN, WHITE, BLUE, SQUARE_SIZE
 class GameManager:
 def __init__(self, win, colour):
 self._init(colour)
@@ -30,14 +29,14 @@ class GameManager:
 self.selected = None
 self.select(row, col)
 piece = self.board.getPiece(row, col)
-if piece != 0 and piece.colour == self.turn:
+if piece is not None and piece.colour == self.turn:
 self.selected = piece
 self.validMoves = self.board.getValidMoves(piece)
 return True
 def _move(self, row, col):
 piece = self.board.getPiece(row, col)
-if self.selected and piece == 0 and (row, col) in self.validMoves:
+if self.selected and piece is None and (row, col) in self.validMoves:
 self.board.move(self.selected, row, col)
 skipped = self.validMoves[row, col]
 if self.validMoves[list(self.validMoves.keys())[0]]:
@@ -59,7 +58,7 @@ class GameManager:
 self.validMoves = {}
 if self.turn == GREEN:
 self.turn = WHITE
-return
+else:
 self.turn = GREEN
 def drawValidMoves(self, moves):


@@ -1,6 +1,6 @@
 import pygame.draw
-from utilities.constants import SQUARE_SIZE, GREY, CROWN, GREEN
+from utilities.constants import SQUARE_SIZE, GREY, CROWN
 class Piece:
@@ -14,8 +14,6 @@ class Piece:
 self.calcPosition()
 self.padding = 20
 self.border = 2
-self.green = (144, 184, 59)
-self.white = (255, 255, 255)
 def calcPosition(self):
 self.x = SQUARE_SIZE * self.col + SQUARE_SIZE // 2
@@ -27,7 +25,7 @@ class Piece:
 def draw(self, win):
 radius = SQUARE_SIZE // 2 - self.padding
 pygame.draw.circle(win, GREY, (self.x, self.y), radius + self.border)
-pygame.draw.circle(win, self.green if self.colour == GREEN else self.white, (self.x, self.y), radius)
+pygame.draw.circle(win, self.colour, (self.x, self.y), radius)
 if self.king:
 win.blit(CROWN, (self.x - CROWN.get_width() // 2, self.y - CROWN.get_height() // 2))