reinforcement-learning #1

Merged
rodude123 merged 4 commits from reinforcement-learning into master 2023-09-28 23:59:04 +01:00
9 changed files with 466 additions and 221 deletions
Showing only changes of commit 1aa8ffa8fc

View File

@ -1,4 +1,4 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.11 (draughts)" project-jdk-type="Python SDK" />
<component name="ProjectRootManager" version="2" project-jdk-name="$USER_HOME$/anaconda3" project-jdk-type="Python SDK" />
</project>

194
main.py
View File

@ -1,7 +1,9 @@
import sys
import pygame
from matplotlib import pyplot as plt
from reinforcementLearning.ReinforcementLearning import ReinforcementLearning
from utilities.constants import WIDTH, HEIGHT, SQUARE_SIZE, WHITE, GREEN
from utilities.gameManager import GameManager
from minimax.minimaxAlgo import MiniMax
@ -44,71 +46,71 @@ def drawMultiLineText(surface, text, pos, font, color=pygame.Color('black')):
y += word_height # Start on new row.
def main():
def main(difficulty=0):
pygame.init()
screen = pygame.display.set_mode((WIDTH, HEIGHT))
menuClock = pygame.time.Clock()
click = False
width = screen.get_width()
font = pygame.font.SysFont(None, 25)
difficulty = 0
font = pygame.font.SysFont("", 25)
while True:
# menu
screen.fill((128, 128, 128))
drawText('Main Menu', font, (255, 255, 255), screen, width / 2, 20)
if difficulty == 0:
while True:
# menu
screen.fill((128, 128, 128))
drawText('Main Menu', font, (255, 255, 255), screen, width / 2, 20)
mx, my = pygame.mouse.get_pos()
mx, my = pygame.mouse.get_pos()
easy = pygame.Rect(width / 2 - 50, 100, 200, 50)
pygame.draw.rect(screen, (0, 255, 0), easy)
drawText("easy", font, (255, 255, 255), screen, width / 2, 100)
medium = pygame.Rect(width / 2 - 50, 200, 200, 50)
pygame.draw.rect(screen, (255, 125, 0), medium)
drawText("medium", font, (255, 255, 255), screen, width / 2, 200)
hard = pygame.Rect(width / 2 - 50, 300, 200, 50)
pygame.draw.rect(screen, (255, 0, 0), hard)
drawText("hard", font, (255, 255, 255), screen, width / 2, 300)
rules = pygame.Rect(width / 2 - 50, 400, 200, 50)
pygame.draw.rect(screen, (0, 0, 255), rules)
drawText("rules", font, (255, 255, 255), screen, width / 2, 400)
quitGame = pygame.Rect(width / 2 - 50, 500, 200, 50)
pygame.draw.rect(screen, (0, 0, 0), quitGame)
drawText("quit", font, (255, 255, 255), screen, width / 2, 500)
easy = pygame.Rect(width / 2 - 50, 100, 200, 50)
pygame.draw.rect(screen, (0, 255, 0), easy)
drawText("easy", font, (255, 255, 255), screen, width / 2, 100)
medium = pygame.Rect(width / 2 - 50, 200, 200, 50)
pygame.draw.rect(screen, (255, 125, 0), medium)
drawText("medium", font, (255, 255, 255), screen, width / 2, 200)
hard = pygame.Rect(width / 2 - 50, 300, 200, 50)
pygame.draw.rect(screen, (255, 0, 0), hard)
drawText("hard", font, (255, 255, 255), screen, width / 2, 300)
rules = pygame.Rect(width / 2 - 50, 400, 200, 50)
pygame.draw.rect(screen, (0, 0, 255), rules)
drawText("rules", font, (255, 255, 255), screen, width / 2, 400)
quitGame = pygame.Rect(width / 2 - 50, 500, 200, 50)
pygame.draw.rect(screen, (0, 0, 0), quitGame)
drawText("quit", font, (255, 255, 255), screen, width / 2, 500)
if easy.collidepoint((mx, my)):
if click:
difficulty = 1
break
if medium.collidepoint((mx, my)):
if click:
difficulty = 3
break
if hard.collidepoint((mx, my)):
if click:
difficulty = 5
break
if rules.collidepoint((mx, my)):
if click:
rulesGUI()
break
if quitGame.collidepoint((mx, my)):
if click:
pygame.quit()
sys.exit()
click = False
for event in pygame.event.get():
if event.type == pygame.QUIT:
pygame.quit()
sys.exit()
if event.type == pygame.MOUSEBUTTONDOWN:
if event.button == 1:
click = True
if easy.collidepoint((mx, my)):
if click:
difficulty = 1
break
if medium.collidepoint((mx, my)):
if click:
difficulty = 3
break
if hard.collidepoint((mx, my)):
if click:
difficulty = 5
break
if rules.collidepoint((mx, my)):
if click:
rulesGUI()
break
if quitGame.collidepoint((mx, my)):
if click:
pygame.quit()
sys.exit()
click = False
for event in pygame.event.get():
if event.type == pygame.QUIT:
pygame.quit()
sys.exit()
if event.type == pygame.MOUSEBUTTONDOWN:
if event.button == 1:
click = True
pygame.display.update()
menuClock.tick(60)
if difficulty != 0:
game(difficulty)
pygame.display.update()
menuClock.tick(60)
game(difficulty)
def rulesGUI():
@ -116,8 +118,8 @@ def rulesGUI():
menuClock = pygame.time.Clock()
click = False
width = screen.get_width()
titleFont = pygame.font.SysFont(None, 48)
font = pygame.font.SysFont(None, 21)
titleFont = pygame.font.SysFont("", 48)
font = pygame.font.SysFont("", 21)
while True:
screen.fill((128, 128, 128))
drawText("Rules", titleFont, (255, 255, 255), screen, width / 2, 20)
@ -175,39 +177,65 @@ def game(difficulty):
run = True
clock = pygame.time.Clock()
gameManager = GameManager(WIN, GREEN)
rl = ReinforcementLearning(gameManager.board.getAllMoves(WHITE), gameManager.board, WHITE, gameManager)
mm = MiniMax()
totalReward = []
for i in range(2000):
score = 0
for j in range(200):
clock.tick(FPS)
reward = 0
if gameManager.turn == WHITE:
mm = MiniMax()
value, newBoard = mm.AI(difficulty, WHITE, gameManager)
# gameManager.aiMove(newBoard)
# reward, newBoard = rl.AI(gameManager.board)
if newBoard is None:
print("Cannot make move")
continue
gameManager.aiMove(newBoard)
#
while run:
clock.tick(FPS)
gameManager.update()
pygame.display.update()
if gameManager.turn == WHITE:
mm = MiniMax()
value, newBoard = mm.AI(gameManager.getBoard(), difficulty, WHITE, gameManager)
gameManager.aiMove(newBoard)
# time.sleep(0.15)
if gameManager.turn == GREEN:
value, newBoard = mm.AI(difficulty, GREEN, gameManager)
gameManager.aiMove(newBoard)
if gameManager.turn == GREEN:
mm = MiniMax()
value, newBoard = mm.AI(gameManager.getBoard(), difficulty, GREEN, gameManager)
gameManager.aiMove(newBoard)
# time.sleep(0.15)
score += reward
if gameManager.winner() != None:
print(gameManager.winner())
run = False
if gameManager.winner() is not None:
print(gameManager.winner())
break
for event in pygame.event.get():
if event.type == pygame.QUIT:
run = False
if event.type == pygame.MOUSEBUTTONDOWN:
pos = pygame.mouse.get_pos()
row, col = getRowColFromMouse(pos)
# if gameManager.turn == GREEN:
gameManager.select(row, col)
# for event in pygame.event.get():
# if event.type == pygame.QUIT:
# break
# if event.type == pygame.MOUSEBUTTONDOWN:
# pos = pygame.mouse.get_pos()
# row, col = getRowColFromMouse(pos)
# # if gameManager.turn == GREEN:
# gameManager.select(row, col)
gameManager.update()
pygame.display.update()
gameManager.update()
pygame.display.update()
gameManager.reset()
rl.resetScore()
print("Game: ", i, " Reward: ", score)
totalReward.append(score)
# save model weights every 250 games
if i % 250 == 0 and i != 0:
rl.model.save("./modelWeights/model_" + str(i) + ".h5")
# pygame.quit()
rl.model.save("./modelWeights/model_final.h5")
main()
plt.plot([i for i in range(len(totalReward))], totalReward)
plt.xlabel("Games")
plt.ylabel("Reward")
plt.show()
main(3)
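
The new game() body wraps the match in an outer training loop: 2000 games, a 200-step cap per game, a checkpoint every 250 games, and a reward curve at the end. Because the flattened hunk interleaves the removed event loop with the new code, here is a minimal sketch of that skeleton, with a hypothetical playOneGame() standing in for the minimax/RL inner loop (not a drop-in for the code above):

    from matplotlib import pyplot as plt
    import random

    def playOneGame(maxSteps=200):
        # hypothetical stand-in for the inner loop in game(); returns the episode reward
        return random.uniform(-5, 15)

    totalReward = []
    for i in range(2000):
        score = playOneGame()
        totalReward.append(score)
        # checkpoint every 250 games, as the diff does with rl.model.save(...)
        if i % 250 == 0 and i != 0:
            print("checkpoint at game", i)

    plt.plot(range(len(totalReward)), totalReward)
    plt.xlabel("Games")
    plt.ylabel("Reward")
    plt.show()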

View File

@ -1,56 +1,38 @@
import random
from copy import deepcopy
from math import inf
from utilities.constants import GREEN, WHITE
class MiniMax():
class MiniMax:
def AI(self, board, depth, maxPlayer, gameManager):
if depth == 0 or board.winner() is not None:
return board.scoreOfTheBoard(), board
def AI(self, depth, maxPlayer, gameManager):
if depth == 0 or gameManager.board.winner() is not None:
return gameManager.board.scoreOfTheBoard(), gameManager.board
if maxPlayer:
if type(maxPlayer) == int:
maxEval = -inf
bestMove = None
for move in self.getAllMoves(board, maxPlayer):
evaluation = self.AI(move, depth - 1, False, gameManager)[0]
for move in gameManager.board.getAllMoves(maxPlayer):
evaluation = self.AI(depth - 1, False, gameManager)[0]
maxEval = max(maxEval, evaluation)
if maxEval > evaluation:
bestMove = move
if maxEval == evaluation:
# bestMove = move
bestMove = bestMove if random.choice([True, False]) else move
return maxEval, bestMove
else:
minEval = inf
bestMove = None
colour = WHITE if gameManager.turn == GREEN else GREEN
for move in self.getAllMoves(board, colour):
evaluation = self.AI(move, depth - 1, True, gameManager)[0]
for move in gameManager.board.getAllMoves(colour):
evaluation = self.AI(depth - 1, True, gameManager)[0]
minEval = min(minEval, evaluation)
if minEval < evaluation:
bestMove = move
if minEval == evaluation:
# bestMove = move
bestMove = bestMove if random.choice([True, False]) else move
return minEval, bestMove
def _simulateMove(self, piece, move, board, skip):
board.move(piece, move[0], move[1])
if skip:
board.remove(skip)
return board
def getAllMoves(self, board, colour):
moves = []
for piece in board.getAllPieces(colour):
validMoves = board.getValidMoves(piece)
for move, skip in validMoves.items():
tempBoard = deepcopy(board)
tempPiece = tempBoard.getPiece(piece.row, piece.col)
newBoard = self._simulateMove(tempPiece, move, tempBoard, skip)
moves.append(newBoard)
return moves
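
With getAllMoves and _simulateMove now living on Board, MiniMax.AI reduces to plain minimax over successor boards with a random tie-break between equally scored moves. A self-contained sketch of that pattern (evaluate and successors are illustrative stand-ins for scoreOfTheBoard and getAllMoves, not the repo's signatures); the sketch recurses into each candidate board, as the removed board-passing version did:

    import random
    from math import inf

    def minimax(board, depth, maximising, evaluate, successors):
        # evaluate(board) -> float score; successors(board, maximising) -> list of boards
        children = successors(board, maximising)
        if depth == 0 or not children:
            return evaluate(board), board
        best, bestMove = (-inf if maximising else inf), None
        for child in children:
            value, _ = minimax(child, depth - 1, not maximising, evaluate, successors)
            better = value > best if maximising else value < best
            if better or bestMove is None:
                best, bestMove = value, child
            elif value == best and random.choice([True, False]):
                # random tie-break between equally scored moves, as in the diff
                bestMove = child
        return best, bestMove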

View File

@ -1,96 +1,252 @@
import random
from collections import deque
from typing import Any
from copy import deepcopy
import numpy as np
import tensorflow as tf
from tensorflow.python.keras import Sequential, regularizers
from tensorflow.python.keras.layers import Dense
from keras.engine.input_layer import InputLayer
from keras.layers import BatchNormalization
from tensorflow.python.keras import Sequential, regularizers, Input
from tensorflow.python.keras.layers import Dense, Lambda, Dropout
from tensorflow.python.keras.optimizer_v2.adam import Adam
from minimax.minimaxAlgo import MiniMax
from utilities import Board
from utilities.constants import WHITE, GREEN
from utilities.gameManager import GameManager
class ReinforcementLearning():
def __init__(self, action_space, state_space, env):
self.action_space = action_space
self.state_space = state_space
self.env = env
def __init__(self, actionSpace: list, board: Board, colour: int, gameManager: GameManager) -> None:
"""
Constructor for the ReinforcementLearning class
:param actionSpace: the number of possible actions
:param board: the game board
"""
self.gameManager = gameManager
self.actionSpace = actionSpace
self.board = board
self.state = self.board.board
self.colour = colour
self.score = 0
self.epsilon = 1
self.gamma = .95
self.batch_size = 64
self.epsilon_min = .01
self.epsilon_decay = .995
self.learning_rate = 0.001
self.memory = deque(maxlen=100000)
self.model = self._buildModel()
self.batchSize = 256
self.maxSize = 32
self.epsilonMin = .01
self.epsilonDecay = .995
self.learningRate = 0.001
self.memory = deque(maxlen=10000000)
self.model = self._buildMainModel()
def AI(self, episode):
loss = []
def AI(self, board: Board) -> tuple:
"""
Learns to play the draughts game
:return: the loss
"""
self.board = board
self.state = self._convertState(self.board.board)
self.actionSpace = self._encodeMoves(self.colour, self.board)
if len(self.actionSpace) == 0:
return self.score, None
max_steps = 1000
action = self._act()
reward, nextState, done = self.board.step(action, self.colour)
self.score += reward
self.state = self._convertState(nextState.board)
self._remember(deepcopy(self.board), action, reward, self.state, done)
self._replay()
for e in range(episode):
state = self.env.reset()
state = np.reshape(state, (1, self.state_space))
score = 0
for i in range(max_steps):
action = self.act(state)
reward, next_state, done = self.env.step(action)
score += reward
next_state = np.reshape(next_state, (1, self.state_space))
self.remember(state, action, reward, next_state, done)
state = next_state
self.replay()
if done:
print("episode: {}/{}, score: {}".format(e, episode, score))
break
loss.append(score)
return self.score, nextState
def _buildModel(self):
def _buildMainModel(self) -> Sequential:
"""
Build the model for the AI
:return: the model
"""
# Board model
board_model = Sequential()
modelLayers = [
Lambda(lambda x: tf.reshape(x, [-1, 32])),
Dense(256, activation='relu'),
Dropout(0.2),
Dense(128, activation='relu'),
Dropout(0.2),
Dense(64, activation='relu'),
Dropout(0.2),
Dense(32, activation='relu', kernel_regularizer=regularizers.l2(0.01)),
Dropout(0.2),
Dense(16, activation='relu', kernel_regularizer=regularizers.l2(0.01)),
Dropout(0.2),
Dense(1, activation='linear', kernel_regularizer=regularizers.l2(0.01))
]
boardModel = Sequential(modelLayers)
# input dimensions is 32 board position values
board_model.add(Dense(64, activation='relu', input_dim=32))
# boardModel.add(BatchNormalization())
boardModel.compile(optimizer=Adam(learning_rate=0.0001), loss='mean_squared_error')
boardModel.build(input_shape=(None, None))
# use regularizers, to prevent fitting noisy labels
board_model.add(Dense(32, activation='relu', kernel_regularizer=regularizers.l2(0.01)))
board_model.add(Dense(16, activation='relu', kernel_regularizer=regularizers.l2(0.01))) # 16
board_model.add(Dense(8, activation='relu', kernel_regularizer=regularizers.l2(0.01))) # 8
print(boardModel.summary())
# output isn't squashed, because it might lose information
board_model.add(Dense(1, activation='linear', kernel_regularizer=regularizers.l2(0.01)))
board_model.compile(optimizer='nadam', loss='binary_crossentropy')
return boardModel
return board_model
def remember(self, state, action, reward, next_state, done):
self.memory.append((state, action, reward, next_state, done))
def replay(self):
if len(self.memory) < self.batch_size:
def _replay(self) -> None:
"""
Trains the model
:return: None (void)
"""
if len(self.memory) < self.batchSize:
# Not enough data to replay and test the model
return
minibatch = random.sample(self.memory, self.batch_size)
states = np.array([i[0] for i in minibatch])
actions = np.array([i[1] for i in minibatch])
rewards = np.array([i[2] for i in minibatch])
next_states = np.array([i[3] for i in minibatch])
dones = np.array([i[4] for i in minibatch])
# Get a random sample from the memory
minibatch = random.sample(self.memory, int(self.maxSize))
states = np.squeeze(states)
next_states = np.squeeze(next_states)
# Extract states, rewards, dones
states = [m[0] for m in minibatch]
rewards = [m[2] for m in minibatch]
dones = [m[4] for m in minibatch]
targets = rewards + self.gamma * (np.amax(self.model.predict_on_batch(next_states), axis=1)) * (1 - dones)
targets_full = self.model.predict_on_batch(states)
# Encoded moves
encodedMoves = []
for state in states:
encodedMoves.append(self._encodeMoves(self.colour, state))
ind = np.array([i for i in range(self.batch_size)])
targets_full[[ind], [actions]] = targets
# Calculate targets
targets = []
for i, moves in enumerate(encodedMoves):
if dones[i]:
target = rewards[i]
else:
target = rewards[i] + self.gamma * self._maxNextQ()
self.model.fit(states, targets_full, epochs=1, verbose=0)
if self.epsilon > self.epsilon_min:
self.epsilon *= self.epsilon_decay
targets.append(target)
def act(self, state):
encodedMoves = np.array([np.pad(m, (0, self.maxSize - len(m)), 'constant', constant_values=(1, 1))
for m in encodedMoves])
targets = np.array(targets)
self.model.fit(self._normalise(encodedMoves), self._normalise(targets), epochs=20)
if self.epsilon > self.epsilonMin:
self.epsilon *= self.epsilonDecay
def _remember(self, state: np.array, action: int, reward: float, nextState: np.array, done: bool) -> None:
"""
Remembers what it has learnt
:param state: the current state
:param action: the action taken
:param reward: the reward for the action
:param nextState: the next state
:param done: whether the game is finished
:return: None (void)
"""
self.memory.append((state, action, reward, nextState, done))
def _act(self) -> Any:
"""
Chooses an action based on the available moves
:return: the action
"""
if np.random.rand() <= self.epsilon:
return random.randrange(self.action_space)
act_values = self.model.predict(state)
return np.argmax(act_values[0])
# choose a random action from the action space list
mm = MiniMax()
value, newBoard = mm.AI(3, self.colour, self.gameManager)
if newBoard is None:
return random.choice(self.actionSpace)
where = self._boardDiff(self.board, newBoard)
return self._encode(where[0]+1, where[1]+1)
if len(self.actionSpace) == 1:
return self.actionSpace[0]
encodedMoves = np.squeeze(self.actionSpace)
encodedMoves = np.pad(encodedMoves, (0, self.maxSize - len(encodedMoves)), 'constant', constant_values=(1, 1))
act_values = self.model.predict(self._normalise(encodedMoves))
return self.actionSpace[np.argmax(act_values[0])]
def resetScore(self):
self.score = 0
def _convertState(self, board: list) -> list:
"""
Converts the board into a 2D list of numbers
:param board: 2D list of pieces
:return: new 2D list of numbers
"""
num_board = []
for row in board:
num_row = []
for piece in row:
if piece == 0:
num_row.append(0)
continue
if piece.colour == 1:
num_row.append(1)
continue
num_row.append(2)
num_board.append(num_row)
return num_board
def _encode(self, start: tuple, end: tuple) -> int:
"""
Encodes the move into an integer
:param start: tuple of start position
:param end: tuple of end position
:return: encoded move
"""
start_row = start[0]
start_col = end[0]
end_row = start[-1]
end_col = end[-1]
# Concatenate into integer
return int(str(start_row) + str(start_col) + str(end_row) + str(end_col))
def _maxNextQ(self) -> float:
colour = WHITE if self.colour == GREEN else GREEN
encodedMoves = self._encodeMoves(colour, self.board)
if len(encodedMoves) == 0:
return -1
paddedMoves = np.array(np.pad(encodedMoves, (0, self.maxSize - len(encodedMoves)), 'constant', constant_values=(1, 1)))
# paddedMoves = np.reshape(paddedMoves, (32, 8, 8))
# paddedMoves = paddedMoves / np.max(paddedMoved
# paddedMoves = paddedMoves.reshape(32,)
# pm = tf.convert_to_tensor(paddedMoves, dtype=tf.float32)
# pm = tf.reshape(pm, [32])
print(paddedMoves.shape)
nextQValues = self.model.predict_on_batch(self._normalise(paddedMoves))
return np.max(nextQValues)
def _encodeMoves(self, colour: int, board: Board) -> list:
"""
Encodes the moves into a list of encoded moves
:param colour: colour of the player
:param board: the board
:return: list of encoded moves
"""
encodedMoves = []
moves = board.getAllMoves(colour)
for move in moves:
where = self._boardDiff(board, move)
encodedMoves.append(self._encode(where[0]+1, where[1]+1))
return encodedMoves
def _boardDiff(self, board, move):
cnvState = np.array(self._convertState(board.board))
cnvMove = np.array(self._convertState(move.board))
diff = np.subtract(cnvMove, cnvState)
diff = np.nonzero(diff)
return diff
def _normalise(self, data):
"""
Normalise the data
"""
for i in range(len(data)):
data[i] = data[i] / np.linalg.norm(data[i])
return data
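
The agent follows the standard epsilon-greedy / Q-target recipe: with probability epsilon, _act falls back to a minimax-guided move, otherwise it takes the argmax of the model's predictions over the padded, normalised move encodings, and _replay fits the network towards r + gamma * max Q(s'). A toy sketch of those two pieces with numpy (chooseAction and qTarget are illustrative names, not methods from the diff):

    import random
    import numpy as np

    def chooseAction(qValues, actions, epsilon):
        # epsilon-greedy: explore with probability epsilon, otherwise exploit the estimates
        if random.random() <= epsilon:
            return random.choice(actions)
        return actions[int(np.argmax(qValues))]

    def qTarget(reward, done, nextQValues, gamma=0.95):
        # Bellman-style target the replay step fits the network towards
        return reward if done else reward + gamma * float(np.max(nextQValues))

    # toy usage with three encoded moves (four-digit row/col codes, see Board._decode below)
    actions = [1223, 3132, 5647]
    print(chooseAction(np.array([0.1, 0.7, 0.2]), actions, epsilon=0.05))
    print(qTarget(2.0, False, np.array([0.4, 1.3])))   # 2.0 + 0.95 * 1.3 = 3.235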

2
run.sh Executable file
View File

@ -0,0 +1,2 @@
conda activate
python main.py

View File

@ -1,5 +1,5 @@
import pygame
from copy import deepcopy
from .constants import BLACK, ROWS, GREEN, SQUARE_SIZE, COLS, WHITE
from .piece import Piece
@ -9,34 +9,39 @@ class Board:
self.board = []
self.greenLeft = self.whiteLeft = 12
self.greenKings = self.whiteKings = 0
self.createBoard()
self.green = (144, 184, 59)
self._createBoard()
def drawSquares(self, win):
def _drawSquares(self, win):
win.fill(BLACK)
for row in range(ROWS):
for col in range(row % 2, ROWS, 2):
pygame.draw.rect(win, GREEN, (row * SQUARE_SIZE, col * SQUARE_SIZE, SQUARE_SIZE, SQUARE_SIZE))
pygame.draw.rect(win, self.green, (row * SQUARE_SIZE, col * SQUARE_SIZE, SQUARE_SIZE, SQUARE_SIZE))
def createBoard(self):
def _createBoard(self):
for row in range(ROWS):
self.board.append([])
for col in range(COLS):
if col % 2 == ((row + 1) % 2):
if row < 3:
self.board[row].append(Piece(row, col, WHITE))
elif row > 4:
continue
if row > 4:
self.board[row].append(Piece(row, col, GREEN))
else:
self.board[row].append(None)
else:
self.board[row].append(None)
continue
self.board[row].append(0)
continue
self.board[row].append(0)
def draw(self, win):
self.drawSquares(win)
self._drawSquares(win)
for row in range(ROWS):
for col in range(COLS):
piece = self.board[row][col]
if piece is not None:
if piece != 0:
piece.draw(win)
def move(self, piece, row, col):
@ -45,19 +50,40 @@ class Board:
if row == ROWS - 1 or row == 0:
piece.makeKing()
if piece.colour == WHITE:
self.whiteKings += 1
else:
self.greenKings += 1
if piece.colour == WHITE:
self.whiteKings += 1
if piece.colour == GREEN:
self.greenKings += 1
def remove(self, skipped):
for piece in skipped:
self.board[piece.row][piece.col] = None
if piece is not None:
self.board[piece.row][piece.col] = 0
if piece != 0:
if piece.colour == GREEN:
self.greenLeft -= 1
else:
self.whiteLeft -= 1
return
self.whiteLeft -= 1
def getAllMoves(self, colour):
moves = []
for piece in self.getAllPieces(colour):
validMoves = self.getValidMoves(piece)
for move, skip in validMoves.items():
tempBoard = deepcopy(self)
tempPiece = tempBoard.getPiece(piece.row, piece.col)
newBoard = self._simulateMove(tempPiece, move, tempBoard, skip)
moves.append(newBoard)
return moves
def _simulateMove(self, piece, move, board, skip):
board.move(piece, move[0], move[1])
if skip:
board.remove(skip)
return board
def getPiece(self, row, col):
return self.board[row][col]
@ -65,7 +91,8 @@ class Board:
def winner(self):
if self.greenLeft <= 0:
return WHITE
elif self.whiteLeft <= 0:
if self.whiteLeft <= 0:
return GREEN
return None
@ -76,16 +103,10 @@ class Board:
left = piece.col - 1
right = piece.col + 1
row = piece.row
if piece.colour == GREEN:
moves.update(self._traverseLeft(row - 1, max(row - 3, -1), -1, piece.colour, left))
moves.update(self._traverseRight(row - 1, max(row - 3, -1), -1, piece.colour, right))
if piece.colour == WHITE:
moves.update(self._traverseLeft(row + 1, min(row + 3, ROWS), 1, piece.colour, left))
moves.update(self._traverseRight(row + 1, min(row + 3, ROWS), 1, piece.colour, right))
if piece.king:
if piece.colour == GREEN or piece.king:
moves.update(self._traverseLeft(row - 1, max(row - 3, -1), -1, piece.colour, left))
moves.update(self._traverseRight(row - 1, max(row - 3, -1), -1, piece.colour, right))
if piece.colour == WHITE or piece.king:
moves.update(self._traverseLeft(row + 1, min(row + 3, ROWS), 1, piece.colour, left))
moves.update(self._traverseRight(row + 1, min(row + 3, ROWS), 1, piece.colour, right))
@ -122,7 +143,7 @@ class Board:
pieces = []
for row in self.board:
for piece in row:
if piece is not None and piece.colour == colour:
if piece != 0 and piece.colour == colour:
pieces.append(piece)
return pieces
@ -162,7 +183,7 @@ class Board:
def _traverse(self, row, col, skipped, moves, step, last, colour):
current = self.board[row][col]
if current is None:
if current == 0:
if skipped and not last:
return None
elif skipped:
@ -183,3 +204,56 @@ class Board:
else:
last = [current]
return last
def step(self, move, colour):
start, end = self._decode(move)
start[0] = start[0] - 1
start[1] = start[1] - 1
end[0] = end[0] - 1
end[1] = end[1] - 1
reward = 0
done = False
piece = self.getPiece(start[0], start[1])
if piece == 0:
newStart = end
end = start
start = newStart
piece = self.getPiece(start[0], start[1])
moves = self.getValidMoves(piece)
for move, skip in moves.items():
if tuple(end) == move:
self._simulateMove(piece, move, self, skip)
if len(skip) == 1:
reward = 2
break
if len(skip) > 1:
reward = 3 + len(skip) * 0.2
break
reward = -0.5
break
if self.winner() == colour:
done = True
reward = 10
return reward, self, done
def _decode(self, move):
# Split digits back out
str_code = str(move)
print(str_code)
start_row = int(str_code[0])
start_col = int(str_code[1])
end_row = int(str_code[2])
end_col = int(str_code[3])
# Reconstruct positions
start = [start_row, start_col]
end = [end_row, end_col]
return start, end
# def reset(self):
# self.board = []
# self.whiteLeft = self.greenLeft = 12
# self.whiteKings = self.greenKings = 0
# self._createBoard()
# return self.board
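
Board.step consumes the same four-digit code that ReinforcementLearning._encode produces: the 1-based start row/column and end row/column concatenated as digits, so a move from (1, 2) to (2, 3) travels as 1223. Rewards are roughly 2 for a single capture, 3 plus 0.2 per captured piece for a multi-jump, -0.5 for any other move, and 10 on a win. A small round-trip sketch of the encoding, assuming single-digit (1-8) board coordinates:

    def encodeMove(startRow, startCol, endRow, endCol):
        # four 1-based single-digit coordinates packed into one int, e.g. (1, 2, 2, 3) -> 1223
        return int(f"{startRow}{startCol}{endRow}{endCol}")

    def decodeMove(code):
        digits = str(code)
        start = [int(digits[0]), int(digits[1])]
        end = [int(digits[2]), int(digits[3])]
        return start, end

    assert decodeMove(encodeMove(1, 2, 2, 3)) == ([1, 2], [2, 3])

    # reward values mirrored from step(): a double jump scores 3 + 2 * 0.2 = 3.4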

View File

@ -6,8 +6,8 @@ SQUARE_SIZE = WIDTH // COLS
# RGB color
GREEN = (144, 184, 59)
WHITE = (255, 255, 255)
GREEN = 1
WHITE = 2
BLACK = (0, 0, 0)
BLUE = (0, 0, 255)
GREY = (128, 128, 128)

View File

@ -1,7 +1,8 @@
import pygame
from utilities.board import Board
from utilities.Board import Board
from utilities.constants import GREEN, WHITE, BLUE, SQUARE_SIZE
class GameManager:
def __init__(self, win, colour):
self._init(colour)
@ -29,14 +30,14 @@ class GameManager:
self.selected = None
self.select(row, col)
piece = self.board.getPiece(row, col)
if piece is not None and piece.colour == self.turn:
if piece != 0 and piece.colour == self.turn:
self.selected = piece
self.validMoves = self.board.getValidMoves(piece)
return True
def _move(self, row, col):
piece = self.board.getPiece(row, col)
if self.selected and piece is None and (row, col) in self.validMoves:
if self.selected and piece == 0 and (row, col) in self.validMoves:
self.board.move(self.selected, row, col)
skipped = self.validMoves[row, col]
if self.validMoves[list(self.validMoves.keys())[0]]:
@ -58,8 +59,8 @@ class GameManager:
self.validMoves = {}
if self.turn == GREEN:
self.turn = WHITE
else:
self.turn = GREEN
return
self.turn = GREEN
def drawValidMoves(self, moves):
for row, col in moves:

View File

@ -1,6 +1,6 @@
import pygame.draw
from utilities.constants import SQUARE_SIZE, GREY, CROWN
from utilities.constants import SQUARE_SIZE, GREY, CROWN, GREEN
class Piece:
@ -14,6 +14,8 @@ class Piece:
self.calcPosition()
self.padding = 20
self.border = 2
self.green = (144, 184, 59)
self.white = (255, 255, 255)
def calcPosition(self):
self.x = SQUARE_SIZE * self.col + SQUARE_SIZE // 2
@ -25,7 +27,7 @@ class Piece:
def draw(self, win):
radius = SQUARE_SIZE // 2 - self.padding
pygame.draw.circle(win, GREY, (self.x, self.y), radius + self.border)
pygame.draw.circle(win, self.colour, (self.x, self.y), radius)
pygame.draw.circle(win, self.green if self.colour == GREEN else self.white, (self.x, self.y), radius)
if self.king:
win.blit(CROWN, (self.x - CROWN.get_width() // 2, self.y - CROWN.get_height() // 2))