Compare commits
No commits in common. "6d4e364f8d9fef9cc6ab34bac15853ffe9535d49" and "1eb0a04f30fddc4bcccbe913bd4574729539b346" have entirely different histories.
6d4e364f8d
...
1eb0a04f30
@ -4,7 +4,7 @@
|
||||
<content url="file://$MODULE_DIR$">
|
||||
<excludeFolder url="file://$MODULE_DIR$/venv" />
|
||||
</content>
|
||||
<orderEntry type="jdk" jdkName="$USER_HOME$/anaconda3" jdkType="Python SDK" />
|
||||
<orderEntry type="jdk" jdkName="Python 3.11 (draughts)" jdkType="Python SDK" />
|
||||
<orderEntry type="sourceFolder" forTests="false" />
|
||||
</component>
|
||||
</module>
|
@ -1,4 +1,4 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="ProjectRootManager" version="2" project-jdk-name="$USER_HOME$/anaconda3" project-jdk-type="Python SDK" />
|
||||
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.11 (draughts)" project-jdk-type="Python SDK" />
|
||||
</project>
|
BIN
Report.pdf
Normal file
BIN
Report.pdf
Normal file
Binary file not shown.
227
main.py
227
main.py
@ -1,10 +1,7 @@
|
||||
import sys
|
||||
|
||||
import pygame
|
||||
import numpy as np
|
||||
from matplotlib import pyplot as plt
|
||||
|
||||
from reinforcementLearning.ReinforcementLearning import ReinforcementLearning
|
||||
from utilities.constants import WIDTH, HEIGHT, SQUARE_SIZE, WHITE, GREEN
|
||||
from utilities.gameManager import GameManager
|
||||
from minimax.minimaxAlgo import MiniMax
|
||||
@ -47,71 +44,71 @@ def drawMultiLineText(surface, text, pos, font, color=pygame.Color('black')):
|
||||
y += word_height # Start on new row.
|
||||
|
||||
|
||||
def main(difficulty=0):
|
||||
def main():
|
||||
pygame.init()
|
||||
screen = pygame.display.set_mode((WIDTH, HEIGHT))
|
||||
menuClock = pygame.time.Clock()
|
||||
click = False
|
||||
width = screen.get_width()
|
||||
font = pygame.font.SysFont("", 25)
|
||||
font = pygame.font.SysFont(None, 25)
|
||||
difficulty = 0
|
||||
|
||||
if difficulty == 0:
|
||||
while True:
|
||||
# menu
|
||||
screen.fill((128, 128, 128))
|
||||
drawText('Main Menu', font, (255, 255, 255), screen, width / 2, 20)
|
||||
while True:
|
||||
# menu
|
||||
screen.fill((128, 128, 128))
|
||||
drawText('Main Menu', font, (255, 255, 255), screen, width / 2, 20)
|
||||
|
||||
mx, my = pygame.mouse.get_pos()
|
||||
mx, my = pygame.mouse.get_pos()
|
||||
|
||||
easy = pygame.Rect(width / 2 - 50, 100, 200, 50)
|
||||
pygame.draw.rect(screen, (0, 255, 0), easy)
|
||||
drawText("easy", font, (255, 255, 255), screen, width / 2, 100)
|
||||
medium = pygame.Rect(width / 2 - 50, 200, 200, 50)
|
||||
pygame.draw.rect(screen, (255, 125, 0), medium)
|
||||
drawText("medium", font, (255, 255, 255), screen, width / 2, 200)
|
||||
hard = pygame.Rect(width / 2 - 50, 300, 200, 50)
|
||||
pygame.draw.rect(screen, (255, 0, 0), hard)
|
||||
drawText("hard", font, (255, 255, 255), screen, width / 2, 300)
|
||||
rules = pygame.Rect(width / 2 - 50, 400, 200, 50)
|
||||
pygame.draw.rect(screen, (0, 0, 255), rules)
|
||||
drawText("rules", font, (255, 255, 255), screen, width / 2, 400)
|
||||
quitGame = pygame.Rect(width / 2 - 50, 500, 200, 50)
|
||||
pygame.draw.rect(screen, (0, 0, 0), quitGame)
|
||||
drawText("quit", font, (255, 255, 255), screen, width / 2, 500)
|
||||
easy = pygame.Rect(width / 2 - 50, 100, 200, 50)
|
||||
pygame.draw.rect(screen, (0, 255, 0), easy)
|
||||
drawText("easy", font, (255, 255, 255), screen, width / 2, 100)
|
||||
medium = pygame.Rect(width / 2 - 50, 200, 200, 50)
|
||||
pygame.draw.rect(screen, (255, 125, 0), medium)
|
||||
drawText("medium", font, (255, 255, 255), screen, width / 2, 200)
|
||||
hard = pygame.Rect(width / 2 - 50, 300, 200, 50)
|
||||
pygame.draw.rect(screen, (255, 0, 0), hard)
|
||||
drawText("hard", font, (255, 255, 255), screen, width / 2, 300)
|
||||
rules = pygame.Rect(width / 2 - 50, 400, 200, 50)
|
||||
pygame.draw.rect(screen, (0, 0, 255), rules)
|
||||
drawText("rules", font, (255, 255, 255), screen, width / 2, 400)
|
||||
quitGame = pygame.Rect(width / 2 - 50, 500, 200, 50)
|
||||
pygame.draw.rect(screen, (0, 0, 0), quitGame)
|
||||
drawText("quit", font, (255, 255, 255), screen, width / 2, 500)
|
||||
|
||||
if easy.collidepoint((mx, my)):
|
||||
if click:
|
||||
difficulty = 1
|
||||
break
|
||||
if medium.collidepoint((mx, my)):
|
||||
if click:
|
||||
difficulty = 3
|
||||
break
|
||||
if hard.collidepoint((mx, my)):
|
||||
if click:
|
||||
difficulty = 5
|
||||
break
|
||||
if rules.collidepoint((mx, my)):
|
||||
if click:
|
||||
rulesGUI()
|
||||
break
|
||||
if quitGame.collidepoint((mx, my)):
|
||||
if click:
|
||||
pygame.quit()
|
||||
sys.exit()
|
||||
click = False
|
||||
for event in pygame.event.get():
|
||||
if event.type == pygame.QUIT:
|
||||
pygame.quit()
|
||||
sys.exit()
|
||||
if event.type == pygame.MOUSEBUTTONDOWN:
|
||||
if event.button == 1:
|
||||
click = True
|
||||
if easy.collidepoint((mx, my)):
|
||||
if click:
|
||||
difficulty = 1
|
||||
break
|
||||
if medium.collidepoint((mx, my)):
|
||||
if click:
|
||||
difficulty = 3
|
||||
break
|
||||
if hard.collidepoint((mx, my)):
|
||||
if click:
|
||||
difficulty = 5
|
||||
break
|
||||
if rules.collidepoint((mx, my)):
|
||||
if click:
|
||||
rulesGUI()
|
||||
break
|
||||
if quitGame.collidepoint((mx, my)):
|
||||
if click:
|
||||
pygame.quit()
|
||||
sys.exit()
|
||||
click = False
|
||||
for event in pygame.event.get():
|
||||
if event.type == pygame.QUIT:
|
||||
pygame.quit()
|
||||
sys.exit()
|
||||
if event.type == pygame.MOUSEBUTTONDOWN:
|
||||
if event.button == 1:
|
||||
click = True
|
||||
|
||||
pygame.display.update()
|
||||
menuClock.tick(60)
|
||||
|
||||
game(difficulty)
|
||||
pygame.display.update()
|
||||
menuClock.tick(60)
|
||||
if difficulty != 0:
|
||||
game(difficulty)
|
||||
|
||||
|
||||
def rulesGUI():
|
||||
@ -119,8 +116,8 @@ def rulesGUI():
|
||||
menuClock = pygame.time.Clock()
|
||||
click = False
|
||||
width = screen.get_width()
|
||||
titleFont = pygame.font.SysFont("", 48)
|
||||
font = pygame.font.SysFont("", 21)
|
||||
titleFont = pygame.font.SysFont(None, 48)
|
||||
font = pygame.font.SysFont(None, 21)
|
||||
while True:
|
||||
screen.fill((128, 128, 128))
|
||||
drawText("Rules", titleFont, (255, 255, 255), screen, width / 2, 20)
|
||||
@ -178,97 +175,39 @@ def game(difficulty):
|
||||
run = True
|
||||
clock = pygame.time.Clock()
|
||||
gameManager = GameManager(WIN, GREEN)
|
||||
rl = ReinforcementLearning(gameManager.board.getAllMoves(WHITE), gameManager.board, WHITE, gameManager)
|
||||
model = rl.buildMainModel()
|
||||
model.load_weights("./modelWeights/model_final.h5")
|
||||
mm = MiniMax()
|
||||
totalReward = []
|
||||
winners = []
|
||||
for i in range(100):
|
||||
score = 0
|
||||
for j in range(200):
|
||||
print(j)
|
||||
clock.tick(FPS)
|
||||
reward = 0
|
||||
if gameManager.turn == WHITE:
|
||||
# mm = MiniMax()
|
||||
# value, newBoard = mm.AI(difficulty, WHITE, gameManager)
|
||||
# gameManager.aiMove(newBoard)
|
||||
# reward, newBoard = rl.AI(gameManager.board)
|
||||
actionSpace = rl.encodeMoves(WHITE, gameManager.board)
|
||||
if len(actionSpace) == 0:
|
||||
print("Cannot make move")
|
||||
continue
|
||||
totalMoves = len(actionSpace)
|
||||
# moves = np.squeeze(moves)
|
||||
moves = np.pad(actionSpace, (0, rl.maxSize - totalMoves), 'constant', constant_values=(1, 1))
|
||||
act_values = model.predict(rl.normalise(moves))
|
||||
val = np.argmax(act_values[0])
|
||||
val = val if val < totalMoves else totalMoves - 1
|
||||
reward, newBoard, done = gameManager.board.step(actionSpace[val], WHITE)
|
||||
|
||||
# if newBoard is None:
|
||||
# print("Cannot make move")
|
||||
# continue
|
||||
gameManager.aiMove(newBoard)
|
||||
while run:
|
||||
clock.tick(FPS)
|
||||
|
||||
gameManager.update()
|
||||
pygame.display.update()
|
||||
if gameManager.turn == WHITE:
|
||||
mm = MiniMax()
|
||||
value, newBoard = mm.AI(gameManager.getBoard(), difficulty, WHITE, gameManager)
|
||||
gameManager.aiMove(newBoard)
|
||||
# time.sleep(0.15)
|
||||
|
||||
if gameManager.turn == GREEN:
|
||||
value, newBoard = mm.AI(difficulty, GREEN, gameManager)
|
||||
gameManager.aiMove(newBoard)
|
||||
if gameManager.turn == GREEN:
|
||||
mm = MiniMax()
|
||||
value, newBoard = mm.AI(gameManager.getBoard(), difficulty, GREEN, gameManager)
|
||||
gameManager.aiMove(newBoard)
|
||||
# time.sleep(0.15)
|
||||
|
||||
score += reward
|
||||
if gameManager.winner() != None:
|
||||
print(gameManager.winner())
|
||||
run = False
|
||||
|
||||
if gameManager.winner() is not None:
|
||||
print("Green" if gameManager.winner() == GREEN else "White", " wins")
|
||||
with open("winners.txt", "a+") as f:
|
||||
f.write(str(gameManager.winner()) + "\n")
|
||||
winners.append(gameManager.winner())
|
||||
break
|
||||
for event in pygame.event.get():
|
||||
if event.type == pygame.QUIT:
|
||||
run = False
|
||||
if event.type == pygame.MOUSEBUTTONDOWN:
|
||||
pos = pygame.mouse.get_pos()
|
||||
row, col = getRowColFromMouse(pos)
|
||||
# if gameManager.turn == GREEN:
|
||||
gameManager.select(row, col)
|
||||
|
||||
# for event in pygame.event.get():
|
||||
# if event.type == pygame.QUIT:
|
||||
# break
|
||||
# if event.type == pygame.MOUSEBUTTONDOWN:
|
||||
# pos = pygame.mouse.get_pos()
|
||||
# row, col = getRowColFromMouse(pos)
|
||||
# # if gameManager.turn == GREEN:
|
||||
# gameManager.select(row, col)
|
||||
gameManager.update()
|
||||
pygame.display.update()
|
||||
|
||||
gameManager.update()
|
||||
pygame.display.update()
|
||||
|
||||
if gameManager.winner() is None:
|
||||
with open("winners.txt", "a+") as f:
|
||||
f.write(str(0) + "\n")
|
||||
winners.append(0)
|
||||
gameManager.reset()
|
||||
rl.resetScore()
|
||||
print("Game: ", i, " Reward: ", score)
|
||||
with open("rewards.txt", "a+") as f:
|
||||
f.write(str(score) + "\n")
|
||||
|
||||
totalReward.append(score)
|
||||
# save model weights every 25 games
|
||||
if i % 250 == 0 and i != 0:
|
||||
rl.model.save("./modelWeights/model_" + str(i) + ".h5")
|
||||
# pygame.quit()
|
||||
|
||||
rl.model.save("./modelWeights/model_final.h5")
|
||||
|
||||
plt.plot([i for i in range(len(totalReward))], totalReward)
|
||||
plt.xlabel("Games")
|
||||
plt.ylabel("Reward")
|
||||
plt.show()
|
||||
|
||||
fig, ax = plt.subplots()
|
||||
bar = ax.bar(["Draw", "White", "Green"], [winners.count(0), winners.count(WHITE), winners.count(GREEN)])
|
||||
ax.set(xlabel='Winner', ylabel='Frequency', ylim=[0, 500])
|
||||
ax.set_title("Winners")
|
||||
ax.bar_label(bar)
|
||||
plt.show()
|
||||
|
||||
|
||||
main(3)
|
||||
main()
|
||||
|
@ -1,38 +1,56 @@
|
||||
import random
|
||||
from copy import deepcopy
|
||||
from math import inf
|
||||
|
||||
from utilities.constants import GREEN, WHITE
|
||||
|
||||
|
||||
class MiniMax:
|
||||
class MiniMax():
|
||||
|
||||
def AI(self, depth, maxPlayer, gameManager):
|
||||
if depth == 0 or gameManager.board.winner() is not None:
|
||||
return gameManager.board.scoreOfTheBoard(), gameManager.board
|
||||
def AI(self, board, depth, maxPlayer, gameManager):
|
||||
if depth == 0 or board.winner() is not None:
|
||||
return board.scoreOfTheBoard(), board
|
||||
|
||||
if type(maxPlayer) == int:
|
||||
if maxPlayer:
|
||||
maxEval = -inf
|
||||
bestMove = None
|
||||
for move in gameManager.board.getAllMoves(maxPlayer):
|
||||
evaluation = self.AI(depth - 1, False, gameManager)[0]
|
||||
for move in self.getAllMoves(board, maxPlayer):
|
||||
evaluation = self.AI(move, depth - 1, False, gameManager)[0]
|
||||
maxEval = max(maxEval, evaluation)
|
||||
if maxEval > evaluation:
|
||||
bestMove = move
|
||||
if maxEval == evaluation:
|
||||
# bestMove = move
|
||||
bestMove = bestMove if random.choice([True, False]) else move
|
||||
return maxEval, bestMove
|
||||
else:
|
||||
minEval = inf
|
||||
bestMove = None
|
||||
colour = WHITE if gameManager.turn == GREEN else GREEN
|
||||
for move in gameManager.board.getAllMoves(colour):
|
||||
evaluation = self.AI(depth - 1, True, gameManager)[0]
|
||||
for move in self.getAllMoves(board, colour):
|
||||
evaluation = self.AI(move, depth - 1, True, gameManager)[0]
|
||||
minEval = min(minEval, evaluation)
|
||||
if minEval < evaluation:
|
||||
bestMove = move
|
||||
if minEval == evaluation:
|
||||
# bestMove = move
|
||||
bestMove = bestMove if random.choice([True, False]) else move
|
||||
|
||||
return minEval, bestMove
|
||||
|
||||
def _simulateMove(self, piece, move, board, skip):
|
||||
board.move(piece, move[0], move[1])
|
||||
if skip:
|
||||
board.remove(skip)
|
||||
|
||||
return board
|
||||
|
||||
def getAllMoves(self, board, colour):
|
||||
moves = []
|
||||
|
||||
for piece in board.getAllPieces(colour):
|
||||
validMoves = board.getValidMoves(piece)
|
||||
for move, skip in validMoves.items():
|
||||
tempBoard = deepcopy(board)
|
||||
tempPiece = tempBoard.getPiece(piece.row, piece.col)
|
||||
newBoard = self._simulateMove(tempPiece, move, tempBoard, skip)
|
||||
moves.append(newBoard)
|
||||
return moves
|
||||
|
@ -1,245 +1,96 @@
|
||||
import random
|
||||
from collections import deque
|
||||
from typing import Any
|
||||
from copy import deepcopy
|
||||
|
||||
import numpy as np
|
||||
import tensorflow as tf
|
||||
from keras.engine.input_layer import InputLayer
|
||||
from keras.layers import BatchNormalization
|
||||
from tensorflow.python.keras import Sequential, regularizers, Input
|
||||
from tensorflow.python.keras.layers import Dense, Lambda, Dropout
|
||||
from tensorflow.python.keras.optimizer_v2.adam import Adam
|
||||
|
||||
from minimax.minimaxAlgo import MiniMax
|
||||
from utilities import Board
|
||||
from utilities.constants import WHITE, GREEN
|
||||
from utilities.gameManager import GameManager
|
||||
from tensorflow.python.keras import Sequential, regularizers
|
||||
from tensorflow.python.keras.layers import Dense
|
||||
|
||||
|
||||
class ReinforcementLearning():
|
||||
|
||||
def __init__(self, actionSpace: list, board: Board, colour: int, gameManager: GameManager) -> None:
|
||||
"""
|
||||
Constructor for the ReinforcementLearning class
|
||||
:param actionSpace: the number of possible actions
|
||||
:param board: the game board
|
||||
"""
|
||||
self.gameManager = gameManager
|
||||
self.actionSpace = actionSpace
|
||||
self.board = board
|
||||
self.state = self.board.board
|
||||
self.colour = colour
|
||||
self.score = 0
|
||||
def __init__(self, action_space, state_space, env):
|
||||
self.action_space = action_space
|
||||
self.state_space = state_space
|
||||
self.env = env
|
||||
self.epsilon = 1
|
||||
self.gamma = .95
|
||||
self.batchSize = 256
|
||||
self.maxSize = 32
|
||||
self.epsilonMin = .01
|
||||
self.epsilonDecay = .995
|
||||
self.learningRate = 0.0001
|
||||
self.memory = deque(maxlen=10000000)
|
||||
self.model = self.buildMainModel()
|
||||
print(self.model.summary())
|
||||
self.batch_size = 64
|
||||
self.epsilon_min = .01
|
||||
self.epsilon_decay = .995
|
||||
self.learning_rate = 0.001
|
||||
self.memory = deque(maxlen=100000)
|
||||
self.model = self._buildModel()
|
||||
|
||||
def AI(self, board: Board) -> tuple:
|
||||
"""
|
||||
Learns to play the draughts game
|
||||
:return: the loss
|
||||
"""
|
||||
self.board = board
|
||||
self.state = self._convertState(self.board.board)
|
||||
self.actionSpace = self.encodeMoves(self.colour, self.board)
|
||||
if len(self.actionSpace) == 0:
|
||||
return self.score, None
|
||||
def AI(self, episode):
|
||||
loss = []
|
||||
|
||||
action = self._act()
|
||||
reward, nextState, done = self.board.step(action, self.colour)
|
||||
self.score += reward
|
||||
self.state = self._convertState(nextState.board)
|
||||
self._remember(deepcopy(self.board), action, reward, self.state, done)
|
||||
self._replay()
|
||||
max_steps = 1000
|
||||
|
||||
return self.score, nextState
|
||||
for e in range(episode):
|
||||
state = self.env.reset()
|
||||
state = np.reshape(state, (1, self.state_space))
|
||||
score = 0
|
||||
for i in range(max_steps):
|
||||
action = self.act(state)
|
||||
reward, next_state, done = self.env.step(action)
|
||||
score += reward
|
||||
next_state = np.reshape(next_state, (1, self.state_space))
|
||||
self.remember(state, action, reward, next_state, done)
|
||||
state = next_state
|
||||
self.replay()
|
||||
if done:
|
||||
print("episode: {}/{}, score: {}".format(e, episode, score))
|
||||
break
|
||||
loss.append(score)
|
||||
|
||||
def buildMainModel(self) -> Sequential:
|
||||
"""
|
||||
Build the model for the AI
|
||||
:return: the model
|
||||
"""
|
||||
def _buildModel(self):
|
||||
# Board model
|
||||
modelLayers = [
|
||||
Lambda(lambda x: tf.reshape(x, [-1, 32])),
|
||||
Dense(512, activation='relu', kernel_regularizer=regularizers.l2(0.01)),
|
||||
Dropout(0.2),
|
||||
Dense(256, activation='relu', kernel_regularizer=regularizers.l2(0.01)),
|
||||
Dropout(0.2),
|
||||
Dense(128, activation='relu', kernel_regularizer=regularizers.l2(0.01)),
|
||||
Dropout(0.2),
|
||||
Dense(64, activation='relu', kernel_regularizer=regularizers.l2(0.01)),
|
||||
Dropout(0.2),
|
||||
Dense(32, activation='relu', kernel_regularizer=regularizers.l2(0.01)),
|
||||
Dropout(0.2),
|
||||
Dense(16, activation='linear', kernel_regularizer=regularizers.l2(0.01))
|
||||
]
|
||||
boardModel = Sequential(modelLayers)
|
||||
board_model = Sequential()
|
||||
|
||||
# boardModel.add(BatchNormalization())
|
||||
boardModel.compile(optimizer=Adam(learning_rate=self.learningRate), loss='mean_squared_error')
|
||||
boardModel.build(input_shape=(None, None))
|
||||
# input dimensions is 32 board position values
|
||||
board_model.add(Dense(64, activation='relu', input_dim=32))
|
||||
|
||||
return boardModel
|
||||
# use regularizers, to prevent fitting noisy labels
|
||||
board_model.add(Dense(32, activation='relu', kernel_regularizer=regularizers.l2(0.01)))
|
||||
board_model.add(Dense(16, activation='relu', kernel_regularizer=regularizers.l2(0.01))) # 16
|
||||
board_model.add(Dense(8, activation='relu', kernel_regularizer=regularizers.l2(0.01))) # 8
|
||||
|
||||
def _replay(self) -> None:
|
||||
"""
|
||||
trains the model
|
||||
:return: None (void)
|
||||
"""
|
||||
if len(self.memory) < self.batchSize:
|
||||
# Not enough data to replay and test the model
|
||||
# output isn't squashed, because it might lose information
|
||||
board_model.add(Dense(1, activation='linear', kernel_regularizer=regularizers.l2(0.01)))
|
||||
board_model.compile(optimizer='nadam', loss='binary_crossentropy')
|
||||
|
||||
return board_model
|
||||
|
||||
def remember(self, state, action, reward, next_state, done):
|
||||
self.memory.append((state, action, reward, next_state, done))
|
||||
|
||||
def replay(self):
|
||||
if len(self.memory) < self.batch_size:
|
||||
return
|
||||
|
||||
# Get a random sample from the memory
|
||||
minibatch = random.sample(self.memory, int(self.maxSize))
|
||||
minibatch = random.sample(self.memory, self.batch_size)
|
||||
states = np.array([i[0] for i in minibatch])
|
||||
actions = np.array([i[1] for i in minibatch])
|
||||
rewards = np.array([i[2] for i in minibatch])
|
||||
next_states = np.array([i[3] for i in minibatch])
|
||||
dones = np.array([i[4] for i in minibatch])
|
||||
|
||||
# Extract states, rewards, dones
|
||||
states = [m[0] for m in minibatch]
|
||||
rewards = [m[2] for m in minibatch]
|
||||
dones = [m[4] for m in minibatch]
|
||||
states = np.squeeze(states)
|
||||
next_states = np.squeeze(next_states)
|
||||
|
||||
# Encoded moves
|
||||
encodedMoves = []
|
||||
for state in states:
|
||||
encodedMoves.append(self.encodeMoves(self.colour, state))
|
||||
targets = rewards + self.gamma * (np.amax(self.model.predict_on_batch(next_states), axis=1)) * (1 - dones)
|
||||
targets_full = self.model.predict_on_batch(states)
|
||||
|
||||
# Calculate targets
|
||||
targets = []
|
||||
for i, moves in enumerate(encodedMoves):
|
||||
if dones[i]:
|
||||
target = rewards[i]
|
||||
else:
|
||||
target = rewards[i] + self.gamma * self._maxNextQ()
|
||||
ind = np.array([i for i in range(self.batch_size)])
|
||||
targets_full[[ind], [actions]] = targets
|
||||
|
||||
targets.append(target)
|
||||
self.model.fit(states, targets_full, epochs=1, verbose=0)
|
||||
if self.epsilon > self.epsilon_min:
|
||||
self.epsilon *= self.epsilon_decay
|
||||
|
||||
encodedMoves = np.array([np.pad(m, (0, self.maxSize - len(m)), 'constant', constant_values=(1, 1))
|
||||
for m in encodedMoves])
|
||||
targets = np.array(targets)
|
||||
self.model.fit(self.normalise(encodedMoves), self.normalise(targets), epochs=20)
|
||||
if self.epsilon > self.epsilonMin:
|
||||
self.epsilon *= self.epsilonDecay
|
||||
|
||||
def _remember(self, state: np.array, action: int, reward: float, nextState: np.array, done: bool) -> None:
|
||||
"""
|
||||
Remembers what it has learnt
|
||||
:param state: the current state
|
||||
:param action: the action taken
|
||||
:param reward: the reward for the action
|
||||
:param nextState: the next state
|
||||
:param done: whether the game is finished
|
||||
:return: None (void)
|
||||
"""
|
||||
self.memory.append((state, action, reward, nextState, done))
|
||||
|
||||
def _act(self) -> Any:
|
||||
"""
|
||||
Chooses an action based on the available moves
|
||||
:return: the action
|
||||
"""
|
||||
def act(self, state):
|
||||
if np.random.rand() <= self.epsilon:
|
||||
# choose a random action from the action spaces list
|
||||
mm = MiniMax()
|
||||
value, newBoard = mm.AI(3, self.colour, self.gameManager)
|
||||
if newBoard is None:
|
||||
return random.choice(self.actionSpace)
|
||||
where = self._boardDiff(self.board, newBoard)
|
||||
return self._encode(where[0]+1, where[1]+1)
|
||||
|
||||
if len(self.actionSpace) == 1:
|
||||
return self.actionSpace[0]
|
||||
encodedMoves = np.squeeze(self.actionSpace)
|
||||
encodedMoves = np.pad(encodedMoves, (0, self.maxSize - len(encodedMoves)), 'constant', constant_values=(1, 1))
|
||||
act_values = self.model.predict(self.normalise(encodedMoves))
|
||||
val = np.argmax(act_values[0])
|
||||
val = val if val < len(self.actionSpace) else len(self.actionSpace) - 1
|
||||
return self.actionSpace[val]
|
||||
|
||||
def resetScore(self):
|
||||
self.score = 0
|
||||
|
||||
def _convertState(self, board: list) -> list:
|
||||
"""
|
||||
Converts the board into a 2D list of numbers
|
||||
:param board: 2D list of pieces
|
||||
:return: new 2D list of numbers
|
||||
"""
|
||||
num_board = []
|
||||
|
||||
for row in board:
|
||||
num_row = []
|
||||
for piece in row:
|
||||
if piece == 0:
|
||||
num_row.append(0)
|
||||
continue
|
||||
|
||||
if piece.colour == 1:
|
||||
num_row.append(1)
|
||||
continue
|
||||
|
||||
num_row.append(2)
|
||||
|
||||
num_board.append(num_row)
|
||||
|
||||
return num_board
|
||||
|
||||
def _encode(self, start: tuple, end: tuple) -> int:
|
||||
"""
|
||||
Encodes the move into an integer
|
||||
:param start: tuple of start position
|
||||
:param end: tuple of end position
|
||||
:return: encoded move
|
||||
"""
|
||||
start_row = start[0]
|
||||
start_col = end[0]
|
||||
|
||||
end_row = start[-1]
|
||||
end_col = end[-1]
|
||||
|
||||
# Concatenate into integer
|
||||
return int(str(start_row) + str(start_col) + str(end_row) + str(end_col))
|
||||
|
||||
def _maxNextQ(self) -> float:
|
||||
colour = WHITE if self.colour == GREEN else GREEN
|
||||
encodedMoves = self.encodeMoves(colour, self.board)
|
||||
if len(encodedMoves) == 0:
|
||||
return -1
|
||||
paddedMoves = np.array(np.pad(encodedMoves, (0, self.maxSize - len(encodedMoves)), 'constant', constant_values=(1, 1)))
|
||||
nextQValues = self.model.predict_on_batch(self.normalise(paddedMoves))
|
||||
return np.max(nextQValues)
|
||||
|
||||
def encodeMoves(self, colour: int, board: Board) -> list:
|
||||
"""
|
||||
Encodes the moves into a list encoded moves
|
||||
:param colour: colour of the player
|
||||
:param board: the board
|
||||
:return: list of encoded moves
|
||||
"""
|
||||
encodedMoves = []
|
||||
moves = board.getAllMoves(colour)
|
||||
for move in moves:
|
||||
where = self._boardDiff(board, move)
|
||||
encodedMoves.append(self._encode(where[0]+1, where[1]+1))
|
||||
return encodedMoves
|
||||
|
||||
def _boardDiff(self, board, move):
|
||||
cnvState = np.array(self._convertState(board.board))
|
||||
cnvMove = np.array(self._convertState(move.board))
|
||||
diff = np.subtract(cnvMove, cnvState)
|
||||
diff = np.nonzero(diff)
|
||||
return diff
|
||||
|
||||
def normalise(self, data):
|
||||
"""
|
||||
Normalise the data
|
||||
"""
|
||||
return data / 10000
|
||||
return random.randrange(self.action_space)
|
||||
act_values = self.model.predict(state)
|
||||
return np.argmax(act_values[0])
|
||||
|
27
results.py
27
results.py
@ -1,27 +0,0 @@
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
from utilities.constants import GREEN, WHITE
|
||||
|
||||
# winners = []
|
||||
with open("winners.txt") as f:
|
||||
winners = f.readlines()
|
||||
|
||||
winners = [int(x.strip()) for x in winners]
|
||||
|
||||
|
||||
fig, ax = plt.subplots()
|
||||
bar = ax.bar(["Draw", "White", "Green"], [winners.count(0), winners.count(WHITE), winners.count(GREEN)])
|
||||
ax.set(xlabel='Winner', ylabel='Frequency', ylim=[0, 500])
|
||||
ax.set_title("Winners")
|
||||
ax.bar_label(bar)
|
||||
plt.show()
|
||||
|
||||
|
||||
with open("rewardsA.txt") as f:
|
||||
totalReward = f.readlines()
|
||||
|
||||
|
||||
plt.plot([i for i in range(len(totalReward))], totalReward)
|
||||
plt.xlabel("Games")
|
||||
plt.ylabel("Reward")
|
||||
plt.show()
|
@ -1,5 +1,5 @@
|
||||
import pygame
|
||||
from copy import deepcopy
|
||||
|
||||
from .constants import BLACK, ROWS, GREEN, SQUARE_SIZE, COLS, WHITE
|
||||
from .piece import Piece
|
||||
|
||||
@ -9,39 +9,34 @@ class Board:
|
||||
self.board = []
|
||||
self.greenLeft = self.whiteLeft = 12
|
||||
self.greenKings = self.whiteKings = 0
|
||||
self.green = (144, 184, 59)
|
||||
self._createBoard()
|
||||
self.createBoard()
|
||||
|
||||
def _drawSquares(self, win):
|
||||
def drawSquares(self, win):
|
||||
win.fill(BLACK)
|
||||
for row in range(ROWS):
|
||||
for col in range(row % 2, ROWS, 2):
|
||||
pygame.draw.rect(win, self.green, (row * SQUARE_SIZE, col * SQUARE_SIZE, SQUARE_SIZE, SQUARE_SIZE))
|
||||
pygame.draw.rect(win, GREEN, (row * SQUARE_SIZE, col * SQUARE_SIZE, SQUARE_SIZE, SQUARE_SIZE))
|
||||
|
||||
def _createBoard(self):
|
||||
def createBoard(self):
|
||||
for row in range(ROWS):
|
||||
self.board.append([])
|
||||
for col in range(COLS):
|
||||
if col % 2 == ((row + 1) % 2):
|
||||
if row < 3:
|
||||
self.board[row].append(Piece(row, col, WHITE))
|
||||
continue
|
||||
|
||||
if row > 4:
|
||||
elif row > 4:
|
||||
self.board[row].append(Piece(row, col, GREEN))
|
||||
continue
|
||||
|
||||
self.board[row].append(0)
|
||||
continue
|
||||
|
||||
self.board[row].append(0)
|
||||
else:
|
||||
self.board[row].append(None)
|
||||
else:
|
||||
self.board[row].append(None)
|
||||
|
||||
def draw(self, win):
|
||||
self._drawSquares(win)
|
||||
self.drawSquares(win)
|
||||
for row in range(ROWS):
|
||||
for col in range(COLS):
|
||||
piece = self.board[row][col]
|
||||
if piece != 0:
|
||||
if piece is not None:
|
||||
piece.draw(win)
|
||||
|
||||
def move(self, piece, row, col):
|
||||
@ -50,65 +45,19 @@ class Board:
|
||||
|
||||
if row == ROWS - 1 or row == 0:
|
||||
piece.makeKing()
|
||||
|
||||
if piece.colour == WHITE:
|
||||
self.whiteKings += 1
|
||||
|
||||
if piece.colour == GREEN:
|
||||
self.greenKings += 1
|
||||
if piece.colour == WHITE:
|
||||
self.whiteKings += 1
|
||||
else:
|
||||
self.greenKings += 1
|
||||
|
||||
def remove(self, skipped):
|
||||
for piece in skipped:
|
||||
self.board[piece.row][piece.col] = 0
|
||||
if piece != 0:
|
||||
self.board[piece.row][piece.col] = None
|
||||
if piece is not None:
|
||||
if piece.colour == GREEN:
|
||||
self.greenLeft -= 1
|
||||
continue
|
||||
self.whiteLeft -= 1
|
||||
|
||||
def getAllMoves(self, colour):
|
||||
moves = []
|
||||
possibleMoves = []
|
||||
possiblePieces = []
|
||||
pieces = self.getAllPieces(colour)
|
||||
hasForcedCapture = False
|
||||
|
||||
for piece in pieces:
|
||||
validMoves = self.getValidMoves(piece)
|
||||
|
||||
# Check if there are forced capture moves for this piece
|
||||
forcedCaptureMoves = [move for move, skip in validMoves.items() if skip]
|
||||
if forcedCaptureMoves:
|
||||
hasForcedCapture = True
|
||||
possiblePieces.append(piece)
|
||||
possibleMoves.append({move: skip for move, skip in validMoves.items() if skip})
|
||||
|
||||
if hasForcedCapture:
|
||||
# If there are forced capture moves, consider only those
|
||||
for i in range(len(possibleMoves)):
|
||||
for move, skip in possibleMoves[i].items():
|
||||
tempBoard = deepcopy(self)
|
||||
tempPiece = tempBoard.getPiece(possiblePieces[i].row, possiblePieces[i].col)
|
||||
newBoard = self._simulateMove(tempPiece, move, tempBoard, skip)
|
||||
moves.append(newBoard)
|
||||
else:
|
||||
# If no forced capture moves, consider all valid moves
|
||||
for piece in pieces:
|
||||
validMoves = self.getValidMoves(piece)
|
||||
for move, skip in validMoves.items():
|
||||
tempBoard = deepcopy(self)
|
||||
tempPiece = tempBoard.getPiece(piece.row, piece.col)
|
||||
newBoard = self._simulateMove(tempPiece, move, tempBoard, skip)
|
||||
moves.append(newBoard)
|
||||
|
||||
return moves
|
||||
|
||||
def _simulateMove(self, piece, move, board, skip):
|
||||
board.move(piece, move[0], move[1])
|
||||
if skip:
|
||||
board.remove(skip)
|
||||
|
||||
return board
|
||||
else:
|
||||
self.whiteLeft -= 1
|
||||
|
||||
def getPiece(self, row, col):
|
||||
return self.board[row][col]
|
||||
@ -116,8 +65,7 @@ class Board:
|
||||
def winner(self):
|
||||
if self.greenLeft <= 0:
|
||||
return WHITE
|
||||
|
||||
if self.whiteLeft <= 0:
|
||||
elif self.whiteLeft <= 0:
|
||||
return GREEN
|
||||
|
||||
return None
|
||||
@ -128,10 +76,16 @@ class Board:
|
||||
left = piece.col - 1
|
||||
right = piece.col + 1
|
||||
row = piece.row
|
||||
if piece.colour == GREEN or piece.king:
|
||||
if piece.colour == GREEN:
|
||||
moves.update(self._traverseLeft(row - 1, max(row - 3, -1), -1, piece.colour, left))
|
||||
moves.update(self._traverseRight(row - 1, max(row - 3, -1), -1, piece.colour, right))
|
||||
if piece.colour == WHITE:
|
||||
moves.update(self._traverseLeft(row + 1, min(row + 3, ROWS), 1, piece.colour, left))
|
||||
moves.update(self._traverseRight(row + 1, min(row + 3, ROWS), 1, piece.colour, right))
|
||||
|
||||
if piece.king:
|
||||
moves.update(self._traverseLeft(row - 1, max(row - 3, -1), -1, piece.colour, left))
|
||||
moves.update(self._traverseRight(row - 1, max(row - 3, -1), -1, piece.colour, right))
|
||||
if piece.colour == WHITE or piece.king:
|
||||
moves.update(self._traverseLeft(row + 1, min(row + 3, ROWS), 1, piece.colour, left))
|
||||
moves.update(self._traverseRight(row + 1, min(row + 3, ROWS), 1, piece.colour, right))
|
||||
|
||||
@ -159,7 +113,6 @@ class Board:
|
||||
forcedCapture = forced
|
||||
else:
|
||||
forcedCapture = forced
|
||||
|
||||
return forcedCapture
|
||||
|
||||
def scoreOfTheBoard(self):
|
||||
@ -169,7 +122,7 @@ class Board:
|
||||
pieces = []
|
||||
for row in self.board:
|
||||
for piece in row:
|
||||
if piece != 0 and piece.colour == colour:
|
||||
if piece is not None and piece.colour == colour:
|
||||
pieces.append(piece)
|
||||
return pieces
|
||||
|
||||
@ -209,7 +162,7 @@ class Board:
|
||||
|
||||
def _traverse(self, row, col, skipped, moves, step, last, colour):
|
||||
current = self.board[row][col]
|
||||
if current == 0:
|
||||
if current is None:
|
||||
if skipped and not last:
|
||||
return None
|
||||
elif skipped:
|
||||
@ -230,56 +183,3 @@ class Board:
|
||||
else:
|
||||
last = [current]
|
||||
return last
|
||||
|
||||
def step(self, move, colour):
|
||||
start, end = self._decode(move)
|
||||
start[0] = start[0] - 1
|
||||
start[1] = start[1] - 1
|
||||
end[0] = end[0] - 1
|
||||
end[1] = end[1] - 1
|
||||
reward = 0
|
||||
done = False
|
||||
piece = self.getPiece(start[0], start[1])
|
||||
if piece == 0:
|
||||
newStart = end
|
||||
end = start
|
||||
start = newStart
|
||||
piece = self.getPiece(start[0], start[1])
|
||||
moves = self.getValidMoves(piece)
|
||||
for move, skip in moves.items():
|
||||
if tuple(end) == move:
|
||||
self._simulateMove(piece, move, self, skip)
|
||||
if len(skip) == 1:
|
||||
reward = 2
|
||||
break
|
||||
if len(skip) > 1:
|
||||
reward = 3 + len(skip) * 0.2
|
||||
break
|
||||
reward = -0.5
|
||||
break
|
||||
|
||||
if self.winner() == colour:
|
||||
done = True
|
||||
reward = 10
|
||||
return reward, self, done
|
||||
|
||||
|
||||
def _decode(self, move):
|
||||
# Split digits back out
|
||||
str_code = str(move)
|
||||
# print(str_code)
|
||||
start_row = int(str_code[0])
|
||||
start_col = int(str_code[1])
|
||||
end_row = int(str_code[2])
|
||||
end_col = int(str_code[3])
|
||||
# Reconstruct positions
|
||||
start = [start_row, start_col]
|
||||
end = [end_row, end_col]
|
||||
return start, end
|
||||
|
||||
# def reset(self):
|
||||
# self.board = []
|
||||
# self.whiteLeft = self.greenLeft = 12
|
||||
# self.whiteKings = self.greenKings = 0
|
||||
# self._createBoard()
|
||||
# return self.board
|
@ -6,8 +6,8 @@ SQUARE_SIZE = WIDTH // COLS
|
||||
|
||||
# RGB color
|
||||
|
||||
GREEN = 1
|
||||
WHITE = 2
|
||||
GREEN = (144, 184, 59)
|
||||
WHITE = (255, 255, 255)
|
||||
BLACK = (0, 0, 0)
|
||||
BLUE = (0, 0, 255)
|
||||
GREY = (128, 128, 128)
|
||||
|
@ -1,8 +1,7 @@
|
||||
import pygame
|
||||
from utilities.Board import Board
|
||||
from utilities.board import Board
|
||||
from utilities.constants import GREEN, WHITE, BLUE, SQUARE_SIZE
|
||||
|
||||
|
||||
class GameManager:
|
||||
def __init__(self, win, colour):
|
||||
self._init(colour)
|
||||
@ -30,14 +29,14 @@ class GameManager:
|
||||
self.selected = None
|
||||
self.select(row, col)
|
||||
piece = self.board.getPiece(row, col)
|
||||
if piece != 0 and piece.colour == self.turn:
|
||||
if piece is not None and piece.colour == self.turn:
|
||||
self.selected = piece
|
||||
self.validMoves = self.board.getValidMoves(piece)
|
||||
return True
|
||||
|
||||
def _move(self, row, col):
|
||||
piece = self.board.getPiece(row, col)
|
||||
if self.selected and piece == 0 and (row, col) in self.validMoves:
|
||||
if self.selected and piece is None and (row, col) in self.validMoves:
|
||||
self.board.move(self.selected, row, col)
|
||||
skipped = self.validMoves[row, col]
|
||||
if self.validMoves[list(self.validMoves.keys())[0]]:
|
||||
@ -59,8 +58,8 @@ class GameManager:
|
||||
self.validMoves = {}
|
||||
if self.turn == GREEN:
|
||||
self.turn = WHITE
|
||||
return
|
||||
self.turn = GREEN
|
||||
else:
|
||||
self.turn = GREEN
|
||||
|
||||
def drawValidMoves(self, moves):
|
||||
for row, col in moves:
|
||||
|
@ -1,6 +1,6 @@
|
||||
import pygame.draw
|
||||
|
||||
from utilities.constants import SQUARE_SIZE, GREY, CROWN, GREEN
|
||||
from utilities.constants import SQUARE_SIZE, GREY, CROWN
|
||||
|
||||
|
||||
class Piece:
|
||||
@ -14,8 +14,6 @@ class Piece:
|
||||
self.calcPosition()
|
||||
self.padding = 20
|
||||
self.border = 2
|
||||
self.green = (144, 184, 59)
|
||||
self.white = (255, 255, 255)
|
||||
|
||||
def calcPosition(self):
|
||||
self.x = SQUARE_SIZE * self.col + SQUARE_SIZE // 2
|
||||
@ -27,7 +25,7 @@ class Piece:
|
||||
def draw(self, win):
|
||||
radius = SQUARE_SIZE // 2 - self.padding
|
||||
pygame.draw.circle(win, GREY, (self.x, self.y), radius + self.border)
|
||||
pygame.draw.circle(win, self.green if self.colour == GREEN else self.white, (self.x, self.y), radius)
|
||||
pygame.draw.circle(win, self.colour, (self.x, self.y), radius)
|
||||
if self.king:
|
||||
win.blit(CROWN, (self.x - CROWN.get_width() // 2, self.y - CROWN.get_height() // 2))
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user