Merge pull request 'reinforcement-learning' (#1) from reinforcement-learning into master

Reviewed-on: #1
Rohit Pai 2023-09-28 23:59:04 +01:00
commit 7e9b0a475e
24 changed files with 2514 additions and 401 deletions


@ -4,7 +4,7 @@
<content url="file://$MODULE_DIR$">
<excludeFolder url="file://$MODULE_DIR$/venv" />
</content>
<orderEntry type="jdk" jdkName="Python 3.11 (draughts)" jdkType="Python SDK" />
<orderEntry type="jdk" jdkName="$USER_HOME$/anaconda3" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
</module>


@ -1,4 +1,4 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.11 (draughts)" project-jdk-type="Python SDK" />
<component name="ProjectRootManager" version="2" project-jdk-name="$USER_HOME$/anaconda3" project-jdk-type="Python SDK" />
</project>

6 .idea/other.xml Normal file

@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="PySciProjectComponent">
<option name="PY_SCI_VIEW_SUGGESTED" value="true" />
</component>
</project>

Binary file not shown.

100 changeInRewards-3.txt Normal file

@ -0,0 +1,100 @@
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0

100 changeInRewards-5.txt Normal file

@ -0,0 +1,100 @@
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0

289 main.py

@ -1,7 +1,10 @@
import sys
import pygame
import numpy as np
from matplotlib import pyplot as plt
from reinforcementLearning.ReinforcementLearning import ReinforcementLearning
from utilities.constants import WIDTH, HEIGHT, SQUARE_SIZE, WHITE, GREEN
from utilities.gameManager import GameManager
from minimax.minimaxAlgo import MiniMax
@ -11,21 +14,45 @@ WIN = pygame.display.set_mode((WIDTH, HEIGHT))
pygame.display.set_caption("Draughts")
def getRowColFromMouse(pos):
def getRowColFromMouse(pos: tuple) -> tuple:
"""
Gets the row and column from the mouse position
:param pos: X and Y position of the mouse
:return: Row and column
"""
x, y = pos
row = y // SQUARE_SIZE
col = x // SQUARE_SIZE
return row, col
def drawText(text, font, color, surface, x, y):
textobj = font.render(text, 1, color)
def drawText(text: str, font: pygame.font.Font, colour: tuple, surface: pygame.Surface, x: float, y: float) -> None:
"""
Draws text on the screen
:param text: Text to draw
:param font: System font
:param colour: Colour of the text
:param surface: The display surface
:param x: X position of the text
:param y: Y position of the text
:return: None
"""
textobj = font.render(text, 1, colour)
textrect = textobj.get_rect()
textrect.topleft = (x, y)
surface.blit(textobj, textrect)
def drawMultiLineText(surface, text, pos, font, color=pygame.Color('black')):
def drawMultiLineText(surface: pygame.Surface, text: str, pos: tuple, font: pygame.font.Font, colour: pygame.Color = pygame.Color('black')) -> None:
"""
Draws multiline text on the screen
:param surface: The display surface
:param text: Text to draw
:param pos: X and Y position of the text
:param font: System font
:param colour: Colour of the text
:return: None
"""
words = [word.split(' ') for word in text.splitlines()] # 2D array where each row is a list of words.
space = font.size(' ')[0] # The width of a space.
max_width, max_height = surface.get_size()
@ -33,7 +60,7 @@ def drawMultiLineText(surface, text, pos, font, color=pygame.Color('black')):
word_height = None
for line in words:
for word in line:
word_surface = font.render(word, 0, color)
word_surface = font.render(word, 0, colour)
word_width, word_height = word_surface.get_size()
if x + word_width >= max_width:
x = pos[0] # Reset the x.
@ -44,80 +71,89 @@ def drawMultiLineText(surface, text, pos, font, color=pygame.Color('black')):
y += word_height # Start on new row.
def main():
def main(difficulty: int = 0) -> None:
"""
Main function, that shows the menu before running the game
:param difficulty: Difficulty of the minimax algorithm
:return: None
"""
pygame.init()
screen = pygame.display.set_mode((WIDTH, HEIGHT))
menuClock = pygame.time.Clock()
click = False
width = screen.get_width()
font = pygame.font.SysFont(None, 25)
difficulty = 0
font = pygame.font.SysFont("", 25)
while True:
# menu
screen.fill((128, 128, 128))
drawText('Main Menu', font, (255, 255, 255), screen, width / 2, 20)
if difficulty == 0:
while True:
# menu
screen.fill((128, 128, 128))
drawText('Main Menu', font, (255, 255, 255), screen, width / 2, 20)
mx, my = pygame.mouse.get_pos()
mx, my = pygame.mouse.get_pos()
easy = pygame.Rect(width / 2 - 50, 100, 200, 50)
pygame.draw.rect(screen, (0, 255, 0), easy)
drawText("easy", font, (255, 255, 255), screen, width / 2, 100)
medium = pygame.Rect(width / 2 - 50, 200, 200, 50)
pygame.draw.rect(screen, (255, 125, 0), medium)
drawText("medium", font, (255, 255, 255), screen, width / 2, 200)
hard = pygame.Rect(width / 2 - 50, 300, 200, 50)
pygame.draw.rect(screen, (255, 0, 0), hard)
drawText("hard", font, (255, 255, 255), screen, width / 2, 300)
rules = pygame.Rect(width / 2 - 50, 400, 200, 50)
pygame.draw.rect(screen, (0, 0, 255), rules)
drawText("rules", font, (255, 255, 255), screen, width / 2, 400)
quitGame = pygame.Rect(width / 2 - 50, 500, 200, 50)
pygame.draw.rect(screen, (0, 0, 0), quitGame)
drawText("quit", font, (255, 255, 255), screen, width / 2, 500)
easy = pygame.Rect(width / 2 - 50, 100, 200, 50)
pygame.draw.rect(screen, (0, 255, 0), easy)
drawText("easy", font, (255, 255, 255), screen, width / 2, 100)
medium = pygame.Rect(width / 2 - 50, 200, 200, 50)
pygame.draw.rect(screen, (255, 125, 0), medium)
drawText("medium", font, (255, 255, 255), screen, width / 2, 200)
hard = pygame.Rect(width / 2 - 50, 300, 200, 50)
pygame.draw.rect(screen, (255, 0, 0), hard)
drawText("hard", font, (255, 255, 255), screen, width / 2, 300)
rules = pygame.Rect(width / 2 - 50, 400, 200, 50)
pygame.draw.rect(screen, (0, 0, 255), rules)
drawText("rules", font, (255, 255, 255), screen, width / 2, 400)
quitGame = pygame.Rect(width / 2 - 50, 500, 200, 50)
pygame.draw.rect(screen, (0, 0, 0), quitGame)
drawText("quit", font, (255, 255, 255), screen, width / 2, 500)
if easy.collidepoint((mx, my)):
if click:
difficulty = 1
break
if medium.collidepoint((mx, my)):
if click:
difficulty = 3
break
if hard.collidepoint((mx, my)):
if click:
difficulty = 5
break
if rules.collidepoint((mx, my)):
if click:
rulesGUI()
break
if quitGame.collidepoint((mx, my)):
if click:
pygame.quit()
sys.exit()
click = False
for event in pygame.event.get():
if event.type == pygame.QUIT:
pygame.quit()
sys.exit()
if event.type == pygame.MOUSEBUTTONDOWN:
if event.button == 1:
click = True
if easy.collidepoint((mx, my)):
if click:
difficulty = 1
break
if medium.collidepoint((mx, my)):
if click:
difficulty = 3
break
if hard.collidepoint((mx, my)):
if click:
difficulty = 5
break
if rules.collidepoint((mx, my)):
if click:
rulesGUI()
break
if quitGame.collidepoint((mx, my)):
if click:
pygame.quit()
sys.exit()
click = False
for event in pygame.event.get():
if event.type == pygame.QUIT:
pygame.quit()
sys.exit()
if event.type == pygame.MOUSEBUTTONDOWN:
if event.button == 1:
click = True
pygame.display.update()
menuClock.tick(60)
if difficulty != 0:
game(difficulty)
pygame.display.update()
menuClock.tick(60)
game(difficulty)
def rulesGUI():
def rulesGUI() -> None:
"""
Shows the rules of the game
:return: None
"""
screen = pygame.display.set_mode((WIDTH, HEIGHT))
menuClock = pygame.time.Clock()
click = False
width = screen.get_width()
titleFont = pygame.font.SysFont(None, 48)
font = pygame.font.SysFont(None, 21)
titleFont = pygame.font.SysFont("", 48)
font = pygame.font.SysFont("", 21)
while True:
screen.fill((128, 128, 128))
drawText("Rules", titleFont, (255, 255, 255), screen, width / 2, 20)
@ -171,43 +207,116 @@ multi-jump until the next move.""", (50, 50), font)
menuClock.tick(60)
def game(difficulty):
def game(difficulty: int) -> None:
"""
Runs the game with the given difficulty. Used for training and testing the RL algorithm
:param difficulty: The difficulty of the minimax algorithm
"""
run = True
clock = pygame.time.Clock()
gameManager = GameManager(WIN, GREEN)
rl = ReinforcementLearning(gameManager.board.getAllMoves(WHITE), gameManager.board, WHITE, gameManager)
# model = rl.buildMainModel()
rl.model.load_weights("./modelWeights/model_final.h5")
mm = MiniMax()
totalReward = []
winners = []
for i in range(50):
score = 0
for j in range(200):
print(j)
clock.tick(FPS)
reward = 0
if gameManager.turn == WHITE:
# mm = MiniMax()
# value, newBoard = mm.AI(difficulty, WHITE, gameManager)
# gameManager.aiMove(newBoard)
# reward, newBoard = rl.AITrain(gameManager.board)
newBoard = rl.AITest(gameManager.board)
while run:
clock.tick(FPS)
if newBoard is None:
print("Cannot make move")
continue
gameManager.aiMove(newBoard)
if gameManager.turn == WHITE:
mm = MiniMax()
value, newBoard = mm.AI(gameManager.getBoard(), difficulty, WHITE, gameManager)
gameManager.aiMove(newBoard)
# time.sleep(0.15)
gameManager.update()
pygame.display.update()
if gameManager.turn == GREEN:
mm = MiniMax()
value, newBoard = mm.AI(gameManager.getBoard(), difficulty, GREEN, gameManager)
gameManager.aiMove(newBoard)
# time.sleep(0.15)
if gameManager.turn == GREEN:
value, newBoard = mm.AI(difficulty, GREEN, gameManager)
gameManager.aiMove(newBoard)
if gameManager.winner() != None:
print(gameManager.winner())
run = False
score += reward
for event in pygame.event.get():
if event.type == pygame.QUIT:
run = False
if event.type == pygame.MOUSEBUTTONDOWN:
pos = pygame.mouse.get_pos()
row, col = getRowColFromMouse(pos)
# if gameManager.turn == GREEN:
gameManager.select(row, col)
if gameManager.winner() is not None:
print("Green" if gameManager.winner() == GREEN else "White", " wins")
# with open(f"winners-{difficulty}.txt", "a+") as f:
# f.write(str(gameManager.winner()) + "\n")
winners.append(gameManager.winner())
break
gameManager.update()
pygame.display.update()
# for event in pygame.event.get():
# if event.type == pygame.QUIT:
# break
# if event.type == pygame.MOUSEBUTTONDOWN:
# pos = pygame.mouse.get_pos()
# row, col = getRowColFromMouse(pos)
# # if gameManager.turn == GREEN:
# gameManager.select(row, col)
gameManager.update()
pygame.display.update()
if gameManager.winner() is None:
# with open(f"winners-{difficulty}.txt", "a+") as f:
# f.write(str(0) + "\n")
winners.append(0)
gameManager.reset()
rl.resetScore()
print("Game: ", i, " Reward: ", score)
# with open(f"rewards-{difficulty}.txt", "a+") as f:
# f.write(str(score) + "\n")
totalReward.append(score)
# save model weights every 250 games
# if i % 250 == 0 and i != 0:
# rl.model.save("./modelWeights/model_" + str(i) + ".h5")
# pygame.quit()
# rl.model.save("./modelWeights/model_final.h5")
change_in_rewards = [0] # Initialize with 0 for the first episode
for i in range(1, len(totalReward)):
change_in_reward = totalReward[i] - totalReward[i - 1]
change_in_rewards.append(change_in_reward)
main()
# with open(f"changeInRewards-{difficulty}.txt", "a+") as f:
# for i in change_in_rewards:
# f.write(str(i) + "\n")
# episodes = list(range(1, len(totalReward) + 1))
#
# plt.plot(episodes, change_in_rewards)
# plt.xlabel('Training Games')
# plt.ylabel('Change in Game Reward')
# plt.title('Change in Game Reward vs. Training Games')
# plt.grid(True)
# plt.show()
#
# plt.plot([i for i in range(len(totalReward))], totalReward)
# plt.xlabel("Games")
# plt.ylabel("Reward")
# plt.show()
fig, ax = plt.subplots()
bar = ax.bar(["Draw", "White", "Green"], [winners.count(0), winners.count(WHITE), winners.count(GREEN)])
ax.set(xlabel='Winner', ylabel='Frequency', ylim=[0, 500])
ax.set_title(f"Winners for difficulty — {difficulty}")
ax.bar_label(bar)
plt.show()
# difficulties = [3, 5, 7, 9]
#
# for diff in difficulties:
# main(diff)
main(3)
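
The commented-out logging in game() appends one reward per game to rewards-<difficulty>.txt and derives from it the changeInRewards-<difficulty>.txt series added in this commit. A minimal sketch of rebuilding and plotting that delta series from such a log (one value per line, as in the files in this diff; the exact file name is only an example):

from matplotlib import pyplot as plt

def loadSeries(path: str) -> list:
    """Read one float per line, skipping blanks."""
    with open(path) as f:
        return [float(line.strip()) for line in f if line.strip()]

def changeInRewards(rewards: list) -> list:
    """First entry is 0, then the per-game difference, mirroring game()."""
    deltas = [0]
    for i in range(1, len(rewards)):
        deltas.append(rewards[i] - rewards[i - 1])
    return deltas

deltas = changeInRewards(loadSeries("rewards-5.txt"))  # log written by game()
plt.plot(range(1, len(deltas) + 1), deltas)
plt.xlabel("Training Games")
plt.ylabel("Change in Game Reward")
plt.show()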


@ -1,56 +1,46 @@
import random
from copy import deepcopy
from math import inf
from utilities.constants import GREEN, WHITE
from utilities.gameManager import GameManager
class MiniMax():
class MiniMax:
def AI(self, board, depth, maxPlayer, gameManager):
if depth == 0 or board.winner() is not None:
return board.scoreOfTheBoard(), board
def AI(self, depth: int, maxPlayer: int, gameManager: GameManager) -> tuple:
"""
The minimax algorithm
:param depth: How deep the algorithm should go
:param maxPlayer: The current player
:param gameManager: The game manager
:return: The best evaluation and board
"""
if depth == 0 or gameManager.board.winner() is not None:
return gameManager.board.scoreOfTheBoard(), gameManager.board
if maxPlayer:
if type(maxPlayer) == int:
maxEval = -inf
bestMove = None
for move in self.getAllMoves(board, maxPlayer):
evaluation = self.AI(move, depth - 1, False, gameManager)[0]
for move in gameManager.board.getAllMoves(maxPlayer):
evaluation = self.AI(depth - 1, False, gameManager)[0]
maxEval = max(maxEval, evaluation)
if maxEval > evaluation:
bestMove = move
if maxEval == evaluation:
# bestMove = move
bestMove = bestMove if random.choice([True, False]) else move
return maxEval, bestMove
else:
minEval = inf
bestMove = None
colour = WHITE if gameManager.turn == GREEN else GREEN
for move in self.getAllMoves(board, colour):
evaluation = self.AI(move, depth - 1, True, gameManager)[0]
for move in gameManager.board.getAllMoves(colour):
evaluation = self.AI(depth - 1, True, gameManager)[0]
minEval = min(minEval, evaluation)
if minEval < evaluation:
bestMove = move
if minEval == evaluation:
# bestMove = move
bestMove = bestMove if random.choice([True, False]) else move
return minEval, bestMove
def _simulateMove(self, piece, move, board, skip):
board.move(piece, move[0], move[1])
if skip:
board.remove(skip)
return board
def getAllMoves(self, board, colour):
moves = []
for piece in board.getAllPieces(colour):
validMoves = board.getValidMoves(piece)
for move, skip in validMoves.items():
tempBoard = deepcopy(board)
tempPiece = tempBoard.getPiece(piece.row, piece.col)
newBoard = self._simulateMove(tempPiece, move, tempBoard, skip)
moves.append(newBoard)
return moves
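
For reference, a self-contained sketch of the depth-limited minimax recursion this class implements, including the same random tie-breaking between equally scored moves. The board interface (getAllMoves(colour) returning successor boards, scoreOfTheBoard(), winner()) is assumed from its use elsewhere in this diff, so treat this as an illustration rather than a drop-in replacement:

import random
from math import inf

def minimax(board, depth: int, maximising: bool, maxColour: int, minColour: int) -> tuple:
    """Plain minimax over successor boards, returning (score, best board)."""
    if depth == 0 or board.winner() is not None:
        return board.scoreOfTheBoard(), board
    colour = maxColour if maximising else minColour
    bestEval = -inf if maximising else inf
    bestMove = None
    for child in board.getAllMoves(colour):
        evaluation, _ = minimax(child, depth - 1, not maximising, maxColour, minColour)
        better = evaluation > bestEval if maximising else evaluation < bestEval
        if better:
            bestEval, bestMove = evaluation, child
        elif evaluation == bestEval and random.choice([True, False]):
            bestMove = child  # break ties at random, as the class above does
    return bestEval, bestMove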

BIN modelWeights/model_250.h5 Normal file

Binary file not shown.

BIN modelWeights/model_final.h5 Normal file

Binary file not shown.


@ -1,96 +1,280 @@
import random
from collections import deque
from typing import Any
from copy import deepcopy
import numpy as np
import tensorflow as tf
from tensorflow.python.keras import Sequential, regularizers
from tensorflow.python.keras.layers import Dense
from keras.engine.input_layer import InputLayer
from keras.layers import BatchNormalization
from tensorflow.python.keras import Sequential, regularizers, Input
from tensorflow.python.keras.layers import Dense, Lambda, Dropout
from tensorflow.python.keras.optimizer_v2.adam import Adam
from minimax.minimaxAlgo import MiniMax
from utilities import Board
from utilities.constants import WHITE, GREEN
from utilities.gameManager import GameManager
class ReinforcementLearning():
def __init__(self, action_space, state_space, env):
self.action_space = action_space
self.state_space = state_space
self.env = env
def __init__(self, actionSpace: list, board: Board, colour: int, gameManager: GameManager) -> None:
"""
Constructor for the ReinforcementLearning class
:param actionSpace: The list of available actions for the agent
:param board: The game board
:param colour: The colour the agent plays as
:param gameManager: The game manager
"""
self.gameManager = gameManager
self.actionSpace = actionSpace
self.board = board
self.state = self.board.board
self.colour = colour
self.score = 0
self.epsilon = 1
self.gamma = .95
self.batch_size = 64
self.epsilon_min = .01
self.epsilon_decay = .995
self.learning_rate = 0.001
self.memory = deque(maxlen=100000)
self.model = self._buildModel()
self.batchSize = 512
self.maxSize = 32
self.epsilonMin = .01
self.epsilonDecay = .995
self.learningRate = 0.0001
self.memory = deque(maxlen=10000000)
self.model = self.buildMainModel()
print(self.model.summary())
def AI(self, episode):
loss = []
def AITrain(self, board: Board) -> tuple:
"""
Learns to play the draughts game (one training step per call)
:param board: The current board
:return: The running score and the resulting board state
"""
self.board = board
self.state = self._convertState(self.board.board)
self.actionSpace = self.encodeMoves(self.colour, self.board)
if len(self.actionSpace) == 0:
return self.score, None
max_steps = 1000
action = self._act()
reward, nextState, done = self.board.step(action, self.colour)
self.score += reward
self.state = self._convertState(nextState.board)
self._remember(deepcopy(self.board), action, reward, self.state, done)
self._replay()
for e in range(episode):
state = self.env.reset()
state = np.reshape(state, (1, self.state_space))
score = 0
for i in range(max_steps):
action = self.act(state)
reward, next_state, done = self.env.step(action)
score += reward
next_state = np.reshape(next_state, (1, self.state_space))
self.remember(state, action, reward, next_state, done)
state = next_state
self.replay()
if done:
print("episode: {}/{}, score: {}".format(e, episode, score))
break
loss.append(score)
return self.score, nextState
def _buildModel(self):
def AITest(self, board: Board) -> Board:
"""
Runs the AI
:param board: The board
:return: The new board
"""
actionSpace = self.encodeMoves(WHITE, board)
if len(actionSpace) == 0:
print("Cannot make move")
return None
totalMoves = len(actionSpace)
# moves = np.squeeze(moves)
moves = np.pad(actionSpace, (0, self.maxSize - totalMoves), 'constant', constant_values=(1, 1))
act_values = self.model.predict(self.normalise(moves))
val = np.argmax(act_values[0])
val = val if val < totalMoves else totalMoves - 1
reward, newBoard, done = board.step(actionSpace[val], WHITE)
return newBoard
def buildMainModel(self) -> Sequential:
"""
Builds the model for the AI
:return: The model
"""
# Board model
board_model = Sequential()
modelLayers = [
Lambda(lambda x: tf.reshape(x, [-1, 32])),
Dense(512, activation='relu', kernel_regularizer=regularizers.l2(0.01)),
Dropout(0.2),
Dense(256, activation='relu', kernel_regularizer=regularizers.l2(0.01)),
Dropout(0.2),
Dense(128, activation='relu', kernel_regularizer=regularizers.l2(0.01)),
Dropout(0.2),
Dense(64, activation='relu', kernel_regularizer=regularizers.l2(0.01)),
Dropout(0.2),
Dense(32, activation='relu', kernel_regularizer=regularizers.l2(0.01)),
Dropout(0.2),
Dense(16, activation='linear', kernel_regularizer=regularizers.l2(0.01))
]
boardModel = Sequential(modelLayers)
# input dimensions is 32 board position values
board_model.add(Dense(64, activation='relu', input_dim=32))
# boardModel.add(BatchNormalization())
boardModel.compile(optimizer=Adam(learning_rate=self.learningRate), loss='mean_squared_error')
boardModel.build(input_shape=(None, None))
# use regularizers, to prevent fitting noisy labels
board_model.add(Dense(32, activation='relu', kernel_regularizer=regularizers.l2(0.01)))
board_model.add(Dense(16, activation='relu', kernel_regularizer=regularizers.l2(0.01))) # 16
board_model.add(Dense(8, activation='relu', kernel_regularizer=regularizers.l2(0.01))) # 8
return boardModel
# output isn't squashed, because it might lose information
board_model.add(Dense(1, activation='linear', kernel_regularizer=regularizers.l2(0.01)))
board_model.compile(optimizer='nadam', loss='binary_crossentropy')
return board_model
def remember(self, state, action, reward, next_state, done):
self.memory.append((state, action, reward, next_state, done))
def replay(self):
if len(self.memory) < self.batch_size:
def _replay(self) -> None:
"""
Trains the model on a random sample of the replay memory
:return: None
"""
if len(self.memory) < self.batchSize:
# Not enough data to replay and test the model
return
minibatch = random.sample(self.memory, self.batch_size)
states = np.array([i[0] for i in minibatch])
actions = np.array([i[1] for i in minibatch])
rewards = np.array([i[2] for i in minibatch])
next_states = np.array([i[3] for i in minibatch])
dones = np.array([i[4] for i in minibatch])
# Get a random sample from the memory
minibatch = random.sample(self.memory, int(self.maxSize))
states = np.squeeze(states)
next_states = np.squeeze(next_states)
# Extract states, rewards, dones
states = [m[0] for m in minibatch]
rewards = [m[2] for m in minibatch]
dones = [m[4] for m in minibatch]
targets = rewards + self.gamma * (np.amax(self.model.predict_on_batch(next_states), axis=1)) * (1 - dones)
targets_full = self.model.predict_on_batch(states)
# Encoded moves
encodedMoves = []
for state in states:
encodedMoves.append(self.encodeMoves(self.colour, state))
ind = np.array([i for i in range(self.batch_size)])
targets_full[[ind], [actions]] = targets
# Calculate targets
targets = []
for i, moves in enumerate(encodedMoves):
if dones[i]:
target = rewards[i]
else:
target = rewards[i] + self.gamma * self._maxNextQ()
self.model.fit(states, targets_full, epochs=1, verbose=0)
if self.epsilon > self.epsilon_min:
self.epsilon *= self.epsilon_decay
targets.append(target)
def act(self, state):
encodedMoves = np.array([np.pad(m, (0, self.maxSize - len(m)), 'constant', constant_values=(1, 1))
for m in encodedMoves])
targets = np.array(targets)
self.model.fit(self.normalise(encodedMoves), self.normalise(targets), epochs=20)
if self.epsilon > self.epsilonMin:
self.epsilon *= self.epsilonDecay
def _remember(self, state: np.array, action: int, reward: float, nextState: np.array, done: bool) -> None:
"""
Remembers what it has learnt
:param state: The current state
:param action: The action taken
:param reward: The reward for the action
:param nextState: The next state
:param done: Whether the game is finished
:return: None
"""
self.memory.append((state, action, reward, nextState, done))
def _act(self) -> Any:
"""
Chooses an action based on the available moves
:return: The action
"""
if np.random.rand() <= self.epsilon:
return random.randrange(self.action_space)
act_values = self.model.predict(state)
return np.argmax(act_values[0])
# explore: let minimax suggest a move, falling back to a random action from the action space
mm = MiniMax()
value, newBoard = mm.AI(3, self.colour, self.gameManager)
if newBoard is None:
return random.choice(self.actionSpace)
where = self._boardDiff(self.board, newBoard)
return self._encode(where[0]+1, where[1]+1)
if len(self.actionSpace) == 1:
return self.actionSpace[0]
encodedMoves = np.squeeze(self.actionSpace)
encodedMoves = np.pad(encodedMoves, (0, self.maxSize - len(encodedMoves)), 'constant', constant_values=(1, 1))
actValues = self.model.predict(self.normalise(encodedMoves))
val = np.argmax(actValues[0])
val = val if val < len(self.actionSpace) else len(self.actionSpace) - 1
return self.actionSpace[val]
def resetScore(self) -> None:
"""
Resets the score
:return: None
"""
self.score = 0
def _convertState(self, board: list) -> list:
"""
Converts the board into a 2D list of numbers
:param board: 2D list of pieces
:return: new 2D list of numbers
"""
num_board = []
for row in board:
num_row = []
for piece in row:
if piece == 0:
num_row.append(0)
continue
if piece.colour == 1:
num_row.append(1)
continue
num_row.append(2)
num_board.append(num_row)
return num_board
def _encode(self, start: tuple, end: tuple) -> int:
"""
Encodes the move into an integer
:param start: Tuple of start position
:param end: Tuple of end position
:return: Encoded move
"""
start_row = start[0]
start_col = end[0]
end_row = start[-1]
end_col = end[-1]
# Concatenate into integer
return int(str(start_row) + str(start_col) + str(end_row) + str(end_col))
def _maxNextQ(self) -> float:
"""
Calculates the max Q value for the next state
:return: the max Q value
"""
colour = WHITE if self.colour == GREEN else GREEN
encodedMoves = self.encodeMoves(colour, self.board)
if len(encodedMoves) == 0:
return -1
paddedMoves = np.array(np.pad(encodedMoves, (0, self.maxSize - len(encodedMoves)), 'constant', constant_values=(1, 1)))
nextQValues = self.model.predict_on_batch(self.normalise(paddedMoves))
return np.max(nextQValues)
def encodeMoves(self, colour: int, board: Board) -> list:
"""
Encodes the moves into a list of encoded moves
:param colour: Colour of the player
:param board: The board
:return: List of encoded moves
"""
encodedMoves = []
moves = board.getAllMoves(colour)
for move in moves:
where = self._boardDiff(board, move)
encodedMoves.append(self._encode(where[0]+1, where[1]+1))
return encodedMoves
def _boardDiff(self, board: Board, move: Board) -> np.array:
"""
Finds the difference between the two boards
:param board: The current board
:param move: The new board
:return: The difference between the two boards
"""
cnvState = np.array(self._convertState(board.board))
cnvMove = np.array(self._convertState(move.board))
diff = np.subtract(cnvMove, cnvState)
diff = np.nonzero(diff)
return diff
def normalise(self, data: np.array) -> np.array:
"""
Normalise the data
:param data: the data to normalise
:return: normalised data
"""
return data / 10000
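
The _replay() method above builds its training targets with the standard Q-learning update: the reward alone for terminal transitions, otherwise the reward plus the discounted best next-state value from _maxNextQ(). A small numpy-only sketch of that target rule in isolation (gamma matches the 0.95 set in the constructor; the arrays are illustrative):

import numpy as np

def qTargets(rewards: np.ndarray, maxNextQ: np.ndarray, dones: np.ndarray,
             gamma: float = 0.95) -> np.ndarray:
    """Bellman targets: r where the game ended, r + gamma * max_a' Q(s', a') otherwise."""
    return rewards + gamma * maxNextQ * (1 - dones.astype(float))

# Illustrative minibatch of three transitions, the last one terminal.
rewards = np.array([1.0, -0.5, 10.0])
maxNextQ = np.array([2.0, 0.5, 0.0])
dones = np.array([False, False, True])
print(qTargets(rewards, maxNextQ, dones))  # -> 2.9, -0.025, 10.0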

80 results.py Normal file

@ -0,0 +1,80 @@
import matplotlib.pyplot as plt
import numpy as np
from utilities.constants import GREEN, WHITE
# winners = []
with open("winners-5.txt", "r") as f:
winners = f.readlines()
winners = [int(x.strip()) for x in winners]
# lavg = []
# for i in range(0, len(winners), 25):
# lavg.append(winners[i:i+25].count(2) / 25)
#
# x = np.arange(0, len(lavg))
# y = np.array(lavg) * 100
#
# a, b = np.polyfit(x, y, 1)
#
# fig, ax = plt.subplots(figsize=(10, 5))
# ax.plot(y)
# ax.set_xticks(np.arange(0, len(lavg), 2))
# ax.minorticks_on()
# ax.plot(x, a*x+b, color='red', linestyle='--', linewidth=2)
# ax.set_ylim([0, 100])
# ax.set_title("Winners Average")
# ax.grid(which='major', linestyle='-', linewidth='0.5', color='black')
# ax.grid(which='minor', linestyle=':', linewidth='0.5')
# ax.set_xlabel("Average Set")
# ax.set_ylabel("Percentage of Wins")
# ax.tick_params(which="minor", bottom=False, left=False)
# plt.show()
fig, ax = plt.subplots()
bar = ax.bar(["Draw", "White", "Green"], [winners.count(0), winners.count(WHITE), winners.count(GREEN)])
ax.set(xlabel='Winner', ylabel='Frequency', ylim=[0, 100])
ax.set_title("Winners at Depth 5")
ax.grid(which='major', linestyle='-', linewidth='0.5', color='grey', axis='y')
ax.bar_label(bar)
plt.show()
# with open("trainedRewards.txt", "r") as f:
# totalReward = f.readlines()
#
# totalReward = [float(x.strip()) for x in totalReward]
# filteredReward = list(filter(lambda x: x > -1500, totalReward))
# change_in_rewards = [0] # Initialize with 0 for the first episode
# for i in range(1, len(totalReward)):
# change_in_reward = totalReward[i] - totalReward[i - 1]
# change_in_rewards.append(change_in_reward)
#
# games = list(range(1, len(totalReward) + 1))
# plt.plot(games, change_in_rewards)
# plt.xlabel('Training Games')
# plt.ylabel('Change in Game Reward')
# plt.title('Change in Game Reward vs. Training Games')
# plt.grid(True)
# plt.show()
# major_ticks = np.arange(0, 101, 20)
# minor_ticks = np.arange(0, 101, 5)
#
# plt.plot([i for i in range(len(totalReward))], totalReward)
# plt.title("Rewards to Games")
# plt.xlabel("Games")
# plt.ylabel("Reward")
# plt.xticks(major_ticks)
# plt.xticks(minor_ticks, minor=True)
# plt.yticks(major_ticks)
# plt.yticks(minor_ticks, minor=True)
# plt.grid(which='both')
# plt.show()
#
# plt.plot([i for i in range(len(filteredReward))], filteredReward)
# plt.title("Filtered Rewards to Games")
# plt.xlabel("Games")
# plt.ylabel("Reward")
# plt.grid(which='both')
# plt.show()
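
The commented-out block near the top of results.py averages winner values over 25-game windows (counting occurrences of the value 2) to get a win percentage and fits a trend line to it. A compact sketch of that rolling win-rate calculation, with the window size and counted value taken from the commented code and an illustrative input list:

import numpy as np

def rollingWinRate(winners: list, value: int = 2, window: int = 25) -> np.ndarray:
    """Percentage of games in each consecutive window whose winner equals `value`."""
    rates = []
    for i in range(0, len(winners), window):
        chunk = winners[i:i + window]
        rates.append(100.0 * chunk.count(value) / len(chunk))
    return np.array(rates)

winners = [2, 0, 1, 2, 2] * 10  # illustrative results only
print(rollingWinRate(winners))  # -> [60. 60.]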

56 rewards-5.txt Normal file

@ -0,0 +1,56 @@
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0

BIN rewardsA.png Normal file

Binary file not shown.

Size: 148 KiB

2 run.sh Executable file

@ -0,0 +1,2 @@
conda activate
python main.py

500 trainedRewards.txt Normal file

@ -0,0 +1,500 @@
180.5
115.19999999999999
-155.39999999999998
-5169.4000000000015
100.0
-3354.2999999999956
123.79999999999998
-1738.0
261.40000000000015
120.89999999999999
147.80000000000004
108.0
113.50000000000001
110.5000000000002
-1048.3000000000006
75.8
232.70000000000016
89.10000000000001
279.9000000000002
165.40000000000003
85.4
34.20000000000016
266.20000000000016
101.69999999999999
283.0
-264.5
225.0
328.0
215.5
150.0
-217.5
-2920.0
82.5
-208.5
150.5
196.5
223.0
265.5
-282.5
175.5
206.5
221.5
127.5
-6337.5
147.5
231.5
137.5
-180.5
108.0
-339.5
190.0
-69.0
52.5
58.0
-5575.0
-159.5
197.5
177.5
-5547.5
-65.5
136.5
292.5
-169.5
185.0
115.5
198.0
30.0
162.5
95.5
170.0
113.0
-1405.0
-27.0
-4832.199999999999
147.5
228.0
59.0
262.5
-220.0
150.5
177.5
140.0
123.0
119.0
137.5
134.0
175.5
-5598.5
46.5
135.0
205.0
186.5
177.5
120.1
332.5
162.5
122.5
262.5
-70.0
159.0
138.0
240.5
215.0
147.5
-118.0
260.5
199.0
130.0
265.0
142.5
230.0
135.0
197.5
-179.5
198.0
288.0
200.5
-222.5
165.5
139.0
228.0
211.5
197.5
102.5
233.0
95.5
-129.0
187.5
158.0
295.0
240.5
-222.5
-1841.5
198.0
113.0
305.0
-482.5
125.5
215.0
110.0
-180.0
170.0
-62.5
215.5
132.5
187.5
135.0
-65.0
138.0
-1972.0
240.5
-237.5
610.0
267.5
52.5
-211.5
217.5
88.0
305.5
165.5
115.0
182.5
-69.5
333.0
363.0
112.5
-15.5
150.5
118.0
-52.5
318.0
174.0
198.0
-5705.0
160.5
155.0
125.0
165.0
259.0
165.5
155.0
-236.0
220.5
-15.5
117.5
367.5
237.5
255.0
85.0
-5342.5
141.5
-3582.5
-600.0
915.5
179.0
190.0
-47.5
275.5
-5.0
195.0
128.0
146.5
750.5
153.0
-5157.5
-279.5
219.0
154.0
153.0
-234.5
248.0
182.5
122.5
155.5
1078.0
102.5
358.0
152.5
261.5
239.0
128.0
111.5
93.0
310.5
-87.0
158.0
113.0
165.5
120.0
256.5
90.5
245.0
159.0
160.0
-5272.0
-88.5
159.0
169.0
147.5
-1149.5
-372.0
-270.0
95.0
142.5
212.5
154.0
425.0
153.0
213.0
280.5
-80.5
-45.90000000000003
-2250.5
123.50000000000003
149.40000000000006
219.0
108.0
180.0
271.19999999999993
202.5
121.8000000000001
47.599999999999966
-35.0
281.5
307.5
99.80000000000001
154.0
166.30000000000004
271.5
205.5
145.5
265.0
113.0
144.0
88.0
-204.5
204.0
215.0
177.5
168.0
263.0
66.5
258.0
-5477.5
94.5
-139.0
190.5
160.0
-35.5
149.0
100.5
130.0
-40.0
175.0
132.5
107.5
143.0
-5097.5
97.5
-1880.0
-15.0
213.0
-601.0
282.5
276.5
113.0
106.5
-1011.5
128.0
150.0
145.5
233.0
209.0
136.5
240.0
7.5
-1535.0
238.0
185.0
157.5
-1660.0
-15.5
-145.0
178.0
-4997.5
182.5
197.5
355.5
130.0
232.5
-5420.0
190.0
128.0
115.0
2.5
149.0
220.0
-87.0
-447.5
-4122.5
-67.5
-425.0
283.0
925.0
49.5
-15.0
233.0
215.5
234.0
154.0
141.5
226.5
220.0
110.5
270.0
253.0
-1944.0
215.0
250.5
155.0
260.5
185.0
261.5
232.5
177.5