diff --git a/.idea/draughts.iml b/.idea/draughts.iml
index 3c96bee..3858d78 100644
--- a/.idea/draughts.iml
+++ b/.idea/draughts.iml
@@ -4,7 +4,7 @@
-
+
\ No newline at end of file
diff --git a/Report.pdf b/Report.pdf
deleted file mode 100644
index 3f309d9..0000000
Binary files a/Report.pdf and /dev/null differ
diff --git a/main.py b/main.py
index 40c2f98..a793849 100644
--- a/main.py
+++ b/main.py
@@ -1,6 +1,7 @@
import sys
import pygame
+import numpy as np
from matplotlib import pyplot as plt
from reinforcementLearning.ReinforcementLearning import ReinforcementLearning
@@ -178,23 +179,38 @@ def game(difficulty):
clock = pygame.time.Clock()
gameManager = GameManager(WIN, GREEN)
rl = ReinforcementLearning(gameManager.board.getAllMoves(WHITE), gameManager.board, WHITE, gameManager)
+ model = rl.buildMainModel()
+ model.load_weights("./modelWeights/model_final.h5")
mm = MiniMax()
totalReward = []
- for i in range(2000):
+ winners = []
+ for i in range(100):
score = 0
for j in range(200):
+ print(j)
clock.tick(FPS)
reward = 0
if gameManager.turn == WHITE:
- mm = MiniMax()
- value, newBoard = mm.AI(difficulty, WHITE, gameManager)
+ # mm = MiniMax()
+ # value, newBoard = mm.AI(difficulty, WHITE, gameManager)
# gameManager.aiMove(newBoard)
# reward, newBoard = rl.AI(gameManager.board)
- if newBoard is None:
+ actionSpace = rl.encodeMoves(WHITE, gameManager.board)
+ if len(actionSpace) == 0:
print("Cannot make move")
continue
+ totalMoves = len(actionSpace)
+ # moves = np.squeeze(moves)
+ moves = np.pad(actionSpace, (0, rl.maxSize - totalMoves), 'constant', constant_values=(1, 1))
+ act_values = model.predict(rl.normalise(moves))
+ val = np.argmax(act_values[0])
+ val = val if val < totalMoves else totalMoves - 1
+ reward, newBoard, done = gameManager.board.step(actionSpace[val], WHITE)
+
+ # if newBoard is None:
+ # print("Cannot make move")
+ # continue
gameManager.aiMove(newBoard)
- #
gameManager.update()
pygame.display.update()
@@ -206,7 +222,10 @@ def game(difficulty):
score += reward
if gameManager.winner() is not None:
- print(gameManager.winner())
+ print("Green" if gameManager.winner() == GREEN else "White", " wins")
+ with open("winners.txt", "a+") as f:
+ f.write(str(gameManager.winner()) + "\n")
+ winners.append(gameManager.winner())
break
# for event in pygame.event.get():
@@ -221,9 +240,16 @@ def game(difficulty):
gameManager.update()
pygame.display.update()
+ if gameManager.winner() is None:
+ with open("winners.txt", "a+") as f:
+ f.write(str(0) + "\n")
+ winners.append(0)
gameManager.reset()
rl.resetScore()
print("Game: ", i, " Reward: ", score)
+ with open("rewards.txt", "a+") as f:
+ f.write(str(score) + "\n")
+
totalReward.append(score)
# save model weights every 250 games
if i % 250 == 0 and i != 0:
@@ -237,5 +263,12 @@ def game(difficulty):
plt.ylabel("Reward")
plt.show()
+ fig, ax = plt.subplots()
+ bar = ax.bar(["Draw", "White", "Green"], [winners.count(0), winners.count(WHITE), winners.count(GREEN)])
+ ax.set(xlabel='Winner', ylabel='Frequency', ylim=[0, 500])
+ ax.set_title("Winners")
+ ax.bar_label(bar)
+ plt.show()
+
main(3)
diff --git a/reinforcementLearning/ReinforcementLearning.py b/reinforcementLearning/ReinforcementLearning.py
index 59c8661..1e8f57d 100644
--- a/reinforcementLearning/ReinforcementLearning.py
+++ b/reinforcementLearning/ReinforcementLearning.py
@@ -37,9 +37,10 @@ class ReinforcementLearning():
self.maxSize = 32
self.epsilonMin = .01
self.epsilonDecay = .995
- self.learningRate = 0.001
+ self.learningRate = 0.0001
self.memory = deque(maxlen=10000000)
- self.model = self._buildMainModel()
+ self.model = self.buildMainModel()
+ print(self.model.summary())
def AI(self, board: Board) -> tuple:
"""
@@ -48,7 +49,7 @@ class ReinforcementLearning():
"""
self.board = board
self.state = self._convertState(self.board.board)
- self.actionSpace = self._encodeMoves(self.colour, self.board)
+ self.actionSpace = self.encodeMoves(self.colour, self.board)
if len(self.actionSpace) == 0:
return self.score, None
@@ -61,7 +62,7 @@ class ReinforcementLearning():
return self.score, nextState
- def _buildMainModel(self) -> Sequential:
+ def buildMainModel(self) -> Sequential:
"""
Build the model for the AI
:return: the model
@@ -69,26 +70,24 @@ class ReinforcementLearning():
# Board model
modelLayers = [
Lambda(lambda x: tf.reshape(x, [-1, 32])),
- Dense(256, activation='relu'),
+ Dense(512, activation='relu', kernel_regularizer=regularizers.l2(0.01)),
Dropout(0.2),
- Dense(128, activation='relu'),
+ Dense(256, activation='relu', kernel_regularizer=regularizers.l2(0.01)),
Dropout(0.2),
- Dense(64, activation='relu'),
+ Dense(128, activation='relu', kernel_regularizer=regularizers.l2(0.01)),
+ Dropout(0.2),
+ Dense(64, activation='relu', kernel_regularizer=regularizers.l2(0.01)),
Dropout(0.2),
Dense(32, activation='relu', kernel_regularizer=regularizers.l2(0.01)),
Dropout(0.2),
- Dense(16, activation='relu', kernel_regularizer=regularizers.l2(0.01)),
- Dropout(0.2),
- Dense(1, activation='linear', kernel_regularizer=regularizers.l2(0.01))
+ Dense(16, activation='linear', kernel_regularizer=regularizers.l2(0.01))
]
boardModel = Sequential(modelLayers)
# boardModel.add(BatchNormalization())
- boardModel.compile(optimizer=Adam(learning_rate=0.0001), loss='mean_squared_error')
+ boardModel.compile(optimizer=Adam(learning_rate=self.learningRate), loss='mean_squared_error')
boardModel.build(input_shape=(None, None))
- print(boardModel.summary())
-
return boardModel
def _replay(self) -> None:
@@ -111,7 +110,7 @@ class ReinforcementLearning():
# Encoded moves
encodedMoves = []
for state in states:
- encodedMoves.append(self._encodeMoves(self.colour, state))
+ encodedMoves.append(self.encodeMoves(self.colour, state))
# Calculate targets
targets = []
@@ -126,7 +125,7 @@ class ReinforcementLearning():
encodedMoves = np.array([np.pad(m, (0, self.maxSize - len(m)), 'constant', constant_values=(1, 1))
for m in encodedMoves])
targets = np.array(targets)
- self.model.fit(self._normalise(encodedMoves), self._normalise(targets), epochs=20)
+ self.model.fit(self.normalise(encodedMoves), self.normalise(targets), epochs=20)
if self.epsilon > self.epsilonMin:
self.epsilon *= self.epsilonDecay
@@ -160,8 +159,10 @@ class ReinforcementLearning():
return self.actionSpace[0]
encodedMoves = np.squeeze(self.actionSpace)
encodedMoves = np.pad(encodedMoves, (0, self.maxSize - len(encodedMoves)), 'constant', constant_values=(1, 1))
- act_values = self.model.predict(self._normalise(encodedMoves))
- return self.actionSpace[np.argmax(act_values[0])]
+ act_values = self.model.predict(self.normalise(encodedMoves))
+ val = np.argmax(act_values[0])
+ val = val if val < len(self.actionSpace) else len(self.actionSpace) - 1
+ return self.actionSpace[val]
def resetScore(self):
self.score = 0
@@ -209,20 +210,14 @@ class ReinforcementLearning():
def _maxNextQ(self) -> float:
colour = WHITE if self.colour == GREEN else GREEN
- encodedMoves = self._encodeMoves(colour, self.board)
+ encodedMoves = self.encodeMoves(colour, self.board)
if len(encodedMoves) == 0:
return -1
paddedMoves = np.array(np.pad(encodedMoves, (0, self.maxSize - len(encodedMoves)), 'constant', constant_values=(1, 1)))
- # paddedMoves = np.reshape(paddedMoves, (32, 8, 8))
- # paddedMoves = paddedMoves / np.max(paddedMoved
- # paddedMoves = paddedMoves.reshape(32,)
- # pm = tf.convert_to_tensor(paddedMoves, dtype=tf.float32)
- # pm = tf.reshape(pm, [32])
- print(paddedMoves.shape)
- nextQValues = self.model.predict_on_batch(self._normalise(paddedMoves))
+ nextQValues = self.model.predict_on_batch(self.normalise(paddedMoves))
return np.max(nextQValues)
- def _encodeMoves(self, colour: int, board: Board) -> list:
+ def encodeMoves(self, colour: int, board: Board) -> list:
"""
Encodes the moves into a list encoded moves
:param colour: colour of the player
@@ -243,10 +238,8 @@ class ReinforcementLearning():
diff = np.nonzero(diff)
return diff
- def _normalise(self, data):
+ def normalise(self, data):
"""
Normalise the data
"""
- for i in range(len(data)):
- data[i] = data[i] / np.linalg.norm(data[i])
- return data
+ return data / 10000
\ No newline at end of file
diff --git a/results.py b/results.py
new file mode 100644
index 0000000..3573d0a
--- /dev/null
+++ b/results.py
@@ -0,0 +1,27 @@
+import matplotlib.pyplot as plt
+
+from utilities.constants import GREEN, WHITE
+
+# winners = []
+with open("winners.txt") as f:
+ winners = f.readlines()
+
+winners = [int(x.strip()) for x in winners]
+
+
+fig, ax = plt.subplots()
+bar = ax.bar(["Draw", "White", "Green"], [winners.count(0), winners.count(WHITE), winners.count(GREEN)])
+ax.set(xlabel='Winner', ylabel='Frequency', ylim=[0, 500])
+ax.set_title("Winners")
+ax.bar_label(bar)
+plt.show()
+
+
+with open("rewardsA.txt") as f:
+ totalReward = f.readlines()
+
+
+plt.plot([i for i in range(len(totalReward))], totalReward)
+plt.xlabel("Games")
+plt.ylabel("Reward")
+plt.show()
\ No newline at end of file
diff --git a/utilities/Board.py b/utilities/Board.py
index b4387a3..d01954b 100644
--- a/utilities/Board.py
+++ b/utilities/Board.py
@@ -63,19 +63,44 @@ class Board:
if piece != 0:
if piece.colour == GREEN:
self.greenLeft -= 1
- return
+ continue
self.whiteLeft -= 1
def getAllMoves(self, colour):
moves = []
+ possibleMoves = []
+ possiblePieces = []
+ pieces = self.getAllPieces(colour)
+ hasForcedCapture = False
- for piece in self.getAllPieces(colour):
+ for piece in pieces:
validMoves = self.getValidMoves(piece)
- for move, skip in validMoves.items():
- tempBoard = deepcopy(self)
- tempPiece = tempBoard.getPiece(piece.row, piece.col)
- newBoard = self._simulateMove(tempPiece, move, tempBoard, skip)
- moves.append(newBoard)
+
+ # Check if there are forced capture moves for this piece
+ forcedCaptureMoves = [move for move, skip in validMoves.items() if skip]
+ if forcedCaptureMoves:
+ hasForcedCapture = True
+ possiblePieces.append(piece)
+ possibleMoves.append({move: skip for move, skip in validMoves.items() if skip})
+
+ if hasForcedCapture:
+ # If there are forced capture moves, consider only those
+ for i in range(len(possibleMoves)):
+ for move, skip in possibleMoves[i].items():
+ tempBoard = deepcopy(self)
+ tempPiece = tempBoard.getPiece(possiblePieces[i].row, possiblePieces[i].col)
+ newBoard = self._simulateMove(tempPiece, move, tempBoard, skip)
+ moves.append(newBoard)
+ else:
+ # If no forced capture moves, consider all valid moves
+ for piece in pieces:
+ validMoves = self.getValidMoves(piece)
+ for move, skip in validMoves.items():
+ tempBoard = deepcopy(self)
+ tempPiece = tempBoard.getPiece(piece.row, piece.col)
+ newBoard = self._simulateMove(tempPiece, move, tempBoard, skip)
+ moves.append(newBoard)
+
return moves
def _simulateMove(self, piece, move, board, skip):
@@ -134,6 +159,7 @@ class Board:
forcedCapture = forced
else:
forcedCapture = forced
+
return forcedCapture
def scoreOfTheBoard(self):
@@ -241,7 +267,7 @@ class Board:
def _decode(self, move):
# Split digits back out
str_code = str(move)
- print(str_code)
+ # print(str_code)
start_row = int(str_code[0])
start_col = int(str_code[1])
end_row = int(str_code[2])