Final code version before submitting
@@ -22,8 +22,8 @@ class ReinforcementLearning():
     def __init__(self, actionSpace: list, board: Board, colour: int, gameManager: GameManager) -> None:
         """
         Constructor for the ReinforcementLearning class
-        :param actionSpace: the number of possible actions
-        :param board: the game board
+        :param actionSpace: The number of possible actions
+        :param board: The game board
         """
         self.gameManager = gameManager
         self.actionSpace = actionSpace
@@ -33,7 +33,7 @@ class ReinforcementLearning():
         self.score = 0
         self.epsilon = 1
         self.gamma = .95
-        self.batchSize = 256
+        self.batchSize = 512
         self.maxSize = 32
         self.epsilonMin = .01
         self.epsilonDecay = .995
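These values drive a conventional epsilon-greedy schedule: exploration starts fully random (epsilon = 1), decays multiplicatively, and bottoms out at epsilonMin. A minimal sketch of that schedule, assuming the decay is applied once per training step (where the commit actually performs the decay is not visible in this diff):

# Hedged sketch of the decay implied by the hyperparameters above.
epsilon, epsilonMin, epsilonDecay = 1.0, .01, .995

def decayEpsilon(eps: float) -> float:
    # Multiplicative decay, floored so some exploration always remains
    return max(epsilonMin, eps * epsilonDecay)

# 0.995 ** 920 is roughly 0.0099, so epsilon hits its floor after ~920 steps
for step in range(1000):
    epsilon = decayEpsilon(epsilon)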
@@ -42,10 +42,10 @@ class ReinforcementLearning():
         self.model = self.buildMainModel()
         print(self.model.summary())

-    def AI(self, board: Board) -> tuple:
+    def AITrain(self, board: Board) -> tuple:
         """
         Learns to play the draughts game
-        :return: the loss
+        :return: The loss
         """
         self.board = board
         self.state = self._convertState(self.board.board)
@@ -62,10 +62,29 @@ class ReinforcementLearning():

         return self.score, nextState

+    def AITest(self, board: Board) -> Board:
+        """
+        Runs the AI
+        :param board: The board
+        :return: The new board
+        """
+        actionSpace = self.encodeMoves(WHITE, board)
+        if len(actionSpace) == 0:
+            print("Cannot make move")
+            return None
+        totalMoves = len(actionSpace)
+        # moves = np.squeeze(moves)
+        moves = np.pad(actionSpace, (0, self.maxSize - totalMoves), 'constant', constant_values=(1, 1))
+        act_values = self.model.predict(self.normalise(moves))
+        val = np.argmax(act_values[0])
+        val = val if val < totalMoves else totalMoves - 1
+        reward, newBoard, done = board.step(actionSpace[val], WHITE)
+        return newBoard
+
     def buildMainModel(self) -> Sequential:
         """
         Build the model for the AI
-        :return: the model
+        :return: The model
         """
         # Board model
         modelLayers = [
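The new AITest path pads the variable-length list of encoded moves out to the fixed input width maxSize before calling model.predict, then clamps the argmax back into the legal range in case the network scores a padding slot highest. A self-contained sketch of just that padding-and-clamping step (the move values and the fake argmax result are illustrative assumptions):

import numpy as np

maxSize = 32
actionSpace = [1223, 3445, 5667]  # hypothetical encoded moves
totalMoves = len(actionSpace)

# Pad with the constant 1 so the input always has maxSize entries
moves = np.pad(actionSpace, (0, maxSize - totalMoves), 'constant', constant_values=(1, 1))

val = 17  # pretend np.argmax landed on a padding slot
val = val if val < totalMoves else totalMoves - 1
print(actionSpace[val])  # falls back to the last legal move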
@@ -93,7 +112,7 @@ class ReinforcementLearning():
     def _replay(self) -> None:
         """
        trains the model
-        :return: None (void)
+        :return: None
         """
         if len(self.memory) < self.batchSize:
             # Not enough data to replay and test the model
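The diff truncates _replay's body, but the surrounding pieces (gamma, _remember's transition tuples, _maxNextQ) point at the standard DQN update: fit the network toward r + gamma * max Q(next state). A hedged sketch of that conventional recipe, not the commit's exact code:

import random
import numpy as np

def replaySketch(model, memory, batchSize=512, gamma=.95):
    # Skip training until enough transitions have been collected
    if len(memory) < batchSize:
        return
    for state, action, reward, nextState, done in random.sample(list(memory), batchSize):
        # Terminal transitions keep the raw reward; otherwise bootstrap ahead
        target = reward if done else reward + gamma * np.max(model.predict(nextState)[0])
        qValues = model.predict(state)   # states assumed already batched
        qValues[0][action] = target      # action treated as an index, per the generic recipe
        model.fit(state, qValues, epochs=1, verbose=0)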
@@ -132,19 +151,19 @@ class ReinforcementLearning():
     def _remember(self, state: np.array, action: int, reward: float, nextState: np.array, done: bool) -> None:
         """
         Remembers what it has learnt
-        :param state: the current state
-        :param action: the action taken
-        :param reward: the reward for the action
-        :param nextState: the next state
-        :param done: whether the game is finished
-        :return: None (void)
+        :param state: The current state
+        :param action: The action taken
+        :param reward: The reward for the action
+        :param nextState: The next state
+        :param done: Whether the game is finished
+        :return: None
         """
         self.memory.append((state, action, reward, nextState, done))

     def _act(self) -> Any:
         """
         Chooses an action based on the available moves
-        :return: the action
+        :return: The action
         """
         if np.random.rand() <= self.epsilon:
             # choose a random action from the action spaces list
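self.memory only ever grows in the code shown; a bounded deque is the usual way to keep a replay buffer fresh, evicting the oldest transitions automatically. A small sketch of that pattern (the maxlen value is an assumption, not taken from this commit):

from collections import deque

memory = deque(maxlen=2000)  # assumed capacity, purely illustrative
for step in range(2500):
    # Dummy tuple standing in for (state, action, reward, nextState, done)
    memory.append((step, 0, 0.0, step + 1, False))
print(len(memory))  # 2000: the oldest 500 transitions were dropped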
@@ -159,12 +178,16 @@ class ReinforcementLearning():
             return self.actionSpace[0]
         encodedMoves = np.squeeze(self.actionSpace)
         encodedMoves = np.pad(encodedMoves, (0, self.maxSize - len(encodedMoves)), 'constant', constant_values=(1, 1))
-        act_values = self.model.predict(self.normalise(encodedMoves))
-        val = np.argmax(act_values[0])
+        actValues = self.model.predict(self.normalise(encodedMoves))
+        val = np.argmax(actValues[0])
         val = val if val < len(self.actionSpace) else len(self.actionSpace) - 1
         return self.actionSpace[val]

-    def resetScore(self):
+    def resetScore(self) -> None:
+        """
+        Resets the score
+        :return: None
+        """
         self.score = 0

     def _convertState(self, board: list) -> list:
@@ -195,9 +218,9 @@ class ReinforcementLearning():
     def _encode(self, start: tuple, end: tuple) -> int:
         """
         Encodes the move into an integer
-        :param start: tuple of start position
-        :param end: tuple of end position
-        :return: encoded move
+        :param start: Tuple of start position
+        :param end: Tuple of end position
+        :return: Encoded move
         """
         start_row = start[0]
         start_col = end[0]
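_encode packs the four board coordinates into a single integer by string concatenation, so a move from (3, 4) to (5, 6) encodes as 3456. A standalone sketch with conventional tuple indexing (note that the hunk's own start_col = end[0] reads the end tuple, which looks unintended):

def encode(start: tuple, end: tuple) -> int:
    # Conventional indexing assumed: row = t[0], col = t[1]
    startRow, startCol = start
    endRow, endCol = end
    return int(str(startRow) + str(startCol) + str(endRow) + str(endCol))

print(encode((3, 4), (5, 6)))  # 3456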
@@ -209,6 +232,10 @@ class ReinforcementLearning():
         return int(str(start_row) + str(start_col) + str(end_row) + str(end_col))

     def _maxNextQ(self) -> float:
+        """
+        Calculates the max Q value for the next state
+        :return: the max Q value
+        """
         colour = WHITE if self.colour == GREEN else GREEN
         encodedMoves = self.encodeMoves(colour, self.board)
         if len(encodedMoves) == 0:
@@ -220,9 +247,9 @@ class ReinforcementLearning():
     def encodeMoves(self, colour: int, board: Board) -> list:
         """
         Encodes the moves into a list of encoded moves
-        :param colour: colour of the player
-        :param board: the board
-        :return: list of encoded moves
+        :param colour: Colour of the player
+        :param board: The board
+        :return: List of encoded moves
         """
         encodedMoves = []
         moves = board.getAllMoves(colour)
@@ -231,15 +258,23 @@ class ReinforcementLearning():
             encodedMoves.append(self._encode(where[0]+1, where[1]+1))
         return encodedMoves

-    def _boardDiff(self, board, move):
+    def _boardDiff(self, board: Board, move: Board) -> np.array:
+        """
+        Finds the difference between the two boards
+        :param board: The current board
+        :param move: The new board
+        :return: the difference between the two boards
+        """
         cnvState = np.array(self._convertState(board.board))
         cnvMove = np.array(self._convertState(move.board))
         diff = np.subtract(cnvMove, cnvState)
         diff = np.nonzero(diff)
         return diff

-    def normalise(self, data):
+    def normalise(self, data: np.array) -> np.array:
+        """
+        Normalise the data
+        :param data: the data to normalise
+        :return: normalised data
+        """
         return data / 10000
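The 10000 divisor lines up with the move encoding: encodeMoves shifts coordinates to 1-based before calling _encode, so encoded moves come out as four-digit integers, and dividing by 10000 maps them into (0, 1) for the network. A quick illustration (move values assumed):

import numpy as np

encodedMoves = np.array([1223, 3445, 8876])  # hypothetical encoded moves
print(encodedMoves / 10000)  # [0.1223 0.3445 0.8876]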