created working reinforcement learning model

This commit is contained in:
2023-08-22 16:31:16 +01:00
parent 1eb0a04f30
commit 1aa8ffa8fc
9 changed files with 466 additions and 221 deletions
+105 -31
View File
@@ -1,5 +1,5 @@
import pygame
from copy import deepcopy
from .constants import BLACK, ROWS, GREEN, SQUARE_SIZE, COLS, WHITE
from .piece import Piece
@@ -9,34 +9,39 @@ class Board:
self.board = []
self.greenLeft = self.whiteLeft = 12
self.greenKings = self.whiteKings = 0
self.createBoard()
self.green = (144, 184, 59)
self._createBoard()
def drawSquares(self, win):
def _drawSquares(self, win):
win.fill(BLACK)
for row in range(ROWS):
for col in range(row % 2, ROWS, 2):
pygame.draw.rect(win, GREEN, (row * SQUARE_SIZE, col * SQUARE_SIZE, SQUARE_SIZE, SQUARE_SIZE))
pygame.draw.rect(win, self.green, (row * SQUARE_SIZE, col * SQUARE_SIZE, SQUARE_SIZE, SQUARE_SIZE))
def createBoard(self):
def _createBoard(self):
for row in range(ROWS):
self.board.append([])
for col in range(COLS):
if col % 2 == ((row + 1) % 2):
if row < 3:
self.board[row].append(Piece(row, col, WHITE))
elif row > 4:
continue
if row > 4:
self.board[row].append(Piece(row, col, GREEN))
else:
self.board[row].append(None)
else:
self.board[row].append(None)
continue
self.board[row].append(0)
continue
self.board[row].append(0)
def draw(self, win):
self.drawSquares(win)
self._drawSquares(win)
for row in range(ROWS):
for col in range(COLS):
piece = self.board[row][col]
if piece is not None:
if piece != 0:
piece.draw(win)
def move(self, piece, row, col):
@@ -45,19 +50,40 @@ class Board:
if row == ROWS - 1 or row == 0:
piece.makeKing()
if piece.colour == WHITE:
self.whiteKings += 1
else:
self.greenKings += 1
if piece.colour == WHITE:
self.whiteKings += 1
if piece.colour == GREEN:
self.greenKings += 1
def remove(self, skipped):
    """Take captured pieces off the board and update the remaining-piece counts.

    Args:
        skipped: Iterable of captured pieces. Each piece is read for its
            ``row``, ``col`` and ``colour`` attributes. Entries equal to
            ``0`` are ignored.

    Every entry is processed. Leaving the loop early after the first GREEN
    capture would leave the other pieces of a multi-jump on the board and
    uncounted.
    """
    for piece in skipped:
        # Check for the empty marker before touching attributes: an int has
        # no ``row``/``col``, so the guard has to come first to be effective.
        if piece == 0:
            continue
        self.board[piece.row][piece.col] = 0
        if piece.colour == GREEN:
            self.greenLeft -= 1
        else:
            self.whiteLeft -= 1
def getAllMoves(self, colour):
    """Build one successor board for every valid move available to ``colour``.

    Args:
        colour: Passed to ``self.getAllPieces`` to select the pieces to move.

    Returns:
        A list of boards. Each is a deep copy of ``self`` on which a single
        valid move (and the skip list reported for it) has been applied via
        ``self._simulateMove``. ``self`` itself is not modified here.
    """
    successors = []
    for original in self.getAllPieces(colour):
        for target, captured in self.getValidMoves(original).items():
            # Work on a private copy and look the piece up again there, so
            # the move is applied to the copy's own piece object.
            clone = deepcopy(self)
            mover = clone.getPiece(original.row, original.col)
            successors.append(self._simulateMove(mover, target, clone, captured))
    return successors
def _simulateMove(self, piece, move, board, skip):
    """Apply ``move`` for ``piece`` on ``board`` and return that same board.

    Args:
        piece: Piece handed to ``board.move``.
        move: Indexable destination; element 0 is used as the row and
            element 1 as the column.
        board: Board that is mutated in place.
        skip: Captured pieces; when truthy they are passed to ``board.remove``.

    Returns:
        ``board``, after the move (and any removal) has been applied.
    """
    destRow = move[0]
    destCol = move[1]
    board.move(piece, destRow, destCol)
    if not skip:
        return board
    board.remove(skip)
    return board
def getPiece(self, row, col):
    """Return the entry stored in ``self.board`` at ``row``, ``col``."""
    boardRow = self.board[row]
    return boardRow[col]
@@ -65,7 +91,8 @@ class Board:
def winner(self):
    """Report which colour has won, if any.

    Returns:
        ``WHITE`` when ``greenLeft`` is zero or below, otherwise ``GREEN``
        when ``whiteLeft`` is zero or below, otherwise ``None``. The green
        count is checked first.
    """
    if self.greenLeft <= 0:
        return WHITE
    if self.whiteLeft <= 0:
        return GREEN
    return None
@@ -76,16 +103,10 @@ class Board:
left = piece.col - 1
right = piece.col + 1
row = piece.row
if piece.colour == GREEN:
moves.update(self._traverseLeft(row - 1, max(row - 3, -1), -1, piece.colour, left))
moves.update(self._traverseRight(row - 1, max(row - 3, -1), -1, piece.colour, right))
if piece.colour == WHITE:
moves.update(self._traverseLeft(row + 1, min(row + 3, ROWS), 1, piece.colour, left))
moves.update(self._traverseRight(row + 1, min(row + 3, ROWS), 1, piece.colour, right))
if piece.king:
if piece.colour == GREEN or piece.king:
moves.update(self._traverseLeft(row - 1, max(row - 3, -1), -1, piece.colour, left))
moves.update(self._traverseRight(row - 1, max(row - 3, -1), -1, piece.colour, right))
if piece.colour == WHITE or piece.king:
moves.update(self._traverseLeft(row + 1, min(row + 3, ROWS), 1, piece.colour, left))
moves.update(self._traverseRight(row + 1, min(row + 3, ROWS), 1, piece.colour, right))
@@ -122,7 +143,7 @@ class Board:
pieces = []
for row in self.board:
for piece in row:
if piece is not None and piece.colour == colour:
if piece != 0 and piece.colour == colour:
pieces.append(piece)
return pieces
@@ -162,7 +183,7 @@ class Board:
def _traverse(self, row, col, skipped, moves, step, last, colour):
current = self.board[row][col]
if current is None:
if current == 0:
if skipped and not last:
return None
elif skipped:
@@ -183,3 +204,56 @@ class Board:
else:
last = [current]
return last
def step(self, move, colour):
# Apply one encoded move to this board and return (reward, self, done).
#
# move   - encoded move; handed to self._decode to obtain the start and end
#          [row, col] lists.
# colour - compared with self.winner() to decide whether the game is over.
#
# NOTE(review): the indentation of this block is not visible in this view, so
# the nesting described in the comments below is inferred from statement
# order only - confirm against the real file.
start, end = self._decode(move)
# Shift every decoded coordinate down by one (presumably the encoding is
# 1-based while the board lists are 0-based - verify against the encoder).
start[0] = start[0] - 1
start[1] = start[1] - 1
end[0] = end[0] - 1
end[1] = end[1] - 1
reward = 0
done = False
piece = self.getPiece(start[0], start[1])
# The start square compares equal to 0 (no piece): swap start and end and
# read the piece from the other square instead.
# NOTE(review): if that square is also 0, the value 0 is what reaches
# getValidMoves below - confirm that case is handled.
if piece == 0:
newStart = end
end = start
start = newStart
piece = self.getPiece(start[0], start[1])
moves = self.getValidMoves(piece)
# The loop variable rebinds `move`, shadowing the encoded-move parameter.
for move, skip in moves.items():
if tuple(end) == move:
# The requested destination is a valid move: apply it to this board in place.
self._simulateMove(piece, move, self, skip)
# One skipped piece -> reward 2.
if len(skip) == 1:
reward = 2
break
# Several skipped pieces -> 3 plus 0.2 per skipped piece.
if len(skip) > 1:
reward = 3 + len(skip) * 0.2
break
# NOTE(review): whether this -0.5 / break pair sits inside the matched-move
# branch (a matched move with no skips) or directly in the loop body (the
# first candidate did not match) cannot be told without indentation - TODO confirm.
reward = -0.5
break
# A win for `colour` overrides whatever reward was set above.
if self.winner() == colour:
done = True
reward = 10
return reward, self, done
def _decode(self, move):
    """Split an encoded move into its start and end coordinates.

    The first four characters of ``str(move)`` are read as single decimal
    digits, in the order start row, start column, end row, end column.
    Any characters after the fourth are ignored.

    Args:
        move: Value whose string form begins with four digits, e.g. ``1234``.

    Returns:
        A ``(start, end)`` tuple of two-element lists, each ``[row, col]``,
        holding the digits exactly as encoded (no offset is applied here).

    Raises:
        IndexError: If ``str(move)`` has fewer than four characters.
        ValueError: If one of the first four characters is not a digit.
    """
    # No debug print here: this runs for every step, so echoing the code
    # would flood stdout.
    code = str(move)
    startRow, startCol, endRow, endCol = (int(code[i]) for i in range(4))
    return [startRow, startCol], [endRow, endCol]
# def reset(self):
# self.board = []
# self.whiteLeft = self.greenLeft = 12
# self.whiteKings = self.greenKings = 0
# self._createBoard()
# return self.board
+2 -2
View File
@@ -6,8 +6,8 @@ SQUARE_SIZE = WIDTH // COLS
# RGB color
GREEN = (144, 184, 59)
WHITE = (255, 255, 255)
GREEN = 1
WHITE = 2
BLACK = (0, 0, 0)
BLUE = (0, 0, 255)
GREY = (128, 128, 128)
+6 -5
View File
@@ -1,7 +1,8 @@
import pygame
from utilities.board import Board
from utilities.Board import Board
from utilities.constants import GREEN, WHITE, BLUE, SQUARE_SIZE
class GameManager:
def __init__(self, win, colour):
self._init(colour)
@@ -29,14 +30,14 @@ class GameManager:
self.selected = None
self.select(row, col)
piece = self.board.getPiece(row, col)
if piece is not None and piece.colour == self.turn:
if piece != 0 and piece.colour == self.turn:
self.selected = piece
self.validMoves = self.board.getValidMoves(piece)
return True
def _move(self, row, col):
piece = self.board.getPiece(row, col)
if self.selected and piece is None and (row, col) in self.validMoves:
if self.selected and piece == 0 and (row, col) in self.validMoves:
self.board.move(self.selected, row, col)
skipped = self.validMoves[row, col]
if self.validMoves[list(self.validMoves.keys())[0]]:
@@ -58,8 +59,8 @@ class GameManager:
self.validMoves = {}
if self.turn == GREEN:
self.turn = WHITE
else:
self.turn = GREEN
return
self.turn = GREEN
def drawValidMoves(self, moves):
for row, col in moves:
+4 -2
View File
@@ -1,6 +1,6 @@
import pygame.draw
from utilities.constants import SQUARE_SIZE, GREY, CROWN
from utilities.constants import SQUARE_SIZE, GREY, CROWN, GREEN
class Piece:
@@ -14,6 +14,8 @@ class Piece:
self.calcPosition()
self.padding = 20
self.border = 2
self.green = (144, 184, 59)
self.white = (255, 255, 255)
def calcPosition(self):
self.x = SQUARE_SIZE * self.col + SQUARE_SIZE // 2
@@ -25,7 +27,7 @@ class Piece:
def draw(self, win):
radius = SQUARE_SIZE // 2 - self.padding
pygame.draw.circle(win, GREY, (self.x, self.y), radius + self.border)
pygame.draw.circle(win, self.colour, (self.x, self.y), radius)
pygame.draw.circle(win, self.green if self.colour == GREEN else self.white, (self.x, self.y), radius)
if self.king:
win.blit(CROWN, (self.x - CROWN.get_width() // 2, self.y - CROWN.get_height() // 2))