created working reinforcement learning model
This commit is contained in:
@@ -1,5 +1,5 @@
|
||||
import pygame
|
||||
|
||||
from copy import deepcopy
|
||||
from .constants import BLACK, ROWS, GREEN, SQUARE_SIZE, COLS, WHITE
|
||||
from .piece import Piece
|
||||
|
||||
@@ -9,34 +9,39 @@ class Board:
|
||||
self.board = []
|
||||
self.greenLeft = self.whiteLeft = 12
|
||||
self.greenKings = self.whiteKings = 0
|
||||
self.createBoard()
|
||||
self.green = (144, 184, 59)
|
||||
self._createBoard()
|
||||
|
||||
def drawSquares(self, win):
|
||||
def _drawSquares(self, win):
|
||||
win.fill(BLACK)
|
||||
for row in range(ROWS):
|
||||
for col in range(row % 2, ROWS, 2):
|
||||
pygame.draw.rect(win, GREEN, (row * SQUARE_SIZE, col * SQUARE_SIZE, SQUARE_SIZE, SQUARE_SIZE))
|
||||
pygame.draw.rect(win, self.green, (row * SQUARE_SIZE, col * SQUARE_SIZE, SQUARE_SIZE, SQUARE_SIZE))
|
||||
|
||||
def createBoard(self):
|
||||
def _createBoard(self):
|
||||
for row in range(ROWS):
|
||||
self.board.append([])
|
||||
for col in range(COLS):
|
||||
if col % 2 == ((row + 1) % 2):
|
||||
if row < 3:
|
||||
self.board[row].append(Piece(row, col, WHITE))
|
||||
elif row > 4:
|
||||
continue
|
||||
|
||||
if row > 4:
|
||||
self.board[row].append(Piece(row, col, GREEN))
|
||||
else:
|
||||
self.board[row].append(None)
|
||||
else:
|
||||
self.board[row].append(None)
|
||||
continue
|
||||
|
||||
self.board[row].append(0)
|
||||
continue
|
||||
|
||||
self.board[row].append(0)
|
||||
|
||||
def draw(self, win):
|
||||
self.drawSquares(win)
|
||||
self._drawSquares(win)
|
||||
for row in range(ROWS):
|
||||
for col in range(COLS):
|
||||
piece = self.board[row][col]
|
||||
if piece is not None:
|
||||
if piece != 0:
|
||||
piece.draw(win)
|
||||
|
||||
def move(self, piece, row, col):
|
||||
@@ -45,19 +50,40 @@ class Board:
|
||||
|
||||
if row == ROWS - 1 or row == 0:
|
||||
piece.makeKing()
|
||||
if piece.colour == WHITE:
|
||||
self.whiteKings += 1
|
||||
else:
|
||||
self.greenKings += 1
|
||||
|
||||
if piece.colour == WHITE:
|
||||
self.whiteKings += 1
|
||||
|
||||
if piece.colour == GREEN:
|
||||
self.greenKings += 1
|
||||
|
||||
def remove(self, skipped):
|
||||
for piece in skipped:
|
||||
self.board[piece.row][piece.col] = None
|
||||
if piece is not None:
|
||||
self.board[piece.row][piece.col] = 0
|
||||
if piece != 0:
|
||||
if piece.colour == GREEN:
|
||||
self.greenLeft -= 1
|
||||
else:
|
||||
self.whiteLeft -= 1
|
||||
return
|
||||
self.whiteLeft -= 1
|
||||
|
||||
def getAllMoves(self, colour):
    """Return the board positions reachable by one move of ``colour``.

    For every piece returned by ``self.getAllPieces(colour)`` and every
    entry of ``self.getValidMoves(piece)``, the current board is deep-copied,
    the move is played on the copy through ``self._simulateMove`` and the
    resulting board is collected. This board itself is not modified.

    Args:
        colour: value forwarded to ``self.getAllPieces``.

    Returns:
        list: one board per (piece, move) pair, in iteration order.
    """
    successors = []
    for original in self.getAllPieces(colour):
        for target, captured in self.getValidMoves(original).items():
            # Work on a private copy so sibling moves start from the same state.
            sandbox = deepcopy(self)
            # The copy holds its own piece objects; fetch the matching one.
            mover = sandbox.getPiece(original.row, original.col)
            successors.append(
                self._simulateMove(mover, target, sandbox, captured)
            )
    return successors
|
||||
|
||||
def _simulateMove(self, piece, move, board, skip):
    """Play ``move`` with ``piece`` on ``board`` and return that board.

    Args:
        piece: the piece to move; handed to ``board.move``.
        move: indexable pair whose items 0 and 1 are the target row and column.
        board: the board to act on (it is mutated in place).
        skip: pieces jumped over; when non-empty they are passed to
            ``board.remove``.

    Returns:
        The same ``board`` object that was passed in.
    """
    targetRow, targetCol = move[0], move[1]
    board.move(piece, targetRow, targetCol)

    # An empty/falsy skip means a plain move with nothing to take off the board.
    if not skip:
        return board

    board.remove(skip)
    return board
|
||||
|
||||
def getPiece(self, row, col):
|
||||
return self.board[row][col]
|
||||
@@ -65,7 +91,8 @@ class Board:
|
||||
def winner(self):
|
||||
if self.greenLeft <= 0:
|
||||
return WHITE
|
||||
elif self.whiteLeft <= 0:
|
||||
|
||||
if self.whiteLeft <= 0:
|
||||
return GREEN
|
||||
|
||||
return None
|
||||
@@ -76,16 +103,10 @@ class Board:
|
||||
left = piece.col - 1
|
||||
right = piece.col + 1
|
||||
row = piece.row
|
||||
if piece.colour == GREEN:
|
||||
moves.update(self._traverseLeft(row - 1, max(row - 3, -1), -1, piece.colour, left))
|
||||
moves.update(self._traverseRight(row - 1, max(row - 3, -1), -1, piece.colour, right))
|
||||
if piece.colour == WHITE:
|
||||
moves.update(self._traverseLeft(row + 1, min(row + 3, ROWS), 1, piece.colour, left))
|
||||
moves.update(self._traverseRight(row + 1, min(row + 3, ROWS), 1, piece.colour, right))
|
||||
|
||||
if piece.king:
|
||||
if piece.colour == GREEN or piece.king:
|
||||
moves.update(self._traverseLeft(row - 1, max(row - 3, -1), -1, piece.colour, left))
|
||||
moves.update(self._traverseRight(row - 1, max(row - 3, -1), -1, piece.colour, right))
|
||||
if piece.colour == WHITE or piece.king:
|
||||
moves.update(self._traverseLeft(row + 1, min(row + 3, ROWS), 1, piece.colour, left))
|
||||
moves.update(self._traverseRight(row + 1, min(row + 3, ROWS), 1, piece.colour, right))
|
||||
|
||||
@@ -122,7 +143,7 @@ class Board:
|
||||
pieces = []
|
||||
for row in self.board:
|
||||
for piece in row:
|
||||
if piece is not None and piece.colour == colour:
|
||||
if piece != 0 and piece.colour == colour:
|
||||
pieces.append(piece)
|
||||
return pieces
|
||||
|
||||
@@ -162,7 +183,7 @@ class Board:
|
||||
|
||||
def _traverse(self, row, col, skipped, moves, step, last, colour):
|
||||
current = self.board[row][col]
|
||||
if current is None:
|
||||
if current == 0:
|
||||
if skipped and not last:
|
||||
return None
|
||||
elif skipped:
|
||||
@@ -183,3 +204,56 @@ class Board:
|
||||
else:
|
||||
last = [current]
|
||||
return last
|
||||
|
||||
def step(self, move, colour):
    """Apply an encoded move to this board and return its reward.

    ``move`` is decoded with ``self._decode`` into 1-based ``[row, col]``
    start and end squares, which are shifted to 0-based board indices.
    If the start square is empty (``0``) the two squares are swapped, so the
    move is tried in the opposite direction.

    Rewards:
        2            legal move capturing exactly one piece
        3 + 0.2 * n  legal move capturing n > 1 pieces
        -0.5         legal move capturing nothing
        0            no piece on either square, or the target is not a
                     valid move for the piece (board left untouched)
        10           ``self.winner()`` equals ``colour`` afterwards
                     (overrides the values above)

    NOTE(review): indentation was not recoverable in the reviewed copy of
    this method; ``-0.5`` is taken to be the reward for a legal
    non-capturing move. Confirm against the repository.
    NOTE(review): the moved piece's colour is not compared with ``colour``;
    confirm the caller only submits moves for its own side.

    Args:
        move: encoded move understood by ``self._decode``.
        colour: side whose victory ends the episode with reward 10.

    Returns:
        tuple: ``(reward, self, done)``.
    """
    start, end = self._decode(move)
    # The encoding is 1-based; board indices are 0-based.
    start = [start[0] - 1, start[1] - 1]
    end = [end[0] - 1, end[1] - 1]

    reward = 0
    done = False

    piece = self.getPiece(start[0], start[1])
    if piece == 0:
        # Nothing on the start square: treat the move as end -> start.
        start, end = end, start
        piece = self.getPiece(start[0], start[1])

    # Guard: with both squares empty there is no piece to ask for moves
    # (getValidMoves dereferences the piece and would fail on 0).
    if piece != 0:
        legalMoves = self.getValidMoves(piece)
        target = tuple(end)
        if target in legalMoves:
            skip = legalMoves[target]
            self._simulateMove(piece, target, self, skip)
            captured = len(skip)
            if captured == 1:
                reward = 2
            elif captured > 1:
                reward = 3 + captured * 0.2
            else:
                reward = -0.5

    if self.winner() == colour:
        done = True
        reward = 10
    return reward, self, done
|
||||
|
||||
|
||||
def _decode(self, move):
    """Split an encoded move into its start and end squares.

    The first four characters of ``str(move)`` are read, in order, as
    start row, start column, end row and end column. Any further
    characters are ignored.

    Args:
        move: value whose string form begins with four digits, e.g. ``1234``.

    Returns:
        tuple: ``([start_row, start_col], [end_row, end_col])`` as lists of int.

    Raises:
        IndexError: if ``str(move)`` has fewer than four characters.
        ValueError: if one of the first four characters is not a digit.
    """
    # No debug print here: this runs on every step and would flood stdout.
    digits = str(move)
    start = [int(digits[0]), int(digits[1])]
    end = [int(digits[2]), int(digits[3])]
    return start, end
|
||||
|
||||
# def reset(self):
|
||||
# self.board = []
|
||||
# self.whiteLeft = self.greenLeft = 12
|
||||
# self.whiteKings = self.greenKings = 0
|
||||
# self._createBoard()
|
||||
# return self.board
|
||||
@@ -6,8 +6,8 @@ SQUARE_SIZE = WIDTH // COLS
|
||||
|
||||
# RGB color
|
||||
|
||||
GREEN = (144, 184, 59)
|
||||
WHITE = (255, 255, 255)
|
||||
GREEN = 1
|
||||
WHITE = 2
|
||||
BLACK = (0, 0, 0)
|
||||
BLUE = (0, 0, 255)
|
||||
GREY = (128, 128, 128)
|
||||
|
||||
@@ -1,7 +1,8 @@
|
||||
import pygame
|
||||
from utilities.board import Board
|
||||
from utilities.Board import Board
|
||||
from utilities.constants import GREEN, WHITE, BLUE, SQUARE_SIZE
|
||||
|
||||
|
||||
class GameManager:
|
||||
def __init__(self, win, colour):
|
||||
self._init(colour)
|
||||
@@ -29,14 +30,14 @@ class GameManager:
|
||||
self.selected = None
|
||||
self.select(row, col)
|
||||
piece = self.board.getPiece(row, col)
|
||||
if piece is not None and piece.colour == self.turn:
|
||||
if piece != 0 and piece.colour == self.turn:
|
||||
self.selected = piece
|
||||
self.validMoves = self.board.getValidMoves(piece)
|
||||
return True
|
||||
|
||||
def _move(self, row, col):
|
||||
piece = self.board.getPiece(row, col)
|
||||
if self.selected and piece is None and (row, col) in self.validMoves:
|
||||
if self.selected and piece == 0 and (row, col) in self.validMoves:
|
||||
self.board.move(self.selected, row, col)
|
||||
skipped = self.validMoves[row, col]
|
||||
if self.validMoves[list(self.validMoves.keys())[0]]:
|
||||
@@ -58,8 +59,8 @@ class GameManager:
|
||||
self.validMoves = {}
|
||||
if self.turn == GREEN:
|
||||
self.turn = WHITE
|
||||
else:
|
||||
self.turn = GREEN
|
||||
return
|
||||
self.turn = GREEN
|
||||
|
||||
def drawValidMoves(self, moves):
|
||||
for row, col in moves:
|
||||
|
||||
+4
-2
@@ -1,6 +1,6 @@
|
||||
import pygame.draw
|
||||
|
||||
from utilities.constants import SQUARE_SIZE, GREY, CROWN
|
||||
from utilities.constants import SQUARE_SIZE, GREY, CROWN, GREEN
|
||||
|
||||
|
||||
class Piece:
|
||||
@@ -14,6 +14,8 @@ class Piece:
|
||||
self.calcPosition()
|
||||
self.padding = 20
|
||||
self.border = 2
|
||||
self.green = (144, 184, 59)
|
||||
self.white = (255, 255, 255)
|
||||
|
||||
def calcPosition(self):
|
||||
self.x = SQUARE_SIZE * self.col + SQUARE_SIZE // 2
|
||||
@@ -25,7 +27,7 @@ class Piece:
|
||||
def draw(self, win):
|
||||
radius = SQUARE_SIZE // 2 - self.padding
|
||||
pygame.draw.circle(win, GREY, (self.x, self.y), radius + self.border)
|
||||
pygame.draw.circle(win, self.colour, (self.x, self.y), radius)
|
||||
pygame.draw.circle(win, self.green if self.colour == GREEN else self.white, (self.x, self.y), radius)
|
||||
if self.king:
|
||||
win.blit(CROWN, (self.x - CROWN.get_width() // 2, self.y - CROWN.get_height() // 2))
|
||||
|
||||
|
||||
Reference in New Issue
Block a user