# mdpAgents.py
# parsons/20-nov-2017
#
# Version 1
#
# The starting point for CW2.
#
# Intended to work with the PacMan AI projects from:
#
# http://ai.berkeley.edu/
#
# These use a simple API that allows us to control Pacman's interaction with
# the environment, adding a layer on top of the AI Berkeley code.
#
# As required by the licensing agreement for the PacMan AI we have:
#
# Licensing Information: You are free to use or extend these projects for
# educational purposes provided that (1) you do not distribute or publish
# solutions, (2) you retain this notice, and (3) you provide clear
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
#
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
# The core projects and autograders were primarily created by John DeNero
# ([email protected]) and Dan Klein ([email protected]).
# Student side autograding was added by Brad Miller, Nick Hay, and
# Pieter Abbeel ([email protected]).

# The agent here was written by Simon Parsons, based on the code in
# pacmanAgents.py

from pacman import Directions
from game import Agent
import api
import random
import game
import util

GAMMA = 0.9                  # Discount factor
rewardEmptyLocation = -0.01  # Reward for an empty location
rewardFood = 10              # Reward for eating food
rewardCapsule = 50           # Reward for eating a capsule
rewardEdibleGhost = 100      # Reward for reaching an edible (scared) ghost
rewardGhost = -1000          # Penalty for a non-edible ghost
dangerzone = 3               # Manhattan distance at which a ghost counts as dangerous
danger = 200                 # Action penalty applied near dangerous ghosts (see getAction)
iterations = 50              # Number of value-iteration sweeps per move
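
# A sketch of the backup computeValue performs below for each non-wall cell s:
#
#     V(s) <- max over legal actions a of [ R(s_a) + GAMMA * V(s_a) ]
#
# where s_a is the cell reached by taking action a from s. This is the
# deterministic form of the Bellman update: motion is treated as certain,
# with no probability weighting over outcomes.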

class MDPAgent(Agent):

    # Constructor: this gets run when we first invoke pacman.py
    def __init__(self):
        print "Starting up MDPAgent :D"
        name = "Pacman"
        self.map = None
        self.walls = None
        self.corners = None
        self.height = self.width = None
        self.values = {}
        self.policy = {}

    # Gets run after an MDPAgent object is created and once there is
    # game state to access.
    def registerInitialState(self, state):
        print "Running registerInitialState for MDPAgent!"
        print "I'm at:"
        print api.whereAmI(state)
        self.walls = api.walls(state)
        self.corners = api.corners(state)
        self.width = state.getWalls().width
        self.height = state.getWalls().height
        self.map = self.initial_map()
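        # Assumption about the coursework API, consistent with its use in
        # this file: api.walls(state) is a list of (x, y) wall coordinates,
        # and state.getWalls() is a Berkeley Grid exposing .width/.height.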

    # Maps an action to the grid cell it would move Pacman into.
    def getNextState(self, current_position, action):
        if Directions.NORTH == action:
            return current_position[0], current_position[1] + 1
        elif Directions.SOUTH == action:
            return current_position[0], current_position[1] - 1
        elif Directions.WEST == action:
            return current_position[0] - 1, current_position[1]
        elif Directions.EAST == action:
            return current_position[0] + 1, current_position[1]
        return current_position
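
    # For example (illustrative only): getNextState((3, 4), Directions.NORTH)
    # returns (3, 5), and any unrecognised action (e.g. Directions.STOP)
    # returns the current position unchanged.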

    # Builds a width-by-height grid: None marks a wall, -1 marks any other
    # (legal) cell. Indexed as PakMap[x][y] elsewhere in the class.
    def initial_map(self):
        PakMap = [
            [None if (i, j) in self.walls else -1
             for j in range(self.height)]
            for i in range(self.width)
        ]
        return PakMap
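
    # Illustrative only: on a 3x3 layout whose border cells are all walls,
    # initial_map() gives PakMap[1][1] == -1 and PakMap[0][0] is None.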

    # Debugging helper, kept commented out:
    #def printValues(self):
    #    print "Current Values of the Map:"
    #    for y in range(self.height - 1, -1, -1):        # Print rows top to bottom
    #        row = []
    #        for x in range(self.width):
    #            value = self.values.get((x, y), None)
    #            if value is None:
    #                row.append("  WALL  ")              # Placeholder for a wall
    #            else:
    #                row.append("{:8.2f}".format(value)) # Format values to 2 decimal places
    #        print " ".join(row)                         # Combine the row's entries into a string

    # Runs synchronous value iteration: each sweep computes new values from
    # a copy of the current values, then swaps the copy in.
    def valueIteration(self, state):
        for _ in range(iterations):
            new_values = self.values.copy()
            for x in range(self.width):
                for y in range(self.height):
                    if self.map[x][y] is not None:
                        new_values[(x, y)] = self.computeValue(state, (x, y))
            self.values = new_values
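
    # Illustrative only: with GAMMA = 0.9, 50 sweeps shrink the influence of
    # the starting values by roughly 0.9**50 (about 0.005), so the values are
    # effectively converged by the time a move is chosen.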

    # One Bellman backup for a single cell: the best one-step reward plus
    # discounted value over the currently legal actions. Note that
    # api.legalActions(state) is evaluated at Pacman's actual position, so
    # this is an approximation when `position` is elsewhere on the map.
    def computeValue(self, state, position):
        max_value = float('-inf')
        for action in api.legalActions(state):
            next_state = self.getNextState(position, action)
            reward = self.getReward(state, next_state)
            value = reward + GAMMA * self.values.get(next_state, 0)
            max_value = max(max_value, value)
        return max_value
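
    # Illustrative only: if a neighbouring food cell currently has V = 20,
    # the candidate value through it is rewardFood + GAMMA * 20
    # = 10 + 0.9 * 20 = 28 (ignoring the ghost term folded in by getReward).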

    # Scores a cell by proximity to ghosts: a penalty that decays with
    # distance to the nearest dangerous ghost, plus a bonus that grows with
    # proximity to the nearest edible (scared) ghost.
    def ghostReward(self, state, position):
        closestGhost = None
        closestGhostDist = float('inf')
        closestEdibleGhost = None
        closestEdibleGhostDist = float('inf')
        for g, is_edible in api.ghostStates(state):
            if not is_edible:
                dist = util.manhattanDistance(g, position)
                if dist < closestGhostDist:
                    closestGhostDist = dist
                    closestGhost = g
            else:
                dist = util.manhattanDistance(g, position)
                if dist < closestEdibleGhostDist:
                    closestEdibleGhostDist = dist
                    closestEdibleGhost = g
        _rewardGhost = 0
        if closestGhost is not None:
            if closestGhostDist <= dangerzone:
                _rewardGhost = rewardGhost / float(2 ** closestGhostDist)
            else:
                # float() avoids Python 2 integer division flooring the
                # penalty (e.g. -1000 / 10**4 would floor to -1, not -0.1)
                _rewardGhost = rewardGhost / float(10 ** closestGhostDist)
        _rewardEdibleGhost = 0
        if closestEdibleGhost is not None:
            _rewardEdibleGhost = rewardEdibleGhost * (1.5 / (closestEdibleGhostDist + 1))
        return _rewardGhost + _rewardEdibleGhost
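
    # Illustrative only: with rewardGhost = -1000 and dangerzone = 3, a
    # dangerous ghost 2 steps away contributes -1000 / 2**2 = -250, while one
    # 5 steps away contributes only -1000 / 10**5 = -0.01. An edible ghost
    # 2 steps away adds rewardEdibleGhost * (1.5 / 3) = 100 * 0.5 = 50.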

    # Base reward for a cell, layered with the ghost term from ghostReward.
    def getReward(self, state, position):
        reward = rewardEmptyLocation
        if position in api.food(state):
            reward = rewardFood
        elif position in api.capsules(state):
            nearby_ghosts = [g for g, is_edible in api.ghostStates(state)
                             if not is_edible]
            if any(util.manhattanDistance(g, position) <= dangerzone
                   for g in nearby_ghosts):
                # A capsule is worth much more when a dangerous ghost is close
                reward = rewardCapsule + 300
            else:
                reward = rewardCapsule
        ghostReward = self.ghostReward(state, position)
        return reward + ghostReward
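
    # Illustrative only: a food cell 2 steps from a dangerous ghost scores
    # rewardFood + (-1000 / 2**2) = 10 - 250 = -240, so value iteration
    # steers Pacman away from that food until the ghost moves off.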


    # This is what gets run in between multiple games
    def final(self, state):
        print "Looks like the game just ended!"

    # Chooses a move: run value iteration, then pick the legal action whose
    # destination cell has the highest value, with an extra penalty for
    # destinations close to a dangerous ghost.
    def getAction(self, state):
        self.valueIteration(state)
        best_action = None
        max_value = float('-inf')
        pacman = api.whereAmI(state)
        legal = api.legalActions(state)
        if Directions.STOP in legal:
            legal.remove(Directions.STOP)
        ghost_positions = [g for g, is_edible in api.ghostStates(state)
                           if not is_edible]
        for action in legal:
            next_state = self.getNextState(pacman, action)
            value = self.values.get(next_state, -100000000)
            ghost_distances = [util.manhattanDistance(next_state, g)
                               for g in ghost_positions]
            if ghost_distances and min(ghost_distances) <= dangerzone:
                # 1.0 keeps this a float division under Python 2; with an
                # integer 1 the penalty would be 0 for any distance > 0
                value -= danger * (1.0 / (min(ghost_distances) + 1))
            if value > max_value:
                max_value = value
                best_action = action
        self.policy[state] = best_action
        #self.printValues()
        #raw_input("Press Enter")
        return api.makeMove(best_action, api.legalActions(state))
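
# A minimal usage sketch, assuming the standard Berkeley pacman.py driver and
# a layout such as mediumClassic (names outside this file are unverified):
#
#     python pacman.py -p MDPAgent -l mediumClassic
#
# pacman.py looks the agent class up by name, constructs it, calls
# registerInitialState once the game starts, and then getAction every turn.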
