Reinforcement Learning - Project 3

Project 3

Course code: CSCE 5210

Section: 002

Pair: 14

Team members

Name: Naga Sai Sivani Tutika, Ketha Tirumuru
Email: NagaSaiSivaniTutika@my.unt.edu, [email protected]
ID: 11703058, 11597873
import numpy as np

Task T1
# Functions to build a policy with value iteration using Bellman's equation

# Function to create the environment (reward grid) for the agent
def initialize_environment(R1, R2, DESTINATION, HAZARD):
    environment = np.zeros(GRID_SIZE)
    environment[DESTINATION] = R1   # exit reward at the destination
    environment[HAZARD] = R2        # negative reward at the hazard
    return environment

# Check whether a state is inside the grid and not an obstacle
def is_valid_state(state, GRID_SIZE):
    row, col = state
    return ((0 <= row < GRID_SIZE[0]) and (0 <= col < GRID_SIZE[1])
            and (state not in OBSTACLE))

# Reward for a state
def get_reward(state, environment):
    return environment[state[0], state[1]]

# Probability of moving with action a when intended_action was chosen
def transProb(next_state, a, intended_action, OBSTACLE, prob=0.8):
    # intended action (default = 0.8)
    if a == intended_action:
        return prob
    # obstacle -> no movement
    elif next_state in OBSTACLE:
        return 0
    # 180-degree turn -> prohibited
    # (note: this condition is never true as written, so the opposite
    # direction also receives the unintended probability below)
    elif (a[0] + intended_action[0] == 0) and (a[0] + intended_action[0] == 1):
        return 0
    # unintended direction (default = (1 - 0.8) / 2 = 0.1)
    else:
        return (1 - prob) / 2.0

def value_iteration(environment, r):

    # values for checking convergence
    epsilon = 1e-6          # convergence threshold
    delta = float('inf')

    # initialize the value function U with the reward grid
    value_function = np.copy(environment)

    # policy grid to start with (stores the index of the best action per cell)
    policy = environment.copy()

    max_iterations = 500
    k = 0

    # run until converged or until max_iterations is reached
    while (delta > epsilon) and (k < max_iterations):
        k += 1
        delta = 0

        # store new values without disturbing the previous sweep
        temp_value_function = value_function.copy()

        # iterate through each state in the grid/environment
        for row in range(GRID_SIZE[0]):
            for col in range(GRID_SIZE[1]):
                state = (row, col)

                # OBSTACLE or out of bounds
                if not is_valid_state(state, GRID_SIZE):
                    continue

                # states with constant values
                if state == DESTINATION or state == HAZARD:
                    temp_value_function[row, col] = value_function[state]
                    continue

                max_value = float('-inf')
                best_action = 0
                old_value = value_function[row, col]   # for the convergence check

                # evaluate each possible intended action
                for intended_action_index, intended_action in enumerate(ACTIONS):

                    # expected value over intended and unintended moves
                    next_value = 0
                    for action_index, action in enumerate(ACTIONS):
                        next_state = (row + action[0], col + action[1])
                        if is_valid_state(next_state, GRID_SIZE):
                            next_value += transProb(next_state, action, intended_action, OBSTACLE) \
                                * value_function[next_state[0], next_state[1]]

                    # keep the best action so far
                    if next_value > max_value:
                        max_value = next_value
                        best_action = intended_action_index

                # Bellman backup: discounted best expected next value
                # (the live-in reward r is accepted as an argument but not added here)
                temp_value_function[row, col] = DISCOUNT_FACTOR * max_value
                policy[(row, col)] = best_action   # update policy with the best action index
                delta = max(delta, abs(old_value - temp_value_function[row, col]))

        value_function = temp_value_function

    return (value_function, policy)
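For reference, the update implemented by the loop above is the Bellman optimality backup

    V_{k+1}(s) = DISCOUNT_FACTOR * max_a sum_{s'} transProb(s' | s, a) * V_k(s')

with DESTINATION and HAZARD held at their fixed rewards; the live-in reward r is passed in but is not added inside the update.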

# Visualize the policy
def visualize_policy(policy):
    arrows = ['←', '→', '↑', '↓']   # same order as ACTIONS
    for row in range(GRID_SIZE[0]):
        for col in range(GRID_SIZE[1]):
            if (row, col) == DESTINATION:
                print('D', end='\t')
            elif (row, col) == HAZARD:
                print('H', end='\t')
            # elif (row, col) == START_POSITION:
            #     print('S', end='\t')
            elif not is_valid_state((row, col), GRID_SIZE):
                print('X', end='\t')   # obstacle
            else:
                action = int(policy[row, col])
                print(arrows[action], end='\t')
        print()

# test

# Create the environment
GRID_SIZE = (5, 5)
NUM_ACTIONS = 4

# Left, Right, Up, Down in (row, col) coordinates
ACTIONS = [(0, -1), (0, 1), (-1, 0), (1, 0)]

R1 = 10     # exit reward, range [10, 50, 100]
R2 = -5     # negative reward for the hazard, range [-5, -50, -500]
r = -5      # live-in reward, -5 or -1
DISCOUNT_FACTOR = 0.9

HAZARD = (2, 2)
DESTINATION = (0, 3)
START_POSITION = (3, 1)
OBSTACLE = [(0, 0), (0, 4), (4, 0), (4, 4)]

# Create environment
environment = initialize_environment(R1, R2, DESTINATION, HAZARD)

# Apply value iteration
optimal_value_function, policy = value_iteration(environment, r)
print("\nPolicy: \n")
visualize_policy(policy)

print("\nValue function: \n")
print(optimal_value_function)

Policy:

X → → D X
→ → → ↑ ←
→ ↑ H ↑ ←
→ ↑ → ↑ ←
X ↑ ↑ ↑ X

Value function:

[[ 0.          6.74561812  8.4879587  10.          0.        ]
 [ 5.51121011  7.04764314  7.56503407  9.19004158  7.19540404]
 [ 4.85452436  5.53004519 -5.          7.35113498  6.42860151]
 [ 4.18758786  5.20927943  5.28346748  6.73034413  5.42442169]
 [ 0.          4.16946553  4.65316541  5.26463239  0.        ]]
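As a quick sanity check of the table above, the converged value of cell (0, 2) can be recomputed by hand. Under the transition model as coded, the intended move gets probability 0.8 and every other in-bounds move gets 0.1 (the 180-degree branch in transProb never fires as written):

# Bellman backup at state (0, 2) with intended action Right:
# reachable neighbours are (0, 1), (0, 3) = DESTINATION and (1, 2); Up is out of bounds
v = 0.9 * (0.8 * 10.0 + 0.1 * 6.74561812 + 0.1 * 7.56503407)
print(v)   # ~8.4879587, matching the entry above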

Implementing Tasks
# environment settings
# same environment is used for all Tasks

GRID_SIZE = (5,5)
NUM_ACTIONS = 4

# Left, Right, Up, Down in (row, col) coordinates
ACTIONS = [(0, -1), (0, 1), (-1, 0), (1, 0)]

HAZARD = (2, 2)
DESTINATION = (0, 3)
START_POSITION = (3, 1)
OBSTACLE = [(0, 0), (0, 4), (4, 0), (4, 4)]

Task - T1
R1 = 10
R2 = -5
r = -5

# Create environment
environment = initialize_environment(R1, R2, DESTINATION, HAZARD)

# Apply value iteration
optimal_value_function, policy = value_iteration(environment, r)
print("\nPolicy (P1): \n")
visualize_policy(policy)

print("\nValue function: \n")
print(optimal_value_function)

Policy (P1):

X → → D X
→ → → ↑ ←
→ ↑ H ↑ ←
→ ↑ → ↑ ←
X ↑ ↑ ↑ X

Value function:

[[ 0.          6.74561812  8.4879587  10.          0.        ]
 [ 5.51121011  7.04764314  7.56503407  9.19004158  7.19540404]
 [ 4.85452436  5.53004519 -5.          7.35113498  6.42860151]
 [ 4.18758786  5.20927943  5.28346748  6.73034413  5.42442169]
 [ 0.          4.16946553  4.65316541  5.26463239  0.        ]]
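START_POSITION is defined above but is not used by any of the printed results. As an optional illustration (a small sketch, not part of the original tasks), the greedy policy P1 can be rolled out from the start cell to confirm that it leads to the destination while avoiding the hazard and the obstacles:

def follow_policy(policy, start, max_steps=25):
    # Repeatedly take the action stored in the policy grid until a terminal
    # cell is reached or the step limit runs out.
    path = [start]
    state = start
    for _ in range(max_steps):
        if state == DESTINATION or state == HAZARD:
            break
        action = ACTIONS[int(policy[state])]
        next_state = (state[0] + action[0], state[1] + action[1])
        if not is_valid_state(next_state, GRID_SIZE):
            break   # the greedy move would leave the grid or hit an obstacle
        state = next_state
        path.append(state)
    return path

print(follow_policy(policy, START_POSITION))

With policy P1 above this should trace (3, 1) → (2, 1) → (1, 1) → (1, 2) → (1, 3) → (0, 3), i.e. straight to the destination.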

Task - T3: part 1


R1 = 50
R2 = -50
r = -5

# Create environment
environment = initialize_environment(R1, R2, DESTINATION, HAZARD)

# Apply value iteration
optimal_value_function, policy = value_iteration(environment, r)
print("\nPolicy: (P2) \n")
visualize_policy(policy)

print("\nValue function: \n")
print(optimal_value_function)

Policy: (P2)

X → → D X
→ → → ↑ ←
↑ ↑ H ↑ ↑
↑ ↑ → ↑ ↑
X ↑ → ↑ X

Value function:

[[  0.          33.25206812  42.13072607  50.           0.        ]
 [ 25.31103127  32.42161528  34.86711045  45.35749967  35.42768859]
 [ 21.86076058  22.74018676 -50.          33.67741954  30.7809873 ]
 [ 17.66890273  21.4350619   21.28535829  30.55256633  24.91204174]
 [  0.          17.29159918  20.6483922   23.85620279   0.        ]]

Task - T3: part 2


R1 = 100
R2 = -500
r = -5

# Create environment
environment = initialize_environment(R1, R2, DESTINATION, HAZARD)

# Apply value iteration
optimal_value_function, policy = value_iteration(environment, r)
print("\nPolicy: (P3) \n")
visualize_policy(policy)

print("\nValue function: \n")
print(optimal_value_function)

Policy: (P3)

X → → D X
→ ↑ → ↑ ←
↑ ↑ H ↑ ↑
↑ ← → → ↑
X → → ↑ X

Value function:

[[   0.           62.02522952   79.89598344  100.            0.        ]
 [  38.6474947    50.00134953   25.70791979   81.96827756   63.63604508]
 [  29.40581223   -4.98670044 -500.           21.4146747    51.32094723]
 [  22.53798954   15.17561011  -19.92293294   30.87366273   39.72971159]
 [   0.           13.22360647   16.46916947   23.7112622     0.        ]]

Task - T3: part 3

(Varying the live-in reward r. Because the update in value_iteration does not add r, each run below reproduces the corresponding result obtained with r = -5.)

R1 = 10
R2 = -5
r = -1

# Create environment
environment = initialize_environment(R1, R2, DESTINATION, HAZARD)

# Apply value iteration
optimal_value_function, policy = value_iteration(environment, r)
print("\nPolicy: \n")
visualize_policy(policy)
print("\nValue function: \n")
print(optimal_value_function)

Policy:

X → → D X
→ → → ↑ ←
→ ↑ H ↑ ←
→ ↑ → ↑ ←
X ↑ ↑ ↑ X

Value function:

[[ 0.          6.74561812  8.4879587  10.          0.        ]
 [ 5.51121011  7.04764314  7.56503407  9.19004158  7.19540404]
 [ 4.85452436  5.53004519 -5.          7.35113498  6.42860151]
 [ 4.18758786  5.20927943  5.28346748  6.73034413  5.42442169]
 [ 0.          4.16946553  4.65316541  5.26463239  0.        ]]

R1 = 50
R2 = -50
r = -1

# Create environment
environment = initialize_environment(R1, R2, DESTINATION, HAZARD)

# Apply value iteration
optimal_value_function, policy = value_iteration(environment, r)
print("\nPolicy: \n")
visualize_policy(policy)

print("\nValue function: \n")
print(optimal_value_function)

Policy:

X → → D X
→ → → ↑ ←
↑ ↑ H ↑ ↑
↑ ↑ → ↑ ↑
X ↑ → ↑ X

Value function:

[[  0.          33.25206812  42.13072607  50.           0.        ]
 [ 25.31103127  32.42161528  34.86711045  45.35749967  35.42768859]
 [ 21.86076058  22.74018676 -50.          33.67741954  30.7809873 ]
 [ 17.66890273  21.4350619   21.28535829  30.55256633  24.91204174]
 [  0.          17.29159918  20.6483922   23.85620279   0.        ]]

R1 = 100
R2 = -500
r = -1

# Create environment
environment = initialize_environment(R1, R2, DESTINATION, HAZARD)

# Apply value iteration
optimal_value_function, policy = value_iteration(environment, r)
print("\nPolicy: \n")
visualize_policy(policy)

print("\nValue function: \n")
print(optimal_value_function)

Policy:

X → → D X
→ ↑ → ↑ ←
↑ ↑ H ↑ ↑
↑ ← → → ↑
X → → ↑ X

Value function:

[[   0.           62.02522952   79.89598344  100.            0.        ]
 [  38.6474947    50.00134953   25.70791979   81.96827756   63.63604508]
 [  29.40581223   -4.98670044 -500.           21.4146747    51.32094723]
 [  22.53798954   15.17561011  -19.92293294   30.87366273   39.72971159]
 [   0.           13.22360647   16.46916947   23.7112622     0.        ]]
