The document presents code for training an agent using Q-learning to navigate a FrozenLake environment in OpenAI Gym. It initializes the environment and a Q-table of zeros. It then trains the agent over 1000 episodes, updating the Q-table values using the Q-learning update rule. Finally, it evaluates the trained agent over 100 episodes and achieves a 100% success rate, indicating it has learned an optimal policy.
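The Q-learning update rule referred to above takes the standard tabular form

    Q(s, a) ← Q(s, a) + α · [ r + γ · max_a' Q(s', a') − Q(s, a) ]

with learning rate α = 0.5 and discount factor γ = 0.9 in the code below.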


ASSIGNMENT - 2

In [1]:

import gym
import random
import matplotlib.pyplot as plt
import numpy as np

In [3]:

# MAKING A GYM ENVIRONMENT FOR THE GAME


environment = gym.make("FrozenLake-v1", is_slippery=False)
environment.reset()
environment.render()
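Note: with gym 0.26+ (the API version the reset()/step() calls in this notebook follow), render() only produces visible output if a render_mode was passed to make(). A minimal sketch of the same cell under that assumption, using the text-based "ansi" mode:

# Assumption: gym >= 0.26; "ansi" mode makes render() return the map as a string
environment = gym.make("FrozenLake-v1", is_slippery=False, render_mode="ansi")
environment.reset()
print(environment.render())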

In [4]:
# Q TABLE FOR THE GAME
# Option 1: hard-code the table shape
# qtable = np.zeros((16, 4))
# Option 2: read the shape from the environment
nb_states = environment.observation_space.n    # = 16
nb_actions = environment.action_space.n        # = 4
qtable = np.zeros((nb_states, nb_actions))
qtable

Out[4]:
array([[0., 0., 0., 0.],
[0., 0., 0., 0.],
[0., 0., 0., 0.],
[0., 0., 0., 0.],
[0., 0., 0., 0.],
[0., 0., 0., 0.],
[0., 0., 0., 0.],
[0., 0., 0., 0.],
[0., 0., 0., 0.],
[0., 0., 0., 0.],
[0., 0., 0., 0.],
[0., 0., 0., 0.],
[0., 0., 0., 0.],
[0., 0., 0., 0.],
[0., 0., 0., 0.],
[0., 0., 0., 0.]])

In [5]:
# random.choice(["LEFT", "DOWN", "RIGHT", "UP"])
environment.action_space.sample()
Out[5]:

In [6]:
environment.step(2)
environment.render()
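In FrozenLake the discrete actions are 0 = LEFT, 1 = DOWN, 2 = RIGHT, 3 = UP, so step(2) above moves the agent one cell to the right. A small hypothetical helper (not in the original notebook) to translate action ids into names:

# Hypothetical helper: FrozenLake action ids in order LEFT, DOWN, RIGHT, UP
ACTION_NAMES = ["LEFT", "DOWN", "RIGHT", "UP"]
print(ACTION_NAMES[2])   # RIGHT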

In [7]:

# 1. Randomly choose an action
action = environment.action_space.sample()

# 2. Implement this action and move the agent in the desired direction
new_state, reward, terminated, truncated, info = environment.step(action)
# Display the results (reward and map)
environment.render()
print(f'Reward = {reward}')

Reward = 0.0
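For clarity: in gym 0.26+ step() returns five values, (observation, reward, terminated, truncated, info), and an episode is over when either terminated (goal or hole) or truncated (step limit) is True. The step() calls in this notebook therefore unpack the result as

new_state, reward, terminated, truncated, info = environment.step(action)
done = terminated or truncated   # episode ends on goal, hole, or step limit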

In [8]:

plt.rcParams['figure.dpi'] = 300
plt.rcParams.update({'font.size': 17})

# We re-initialize the Q-table
qtable = np.zeros((environment.observation_space.n, environment.action_space.n))

# Hyperparameters
episodes = 1000   # Total number of episodes
alpha = 0.5       # Learning rate
gamma = 0.9       # Discount factor

# List of outcomes to plot
outcomes = []

print('Q-table before training:')
print(qtable)

# Training
for _ in range(episodes):
    state = environment.reset()[0]
    done = False

    # By default, we consider our outcome to be a failure
    outcomes.append("Failure")

    # Until the agent falls into a hole or reaches the goal, keep training it
    while not done:
        # Choose the action with the highest value in the current state
        if np.max(qtable[state]) > 0:
            action = np.argmax(qtable[state])

        # If there's no best action (only zeros), take a random one
        else:
            action = environment.action_space.sample()

        # Implement this action and move the agent in the desired direction
        new_state, reward, terminated, truncated, info = environment.step(action)
        done = terminated or truncated

        # Update Q(s,a) with the Q-learning rule
        qtable[state, action] = qtable[state, action] + \
            alpha * (reward + gamma * np.max(qtable[new_state]) - qtable[state, action])

        # Update our current state
        state = new_state

        # If we got a reward, the agent reached the goal, so the outcome is a success
        if reward:
            outcomes[-1] = "Success"

print()
print('===========================================')
print('Q-table after training:')
print(qtable)

# Plot outcomes
plt.figure(figsize=(12, 5))
plt.xlabel("Run number")
plt.ylabel("Outcome")
ax = plt.gca()
ax.set_facecolor('#efeeea')
plt.bar(range(len(outcomes)), outcomes, color="#0A047A", width=1.0)
plt.show()
Q-table before training:
[[0. 0. 0. 0.]
[0. 0. 0. 0.]
[0. 0. 0. 0.]
[0. 0. 0. 0.]
[0. 0. 0. 0.]
[0. 0. 0. 0.]
[0. 0. 0. 0.]
[0. 0. 0. 0.]
[0. 0. 0. 0.]
[0. 0. 0. 0.]
[0. 0. 0. 0.]
[0. 0. 0. 0.]
[0. 0. 0. 0.]
[0. 0. 0. 0.]
[0. 0. 0. 0.]
[0. 0. 0. 0.]]

===========================================
Q-table after training:
[[0. 0. 0.59049 0. ]
[0. 0. 0.6561 0. ]
[0. 0.729 0. 0. ]
[0. 0. 0. 0. ]
[0. 0. 0. 0. ]
[0. 0. 0. 0. ]
[0. 0.81 0. 0. ]
[0. 0. 0. 0. ]
[0. 0. 0. 0. ]
[0. 0. 0.2784375 0. ]
[0. 0.9 0. 0. ]
[0. 0. 0. 0. ]
[0. 0. 0. 0. ]
[0. 0. 0. 0. ]
[0. 0. 1. 0. ]
[0. 0. 0. 0. ]]
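A quick sanity check, not part of the original notebook: the only reward is 1 at the goal, so once training has converged the best Q-value of each state on the path the agent follows below is simply the discounted value of that final reward, i.e. a power of γ = 0.9. The on-path entries above (0.59049, 0.6561, 0.729, 0.81, 0.9, 1.0) match; the lone 0.2784375 entry appears to be an off-path state that has not fully converged.

# Successive powers of the discount factor reproduce the learned on-path values
print([round(gamma ** k, 5) for k in range(6)])   # [1.0, 0.9, 0.81, 0.729, 0.6561, 0.59049]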

In [9]:
from IPython.display import clear_output
import time

state = environment.reset()[0]
done = False
sequence = []

while not done:
    # Choose the action with the highest value in the current state
    if np.max(qtable[state]) > 0:
        action = np.argmax(qtable[state])
    # If there's no best action (only zeros), take a random one
    else:
        action = environment.action_space.sample()

    # Add the action to the sequence
    sequence.append(action)

    # Implement this action and move the agent in the desired direction
    new_state, reward, terminated, truncated, info = environment.step(action)
    done = terminated or truncated

    # Update our current state
    state = new_state

    # Update the render
    clear_output(wait=True)
    environment.render()
    time.sleep(1)

print(f"Sequence = {sequence}")

Sequence = [2, 2, 1, 1, 1, 2]
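Decoded with the hypothetical ACTION_NAMES helper from earlier, the sequence [2, 2, 1, 1, 1, 2] reads RIGHT, RIGHT, DOWN, DOWN, DOWN, RIGHT, a shortest path to the goal on the non-slippery 4x4 map:

print([ACTION_NAMES[a] for a in sequence])   # ['RIGHT', 'RIGHT', 'DOWN', 'DOWN', 'DOWN', 'RIGHT']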

In [10]:
episodes = 100
nb_success = 0

# Evaluation
for _ in range(episodes):
    state = environment.reset()[0]
    done = False

    # Until the agent falls into a hole or reaches the goal, keep evaluating it
    while not done:
        # Choose the action with the highest value in the current state
        if np.max(qtable[state]) > 0:
            action = np.argmax(qtable[state])

        # If there's no best action (only zeros), take a random one
        else:
            action = environment.action_space.sample()

        # Implement this action and move the agent in the desired direction
        new_state, reward, terminated, truncated, info = environment.step(action)
        done = terminated or truncated

        # Update our current state
        state = new_state

        # When we get a reward, it means we solved the game
        nb_success += reward

# Let's check our success rate!
print(f"Success rate = {nb_success / episodes * 100}%")

Success rate = 100.0%
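One last inspection, a sketch that is not part of the assignment: the greedy policy can be read directly off the Q-table with argmax. States whose rows are still all zeros (holes and the goal) default to action 0 here.

# Best action per state, laid out on the 4x4 lake grid (assumes the standard 16-state map)
policy = np.argmax(qtable, axis=1).reshape(4, 4)
print(policy)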
