Open navigation menu
Close suggestions
Search
Search
en
Change Language
Upload
Sign in
Sign in
Download free for days
0 ratings
0% found this document useful (0 votes)
27 views
Class ActorCritic
Uploaded by
Laura Cobeña
AI-enhanced title
Copyright
© © All Rights Reserved
Available Formats
Download as PDF, TXT or read online on Scribd
Download now
Download
Save class ActorCritic For Later
Download
Save
Save class ActorCritic For Later
0%
0% found this document useful, undefined
0%
, undefined
Embed
Share
Print
Report
0 ratings
0% found this document useful (0 votes)
27 views
Class ActorCritic
Uploaded by
Laura Cobeña
AI-enhanced title
Copyright
© © All Rights Reserved
Available Formats
Download as PDF, TXT or read online on Scribd
Download now
Download
Save class ActorCritic For Later
Carousel Previous
Carousel Next
Save
Save class ActorCritic For Later
0%
0% found this document useful, undefined
0%
, undefined
Embed
Share
Print
Report
Download now
Download
You are on page 1
/ 1
Search
Fullscreen
class ActorCritic(nn.Module):
    """Actor-critic network made of two independent two-layer heads.

    The critic head maps a state to a single scalar value estimate; the
    actor head maps the same state to a softmax distribution over
    ``num_actions`` actions.  The two heads do not share parameters.

    Args:
        num_inputs: Size of the flat observation vector.
        num_actions: Number of discrete actions.
        hidden_size: Width of the hidden layer in each head.
        learning_rate: Accepted for interface compatibility; not used by
            the module itself (the optimizer is built in ``a2c``).
    """

    def __init__(self, num_inputs, num_actions, hidden_size,
                 learning_rate=3e-4):
        super().__init__()

        self.num_actions = num_actions

        self.critic_linear1 = nn.Linear(num_inputs, hidden_size)
        self.critic_linear2 = nn.Linear(hidden_size, 1)

        self.actor_linear1 = nn.Linear(num_inputs, hidden_size)
        self.actor_linear2 = nn.Linear(hidden_size, num_actions)

    def forward(self, state):
        """Return ``(value, policy_dist)`` for a single unbatched state.

        Args:
            state: One observation (array-like of length ``num_inputs``).

        Returns:
            A tuple ``(value, policy_dist)`` of tensors shaped ``(1, 1)``
            and ``(1, num_actions)``; ``policy_dist`` sums to 1 along
            ``dim=1``.
        """
        # A leading batch dimension of 1 is added here, so callers pass a
        # single raw observation.
        state = torch.as_tensor(state, dtype=torch.float32).unsqueeze(0)

        value = F.relu(self.critic_linear1(state))
        value = self.critic_linear2(value)

        policy_dist = F.relu(self.actor_linear1(state))
        policy_dist = F.softmax(self.actor_linear2(policy_dist), dim=1)

        return value, policy_dist


def moving_average(a, n=3):
    """Return the length-``n`` simple moving average of ``a``.

    Uses a cumulative sum so each window is computed in O(1).  The result
    has ``len(a) - n + 1`` entries (empty when ``len(a) < n``).
    """
    ret = np.cumsum(a, dtype=float)
    ret[n:] = ret[n:] - ret[:-n]
    return ret[n - 1:] / n


def a2c(env):
    """Train an ``ActorCritic`` model on ``env`` with advantage actor-critic.

    One gradient update is performed per episode.  The hyperparameters
    ``hidden_size``, ``learning_rate``, ``max_episodes``, ``num_steps`` and
    ``GAMMA`` are read from module scope (they are defined outside this
    block).

    Args:
        env: Environment exposing ``observation_space.shape``,
            ``action_space.n``, ``reset() -> (state, info)`` and
            ``step(action) -> (state, reward, terminated, truncated, info)``.

    Returns:
        The trained ``ActorCritic`` instance.
    """
    num_inputs = env.observation_space.shape[0]
    num_outputs = env.action_space.n

    # Joint actor + critic model and its optimizer.
    actor_critic = ActorCritic(num_inputs, num_outputs, hidden_size)
    ac_optimizer = optim.Adam(actor_critic.parameters(), lr=learning_rate)

    all_lengths = []
    average_lengths = []
    all_rewards = []

    for episode in range(max_episodes):
        log_probs = []
        values = []
        rewards = []
        # Reset per episode so the entropy bonus does not grow without
        # bound across episodes.
        entropy_term = 0
        Qval = 0.0

        state, info = env.reset()  # restart the environment / simulation

        # Step the environment until the episode ends or the step budget
        # is exhausted.
        for steps in range(num_steps):
            # The model returns the state value and the action distribution.
            value, policy_dist = actor_critic(state)
            probs = policy_dist.squeeze(0)

            # Sample an action from the policy; the NumPy copy is detached
            # because sampling itself is not differentiated.
            action = np.random.choice(num_outputs, p=probs.detach().numpy())
            log_prob = torch.log(probs[action])
            # Shannon entropy -sum(p * log p), kept as a tensor so the
            # bonus actually influences the actor.  The clamp avoids
            # 0 * log(0) = nan if a probability underflows.
            entropy = -(probs * torch.log(probs.clamp_min(1e-8))).sum()

            new_state, reward, terminated, truncated, _ = env.step(action)
            done = terminated or truncated

            rewards.append(reward)
            # Keep the value attached to the graph: detaching it here would
            # leave the critic loss with no gradient and the critic would
            # never learn.
            values.append(value.squeeze())
            log_probs.append(log_prob)
            entropy_term = entropy_term + entropy
            state = new_state

            if done or steps == num_steps - 1:
                if terminated:
                    # A true terminal state has no future return.
                    Qval = 0.0
                else:
                    # Truncated / out of budget: bootstrap from the critic.
                    with torch.no_grad():
                        Qval = actor_critic(new_state)[0].item()
                all_rewards.append(np.sum(rewards))
                all_lengths.append(steps)
                average_lengths.append(np.mean(all_lengths[-10:]))
                if episode % 10 == 0:
                    sys.stdout.write(
                        "episode: {}, reward: {}, total length: {}, "
                        "average length: {} \n".format(
                            episode, np.sum(rewards), steps,
                            average_lengths[-1]))
                break

        if not rewards:
            # num_steps <= 0: nothing was collected, nothing to learn from.
            continue

        # Compute discounted returns (Q values) backwards from the tail.
        returns = []
        for reward in reversed(rewards):
            Qval = reward + GAMMA * Qval
            returns.append(Qval)
        returns.reverse()

        # Update actor and critic.
        Qvals = torch.tensor(returns, dtype=torch.float32)
        values = torch.stack(values)
        log_probs = torch.stack(log_probs)

        advantage = Qvals - values
        # The advantage is a fixed weight for the policy gradient, so it is
        # detached in the actor term; the critic term keeps the gradient.
        actor_loss = (-log_probs * advantage.detach()).mean()
        critic_loss = 0.5 * advantage.pow(2).mean()
        # Entropy is a bonus, hence subtracted from the loss.
        ac_loss = actor_loss + critic_loss - 0.001 * entropy_term

        ac_optimizer.zero_grad()
        ac_loss.backward()
        ac_optimizer.step()

    # Smoothed reward curve for plotting.
    # NOTE(review): the plotting calls themselves are not visible in this
    # chunk, so the smoothed series is currently unused - confirm.
    all_rewards = np.asarray(all_rewards)
    smoothend_rewards = moving_average(all_rewards, 30)

    return actor_critic
You might also like
SOP For Computer System Validation
PDF
0% (2)
SOP For Computer System Validation
30 pages
Xact User
PDF
No ratings yet
Xact User
268 pages
REINFORCE_Algorithm
PDF
No ratings yet
REINFORCE_Algorithm
15 pages
import gym
PDF
No ratings yet
import gym
4 pages
Qlearning Py
PDF
No ratings yet
Qlearning Py
3 pages
Quadcopter
PDF
No ratings yet
Quadcopter
7 pages
Ass1 Merged Merged
PDF
No ratings yet
Ass1 Merged Merged
19 pages
Py Code Example 4 1 Gradient MC Evaluation
PDF
No ratings yet
Py Code Example 4 1 Gradient MC Evaluation
4 pages
Ass1 Merged Merged
PDF
No ratings yet
Ass1 Merged Merged
15 pages
Frozen Lake
PDF
No ratings yet
Frozen Lake
6 pages
RBF Based Reinforcement Learning
PDF
No ratings yet
RBF Based Reinforcement Learning
14 pages
Part 3 - Building A Deep Q-Network To Play Gridworld - Learning Instability and Target Networks - by NandaKishore Joshi - Towards Data Science
PDF
No ratings yet
Part 3 - Building A Deep Q-Network To Play Gridworld - Learning Instability and Target Networks - by NandaKishore Joshi - Towards Data Science
7 pages
Ass 2
PDF
No ratings yet
Ass 2
4 pages
Assignment Week6 AI4ICPS
PDF
No ratings yet
Assignment Week6 AI4ICPS
11 pages
Practical No4,5
PDF
No ratings yet
Practical No4,5
7 pages
Practical
PDF
No ratings yet
Practical
6 pages
Part 2 - Building A Deep Q-Network To Play Gridworld - Catastrophic Forgetting and Experience Replay - by NandaKishore Joshi - Towards Data Science
PDF
No ratings yet
Part 2 - Building A Deep Q-Network To Play Gridworld - Catastrophic Forgetting and Experience Replay - by NandaKishore Joshi - Towards Data Science
8 pages
ML - 6 - Jupyter Notebook
PDF
No ratings yet
ML - 6 - Jupyter Notebook
5 pages
Code Principale de Ait Omar
PDF
No ratings yet
Code Principale de Ait Omar
13 pages
Math Lab 1
PDF
No ratings yet
Math Lab 1
7 pages
Reinforcement Learning - Project 3
PDF
No ratings yet
Reinforcement Learning - Project 3
9 pages
01 Module 2 Neural Network Based Reinforcement Learning
PDF
No ratings yet
01 Module 2 Neural Network Based Reinforcement Learning
133 pages
Class-Work-1 (26-08-2024)
PDF
No ratings yet
Class-Work-1 (26-08-2024)
5 pages
RL UNIT V QA (1)
PDF
No ratings yet
RL UNIT V QA (1)
13 pages
lab-report-03
PDF
No ratings yet
lab-report-03
14 pages
Lecture2 Drl A
PDF
No ratings yet
Lecture2 Drl A
39 pages
Week 2 - Lab
PDF
No ratings yet
Week 2 - Lab
9 pages
Walking Through Original DQN Paper - by Stas Olekhnovich - Medium
PDF
No ratings yet
Walking Through Original DQN Paper - by Stas Olekhnovich - Medium
13 pages
201CS240-ML OBTR 2 (1)
PDF
No ratings yet
201CS240-ML OBTR 2 (1)
16 pages
FROZENֹLAKE
PDF
No ratings yet
FROZENֹLAKE
2 pages
Codigo Modelo
PDF
No ratings yet
Codigo Modelo
5 pages
Initialization
PDF
No ratings yet
Initialization
16 pages
Neural Network Assignment
PDF
No ratings yet
Neural Network Assignment
6 pages
RNN + RL: Shusen Wang
PDF
No ratings yet
RNN + RL: Shusen Wang
51 pages
Software Laboratory II Code
PDF
No ratings yet
Software Laboratory II Code
27 pages
(P) Program AIO
PDF
No ratings yet
(P) Program AIO
22 pages
Machine Learning practical file
PDF
No ratings yet
Machine Learning practical file
31 pages
Training With Proximal Policy Optimization Training With Proximal Policy Optimization
PDF
No ratings yet
Training With Proximal Policy Optimization Training With Proximal Policy Optimization
7 pages
drl_v5
PDF
No ratings yet
drl_v5
64 pages
IBest_DeepLearning
PDF
No ratings yet
IBest_DeepLearning
123 pages
Deep RL Tutorial Small
PDF
No ratings yet
Deep RL Tutorial Small
66 pages
Implement the Knn (2)
PDF
No ratings yet
Implement the Knn (2)
5 pages
Ee126 Project 1
PDF
No ratings yet
Ee126 Project 1
5 pages
txt
PDF
No ratings yet
txt
7 pages
ML Record Print
PDF
No ratings yet
ML Record Print
20 pages
Ad Py
PDF
No ratings yet
Ad Py
2 pages
R Deep Neural Network Step by Step
PDF
No ratings yet
R Deep Neural Network Step by Step
27 pages
neural net python sleep study -
PDF
No ratings yet
neural net python sleep study -
3 pages
Assignment Ai Paltforms Mostafa Hazem
PDF
No ratings yet
Assignment Ai Paltforms Mostafa Hazem
5 pages
Assignment 1: Q1. Task Description
PDF
No ratings yet
Assignment 1: Q1. Task Description
12 pages
Autoencoder From Scratch
PDF
No ratings yet
Autoencoder From Scratch
21 pages
Soft Computing Practical Teacher Manual
PDF
No ratings yet
Soft Computing Practical Teacher Manual
87 pages
Submitted By: Ms Raheela Submitted By: Numair Amin Reg No: FA17-BCS-061-B Date: 13-11-2020
PDF
No ratings yet
Submitted By: Ms Raheela Submitted By: Numair Amin Reg No: FA17-BCS-061-B Date: 13-11-2020
13 pages
Exam_Prep_Exercises034534123124
PDF
No ratings yet
Exam_Prep_Exercises034534123124
20 pages
Deep Learning Lectures - 2
PDF
No ratings yet
Deep Learning Lectures - 2
73 pages
Week 4 - Lab
PDF
No ratings yet
Week 4 - Lab
7 pages
Python Code PDF
PDF
No ratings yet
Python Code PDF
3 pages
Assignment-1 (MLP From Scratch) : Roll No: EDM18B055
PDF
No ratings yet
Assignment-1 (MLP From Scratch) : Roll No: EDM18B055
1 page
Machine Learning - Lab Manual
PDF
No ratings yet
Machine Learning - Lab Manual
35 pages
Adaptive Linear Neuron Using Linear (Identity) Activation Function With Batch Gradient Method
PDF
No ratings yet
Adaptive Linear Neuron Using Linear (Identity) Activation Function With Batch Gradient Method
19 pages
Week 6 - Lab
PDF
No ratings yet
Week 6 - Lab
5 pages
Profound Python Data Science
From Everand
Profound Python Data Science
Onder Teker
No ratings yet
LTRDCN-2079 ACI and OpenShift
PDF
No ratings yet
LTRDCN-2079 ACI and OpenShift
58 pages
Creating Descriptive Flex Fields
PDF
No ratings yet
Creating Descriptive Flex Fields
9 pages
sfc
PDF
No ratings yet
sfc
8 pages
OTT-Existing Censorship Laws and Recommendations
PDF
No ratings yet
OTT-Existing Censorship Laws and Recommendations
23 pages
8 Channel Current To Modbus Rtu Dat10017 I
PDF
No ratings yet
8 Channel Current To Modbus Rtu Dat10017 I
3 pages
Study of Consumer Behaviour Towards Reliance Jio: Executive Summary
PDF
No ratings yet
Study of Consumer Behaviour Towards Reliance Jio: Executive Summary
41 pages
Lect 1a-Cadastral Modules
PDF
No ratings yet
Lect 1a-Cadastral Modules
22 pages
Logcat 1714497243765
PDF
No ratings yet
Logcat 1714497243765
8 pages
Rashtrasant Tukadoji Maharaj Nagpur University: AS231301 8H78XB
PDF
No ratings yet
Rashtrasant Tukadoji Maharaj Nagpur University: AS231301 8H78XB
58 pages
New CG gr.3
PDF
No ratings yet
New CG gr.3
6 pages
DM - Graph Connectivity: NGUYEN Hoang Thach
PDF
No ratings yet
DM - Graph Connectivity: NGUYEN Hoang Thach
16 pages
The English Portfolio: Table of Content
PDF
No ratings yet
The English Portfolio: Table of Content
2 pages
Analytics 2022 12 26 020011
PDF
No ratings yet
Analytics 2022 12 26 020011
181 pages
Defense Against Distributed Dos Attack Detection by Using Intelligent Evolutionary Algorithm
PDF
No ratings yet
Defense Against Distributed Dos Attack Detection by Using Intelligent Evolutionary Algorithm
12 pages
TestKing - Lpi.117 102.general - Linux.q.and.a.v2.0 SSG
PDF
No ratings yet
TestKing - Lpi.117 102.general - Linux.q.and.a.v2.0 SSG
71 pages
TEAM - 8 (r2)
PDF
No ratings yet
TEAM - 8 (r2)
18 pages
Iris Imaging For Health Diagnostics: Tania Weidan Yu
PDF
No ratings yet
Iris Imaging For Health Diagnostics: Tania Weidan Yu
51 pages
Full-Disk Backup of Your PC With Macrium Reflect
PDF
No ratings yet
Full-Disk Backup of Your PC With Macrium Reflect
10 pages
HDL Designer Series Language Support Guide: Release v2019.4
PDF
No ratings yet
HDL Designer Series Language Support Guide: Release v2019.4
82 pages
Unit 1 Mod 1 Structuring The Use Case Model
PDF
No ratings yet
Unit 1 Mod 1 Structuring The Use Case Model
28 pages
Resume_Rishabh_Raj
PDF
No ratings yet
Resume_Rishabh_Raj
1 page
InsightVM SlideDeck PDF
PDF
No ratings yet
InsightVM SlideDeck PDF
191 pages
Smsno 1
PDF
No ratings yet
Smsno 1
2 pages
Alpha Sense
PDF
No ratings yet
Alpha Sense
5 pages
The Art of Penetration Testing a Practical Guide
PDF
50% (2)
The Art of Penetration Testing a Practical Guide
221 pages
Setup SAP Business One Mailer With Office 365 Email Account
PDF
No ratings yet
Setup SAP Business One Mailer With Office 365 Email Account
5 pages
Project Format FOR AKTU
PDF
No ratings yet
Project Format FOR AKTU
18 pages
pdf
PDF
No ratings yet
pdf
5 pages