# Python code example 4.1: Gradient MC evaluation
##################
# Seed NumPy and TensorFlow with the same value. The seeding calls sit
# between the imports, ahead of the Keras imports that follow.
seed = 543
from numpy import random
random.seed(seed)  # seeds NumPy's global random generator
# NOTE(review): `set_random_seed` is imported from the top-level tensorflow
# package, which looks like the TF 1.x spelling -- confirm the installed
# TensorFlow version still provides it.
from tensorflow import set_random_seed
set_random_seed(seed)
import numpy as np
from keras.models import Sequential
from keras.layers import Dense
import matplotlib.pyplot as plt
# parameter settings
nrow = 4  # extent of the first state index (used as range(nrow))
ncol = 4  # extent of the second state index (used as range(ncol))
discount = 1.0
eps = 1.0e-5
# in_place = False
# Use the most up-to-date values, with each new value immediately
# overwriting the old one.
in_place = True
def is_terminal(state):
    """Return True when ``state`` is one of the two terminal corner cells.

    Args:
        state: An indexable pair; ``state[0]`` is compared against the row
            bounds and ``state[1]`` against the column bounds.

    Returns:
        bool: True for the cell ``(0, 0)`` and for the cell
        ``(nrow - 1, ncol - 1)``; False for every other cell.

    Reads the module-level ``nrow`` and ``ncol`` at call time.
    """
    x, y = state[0], state[1]
    at_first_corner = (x == 0) and (y == 0)
    at_last_corner = (x == nrow - 1) and (y == ncol - 1)
    # bool() keeps the return a plain Python bool even for NumPy integer input.
    return bool(at_first_corner or at_last_corner)
# Fully connected network: a 3-feature input, three ReLU hidden layers
# (20, 10 and 10 units) and a single linear output unit. The layers are
# passed in stacking order.
model = Sequential([
    Dense(units=20, input_dim=3, activation='relu',
          kernel_initializer='uniform'),
    Dense(units=10, activation='relu', kernel_initializer='uniform'),
    Dense(units=10, activation='relu', kernel_initializer='uniform'),
    Dense(units=1, activation='linear', kernel_initializer='uniform'),
])
# Print the layer-by-layer summary of the assembled network.
model.summary()
# Every action index 0..3 paired with the cells (0, 0) and (3, 3), one
# [row, col, action] triple per row, plus a matching vector of zeros.
# NOTE(review): presumably these pin the terminal state-action values to
# zero during fitting -- the consumer is not visible here; confirm.
x_must = np.array([
    [row, col, act]
    for row, col in ((0, 0), (3, 3))
    for act in range(4)
])
y_must = np.zeros(len(x_must))
# main loop
# (commented-out setting kept from the original listing)
#episode_num = 1000
# Re-seed NumPy's global generator with the same value used at the top of
# the file, so the random draws that follow restart from a known state.
seed = 543
np.random.seed(seed) # Set random seed for reproducibility.
# Query the network once for every (row, column, action) triple, store the
# prediction in the state_action_value table and print it.
for i in range(nrow):
    for j in range(ncol):
        for k in range(action_num):
            # One input row of three features: shape (1, 3).
            predictor = np.array([[i, j, k]])
            # predict() returns one output row per input row; take row 0.
            q_value = model.predict(predictor)[0]
            # Store the single output explicitly instead of relying on
            # NumPy's deprecated implicit size-1-array -> scalar conversion.
            state_action_value[i, j, k] = q_value[0]
            print("state-action = ", [i, j, k], "state-action value = ",
                  q_value)
# For every cell, record the largest action value as the state value and
# flag the action that attains it in the greedy_action table.
for i in range(nrow):
    for j in range(ncol):
        q_values = state_action_value[i, j, :]
        state_value[i, j] = np.max(q_values)
        # argmax returns the first index when several actions tie.
        index_max = np.argmax(q_values)
        greedy_action[i, j, index_max] = 1
        print("state = ", [i, j], "state value = ", state_value[i, j])
# Print, for every cell, the label of the action flagged in greedy_action.
for i in range(nrow):
    for j in range(ncol):
        # `which` is an R idiom with no definition or import in the visible
        # code; np.flatnonzero gives the positions where the flag equals 1.
        # The first such position is used so the result is a single index.
        # NOTE(review): assumes at least one action is flagged per cell --
        # confirm greedy_action is populated for every cell before this runs.
        index_max = np.flatnonzero(greedy_action[i, j, :] == 1)[0]
        print("state = ", [i, j], " greedy action = ", action_word[index_max])
##################