@@ -13,7 +13,7 @@ def __init__(self, ob_dim, ac_dim): #pylint: disable=W0613
         wd_dict = {}
         h1 = tf.nn.elu(dense(X, 64, "h1", weight_init=U.normc_initializer(1.0), bias_init=0, weight_loss_dict=wd_dict))
         h2 = tf.nn.elu(dense(h1, 64, "h2", weight_init=U.normc_initializer(1.0), bias_init=0, weight_loss_dict=wd_dict))
-        vpred_n = dense(h2, 1, "hfinal", weight_init=U.normc_initializer(1.0), bias_init=0, weight_loss_dict=wd_dict)[:,0]
+        vpred_n = dense(h2, 1, "hfinal", weight_init=None, bias_init=0, weight_loss_dict=wd_dict)[:,0]
         sample_vpred_n = vpred_n + tf.random_normal(tf.shape(vpred_n))
         wd_loss = tf.get_collection("vf_losses", None)
         loss = U.mean(tf.square(vpred_n - vtarg_n)) + tf.add_n(wd_loss)
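The first hunk drops the normalized-column initializer on the final value head (`hfinal`) and falls back to the `dense` helper's default `weight_init=None`. For context, a minimal sketch of what a normc-style initializer like the `U.normc_initializer` used for `h1`/`h2` typically does (an illustration only; the actual helper may differ):

```python
import numpy as np
import tensorflow as tf

def normc_initializer(std=1.0):
    """Sketch of a normalized-column initializer: draw a Gaussian matrix and
    rescale each column so its L2 norm equals `std`."""
    def _initializer(shape, dtype=None, partition_info=None):  # pylint: disable=W0613
        out = np.random.randn(*shape).astype(np.float32)
        out *= std / np.sqrt(np.square(out).sum(axis=0, keepdims=True))
        return tf.constant(out)
    return _initializer
```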
@@ -22,7 +22,7 @@ def __init__(self, ob_dim, ac_dim): #pylint: disable=W0613
         optim = kfac.KfacOptimizer(learning_rate=0.001, cold_lr=0.001*(1-0.9), momentum=0.9, \
                                     clip_kl=0.3, epsilon=0.1, stats_decay=0.95, \
                                     async=1, kfac_update=2, cold_iter=50, \
-                                    weight_decay_dict=wd_dict, max_grad_norm=None)
+                                    weight_decay_dict=wd_dict, max_grad_norm=1.0)
         vf_var_list = []
         for var in tf.trainable_variables():
             if "vf" in var.name:
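The second hunk enables gradient clipping in the K-FAC optimizer by setting `max_grad_norm=1.0` instead of leaving it off. As a rough illustration of what a global-norm cap of 1.0 does (not the `KfacOptimizer` internals), here is a TF1 sketch with hypothetical `params`/`grads` standing in for the value-function update:

```python
import tensorflow as tf

# Hypothetical variable and gradient standing in for the value-function parameters.
params = [tf.Variable(tf.zeros([64, 1]), name="vf/hfinal/w")]
grads = tf.gradients(tf.reduce_sum(tf.square(params[0])), params)

max_grad_norm = 1.0
# Rescale the whole gradient list so its global L2 norm is at most max_grad_norm.
clipped_grads, global_norm = tf.clip_by_global_norm(grads, max_grad_norm)
train_op = tf.train.GradientDescentOptimizer(0.001).apply_gradients(
    list(zip(clipped_grads, params)))
```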