import numpy as np
import virl  # epidemic simulation environments used in the original notebook

# NNFunctionApproximatorJointKeras and q_learning_nn are assumed to be defined
# elsewhere in the original notebook/paste.


def optimize_params(problem_id=0, episodes=350):
    print("Optimizing for problem:", problem_id)

    max_reward = -np.inf
    max_reward_params = None
    max_avg_reward = -np.inf
    avg_reward_params = None

    # I have chosen these parameters based off of manual fine-tuning
    BUFFER_SIZES = [100, 1000, 10000]
    BATCH_SIZES = [50, 100, 200]
    alphas = [0.00001, 0.0001, 0.001, 0.01]

    for BUFFER_S in BUFFER_SIZES:
        for BATCH_S in BATCH_SIZES:
            for alphaa in alphas:
                env = virl.Epidemic(problem_id=problem_id)
                d_states = env.observation_space.shape[0]
                n_actions = env.action_space.n
                alpha = alphaa        # learning rate/stepsize, 0.001 seems to be a good choice
                nn_config = [24, 24]  # size of the hidden layers in the MLP ([24, 24] seems to be a good choice)
                BATCH_SIZE = BATCH_S    # number of samples in a batch
                BUFFER_SIZE = BUFFER_S  # size of the replay buffer
                # (BATCH_SIZE/BUFFER_SIZE are assumed to be picked up by q_learning_nn,
                # e.g. as globals in the original notebook)

                # Init the two networks
                nn_func_approximator = NNFunctionApproximatorJointKeras(alpha, d_states, n_actions, nn_config)
                nn_func_approximator_target = NNFunctionApproximatorJointKeras(alpha, d_states, n_actions, nn_config)

                # Train agent and return the stats
                stats = q_learning_nn(env, nn_func_approximator, nn_func_approximator_target,
                                      episodes, max_steps_per_episode=10,
                                      epsilon_init=0.1, epsilon_decay=0.995, epsilon_min=0.001,
                                      fn_model_in=None, fn_model_out="virl_temp.h5")

                reward = max(stats.episode_rewards)
                avg_reward = np.mean(stats.episode_rewards)

                if reward > max_reward:
                    max_reward = reward
                    max_reward_params = {"BUFFER_SIZE": BUFFER_S, "BATCH_SIZE": BATCH_S, "alpha": alphaa}
                if avg_reward > max_avg_reward:
                    max_avg_reward = avg_reward
                    avg_reward_params = {"BUFFER_SIZE": BUFFER_S, "BATCH_SIZE": BATCH_S, "alpha": alphaa}

                print(f"\rmax reward (BUFFER_SIZE: {BUFFER_S}, BATCH_SIZE: {BATCH_S}, alpha: {alphaa}):",
                      reward, "avg:", avg_reward)

    print("\nFinished!")
    print("max reward:", max_reward, "params:", max_reward_params)
    print("avg reward:", max_avg_reward, "params:", avg_reward_params)
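
# Usage sketch (not part of the original paste): runs the grid search over a few
# problem ids. The specific ids and episode count here are illustrative assumptions;
# it requires the virl package plus the NNFunctionApproximatorJointKeras and
# q_learning_nn helpers from the original notebook to be importable.
if __name__ == "__main__":
    for pid in range(3):  # hypothetical subset of virl problem ids
        optimize_params(problem_id=pid, episodes=350)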