import numpy as np
import virl  # epidemic environment used below via virl.Epidemic

# NNFunctionApproximatorJointKeras and q_learning_nn are assumed to be defined
# elsewhere in this project, as in the original snippet.


def optimize_params(problem_id=0, episodes=350):
    """Grid-search over replay-buffer size, batch size and learning rate."""
    print("Optimizing for problem:", problem_id)
    max_reward = -np.inf
    max_reward_params = None
    max_avg_reward = -np.inf
    avg_reward_params = None
    # These candidate values were chosen based on manual fine-tuning
    BUFFER_SIZES = [100, 1000, 10000]
    BATCH_SIZES = [50, 100, 200]
    alphas = [0.00001, 0.0001, 0.001, 0.01]
    for BUFFER_S in BUFFER_SIZES:
        for BATCH_S in BATCH_SIZES:
            for alphaa in alphas:
                env = virl.Epidemic(problem_id=problem_id)
                d_states = env.observation_space.shape[0]
                n_actions = env.action_space.n
                alpha = alphaa          # learning rate/step size; 0.001 seems to be a good choice
                nn_config = [24, 24]    # sizes of the hidden layers in the MLP; [24, 24] seems to be a good choice
                BATCH_SIZE = BATCH_S    # number of samples in a batch
                BUFFER_SIZE = BUFFER_S  # size of the replay buffer
                # Init the two networks (online and target)
                nn_func_approximator = NNFunctionApproximatorJointKeras(alpha, d_states, n_actions, nn_config)
                nn_func_approximator_target = NNFunctionApproximatorJointKeras(alpha, d_states, n_actions, nn_config)
                # Train the agent and return the stats
                stats = q_learning_nn(env, nn_func_approximator, nn_func_approximator_target, episodes,
                                      max_steps_per_episode=10,
                                      epsilon_init=0.1, epsilon_decay=0.995, epsilon_min=0.001,
                                      fn_model_in=None, fn_model_out="virl_temp.h5")
                reward = max(stats.episode_rewards)
                avg_reward = np.mean(stats.episode_rewards)
                if reward > max_reward:
                    max_reward = reward
                    max_reward_params = {"BUFFER_SIZE": BUFFER_S, "BATCH_SIZE": BATCH_S, "alpha": alphaa}
                if avg_reward > max_avg_reward:
                    max_avg_reward = avg_reward
                    avg_reward_params = {"BUFFER_SIZE": BUFFER_S, "BATCH_SIZE": BATCH_S, "alpha": alphaa}
                print(f"\rmax reward (BUFFER_SIZE: {BUFFER_S}, BATCH_SIZE: {BATCH_S}, alpha: {alphaa}):",
                      reward, "avg:", avg_reward)
    print("\nFinished!")
    print("avg_reward:", max_avg_reward, "params:", avg_reward_params)