# DQN training on LunarLander-v2 with Stable-Baselines3: TensorBoard logging,
# periodic evaluation, and best-model checkpointing.
import gymnasium as gym
import yaml
import torch
from stable_baselines3 import DQN
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.callbacks import EvalCallback
import time
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.logger import configure
import os
# Wall-clock reference for the end-of-run timing report.
start = time.time()

# Prefer the GPU when CUDA is present; otherwise train on the CPU.
cuda_available = torch.cuda.is_available()
device = torch.device("cuda" if cuda_available else "cpu")
print(f"Using device: {device}")
# Load the configuration
config = yaml.safe_load("""
LunarLander-v2:
n_timesteps: !!float 1e5
policy: 'MlpPolicy'
learning_rate: !!float 6.3e-4
batch_size: 128
buffer_size: 50000
learning_starts: 0
gamma: 0.99
target_update_interval: 250
train_freq: 4
gradient_steps: -1
exploration_fraction: 0.12
exploration_final_eps: 0.1
policy_kwargs: "dict(net_arch=[256, 256])"
""")
# Extract the configuration for LunarLander-v2
lunar_config = config['LunarLander-v2']
# Create log dir
log_dir = "tensorboard_logs"
os.makedirs(log_dir, exist_ok=True)
print("To view the TensorBoard, run:")
print(f"tensorboard --logdir {log_dir}")
# Create the environment
env = gym.make("LunarLander-v2")
env = Monitor(env)
env = DummyVecEnv([lambda: env])
# Create the DQN model with the specified hyperparameters
model = DQN(
policy=lunar_config['policy'],
env=env,
learning_rate=lunar_config['learning_rate'],
batch_size=lunar_config['batch_size'],
buffer_size=lunar_config['buffer_size'],
learning_starts=lunar_config['learning_starts'],
gamma=lunar_config['gamma'],
target_update_interval=lunar_config['target_update_interval'],
train_freq=lunar_config['train_freq'],
gradient_steps=lunar_config['gradient_steps'],
exploration_fraction=lunar_config['exploration_fraction'],
exploration_final_eps=lunar_config['exploration_final_eps'],
policy_kwargs=eval(lunar_config['policy_kwargs']),
device=device,
verbose=1,
tensorboard_log=log_dir
)
new_logger = configure(log_dir, ["tensorboard", "stdout"])
model.set_logger(new_logger)
# Create an evaluation callback
eval_env = gym.make("LunarLander-v2")
eval_env = Monitor(eval_env)
eval_env = DummyVecEnv([lambda: eval_env])
eval_callback = EvalCallback(
eval_env,
best_model_save_path="./logs/",
log_path="./logs/",
eval_freq=10000,
deterministic=True,
render=False
)
# Train the model for the configured number of environment steps,
# running the evaluation callback periodically.
model.learn(total_timesteps=int(lunar_config['n_timesteps']), callback=eval_callback)

# Save the final (not necessarily best) model; the best one lives in ./logs/.
model.save("dqn_lunarlander")

# Release both environments' resources.
env.close()
eval_env.close()

# BUGFIX: stray editor text ("Editor is loading...") was fused onto this
# line in the pasted source, making the file a syntax error; removed.
print(f"The total process took = {time.time() - start} seconds")