Untitled
unknown
python
11 days ago
2.7 kB
31
Indexable
Never
"""Train a DQN agent on LunarLander-v2 with tuned hyperparameters.

Hyperparameters follow a tuned (rl-baselines3-zoo style) DQN configuration
for LunarLander-v2; progress is logged to TensorBoard and stdout.
"""
import os
import time

import gymnasium as gym
import torch
import yaml
from stable_baselines3 import DQN
from stable_baselines3.common.callbacks import EvalCallback
from stable_baselines3.common.logger import configure
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.vec_env import DummyVecEnv

start = time.time()

# Select GPU when available; SB3 accepts a torch.device directly.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Tuned hyperparameters. `policy_kwargs` is a structured YAML mapping so it
# can be passed to DQN directly — previously it was a Python expression
# string fed through eval(), which is both unnecessary and unsafe.
config = yaml.safe_load("""
LunarLander-v2:
  n_timesteps: !!float 1e5
  policy: 'MlpPolicy'
  learning_rate: !!float 6.3e-4
  batch_size: 128
  buffer_size: 50000
  learning_starts: 0
  gamma: 0.99
  target_update_interval: 250
  train_freq: 4
  gradient_steps: -1
  exploration_fraction: 0.12
  exploration_final_eps: 0.1
  policy_kwargs:
    net_arch: [256, 256]
""")

# Extract the configuration for LunarLander-v2.
lunar_config = config['LunarLander-v2']

# Create the TensorBoard log directory.
log_dir = "tensorboard_logs"
os.makedirs(log_dir, exist_ok=True)
print("To view the TensorBoard, run:")
print(f"tensorboard --logdir {log_dir}")


def _make_train_env():
    """Build a fresh, Monitor-wrapped LunarLander-v2 training environment."""
    return Monitor(gym.make("LunarLander-v2"))


# A named factory avoids the fragile `lambda: env` closure over a name that
# is immediately rebound to the vectorized wrapper on the same statement.
env = DummyVecEnv([_make_train_env])

# Create the DQN model with the specified hyperparameters.
model = DQN(
    policy=lunar_config['policy'],
    env=env,
    learning_rate=lunar_config['learning_rate'],
    batch_size=lunar_config['batch_size'],
    buffer_size=lunar_config['buffer_size'],
    learning_starts=lunar_config['learning_starts'],
    gamma=lunar_config['gamma'],
    target_update_interval=lunar_config['target_update_interval'],
    train_freq=lunar_config['train_freq'],
    gradient_steps=lunar_config['gradient_steps'],
    exploration_fraction=lunar_config['exploration_fraction'],
    exploration_final_eps=lunar_config['exploration_final_eps'],
    policy_kwargs=lunar_config['policy_kwargs'],
    device=device,
    verbose=1,
    tensorboard_log=log_dir,
)

# Route SB3's own logger output to both TensorBoard and stdout.
new_logger = configure(log_dir, ["tensorboard", "stdout"])
model.set_logger(new_logger)
# Periodic evaluation on a separate Monitor-wrapped environment.
def _make_eval_env():
    """Build a fresh, Monitor-wrapped LunarLander-v2 evaluation environment."""
    return Monitor(gym.make("LunarLander-v2"))


# A named factory avoids the fragile `lambda: eval_env` closure over a name
# that is immediately rebound to the vectorized wrapper on the same statement.
eval_env = DummyVecEnv([_make_eval_env])
eval_callback = EvalCallback(
    eval_env,
    best_model_save_path="./logs/",
    log_path="./logs/",
    eval_freq=10000,
    deterministic=True,
    render=False,
)

# Train the model; EvalCallback periodically saves the best checkpoint.
model.learn(
    total_timesteps=int(lunar_config['n_timesteps']),
    callback=eval_callback,
)

# Save the final model.
model.save("dqn_lunarlander")

# Clean up: release environment resources.
env.close()
eval_env.close()
print(f"The total process took = {time.time() - start} seconds")
Leave a Comment