Untitled

mail@pastecode.io avatar
unknown
python
11 days ago
2.7 kB
31
Indexable
Never
import gymnasium as gym
import yaml
import torch
from stable_baselines3 import DQN
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.callbacks import EvalCallback
import time
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.logger import configure
import os

# Record wall-clock start time so total runtime can be reported at the end.
start = time.time()

# Select the compute device: use the GPU when torch can see one, else the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"Using device: {device}")

# Load the configuration
# Hyperparameters in rl-baselines3-zoo YAML format, parsed from an inline
# string. `policy_kwargs` is deliberately kept as a Python-expression string
# (that is how the zoo stores it); it is evaluated where the model is built.
config = yaml.safe_load("""
LunarLander-v2:
  n_timesteps: !!float 1e5
  policy: 'MlpPolicy'
  learning_rate: !!float 6.3e-4
  batch_size: 128
  buffer_size: 50000
  learning_starts: 0
  gamma: 0.99
  target_update_interval: 250
  train_freq: 4
  gradient_steps: -1
  exploration_fraction: 0.12
  exploration_final_eps: 0.1
  policy_kwargs: "dict(net_arch=[256, 256])"
""")

# Pull out the hyperparameter dict for the single environment we train on.
lunar_config = config["LunarLander-v2"]

# Create the directory that TensorBoard event files are written into.
log_dir = "tensorboard_logs"
os.makedirs(log_dir, exist_ok=True)
print("To view the TensorBoard, run:")
print(f"tensorboard --logdir {log_dir}")

# Create the training environment: raw env -> Monitor (episode stats) -> VecEnv.
env = gym.make("LunarLander-v2")
env = Monitor(env)
# Bind the Monitor-wrapped env as a default argument. The original
# `lambda: env` only worked because DummyVecEnv happens to call its factories
# inside __init__, before `env` is rebound; binding at definition time removes
# that late-binding-closure hazard.
env = DummyVecEnv([lambda monitored=env: monitored])

# Create the DQN model with the specified hyperparameters
# Build the DQN agent from the zoo hyperparameters.
# `policy_kwargs` arrives as a Python-expression string (e.g.
# "dict(net_arch=[256, 256])"). Evaluating it with an empty builtins table and
# only `dict` exposed keeps the original behavior while preventing an edited
# config string from executing arbitrary code — a bare eval() on config data
# is an injection hazard.
model = DQN(
    policy=lunar_config['policy'],
    env=env,
    learning_rate=lunar_config['learning_rate'],
    batch_size=lunar_config['batch_size'],
    buffer_size=lunar_config['buffer_size'],
    learning_starts=lunar_config['learning_starts'],
    gamma=lunar_config['gamma'],
    target_update_interval=lunar_config['target_update_interval'],
    train_freq=lunar_config['train_freq'],
    gradient_steps=lunar_config['gradient_steps'],
    exploration_fraction=lunar_config['exploration_fraction'],
    exploration_final_eps=lunar_config['exploration_final_eps'],
    policy_kwargs=eval(lunar_config['policy_kwargs'], {"__builtins__": {}, "dict": dict}),
    device=device,
    verbose=1,
    tensorboard_log=log_dir
)

# Route SB3's logger output to both TensorBoard event files and stdout.
new_logger = configure(log_dir, ["tensorboard", "stdout"])
model.set_logger(new_logger)

# Separate environment used only for periodic evaluation during training.
eval_env = gym.make("LunarLander-v2")
eval_env = Monitor(eval_env)
# Bind the wrapped env as a default argument to avoid the late-binding-closure
# pitfall: a bare `lambda: eval_env` would resolve the name at call time, and
# only works because DummyVecEnv calls its factories inside __init__.
eval_env = DummyVecEnv([lambda monitored=eval_env: monitored])

# Evaluate every 10k training steps; the best-scoring checkpoint and the
# evaluation results are written under ./logs/.
eval_callback = EvalCallback(
    eval_env,
    best_model_save_path="./logs/",
    log_path="./logs/",
    eval_freq=10000,
    deterministic=True,
    render=False
)

# Run the training loop; the eval callback periodically measures performance
# and saves the best model seen so far.
model.learn(total_timesteps=int(lunar_config['n_timesteps']), callback=eval_callback)

# Persist the final (not necessarily best) policy next to the script.
model.save("dqn_lunarlander")

# Release environment resources before reporting timing.
env.close()
eval_env.close()

elapsed = time.time() - start
print(f"The total process took = {elapsed} seconds")
Leave a Comment