import pickle

import gym
import numpy as np
from gym import spaces
from pyboy import PyBoy, WindowEvent
# Custom Gym environment for Pokemon Red
class PokemonEnv(gym.Env):
    def __init__(self, pyboy_instance):
        self.pyboy = pyboy_instance

        # RAM locations (Pokemon Red). The HP and level addresses point at the
        # first party member's fields; each party member's data block is
        # 0x2C (44) bytes, so slot i is offset by i * 0x2C.
        self.pokemon_in_party_addr = 0xD163  # number of Pokemon in the party
        self.pokemon_species_addr = 0xD164   # species IDs, one byte per slot
        self.first_mon_hp_addr = 0xD16C      # current HP of party member 1
        self.first_mon_level_addr = 0xD18C   # level of party member 1
        self.party_mon_size = 0x2C           # bytes per party member struct
        self.gym_badges_addr = 0xD755        # badge bit field

        # Initialize reward bookkeeping
        self.total_level = 0
        self.total_healing = 0
        self.step_count = 0
        self.max_steps = 1000  # episode length cap; adjust as needed
        # Reward weights (tunable placeholders)
        self.level_reward_scale = 0.01
        self.badge_reward_scale = 10.0

        # Simple discrete action space, one entry per input event used below
        self.action_space = spaces.Discrete(8)
        # Observation space: PyBoy's 144x160 RGB screen
        self.observation_space = spaces.Box(low=0, high=255, shape=(144, 160, 3), dtype=np.uint8)
    def read_byte(self, address):
        return self.pyboy.get_memory_value(address)

    def read_word(self, address):
        # Gen 1 stores multi-byte values big-endian (high byte first)
        return (self.pyboy.get_memory_value(address) << 8) + self.pyboy.get_memory_value(address + 1)

    def get_pokemon_data(self, slot):
        # Read the fields for the party Pokemon in the given slot (0-5)
        offset = slot * self.party_mon_size
        return {
            'pokemon_id': self.read_byte(self.pokemon_species_addr + slot),
            'current_hp': self.read_word(self.first_mon_hp_addr + offset),
            'level': self.read_byte(self.first_mon_level_addr + offset),
        }
    def reset(self):
        # Minimal reset: clear episode bookkeeping and return the current
        # screen. (Reloading a saved state here would give a true reset.)
        self.total_level = 0
        self.total_healing = 0
        self.step_count = 0
        return self._get_screen()

    def _get_screen(self):
        return self.pyboy.botsupport_manager().screen().screen_ndarray()

    def step(self, action):
        # Advance the emulator one frame
        self.pyboy.tick()
        state = self._get_screen()

        # Compute the reward and check if the episode is done
        reward = 0.0
        done = False

        # Retrieve information from RAM locations
        pokemon_in_party = self.read_byte(self.pokemon_in_party_addr)
        gym_badges = self.read_byte(self.gym_badges_addr)

        # Iterate over the Pokemon in the party
        for slot in range(pokemon_in_party):
            pokemon_data = self.get_pokemon_data(slot)
            # Update reward bookkeeping
            self.total_level += pokemon_data['level']
            self.total_healing += pokemon_data['current_hp']
            # Example: reward based on level
            reward += self.level_reward_scale * pokemon_data['level']

        # Example: reward for gym badges (0xD755 is a bit field, one bit per
        # badge, so count the set bits)
        reward += self.badge_reward_scale * bin(gym_badges).count('1')

        # Update step count and end long episodes
        self.step_count += 1
        if self.step_count > self.max_steps:
            done = True

        return state, reward, done, {}
# Your AI agent class
class PokemonAgent:
    def __init__(self, action_space):
        self.action_space = action_space
        self.valid_actions = [
            WindowEvent.PRESS_ARROW_DOWN,
            WindowEvent.PRESS_ARROW_LEFT,
            WindowEvent.PRESS_ARROW_RIGHT,
            WindowEvent.PRESS_ARROW_UP,
            WindowEvent.PRESS_BUTTON_A,
            WindowEvent.PRESS_BUTTON_B,
            WindowEvent.PRESS_BUTTON_START,
            WindowEvent.PASS
        ]
        self.release_arrow = [
            WindowEvent.RELEASE_ARROW_DOWN,
            WindowEvent.RELEASE_ARROW_LEFT,
            WindowEvent.RELEASE_ARROW_RIGHT,
            WindowEvent.RELEASE_ARROW_UP
        ]
        self.release_button = [
            WindowEvent.RELEASE_BUTTON_A,
            WindowEvent.RELEASE_BUTTON_B
        ]
        self.q_values = {}  # Q-values for state-action pairs, initialize as needed
    def choose_action(self, state):
        # Random action for testing; replace with an epsilon-greedy policy
        # over self.q_values for actual learning
        return np.random.choice(self.valid_actions)

    def update_q_values(self, state, action, reward, next_state):
        # Q-learning update. Note: state keys must be hashable (see the
        # encode_state helper below); raw screen arrays are not.
        learning_rate = 0.1
        discount_factor = 0.9
        # Get the current Q-value for the (state, action) pair
        current_q_value = self.q_values.get((state, action), 0.0)
        # Estimate the future return using the best Q-value of the next state
        max_next_q_value = max(self.q_values.get((next_state, a), 0.0) for a in self.valid_actions)
        # Standard Q-learning update rule
        updated_q_value = current_q_value + learning_rate * (reward + discount_factor * max_next_q_value - current_q_value)
        self.q_values[(state, action)] = updated_q_value

    def save_q_values(self, filename):
        # Save Q-values to a file using pickle
        with open(filename, 'wb') as file:
            pickle.dump(self.q_values, file)

    def load_q_values(self, filename):
        # Load Q-values from a file using pickle
        with open(filename, 'rb') as file:
            self.q_values = pickle.load(file)
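# The Q-table above keys on (state, action) pairs, so states must be hashable;
# PyBoy's screen ndarray is not. A minimal sketch of one way to build a key
# (the 8x8 downsampling factor is an arbitrary assumption):
def encode_state(screen_ndarray):
    # Coarsen the 144x160 RGB screen by keeping every 8th pixel,
    # then use the raw bytes as a dictionary key
    downsampled = np.ascontiguousarray(screen_ndarray[::8, ::8])
    return downsampled.tobytes()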
def worker(episode, agent):
    pyboy_instance = PyBoy("PokemonRed.gb", window_type="SDL2", window_scale=2)
    env = PokemonEnv(pyboy_instance)
    state = env.reset()
    total_reward = 0.0

    # Map each press event to its matching release event so buttons are not
    # left held down between steps
    release_for = {
        WindowEvent.PRESS_ARROW_DOWN: WindowEvent.RELEASE_ARROW_DOWN,
        WindowEvent.PRESS_ARROW_LEFT: WindowEvent.RELEASE_ARROW_LEFT,
        WindowEvent.PRESS_ARROW_RIGHT: WindowEvent.RELEASE_ARROW_RIGHT,
        WindowEvent.PRESS_ARROW_UP: WindowEvent.RELEASE_ARROW_UP,
        WindowEvent.PRESS_BUTTON_A: WindowEvent.RELEASE_BUTTON_A,
        WindowEvent.PRESS_BUTTON_B: WindowEvent.RELEASE_BUTTON_B,
        WindowEvent.PRESS_BUTTON_START: WindowEvent.RELEASE_BUTTON_START,
    }

    while True:
        action = agent.choose_action(state)
        env.pyboy.send_input(action)
        next_state, reward, done, _ = env.step(action)

        # Hold the input for a few extra frames, then release it; the SDL2
        # window renders on every tick
        env.pyboy.tick()
        env.pyboy.tick()
        env.pyboy.tick()
        if action in release_for:
            env.pyboy.send_input(release_for[action])

        # Implement your learning and updating logic here, e.g.:
        # agent.update_q_values(encode_state(state), action, reward, encode_state(next_state))
        total_reward += reward
        state = next_state

        if done:
            print(f"Episode {episode + 1}, Total Reward: {total_reward}")
            # Save Q-values after each episode
            agent.save_q_values(f'q_values_episode_{episode + 1}.pkl')
            break

    pyboy_instance.stop()
if __name__ == "__main__":
    action_space_size = 8  # Adjust this based on your actual action space size
    agent = PokemonAgent(spaces.Discrete(action_space_size))
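    # Minimal sequential driver for the worker above; the episode count is
    # an arbitrary placeholder
    for episode in range(10):
        worker(episode, agent)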