import gym
import numpy as np
import pickle
from pyboy import PyBoy, WindowEvent
from gym import spaces

# Custom Gym environment for Pokemon Red
class PokemonEnv(gym.Env):
    def __init__(self, pyboy_instance):
        super().__init__()
        self.pyboy = pyboy_instance

        # Pokemon Red RAM locations: the party data block starts at 0xD16B
        # and holds one 44-byte structure per party member
        self.pokemon_in_party_addr = 0xD163    # number of Pokemon in the party
        self.pokemon_data_start_addr = 0xD16B  # first party member's structure
        self.pokemon_data_size = 44            # bytes per party member
        self.current_hp_offset = 0x01          # current HP, relative to each structure
        self.level_offset = 0x21               # level, relative to each structure
        self.gym_badges_addr = 0xD356          # badge bitfield, one bit per badge

        # Initialize reward variables
        self.total_level = 0
        self.total_healing = 0
        self.step_count = 0
        self.max_steps = 1000     # episode length cap; adjust as needed
        self.level_reward = 1.0   # per-level reward coefficient; adjust as needed
        self.badge_reward = 10.0  # per-badge reward coefficient; adjust as needed

        # Define a simple discrete action space
        self.action_space = spaces.Discrete(8)  # adjust the size to match your actions

    def read_byte(self, address):
        return self.pyboy.get_memory_value(address)

    def read_word(self, address):
        # Little-endian 16-bit read (low byte first)
        return self.pyboy.get_memory_value(address) + (self.pyboy.get_memory_value(address + 1) << 8)
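
    def read_word_be(self, address):
        # Gen 1 stores multi-byte party stats big-endian (high byte first),
        # so fields like current HP should be read with this helper rather
        # than the little-endian read_word above
        return (self.pyboy.get_memory_value(address) << 8) + self.pyboy.get_memory_value(address + 1)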

    def get_pokemon_data(self, pokemon_addr):
        pokemon_data = {
            'pokemon_id': self.read_byte(pokemon_addr),
            'current_hp': self.read_word_be(pokemon_addr + self.current_hp_offset),
            'level': self.read_byte(pokemon_addr + self.level_offset)
        }
        return pokemon_data
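
    def reset(self):
        # Minimal reset: clear the per-episode counters and return the current
        # screen. A full reset would reload a saved emulator state, which is
        # omitted here.
        self.total_level = 0
        self.total_healing = 0
        self.step_count = 0
        return self.pyboy.botsupport_manager().screen().screen_ndarray()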

    def step(self, action):
        # Take action in the emulator
        self.pyboy.tick()
        state = self.pyboy.botsupport_manager().screen().screen_ndarray()

        # Compute the reward and check if the episode is done
        reward = 0.0
        done = False

        # Retrieve information from RAM locations
        pokemon_in_party = self.read_byte(self.pokemon_in_party_addr)
        gym_badges = self.read_byte(self.gym_badges_addr)

        # Iterate over Pokémon in the party
        for i in range(pokemon_in_party):
            pokemon_data_addr = self.pokemon_data_start_addr + i * self.pokemon_data_size
            pokemon_data = self.get_pokemon_data(pokemon_data_addr)

            # Update reward variables
            self.total_level += pokemon_data['level']
            self.total_healing += pokemon_data['current_hp']

            # Example: reward based on level
            reward += self.level_reward * pokemon_data['level']

        # Example: reward for gym badges (the badge byte is a bitfield, so
        # count the set bits rather than treating the byte as an integer)
        reward += self.badge_reward * bin(gym_badges).count('1')

        # Update step count
        self.step_count += 1

        # End the episode once it has run too long
        if self.step_count > self.max_steps:
            done = True

        return state, reward, done, {}
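
    def render(self, mode="human"):
        # PyBoy draws to its own SDL2 window on every tick, so nothing extra
        # is needed here; this stub just satisfies the gym.Env interface.
        pass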

# Your AI agent class
class PokemonAgent:
    def __init__(self, action_space):
        self.action_space = action_space
        self.valid_actions = [
            WindowEvent.PRESS_ARROW_DOWN,
            WindowEvent.PRESS_ARROW_LEFT,
            WindowEvent.PRESS_ARROW_RIGHT,
            WindowEvent.PRESS_ARROW_UP,
            WindowEvent.PRESS_BUTTON_A,
            WindowEvent.PRESS_BUTTON_B,
            WindowEvent.PRESS_BUTTON_START,
            WindowEvent.PASS
        ]
        # Map each press event to its matching release event so the worker
        # can release an input after stepping it
        self.release_for = {
            WindowEvent.PRESS_ARROW_DOWN: WindowEvent.RELEASE_ARROW_DOWN,
            WindowEvent.PRESS_ARROW_LEFT: WindowEvent.RELEASE_ARROW_LEFT,
            WindowEvent.PRESS_ARROW_RIGHT: WindowEvent.RELEASE_ARROW_RIGHT,
            WindowEvent.PRESS_ARROW_UP: WindowEvent.RELEASE_ARROW_UP,
            WindowEvent.PRESS_BUTTON_A: WindowEvent.RELEASE_BUTTON_A,
            WindowEvent.PRESS_BUTTON_B: WindowEvent.RELEASE_BUTTON_B,
            WindowEvent.PRESS_BUTTON_START: WindowEvent.RELEASE_BUTTON_START,
        }
        self.q_values = {}  # Q-values for state-action pairs, initialize as needed

    def choose_action(self, state):
        # Placeholder policy: pick a random valid input. Swap in an
        # epsilon-greedy lookup over self.q_values for actual learning.
        return np.random.choice(self.valid_actions)

    def update_q_values(self, state, action, reward, next_state):
        # Tabular Q-learning update. States used as dictionary keys must be
        # hashable; raw screen arrays should be reduced first (e.g. tobytes()).
        learning_rate = 0.1
        discount_factor = 0.9

        # Get the current Q-value for the (state, action) pair
        current_q_value = self.q_values.get((state, action), 0.0)

        # Estimate the future Q-value using the maximum Q-value of the next state
        max_next_q_value = max(self.q_values.get((next_state, a), 0.0) for a in self.valid_actions)

        # Update the Q-value based on the Q-learning formula
        updated_q_value = current_q_value + learning_rate * (reward + discount_factor * max_next_q_value - current_q_value)

        # Update the Q-value in the Q-values dictionary
        self.q_values[(state, action)] = updated_q_value

    def save_q_values(self, filename):
        # Save Q-values to a file using pickle
        with open(filename, 'wb') as file:
            pickle.dump(self.q_values, file)

    def load_q_values(self, filename):
        # Load Q-values from a file using pickle
        with open(filename, 'rb') as file:
            self.q_values = pickle.load(file)

def worker(episode, agent):
    pyboy_instance = PyBoy("PokemonRed.gb", window_type="SDL2", window_scale=2)
    env = PokemonEnv(pyboy_instance)
    state = env.reset()
    total_reward = 0.0

    while True:
        action = agent.choose_action(state)

        # Press the chosen input (WindowEvent.PASS presses nothing)
        if action != WindowEvent.PASS:
            env.pyboy.send_input(action)

        next_state, reward, done, _ = env.step(action)

        # Learn from the transition; screens are reduced to bytes so they can
        # serve as dictionary keys
        agent.update_q_values(state.tobytes(), action, reward, next_state.tobytes())

        total_reward += reward
        state = next_state

        # Let the pressed input register for a few frames, then release it
        env.pyboy.tick()
        env.pyboy.tick()
        env.pyboy.tick()
        if action in agent.release_for:
            env.pyboy.send_input(agent.release_for[action])

        env.render()

        if done:
            print(f"Episode {episode + 1}, Total Reward: {total_reward}")

            # Save Q-values after each episode
            agent.save_q_values(f'q_values_episode_{episode + 1}.pkl')

            break
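
    # Shut the emulator down cleanly once the episode is over
    pyboy_instance.stop()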


if __name__ == "__main__":
    action_space_size = 8  # Adjust this based on your actual action space size
    agent = PokemonAgent(spaces.Discrete(action_space_size))
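
    # Run a handful of episodes sequentially. Running workers in parallel
    # (as the worker() signature hints) would leave each process with its
    # own Q-table; merging those is beyond this sketch.
    num_episodes = 10  # adjust as needed
    for episode in range(num_episodes):
        worker(episode, agent)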