Untitled
unknown
python
a year ago
2.2 kB
27
Indexable
import gym
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim


# Define the network
class Net(nn.Module):
    """One-hidden-layer model mapping a (state, action) pair to next-state probabilities."""

    def __init__(self, state_size, action_size, hidden_size=64):
        """Create the two linear layers.

        Args:
            state_size: width of the state input; also the width of the output.
            action_size: width of the action input.
            hidden_size: width of the hidden layer.
        """
        super().__init__()
        self.fc1 = nn.Linear(state_size + action_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, state_size)

    def forward(self, state, action):
        """Return a softmax distribution over next states.

        Args:
            state: state tensor whose last dimension is ``state_size``.
            action: action tensor whose last dimension is ``action_size``.

        Returns:
            Tensor with last dimension ``state_size`` that sums to 1 along
            that dimension.
        """
        x = torch.cat([state, action], dim=-1)
        x = torch.sigmoid(self.fc1(x))
        # NOTE(review): the output is already softmax-normalised. If the
        # loss_fn used by the training loop is nn.CrossEntropyLoss (it is not
        # defined in this file), that loss applies log-softmax itself and
        # expects raw logits -- confirm and drop this softmax if so.
        next_state_probs = F.softmax(self.fc2(x), dim=-1)
        return next_state_probs


# Define the accuracy function
def accuracy(preds, y):
    """Return the fraction of predictions that equal their targets.

    Args:
        preds: non-empty list of tensors holding predicted class indices.
        y: non-empty list of tensors holding target class indices, with the
            same total number of elements as ``preds``.

    Returns:
        0-dim float tensor: number of matches divided by number of samples.
    """
    # Flatten every entry before concatenating. Without this, predictions
    # stored as (1, 1) tensors concatenate to (N, 1) while targets concatenate
    # to (N,), and `==` broadcasts the pair into an (N, N) matrix, so the
    # result is no longer a per-sample accuracy.
    preds = torch.cat([p.reshape(-1) for p in preds])
    y = torch.cat([t.reshape(-1) for t in y])
    correct = (preds == y).float()
    acc = correct.sum() / len(correct)
    return acc


# NOTE(review): env, net, loss_fn, optimizer, state_size and action_size are
# not defined anywhere in this file; they must exist before this loop runs.
# NOTE(review): the loop unpacks env.reset() as a bare state and env.step() as
# a 4-tuple -- confirm this matches the installed gym version.
for episode in range(1000):  # adjust as needed
    print("episode = ", episode)
    state = env.reset()
    done = False
    preds_array = []
    next_state_array = []
    # Accuracy is reported only on every 50th episode (never on episode 0).
    report_accuracy = episode % 50 == 0 and episode != 0

    while not done:
        action = env.action_space.sample()  # replace with your action selection method
        next_state, _, done, _ = env.step(action)

        # Prepare the data
        state_tensor = F.one_hot(torch.tensor([state]), num_classes=state_size).float()
        action_tensor = F.one_hot(torch.tensor([action]), num_classes=action_size).float()
        next_state_tensor = torch.tensor([next_state])

        # Forward pass
        preds = net(state_tensor, action_tensor)
        # Store the arg-max class with shape (1,), the same shape as the target.
        preds_array.append(torch.max(preds, 1)[1])
        next_state_array.append(next_state_tensor)

        # Calculate the loss and the accuracy
        loss = loss_fn(preds, next_state_tensor)
        if report_accuracy:
            print("next_state_array = ", next_state_array)
            print("Length = ", len(preds_array))
            acc = accuracy(preds_array, next_state_array)
            print("The accuracy is ", acc)
            # NOTE(review): the buffers are cleared after every report, so on
            # reporting episodes each accuracy covers a single transition --
            # confirm whether a per-episode figure was intended.
            preds_array = []
            next_state_array = []

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Move to the next state
        state = next_state
Editor is loading...
Leave a Comment