Untitled
unknown
plain_text
2 years ago
2.9 kB
5
Indexable
import time

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Dataset

import ray


class Net(nn.Module):
    """Two-layer fully connected classifier for flattened 28x28 MNIST digits."""

    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(784, 512)
        self.fc2 = nn.Linear(512, 10)

    def forward(self, x):
        # Flatten any (batch, 1, 28, 28) input to (batch, 784) for the linear layers.
        x = x.view(-1, 784)
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x


class MNISTDataset(Dataset):
    """Minimal in-memory dataset with an optional per-sample transform.

    NOTE(review): unused by main() below, which loads datasets.MNIST
    directly; kept because external code may import it.
    """

    def __init__(self, data, targets, transform=None):
        self.data = data
        self.targets = targets
        self.transform = transform

    def __getitem__(self, index):
        x = self.data[index]
        y = self.targets[index]
        if self.transform:
            x = self.transform(x)
        return x, y

    def __len__(self):
        return len(self.data)


def train(net, dataloader, criterion, optimizer, device=None):
    """Run one training epoch and return the mean per-batch loss.

    Args:
        net: model whose parameters already live on ``device``.
        dataloader: iterable yielding (inputs, labels) batches.
        criterion: loss function applied to (outputs, labels).
        optimizer: optimizer bound to ``net``'s parameters.
        device: target device for each batch. Defaults to CUDA when
            available, else CPU (the original hard-coded 'cuda' and
            crashed on CPU-only hosts).

    Returns:
        Average of the per-batch loss values over the epoch.
    """
    if device is None:
        device = "cuda" if torch.cuda.is_available() else "cpu"
    net.train()
    running_loss = 0.0
    for inputs, labels in dataloader:
        inputs = inputs.to(device=device)
        labels = labels.to(device=device)
        optimizer.zero_grad()
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    return running_loss / len(dataloader)


def main():
    """Train Net on MNIST, fanning epoch runs out to Ray tasks, and print the wall time."""
    s = time.time()

    # Initialize Ray.
    ray.init()

    # Hyperparameters.
    num_epochs = 10
    batch_size = 64
    learning_rate = 0.001

    # MNIST, normalized to roughly [-1, 1].
    transform = transforms.Compose(
        [transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))]
    )
    train_dataset = datasets.MNIST(
        root='./data', train=True, download=True, transform=transform
    )
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

    # Build the model and move it to the best available device (original
    # unconditionally used "cuda" and crashed without a GPU).
    device = "cuda" if torch.cuda.is_available() else "cpu"
    net = Net().to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(net.parameters(), lr=learning_rate)

    # NOTE(review): each Ray task receives serialized *copies* of net and
    # optimizer, so the two tasks train independent replicas and the driver's
    # `net` is never updated -- the printed loss is an average over unsynced
    # copies, not the loss of one converging model. Correct data-parallel
    # training needs gradient synchronization (e.g. ray.train or
    # DistributedDataParallel). Also, num_gpus=2 per task means the two
    # concurrent tasks require 4 GPUs in total.
    @ray.remote(num_cpus=4, num_gpus=2)
    def train_remote(net, dataloader, criterion, optimizer):
        return train(net, dataloader, criterion, optimizer)

    # Fan each epoch out to two remote workers and average their losses.
    for epoch in range(num_epochs):
        tasks = [
            train_remote.remote(net, train_loader, criterion, optimizer)
            for _ in range(2)
        ]
        epoch_loss = sum(ray.get(tasks)) / len(tasks)
        print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {epoch_loss:.4f}")

    # Shut down Ray.
    ray.shutdown()
    end_t = time.time() - s
    print(end_t)


if __name__ == '__main__':
    main()
Editor is loading...