# Distributed MNIST training example: a small fully connected classifier
# trained with PyTorch, fanned out across workers via Ray remote tasks.
import time
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Dataset
import ray
# Neural network: a two-layer fully connected classifier.
class Net(nn.Module):
    """Flatten 28x28 images to 784 features, then fc(784->512) -> ReLU -> fc(512->10)."""

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(784, 512)
        self.fc2 = nn.Linear(512, 10)

    def forward(self, x):
        # Flatten whatever batch shape arrives into (batch, 784).
        flat = x.view(-1, 784)
        hidden = torch.relu(self.fc1(flat))
        return self.fc2(hidden)
# Dataset wrapper over in-memory samples and labels.
class MNISTDataset(Dataset):
    """Map-style dataset over parallel `data`/`targets` sequences.

    An optional `transform` callable is applied to the sample only
    (never the target) each time an item is fetched.
    """

    def __init__(self, data, targets, transform=None):
        self.data = data
        self.targets = targets
        self.transform = transform

    def __getitem__(self, index):
        sample, label = self.data[index], self.targets[index]
        if self.transform:
            sample = self.transform(sample)
        return sample, label

    def __len__(self):
        return len(self.data)
# Define the training function
def train(net, dataloader, criterion, optimizer, device="cuda"):
    """Run one training epoch and return the mean per-batch loss.

    Args:
        net: model to train; assumed to already live on `device`
            (the caller is responsible for `net.to(device)`).
        dataloader: yields (inputs, labels) batches.
        criterion: loss function, e.g. nn.CrossEntropyLoss().
        optimizer: optimizer built over net.parameters().
        device: device the batches are moved to. Defaults to "cuda"
            to preserve the original hard-coded behavior; pass "cpu"
            for GPU-less runs.

    Returns:
        float: running loss averaged over the number of batches,
        or 0.0 for an empty dataloader.
    """
    net.train()
    if len(dataloader) == 0:
        # Avoid ZeroDivisionError on an empty loader.
        return 0.0
    running_loss = 0.0
    for inputs, labels in dataloader:
        inputs = inputs.to(device=device)
        labels = labels.to(device=device)
        optimizer.zero_grad()
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    return running_loss / len(dataloader)
# Define the main function
def main():
    """Train the MNIST classifier via Ray remote tasks and print wall-clock time."""
    s = time.time()
    # Initialize Ray
    ray.init()
    # Define the hyperparameters
    num_epochs = 10
    batch_size = 64
    learning_rate = 0.001
    # Load the MNIST dataset (downloads to ./data on first run)
    transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])
    train_dataset = datasets.MNIST(root='./data', train=True, download=True, transform=transform)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    # Create the neural network
    net = Net()
    # Define the loss function and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(net.parameters(), lr=learning_rate)
    # Move the neural network to the GPU
    # NOTE(review): hard-coded "cuda" — this crashes on CPU-only hosts; confirm a GPU
    # is guaranteed, and note `train` also moves batches to "cuda" internally.
    net.to("cuda")
    # Define the remote training function
    # NOTE(review): num_gpus=2 per task with 2 tasks per epoch requests 4 GPUs total —
    # verify the cluster actually has that capacity or the tasks will never schedule.
    @ray.remote(num_cpus=4,num_gpus=2)
    def train_remote(net, dataloader, criterion, optimizer):
        # Thin wrapper so the module-level `train` runs inside a Ray worker.
        return train(net, dataloader, criterion, optimizer)
    # Train the neural network on multiple GPUs using Ray
    for epoch in range(num_epochs):
        # NOTE(review): Ray serializes `net` and `optimizer` into each task, so every
        # worker trains an independent copy; the driver's `net` is never updated, and
        # each epoch restarts from the same initial weights. The printed loss is the
        # average of two identical, discarded runs — confirm whether parameter
        # aggregation (or Ray Train) was intended here.
        tasks = [train_remote.remote(net, train_loader, criterion, optimizer) for _ in range(2)]
        epoch_loss = sum(ray.get(tasks)) / len(tasks)
        print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {epoch_loss:.4f}")
    # Shut down Ray
    ray.shutdown()
    # Report total elapsed wall-clock seconds.
    end_t = time.time() - s
    print(end_t)
if __name__ == '__main__':
    main()