# Untitled
# unknown
# plain_text
# 3 years ago
# 2.9 kB
# 10
# Indexable
import time
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Dataset
import ray
# Define the neural network
class Net(nn.Module):
    """Two-layer fully connected classifier.

    Each input sample is flattened to ``in_features`` values, passed through
    a hidden linear layer with ReLU, and mapped to ``num_classes`` raw scores
    (no softmax is applied).

    Args:
        in_features: Number of values per flattened sample.
        hidden_features: Width of the hidden layer.
        num_classes: Number of output scores per sample.
    """

    def __init__(self, in_features: int = 784, hidden_features: int = 512,
                 num_classes: int = 10) -> None:
        super().__init__()
        self.fc1 = nn.Linear(in_features, hidden_features)
        self.fc2 = nn.Linear(hidden_features, num_classes)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the class scores for a batch ``x``."""
        # reshape (unlike view) also accepts non-contiguous tensors, e.g. the
        # result of a transpose, and only copies when it has to.
        x = x.reshape(-1, self.fc1.in_features)
        x = torch.relu(self.fc1(x))
        return self.fc2(x)
# Define the dataset
class MNISTDataset(Dataset):
    """Dataset that pairs ``data[i]`` with ``targets[i]``.

    When a truthy ``transform`` is supplied it is applied to the sample (not
    to the target) each time an item is fetched.
    """

    def __init__(self, data, targets, transform=None):
        self.data, self.targets, self.transform = data, targets, transform

    def __len__(self):
        # The dataset length is defined by the sample container alone.
        return len(self.data)

    def __getitem__(self, index):
        sample, label = self.data[index], self.targets[index]
        return (self.transform(sample) if self.transform else sample), label
# Define the training function
def train(net, dataloader, criterion, optimizer):
    """Run one optimisation pass over ``dataloader`` and return the mean loss.

    Args:
        net: Model to train; it is switched to training mode.
        dataloader: Iterable yielding ``(inputs, labels)`` batches.
        criterion: Callable computing the loss from ``(outputs, labels)``.
        optimizer: Optimizer that updates ``net``'s parameters.

    Returns:
        The average of the per-batch loss values as a float, or ``0.0`` when
        ``dataloader`` yields no batches.
    """
    net.train()

    # Send batches to wherever the model lives instead of assuming CUDA, so
    # the same loop works on CPU-only hosts and on any GPU index.
    first_param = next(net.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    running_loss = 0.0
    num_batches = 0
    for inputs, labels in dataloader:
        inputs = inputs.to(device)
        labels = labels.to(device)
        optimizer.zero_grad()
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        num_batches += 1

    # Count batches while iterating rather than calling len(): this avoids a
    # ZeroDivisionError on an empty loader and works for iterables that do
    # not define __len__.
    if num_batches == 0:
        return 0.0
    return running_loss / num_batches
# Define the main function
def main():
    """Train ``Net`` on MNIST with Ray workers and print loss and wall time.

    Every epoch the driver ships the current weights to ``num_workers`` Ray
    tasks. Each task trains its own copy for one pass over the data and
    returns ``(mean_loss, trained_weights)``; the driver then averages the
    returned weights element-wise and loads them back into ``net``, so each
    epoch continues from the previous one.

    Ray tasks operate on deserialized copies of their arguments, so weights
    must be returned explicitly: mutating a model inside a task never
    changes the driver's model.

    NOTE(review): every worker still iterates the full dataset; shard the
    data per worker if real data parallelism is wanted. The Adam optimizer
    is rebuilt inside each task, so its moment estimates restart each epoch.
    """
    start = time.time()

    ray.init()
    try:
        # Hyperparameters
        num_epochs = 10
        batch_size = 64
        learning_rate = 0.001
        num_workers = 2  # Ray tasks launched per epoch

        # Load the MNIST dataset
        transform = transforms.Compose(
            [transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))]
        )
        train_dataset = datasets.MNIST(
            root='./data', train=True, download=True, transform=transform
        )
        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

        # The driver only holds the reference weights; the GPU work happens
        # in the workers, so the driver copy can stay on the CPU.
        net = Net()
        criterion = nn.CrossEntropyLoss()

        # Store the loader once in the object store instead of re-serializing
        # it for every task of every epoch; Ray resolves the reference to the
        # real object before the task body runs.
        loader_ref = ray.put(train_loader)

        # Each task drives a single device, so it reserves one GPU. Requesting
        # two per task would keep the two tasks from running concurrently on a
        # two-GPU machine.
        @ray.remote(num_cpus=4, num_gpus=1)
        def train_remote(weights, dataloader, criterion):
            worker_net = Net()
            worker_net.load_state_dict(weights)
            worker_net.to("cuda")
            # Build the optimizer here so it is bound to this worker's
            # parameters rather than to a separately deserialized copy.
            worker_optimizer = optim.Adam(worker_net.parameters(), lr=learning_rate)
            loss = train(worker_net, dataloader, criterion, worker_optimizer)
            # Return CPU tensors so the driver does not need a GPU to read them.
            trained = {
                name: tensor.detach().cpu()
                for name, tensor in worker_net.state_dict().items()
            }
            return loss, trained

        for epoch in range(num_epochs):
            weights_ref = ray.put(net.state_dict())
            tasks = [
                train_remote.remote(weights_ref, loader_ref, criterion)
                for _ in range(num_workers)
            ]
            results = ray.get(tasks)
            losses = [loss for loss, _ in results]
            states = [state for _, state in results]

            # Merge the workers' results by element-wise parameter averaging.
            averaged = {
                name: torch.stack([state[name] for state in states]).mean(dim=0)
                for name in states[0]
            }
            net.load_state_dict(averaged)

            epoch_loss = sum(losses) / len(losses)
            print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {epoch_loss:.4f}")
    finally:
        # Always release Ray's resources, even when training fails.
        ray.shutdown()

    end_t = time.time() - start
    print(end_t)
# Run the training script only when executed directly, not on import.
if __name__ == '__main__':
    main()