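# NOTE: the lines below appear to be paste-site page residue, not program
# text; they are kept as comments so the file parses as Python.
# Untitled
#
# avatar
# unknown
# plain_text
# 2 years ago
# 904 B
# 6
# Indexable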
import torch
from torch.utils.data import Dataset, DataLoader
import pandas as pd

class LargeCSVDataset(Dataset):
    """Map-style dataset that loads one CSV row from disk per lookup.

    Only the row count is held in memory; every ``__getitem__`` call re-reads
    the file and parses a single row, so the file does not have to fit in RAM.
    The trade-off is speed: each lookup starts again from the top of the file.

    The file is expected to start with one header line. Within each row the
    last column is treated as the target and all preceding columns as
    features. Values must be numeric for ``torch.tensor`` to accept them.

    Args:
        csv_file: Path of the CSV file to read.
        dtype: Optional dtype specification forwarded to ``pandas.read_csv``
            (a single dtype or a ``{column: dtype}`` mapping). Because every
            lookup parses one row in isolation, pandas infers dtypes per row;
            a column holding ``1`` in one row and ``1.5`` in another would
            otherwise yield tensors of different dtypes. ``None`` (default)
            keeps pandas' own inference.
    """

    def __init__(self, csv_file, dtype=None):
        self.csv_file = csv_file
        self.dtype = dtype

        # Use a context manager so the handle is closed deterministically.
        # Decoding problems are irrelevant for counting lines, hence
        # errors="replace". Blank lines are excluded because pandas skips
        # them while parsing; counting a trailing blank line would advertise
        # an index that has no row behind it.
        with open(csv_file, encoding="utf-8", errors="replace") as handle:
            line_count = sum(1 for line in handle if line.strip())

        # Subtract the header line; clamp so an empty file gives length 0
        # rather than -1 (which would make len() raise ValueError).
        self.len = max(line_count - 1, 0)

    def __getitem__(self, index):
        """Return ``(features, target)`` tensors for the row at ``index``.

        Negative indexes count from the end, as with Python sequences.

        Raises:
            IndexError: If ``index`` is outside ``[-len(self), len(self))``.
        """
        position = index + self.len if index < 0 else index
        if not 0 <= position < self.len:
            raise IndexError(
                f"index {index} is out of range for dataset of length {self.len}"
            )

        # Keep line 0 (the header) and skip the `position` data lines before
        # the requested one, then parse exactly one row.
        data = pd.read_csv(
            self.csv_file,
            skiprows=range(1, position + 1),
            nrows=1,
            dtype=self.dtype,
        )

        # Last column is the target; everything before it is the features.
        target = torch.tensor(data.iloc[0, -1])
        features = torch.tensor(data.iloc[0, :-1].values)
        return features, target

    def __len__(self):
        """Return the number of data rows (header excluded)."""
        return self.len

# Build the dataset on top of the CSV file on disk.
dataset = LargeCSVDataset(csv_file='large_dataset.csv')

# Serve it in shuffled mini-batches of 32 samples.
dataloader = DataLoader(dataset=dataset, batch_size=32, shuffle=True)
# Editor is loading...