Untitled

 avatar
unknown
plain_text
2 years ago
2.0 kB
4
Indexable
# Dataset card metadata. Every field is an empty placeholder for now; only
# _DESCRIPTION and _HOMEPAGE are consumed by the builder's _info().
_CITATION = ""
_DESCRIPTION = ""
_URL = ""
_HOMEPAGE = ""
_LICENSE = ""

# Split name -> sub-directory (relative to the extracted data root) that
# holds the files belonging to that split.
DATA_DIR = {
    "train": "images",
}

import os
from typing import Union

import datasets
import pandas as pd

class KitchenWare(datasets.GeneratorBasedBuilder):
    """Dataset builder that pairs image file paths with labels from a CSV.

    The builder reads ``self.config.data_dir``, passes it through
    ``dl_manager.extract`` and expects the result to contain an ``images``
    directory (see ``DATA_DIR``) with a ``train.csv`` file next to it.
    """

    DEFAULT_WRITER_BATCH_SIZE = 256
    BUILDER_CONFIGS = [datasets.BuilderConfig(name="images", description="Train Set.")]

    def _info(self):
        """Describe the dataset: two string features, ``file`` and ``label``."""
        return datasets.DatasetInfo(
            description=_DESCRIPTION,
            features=datasets.Features(
                {"file": datasets.Value("string"), "label": datasets.Value("string")}
            ),
            homepage=_HOMEPAGE,
        )

    def _split_generators(self, dl_manager):
        """Return the split generators for the selected configuration.

        Args:
            dl_manager: download manager used to extract ``config.data_dir``.

        Returns:
            A list with a single ``train`` split generator.

        Raises:
            ValueError: if the configuration name is not ``"images"``.
        """
        # Fail with a clear message instead of hitting an unbound local
        # variable when an unknown configuration name is requested.
        if self.config.name != "images":
            raise ValueError(
                f"Unsupported config {self.config.name!r}; expected 'images'."
            )

        data_dir = dl_manager.extract(self.config.data_dir)
        return [
            datasets.SplitGenerator(
                name="train",
                gen_kwargs={"files": data_dir, "name": "train"},
            )
        ]

    def _generate_examples(self, files, name):
        """Yield ``(key, example)`` pairs for every labelled file of a split.

        Args:
            files: root directory of the extracted data.
            name: split name; must be a key of ``DATA_DIR``.

        Yields:
            ``(key, {"file": <path>, "label": <label>})`` where ``key`` is a
            running integer starting at 0.

        Raises:
            FileNotFoundError: if the split directory (or ``train.csv``)
                does not exist.
        """
        image_dir = os.path.join(files, DATA_DIR[name])
        # Check the directory before doing any other work so the error
        # points at the actual problem.
        if not os.path.isdir(image_dir):
            raise FileNotFoundError(f"Image directory not found: {image_dir}")

        # train.csv sits next to the image directory. Everything is read as
        # text so that IDs compare equal to file-name stems (numeric parsing
        # would, for instance, drop leading zeros).
        labels_frame = pd.read_csv(os.path.join(files, "train.csv"), dtype=str)
        # NOTE(review): the label column is assumed to be called "label"
        # (matching the feature declared in _info) -- confirm against the CSV.
        labels_frame = labels_frame.dropna(subset=["Id", "label"])
        label_by_id = dict(zip(labels_frame["Id"], labels_frame["label"]))

        key = 0
        # Sorted so that keys are assigned deterministically across runs.
        for file_name in sorted(os.listdir(image_dir)):
            image_id = file_name.split(".")[0]
            label = label_by_id.get(image_id)
            if label is None:
                # No row in train.csv for this file, so it cannot be part of
                # the labelled train split.
                # NOTE(review): confirm unlabelled files should be skipped.
                continue
            yield key, {"file": os.path.join(image_dir, file_name), "label": label}
            key += 1
Editor is loading...