Untitled
unknown
plain_text
2 years ago
2.0 kB
4
Indexable
"""Hugging Face ``datasets`` loading script for the ``KitchenWare`` builder.

Expected layout of the (extracted) data directory, as read by this script::

    <data_dir>/images/      one file per example
    <data_dir>/train.csv    table with an "Id" column matching file stems
"""

import os
from typing import Union

import datasets
import pandas as pd

_CITATION = ""
_DESCRIPTION = ""
_URL = ""
_HOMEPAGE = ""
_LICENSE = ""

# Maps a split name to the sub-directory (relative to the data root) that
# holds its files.
DATA_DIR = {"train": "images"}

# Column in train.csv holding the example id (the file name without extension).
_ID_COLUMN = "Id"
# NOTE(review): assumes train.csv stores the class name in a "label" column,
# mirroring the "label" feature declared in `_info` -- confirm against the CSV.
_LABEL_COLUMN = "label"


class KitchenWare(datasets.GeneratorBasedBuilder):
    """Builder yielding ``{"file": <path>, "label": <str or None>}`` examples."""

    DEFAULT_WRITER_BATCH_SIZE = 256
    BUILDER_CONFIGS = [datasets.BuilderConfig(name="images", description="Train Set.")]

    def _info(self):
        """Return the dataset metadata: two string features, ``file`` and ``label``."""
        return datasets.DatasetInfo(
            description=_DESCRIPTION,
            features=datasets.Features(
                {"file": datasets.Value("string"), "label": datasets.Value("string")}
            ),
            homepage=_HOMEPAGE,
        )

    def _split_generators(self, dl_manager):
        """Return the split generators for the selected config.

        Args:
            dl_manager: download manager used to extract ``config.data_dir``.

        Returns:
            A one-element list with the ``train`` split generator.

        Raises:
            ValueError: if no ``data_dir`` was supplied, or the config name is
                not ``"images"`` (previously the latter surfaced as an
                ``UnboundLocalError``).
        """
        if self.config.data_dir is None:
            raise ValueError(
                "KitchenWare requires `data_dir` to point at the dataset root."
            )
        if self.config.name != "images":
            raise ValueError(
                f"Unsupported config {self.config.name!r}; expected 'images'."
            )

        data_dir = dl_manager.extract(self.config.data_dir)
        return [
            datasets.SplitGenerator(
                name="train",
                # Keys must match the parameter names of `_generate_examples`.
                gen_kwargs={"files": data_dir, "name": "train"},
            )
        ]

    def _generate_examples(self, files, name):
        """Yield ``(key, example)`` pairs for every file of the given split.

        Args:
            files: path of the data root containing ``train.csv`` and the
                split's file directory.
            name: split name; must be a key of ``DATA_DIR``.

        Yields:
            ``(int, dict)`` tuples where the dict has the file's full path
            under ``"file"`` and its label string under ``"label"``. Files
            without a row in ``train.csv`` get ``None`` as label.

        Raises:
            FileNotFoundError: if the split directory (or ``train.csv``) is
                missing.
            KeyError: if ``train.csv`` lacks the id or label column.
        """
        image_dir = os.path.join(files, DATA_DIR[name])
        # Fail early with a useful message before touching the CSV.
        if not os.path.isdir(image_dir):
            raise FileNotFoundError(f"Data directory not found: {image_dir}")

        # Read ids as strings: letting pandas infer the dtype would turn ids
        # such as "0560" into the integer 560, which never equals a file stem.
        labels_df = pd.read_csv(
            os.path.join(files, "train.csv"), dtype={_ID_COLUMN: str}
        )
        for column in (_ID_COLUMN, _LABEL_COLUMN):
            if column not in labels_df.columns:
                raise KeyError(f"train.csv has no {column!r} column")

        # One dict lookup per file instead of filtering the frame every time.
        label_by_id = {
            file_id: (None if pd.isna(label) else str(label))
            for file_id, label in zip(
                labels_df[_ID_COLUMN], labels_df[_LABEL_COLUMN]
            )
        }

        # Sort so that keys are reproducible; os.listdir order is unspecified.
        for key, file_name in enumerate(sorted(os.listdir(image_dir))):
            file_id = file_name.split(".", 1)[0]
            yield key, {
                "file": os.path.join(image_dir, file_name),
                "label": label_by_id.get(file_id),
            }
Editor is loading...