Untitled
unknown
plain_text
2 years ago
21 kB
15
Indexable
trainer: _target_: pytorch_lightning.Trainer checkpoint_callback: true default_root_dir: null gradient_clip_val: 0.0 process_position: 0 num_nodes: 1 num_processes: 1 gpus: '0' auto_select_gpus: false tpu_cores: null log_gpu_memory: null progress_bar_refresh_rate: 50 overfit_batches: 0.0 track_grad_norm: -1 check_val_every_n_epoch: 3 fast_dev_run: false accumulate_grad_batches: 1 max_epochs: 30000 min_epochs: 10000 max_steps: null min_steps: null limit_train_batches: 1.0 limit_val_batches: 1.0 limit_test_batches: 1.0 val_check_interval: 1.0 flush_logs_every_n_steps: 100 log_every_n_steps: 50 accelerator: null sync_batchnorm: false precision: 16 weights_summary: top weights_save_path: null num_sanity_val_steps: 1 truncated_bptt_steps: null resume_from_checkpoint: /content/mdx-net/last.ckpt profiler: null benchmark: true deterministic: false reload_dataloaders_every_epoch: false auto_lr_find: false replace_sampler_ddp: true terminate_on_nan: false auto_scale_batch_size: false prepare_data_per_node: true plugins: null amp_backend: native amp_level: O2 move_metrics_to_cpu: false datamodule: _target_: src.datamodules.musdb_datamodule.MusdbDataModule data_dir: ${data_dir} sample_rate: 44100 hop_length: ${model.hop_length} dim_t: ${model.dim_t} overlap: 3072 source_names: - vocals - other target_name: ${model.target_name} external_datasets: null batch_size: 7 num_workers: 2 pin_memory: true aug_params: - 0 - 0 validation_set: - um - dois - tres persistent_workers: true callbacks: model_checkpoint: _target_: pytorch_lightning.callbacks.ModelCheckpoint monitor: val/sdr save_top_k: 5 save_last: true mode: max verbose: false dirpath: checkpoints/ filename: '{epoch:02d}' early_stopping: _target_: pytorch_lightning.callbacks.EarlyStopping monitor: val/sdr patience: 50 mode: max min_delta: 0.05 make_onnx: _target_: src.callbacks.onnx_callback.MakeONNXCallback dirpath: onnx/ watch_model: _target_: src.callbacks.wandb_callbacks.WatchModel log: all log_freq: 100 logger: wandb: 
_target_: pytorch_lightning.loggers.wandb.WandbLogger project: mdx_${model.target_name} name: mdx_vocals save_dir: . offline: false id: null log_model: false prefix: '' job_type: train group: '' tags: [] model: _target_: src.models.mdxnet.ConvTDFNet target_name: vocals num_blocks: 11 l: 3 g: 32 k: 3 bn: 8 bias: false n_fft: 6144 dim_f: 2048 dim_t: 256 dim_c: 4 hop_length: 1024 overlap: 3072 lr: 0.0001 optimizer: rmsprop seed: 2021 work_dir: ${hydra:runtime.cwd} data_dir: /content/mdx-net/data debug: false print_config: true ignore_warnings: true wandb_api_key: <REDACTED — live W&B API key removed; revoke it and supply via the WANDB_API_KEY environment variable instead> [2023-05-23 07:12:33,951][src.utils.utils][INFO] - Disabling python warnings! <config.ignore_warnings=True> ⚙ CONFIG ├── trainer │ └── _target_: pytorch_lightning.Trainer │ checkpoint_callback: true │ default_root_dir: null │ gradient_clip_val: 0.0 │ process_position: 0 │ num_nodes: 1 │ num_processes: 1 │ gpus: '0' │ auto_select_gpus: false │ tpu_cores: null │ log_gpu_memory: null │ progress_bar_refresh_rate: 50 │ overfit_batches: 0.0 │ track_grad_norm: -1 │ check_val_every_n_epoch: 3 │ fast_dev_run: false │ accumulate_grad_batches: 1 │ max_epochs: 30000 │ min_epochs: 10000 │ max_steps: null │ min_steps: null │ limit_train_batches: 1.0 │ limit_val_batches: 1.0 │ limit_test_batches: 1.0 │ val_check_interval: 1.0 │ flush_logs_every_n_steps: 100 │ log_every_n_steps: 50 │ accelerator: null │ sync_batchnorm: false │ precision: 16 │ weights_summary: top │ weights_save_path: null │ num_sanity_val_steps: 1 │ truncated_bptt_steps: null │ resume_from_checkpoint: /content/mdx-net/last.ckpt │ profiler: null │ benchmark: true │ deterministic: false │ reload_dataloaders_every_epoch: false │ auto_lr_find: false │ replace_sampler_ddp: true │ terminate_on_nan: false │ auto_scale_batch_size: false │ prepare_data_per_node: true │ plugins: null │ amp_backend: native │ amp_level: O2 │ move_metrics_to_cpu: false │ ├── model │ └── _target_: src.models.mdxnet.ConvTDFNet │ target_name: vocals 
│ num_blocks: 11 │ l: 3 │ g: 32 │ k: 3 │ bn: 8 │ bias: false │ n_fft: 6144 │ dim_f: 2048 │ dim_t: 256 │ dim_c: 4 │ hop_length: 1024 │ overlap: 3072 │ lr: 0.0001 │ optimizer: rmsprop │ ├── datamodule │ └── _target_: src.datamodules.musdb_datamodule.MusdbDataModule │ data_dir: /content/mdx-net/data │ sample_rate: 44100 │ hop_length: 1024 │ dim_t: 256 │ overlap: 3072 │ source_names: │ - vocals │ - other │ target_name: vocals │ external_datasets: null │ batch_size: 7 │ num_workers: 2 │ pin_memory: true │ aug_params: │ - 0 │ - 0 │ validation_set: │ - um │ - dois │ - tres │ persistent_workers: true │ ├── callbacks │ └── model_checkpoint: │ _target_: pytorch_lightning.callbacks.ModelCheckpoint │ monitor: val/sdr │ save_top_k: 5 │ save_last: true │ mode: max │ verbose: false │ dirpath: checkpoints/ │ filename: '{epoch:02d}' │ early_stopping: │ _target_: pytorch_lightning.callbacks.EarlyStopping │ monitor: val/sdr │ patience: 50 │ mode: max │ min_delta: 0.05 │ make_onnx: │ _target_: src.callbacks.onnx_callback.MakeONNXCallback │ dirpath: onnx/ │ watch_model: │ _target_: src.callbacks.wandb_callbacks.WatchModel │ log: all │ log_freq: 100 │ ├── logger │ └── wandb: │ _target_: pytorch_lightning.loggers.wandb.WandbLogger │ project: mdx_vocals │ name: mdx_vocals │ save_dir: . 
│ offline: false │ id: null │ log_model: false │ prefix: '' │ job_type: train │ group: '' │ tags: [] │ └── seed └── 2021 Global seed set to 2021 [2023-05-23 07:12:34,076][src.train][INFO] - Instantiating datamodule <src.datamodules.musdb_datamodule.MusdbDataModule> [2023-05-23 07:12:34,084][src.train][INFO] - Instantiating model <src.models.mdxnet.ConvTDFNet> [2023-05-23 07:12:34,268][src.train][INFO] - Instantiating callback <pytorch_lightning.callbacks.ModelCheckpoint> [2023-05-23 07:12:34,270][src.train][INFO] - Instantiating callback <pytorch_lightning.callbacks.EarlyStopping> [2023-05-23 07:12:34,271][src.train][INFO] - Instantiating callback <src.callbacks.onnx_callback.MakeONNXCallback> [2023-05-23 07:12:34,273][src.train][INFO] - Instantiating callback <src.callbacks.wandb_callbacks.WatchModel> [2023-05-23 07:12:35,361][src.train][INFO] - Instantiating logger <pytorch_lightning.loggers.wandb.WandbLogger> wandb: Currently logged in as: lucasrod1000. Use `wandb login --relogin` to force relogin wandb: Appending key for api.wandb.ai to your netrc file: /root/.netrc [2023-05-23 07:12:36,935][src.train][INFO] - Instantiating trainer <pytorch_lightning.Trainer> GPU available: True, used: True TPU available: False, using: 0 TPU cores Using native 16bit precision. [2023-05-23 07:12:36,982][src.train][INFO] - Logging hyperparameters! wandb: wandb version 0.15.3 is available! To upgrade, please run: wandb: $ pip install wandb --upgrade wandb: Tracking run with wandb version 0.13.2 wandb: Run data is saved locally in ./wandb/run-20230523_071237-2vqrmm0h wandb: Run `wandb offline` to turn off syncing. wandb: Syncing run mdx_vocals wandb: ⭐️ View project at https://wandb.ai/lucasrod1000/mdx_vocals wandb: 🚀 View run at https://wandb.ai/lucasrod1000/mdx_vocals/runs/2vqrmm0h [2023-05-23 07:12:37,489][src.train][INFO] - Starting training! 
1it [00:00, 120.54it/s] LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0] | Name | Type | Params ------------------------------------------------ 0 | first_conv | Sequential | 224 1 | encoding_blocks | ModuleList | 2.9 M 2 | ds | ModuleList | 288 K 3 | bottleneck_block | TFC_TDF | 998 K 4 | decoding_blocks | ModuleList | 2.9 M 5 | us | ModuleList | 288 K 6 | final_conv | Sequential | 132 ------------------------------------------------ 7.4 M Trainable params 1.1 M Non-trainable params 8.5 M Total params 33.916 Total estimated model params size (MB) Restored states from the checkpoint file at /content/mdx-net/last.ckpt Global seed set to 2021 Epoch 42: 0% 0/1296 [00:00<?, ?it/s]Error executing job with overrides: ['experiment=multigpu_vocals', 'model=ConvTDFNet_vocals'] Traceback (most recent call last): File "run.py", line 39, in <module> main() File "/usr/local/lib/python3.8/dist-packages/hydra/main.py", line 49, in decorated_main _run_hydra( File "/usr/local/lib/python3.8/dist-packages/hydra/_internal/utils.py", line 367, in _run_hydra run_and_report( File "/usr/local/lib/python3.8/dist-packages/hydra/_internal/utils.py", line 214, in run_and_report raise ex File "/usr/local/lib/python3.8/dist-packages/hydra/_internal/utils.py", line 211, in run_and_report return func() File "/usr/local/lib/python3.8/dist-packages/hydra/_internal/utils.py", line 368, in <lambda> lambda: hydra.run( File "/usr/local/lib/python3.8/dist-packages/hydra/_internal/hydra.py", line 110, in run _ = ret.return_value File "/usr/local/lib/python3.8/dist-packages/hydra/core/utils.py", line 233, in return_value raise self._return_value File "/usr/local/lib/python3.8/dist-packages/hydra/core/utils.py", line 160, in run_job ret.return_value = task_function(task_cfg) File "run.py", line 35, in main return train(config) File "/content/mdx-net/src/train.py", line 88, in train trainer.fit(model=model, datamodule=datamodule) File "/usr/local/lib/python3.8/dist-packages/pytorch_lightning/trainer/trainer.py", 
line 460, in fit self._run(model) File "/usr/local/lib/python3.8/dist-packages/pytorch_lightning/trainer/trainer.py", line 758, in _run self.dispatch() File "/usr/local/lib/python3.8/dist-packages/pytorch_lightning/trainer/trainer.py", line 799, in dispatch self.accelerator.start_training(self) File "/usr/local/lib/python3.8/dist-packages/pytorch_lightning/accelerators/accelerator.py", line 96, in start_training self.training_type_plugin.start_training(trainer) File "/usr/local/lib/python3.8/dist-packages/pytorch_lightning/plugins/training_type/training_type_plugin.py", line 144, in start_training self._results = trainer.run_stage() File "/usr/local/lib/python3.8/dist-packages/pytorch_lightning/trainer/trainer.py", line 809, in run_stage return self.run_train() File "/usr/local/lib/python3.8/dist-packages/pytorch_lightning/trainer/trainer.py", line 871, in run_train self.train_loop.run_training_epoch() File "/usr/local/lib/python3.8/dist-packages/pytorch_lightning/trainer/training_loop.py", line 491, in run_training_epoch for batch_idx, (batch, is_last_batch) in train_dataloader: File "/usr/local/lib/python3.8/dist-packages/pytorch_lightning/profiler/profilers.py", line 112, in profile_iterable value = next(iterator) File "/usr/local/lib/python3.8/dist-packages/pytorch_lightning/trainer/supporters.py", line 530, in prefetch_iterator last = next(it) File "/usr/local/lib/python3.8/dist-packages/pytorch_lightning/trainer/supporters.py", line 464, in __next__ return self.request_next_batch(self.loader_iters) File "/usr/local/lib/python3.8/dist-packages/pytorch_lightning/trainer/supporters.py", line 478, in request_next_batch return apply_to_collection(loader_iters, Iterator, next) File "/usr/local/lib/python3.8/dist-packages/pytorch_lightning/utilities/apply_func.py", line 85, in apply_to_collection return function(data, *args, **kwargs) File "/usr/local/lib/python3.8/dist-packages/torch/utils/data/dataloader.py", line 521, in __next__ data = self._next_data() File 
"/usr/local/lib/python3.8/dist-packages/torch/utils/data/dataloader.py", line 1203, in _next_data return self._process_data(data) File "/usr/local/lib/python3.8/dist-packages/torch/utils/data/dataloader.py", line 1229, in _process_data data.reraise() File "/usr/local/lib/python3.8/dist-packages/torch/_utils.py", line 425, in reraise raise self.exc_type(msg) soundfile.LibsndfileError: <unprintable LibsndfileError object> wandb: Waiting for W&B process to finish... (failed 1). Press Control-C to abort syncing. wandb: wandb: Synced mdx_vocals: https://wandb.ai/lucasrod1000/mdx_vocals/runs/2vqrmm0h wandb: Synced 5 W&B file(s), 0 media file(s), 0 artifact file(s) and 0 other file(s) wandb: Find logs at: ./wandb/run-20230523_071237-2vqrmm0h/logs
Editor is loading...