Untitled
unknown
plain_text
3 years ago
21 kB
19
Indexable
trainer:
_target_: pytorch_lightning.Trainer
checkpoint_callback: true
default_root_dir: null
gradient_clip_val: 0.0
process_position: 0
num_nodes: 1
num_processes: 1
gpus: '0'
auto_select_gpus: false
tpu_cores: null
log_gpu_memory: null
progress_bar_refresh_rate: 50
overfit_batches: 0.0
track_grad_norm: -1
check_val_every_n_epoch: 3
fast_dev_run: false
accumulate_grad_batches: 1
max_epochs: 30000
min_epochs: 10000
max_steps: null
min_steps: null
limit_train_batches: 1.0
limit_val_batches: 1.0
limit_test_batches: 1.0
val_check_interval: 1.0
flush_logs_every_n_steps: 100
log_every_n_steps: 50
accelerator: null
sync_batchnorm: false
precision: 16
weights_summary: top
weights_save_path: null
num_sanity_val_steps: 1
truncated_bptt_steps: null
resume_from_checkpoint: /content/mdx-net/last.ckpt
profiler: null
benchmark: true
deterministic: false
reload_dataloaders_every_epoch: false
auto_lr_find: false
replace_sampler_ddp: true
terminate_on_nan: false
auto_scale_batch_size: false
prepare_data_per_node: true
plugins: null
amp_backend: native
amp_level: O2
move_metrics_to_cpu: false
datamodule:
_target_: src.datamodules.musdb_datamodule.MusdbDataModule
data_dir: ${data_dir}
sample_rate: 44100
hop_length: ${model.hop_length}
dim_t: ${model.dim_t}
overlap: 3072
source_names:
- vocals
- other
target_name: ${model.target_name}
external_datasets: null
batch_size: 7
num_workers: 2
pin_memory: true
aug_params:
- 0
- 0
validation_set:
- um
- dois
- tres
persistent_workers: true
callbacks:
model_checkpoint:
_target_: pytorch_lightning.callbacks.ModelCheckpoint
monitor: val/sdr
save_top_k: 5
save_last: true
mode: max
verbose: false
dirpath: checkpoints/
filename: '{epoch:02d}'
early_stopping:
_target_: pytorch_lightning.callbacks.EarlyStopping
monitor: val/sdr
patience: 50
mode: max
min_delta: 0.05
make_onnx:
_target_: src.callbacks.onnx_callback.MakeONNXCallback
dirpath: onnx/
watch_model:
_target_: src.callbacks.wandb_callbacks.WatchModel
log: all
log_freq: 100
logger:
wandb:
_target_: pytorch_lightning.loggers.wandb.WandbLogger
project: mdx_${model.target_name}
name: mdx_vocals
save_dir: .
offline: false
id: null
log_model: false
prefix: ''
job_type: train
group: ''
tags: []
model:
_target_: src.models.mdxnet.ConvTDFNet
target_name: vocals
num_blocks: 11
l: 3
g: 32
k: 3
bn: 8
bias: false
n_fft: 6144
dim_f: 2048
dim_t: 256
dim_c: 4
hop_length: 1024
overlap: 3072
lr: 0.0001
optimizer: rmsprop
seed: 2021
work_dir: ${hydra:runtime.cwd}
data_dir: /content/mdx-net/data
debug: false
print_config: true
ignore_warnings: true
# SECURITY: a live W&B API key was committed here in plain text — it has been
# redacted. Rotate the leaked key and supply it via the WANDB_API_KEY
# environment variable (or a secret store) instead of storing it in config.
wandb_api_key: '<REDACTED — set via WANDB_API_KEY env var>'
[2023-05-23 07:12:33,951][src.utils.utils][INFO] - Disabling python warnings! <config.ignore_warnings=True>
⚙ CONFIG
├── trainer
│ └── _target_: pytorch_lightning.Trainer
│ checkpoint_callback: true
│ default_root_dir: null
│ gradient_clip_val: 0.0
│ process_position: 0
│ num_nodes: 1
│ num_processes: 1
│ gpus: '0'
│ auto_select_gpus: false
│ tpu_cores: null
│ log_gpu_memory: null
│ progress_bar_refresh_rate: 50
│ overfit_batches: 0.0
│ track_grad_norm: -1
│ check_val_every_n_epoch: 3
│ fast_dev_run: false
│ accumulate_grad_batches: 1
│ max_epochs: 30000
│ min_epochs: 10000
│ max_steps: null
│ min_steps: null
│ limit_train_batches: 1.0
│ limit_val_batches: 1.0
│ limit_test_batches: 1.0
│ val_check_interval: 1.0
│ flush_logs_every_n_steps: 100
│ log_every_n_steps: 50
│ accelerator: null
│ sync_batchnorm: false
│ precision: 16
│ weights_summary: top
│ weights_save_path: null
│ num_sanity_val_steps: 1
│ truncated_bptt_steps: null
│ resume_from_checkpoint: /content/mdx-net/last.ckpt
│ profiler: null
│ benchmark: true
│ deterministic: false
│ reload_dataloaders_every_epoch: false
│ auto_lr_find: false
│ replace_sampler_ddp: true
│ terminate_on_nan: false
│ auto_scale_batch_size: false
│ prepare_data_per_node: true
│ plugins: null
│ amp_backend: native
│ amp_level: O2
│ move_metrics_to_cpu: false
│
├── model
│ └── _target_: src.models.mdxnet.ConvTDFNet
│ target_name: vocals
│ num_blocks: 11
│ l: 3
│ g: 32
│ k: 3
│ bn: 8
│ bias: false
│ n_fft: 6144
│ dim_f: 2048
│ dim_t: 256
│ dim_c: 4
│ hop_length: 1024
│ overlap: 3072
│ lr: 0.0001
│ optimizer: rmsprop
│
├── datamodule
│ └── _target_: src.datamodules.musdb_datamodule.MusdbDataModule
│ data_dir: /content/mdx-net/data
│ sample_rate: 44100
│ hop_length: 1024
│ dim_t: 256
│ overlap: 3072
│ source_names:
│ - vocals
│ - other
│ target_name: vocals
│ external_datasets: null
│ batch_size: 7
│ num_workers: 2
│ pin_memory: true
│ aug_params:
│ - 0
│ - 0
│ validation_set:
│ - um
│ - dois
│ - tres
│ persistent_workers: true
│
├── callbacks
│ └── model_checkpoint:
│ _target_: pytorch_lightning.callbacks.ModelCheckpoint
│ monitor: val/sdr
│ save_top_k: 5
│ save_last: true
│ mode: max
│ verbose: false
│ dirpath: checkpoints/
│ filename: '{epoch:02d}'
│ early_stopping:
│ _target_: pytorch_lightning.callbacks.EarlyStopping
│ monitor: val/sdr
│ patience: 50
│ mode: max
│ min_delta: 0.05
│ make_onnx:
│ _target_: src.callbacks.onnx_callback.MakeONNXCallback
│ dirpath: onnx/
│ watch_model:
│ _target_: src.callbacks.wandb_callbacks.WatchModel
│ log: all
│ log_freq: 100
│
├── logger
│ └── wandb:
│ _target_: pytorch_lightning.loggers.wandb.WandbLogger
│ project: mdx_vocals
│ name: mdx_vocals
│ save_dir: .
│ offline: false
│ id: null
│ log_model: false
│ prefix: ''
│ job_type: train
│ group: ''
│ tags: []
│
└── seed
└── 2021
Global seed set to 2021
[2023-05-23 07:12:34,076][src.train][INFO] - Instantiating datamodule <src.datamodules.musdb_datamodule.MusdbDataModule>
[2023-05-23 07:12:34,084][src.train][INFO] - Instantiating model <src.models.mdxnet.ConvTDFNet>
[2023-05-23 07:12:34,268][src.train][INFO] - Instantiating callback <pytorch_lightning.callbacks.ModelCheckpoint>
[2023-05-23 07:12:34,270][src.train][INFO] - Instantiating callback <pytorch_lightning.callbacks.EarlyStopping>
[2023-05-23 07:12:34,271][src.train][INFO] - Instantiating callback <src.callbacks.onnx_callback.MakeONNXCallback>
[2023-05-23 07:12:34,273][src.train][INFO] - Instantiating callback <src.callbacks.wandb_callbacks.WatchModel>
[2023-05-23 07:12:35,361][src.train][INFO] - Instantiating logger <pytorch_lightning.loggers.wandb.WandbLogger>
wandb: Currently logged in as: lucasrod1000. Use `wandb login --relogin` to force relogin
wandb: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[2023-05-23 07:12:36,935][src.train][INFO] - Instantiating trainer <pytorch_lightning.Trainer>
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
Using native 16bit precision.
[2023-05-23 07:12:36,982][src.train][INFO] - Logging hyperparameters!
wandb: wandb version 0.15.3 is available! To upgrade, please run:
wandb: $ pip install wandb --upgrade
wandb: Tracking run with wandb version 0.13.2
wandb: Run data is saved locally in ./wandb/run-20230523_071237-2vqrmm0h
wandb: Run `wandb offline` to turn off syncing.
wandb: Syncing run mdx_vocals
wandb: ⭐️ View project at https://wandb.ai/lucasrod1000/mdx_vocals
wandb: 🚀 View run at https://wandb.ai/lucasrod1000/mdx_vocals/runs/2vqrmm0h
[2023-05-23 07:12:37,489][src.train][INFO] - Starting training!
1it [00:00, 120.54it/s]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
| Name | Type | Params
------------------------------------------------
0 | first_conv | Sequential | 224
1 | encoding_blocks | ModuleList | 2.9 M
2 | ds | ModuleList | 288 K
3 | bottleneck_block | TFC_TDF | 998 K
4 | decoding_blocks | ModuleList | 2.9 M
5 | us | ModuleList | 288 K
6 | final_conv | Sequential | 132
------------------------------------------------
7.4 M Trainable params
1.1 M Non-trainable params
8.5 M Total params
33.916 Total estimated model params size (MB)
Restored states from the checkpoint file at /content/mdx-net/last.ckpt
Global seed set to 2021
Epoch 42: 0% 0/1296 [00:00<?, ?it/s]Error executing job with overrides: ['experiment=multigpu_vocals', 'model=ConvTDFNet_vocals']
Traceback (most recent call last):
File "run.py", line 39, in <module>
main()
File "/usr/local/lib/python3.8/dist-packages/hydra/main.py", line 49, in decorated_main
_run_hydra(
File "/usr/local/lib/python3.8/dist-packages/hydra/_internal/utils.py", line 367, in _run_hydra
run_and_report(
File "/usr/local/lib/python3.8/dist-packages/hydra/_internal/utils.py", line 214, in run_and_report
raise ex
File "/usr/local/lib/python3.8/dist-packages/hydra/_internal/utils.py", line 211, in run_and_report
return func()
File "/usr/local/lib/python3.8/dist-packages/hydra/_internal/utils.py", line 368, in <lambda>
lambda: hydra.run(
File "/usr/local/lib/python3.8/dist-packages/hydra/_internal/hydra.py", line 110, in run
_ = ret.return_value
File "/usr/local/lib/python3.8/dist-packages/hydra/core/utils.py", line 233, in return_value
raise self._return_value
File "/usr/local/lib/python3.8/dist-packages/hydra/core/utils.py", line 160, in run_job
ret.return_value = task_function(task_cfg)
File "run.py", line 35, in main
return train(config)
File "/content/mdx-net/src/train.py", line 88, in train
trainer.fit(model=model, datamodule=datamodule)
File "/usr/local/lib/python3.8/dist-packages/pytorch_lightning/trainer/trainer.py", line 460, in fit
self._run(model)
File "/usr/local/lib/python3.8/dist-packages/pytorch_lightning/trainer/trainer.py", line 758, in _run
self.dispatch()
File "/usr/local/lib/python3.8/dist-packages/pytorch_lightning/trainer/trainer.py", line 799, in dispatch
self.accelerator.start_training(self)
File "/usr/local/lib/python3.8/dist-packages/pytorch_lightning/accelerators/accelerator.py", line 96, in start_training
self.training_type_plugin.start_training(trainer)
File "/usr/local/lib/python3.8/dist-packages/pytorch_lightning/plugins/training_type/training_type_plugin.py", line 144, in start_training
self._results = trainer.run_stage()
File "/usr/local/lib/python3.8/dist-packages/pytorch_lightning/trainer/trainer.py", line 809, in run_stage
return self.run_train()
File "/usr/local/lib/python3.8/dist-packages/pytorch_lightning/trainer/trainer.py", line 871, in run_train
self.train_loop.run_training_epoch()
File "/usr/local/lib/python3.8/dist-packages/pytorch_lightning/trainer/training_loop.py", line 491, in run_training_epoch
for batch_idx, (batch, is_last_batch) in train_dataloader:
File "/usr/local/lib/python3.8/dist-packages/pytorch_lightning/profiler/profilers.py", line 112, in profile_iterable
value = next(iterator)
File "/usr/local/lib/python3.8/dist-packages/pytorch_lightning/trainer/supporters.py", line 530, in prefetch_iterator
last = next(it)
File "/usr/local/lib/python3.8/dist-packages/pytorch_lightning/trainer/supporters.py", line 464, in __next__
return self.request_next_batch(self.loader_iters)
File "/usr/local/lib/python3.8/dist-packages/pytorch_lightning/trainer/supporters.py", line 478, in request_next_batch
return apply_to_collection(loader_iters, Iterator, next)
File "/usr/local/lib/python3.8/dist-packages/pytorch_lightning/utilities/apply_func.py", line 85, in apply_to_collection
return function(data, *args, **kwargs)
File "/usr/local/lib/python3.8/dist-packages/torch/utils/data/dataloader.py", line 521, in __next__
data = self._next_data()
File "/usr/local/lib/python3.8/dist-packages/torch/utils/data/dataloader.py", line 1203, in _next_data
return self._process_data(data)
File "/usr/local/lib/python3.8/dist-packages/torch/utils/data/dataloader.py", line 1229, in _process_data
data.reraise()
File "/usr/local/lib/python3.8/dist-packages/torch/_utils.py", line 425, in reraise
raise self.exc_type(msg)
soundfile.LibsndfileError: <unprintable LibsndfileError object>
wandb: Waiting for W&B process to finish... (failed 1). Press Control-C to abort syncing.
wandb:
wandb: Synced mdx_vocals: https://wandb.ai/lucasrod1000/mdx_vocals/runs/2vqrmm0h
wandb: Synced 5 W&B file(s), 0 media file(s), 0 artifact file(s) and 0 other file(s)
wandb: Find logs at: ./wandb/run-20230523_071237-2vqrmm0h/logs