trainer:
  _target_: pytorch_lightning.Trainer
  checkpoint_callback: true
  default_root_dir: null
  gradient_clip_val: 0.0
  process_position: 0
  num_nodes: 1
  num_processes: 1
  gpus: '0'
  auto_select_gpus: false
  tpu_cores: null
  log_gpu_memory: null
  progress_bar_refresh_rate: 50
  overfit_batches: 0.0
  track_grad_norm: -1
  check_val_every_n_epoch: 3
  fast_dev_run: false
  accumulate_grad_batches: 1
  max_epochs: 30000
  min_epochs: 10000
  max_steps: null
  min_steps: null
  limit_train_batches: 1.0
  limit_val_batches: 1.0
  limit_test_batches: 1.0
  val_check_interval: 1.0
  flush_logs_every_n_steps: 100
  log_every_n_steps: 50
  accelerator: null
  sync_batchnorm: false
  precision: 16
  weights_summary: top
  weights_save_path: null
  num_sanity_val_steps: 1
  truncated_bptt_steps: null
  resume_from_checkpoint: /content/mdx-net/last.ckpt
  profiler: null
  benchmark: true
  deterministic: false
  reload_dataloaders_every_epoch: false
  auto_lr_find: false
  replace_sampler_ddp: true
  terminate_on_nan: false
  auto_scale_batch_size: false
  prepare_data_per_node: true
  plugins: null
  amp_backend: native
  amp_level: O2
  move_metrics_to_cpu: false
datamodule:
  _target_: src.datamodules.musdb_datamodule.MusdbDataModule
  data_dir: ${data_dir}
  sample_rate: 44100
  hop_length: ${model.hop_length}
  dim_t: ${model.dim_t}
  overlap: 3072
  source_names:
  - vocals
  - other
  target_name: ${model.target_name}
  external_datasets: null
  batch_size: 7
  num_workers: 2
  pin_memory: true
  aug_params:
  - 0
  - 0
  validation_set:
  - um
  - dois
  - tres
  persistent_workers: true
callbacks:
  model_checkpoint:
    _target_: pytorch_lightning.callbacks.ModelCheckpoint
    monitor: val/sdr
    save_top_k: 5
    save_last: true
    mode: max
    verbose: false
    dirpath: checkpoints/
    filename: '{epoch:02d}'
  early_stopping:
    _target_: pytorch_lightning.callbacks.EarlyStopping
    monitor: val/sdr
    patience: 50
    mode: max
    min_delta: 0.05
  make_onnx:
    _target_: src.callbacks.onnx_callback.MakeONNXCallback
    dirpath: onnx/
  watch_model:
    _target_: src.callbacks.wandb_callbacks.WatchModel
    log: all
    log_freq: 100
logger:
  wandb:
    _target_: pytorch_lightning.loggers.wandb.WandbLogger
    project: mdx_${model.target_name}
    name: mdx_vocals
    save_dir: .
    offline: false
    id: null
    log_model: false
    prefix: ''
    job_type: train
    group: ''
    tags: []
model:
  _target_: src.models.mdxnet.ConvTDFNet
  target_name: vocals
  num_blocks: 11
  l: 3
  g: 32
  k: 3
  bn: 8
  bias: false
  n_fft: 6144
  dim_f: 2048
  dim_t: 256
  dim_c: 4
  hop_length: 1024
  overlap: 3072
  lr: 0.0001
  optimizer: rmsprop
seed: 2021
work_dir: ${hydra:runtime.cwd}
data_dir: /content/mdx-net/data
debug: false
print_config: true
ignore_warnings: true
wandb_api_key: bab2d296b6fdc27fbec5d1af44a4b8dd982c0c25
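
# Note on the config above: it is a composed Hydra config, and every block that carries a
# `_target_` key is turned into an object with hydra.utils.instantiate before training starts
# (that is what the "Instantiating datamodule/model/callback/..." log lines below correspond to).
# The repo's actual run.py and src/train.py are not included in this paste, so the following is
# only a minimal sketch of that standard Hydra pattern; the file name, config_path and
# config_name are assumptions, not the real mdx-net entry point.

import hydra
from hydra.utils import instantiate
from omegaconf import DictConfig

@hydra.main(config_path="configs", config_name="config")  # hypothetical location of the YAML above
def main(cfg: DictConfig) -> None:
    datamodule = instantiate(cfg.datamodule)              # -> MusdbDataModule
    model = instantiate(cfg.model)                        # -> ConvTDFNet
    callbacks = [instantiate(c) for c in cfg.callbacks.values()]
    logger = instantiate(cfg.logger.wandb)                # -> WandbLogger
    trainer = instantiate(cfg.trainer, callbacks=callbacks, logger=logger)
    trainer.fit(model=model, datamodule=datamodule)       # the call seen in src/train.py in the traceback

if __name__ == "__main__":
    main()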

[2023-05-23 07:12:33,951][src.utils.utils][INFO] - Disabling python warnings! <config.ignore_warnings=True>
⚙ CONFIG
├── trainer
│   └── _target_: pytorch_lightning.Trainer                                     
│       checkpoint_callback: true                                               
│       default_root_dir: null                                                  
│       gradient_clip_val: 0.0                                                  
│       process_position: 0                                                     
│       num_nodes: 1                                                            
│       num_processes: 1                                                        
│       gpus: '0'                                                               
│       auto_select_gpus: false                                                 
│       tpu_cores: null                                                         
│       log_gpu_memory: null                                                    
│       progress_bar_refresh_rate: 50                                           
│       overfit_batches: 0.0                                                    
│       track_grad_norm: -1                                                     
│       check_val_every_n_epoch: 3                                              
│       fast_dev_run: false                                                     
│       accumulate_grad_batches: 1                                              
│       max_epochs: 30000                                                       
│       min_epochs: 10000                                                       
│       max_steps: null                                                         
│       min_steps: null                                                         
│       limit_train_batches: 1.0                                                
│       limit_val_batches: 1.0                                                  
│       limit_test_batches: 1.0                                                 
│       val_check_interval: 1.0                                                 
│       flush_logs_every_n_steps: 100                                           
│       log_every_n_steps: 50                                                   
│       accelerator: null                                                       
│       sync_batchnorm: false                                                   
│       precision: 16                                                           
│       weights_summary: top                                                    
│       weights_save_path: null                                                 
│       num_sanity_val_steps: 1                                                 
│       truncated_bptt_steps: null                                              
│       resume_from_checkpoint: /content/mdx-net/last.ckpt                      
│       profiler: null                                                          
│       benchmark: true                                                         
│       deterministic: false                                                    
│       reload_dataloaders_every_epoch: false                                   
│       auto_lr_find: false                                                     
│       replace_sampler_ddp: true                                               
│       terminate_on_nan: false                                                 
│       auto_scale_batch_size: false                                            
│       prepare_data_per_node: true                                             
│       plugins: null                                                           
│       amp_backend: native                                                     
│       amp_level: O2                                                           
│       move_metrics_to_cpu: false                                              
│                                                                               
├── model
│   └── _target_: src.models.mdxnet.ConvTDFNet                                  
│       target_name: vocals                                                     
│       num_blocks: 11                                                          
│       l: 3                                                                    
│       g: 32                                                                   
│       k: 3                                                                    
│       bn: 8                                                                   
│       bias: false                                                             
│       n_fft: 6144                                                             
│       dim_f: 2048                                                             
│       dim_t: 256                                                              
│       dim_c: 4                                                                
│       hop_length: 1024                                                        
│       overlap: 3072                                                           
│       lr: 0.0001                                                              
│       optimizer: rmsprop                                                      
│                                                                               
├── datamodule
│   └── _target_: src.datamodules.musdb_datamodule.MusdbDataModule              
│       data_dir: /content/mdx-net/data                                         
│       sample_rate: 44100                                                      
│       hop_length: 1024                                                        
│       dim_t: 256                                                              
│       overlap: 3072                                                           
│       source_names:                                                           
│       - vocals                                                                
│       - other                                                                 
│       target_name: vocals                                                     
│       external_datasets: null                                                 
│       batch_size: 7                                                           
│       num_workers: 2                                                          
│       pin_memory: true                                                        
│       aug_params:                                                             
│       - 0                                                                     
│       - 0                                                                     
│       validation_set:                                                         
│       - um                                                                    
│       - dois                                                                  
│       - tres                                                                  
│       persistent_workers: true                                                
│                                                                               
├── callbacks
│   └── model_checkpoint:                                                       
│         _target_: pytorch_lightning.callbacks.ModelCheckpoint                 
│         monitor: val/sdr                                                      
│         save_top_k: 5                                                         
│         save_last: true                                                       
│         mode: max                                                             
│         verbose: false                                                        
│         dirpath: checkpoints/                                                 
│         filename: '{epoch:02d}'                                               
│       early_stopping:                                                         
│         _target_: pytorch_lightning.callbacks.EarlyStopping                   
│         monitor: val/sdr                                                      
│         patience: 50                                                          
│         mode: max                                                             
│         min_delta: 0.05                                                       
│       make_onnx:                                                              
│         _target_: src.callbacks.onnx_callback.MakeONNXCallback                
│         dirpath: onnx/                                                        
│       watch_model:                                                            
│         _target_: src.callbacks.wandb_callbacks.WatchModel                    
│         log: all                                                              
│         log_freq: 100                                                         
│                                                                               
├── logger
│   └── wandb:                                                                  
│         _target_: pytorch_lightning.loggers.wandb.WandbLogger                 
│         project: mdx_vocals                                                   
│         name: mdx_vocals                                                      
│         save_dir: .                                                           
│         offline: false                                                        
│         id: null                                                              
│         log_model: false                                                      
│         prefix: ''                                                            
│         job_type: train                                                       
│         group: ''                                                             
│         tags: []                                                              
│                                                                               
└── seed
    └── 2021                                                                    
Global seed set to 2021
[2023-05-23 07:12:34,076][src.train][INFO] - Instantiating datamodule <src.datamodules.musdb_datamodule.MusdbDataModule>
[2023-05-23 07:12:34,084][src.train][INFO] - Instantiating model <src.models.mdxnet.ConvTDFNet>
[2023-05-23 07:12:34,268][src.train][INFO] - Instantiating callback <pytorch_lightning.callbacks.ModelCheckpoint>
[2023-05-23 07:12:34,270][src.train][INFO] - Instantiating callback <pytorch_lightning.callbacks.EarlyStopping>
[2023-05-23 07:12:34,271][src.train][INFO] - Instantiating callback <src.callbacks.onnx_callback.MakeONNXCallback>
[2023-05-23 07:12:34,273][src.train][INFO] - Instantiating callback <src.callbacks.wandb_callbacks.WatchModel>
[2023-05-23 07:12:35,361][src.train][INFO] - Instantiating logger <pytorch_lightning.loggers.wandb.WandbLogger>
wandb: Currently logged in as: lucasrod1000. Use `wandb login --relogin` to force relogin
wandb: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[2023-05-23 07:12:36,935][src.train][INFO] - Instantiating trainer <pytorch_lightning.Trainer>
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
Using native 16bit precision.
[2023-05-23 07:12:36,982][src.train][INFO] - Logging hyperparameters!
wandb: wandb version 0.15.3 is available!  To upgrade, please run:
wandb:  $ pip install wandb --upgrade
wandb: Tracking run with wandb version 0.13.2
wandb: Run data is saved locally in ./wandb/run-20230523_071237-2vqrmm0h
wandb: Run `wandb offline` to turn off syncing.
wandb: Syncing run mdx_vocals
wandb: ⭐️ View project at https://wandb.ai/lucasrod1000/mdx_vocals
wandb: 🚀 View run at https://wandb.ai/lucasrod1000/mdx_vocals/runs/2vqrmm0h
[2023-05-23 07:12:37,489][src.train][INFO] - Starting training!
1it [00:00, 120.54it/s]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name             | Type       | Params
------------------------------------------------
0 | first_conv       | Sequential | 224   
1 | encoding_blocks  | ModuleList | 2.9 M 
2 | ds               | ModuleList | 288 K 
3 | bottleneck_block | TFC_TDF    | 998 K 
4 | decoding_blocks  | ModuleList | 2.9 M 
5 | us               | ModuleList | 288 K 
6 | final_conv       | Sequential | 132   
------------------------------------------------
7.4 M     Trainable params
1.1 M     Non-trainable params
8.5 M     Total params
33.916    Total estimated model params size (MB)
Restored states from the checkpoint file at /content/mdx-net/last.ckpt
Global seed set to 2021
Epoch 42:   0% 0/1296 [00:00<?, ?it/s]Error executing job with overrides: ['experiment=multigpu_vocals', 'model=ConvTDFNet_vocals']
Traceback (most recent call last):
  File "run.py", line 39, in <module>
    main()
  File "/usr/local/lib/python3.8/dist-packages/hydra/main.py", line 49, in decorated_main
    _run_hydra(
  File "/usr/local/lib/python3.8/dist-packages/hydra/_internal/utils.py", line 367, in _run_hydra
    run_and_report(
  File "/usr/local/lib/python3.8/dist-packages/hydra/_internal/utils.py", line 214, in run_and_report
    raise ex
  File "/usr/local/lib/python3.8/dist-packages/hydra/_internal/utils.py", line 211, in run_and_report
    return func()
  File "/usr/local/lib/python3.8/dist-packages/hydra/_internal/utils.py", line 368, in <lambda>
    lambda: hydra.run(
  File "/usr/local/lib/python3.8/dist-packages/hydra/_internal/hydra.py", line 110, in run
    _ = ret.return_value
  File "/usr/local/lib/python3.8/dist-packages/hydra/core/utils.py", line 233, in return_value
    raise self._return_value
  File "/usr/local/lib/python3.8/dist-packages/hydra/core/utils.py", line 160, in run_job
    ret.return_value = task_function(task_cfg)
  File "run.py", line 35, in main
    return train(config)
  File "/content/mdx-net/src/train.py", line 88, in train
    trainer.fit(model=model, datamodule=datamodule)
  File "/usr/local/lib/python3.8/dist-packages/pytorch_lightning/trainer/trainer.py", line 460, in fit
    self._run(model)
  File "/usr/local/lib/python3.8/dist-packages/pytorch_lightning/trainer/trainer.py", line 758, in _run
    self.dispatch()
  File "/usr/local/lib/python3.8/dist-packages/pytorch_lightning/trainer/trainer.py", line 799, in dispatch
    self.accelerator.start_training(self)
  File "/usr/local/lib/python3.8/dist-packages/pytorch_lightning/accelerators/accelerator.py", line 96, in start_training
    self.training_type_plugin.start_training(trainer)
  File "/usr/local/lib/python3.8/dist-packages/pytorch_lightning/plugins/training_type/training_type_plugin.py", line 144, in start_training
    self._results = trainer.run_stage()
  File "/usr/local/lib/python3.8/dist-packages/pytorch_lightning/trainer/trainer.py", line 809, in run_stage
    return self.run_train()
  File "/usr/local/lib/python3.8/dist-packages/pytorch_lightning/trainer/trainer.py", line 871, in run_train
    self.train_loop.run_training_epoch()
  File "/usr/local/lib/python3.8/dist-packages/pytorch_lightning/trainer/training_loop.py", line 491, in run_training_epoch
    for batch_idx, (batch, is_last_batch) in train_dataloader:
  File "/usr/local/lib/python3.8/dist-packages/pytorch_lightning/profiler/profilers.py", line 112, in profile_iterable
    value = next(iterator)
  File "/usr/local/lib/python3.8/dist-packages/pytorch_lightning/trainer/supporters.py", line 530, in prefetch_iterator
    last = next(it)
  File "/usr/local/lib/python3.8/dist-packages/pytorch_lightning/trainer/supporters.py", line 464, in __next__
    return self.request_next_batch(self.loader_iters)
  File "/usr/local/lib/python3.8/dist-packages/pytorch_lightning/trainer/supporters.py", line 478, in request_next_batch
    return apply_to_collection(loader_iters, Iterator, next)
  File "/usr/local/lib/python3.8/dist-packages/pytorch_lightning/utilities/apply_func.py", line 85, in apply_to_collection
    return function(data, *args, **kwargs)
  File "/usr/local/lib/python3.8/dist-packages/torch/utils/data/dataloader.py", line 521, in __next__
    data = self._next_data()
  File "/usr/local/lib/python3.8/dist-packages/torch/utils/data/dataloader.py", line 1203, in _next_data
    return self._process_data(data)
  File "/usr/local/lib/python3.8/dist-packages/torch/utils/data/dataloader.py", line 1229, in _process_data
    data.reraise()
  File "/usr/local/lib/python3.8/dist-packages/torch/_utils.py", line 425, in reraise
    raise self.exc_type(msg)
soundfile.LibsndfileError: <unprintable LibsndfileError object>
wandb: Waiting for W&B process to finish... (failed 1). Press Control-C to abort syncing.
wandb:                                                                                
wandb: Synced mdx_vocals: https://wandb.ai/lucasrod1000/mdx_vocals/runs/2vqrmm0h
wandb: Synced 5 W&B file(s), 0 media file(s), 0 artifact file(s) and 0 other file(s)
wandb: Find logs at: ./wandb/run-20230523_071237-2vqrmm0h/logs
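
# The traceback shows the run dying inside a DataLoader worker with soundfile.LibsndfileError,
# i.e. libsndfile could not open or decode one of the training audio files, and the worker
# re-raise ("<unprintable LibsndfileError object>") hides which file it was. One way to narrow
# it down is to scan the data directory directly. This is a debugging sketch, not code from the
# repo; it assumes the MUSDB-style data under /content/mdx-net/data is stored as .wav files.
# Older soundfile releases raise RuntimeError instead of LibsndfileError, hence both are caught.

from pathlib import Path
import soundfile as sf

data_dir = Path("/content/mdx-net/data")
bad_files = []
for path in sorted(data_dir.rglob("*.wav")):
    try:
        info = sf.info(str(path))                  # opens the file header via libsndfile
        _ = (info.samplerate, info.frames)         # basic sanity check on the decoded header
    except (sf.LibsndfileError, RuntimeError) as exc:
        bad_files.append(path)
        print(f"UNREADABLE: {path} -> {exc}")

print(f"done, {len(bad_files)} unreadable file(s) found")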