parte1

mail@pastecode.io avatar
unknown
python
2 months ago
4.0 kB
2
Indexable
Never
# # RAM problems are expected. Use 'del' and gc.collect() extensively.
# genre_path = '/content/Music_Genres_Dataset'

# # Song durations
# # Initialize all of the genres to avoid key error when adding song durations.
# song_data_backup = {'Name': np.array([]),
#              'Genre' : np.array([]),
#               'duration': np.array([]),
#               'sr': np.array([]),
#               'n_channels': np.array([]),
#              'est_bpm' : np.array([]),
#              'avg_amplitude' : np.array([]),
#              'fft_transform' : np.array([]),
#              'n_channels' : np.array([])
#               }




# for genre in genre_names:
#   songs_names = os.listdir(f'{genre_path}/{genre}')
#   # Use AudioUtil to read song.
#   song_path = f'{genre_path}/{genre}'
#   print(f'ANALYZING GENRE: {genre}')
#   song_data = song_data_backup
#   # songslist = [AudioUtil().open(f'{song_path}/{song_name}') for song_name in songs_names]
#   for song in songs_names:
#     sig, sr = AudioUtil().open(f'{song_path}/{song}')
#     # song_durations[genre][song] = [len(sig[1]) / sr, sig, sr]
#     song_data['Name'] = np.append(song_data['Name'], song)
#     song_data['Genre'] = np.append(song_data['Genre'], genre)
#     song_data['duration'] = np.append(song_data['duration'], len(sig[1]) / sr)
#     song_data['sr'] = np.append(song_data['sr'], sr)
#     # np.append(song_data['sig'], sig) ### too much data
#     song_data['est_bpm'] = np.append(song_data['est_bpm'],
#                                      AudioUtil().get_tempo(sig=sig, sr=sr))
#     song_data['avg_amplitude'] = np.append(song_data['avg_amplitude'],
#                                            torch.mean(torch.abs(sig)))
#     song_data['fft_transform'] = np.append(song_data['fft_transform'],
#                                            np.fft.fft(sig))

#     song_data['n_channels'] = np.append(song_data['n_channels'], sig.shape[0])
#     del sig, sr
#     gc.collect()
#   df_song_data = pd.DataFrame.from_dict(song_data)
#   df_song_data.to_csv(f'./csv/{genre}')
#   del df_song_data
#   del song_data
#   gc.collect()


# Extract one row of summary features per song and stream it to a per-genre
# CSV file (./csv/<genre>_data.csv), so that no more than one song's audio is
# held in memory at a time.
genre_path = './content/Music_Genres_Dataset'

# RAM problems are expected: release each decoded signal with 'del' and
# gc.collect() as soon as its features have been computed.
# The keys below define the CSV columns and their order.
song_data_backup = {
    'Name': [],
    'Genre': [],
    'duration': [],
    'sr': [],
    'n_channels': [],
    'est_bpm': [],
    'avg_amplitude': [],
    # 'fft_transform': [],
}

# Empty frame used only to write the header row of each genre's CSV.
df_data_template = pd.DataFrame.from_dict(song_data_backup)

# Output directory; created up front so the first to_csv() cannot fail on a
# missing folder.
sub_path = './csv'
os.makedirs(sub_path, exist_ok=True)

for genre in genre_names:
    songs_names = os.listdir(f'{genre_path}/{genre}')
    print(f'ANALYZING GENRE: {genre}')

    csv_path = f'{sub_path}/{genre}_data.csv'
    # (Re)create the file with just the header. index=False keeps the header
    # aligned with the rows appended below, which are also written without an
    # index column.
    df_data_template.to_csv(csv_path, index=False)

    for song in songs_names:
        print(f'ANALYZING SONG: {song}')
        sig, sr = AudioUtil().open(f'{genre_path}/{genre}/{song}')

        song_data = {
            'Name': song,
            'Genre': genre,
            # Samples per channel divided by sample rate. shape[-1] is used
            # instead of len(sig[1]) so single-channel audio does not raise
            # IndexError.
            # NOTE(review): assumes sig is laid out (channels, samples), as
            # implied by n_channels = sig.shape[0] -- confirm in AudioUtil.
            'duration': sig.shape[-1] / sr,
            'sr': sr,
            'n_channels': sig.shape[0],
            # NOTE(review): get_tempo presumably returns a sequence whose
            # first element is the BPM estimate -- confirm in AudioUtil.
            'est_bpm': AudioUtil().get_tempo(sig, sr)[0],
            'avg_amplitude': torch.mean(torch.abs(sig)).item(),
            # 'fft_transform': [fft_transform],
        }
        # The raw signal is the large object; drop it before building the row.
        del sig, sr
        gc.collect()

        print('Appending features')
        # A dict of scalars must be wrapped in a list to form a one-row
        # frame; DataFrame.from_dict(song_data) raises ValueError here.
        # Passing columns pins the column order to the header's.
        row = pd.DataFrame([song_data], columns=list(song_data_backup))
        row.to_csv(csv_path, mode='a', header=False, index=False)

        del song_data, row
        gc.collect()

    gc.collect()

Leave a Comment