Untitled
unknown
plain_text
12 days ago
1.1 kB
4
Indexable
Never
from sklearn.utils import resample

# Rebalance `df` on the 'Post_Rating_encoded' label: the minority classes
# (0 and 1) are oversampled, the majority class (2) is downsampled, and the
# three resampled frames are concatenated back into `df`.
#
# Expects `df` (a DataFrame with a 'Post_Rating_encoded' column) and `pd`
# (pandas) to already be defined before this point.

# Separate each class.
class_2 = df[df['Post_Rating_encoded'] == 2]  # Majority class
class_0 = df[df['Post_Rating_encoded'] == 0]  # Minority class
class_1 = df[df['Post_Rating_encoded'] == 1]  # Minority class

# Oversample the minority classes. Sampling with replacement is required
# here because the target size may exceed the number of available rows.
class_0_upsampled = resample(
    class_0, replace=True, n_samples=500, random_state=42
)
# NOTE(review): class 1 is resampled to 710 rows while the other classes get
# 500, so the result is not perfectly balanced -- confirm this is intended.
class_1_upsampled = resample(
    class_1, replace=True, n_samples=710, random_state=42
)

# Downsample the majority class. Draw WITHOUT replacement so the 500 kept
# rows are distinct; sampling with replacement would throw away real rows
# while duplicating others. Fall back to replacement only if class 2 turns
# out to have fewer than 500 rows, where sampling without replacement would
# raise a ValueError.
class_2_downsampled = resample(
    class_2,
    replace=len(class_2) < 500,
    n_samples=500,
    random_state=42,
)

# Combine the resampled classes. Class 3 is currently left out: its
# separation/upsampling (target 550 rows) was disabled in the original script.
df = pd.concat([class_2_downsampled, class_0_upsampled, class_1_upsampled])

# Check the distribution of the rebalanced dataset.
print(df['Post_Rating_encoded'].value_counts())
Leave a Comment