Untitled

mail@pastecode.io avatar
unknown
plain_text
12 days ago
1.1 kB
4
Indexable
Never
from sklearn.utils import resample

# Rebalance the dataset by resampling every rating class to a fixed size and
# rebinding `df` to the combined result.
# NOTE(review): if a train/test split is made *after* this step, copies of the
# same row can land on both sides of the split -- confirm that resampling is
# only applied to training data.

# Separate each class
class_2 = df[df['Post_Rating_encoded'] == 2]  # Majority class
class_0 = df[df['Post_Rating_encoded'] == 0]  # Minority class
class_1 = df[df['Post_Rating_encoded'] == 1]  # Minority class
#class_3 = df[df['Post_Rating_encoded'] == 3]  # Minority class

# Oversample the minority classes. Sampling with replacement is what allows
# the requested size to exceed the number of rows available in the class.
class_0_upsampled = resample(class_0, replace=True, n_samples=500, random_state=42)
class_1_upsampled = resample(class_1, replace=True, n_samples=710, random_state=42)
#class_3_upsampled = resample(class_3, replace=True, n_samples=550, random_state=42)

# Downsample the majority class (optional step). Draw WITHOUT replacement so
# the kept rows are all distinct -- sampling with replacement here would
# introduce duplicates while throwing away more unique majority rows than
# necessary. Replacement is only used as a fallback when the class has fewer
# rows than the target, because resample() raises ValueError when asked for
# more rows than exist with replace=False.
class_2_target = 500
class_2_downsampled = resample(
    class_2,
    replace=len(class_2) < class_2_target,
    n_samples=class_2_target,
    random_state=42,
)

# Combine all the classes together.
# NOTE(review): rows keep their original index labels, so oversampled rows
# share duplicate labels; call df.reset_index(drop=True) afterwards if later
# code needs a unique index.
#df = pd.concat([class_2_downsampled, class_0_upsampled, class_1_upsampled, class_3_upsampled])
df = pd.concat([class_2_downsampled, class_0_upsampled, class_1_upsampled])

# Check the distribution of the balanced dataset
print(df['Post_Rating_encoded'].value_counts())
Leave a Comment