Untitled
# Fit two small linear regressions -- one for the average rating of male
# authors and one for female authors -- using the per-genre author counts and
# one-hot encoded genre as features, then print MSE and R-squared on a
# held-out split.
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

# Columns that hold ratings; none of them may be used as a feature.
RATING_COLUMNS = ['Average_Rating_Male', 'Average_Rating_Female', 'Overall_Average']


def _features_and_target(frame, target):
    """Return ``(X, y)`` for one rating column.

    Only rows where ``target`` itself is missing are discarded, so a gap in
    the *other* gender's rating does not cost this model a usable sample.

    Args:
        frame: Encoded DataFrame containing every column in RATING_COLUMNS.
        target: Name of the rating column to predict.

    Returns:
        A ``(features, target_series)`` pair with identical row indices.
    """
    usable = frame.dropna(subset=[target])
    return usable.drop(columns=RATING_COLUMNS), usable[target]


# Creating a DataFrame from the provided data
data = {
    'Genre': ['Drama', 'Humor', 'Literary Fiction', 'Non-Fiction', 'Philosophical',
              'Poetry', 'Romance', 'Science Fiction', 'Thriller', 'YA'],
    'Male_Authors': [7, 2, 2, 4, 5, 3, 1, 2, 4, 0],
    'Female_Authors': [9, 0, 4, 5, 0, 2, 14, 3, 7, 6],
    'Average_Rating_Male': [4.21, 4.25, 3.75, 4.25, 4.4, 4.17, 5, 5, 3.38, np.nan],
    'Average_Rating_Female': [4.28, np.nan, 4.75, 3.3, np.nan, 4, 2.43, 4.67, 3.5, 2.42],
    'Overall_Average': [4.25, 4.25, 4.42, 3.72, 4.4, 4.1, 2.6, 4.8, 3.45, 2.42],
}
df = pd.DataFrame(data)

# Encoding the genre variable as dummy variables. dtype=int keeps the feature
# matrix uniformly numeric (newer pandas versions default to bool dummies).
# NOTE: every genre occurs exactly once, so each genre dummy is set in at most
# one row; a held-out row's genre is never seen during training and the scores
# printed below are indicative only.
df_encoded = pd.get_dummies(df, columns=['Genre'], drop_first=True, dtype=int)

# Building features/targets per model. NaN rows are dropped per target instead
# of globally: a blanket dropna() would throw away rows that are missing only
# the other gender's rating.
X_male, y_male = _features_and_target(df_encoded, 'Average_Rating_Male')
X_female, y_female = _features_and_target(df_encoded, 'Average_Rating_Female')

# Splitting the datasets into training and testing sets
X_train_male, X_test_male, y_train_male, y_test_male = train_test_split(
    X_male, y_male, test_size=0.2, random_state=42
)
X_train_female, X_test_female, y_train_female, y_test_female = train_test_split(
    X_female, y_female, test_size=0.2, random_state=42
)

# Creating and fitting the linear regression model for male authors
model_male = LinearRegression()
model_male.fit(X_train_male, y_train_male)

# Creating and fitting the linear regression model for female authors
model_female = LinearRegression()
model_female.fit(X_train_female, y_train_female)

# Making predictions
y_pred_male = model_male.predict(X_test_male)
y_pred_female = model_female.predict(X_test_female)

# Evaluating the models
mse_male = mean_squared_error(y_test_male, y_pred_male)
r2_male = r2_score(y_test_male, y_pred_male)
mse_female = mean_squared_error(y_test_female, y_pred_female)
r2_female = r2_score(y_test_female, y_pred_female)

print(f"Male Model - Mean Squared Error: {mse_male}, R-squared: {r2_male}")
print(f"Female Model - Mean Squared Error: {mse_female}, R-squared: {r2_female}")
Leave a Comment