# Untitled
#
# avatar
# unknown
# plain_text
# a year ago
# 2.3 kB
# 3
# Indexable
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

# Source table, written one genre per row so a record can be read straight
# across. np.nan marks a missing average rating; in this data it appears
# exactly where the matching author count is 0.
column_names = (
    'Genre',
    'Male_Authors',
    'Female_Authors',
    'Average_Rating_Male',
    'Average_Rating_Female',
    'Overall_Average',
)
genre_rows = [
    ('Drama', 7, 9, 4.21, 4.28, 4.25),
    ('Humor', 2, 0, 4.25, np.nan, 4.25),
    ('Literary Fiction', 2, 4, 3.75, 4.75, 4.42),
    ('Non-Fiction', 4, 5, 4.25, 3.3, 3.72),
    ('Philosophical', 5, 0, 4.4, np.nan, 4.4),
    ('Poetry', 3, 2, 4.17, 4, 4.1),
    ('Romance', 1, 14, 5, 2.43, 2.6),
    ('Science Fiction', 2, 3, 5, 4.67, 4.8),
    ('Thriller', 4, 7, 3.38, 3.5, 3.45),
    ('YA', 0, 6, np.nan, 2.42, 2.42),
]

# Transpose the records into the column-name -> list-of-values mapping that
# the DataFrame is built from.
data = {
    name: list(values)
    for name, values in zip(column_names, zip(*genre_rows))
}
df = pd.DataFrame(data)

# One-hot encode the genre column (the first genre level is dropped and acts
# as the baseline); the author-count columns are already numeric and are left
# as they are. dtype=float keeps the dummy columns numeric so the feature
# matrix is not a mix of integer and boolean columns.
# NOTE(review): every genre occurs exactly once in `df`, so each dummy column
# flags a single row and carries no information about held-out rows. Consider
# dropping the genre dummies or gathering several rows per genre.
df_encoded = pd.get_dummies(df, columns=['Genre'], drop_first=True, dtype=float)

# Feature matrix: everything except the three rating columns. None of the
# feature columns contain missing values, so no rows need removing here.
rating_columns = ['Average_Rating_Male', 'Average_Rating_Female', 'Overall_Average']
X = df_encoded.drop(columns=rating_columns)

# Remove missing ratings per target rather than across the whole frame. A
# genre that lacks only a female rating is still valid training data for the
# male model (and vice versa); a frame-wide dropna() would throw it away for
# both models.
y_male = df_encoded['Average_Rating_Male'].dropna()
y_female = df_encoded['Average_Rating_Female'].dropna()

# Split each target, together with the feature rows that share its index,
# into training and testing sets. The fixed random_state keeps the split
# reproducible between runs.
X_train_male, X_test_male, y_train_male, y_test_male = train_test_split(
    X.loc[y_male.index], y_male, test_size=0.2, random_state=42
)
X_train_female, X_test_female, y_train_female, y_test_female = train_test_split(
    X.loc[y_female.index], y_female, test_size=0.2, random_state=42
)

# Train one linear regression per target: male ratings first, then female.
# `fit` returns the estimator it was called on, so each model is constructed
# and trained in a single expression.
model_male = LinearRegression().fit(X_train_male, y_train_male)
model_female = LinearRegression().fit(X_train_female, y_train_female)

# Score the male-rating model on its held-out split: predict, then compute
# mean squared error and R-squared against the true ratings.
y_pred_male = model_male.predict(X_test_male)
mse_male = mean_squared_error(y_test_male, y_pred_male)
r2_male = r2_score(y_test_male, y_pred_male)

# Same evaluation for the female-rating model.
y_pred_female = model_female.predict(X_test_female)
mse_female = mean_squared_error(y_test_female, y_pred_female)
r2_female = r2_score(y_test_female, y_pred_female)

# Report both models, male first.
print(f"Male Model - Mean Squared Error: {mse_male}, R-squared: {r2_male}")
print(f"Female Model - Mean Squared Error: {mse_female}, R-squared: {r2_female}")
# Leave a Comment