Untitled

mail@pastecode.io avatar
unknown
python
a month ago
1.9 kB
2
Indexable
Never
from scipy.stats import pointbiserialr, chi2_contingency

# Numerical attributes: point-biserial correlation of every column against the
# 'Revenue' column (presumably a binary target -- confirm against the dataset).
# Each entry is an (attribute, coefficient, p-value) tuple.
numerical_correlation_results = [
    (name, *pointbiserialr(dataset[name], dataset['Revenue']))
    for name in numerical_attributes
]

# One row per numerical attribute.
numerical_corr_df = pd.DataFrame(
    data=numerical_correlation_results,
    columns=['Attribute', 'Correlation', 'P-Value'],
)

# Categorical attributes: chi-squared test of independence between every
# column and 'Revenue', computed on their cross-tabulated counts.
# NOTE: chi2_contingency is called with its defaults, so scipy applies Yates'
# continuity correction whenever the table has one degree of freedom (2x2).
categorical_correlation_results = []
for name in categorical_attributes:
    counts = pd.crosstab(dataset[name], dataset['Revenue'])
    # Only the statistic and p-value are kept; the degrees of freedom and the
    # expected-frequency table returned by the test are discarded.
    statistic, p_val = chi2_contingency(counts)[:2]
    categorical_correlation_results.append((name, statistic, p_val))

# One row per categorical attribute.
categorical_corr_df = pd.DataFrame(
    data=categorical_correlation_results,
    columns=['Attribute', 'Chi-Squared', 'P-Value'],
)

# Keep only the attributes whose test is significant at the 5% level
# (p-value <= 0.05); rows with a NaN p-value are dropped by the comparison.
numerical_corr_significant = numerical_corr_df.loc[
    numerical_corr_df['P-Value'].le(0.05)
]
categorical_corr_significant = categorical_corr_df.loc[
    categorical_corr_df['P-Value'].le(0.05)
]

# Two stacked panels: numerical attributes on top, categorical attributes below.
fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(10, 12))

# (axis, data, value column, title) for each horizontal bar chart.
panels = (
    (
        axes[0],
        numerical_corr_significant,
        'Correlation',
        'Significant Point-Biserial Correlation Coefficients (Numerical Attributes)',
    ),
    (
        axes[1],
        categorical_corr_significant,
        'Chi-Squared',
        'Significant Chi-Squared Statistics (Categorical Attributes)',
    ),
)
for panel_ax, frame, value_column, title in panels:
    sns.barplot(data=frame, x=value_column, y='Attribute', ax=panel_ax)
    panel_ax.set_title(title)

# Fit both panels inside the figure, then render it.
plt.tight_layout()
plt.show()

# Trailing bare expression: in a notebook cell or REPL this echoes both result
# tables as the output. Nothing is actually "returned" -- when the file runs as
# a plain script the tuple is built and immediately discarded.
numerical_corr_df, categorical_corr_df
Leave a Comment