Untitled
unknown
python
a year ago
1.9 kB
4
Indexable
from scipy.stats import pointbiserialr, chi2_contingency

# Measures how strongly each attribute is associated with the 'Revenue' column
# and plots the statistically significant ones.
#
# NOTE(review): this snippet relies on names it does not define -- `dataset`,
# `numerical_attributes`, `categorical_attributes`, and the `pd`, `plt`, `sns`
# aliases (presumably pandas, matplotlib.pyplot and seaborn, set up in an
# earlier notebook cell -- confirm before running it standalone).

# Attributes whose p-value is at or below this cutoff are kept for plotting.
SIGNIFICANCE_LEVEL = 0.05

# Analysis for Numerical Attributes: Calculating Point-Biserial Correlation
# Coefficient
numerical_correlation_results = []
for attr in numerical_attributes:
    # pointbiserialr documents the binary variable as its first argument.
    # The coefficient is symmetric in its arguments, so the order only
    # matters for readability.
    corr, p_value = pointbiserialr(dataset['Revenue'], dataset[attr])
    numerical_correlation_results.append((attr, corr, p_value))

# Creating a DataFrame for the results
numerical_corr_df = pd.DataFrame(
    numerical_correlation_results,
    columns=['Attribute', 'Correlation', 'P-Value'],
)

# Analysis for Categorical Attributes: Conducting Pearson Chi-Squared Test
categorical_correlation_results = []
for attr in categorical_attributes:
    contingency_table = pd.crosstab(dataset[attr], dataset['Revenue'])
    # Degrees of freedom and expected frequencies are returned as well but
    # are not needed here.
    chi2, p_value, _, _ = chi2_contingency(contingency_table)
    categorical_correlation_results.append((attr, chi2, p_value))

# Creating a DataFrame for the results
categorical_corr_df = pd.DataFrame(
    categorical_correlation_results,
    columns=['Attribute', 'Chi-Squared', 'P-Value'],
)

# Filtering attributes with p-value <= SIGNIFICANCE_LEVEL for visual
# representation
numerical_corr_significant = numerical_corr_df[
    numerical_corr_df['P-Value'] <= SIGNIFICANCE_LEVEL
]
categorical_corr_significant = categorical_corr_df[
    categorical_corr_df['P-Value'] <= SIGNIFICANCE_LEVEL
]

# Plotting significant correlations for numerical and categorical attributes
fig, axes = plt.subplots(2, 1, figsize=(10, 12))

# Numerical Attributes Bar Plot
sns.barplot(
    x='Correlation',
    y='Attribute',
    data=numerical_corr_significant,
    ax=axes[0],
)
axes[0].set_title(
    'Significant Point-Biserial Correlation Coefficients (Numerical Attributes)'
)

# Categorical Attributes Bar Plot
sns.barplot(
    x='Chi-Squared',
    y='Attribute',
    data=categorical_corr_significant,
    ax=axes[1],
)
axes[1].set_title('Significant Chi-Squared Statistics (Categorical Attributes)')

plt.tight_layout()
plt.show()

# Returning the results for analysis (a bare expression: it is displayed only
# when this is the last statement of an interactive/notebook cell).
numerical_corr_df, categorical_corr_df
Editor is loading...
Leave a Comment