Untitled
unknown
plain_text
a year ago
1.6 kB
4
Indexable
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
from scipy import stats

# Example data: two numeric variables and a two-level cohort label.
vpca_df = pd.DataFrame({
    "Var1": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
    "Var2": [10, 9, 8, 7, 6, 5, 4, 3, 2, 1],
    "Cohort": ["A", "A", "A", "A", "A", "B", "B", "B", "B", "B"],
})


def box_scatter(x, df, *, cohort_col="Cohort", cohorts=("A", "B")):
    """Draw a horizontal box plot with overlaid points and a Mann-Whitney p-value.

    One box is drawn per value of ``cohort_col``; every observation is
    overlaid as a strip-plot point. The values of ``x`` in the two cohorts
    named by ``cohorts`` are compared with ``scipy.stats.mannwhitneyu``
    (default settings) and the resulting p-value is shown in the title.
    The figure is displayed with ``plt.show()``.

    Args:
        x (str): Name of the numeric column to visualize and test.
        df (pandas.DataFrame): Data containing ``x`` and ``cohort_col``.
        cohort_col (str): Name of the grouping column. Defaults to "Cohort".
        cohorts (tuple): The two cohort labels to compare. Defaults to
            ("A", "B").

    Returns:
        None

    Raises:
        ValueError: If ``cohorts`` does not contain exactly two labels.
    """
    # Tuple unpacking enforces "exactly two labels" up front.
    first_label, second_label = cohorts

    sns.set_theme(style="ticks")

    # Create the figure; the axis keeps its default (linear) scale.
    _fig, ax = plt.subplots(figsize=(7, 6))

    # Horizontal boxes, one per cohort. whis=[0, 100] stretches the whiskers
    # to the 0th and 100th percentiles, i.e. the full data range.
    sns.boxplot(
        data=df,
        x=x,
        y=cohort_col,
        hue=cohort_col,
        whis=[0, 100],
        width=0.6,
        palette="vlag",
        ax=ax,
    )

    # Add points to show each observation.
    sns.stripplot(data=df, x=x, y=cohort_col, size=4, ax=ax)

    # Mann-Whitney U test between the two selected cohorts. This is a
    # rank-based test, so no variance assumption is being chosen here.
    first_values = df.loc[df[cohort_col] == first_label, x]
    second_values = df.loc[df[cohort_col] == second_label, x]
    _, pval = stats.mannwhitneyu(first_values, second_values)

    # Tweak the visual presentation.
    ax.xaxis.grid(True)
    ax.set(ylabel="")
    sns.despine(ax=ax, trim=True, left=True)

    # Report the test result in the plot title.
    ax.set_title(f"{x} distribution (p-value: {pval:.4f})")

    plt.show()


# Plot every column except the cohort label, selected by name rather than
# by position so the column order of the frame does not matter.
for col in vpca_df.columns.drop("Cohort"):
    box_scatter(col, vpca_df)
Editor is loading...
Leave a Comment