# Untitled
# unknown
# plain_text
# 2 years ago
# 1.6 kB
# 8
# Indexable
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
from scipy import stats
# Example data: Var1 ascends 1..10, Var2 descends 10..1, and the first five
# rows belong to cohort "A" while the last five belong to cohort "B".
vpca_df = pd.DataFrame(
    dict(
        Var1=list(range(1, 11)),
        Var2=list(range(10, 0, -1)),
        Cohort=["A"] * 5 + ["B"] * 5,
    )
)
def box_scatter(x, df, cohort_col="Cohort", cohorts=("A", "B")):
    """Plot one column by cohort as boxes plus points, with a Mann-Whitney U p-value.

    Draws a horizontal box plot of ``df[x]`` grouped by ``cohort_col``,
    overlays every observation as a strip plot, compares the two cohorts
    named in ``cohorts`` with a two-sided Mann-Whitney U test, writes the
    p-value into the title and shows the figure.

    Args:
        x (str): Name of the column in ``df`` to plot and test.
        df (pandas.DataFrame): Frame holding column ``x`` and ``cohort_col``.
        cohort_col (str): Name of the grouping column. Defaults to "Cohort".
        cohorts (tuple): The two labels in ``cohort_col`` whose values are
            compared by the test. Defaults to ("A", "B").

    Returns:
        None
    """
    sns.set_theme(style="ticks")

    # Create the figure; the x axis keeps matplotlib's default (linear) scale.
    _fig, ax = plt.subplots(figsize=(7, 6))

    # Horizontal boxes, one per cohort. whis=[0, 100] stretches the whiskers
    # to the 0th/100th percentiles, i.e. the full data range. The frame is
    # passed as data= and the axes as ax= so nothing relies on positional
    # arguments or on matplotlib's implicit "current axes".
    sns.boxplot(
        data=df,
        x=x,
        y=cohort_col,
        hue=cohort_col,
        whis=[0, 100],
        width=0.6,
        palette="vlag",
        ax=ax,
    )

    # Overlay one point per observation.
    sns.stripplot(data=df, x=x, y=cohort_col, size=4, ax=ax)

    # Mann-Whitney U is rank-based, so it makes no variance assumption.
    # Missing values are dropped first so a single NaN does not turn the
    # p-value into NaN, and the alternative is stated explicitly instead of
    # relying on SciPy's version-dependent default.
    first_label, second_label = cohorts
    first_values = df.loc[df[cohort_col] == first_label, x].dropna()
    second_values = df.loc[df[cohort_col] == second_label, x].dropna()
    _statistic, pval = stats.mannwhitneyu(
        first_values, second_values, alternative="two-sided"
    )

    # Tweak the visual presentation.
    ax.xaxis.grid(True)
    ax.set(ylabel="")
    sns.despine(ax=ax, trim=True, left=True)

    # Report the test result in the plot title.
    ax.set_title(f"{x} distribution (p-value: {pval:.4f})")
    plt.show()
# Plot every measurement column against the cohort label. The cohort column is
# excluded by name rather than by position, so the loop stays correct if the
# columns are reordered. box_scatter only reads the frame, so no defensive
# copy is passed.
for col in vpca_df.columns.drop("Cohort"):
    box_scatter(col, vpca_df)
# Editor is loading...
# Leave a Comment