# Untitled

import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

# Input data: each file location is defined next to the call that reads it.

# Proteomics table, read from an Excel workbook
proteomics_data_path = '/Users/clivejay/Desktop/Cleaned_PROTEOMICS.xlsx'
proteomics_data = pd.read_excel(proteomics_data_path)

# PPI enrichment table, read from a tab-separated text file
ppi_enrichment_data_path = '/Users/clivejay/Desktop/enrichment.all.tsv'
ppi_enrichment_data = pd.read_csv(ppi_enrichment_data_path, sep='\t')

# Box-plot overview: the whole proteomics DataFrame is handed to seaborn,
# and the title is set on the Axes object that seaborn returns.
plt.figure(figsize=(12, 8))
sns.boxplot(data=proteomics_data).set_title('Box Plots of Protein Expressions')
plt.show()

# Scatter plot of one pair of columns, chosen by position: indices 1 and 2,
# i.e. the second and third columns of the table (index 0 is skipped).
# Replace the two column selections below to compare a different pair.
plt.figure(figsize=(10, 6))
sns.scatterplot(
    data=proteomics_data,
    x=proteomics_data.columns[1],
    y=proteomics_data.columns[2],
).set_title('Scatter Plot of Protein Expressions')
plt.show()

# PCA plot
# Build the PCA input from every column after the first (the first column is
# assumed to hold non-numeric labels such as protein names -- confirm against
# the spreadsheet). Only numeric columns are kept, because StandardScaler
# cannot convert text; entirely empty columns and then rows with any missing
# value are dropped, because PCA rejects NaN input.
# NOTE: rows with missing values are excluded from the plot, not imputed.
pca_input = (
    proteomics_data.iloc[:, 1:]
    .select_dtypes(include='number')
    .dropna(axis=1, how='all')
    .dropna(axis=0, how='any')
)

# Standardise each column, then project the rows onto the first two
# principal components.
scaler = StandardScaler()
scaled_data = scaler.fit_transform(pca_input)
pca = PCA(n_components=2)
principal_components = pca.fit_transform(scaled_data)

# Keep the row labels of the input so every point can be traced back to its
# row in proteomics_data (needed if points are annotated later).
principal_df = pd.DataFrame(
    data=principal_components,
    columns=['PC1', 'PC2'],
    index=pca_input.index,
)

plt.figure(figsize=(10, 6))
sns.scatterplot(x='PC1', y='PC2', data=principal_df)
plt.title('PCA Plot of Protein Data')
plt.xlabel('Principal Component 1')
plt.ylabel('Principal Component 2')
plt.show()

# Interpret the PCA in the context of PPI enrichment data
# You might want to annotate points on the PCA plot based on their significance in the PPI enrichment data
# Editor is loading...