Untitled
unknown
plain_text
2 years ago
1.6 kB
5
Indexable
# Exploratory plots for a proteomics expression table: box plots of the
# columns, a scatter plot of two expression columns, and a two-component PCA
# projection of the standardized numeric data.
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

# Load your data
proteomics_data_path = '/Users/clivejay/Desktop/Cleaned_PROTEOMICS.xlsx'
ppi_enrichment_data_path = '/Users/clivejay/Desktop/enrichment.all.tsv'

# Proteomics data
proteomics_data = pd.read_excel(proteomics_data_path)

# PPI enrichment data (loaded for the interpretation step described at the
# end of the script; none of the plots below read it)
ppi_enrichment_data = pd.read_csv(ppi_enrichment_data_path, sep='\t')

# Box plots
plt.figure(figsize=(12, 8))
sns.boxplot(data=proteomics_data)
plt.title('Box Plots of Protein Expressions')
plt.show()

# Scatter plots (as an example, we plot the second column against the third,
# i.e. the first two columns after the leading identifier column)
# Update with the column names you are interested in
plt.figure(figsize=(10, 6))
sns.scatterplot(
    x=proteomics_data.columns[1],
    y=proteomics_data.columns[2],
    data=proteomics_data,
)
plt.title('Scatter Plot of Protein Expressions')
plt.show()

# PCA plot
# Exclude the first column if it's non-numeric (like protein names)
feature_columns = proteomics_data.iloc[:, 1:]

# Keep numeric columns only: the scaler cannot convert text columns to floats.
numeric_features = feature_columns.select_dtypes(include='number')

# PCA rejects missing values, so fit it on complete rows only and report how
# many rows were left out instead of failing with an opaque error.
complete_features = numeric_features.dropna()
dropped_rows = len(numeric_features) - len(complete_features)
if dropped_rows:
    print(f'PCA: skipped {dropped_rows} row(s) with missing values')

# Two components need at least two samples and two features.
if complete_features.shape[0] < 2 or complete_features.shape[1] < 2:
    raise ValueError(
        'PCA with 2 components needs at least 2 complete rows and '
        '2 numeric columns; got shape {}'.format(complete_features.shape)
    )

scaler = StandardScaler()
scaled_data = scaler.fit_transform(complete_features)

pca = PCA(n_components=2)
principal_components = pca.fit_transform(scaled_data)

# Reuse the source row index so each point can still be traced back to its
# row in proteomics_data even when incomplete rows were skipped.
principal_df = pd.DataFrame(
    data=principal_components,
    columns=['PC1', 'PC2'],
    index=complete_features.index,
)

plt.figure(figsize=(10, 6))
sns.scatterplot(x='PC1', y='PC2', data=principal_df)
plt.title('PCA Plot of Protein Data')
plt.xlabel('Principal Component 1')
plt.ylabel('Principal Component 2')
plt.show()

# Interpret the PCA in the context of PPI enrichment data
# You might want to annotate points on the PCA plot based on their
# significance in the PPI enrichment data
Editor is loading...
Leave a Comment