# dataframe.py
# (paste-site listing metadata, kept as comments so the module parses)
# unknown
# plain_text
# a year ago
# 2.3 kB
# 10
# Indexable
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import streamlit as st
# Function to process CSV files
def process_csv(files):
    """Read each CSV input and stack the results into one dataframe.

    Args:
        files: Iterable of inputs that ``pd.read_csv`` can read.

    Returns:
        A ``(combined_df, dfs)`` tuple: the row-wise concatenation of every
        input with a fresh 0..n-1 index, and the list of per-file
        dataframes in input order. When ``files`` is empty, an empty
        dataframe and an empty list are returned.
    """
    dfs = [pd.read_csv(file) for file in files]
    if not dfs:
        # pd.concat raises ValueError on an empty sequence; an empty upload
        # should produce an empty result instead of crashing the caller.
        return pd.DataFrame(), dfs
    return pd.concat(dfs, ignore_index=True), dfs
# Function to process Excel files
def process_excel(files):
    """Read each Excel input and stack the results into one dataframe.

    Args:
        files: Iterable of inputs that ``pd.read_excel`` can read.

    Returns:
        A ``(combined_df, dfs)`` tuple: the row-wise concatenation of every
        input with a fresh 0..n-1 index, and the list of per-file
        dataframes in input order. When ``files`` is empty, an empty
        dataframe and an empty list are returned.
    """
    dfs = [pd.read_excel(file) for file in files]
    if not dfs:
        # pd.concat raises ValueError on an empty sequence; an empty upload
        # should produce an empty result instead of crashing the caller.
        return pd.DataFrame(), dfs
    return pd.concat(dfs, ignore_index=True), dfs
# Function to process TXT files
def process_texts(files):
    """Wrap the UTF-8 content of each uploaded text file in a ``Document``.

    NOTE(review): ``Document`` is not imported in the visible part of this
    module; it must be supplied elsewhere for this function to run.

    Args:
        files: Iterable of binary file-like objects; each is read in full
            and decoded as UTF-8.

    Returns:
        A ``(combined_text, texts)`` tuple: every document's
        ``page_content`` joined with a blank line between files, and the
        list of ``Document`` objects in input order.
    """
    documents = [
        Document(page_content=upload.read().decode('utf-8'))
        for upload in files
    ]
    contents = [doc.page_content for doc in documents]
    return "\n\n".join(contents), documents
# Function to process PDF files
def process_pdfs(files):
    """Extract the text of each PDF and wrap it in a ``Document``.

    NOTE(review): ``PdfReader`` and ``Document`` are not imported in the
    visible part of this module; they must be supplied elsewhere for this
    function to run.

    Args:
        files: Iterable of inputs accepted by ``PdfReader``.

    Returns:
        A ``(combined_text, texts)`` tuple: every document's
        ``page_content`` joined with a blank line between files, and the
        list of ``Document`` objects (one per PDF) in input order.
    """
    texts = []
    for file in files:
        reader = PdfReader(file)
        # Join once instead of growing a string page by page; ``or ""``
        # keeps a page with no extractable text from raising TypeError.
        text = "".join(page.extract_text() or "" for page in reader.pages)
        texts.append(Document(page_content=text))
    combined_text = "\n\n".join(doc.page_content for doc in texts)
    return combined_text, texts
# Function to generate insights from a dataframe
def generate_insights(query, df):
    """Run a simple text command against a dataframe and render the result.

    Supported commands (matched case-insensitively, ignoring surrounding
    whitespace):

    * ``show dataframe`` - render ``df`` with ``st.dataframe``.
    * ``show summary`` - render ``df.describe()`` with ``st.write``.
    * any query containing ``plot`` - treat the last whitespace-separated
      word as a column name (matched exactly as written) and draw its
      histogram with a KDE overlay.

    Args:
        query: The command text.
        df: The dataframe to inspect.

    Returns:
        A status message describing what was displayed, or why nothing was.
        Errors raised while plotting are caught and reported in the message.
    """
    command = query.strip().lower()

    if command == "show dataframe":
        st.dataframe(df)
        return "Displayed the dataframe."

    if command == "show summary":
        st.write(df.describe())
        return "Displayed the statistical summary of the dataframe."

    if "plot" not in command:
        return f"Query '{query}' not recognized."

    # split() without an argument ignores trailing/repeated whitespace, so
    # "plot age " still resolves to "age" rather than an empty name.
    col = query.split()[-1]
    try:
        if col not in df.columns:
            return f"Column {col} not found in the dataframe."

        # Draw on an explicit figure rather than pyplot's global state, and
        # close it afterwards so figures do not accumulate across calls.
        fig, ax = plt.subplots(figsize=(10, 5))
        try:
            sns.histplot(df[col], kde=True, ax=ax)
            st.pyplot(fig)
        finally:
            plt.close(fig)
    except Exception as e:
        # UI boundary: report the failure to the user instead of raising.
        return f"An error occurred: {e}"

    return f"Displayed the distribution plot for {col}."
# Editor is loading...
# Leave a Comment