Untitled
unknown
python
a month ago
9.4 kB
3
Indexable
Never
# Streamlit app: chat with the tables of an uploaded Excel/CSV file through a
# LangChain pandas agent backed by a Groq-hosted chat model.
import os
import re

import pandas as pd
import streamlit as st
from dotenv import load_dotenv
from langchain.agents import AgentExecutor, Tool
from langchain.agents.agent_types import AgentType
from langchain.callbacks import StreamlitCallbackHandler
from langchain_experimental.agents import create_pandas_dataframe_agent
from langchain_groq import ChatGroq

try:
    # Newer LangChain releases ship the Python REPL tool in langchain_experimental.
    from langchain_experimental.tools.python.tool import PythonAstREPLTool
except ImportError:
    # Fall back to the location the original script imported from.
    from langchain.tools.python.tool import PythonAstREPLTool

load_dotenv()

EXCEL_EXTENSIONS = ("xls", "xlsx", "xlsm", "xlsb")


def clear_submit():
    """Reset the ``submit`` flag in session state when a new file is chosen."""
    st.session_state["submit"] = False


@st.cache_data(ttl="2h")
def load_data(uploaded_file):
    """Load every table of an uploaded file into DataFrames.

    Args:
        uploaded_file: A file-like object exposing ``.name`` (as returned by
            ``st.file_uploader``) or a plain path string.

    Returns:
        A dict mapping sheet name to DataFrame for Excel files, ``{"main": df}``
        for CSV files, or ``None`` (after showing an error) for any other
        extension.
    """
    try:
        ext = os.path.splitext(uploaded_file.name)[1][1:].lower()
    except AttributeError:
        # Plain strings have no ``.name``; treat the value itself as a path.
        ext = uploaded_file.split(".")[-1].lower()

    if ext in EXCEL_EXTENSIONS:
        # The context manager closes the workbook handle once all sheets are read.
        with pd.ExcelFile(uploaded_file) as workbook:
            return {
                sheet_name: pd.read_excel(workbook, sheet_name)
                for sheet_name in workbook.sheet_names
            }
    if ext == "csv":
        return {"main": pd.read_csv(uploaded_file)}

    st.error(f"Unsupported file format: {ext}")
    return None


def _repl_tool_name(sheet_name):
    """Build a tool name for *sheet_name* restricted to ``[a-zA-Z0-9_-]``.

    Function-calling APIs typically reject tool names containing spaces or
    punctuation, which are common in spreadsheet sheet names.
    """
    safe = re.sub(r"[^a-zA-Z0-9_-]", "_", str(sheet_name))
    return f"python_repl_{safe}"[:64]


def create_agent_for_dataframes(llm, dfs):
    """Build an agent executor that can query every DataFrame in *dfs*.

    Args:
        llm: The chat model driving the agent.
        dfs: Dict mapping table name to DataFrame.

    Returns:
        The ``AgentExecutor`` produced by ``create_pandas_dataframe_agent``,
        extended with one Python REPL tool per table (the table is exposed to
        that tool as ``df``).
    """
    tools = [
        Tool(
            name=_repl_tool_name(name),
            description=f"A Python REPL for interacting with the '{name}' dataframe. Use this to execute python commands. Input should be a valid python command. When using this tool, you can access the dataframe for '{name}' as 'df'.",
            func=PythonAstREPLTool(locals={"df": df}).run,
        )
        for name, df in dfs.items()
    ]

    # The factory accepts a DataFrame or a list of DataFrames (not a dict) and
    # already returns an AgentExecutor, so the per-table tools are handed over
    # via ``extra_tools`` instead of wrapping the result in a second executor.
    # NOTE(review): OPENAI_FUNCTIONS is kept from the original; confirm the Groq
    # model accepts this agent type.
    return create_pandas_dataframe_agent(
        llm,
        list(dfs.values()),
        verbose=True,
        agent_type=AgentType.OPENAI_FUNCTIONS,
        extra_tools=tools,
        agent_executor_kwargs={"handle_parsing_errors": True},
        # Recent langchain_experimental releases require an explicit opt-in for
        # the code-executing REPL; the app warns the user about this below.
        allow_dangerous_code=True,
    )


st.set_page_config(page_title="LangChain: Chat with Excel Data", page_icon="🦜")
st.title("🦜 LangChain: Chat with Excel Data")

uploaded_file = st.file_uploader(
    "Upload an Excel or CSV file",
    type=['csv', 'xls', 'xlsx', 'xlsm', 'xlsb'],
    help="Various File formats are Supported",
    on_change=clear_submit,
)

if not uploaded_file:
    st.warning(
        "This app uses LangChain's `PythonAstREPLTool` which is vulnerable to arbitrary code execution. Please use caution in deploying and sharing this app."
    )

if uploaded_file:
    dfs = load_data(uploaded_file)
    if isinstance(dfs, dict):
        st.write(f"Loaded {len(dfs)} table(s) from the file:")
        for sheet_name, df in dfs.items():
            st.write(f"- {sheet_name}: {df.shape[0]} rows, {df.shape[1]} columns")

        api_key = os.getenv("GROQ_API_KEY")
        if not api_key:
            # Fail with a readable message instead of a model-constructor traceback.
            st.error("GROQ_API_KEY is not set. Add it to the environment or a .env file.")
            st.stop()

        llm = ChatGroq(
            model="mixtral-8x7b-32768",
            temperature=0,
            max_tokens=1024,
            api_key=api_key,
            streaming=True,
        )
        agent_executor = create_agent_for_dataframes(llm, dfs)

        # Render the button unconditionally; putting it on the right-hand side of
        # ``or`` hid it on the very first run because of short-circuiting.
        clear_requested = st.sidebar.button("Clear conversation history")
        if clear_requested or "messages" not in st.session_state:
            st.session_state["messages"] = [{"role": "assistant", "content": "How can I help you with the data from all tables?"}]

        for msg in st.session_state.messages:
            st.chat_message(msg["role"]).write(msg["content"])

        if prompt := st.chat_input(placeholder="Ask a question about the data..."):
            st.session_state.messages.append({"role": "user", "content": prompt})
            st.chat_message("user").write(prompt)

            with st.chat_message("assistant"):
                st_cb = StreamlitCallbackHandler(st.container(), expand_new_thoughts=False)
                response = agent_executor.run(prompt, callbacks=[st_cb])
                st.session_state.messages.append({"role": "assistant", "content": response})
                st.write(response)
    else:
        st.error("Failed to load the file. Please check the file format and try again.")

####################
# Second version of the script: same app, plus JSON-creation and plotting tools.
import ast
import json
import os
import re

import matplotlib.pyplot as plt
import pandas as pd
import streamlit as st
from dotenv import load_dotenv
from langchain.agents import AgentExecutor, Tool
from langchain.agents.agent_types import AgentType
from langchain.callbacks import StreamlitCallbackHandler
from langchain_experimental.agents import create_pandas_dataframe_agent
from langchain_groq import ChatGroq

try:
    # Newer LangChain releases ship the Python REPL tool in langchain_experimental.
    from langchain_experimental.tools.python.tool import PythonAstREPLTool
except ImportError:
    # Fall back to the location the original script imported from.
    from langchain.tools.python.tool import PythonAstREPLTool

load_dotenv()

EXCEL_EXTENSIONS = ("xls", "xlsx", "xlsm", "xlsb")


def clear_submit():
    """Reset the ``submit`` flag in session state when a new file is chosen."""
    st.session_state["submit"] = False


@st.cache_data(ttl="2h")
def load_data(uploaded_file):
    """Load every table of an uploaded file into DataFrames.

    Args:
        uploaded_file: A file-like object exposing ``.name`` (as returned by
            ``st.file_uploader``) or a plain path string.

    Returns:
        A dict mapping sheet name to DataFrame for Excel files, ``{"main": df}``
        for CSV files, or ``None`` (after showing an error) for any other
        extension.
    """
    try:
        ext = os.path.splitext(uploaded_file.name)[1][1:].lower()
    except AttributeError:
        # Plain strings have no ``.name``; treat the value itself as a path.
        ext = uploaded_file.split(".")[-1].lower()

    if ext in EXCEL_EXTENSIONS:
        # The context manager closes the workbook handle once all sheets are read.
        with pd.ExcelFile(uploaded_file) as workbook:
            return {
                sheet_name: pd.read_excel(workbook, sheet_name)
                for sheet_name in workbook.sheet_names
            }
    if ext == "csv":
        return {"main": pd.read_csv(uploaded_file)}

    st.error(f"Unsupported file format: {ext}")
    return None


def _parse_tool_input(raw):
    """Decode a tool's string input into a Python object when possible.

    Agent tools receive their input as text. JSON is tried first, then a Python
    literal; input that is neither (or is not a string) is returned unchanged.
    """
    if not isinstance(raw, str):
        return raw
    for decode in (json.loads, ast.literal_eval):
        try:
            return decode(raw)
        except (ValueError, SyntaxError, TypeError):
            continue
    return raw


def create_json_data(data):
    """Create a JSON string from the provided data.

    Args:
        data: A dict/list, or a string holding JSON or a Python literal (the
            form in which an agent tool delivers its input).

    Returns:
        The data serialised as JSON with a two-space indent.
    """
    return json.dumps(_parse_tool_input(data), indent=2)


def plot_data(df, x_col, y_col, title):
    """Create a simple line plot using matplotlib.

    Args:
        df: DataFrame holding the data.
        x_col: Column plotted on the x axis.
        y_col: Column plotted on the y axis.
        title: Figure title.

    Returns:
        The ``matplotlib.pyplot`` module, with the new figure as current figure.
    """
    plt.figure(figsize=(10, 6))
    plt.plot(df[x_col], df[y_col])
    plt.title(title)
    plt.xlabel(x_col)
    plt.ylabel(y_col)
    return plt


def _repl_tool_name(sheet_name):
    """Build a tool name for *sheet_name* restricted to ``[a-zA-Z0-9_-]``.

    Function-calling APIs typically reject tool names containing spaces or
    punctuation, which are common in spreadsheet sheet names.
    """
    safe = re.sub(r"[^a-zA-Z0-9_-]", "_", str(sheet_name))
    return f"python_repl_{safe}"[:64]


def create_agent_for_dataframes(llm, dfs):
    """Build an agent executor that can query, serialise and plot *dfs*.

    Args:
        llm: The chat model driving the agent.
        dfs: Dict mapping table name to DataFrame.

    Returns:
        The ``AgentExecutor`` produced by ``create_pandas_dataframe_agent``,
        extended with one Python REPL tool per table plus the ``create_json``
        and ``create_plot`` tools.
    """

    def run_plot_tool(raw_input):
        """Draw the requested plot and report the outcome as text for the agent."""
        spec = _parse_tool_input(raw_input)
        if not isinstance(spec, dict):
            return "Input must be a dictionary with 'df_name', 'x_col', 'y_col', and 'title'."
        try:
            plot_data(dfs[spec["df_name"]], spec["x_col"], spec["y_col"], spec["title"])
        except KeyError as err:
            return f"Could not create plot; missing or unknown key/table/column: {err}"
        return "Plot created; it will be displayed below the answer."

    tools = [
        Tool(
            name=_repl_tool_name(name),
            description=f"A Python REPL for interacting with the '{name}' dataframe. Use this to execute python commands. Input should be a valid python command. When using this tool, you can access the dataframe for '{name}' as 'df'.",
            func=PythonAstREPLTool(locals={"df": df}).run,
        )
        for name, df in dfs.items()
    ]

    # Add JSON creation tool
    tools.append(
        Tool(
            name="create_json",
            description="Create a JSON string from provided data. Input should be a dictionary or list.",
            func=create_json_data,
        )
    )

    # Add plotting tool
    tools.append(
        Tool(
            name="create_plot",
            description="Create a plot using matplotlib. Input should be a dictionary with 'df_name', 'x_col', 'y_col', and 'title'.",
            func=run_plot_tool,
        )
    )

    # The factory accepts a DataFrame or a list of DataFrames (not a dict) and
    # already returns an AgentExecutor, so the custom tools are handed over via
    # ``extra_tools`` instead of wrapping the result in a second executor.
    # NOTE(review): OPENAI_FUNCTIONS is kept from the original; confirm the Groq
    # model accepts this agent type.
    return create_pandas_dataframe_agent(
        llm,
        list(dfs.values()),
        verbose=True,
        agent_type=AgentType.OPENAI_FUNCTIONS,
        extra_tools=tools,
        agent_executor_kwargs={"handle_parsing_errors": True},
        # Recent langchain_experimental releases require an explicit opt-in for
        # the code-executing REPL; the app warns the user about this below.
        allow_dangerous_code=True,
    )


st.set_page_config(page_title="LangChain: Chat with Excel Data", page_icon="🦜")
st.title("🦜 LangChain: Chat with Excel Data")

uploaded_file = st.file_uploader(
    "Upload an Excel or CSV file",
    type=['csv', 'xls', 'xlsx', 'xlsm', 'xlsb'],
    help="Various File formats are Supported",
    on_change=clear_submit,
)

if not uploaded_file:
    st.warning(
        "This app uses LangChain's `PythonAstREPLTool` which is vulnerable to arbitrary code execution. Please use caution in deploying and sharing this app."
    )

if uploaded_file:
    dfs = load_data(uploaded_file)
    if isinstance(dfs, dict):
        st.write(f"Loaded {len(dfs)} table(s) from the file:")
        for sheet_name, df in dfs.items():
            st.write(f"- {sheet_name}: {df.shape[0]} rows, {df.shape[1]} columns")

        api_key = os.getenv("GROQ_API_KEY")
        if not api_key:
            # Fail with a readable message instead of a model-constructor traceback.
            st.error("GROQ_API_KEY is not set. Add it to the environment or a .env file.")
            st.stop()

        llm = ChatGroq(
            model="mixtral-8x7b-32768",
            temperature=0,
            max_tokens=1024,
            api_key=api_key,
            streaming=True,
        )
        agent_executor = create_agent_for_dataframes(llm, dfs)

        # Render the button unconditionally; putting it on the right-hand side of
        # ``or`` hid it on the very first run because of short-circuiting.
        clear_requested = st.sidebar.button("Clear conversation history")
        if clear_requested or "messages" not in st.session_state:
            st.session_state["messages"] = [{"role": "assistant", "content": "How can I help you with the data from all tables? I can now create JSON data and plots as well!"}]

        for msg in st.session_state.messages:
            st.chat_message(msg["role"]).write(msg["content"])

        if prompt := st.chat_input(placeholder="Ask a question about the data..."):
            st.session_state.messages.append({"role": "user", "content": prompt})
            st.chat_message("user").write(prompt)

            with st.chat_message("assistant"):
                st_cb = StreamlitCallbackHandler(st.container(), expand_new_thoughts=False)
                response = agent_executor.run(prompt, callbacks=[st_cb])
                st.session_state.messages.append({"role": "assistant", "content": response})
                st.write(response)

                # Display plots only if a tool actually drew one this turn.
                # (``'plt' in locals()`` was always true: ``plt`` is the module.)
                for fig_num in plt.get_fignums():
                    st.pyplot(plt.figure(fig_num))
                plt.close("all")
    else:
        st.error("Failed to load the file. Please check the file format and try again.")
Leave a Comment