Untitled

mail@pastecode.io avatar
unknown
python
5 months ago
9.4 kB
4
Indexable
import streamlit as st
import pandas as pd
import os
from langchain.callbacks import StreamlitCallbackHandler
from langchain_groq import ChatGroq
from langchain_experimental.agents import create_pandas_dataframe_agent
from langchain.agents.agent_types import AgentType
from langchain.agents import AgentExecutor, Tool
from langchain.tools.python.tool import PythonAstREPLTool
from dotenv import load_dotenv

load_dotenv()

def clear_submit() -> None:
    """Reset the ``"submit"`` flag kept in Streamlit's session state.

    Takes no arguments so it can be used directly as a widget callback.
    """
    session = st.session_state
    session["submit"] = False

@st.cache_data(ttl="2h")
def load_data(uploaded_file):
    """Read an uploaded Excel or CSV file into a mapping of table name -> DataFrame.

    Args:
        uploaded_file: Either a file-like object exposing a ``name`` attribute
            (e.g. a Streamlit upload) or a plain path string.

    Returns:
        A dict of DataFrames: one entry per worksheet (keyed by sheet name) for
        Excel files, or a single ``"main"`` entry for CSV files. Returns
        ``None`` after showing an error when the extension is not supported.
    """
    # A file-like upload carries its filename in ``.name``; a plain path string
    # is its own filename. Both go through the same, case-insensitive parsing.
    filename = getattr(uploaded_file, "name", uploaded_file)
    ext = os.path.splitext(str(filename))[1][1:].lower()

    if ext in ('xls', 'xlsx', 'xlsm', 'xlsb'):
        # sheet_name=None asks pandas for every worksheet as a dict keyed by
        # sheet name, and pandas manages the workbook handle it opens itself.
        return pd.read_excel(uploaded_file, sheet_name=None)
    if ext == 'csv':
        return {"main": pd.read_csv(uploaded_file)}

    st.error(f"Unsupported file format: {ext}")
    return None

def create_agent_for_dataframes(llm, dfs):
    """Build a pandas agent that can query every loaded table.

    Args:
        llm: Chat model handed to ``create_pandas_dataframe_agent``.
        dfs: Mapping of table name -> DataFrame.

    Returns:
        The executor produced by ``create_pandas_dataframe_agent``, extended
        with one Python REPL tool per table (each exposes its table as ``df``).
    """
    import re  # local import: only needed here to sanitize tool names

    tools = []
    taken_names = set()
    for index, (name, df) in enumerate(dfs.items()):
        # Sheet names may contain spaces or punctuation, while function-calling
        # APIs generally restrict tool names to letters, digits, "_" and "-"
        # with a length cap, so derive a safe and unique identifier.
        safe = re.sub(r"[^0-9A-Za-z_-]", "_", str(name))[:40]
        tool_name = f"python_repl_{safe}"
        if tool_name in taken_names:
            tool_name = f"{tool_name}_{index}"
        taken_names.add(tool_name)

        tools.append(
            Tool(
                name=tool_name,
                description=f"A Python REPL for interacting with the '{name}' dataframe. Use this to execute python commands. Input should be a valid python command. When using this tool, you can access the dataframe for '{name}' as 'df'.",
                func=PythonAstREPLTool(locals={"df": df}).run,
            )
        )

    # create_pandas_dataframe_agent expects a DataFrame or a list of DataFrames
    # (not a dict) and already returns a ready-to-run executor, so the per-table
    # tools are attached through ``extra_tools`` instead of wrapping the result
    # in a second AgentExecutor. Executor options such as handle_parsing_errors
    # belong in ``agent_executor_kwargs``.
    # NOTE(review): recent langchain_experimental releases also require
    # ``allow_dangerous_code=True`` here - confirm against the pinned version.
    return create_pandas_dataframe_agent(
        llm,
        list(dfs.values()),
        verbose=True,
        agent_type=AgentType.OPENAI_FUNCTIONS,
        extra_tools=tools,
        agent_executor_kwargs={"handle_parsing_errors": True},
    )

# ---- Page chrome -----------------------------------------------------------
st.set_page_config(page_title="LangChain: Chat with Excel Data", page_icon="🦜")
st.title("🦜 LangChain: Chat with Excel Data")

# ---- File upload -----------------------------------------------------------
accepted_extensions = ['csv', 'xls', 'xlsx', 'xlsm', 'xlsb']
uploaded_file = st.file_uploader(
    "Upload an Excel or CSV file",
    type=accepted_extensions,
    help="Various File formats are Supported",
    on_change=clear_submit,
)

if not uploaded_file:
    # Nothing uploaded yet: only show the security notice.
    st.warning(
        "This app uses LangChain's `PythonAstREPLTool` which is vulnerable to arbitrary code execution. Please use caution in deploying and sharing this app."
    )
else:
    dfs = load_data(uploaded_file)

    if not isinstance(dfs, dict):
        st.error("Failed to load the file. Please check the file format and try again.")
    else:
        # ---- Summary of what was loaded ------------------------------------
        st.write(f"Loaded {len(dfs)} table(s) from the file:")
        for sheet_name, df in dfs.items():
            row_count = df.shape[0]
            column_count = df.shape[1]
            st.write(f"- {sheet_name}: {row_count} rows, {column_count} columns")

        # ---- Model and agent -----------------------------------------------
        llm = ChatGroq(
            model="mixtral-8x7b-32768",
            temperature=0,
            max_tokens=1024,
            api_key=os.getenv("GROQ_API_KEY"),
            streaming=True,
        )
        agent_executor = create_agent_for_dataframes(llm, dfs)

        # ---- Conversation history ------------------------------------------
        # The sidebar button is only evaluated (and therefore only rendered)
        # once a history already exists, because ``or`` short-circuits.
        history_missing = "messages" not in st.session_state
        if history_missing or st.sidebar.button("Clear conversation history"):
            greeting = {"role": "assistant", "content": "How can I help you with the data from all tables?"}
            st.session_state["messages"] = [greeting]

        for message in st.session_state.messages:
            with st.chat_message(message["role"]):
                st.write(message["content"])

        # ---- New question ---------------------------------------------------
        prompt = st.chat_input(placeholder="Ask a question about the data...")
        if prompt:
            st.session_state.messages.append({"role": "user", "content": prompt})
            with st.chat_message("user"):
                st.write(prompt)

            with st.chat_message("assistant"):
                callback_handler = StreamlitCallbackHandler(st.container(), expand_new_thoughts=False)
                response = agent_executor.run(prompt, callbacks=[callback_handler])
                st.session_state.messages.append({"role": "assistant", "content": response})
                st.write(response)







#################### 





import streamlit as st
import pandas as pd
import os
import json
import matplotlib.pyplot as plt
from langchain.callbacks import StreamlitCallbackHandler
from langchain_groq import ChatGroq
from langchain_experimental.agents import create_pandas_dataframe_agent
from langchain.agents.agent_types import AgentType
from langchain.agents import AgentExecutor, Tool
from langchain.tools.python.tool import PythonAstREPLTool
from dotenv import load_dotenv

load_dotenv()

def clear_submit() -> None:
    """Reset the ``"submit"`` flag kept in Streamlit's session state.

    Takes no arguments so it can be used directly as a widget callback.
    """
    session = st.session_state
    session["submit"] = False

@st.cache_data(ttl="2h")
def load_data(uploaded_file):
    """Read an uploaded Excel or CSV file into a mapping of table name -> DataFrame.

    Args:
        uploaded_file: Either a file-like object exposing a ``name`` attribute
            (e.g. a Streamlit upload) or a plain path string.

    Returns:
        A dict of DataFrames: one entry per worksheet (keyed by sheet name) for
        Excel files, or a single ``"main"`` entry for CSV files. Returns
        ``None`` after showing an error when the extension is not supported.
    """
    # A file-like upload carries its filename in ``.name``; a plain path string
    # is its own filename. Both go through the same, case-insensitive parsing.
    filename = getattr(uploaded_file, "name", uploaded_file)
    ext = os.path.splitext(str(filename))[1][1:].lower()

    if ext in ('xls', 'xlsx', 'xlsm', 'xlsb'):
        # sheet_name=None asks pandas for every worksheet as a dict keyed by
        # sheet name, and pandas manages the workbook handle it opens itself.
        return pd.read_excel(uploaded_file, sheet_name=None)
    if ext == 'csv':
        return {"main": pd.read_csv(uploaded_file)}

    st.error(f"Unsupported file format: {ext}")
    return None

def create_json_data(data):
    """Create a pretty-printed JSON string from the provided data.

    When this function is invoked as an agent tool the input typically arrives
    as text rather than as a Python object. Text that represents a dictionary
    or list (as JSON, or as a Python literal such as ``{'a': 1}``) is therefore
    parsed first so the result is structured JSON instead of one quoted string.
    Any other input is serialized unchanged.

    Args:
        data: A JSON-serializable object, or text describing a dict/list.

    Returns:
        The JSON document as a string, indented by two spaces.

    Raises:
        TypeError: If ``data`` is a non-string object that JSON cannot encode.
    """
    import ast  # local import: only needed to parse Python-literal text

    if isinstance(data, str):
        for parse in (json.loads, ast.literal_eval):
            try:
                parsed = parse(data)
            except (ValueError, SyntaxError):
                continue
            if isinstance(parsed, (dict, list)):
                try:
                    return json.dumps(parsed, indent=2)
                except TypeError:
                    # A literal holding non-JSON values (sets, bytes, ...):
                    # fall back to serializing the raw text.
                    break

    return json.dumps(data, indent=2)

def plot_data(df, x_col, y_col, title):
    """Draw a line plot of ``df[y_col]`` against ``df[x_col]`` on a new figure.

    Args:
        df: Table holding the data to plot.
        x_col: Column used for the x axis (also used as the x-axis label).
        y_col: Column used for the y axis (also used as the y-axis label).
        title: Title placed above the plot.

    Returns:
        The ``matplotlib.pyplot`` module, whose current figure is the new plot.
    """
    plt.figure(figsize=(10, 6))
    x_values, y_values = df[x_col], df[y_col]

    axes = plt.gca()
    axes.plot(x_values, y_values)
    axes.set_title(title)
    axes.set_xlabel(x_col)
    axes.set_ylabel(y_col)
    return plt

def create_agent_for_dataframes(llm, dfs):
    """Build a pandas agent over every loaded table, plus JSON and plot tools.

    Args:
        llm: Chat model handed to ``create_pandas_dataframe_agent``.
        dfs: Mapping of table name -> DataFrame.

    Returns:
        The executor produced by ``create_pandas_dataframe_agent``, extended
        with one Python REPL tool per table (each exposes its table as ``df``),
        a ``create_json`` tool and a ``create_plot`` tool.
    """
    import ast  # local import: parses Python-literal tool input
    import re  # local import: sanitizes tool names

    tools = []
    taken_names = set()
    for index, (name, df) in enumerate(dfs.items()):
        # Sheet names may contain spaces or punctuation, while function-calling
        # APIs generally restrict tool names to letters, digits, "_" and "-"
        # with a length cap, so derive a safe and unique identifier.
        safe = re.sub(r"[^0-9A-Za-z_-]", "_", str(name))[:40]
        tool_name = f"python_repl_{safe}"
        if tool_name in taken_names:
            tool_name = f"{tool_name}_{index}"
        taken_names.add(tool_name)

        tools.append(
            Tool(
                name=tool_name,
                description=f"A Python REPL for interacting with the '{name}' dataframe. Use this to execute python commands. Input should be a valid python command. When using this tool, you can access the dataframe for '{name}' as 'df'.",
                func=PythonAstREPLTool(locals={"df": df}).run,
            )
        )

    # Add JSON creation tool
    tools.append(
        Tool(
            name="create_json",
            description="Create a JSON string from provided data. Input should be a dictionary or list.",
            func=create_json_data,
        )
    )

    plot_keys = ("df_name", "x_col", "y_col", "title")
    bad_input = "Invalid input: expected a dictionary with 'df_name', 'x_col', 'y_col', and 'title'."

    def run_plot_tool(tool_input):
        """Parse the tool input, draw the plot and report the outcome as text."""
        spec = tool_input
        if isinstance(spec, str):
            # Tool input usually arrives as text; accept JSON or a Python literal.
            try:
                spec = json.loads(spec)
            except ValueError:
                try:
                    spec = ast.literal_eval(spec)
                except (ValueError, SyntaxError):
                    return bad_input
        if not isinstance(spec, dict):
            return bad_input

        missing = [key for key in plot_keys if key not in spec]
        if missing:
            return f"Missing required key(s): {', '.join(missing)}"
        if spec["df_name"] not in dfs:
            available = ", ".join(str(key) for key in dfs)
            return f"Unknown dataframe '{spec['df_name']}'. Available: {available}"

        try:
            plot_data(dfs[spec["df_name"]], spec["x_col"], spec["y_col"], spec["title"])
        except KeyError as exc:
            # plot_data opens its figure before reading the columns; drop the
            # empty figure so it is not mistaken for a finished plot.
            plt.close()
            return f"Column not found: {exc}"
        # Return text, not the pyplot module, so the agent gets a usable result.
        return f"Plot '{spec['title']}' created from '{spec['df_name']}'."

    # Add plotting tool
    tools.append(
        Tool(
            name="create_plot",
            description="Create a plot using matplotlib. Input should be a dictionary with 'df_name', 'x_col', 'y_col', and 'title'.",
            func=run_plot_tool,
        )
    )

    # create_pandas_dataframe_agent expects a DataFrame or a list of DataFrames
    # (not a dict) and already returns a ready-to-run executor, so the extra
    # tools are attached through ``extra_tools`` instead of wrapping the result
    # in a second AgentExecutor. Executor options such as handle_parsing_errors
    # belong in ``agent_executor_kwargs``.
    # NOTE(review): recent langchain_experimental releases also require
    # ``allow_dangerous_code=True`` here - confirm against the pinned version.
    return create_pandas_dataframe_agent(
        llm,
        list(dfs.values()),
        verbose=True,
        agent_type=AgentType.OPENAI_FUNCTIONS,
        extra_tools=tools,
        agent_executor_kwargs={"handle_parsing_errors": True},
    )

# Page chrome.
st.set_page_config(page_title="LangChain: Chat with Excel Data", page_icon="🦜")
st.title("🦜 LangChain: Chat with Excel Data")

uploaded_file = st.file_uploader(
    "Upload an Excel or CSV file",
    type=['csv', 'xls', 'xlsx', 'xlsm', 'xlsb'],
    help="Various File formats are Supported",
    on_change=clear_submit,
)

if not uploaded_file:
    st.warning(
        "This app uses LangChain's `PythonAstREPLTool` which is vulnerable to arbitrary code execution. Please use caution in deploying and sharing this app."
    )

if uploaded_file:
    dfs = load_data(uploaded_file)

    if isinstance(dfs, dict):
        # Summarize every table that was loaded.
        st.write(f"Loaded {len(dfs)} table(s) from the file:")
        for sheet_name, df in dfs.items():
            st.write(f"- {sheet_name}: {df.shape[0]} rows, {df.shape[1]} columns")

        llm = ChatGroq(
            model="mixtral-8x7b-32768",
            temperature=0,
            max_tokens=1024,
            api_key=os.getenv("GROQ_API_KEY"),
            streaming=True
        )

        agent_executor = create_agent_for_dataframes(llm, dfs)

        # Start (or reset) the conversation. The sidebar button is only
        # evaluated once a history exists, because ``or`` short-circuits.
        if "messages" not in st.session_state or st.sidebar.button("Clear conversation history"):
            st.session_state["messages"] = [{"role": "assistant", "content": "How can I help you with the data from all tables? I can now create JSON data and plots as well!"}]

        # Replay the stored conversation.
        for msg in st.session_state.messages:
            st.chat_message(msg["role"]).write(msg["content"])

        if prompt := st.chat_input(placeholder="Ask a question about the data..."):
            st.session_state.messages.append({"role": "user", "content": prompt})
            st.chat_message("user").write(prompt)

            with st.chat_message("assistant"):
                st_cb = StreamlitCallbackHandler(st.container(), expand_new_thoughts=False)
                response = agent_executor.run(prompt, callbacks=[st_cb])
                st.session_state.messages.append({"role": "assistant", "content": response})
                st.write(response)

                # Display plots only if the agent actually created some.
                # `'plt' in locals()` cannot be used for this: at module level
                # `plt` is the imported pyplot module, so that check is always
                # true and would render a figure after every answer.
                for figure_number in plt.get_fignums():
                    st.pyplot(plt.figure(figure_number))
                # Close everything so figures do not carry over to later reruns.
                plt.close("all")
    else:
        st.error("Failed to load the file. Please check the file format and try again.")
Leave a Comment