# Untitled

import streamlit as st
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import numpy as np
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import re
import underthesea  





@st.cache_resource
def load_model():
    """Load the tokenizer and classifier for the Vietnamese sentiment model.

    The function is decorated with ``st.cache_resource``, so Streamlit
    caches the returned objects instead of rebuilding them on every call.

    Returns:
        A ``(tokenizer, model)`` tuple created from the
        ``wonrax/phobert-base-vietnamese-sentiment`` checkpoint.
    """
    checkpoint = "wonrax/phobert-base-vietnamese-sentiment"
    return (
        AutoTokenizer.from_pretrained(checkpoint),
        AutoModelForSequenceClassification.from_pretrained(checkpoint),
    )


def analyze_sentiment(text):
    """Return the sentiment class probabilities for ``text``.

    Args:
        text: The string to classify.

    Returns:
        A 1-D NumPy array of softmax probabilities for the single input
        sequence, ordered by the model's class index.
    """
    tokenizer, model = load_model()

    # RoBERTa-style models such as PhoBERT reserve two position slots for the
    # padding offset, so the longest usable sequence is
    # ``max_position_embeddings - 2``. Truncating to a larger value lets long
    # inputs overflow the position-embedding table at inference time.
    max_length = 512
    position_slots = getattr(model.config, "max_position_embeddings", None)
    if position_slots is not None:
        max_length = max(1, min(max_length, position_slots - 2))

    inputs = tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        max_length=max_length,
        padding=True,
    )
    # Inference only: no gradients are needed.
    with torch.no_grad():
        outputs = model(**inputs)
        predictions = torch.nn.functional.softmax(outputs.logits, dim=1)
    # A single text was encoded, so take the only row of the batch.
    return predictions.numpy()[0]


def preprocess_model_input_str(text, video_title=""):
    """Clean raw text and word-segment it for the sentiment model.

    The cleanup replaces URLs, runs of three or more punctuation characters,
    newlines, everything from a ``#`` to the end of its line, and
    "word followed by a colon" prefixes with a space, then collapses repeated
    whitespace and strips the ends. Occurrences of ``video_title`` are
    removed as well.

    Args:
        text: The raw text to clean.
        video_title: Optional title to remove from ``text``. The default
            empty string removes nothing.

    Returns:
        The cleaned text after ``underthesea.word_tokenize`` with
        ``format="text"``.
    """
    regex_pattern = (
        r"(http|www).*(\/|\/\/)\s?|[-()+*&^%$#!@\";<>\/\.\?]{3,}|\n|#.*|\w*:"
    )

    # Remove the title from the raw text first. The pattern above rewrites
    # parts of a typical title (for example "2024:" matches ``\w*:``), so
    # after the cleanup a verbatim title can no longer be found.
    if video_title:
        text = text.replace(video_title, " ")

    clean_str = re.sub(regex_pattern, " ", text)
    clean_str = re.sub(r"\s{2,}", " ", clean_str)

    # Keep the post-cleanup removal too: it catches titles that only match
    # once the surrounding whitespace has been normalized.
    if video_title:
        clean_str = clean_str.replace(video_title, "")

    clean_str = clean_str.strip()
    return underthesea.word_tokenize(clean_str, format="text")


def get_desc_chat(video_url):
    """Return the cleaned description and live-chat messages for a video.

    The title, description and chat messages are hard-coded sample values
    inside this function; ``video_url`` is only written to the page.

    Args:
        video_url: The URL echoed in the status line.

    Returns:
        A ``(clean_description, clean_live_chat)`` tuple: the preprocessed
        description string and a list with one preprocessed string per chat
        message.
    """
    st.write(f"Analyzing video: {video_url}")

    sample_title = "KHỚP LỆNH 28/10/2024: ĐÔ VẬT"
    sample_description = """Thị trường chứng khoán tiếp tục lình xình trong phiên sáng khi tỷ giá vẫn neo ở mức
    cao.\n\nTỷ giá và chứng khoán luôn có mối quan hệ nghịch chiều. Tỷ giá tăng thì chứng khoán thường điều chỉnh.
    Trong giai đoạn cuối năm, nhiều yếu tố đang đưa tỷ giá lên mức cao, thị trường chứng khoán cũng có những ảnh hưởng nhất định.
    \n\nCùng phân tích về cơ hội và thách thức của tỷ giá tăng trong chương trình Khớp lệnh
    - Tài chính thịnh vượng hôm nay với chủ đề “Đô” vật.\n\n#KhoplenhVPBankS #Taichinhthinhvuong #Vimottuonglaithinhvuong"""
    sample_chat = [
        "MBS xin cảm ơn quý nhà đầu tư đã theo dõi và để lại các câu hỏi cho các chuyên gia ",
        "Cho em hỏi mã HAG",
        "Mọi người cùng vào tương tác nhé. Welcome",
        "Thị trường xanh rồi",
        "Có ai có câu hỏi dành cho chuyên gia không ạ?",
    ]

    # Only the description is cleaned with the title; chat messages use the
    # preprocessor's default (no title removal).
    cleaned_description = preprocess_model_input_str(sample_description, sample_title)
    cleaned_chat = list(map(preprocess_model_input_str, sample_chat))
    return cleaned_description, cleaned_chat




def _sentiment_labels():
    """Return display names for the model's classes, ordered by class index."""
    _, model = load_model()
    id2label = getattr(model.config, "id2label", None)
    if not id2label:
        # No label metadata available: keep the previously hard-coded order.
        return ["Negative", "Neutral", "Positive"]
    display_names = {"NEG": "Negative", "NEU": "Neutral", "POS": "Positive"}
    return [
        display_names.get(str(id2label[idx]).upper(), str(id2label[idx]))
        for idx in sorted(id2label)
    ]


def main():
    """Render the Streamlit page and run the sentiment analysis on demand.

    The page shows a URL input and an "Analyze Content" button. When the
    button is pressed, the description and live-chat messages returned by
    ``get_desc_chat`` are scored with ``analyze_sentiment``; the results are
    shown as a two-row bar chart, a text summary for the description and a
    table for the comments.
    """
    st.title("Content Sentiment Analysis")

    # User input for video URL
    video_url = st.text_input(label="Enter video URL")

    if not st.button("Analyze Content"):
        return

    # get_desc_chat takes only the video URL.
    video_description, video_live_chat = get_desc_chat(video_url)

    # Take the class order from the model's own config instead of assuming
    # it: the published model card lists the outputs as NEG, POS, NEU, which
    # a fixed Negative/Neutral/Positive list would mislabel.
    sentiment_labels = _sentiment_labels()

    # Analyze comments
    comments_results = []
    for comment in video_live_chat:
        percentages = analyze_sentiment(comment) * 100
        comments_results.append(
            {
                "Text": comment,
                "Sentiment": sentiment_labels[int(np.argmax(percentages))],
                **dict(zip(sentiment_labels, percentages)),
            }
        )

    # Analyze description
    description_score = analyze_sentiment(video_description) * 100

    # Create visualization
    fig = make_subplots(
        rows=2, cols=1, subplot_titles=("Description Analysis", "Comments Analysis")
    )

    # Description visualization
    fig.add_trace(
        go.Bar(
            name="Description Sentiment", x=sentiment_labels, y=description_score
        ),
        row=1,
        col=1,
    )

    # Comments visualization: one bar series per sentiment class, with the
    # comments numbered from 1 on the x axis.
    comment_numbers = list(range(1, len(comments_results) + 1))
    for label in sentiment_labels:
        fig.add_trace(
            go.Bar(
                name=label,
                x=comment_numbers,
                y=[result[label] for result in comments_results],
            ),
            row=2,
            col=1,
        )

    fig.update_layout(height=700, barmode="group")
    st.plotly_chart(fig)

    # Display results
    st.subheader("Description Analysis")
    overall = sentiment_labels[int(np.argmax(description_score))]
    st.write(f"**Overall Sentiment:** {overall}")
    score_summary = ", ".join(
        f"{label}: {score:.2f}%"
        for label, score in zip(sentiment_labels, description_score)
    )
    st.write(f"**Scores:** {score_summary}")
    st.write(f"**Text:** {video_description}")

    st.subheader("Comments Analysis")
    comments_df = pd.DataFrame(comments_results)
    st.dataframe(comments_df)


# def main():
#     st.title("Content Sentiment Analysis")
#     # label, value="", max_chars=None, key=None,
#     # type="default", help=None, autocomplete=None, on_change=None,
#     # args=None, kwargs=None, *, placeholder=None, disabled=False, label_visibility="visible")
#
#     video_url = st.text_input(label="Enter video url")
#     if st.button("Analyze Content"):
#         video_description, video_live_chat = get_desc_chat(video_url)
#         sentiment_labels = [
#             "Negative",
#             "Neutral",
#             "Positive",
#         ]
#
#         # Analyze comments
#         comments_results = []
#         for comment in video_live_chat:
#             scores = analyze_sentiment(comment)
#             comments_results.append(
#                 {
#                     "Text": comment,
#                     "Sentiment": sentiment_labels[np.argmax(scores)],
#                     **{
#                         label: scores[i] * 100
#                         for i, label in enumerate(sentiment_labels)
#                     },
#                 }
#             )
#
#         # Analyze subtitle
#         description_score = analyze_sentiment(video_description) * 100
#
#         # Create visualization
#         fig = make_subplots(
#             rows=2, cols=1, subplot_titles=("Description Analysis", "Comments Analysis")
#         )
#
#         # Subtitle visualization
#         fig.add_trace(
#             go.Bar(
#                 name="Description Sentiment", x=sentiment_labels, y=description_score
#             ),
#             row=1,
#             col=1,
#         )
#
#         # Comments visualization
#         for i, label in enumerate(sentiment_labels):
#             scores = [result[label] for result in comments_results]
#             fig.add_trace(
#                 go.Bar(name=label, x=list(range(1, len(scores) + 1)), y=scores),
#                 row=2,
#                 col=1,
#             )
#
#         fig.update_layout(height=700, barmode="group")
#         st.plotly_chart(fig)
#
#         # Display results
#         st.subheader("Description Analysis")
#         st.write(
#             f"**Overall Sentiment:** {sentiment_labels[np.argmax(description_score)]}"
#         )
#         st.write(
#             f"**Scores:** {', '.join([f'{label}: {description_score[i]:.2f}%' for i, label in enumerate(sentiment_labels)])}"
#         )
#         st.write(f"**Text:** {video_description}")
#
#         st.subheader("Comments Analysis")
#         comments_df = pd.DataFrame(comments_results)
#         st.dataframe(comments_df)


# Build the page only when this file is run as a script, not when imported.
if __name__ == "__main__":
    main()
# Editor is loading...