# Untitled

import pandas as pd
from CommentProcessor import CommentProcessor
from LDAModel import ldaModel


class DataProcessor:
    """Attach a VADER score and a topic to each comment and export them as CSV.

    The per-comment analysis is delegated to two project helpers created in
    ``__init__``: ``CommentProcessor`` (sentiment score) and ``ldaModel``
    (topic lookup).
    """

    # Column order of the exported CSV.
    RESULT_COLUMNS = ['Place Name', 'VADER Score', 'Topic', 'Comment']

    def __init__(self):
        self.comment_processor = CommentProcessor()
        self.lda_model = ldaModel()

    def _build_row(self, place_name, comment):
        """Analyse one comment and return its output row as a dict.

        If a ``ValueError`` is raised while analysing the comment, the row is
        still produced: its 'Comment' is set to ``None`` to flag the failure
        and any value that had not been computed yet is ``None`` as well.
        """
        # Reset per comment so a failure can never leak the values of a
        # previous row (or hit an unbound local on the very first row).
        vader_score = None
        topic = None
        recorded_comment = comment

        try:
            _, comments_with_topics = self.lda_model.get_comments_with_topics(
                [comment], place_name, self.lda_model.cat_remove)

            # Only the overall score is used; the word lists are discarded.
            _, _, vader_score = self.comment_processor.process_comments(
                [comment], self.lda_model.cat_remove)

            topics = comments_with_topics['topics']
            topic = topics[0] if topics else None
        except ValueError:
            recorded_comment = None

        return {'Place Name': place_name,
                'VADER Score': vader_score,
                'Topic': topic,
                'Comment': recorded_comment}

    def process_data(self, data, output_path='place_vader_topics.csv'):
        """Score every non-empty comment in *data* and write the result to CSV.

        Args:
            data: DataFrame with a 'place_name' and a 'text' column.
            output_path: Destination of the CSV file. Defaults to
                'place_vader_topics.csv' in the current working directory.

        Rows whose 'text' is not a string, or is empty/whitespace-only, are
        skipped with a message on stdout. The CSV always contains the header
        row, even when no comment was processed.
        """
        rows = []

        for _, row in data.iterrows():
            place_name = row['place_name']
            comment = row['text']

            # Missing values arrive as NaN/None, so the type check is needed
            # in addition to the emptiness check.
            if not (isinstance(comment, str) and comment.strip()):
                print(f"Skipping empty comment for place {place_name}")
                continue

            rows.append(self._build_row(place_name, comment))

        # Build the frame once instead of concatenating inside the loop.
        result_df = pd.DataFrame(rows, columns=self.RESULT_COLUMNS)
        result_df.to_csv(output_path, index=False)
        print(f"Place names, VADER scores, and topics saved to {output_path}")


# Script entry point: analyse a reproducible random sample of data.csv.
if __name__ == '__main__':
    data_processor = DataProcessor()
    data = pd.read_csv('data.csv')
    # Randomly select up to 50 data points. DataFrame.sample raises ValueError
    # when n exceeds the number of rows, so cap n for small input files.
    sample_size = min(50, len(data))
    data_selected = data.sample(n=sample_size, random_state=42)
    data_processor.process_data(data_selected)
# Editor is loading...