Untitled
unknown
plain_text
7 months ago
2.4 kB
1
Indexable
Never
import pandas as pd

from CommentProcessor import CommentProcessor
from LDAModel import ldaModel


class DataProcessor:
    """Score each comment in a DataFrame and tag it with a topic, then save to CSV.

    Scoring is delegated to ``CommentProcessor.process_comments`` and topic
    assignment to ``ldaModel.get_comments_with_topics``; both are project-local
    helpers whose internals are not visible from this file.
    """

    # Column order of the CSV written by ``process_data``.
    _RESULT_COLUMNS = ['Place Name', 'VADER Score', 'Topic', 'Comment']

    def __init__(self):
        self.comment_processor = CommentProcessor()
        self.lda_model = ldaModel()

    def process_data(self, data, output_path='place_vader_topics.csv'):
        """Process every row of *data* and write one result row per usable comment.

        Args:
            data: DataFrame with at least the columns ``place_name`` and ``text``.
            output_path: Destination CSV file. Defaults to the historical
                ``place_vader_topics.csv`` in the working directory.

        Rows whose ``text`` is not a non-blank string are skipped with a
        message. If a helper raises ``ValueError`` for a comment, the row is
        still recorded, with ``Comment`` set to ``None`` and with whatever
        score/topic had been computed before the failure (``None`` otherwise).
        """
        rows = []

        for _, row in data.iterrows():
            place_name = row['place_name']
            comment = row['text']

            # Only non-empty strings are analysed (NaN from read_csv is a float).
            if not (isinstance(comment, str) and comment.strip()):
                print(f"Skipping empty comment for place {place_name}")
                continue

            # Reset per row so the error path never reads an unbound name or a
            # stale value left over from the previous iteration.
            overall_vader_score = None
            topic = None
            recorded_comment = comment

            try:
                # Second element of the returned pair is indexed by 'topics';
                # presumably a dict-like of per-comment topics -- TODO confirm.
                _, comments_with_topics = self.lda_model.get_comments_with_topics(
                    [comment], place_name, self.lda_model.cat_remove
                )

                # Only the third returned value (the overall score) is used.
                _, _, overall_vader_score = self.comment_processor.process_comments(
                    [comment], self.lda_model.cat_remove
                )

                topics = comments_with_topics['topics']
                topic = topics[0] if topics else None
            except ValueError as exc:
                print(f"Could not process comment for place {place_name}: {exc}")
                # Keep the original convention: failed rows carry no comment text.
                recorded_comment = None

            rows.append({
                'Place Name': place_name,
                'VADER Score': overall_vader_score,
                'Topic': topic,
                'Comment': recorded_comment,
            })

        # Build the frame once instead of concatenating inside the loop.
        result_df = pd.DataFrame(rows, columns=self._RESULT_COLUMNS)
        result_df.to_csv(output_path, index=False)
        print(f"Place names, VADER scores, and topics saved to {output_path}")


if __name__ == '__main__':
    data_processor = DataProcessor()
    data = pd.read_csv('data.csv')
    # Randomly select up to 50 data points; clamp so small files do not make
    # DataFrame.sample raise ValueError.
    data_selected = data.sample(n=min(50, len(data)), random_state=42)
    data_processor.process_data(data_selected)
Leave a Comment