Untitled
unknown
plain_text
2 years ago
2.4 kB
8
Indexable
import pandas as pd
from CommentProcessor import CommentProcessor
from LDAModel import ldaModel
class DataProcessor:
    """Build a per-comment table of place name, VADER score and topic.

    Each usable comment is passed to ``ldaModel.get_comments_with_topics``
    and ``CommentProcessor.process_comments``; the results are gathered
    into one table that is written to CSV.
    """

    # Column order of the result table and of the CSV that is written.
    _RESULT_COLUMNS = ['Place Name', 'VADER Score', 'Topic', 'Comment']

    def __init__(self):
        self.comment_processor = CommentProcessor()
        self.lda_model = ldaModel()

    def process_data(self, data, output_path='place_vader_topics.csv'):
        """Score every non-empty comment in *data* and save the result as CSV.

        Args:
            data: DataFrame providing a ``place_name`` and a ``text`` column.
            output_path: Destination CSV file. Defaults to the historical
                ``place_vader_topics.csv`` in the working directory.

        Returns:
            The DataFrame that was written, with the columns
            ``Place Name``, ``VADER Score``, ``Topic`` and ``Comment``.
            Rows whose ``text`` is not a non-blank string are skipped.
        """
        # Collect plain dicts and build the frame once; concatenating a
        # one-row frame per iteration recopies the whole table every time.
        rows = []
        for _, row in data.iterrows():
            place_name = row['place_name']
            comment = row['text']
            # Missing values (NaN/None) and whitespace-only text carry nothing
            # to analyse.
            if not (isinstance(comment, str) and comment.strip()):
                print(f"Skipping empty comment for place {place_name}")
                continue
            rows.append(self._build_row(place_name, comment))

        result_df = pd.DataFrame(rows, columns=self._RESULT_COLUMNS)
        result_df.to_csv(output_path, index=False)
        print(f"Place names, VADER scores, and topics saved to {output_path}")
        return result_df

    def _build_row(self, place_name, comment):
        """Return the result row (a dict) for one non-empty comment."""
        # Reset for every comment: if the analysis raises part-way through,
        # the row must not report values left over from a previous comment
        # (or hit an unbound local when the very first comment fails).
        overall_vader_score = None
        topic = None
        kept_comment = comment
        cat_remove = self.lda_model.cat_remove
        try:
            _, comments_with_topics = self.lda_model.get_comments_with_topics(
                [comment], place_name, cat_remove)
            _, _, overall_vader_score = self.comment_processor.process_comments(
                [comment], cat_remove)
            topics = comments_with_topics['topics']
            topic = topics[0] if topics else None
        except ValueError:
            # Keep the place in the output but blank the comment so the row
            # is recognisable as one whose analysis failed.
            kept_comment = None
        return {
            'Place Name': place_name,
            'VADER Score': overall_vader_score,
            'Topic': topic,
            'Comment': kept_comment,
        }
if __name__ == '__main__':
    # Upper bound on the number of rows analysed in one run.
    sample_size = 50

    data_processor = DataProcessor()
    data = pd.read_csv('data.csv')
    # DataFrame.sample (without replacement) raises ValueError when n exceeds
    # the number of rows, so cap the request for small input files. The fixed
    # seed keeps the selection reproducible between runs.
    data_selected = data.sample(n=min(sample_size, len(data)), random_state=42)
    data_processor.process_data(data_selected)
Editor is loading...
Leave a Comment