Untitled
user_3839718
python
a year ago
1.5 kB
8
Indexable
import pandas as pd

from lda_topic_model import LDATopicModeling


class DataProcessor:
    """Sample rows from a CSV and annotate each one with a topic and a score.

    The input CSV must contain at least the ``text`` and ``place_name``
    columns, since those are the ones read by :meth:`process_data`.

    Attributes:
        data: The sampled rows loaded from ``data_path``.
        result_df: An empty frame created at construction time. It is not
            read or written by any method in this class.
        lda_model: The ``LDATopicModeling`` helper used for preprocessing,
            topic assignment and scoring.
    """

    # Columns written to the output CSV, in output order.
    _OUTPUT_COLUMNS = ['place_name', 'vader_score', 'topic', 'text']

    def __init__(self, data_path="data.csv", n=50, random_state=42):
        """Load ``data_path`` and keep a reproducible random sample of it.

        Args:
            data_path: Path of the CSV file to read.
            n: Number of rows to sample. If the file holds fewer rows, all
                of them are kept instead of raising.
            random_state: Seed forwarded to ``DataFrame.sample``.
        """
        raw = pd.read_csv(data_path)
        # ``sample`` raises ValueError when asked for more rows than exist
        # (sampling is without replacement), so clamp the request.
        self.data = raw.sample(n=min(n, len(raw)), random_state=random_state)
        # NOTE(review): unused in this file, and its column names
        # ('VADER_score', 'comment') differ from the ones actually written
        # by process_data. Kept because external code may read it.
        self.result_df = pd.DataFrame(columns=['place_name', 'VADER_score', 'topic', 'comment'])
        self.lda_model = LDATopicModeling(dev_mode=True)

    def process_data(self, output_path="processed_data.csv"):
        """Annotate the sampled rows and write them to ``output_path``.

        Adds a ``preprocess_fda_text`` column to ``self.data``, then, for
        each ``place_name`` group, derives a ``topic`` from the preprocessed
        text and a ``vader_score`` from the raw text. The rows are written
        group by group, in the order ``groupby('place_name')`` yields them.

        Args:
            output_path: Destination CSV path.

        Returns:
            The DataFrame that was written, with columns ``place_name``,
            ``vader_score``, ``topic`` and ``text``.
        """
        model = self.lda_model
        self.data["preprocess_fda_text"] = self.data["text"].apply(model.preprocess_fda)

        frames = []
        for _, group in self.data.groupby('place_name'):
            topics = group["preprocess_fda_text"].apply(model.get_comments_with_topics)
            scores = group["text"].apply(
                lambda text: model.calculate_vader_score(text)["vader_score"]
            )
            # ``assign`` returns a new frame, so the groupby sub-frame is
            # never mutated (avoids chained-assignment warnings).
            annotated = group.assign(topic=topics, vader_score=scores)
            frames.append(annotated[self._OUTPUT_COLUMNS])

        # One concat instead of growing a frame per iteration with the
        # private ``DataFrame._append``. ``pd.concat`` rejects an empty
        # list, so fall back to an empty frame with the right header.
        if frames:
            df = pd.concat(frames)
        else:
            df = pd.DataFrame(columns=self._OUTPUT_COLUMNS)

        df.to_csv(output_path, index=False)
        return df


def main():
    """Run the pipeline on ``data.csv`` with a 100-row sample."""
    import ssl

    # SECURITY NOTE(review): this replaces the default HTTPS context with an
    # unverified one, disabling TLS certificate verification for the whole
    # process. Presumably a workaround for resource downloads on machines
    # with a broken certificate store - TODO confirm and remove if possible.
    try:
        unverified_context = ssl._create_unverified_context
    except AttributeError:
        # This Python build does not expose the hook; leave the defaults.
        pass
    else:
        ssl._create_default_https_context = unverified_context

    DataProcessor(data_path="data.csv", n=100, random_state=42).process_data()


if __name__ == '__main__':
    main()
Editor is loading...
Leave a Comment