Untitled
# Pipeline: walk rpg_df row by row, classify each game description as a
# remake/remaster via an LLM helper, extract features and review sentiment
# for the matches, and write the collected rows to "remakes_final.csv".

CHUNK_SIZE = 500


def _is_missing_text(value, placeholder):
    """Return True when *value* is not usable text.

    A value is unusable when it is not a string (which covers NaN/None
    cells as well as list-like cells), is blank after stripping
    whitespace, or equals *placeholder* after stripping.
    """
    if not isinstance(value, str):
        return True
    return value.strip() in ("", placeholder)


remake_data = []
iter_num = 1

for start in range(0, len(rpg_df), CHUNK_SIZE):
    chunk_df = rpg_df.iloc[start:start + CHUNK_SIZE]

    for _, row in chunk_df.iterrows():
        print("Iteration num", iter_num)

        description = row["detailed_description"]
        reviews = row["reviews"]

        # ---------------------------------------------------------------
        # 1. Skip the row when the description is missing, blank, or the
        #    "No description provided" placeholder.
        #    NOTE: rows skipped here do not advance iter_num.
        # ---------------------------------------------------------------
        if _is_missing_text(description, "No description provided"):
            continue

        # ---------------------------------------------------------------
        # 2. Classification
        # ---------------------------------------------------------------
        print("Classification call")
        classification_result_json = classify_remake(description)
        try:
            classification_dict = parse_llm_response(classification_result_json)
            classification = classification_dict.get("classification", "Neither")
        except Exception:
            # Best effort: an unparseable response counts as "Neither".
            # `Exception` (not a bare except) lets Ctrl-C / SystemExit
            # propagate instead of being turned into a classification.
            classification = "Neither"
        else:
            # Pause after a successfully parsed response before making
            # further calls.
            time.sleep(2)

        # A non-string label (e.g. JSON null) cannot be matched below;
        # treat it like an unparseable response rather than crashing.
        if not isinstance(classification, str):
            classification = "Neither"

        # Only proceed if "remake" or "remaster"
        label = classification.lower()
        if "remake" in label or "remaster" in label:
            # -----------------------------------------------------------
            # 3. Feature extraction
            # -----------------------------------------------------------
            print("Feature call")
            original_title = get_original_title(row["name"]) or ""
            feature_extraction_json = extract_features(description, original_title)
            try:
                features = parse_llm_response(feature_extraction_json)
            except Exception:
                # Keep the raw response so the failure can be inspected
                # in the output file.
                features = {"Error": feature_extraction_json}

            # -----------------------------------------------------------
            # 4. Sentiment (handle empty or placeholder reviews)
            # -----------------------------------------------------------
            # The type check runs first, so NaN, None and list-like cells
            # are rejected without evaluating an ambiguous truth value.
            if _is_missing_text(reviews, "No review provided"):
                print("No Sentiment Call")
                sentiment = None
            else:
                print("Sentiment Call")
                sentiment = analyze_sentiment(reviews)

            # -----------------------------------------------------------
            # 5. Append final data
            # -----------------------------------------------------------
            remake_data.append({
                "title": row["name"],
                "classification": classification,
                "features": features,
                "sentiment": sentiment,
                "original_title": original_title,
                "description": description,
            })

        iter_num += 1

remake_df = pd.DataFrame(remake_data)
remake_df.to_csv("remakes_final.csv", index=False)
Leave a Comment