# Untitled

CHUNK_SIZE = 500


def _has_text(value: object, placeholder: str) -> bool:
    """Return True if *value* is a non-blank string other than *placeholder*.

    Non-string values (NaN, None, lists, ...) are treated as missing text.
    """
    if not isinstance(value, str):
        return False
    stripped = value.strip()
    return stripped != "" and stripped != placeholder


# Walk every row of rpg_df, ask the LLM helpers whether the game is a
# remake/remaster, and collect features + review sentiment for those that are.
# The collected rows are written to "remakes_final.csv" at the end.
remake_data = []
iter_num = 1
# NOTE(review): rows are visited in CHUNK_SIZE slices, but no per-chunk work
# is done here; every row is processed the same way regardless of its chunk.
for start in range(0, len(rpg_df), CHUNK_SIZE):
    end = start + CHUNK_SIZE
    chunk_df = rpg_df.iloc[start:end]

    for _, row in chunk_df.iterrows():
        print("Iteration num", iter_num)
        # Advance the counter up front so rows skipped below are counted too.
        iter_num += 1

        description = row["detailed_description"]
        reviews = row["reviews"]

        # -------------------------------------------------------
        # 1. Skip rows whose description is missing, blank, or the
        #    "No description provided" placeholder.
        # -------------------------------------------------------
        if not _has_text(description, "No description provided"):
            continue

        # -------------------------------------------------------
        # 2. Classification
        # -------------------------------------------------------
        print("Classification call")
        classification_result_json = classify_remake(description)
        try:
            classification_dict = parse_llm_response(classification_result_json)
            classification = classification_dict.get("classification", "Neither")
        except Exception as exc:
            # Best effort: an unparseable response counts as "Neither".
            # `Exception` (not a bare except) lets Ctrl-C still stop the run.
            print("Classification parse failed:", exc)
            classification = "Neither"

        # Pause after every classification call, whether or not the response
        # parsed. Presumably this throttles requests to the LLM backend.
        time.sleep(2)

        # The parsed value may be None or another non-string; treat as "Neither"
        # instead of crashing on .lower().
        if not isinstance(classification, str):
            classification = "Neither"

        # Only proceed if "remake" or "remaster"
        label = classification.lower()
        if "remake" not in label and "remaster" not in label:
            continue

        # -------------------------------------------------------
        # 3. Feature Extraction
        # -------------------------------------------------------
        print("Feature call")
        original_title = get_original_title(row["name"]) or ""
        feature_extraction_json = extract_features(description, original_title)
        try:
            features = parse_llm_response(feature_extraction_json)
        except Exception:
            # Keep the raw response so the failure can be inspected in the CSV.
            features = {"Error": feature_extraction_json}

        # -------------------------------------------------------
        # 4. Sentiment (skip missing, blank, or placeholder reviews)
        # -------------------------------------------------------
        if _has_text(reviews, "No review provided"):
            print("Sentiment Call")
            sentiment = analyze_sentiment(reviews)
        else:
            print("No Sentiment Call")
            sentiment = None

        # -------------------------------------------------------
        # 5. Append final data
        # -------------------------------------------------------
        remake_data.append({
            "title": row["name"],
            "classification": classification,
            "features": features,
            "sentiment": sentiment,
            "original_title": original_title,
            "description": description,
        })

remake_df = pd.DataFrame(remake_data)
remake_df.to_csv("remakes_final.csv", index=False)
# Editor is loading...