Untitled
unknown
plain_text
9 months ago
3.4 kB
5
Indexable
# ---------------------------------------------------------------------------
# Remake/remaster extraction pipeline.
#
# Walks `rpg_df` in chunks, asks the LLM helpers to classify each game
# description, and for rows classified as a remake/remaster collects
# extracted features and review sentiment into `remake_data`, which is
# written to "remakes_final.csv".
#
# NOTE(review): the pasted source had lost its indentation; the nesting below
# is reconstructed from the data flow (features/sentiment/original_title are
# only defined for remake rows, so the append must live inside that branch).
# ---------------------------------------------------------------------------
CHUNK_SIZE = 500  # rows processed between checkpoint writes of the CSV
remake_data = []  # one dict per row classified as remake/remaster
iter_num = 1  # progress counter echoed to stdout


def _is_missing_text(value, placeholder):
    """Return True when *value* is not usable text.

    A value is unusable when it is not a ``str`` (which covers NaN and
    None), is empty or whitespace-only, or equals *placeholder* once
    stripped.
    """
    if not isinstance(value, str):
        return True
    stripped = value.strip()
    return stripped == "" or stripped == placeholder


for start in range(0, len(rpg_df), CHUNK_SIZE):
    chunk_df = rpg_df.iloc[start:start + CHUNK_SIZE]

    for _, row in chunk_df.iterrows():
        print("Iteration num", iter_num)
        description = row["detailed_description"]
        reviews = row["reviews"]

        # -------------------------------------------------------
        # 1. Skip rows whose description is missing, empty or the
        #    "No description provided" placeholder.
        # -------------------------------------------------------
        if _is_missing_text(description, "No description provided"):
            continue

        # -------------------------------------------------------
        # 2. Classification
        # -------------------------------------------------------
        print("Classification call")
        classification_result_json = classify_remake(description)
        try:
            classification_dict = parse_llm_response(classification_result_json)
            classification = classification_dict.get("classification", "Neither")
        except Exception:
            # Unparseable or unexpected response shape: treat as "Neither".
            # `Exception` (not a bare except) so Ctrl-C still stops the run.
            classification = "Neither"
        else:
            # Pause only after a successfully parsed response, as the
            # original did (presumably rate limiting -- TODO confirm).
            time.sleep(2)

        # The model may return null or a non-string value; without this
        # guard the .lower() call below would raise AttributeError.
        if not isinstance(classification, str):
            classification = "Neither"

        # Only proceed if "remake" or "remaster"
        label = classification.lower()
        if "remake" in label or "remaster" in label:
            # -------------------------------------------------------
            # 3. Feature Extraction
            # -------------------------------------------------------
            print("Feature call")
            original_title = get_original_title(row["name"]) or ""
            feature_extraction_json = extract_features(description, original_title)
            try:
                features = parse_llm_response(feature_extraction_json)
            except Exception:
                # Keep the raw response so the failure can be inspected later.
                features = {"Error": feature_extraction_json}

            # -------------------------------------------------------
            # 4. Sentiment (handle empty or placeholder reviews)
            # -------------------------------------------------------
            # The isinstance check inside the helper already covers NaN and
            # None, and avoids calling pd.isna on an array-like cell, whose
            # array result cannot be used in a boolean `or` chain.
            if _is_missing_text(reviews, "No review provided"):
                print("No Sentiment Call")
                sentiment = None
            else:
                print("Sentiment Call")
                sentiment = analyze_sentiment(reviews)

            # -------------------------------------------------------
            # 5. Append final data
            # -------------------------------------------------------
            remake_data.append({
                "title": row["name"],
                "classification": classification,
                "features": features,
                "sentiment": sentiment,
                "original_title": original_title,
                "description": description,
            })

        # NOTE(review): placed at the end of the row body, so rows skipped in
        # step 1 do not advance the counter -- confirm this matches the
        # original indentation.
        iter_num += 1

    # Checkpoint after every chunk so a crash late in a long run does not
    # discard the rows already collected.
    pd.DataFrame(remake_data).to_csv("remakes_final.csv", index=False)

# Final write; also guarantees the file exists when `rpg_df` is empty.
remake_df = pd.DataFrame(remake_data)
remake_df.to_csv("remakes_final.csv", index=False)
Editor is loading...
Leave a Comment