Untitled
unknown
python
2 years ago
15 kB
5
Indexable
import pandas as pd
from datetime import datetime, timedelta
from utils.common import url_or_link_generation
from utils.custom_logger import logger
from services.recommender_services.recommender import Recommender
from services.recommender_services.popularity_based_recommender import PopularityRecommender
from services.recommender_services.linked_ideas_recommender import LinkedIdeasRecommender
from services.recommender_services.tag_based_recommender import TagBasedRecommender
class BoostedIdeasRecommender(Recommender):
    """
    Recommends ideas to members by combining an implicit-model affinity score with a
    community rank score, restricted to recent ideas that are not completed/selected.

    Falls back to the popularity based recommender when the member is unknown to the
    model or when no candidate survives the filters.
    """

    # Number of candidates requested from the implicit model for one member.
    _CANDIDATE_COUNT = 400
    # Number of ranked community ideas considered when affinity is switched off.
    _RANK_ONLY_LIMIT = 300

    def recommendation(self, model, member_id, member_items, members_dict, ideas_dict, ideas_full_df, workspace_id,
                       community_id,
                       community_idea_ranks, community_url, members_full_df, stats_df,
                       affinity_boost=0.5, rank_boost=0.5, show_output=True, filter_recommendations=True,
                       normalize_scores=False, months=12, k=10, campaign_ids=None, gen_html=False):
        """
        Recommends Ideas to Members by using an Affinity+Reputation Model with Recency filter
        :param model: trained implicit model; ``model.recommend`` must return (item indices, scores)
        :param member_id: member id to recommend ideas to
        :param member_items: weighted matrix used for training the implicit model
        :param members_dict: dictionary of all members of the community (k=member index, v=member id)
        :param ideas_dict: dictionary of all ideas of the community (k=idea index, v=idea id)
        :param ideas_full_df: dataframe of all ideas in the community; reads the columns
            createdAt, contributionId and title here (it is not modified)
        :param workspace_id: id of the workspace
        :param community_id: id of the community
        :param community_idea_ranks: dataframe of ranked ideas with columns idea_id and idea_score
        :param community_url: not used by this method
        :param members_full_df: not used by this method
        :param stats_df: dataframe of idea status with columns workspace_id, panel_id, idea_id,
            completed and selected
        :param affinity_boost: float multiplier for the affinity score (0 disables affinity)
        :param rank_boost: float multiplier for the rank score (0 disables rank)
        :param show_output: only selects the name of the intermediate URL column
            ('View Idea' when true, 'Idea URL' otherwise)
        :param filter_recommendations: boolean flag to filter items member has interacted with previously
        :param normalize_scores: not used by this method
        :param months: number of months to filter recommended ideas (default = 12, 0 disables the filter)
        :param k: number of recommended ideas to return
        :param campaign_ids: optional list of campaign ids; when given, only ideas of those campaigns are kept
        :param gen_html: forwarded to url_or_link_generation
        :return: dataframe of at most k recommended ideas
        """
        # NOTE(review): get_index_from_dict is not defined in this class; presumably inherited
        # from Recommender and returning -1 when the member is unknown.
        member_index = self.get_index_from_dict(member_id, members_dict)
        if member_index == -1:
            # if member index is not found return Popular ideas.
            return self.get_recommendation_with_popularity_recommender(workspace_id,
                                                                       community_id,
                                                                       member_id,
                                                                       top_n=k,
                                                                       ideas_full_df=ideas_full_df,
                                                                       campaign_ids=campaign_ids,
                                                                       filter_own_ideas=True,
                                                                       filter_viewed_ideas=True)

        logger.info("Recommendation generated with Regular Recommender")
        recommendations = model.recommend(member_index,
                                          member_items[member_index],
                                          N=self._CANDIDATE_COUNT,
                                          filter_already_liked_items=filter_recommendations)
        # Translate model item indices into idea ids (None when the index is unknown).
        idea_ids = [ideas_dict.get(item_index) for item_index in recommendations[0]]
        # First score per idea id, so the per-idea lookup below is a dict access.
        affinity_by_id = {}
        for candidate_id, candidate_score in zip(idea_ids, recommendations[1]):
            affinity_by_id.setdefault(candidate_id, candidate_score)

        top_ideas_list = list(map(int, community_idea_ranks.idea_id.unique().tolist()))
        # filter ideas based on rank and affinity boost values
        if rank_boost == 0:
            ideas_to_boost = idea_ids
        elif affinity_boost == 0:
            ideas_to_boost = top_ideas_list[:self._RANK_ONLY_LIMIT]
        else:
            ideas_to_boost = list(set(idea_ids).intersection(top_ideas_list))

        recent_idea_ids = self._recent_idea_ids(ideas_full_df, months, ideas_to_boost)
        closed_idea_ids = self._closed_idea_ids(stats_df, workspace_id, community_id)

        result_ids = []
        result_titles = []
        result_scores = []
        idea_urls = []
        for idea_id in ideas_to_boost:
            if idea_id is None:
                # ideas_dict had no entry for this model index; nothing to recommend.
                continue
            idea_key = int(idea_id)
            # keep only ideas within the date range that are not marked as completed/selected
            if idea_key not in recent_idea_ids or idea_key in closed_idea_ids:
                continue
            result_ids.append(idea_id)

            try:
                idea_title = ideas_full_df.loc[ideas_full_df['contributionId'] == idea_key, 'title'].iloc[0]
            except (KeyError, IndexError):
                idea_title = 'None'
            result_titles.append(idea_title)

            # Both components default to 0.0 so the score is defined even when both
            # boosts are 0 (previously this raised UnboundLocalError).
            rank_component = 0.0
            affinity_component = 0.0
            if rank_boost != 0:
                try:
                    idea_rank_score = community_idea_ranks.loc[
                        community_idea_ranks['idea_id'] == idea_key, 'idea_score'].iloc[0]
                    rank_component = float(idea_rank_score * rank_boost)
                except (KeyError, IndexError, TypeError, ValueError):
                    rank_component = 0.0
            if affinity_boost != 0:
                affinity_component = float(affinity_by_id[idea_id] * affinity_boost)

            if rank_boost == 0:
                rec_rank_score = affinity_component
            elif affinity_boost == 0:
                rec_rank_score = rank_component
            else:
                rec_rank_score = affinity_component * rank_component
            result_scores.append(rec_rank_score)

            # NOTE(review): titles and URLs are dropped further down when the frame is narrowed
            # to ['Idea ID', 'Score']; they could be removed once it is confirmed that
            # url_or_link_generation has no side effects.
            idea_urls.append(url_or_link_generation(idea_id, is_member=False, gen_html=gen_html))

        if not result_ids:
            # Failed to recommend
            logger.info(f"There have no recommendation for {member_id}.")
            return self.get_recommendation_with_popularity_recommender(workspace_id,
                                                                       community_id,
                                                                       member_id,
                                                                       top_n=k,
                                                                       ideas_full_df=ideas_full_df,
                                                                       campaign_ids=campaign_ids,
                                                                       filter_own_ideas=True,
                                                                       filter_viewed_ideas=False)

        url_column = 'View Idea' if show_output else 'Idea URL'
        results_df = pd.DataFrame({'Idea ID': result_ids, 'Idea Title': result_titles,
                                   'Score': result_scores, url_column: idea_urls})
        # Apply tags impact in recommendation.
        self._add_tag_scores(results_df, workspace_id, community_id, member_id)

        results_df = results_df.sort_values(by='Score', ascending=False)[['Idea ID', 'Score']]
        linked_ideas_results_df = self.get_recommendation_considering_linked_ideas(workspace_id,
                                                                                   community_id,
                                                                                   base_recommendation_df=results_df)
        results_df = linked_ideas_results_df.sort_values('Score', ascending=False).reset_index(drop=True)
        filtered_result_df: pd.DataFrame = self.filter_self_ideas(workspace_id=workspace_id,
                                                                  community_id=community_id,
                                                                  member_id=member_id,
                                                                  ideas_full_df=ideas_full_df,
                                                                  results_df=results_df)
        if campaign_ids:
            filtered_result_df = self.filter_by_campaign_ids(ideas_full_df=ideas_full_df,
                                                             results_df=filtered_result_df,
                                                             campaign_ids=campaign_ids)
        return filtered_result_df.head(k)

    @staticmethod
    def _recent_idea_ids(ideas_full_df: pd.DataFrame, months, fallback_ids) -> set:
        """ Return the set of int idea ids created within the last ``months`` * 30 days.

        When ``months`` is 0 no date filter applies and ``fallback_ids`` (minus None) is returned.
        """
        if months == 0:
            return {int(idea_id) for idea_id in fallback_ids if idea_id is not None}
        end_date = datetime.today()
        start_date = end_date - timedelta(days=months * 30)
        # Parse into a local Series so the caller's dataframe is left untouched.
        created_at = pd.to_datetime(ideas_full_df['createdAt'], errors='coerce')
        in_window = (created_at > start_date) & (created_at <= end_date)
        return set(map(int, ideas_full_df.loc[in_window, 'contributionId'].values.tolist()))

    @staticmethod
    def _closed_idea_ids(stats_df: pd.DataFrame, workspace_id, community_id) -> set:
        """ Return the ids of ideas flagged completed or selected for this workspace/community. """
        closed_mask = ((stats_df.workspace_id == workspace_id)
                       & (stats_df['panel_id'] == community_id)
                       & ((stats_df['completed'] == 1) | (stats_df['selected'] == 1)))
        return set(stats_df.loc[closed_mask, 'idea_id'].unique().tolist())

    @staticmethod
    def _add_tag_scores(results_df: pd.DataFrame, workspace_id, community_id, member_id) -> None:
        """ Add the tag based score to ``results_df['Score']`` in place (best effort).

        Any failure is logged and leaves ``results_df`` unchanged.
        """
        try:
            logger.info("Recommendation with Tag consideration.")
            # Get recommendation from tag based recommender
            tag_recommender = TagBasedRecommender(workspace_id=workspace_id,
                                                  community_id=community_id,
                                                  member_id=member_id)
            tag_based_recommendation: pd.DataFrame = tag_recommender.recommend_ideas()
            if tag_based_recommendation is None:
                logger.info('No recommendation available with Tag consideration.')
                return
            # One score per idea id keeps the left merge row-for-row with results_df.
            tag_scores = tag_based_recommendation[['idea_id', 'score']].drop_duplicates(subset='idea_id')
            merged_df: pd.DataFrame = pd.merge(results_df[['Idea ID', 'Score']], tag_scores, how='left',
                                               left_on='Idea ID', right_on='idea_id')
            # Ideas without a tag score contribute 0; assign by position, not by index label.
            results_df['Score'] = (merged_df['Score'].fillna(0) + merged_df['score'].fillna(0)).to_numpy()
        except Exception as error:
            logger.info("Failed to recommend with tag.")
            logger.info(f"Tag recommendation error: {error!r}")

    def get_recommendation_with_popularity_recommender(self, workspace_id: int,
                                                       community_id: int,
                                                       member_id: int,
                                                       top_n: int,
                                                       ideas_full_df: pd.DataFrame,
                                                       campaign_ids=None,
                                                       filter_own_ideas: bool = True,
                                                       filter_viewed_ideas: bool = False):
        """
        Recommends ideas with the popularity based recommender.
        :param workspace_id: id of the workspace
        :param community_id: id of the community
        :param member_id: member id to recommend ideas to
        :param top_n: number of ideas requested from the popularity recommender
        :param ideas_full_df: dataframe of all ideas, used only for the campaign filter
        :param campaign_ids: optional list of campaign ids to restrict the result to
        :param filter_own_ideas: forwarded to PopularityRecommender.recommend_ideas
        :param filter_viewed_ideas: forwarded to PopularityRecommender.recommend_ideas
        :return: result of the popularity recommender, campaign-filtered when campaign_ids is given
        """
        logger.info("Recommendation generated with Popularity Based Recommender")
        popularity_recommender = PopularityRecommender(workspace_id=workspace_id,
                                                       community_id=community_id)
        popular_ideas_df = popularity_recommender.recommend_ideas(member_id=member_id,
                                                                  top_n=top_n,
                                                                  filter_own_ideas=filter_own_ideas,
                                                                  filter_viewed_ideas=filter_viewed_ideas)
        if campaign_ids:
            popular_ideas_df = self.filter_by_campaign_ids(ideas_full_df=ideas_full_df,
                                                           results_df=popular_ideas_df,
                                                           campaign_ids=campaign_ids)
        return popular_ideas_df

    @staticmethod
    def get_recommendation_considering_linked_ideas(workspace_id: int, community_id: int,
                                                    base_recommendation_df: pd.DataFrame) -> pd.DataFrame:
        """ Pass the base recommendation through LinkedIdeasRecommender and return its result. """
        logger.info("Recommendation generated considering linked ideas")
        linked_ideas_recommender = LinkedIdeasRecommender(workspace_id=workspace_id, community_id=community_id,
                                                          base_recommendation_df=base_recommendation_df)
        return linked_ideas_recommender.recommend()

    @staticmethod
    def filter_self_ideas(workspace_id: int, community_id: int, member_id: int, ideas_full_df: pd.DataFrame,
                          results_df: pd.DataFrame) -> pd.DataFrame:
        """ Remove from results_df the ideas submitted by the given member in this workspace/community. """
        # Extract self idea id for the specific member
        own_mask = ((ideas_full_df.workspace_id == workspace_id)
                    & (ideas_full_df['communityId'] == community_id)
                    & (ideas_full_df['memberId'] == member_id))
        self_ideas_id = ideas_full_df.loc[own_mask, "contributionId"].tolist()
        # Remove self ideas from the result data frame.
        return results_df[~results_df["Idea ID"].isin(self_ideas_id)].reset_index(drop=True)

    @staticmethod
    def filter_by_campaign_ids(ideas_full_df: pd.DataFrame, results_df: pd.DataFrame, campaign_ids: list):
        """
        Filter out the ideas which are not in the given campaigns.
        """
        filtered_ideas_id = ideas_full_df[ideas_full_df.campaignId.isin(campaign_ids)].contributionId
        return results_df[results_df["Idea ID"].isin(filtered_ideas_id)].reset_index(drop=True)
Editor is loading...