Untitled
unknown
python
2 years ago
15 kB
4
Indexable
from datetime import datetime, timedelta

import pandas as pd

from utils.common import url_or_link_generation
from utils.custom_logger import logger
from services.recommender_services.recommender import Recommender
from services.recommender_services.popularity_based_recommender import PopularityRecommender
from services.recommender_services.linked_ideas_recommender import LinkedIdeasRecommender
from services.recommender_services.tag_based_recommender import TagBasedRecommender


class BoostedIdeasRecommender(Recommender):
    """
    Recommend ideas to a member by combining implicit-model affinity scores
    with community rank scores, then adjusting for tags and linked ideas.
    """

    def recommendation(self, model, member_id, member_items, members_dict, ideas_dict, ideas_full_df,
                       workspace_id, community_id, community_idea_ranks, community_url, members_full_df,
                       stats_df, affinity_boost=0.5, rank_boost=0.5, show_output=True,
                       filter_recommendations=True, normalize_scores=False, months=12, k=10,
                       campaign_ids=None, gen_html=False):
        """
        Recommends Ideas to Members by using an Affinity+Reputation Model with Recency filter

        :param model: trained implicit model; its ``recommend`` result is read as (item indices, scores)
        :param member_id: member id to recommend ideas to
        :param member_items: weighted matrix used for training the implicit model
        :param members_dict: dictionary of all members of the community (k=member index, v=member id)
        :param ideas_dict: dictionary of all ideas of the community (k=idea index, v=idea id)
        :param ideas_full_df: dataframe of all ideas in the community; the columns read here are
            ``createdAt``, ``contributionId`` and ``title`` (this method does not modify it)
        :param workspace_id: id of the workspace
        :param community_id: id of the community
        :param community_idea_ranks: dataframe of ranked ideas (columns ``idea_id`` and ``idea_score``)
        :param community_url: currently not used by this method
        :param members_full_df: currently not used by this method
        :param stats_df: dataframe of idea status (columns ``workspace_id``, ``panel_id``, ``idea_id``,
            ``completed``, ``selected``)
        :param affinity_boost: float multiplier for affinity-based scores (0 disables the affinity part)
        :param rank_boost: float multiplier for rank-based scores (0 disables the rank part)
        :param show_output: selects the name of the intermediate URL column ('View Idea' vs 'Idea URL');
            that column is dropped before the result is returned
        :param filter_recommendations: boolean flag to filter items member has interacted with previously
        :param normalize_scores: currently not used by this method
        :param months: number of months to filter recommended ideas (default = 12, 0 disables the filter)
        :param k: number of recommended ideas to return
        :param campaign_ids: optional list of campaign ids; when given, only ideas of these campaigns are kept
        :param gen_html: boolean flag passed to the idea URL generation
        :return: dataframe of at most ``k`` recommended ideas (popularity-based when the member is
            unknown or when no candidate idea survives the filters)
        """
        member_index = self.get_index_from_dict(member_id, members_dict)
        if member_index == -1:
            # if member index is not found return Popular ideas.
            return self.get_recommendation_with_popularity_recommender(workspace_id,
                                                                       community_id,
                                                                       member_id,
                                                                       top_n=k,
                                                                       ideas_full_df=ideas_full_df,
                                                                       campaign_ids=campaign_ids,
                                                                       filter_own_ideas=True,
                                                                       filter_viewed_ideas=True)

        logger.info("Recommendation generated with Regular Recommender")
        recommendations = model.recommend(member_index,
                                          member_items[member_index],
                                          N=400,
                                          filter_already_liked_items=filter_recommendations)

        # Pair every recommended item index with its idea id and score. Indices that are
        # missing from ideas_dict are skipped: a None id cannot be scored and would make
        # the int() conversions below fail.
        idea_ids = []
        scores = []
        for item_index, score in zip(recommendations[0], recommendations[1]):
            mapped_idea_id = ideas_dict.get(item_index)
            if mapped_idea_id is None:
                continue
            idea_ids.append(mapped_idea_id)
            scores.append(score)
        ideas_rec_df = pd.DataFrame({'idea_id': idea_ids, 'score': scores})

        top_ideas_list = list(map(int, community_idea_ranks.idea_id.unique().tolist()))

        # filter ideas based on rank and affinity boost values
        if rank_boost == 0:
            ideas_to_boost = idea_ids
        elif affinity_boost == 0:
            ideas_to_boost = top_ideas_list[:300]
        else:
            ideas_to_boost = list(set(idea_ids).intersection(top_ideas_list))

        # if month filter is not 0, include ideas only within the given months
        if months != 0:
            end_date = datetime.today()
            start_date = end_date - timedelta(days=months * 30)
            # Parse into a local series so the caller's dataframe is left untouched.
            created_at = pd.to_datetime(ideas_full_df['createdAt'], errors='coerce')
            recent_mask = (created_at > start_date) & (created_at <= end_date)
            recent_ids = ideas_full_df.loc[recent_mask, 'contributionId'].tolist()
        else:
            recent_ids = ideas_to_boost
        # Sets give constant-time membership checks inside the loop below.
        recent_idea_ids = set(map(int, recent_ids))

        # get status of ideas: completed/selected
        closed_mask = ((stats_df.workspace_id == workspace_id)
                       & (stats_df['panel_id'] == community_id)
                       & ((stats_df['completed'] == 1) | (stats_df['selected'] == 1)))
        closed_idea_ids = set(stats_df.loc[closed_mask, 'idea_id'].unique().tolist())

        result_ids = []
        result_titles = []
        result_scores = []
        idea_urls = []

        for idea_id in ideas_to_boost:
            idea_key = int(idea_id)
            # keep only ideas within the date range that are not marked as completed/selected
            if idea_key not in recent_idea_ids or idea_key in closed_idea_ids:
                continue

            result_ids.append(idea_id)

            try:
                idea_title = ideas_full_df.loc[ideas_full_df['contributionId'] == idea_key, 'title'].iloc[0]
            except (IndexError, KeyError, TypeError, ValueError):
                # Title lookup is best effort; the placeholder string is kept as is.
                idea_title = 'None'
            result_titles.append(idea_title)

            # Both parts default to 0.0 so that a disabled boost (or both boosts being 0)
            # never reads an unassigned or stale value from a previous iteration.
            idea_rank_boosted = 0.0
            idea_rec_boosted = 0.0

            if rank_boost != 0:
                try:
                    idea_rank_score = community_idea_ranks.loc[
                        community_idea_ranks['idea_id'] == idea_key, 'idea_score'].iloc[0]
                    idea_rank_boosted = float(idea_rank_score * rank_boost)
                except (IndexError, KeyError, TypeError, ValueError):
                    idea_rank_boosted = 0.0

            if affinity_boost != 0:
                idea_rec_score = ideas_rec_df.loc[ideas_rec_df['idea_id'] == idea_key, 'score'].iloc[0]
                idea_rec_boosted = float(idea_rec_score * affinity_boost)

            if rank_boost == 0:
                rec_rank_score = idea_rec_boosted
            elif affinity_boost == 0:
                rec_rank_score = idea_rank_boosted
            else:
                rec_rank_score = idea_rec_boosted * idea_rank_boosted
            result_scores.append(rec_rank_score)

            idea_urls.append(url_or_link_generation(idea_id, is_member=False, gen_html=gen_html))

        if not result_ids:
            # Failed to recommend
            logger.info(f"There have no recommendation for {member_id}.")
            return self.get_recommendation_with_popularity_recommender(workspace_id,
                                                                       community_id,
                                                                       member_id,
                                                                       top_n=k,
                                                                       ideas_full_df=ideas_full_df,
                                                                       campaign_ids=campaign_ids,
                                                                       filter_own_ideas=True,
                                                                       filter_viewed_ideas=False)

        url_column = 'View Idea' if show_output else 'Idea URL'
        results_df = pd.DataFrame({'Idea ID': result_ids,
                                   'Idea Title': result_titles,
                                   'Score': result_scores,
                                   url_column: idea_urls})

        # Apply tags impact in recommendation.
        self._add_tag_scores(workspace_id, community_id, member_id, results_df)

        results_df.sort_values(by='Score', ascending=False, inplace=True)
        # Only the id and the score are passed on; title and URL columns are dropped here.
        results_df = results_df[['Idea ID', 'Score']]

        linked_ideas_results_df = self.get_recommendation_considering_linked_ideas(
            workspace_id,
            community_id,
            base_recommendation_df=results_df)
        results_df = linked_ideas_results_df.sort_values('Score', ascending=False).reset_index(drop=True)

        filtered_result_df: pd.DataFrame = self.filter_self_ideas(workspace_id=workspace_id,
                                                                  community_id=community_id,
                                                                  member_id=member_id,
                                                                  ideas_full_df=ideas_full_df,
                                                                  results_df=results_df)

        if campaign_ids:
            filtered_result_df = self.filter_by_campaign_ids(ideas_full_df=ideas_full_df,
                                                             results_df=filtered_result_df,
                                                             campaign_ids=campaign_ids)

        return filtered_result_df.head(k)

    @staticmethod
    def _add_tag_scores(workspace_id, community_id, member_id, results_df):
        """
        Add the tag-based score of each idea to the 'Score' column of ``results_df`` (in place).

        This step is best effort: if the tag recommender returns None or anything fails,
        ``results_df`` is left unchanged and the problem is only logged.

        :param workspace_id: id of the workspace
        :param community_id: id of the community
        :param member_id: member the recommendation is generated for
        :param results_df: dataframe with 'Idea ID' and 'Score' columns and a default 0..n-1 index
        """
        try:
            logger.info("Recommendation with Tag consideration.")
            # Get recommendation from tag based recommender
            tag_recommender = TagBasedRecommender(workspace_id=workspace_id,
                                                  community_id=community_id,
                                                  member_id=member_id)
            tag_based_recommendation = tag_recommender.recommend_ideas()
            if tag_based_recommendation is None:
                logger.info('No recommendation available with Tag consideration.')
                return

            # Join only the two needed columns, one row per idea, so the merged frame keeps
            # exactly one row per result row (same order, same 0..n-1 index) and no
            # column-name clash with 'Score' can occur.
            tag_scores = tag_based_recommendation[['idea_id', 'score']].drop_duplicates(subset='idea_id')
            merged_df = pd.merge(results_df, tag_scores, how='left', left_on='Idea ID', right_on='idea_id')

            # Ideas without a tag score contribute 0.
            results_df['Score'] = merged_df['Score'] + merged_df['score'].fillna(0)
        except Exception as exc:
            # Deliberately broad: tag scoring must never break the main recommendation.
            logger.info(f"Failed to recommend with tag: {exc!r}")

    def get_recommendation_with_popularity_recommender(self, workspace_id: int, community_id: int,
                                                       member_id: int, top_n: int,
                                                       ideas_full_df: pd.DataFrame,
                                                       campaign_ids=None,
                                                       filter_own_ideas: bool = True,
                                                       filter_viewed_ideas: bool = False):
        """
        Return recommendations from the PopularityRecommender, optionally restricted to campaigns.

        :param workspace_id: id of the workspace
        :param community_id: id of the community
        :param member_id: member the recommendation is generated for
        :param top_n: number of ideas requested from the popularity recommender
        :param ideas_full_df: dataframe of all ideas (used only for the campaign filter)
        :param campaign_ids: optional list of campaign ids to keep
        :param filter_own_ideas: forwarded to ``PopularityRecommender.recommend_ideas``
        :param filter_viewed_ideas: forwarded to ``PopularityRecommender.recommend_ideas``
        :return: result of the popularity recommender; the campaign filter is applied after the
            ``top_n`` selection, so fewer than ``top_n`` rows may remain
        """
        logger.info("Recommendation generated with Popularity Based Recommender")
        popularity_recommender = PopularityRecommender(workspace_id=workspace_id,
                                                       community_id=community_id)
        popular_ideas_df = popularity_recommender.recommend_ideas(member_id=member_id,
                                                                  top_n=top_n,
                                                                  filter_own_ideas=filter_own_ideas,
                                                                  filter_viewed_ideas=filter_viewed_ideas)
        if campaign_ids:
            popular_ideas_df = self.filter_by_campaign_ids(ideas_full_df=ideas_full_df,
                                                           results_df=popular_ideas_df,
                                                           campaign_ids=campaign_ids)
        return popular_ideas_df

    @staticmethod
    def get_recommendation_considering_linked_ideas(workspace_id: int, community_id: int,
                                                    base_recommendation_df: pd.DataFrame) -> pd.DataFrame:
        """
        Pass the base recommendation through the LinkedIdeasRecommender and return its result.

        :param workspace_id: id of the workspace
        :param community_id: id of the community
        :param base_recommendation_df: dataframe of recommended ideas to start from
        :return: whatever ``LinkedIdeasRecommender.recommend`` returns
        """
        logger.info("Recommendation generated considering linked ideas")
        linked_ideas_recommender = LinkedIdeasRecommender(workspace_id=workspace_id,
                                                          community_id=community_id,
                                                          base_recommendation_df=base_recommendation_df)
        return linked_ideas_recommender.recommend()

    @staticmethod
    def filter_self_ideas(workspace_id: int, community_id: int, member_id: int,
                          ideas_full_df: pd.DataFrame, results_df: pd.DataFrame) -> pd.DataFrame:
        """
        Remove the member's own ideas from the recommendation result.

        :param workspace_id: id of the workspace
        :param community_id: id of the community
        :param member_id: member whose ideas are removed
        :param ideas_full_df: dataframe of all ideas (columns ``workspace_id``, ``communityId``,
            ``memberId``, ``contributionId``)
        :param results_df: dataframe of recommended ideas with an 'Idea ID' column
        :return: ``results_df`` without the member's own ideas, with a fresh index
        """
        # Extract self idea id for the specific member
        own_mask = ((ideas_full_df.workspace_id == workspace_id)
                    & (ideas_full_df['communityId'] == community_id)
                    & (ideas_full_df['memberId'] == member_id))
        self_ideas_id = list(ideas_full_df[own_mask]["contributionId"].values)

        # Remove self ideas from the result data frame.
        return results_df[~results_df["Idea ID"].isin(self_ideas_id)].reset_index(drop=True)

    @staticmethod
    def filter_by_campaign_ids(ideas_full_df: pd.DataFrame, results_df: pd.DataFrame, campaign_ids: list):
        """
        Keep only the recommended ideas that belong to one of the given campaigns.

        :param ideas_full_df: dataframe of all ideas (columns ``campaignId``, ``contributionId``)
        :param results_df: dataframe of recommended ideas with an 'Idea ID' column
        :param campaign_ids: list of campaign ids to keep
        :return: filtered ``results_df`` with a fresh index
        """
        filtered_ideas_id = ideas_full_df[ideas_full_df.campaignId.isin(campaign_ids)].contributionId
        return results_df[results_df["Idea ID"].isin(filtered_ideas_id)].reset_index(drop=True)
Editor is loading...