# Untitled
#
# mail@pastecode.io avatar
# unknown
# python
# a year ago
# 15 kB
# 1
# Indexable
# Never
import pandas as pd
from datetime import datetime, timedelta

from utils.common import url_or_link_generation
from utils.custom_logger import logger
from services.recommender_services.recommender import Recommender
from services.recommender_services.popularity_based_recommender import PopularityRecommender
from services.recommender_services.linked_ideas_recommender import LinkedIdeasRecommender
from services.recommender_services.tag_based_recommender import TagBasedRecommender


class BoostedIdeasRecommender(Recommender):

    def recommendation(self, model, member_id, member_items, members_dict, ideas_dict, ideas_full_df, workspace_id,
                       community_id,
                       community_idea_ranks, community_url, members_full_df, stats_df,
                       affinity_boost=0.5, rank_boost=0.5, show_output=True, filter_recommendations=True,
                       normalize_scores=False, months=12, k=10, campaign_ids=None, gen_html=False):
        """
        Recommends Ideas to a Member by combining model affinity scores with community idea ranks.

        Candidates are taken from ``model.recommend`` and/or ``community_idea_ranks`` (depending on the
        boost values), restricted to recently created ideas that are not completed/selected, scored,
        topped up with the tag based score, and finally passed through the linked-ideas recommender,
        the own-ideas filter and the optional campaign filter. The popularity recommender is used
        instead when the member has no index in ``members_dict`` or when no candidate survives.

        :param model: trained implicit model; ``model.recommend`` must return (item indexes, scores)
        :param member_id: member id to recommend ideas to
        :param member_items: weighted matrix used for training the implicit model (indexed by member index)
        :param members_dict: dictionary of all members of the community, passed to ``get_index_from_dict``
        :param ideas_dict: dictionary of all ideas of the community (k=idea index, v=idea id)
        :param ideas_full_df: dataframe of all ideas in the community; the columns read here are
                              ``contributionId``, ``createdAt`` and ``title``. It is not modified.
        :param workspace_id: id of the workspace
        :param community_id: id of the community
        :param community_idea_ranks: dataframe of ranked ideas with ``idea_id`` and ``idea_score`` columns
        :param community_url: not used by this method
        :param members_full_df: not used by this method
        :param stats_df: dataframe of idea status (``workspace_id``, ``panel_id``, ``idea_id``,
                         ``completed``, ``selected``)
        :param affinity_boost: float multiplier for the model score; 0 means candidates come from the
                               first 300 ranked ideas and only the rank score is used
        :param rank_boost: float multiplier for the rank score; 0 means only the model score is used.
                           When both boosts are non-zero the two boosted scores are multiplied.
        :param show_output: only selects the name of the URL column ('View Idea' / 'Idea URL') of the
                            intermediate frame
        :param filter_recommendations: boolean flag to filter items member has interacted with previously
        :param normalize_scores: not used by this method
        :param months: number of months (30 days each) to filter recommended ideas; 0 disables the filter
        :param k: number of recommended ideas to return
        :param campaign_ids: optional list of campaign ids the returned ideas must belong to
        :param gen_html: forwarded to ``url_or_link_generation``
        :return: dataframe of at most ``k`` recommended ideas
        """
        member_index = self.get_index_from_dict(member_id, members_dict)

        if member_index == -1:
            # if member index is not found return Popular ideas.
            return self.get_recommendation_with_popularity_recommender(workspace_id,
                                                                       community_id,
                                                                       member_id,
                                                                       top_n=k,
                                                                       ideas_full_df=ideas_full_df,
                                                                       campaign_ids=campaign_ids,
                                                                       filter_own_ideas=True,
                                                                       filter_viewed_ideas=True
                                                                       )

        logger.info("Recommendation generated with Regular Recommender")
        recommendations = model.recommend(member_index,
                                          member_items[member_index],
                                          N=400,
                                          filter_already_liked_items=filter_recommendations
                                          )

        # Map each recommended idea id to its first affinity score. Item indexes that are missing
        # from ideas_dict resolve to None and are skipped so they cannot break int() further down.
        affinity_by_id = {}
        for item_index, affinity_score in zip(recommendations[0], recommendations[1]):
            idea_id = ideas_dict.get(item_index)
            if idea_id is not None:
                affinity_by_id.setdefault(idea_id, affinity_score)
        idea_ids = list(affinity_by_id)

        top_ideas_list = list(map(int, community_idea_ranks.idea_id.unique().tolist()))

        # filter ideas based on rank and affinity boost values
        if rank_boost == 0:
            ideas_to_boost = idea_ids
        elif affinity_boost == 0:
            ideas_to_boost = top_ideas_list[:300]
        else:
            ranked_ids = set(top_ideas_list)
            # Keep the model's ordering so the candidate order is deterministic.
            ideas_to_boost = [idea_id for idea_id in idea_ids if idea_id in ranked_ids]

        # if month filter is not 0, include ideas only within the given months
        if months != 0:
            now = datetime.today()
            window_start = now - timedelta(days=months * 30)
            # Parse into a local series: the caller's dataframe must not be altered.
            created_at = pd.to_datetime(ideas_full_df['createdAt'], errors='coerce')
            in_window = (created_at > window_start) & (created_at <= now)
            recent_idea_ids = set(ideas_full_df.loc[in_window, 'contributionId'].tolist())
        else:
            recent_idea_ids = None  # no recency restriction

        # get status of ideas: completed/selected
        closed_mask = ((stats_df.workspace_id == workspace_id)
                       & (stats_df['panel_id'] == community_id)
                       & ((stats_df['completed'] == 1) | (stats_df['selected'] == 1)))
        closed_idea_ids = set(stats_df.loc[closed_mask, 'idea_id'].unique().tolist())

        # Build the lookups once instead of scanning the dataframes for every candidate.
        title_by_id = self._first_value_by_id(ideas_full_df, 'contributionId', 'title')
        rank_by_id = {}
        if rank_boost != 0:
            rank_by_id = self._first_value_by_id(community_idea_ranks, 'idea_id', 'idea_score')

        result_ids = []
        result_titles = []
        result_scores = []
        idea_urls = []

        for idea_id in ideas_to_boost:
            idea_key = int(idea_id)
            # keep the idea only if it is within the date range and not marked as completed/selected
            if recent_idea_ids is not None and idea_key not in recent_idea_ids:
                continue
            if idea_key in closed_idea_ids:
                continue

            # Both parts default to 0.0 so the score is defined even when both boosts are 0.
            rank_part = 0.0
            if rank_boost != 0 and idea_key in rank_by_id:
                try:
                    rank_part = float(rank_by_id[idea_key] * rank_boost)
                except (TypeError, ValueError):
                    # Non-numeric rank score: keep the best-effort behaviour of scoring it as 0.
                    rank_part = 0.0

            affinity_part = 0.0
            if affinity_boost != 0:
                # With a non-zero affinity boost every candidate originates from idea_ids.
                affinity_part = float(affinity_by_id[idea_id] * affinity_boost)

            if rank_boost == 0:
                combined_score = affinity_part
            elif affinity_boost == 0:
                combined_score = rank_part
            else:
                combined_score = affinity_part * rank_part

            result_ids.append(idea_id)
            result_titles.append(title_by_id.get(idea_key, 'None'))
            result_scores.append(combined_score)
            idea_urls.append(url_or_link_generation(idea_id, is_member=False, gen_html=gen_html))

        if not result_ids:
            # Failed to recommend
            logger.info(f"There have no recommendation for {member_id}.")
            return self.get_recommendation_with_popularity_recommender(workspace_id,
                                                                       community_id,
                                                                       member_id,
                                                                       top_n=k,
                                                                       ideas_full_df=ideas_full_df,
                                                                       campaign_ids=campaign_ids,
                                                                       filter_own_ideas=True,
                                                                       filter_viewed_ideas=False
                                                                       )

        # NOTE(review): the title and URL columns are dropped a few lines below, before anything
        # outside this method sees the frame; they are still built to keep the helper calls unchanged.
        url_column = 'View Idea' if show_output else 'Idea URL'
        results_df = pd.DataFrame({'Idea ID': result_ids, 'Idea Title': result_titles,
                                   'Score': result_scores, url_column: idea_urls})

        # Apply tags impact in recommendation.
        results_df = self._apply_tag_scores(results_df, workspace_id, community_id, member_id)

        results_df.sort_values(by='Score', ascending=False, inplace=True)
        results_df = results_df[['Idea ID', 'Score']]
        linked_ideas_results_df = self.get_recommendation_considering_linked_ideas(workspace_id,
                                                                                   community_id,
                                                                                   base_recommendation_df=results_df
                                                                                   )
        results_df = linked_ideas_results_df.sort_values('Score', ascending=False).reset_index(drop=True)

        filtered_result_df: pd.DataFrame = self.filter_self_ideas(workspace_id=workspace_id,
                                                                  community_id=community_id,
                                                                  member_id=member_id,
                                                                  ideas_full_df=ideas_full_df,
                                                                  results_df=results_df
                                                                  )

        if campaign_ids:
            filtered_result_df = self.filter_by_campaign_ids(ideas_full_df=ideas_full_df,
                                                             results_df=filtered_result_df,
                                                             campaign_ids=campaign_ids
                                                             )

        return filtered_result_df.head(k)

    @staticmethod
    def _first_value_by_id(frame, id_column, value_column):
        """ Map every value of id_column to the first value_column value found for it ({} if a column is missing). """
        if id_column not in frame.columns or value_column not in frame.columns:
            return {}
        first_rows = frame.drop_duplicates(subset=id_column, keep='first')
        return dict(zip(first_rows[id_column].tolist(), first_rows[value_column].tolist()))

    @staticmethod
    def _apply_tag_scores(results_df, workspace_id, community_id, member_id):
        """ Add the tag based score to results_df['Score'] in place; best effort, never raises. """
        try:
            logger.info("Recommendation with Tag consideration.")

            # Get recommendation from tag based recommender
            tag_recommender = TagBasedRecommender(workspace_id=workspace_id,
                                                  community_id=community_id,
                                                  member_id=member_id
                                                  )
            tag_based_recommendation = tag_recommender.recommend_ideas()
            if tag_based_recommendation is None:
                logger.info('No recommendation available with Tag consideration.')
                return results_df

            # Look the tag score up per idea id instead of relying on row positions after a merge:
            # a repeated idea_id in the tag frame would otherwise shift every following score.
            tag_scores = (tag_based_recommendation
                          .drop_duplicates(subset='idea_id', keep='first')
                          .set_index('idea_id')['score'])
            tag_bonus = results_df['Idea ID'].map(tag_scores).fillna(0)

            # Add tag recommendation score with regular recommendation score
            results_df['Score'] = results_df['Score'].fillna(0) + tag_bonus
        except Exception as exc:
            # Tag scoring is optional: keep the regular scores, but record why it was skipped.
            logger.info(f"Failed to recommend with tag. {type(exc).__name__}: {exc}")
        return results_df

    def get_recommendation_with_popularity_recommender(self, workspace_id: int,
                                                       community_id: int,
                                                       member_id: int,
                                                       top_n: int,
                                                       ideas_full_df: pd.DataFrame,
                                                       campaign_ids=None,
                                                       filter_own_ideas: bool = True,
                                                       filter_viewed_ideas: bool = False):
        """
        Recommends Ideas to a Member with the popularity based recommender.
        :param workspace_id: id of the workspace, passed to PopularityRecommender
        :param community_id: id of the community, passed to PopularityRecommender
        :param member_id: member id to recommend ideas to
        :param top_n: number of ideas requested from the popularity recommender
        :param ideas_full_df: dataframe of all ideas, only used for the campaign filter
        :param campaign_ids: optional list of campaign ids; when truthy the result is filtered by them
        :param filter_own_ideas: forwarded to PopularityRecommender.recommend_ideas
        :param filter_viewed_ideas: forwarded to PopularityRecommender.recommend_ideas
        :return: result of PopularityRecommender.recommend_ideas, campaign-filtered when requested
        """
        logger.info("Recommendation generated with Popularity Based Recommender")

        recommender = PopularityRecommender(workspace_id=workspace_id, community_id=community_id)
        request = {
            'member_id': member_id,
            'top_n': top_n,
            'filter_own_ideas': filter_own_ideas,
            'filter_viewed_ideas': filter_viewed_ideas,
        }
        ranked_by_popularity = recommender.recommend_ideas(**request)

        # Without campaigns there is nothing left to narrow down.
        if not campaign_ids:
            return ranked_by_popularity

        # The campaign filter runs on the top_n rows already returned by the recommender.
        return self.filter_by_campaign_ids(ideas_full_df=ideas_full_df,
                                           results_df=ranked_by_popularity,
                                           campaign_ids=campaign_ids)

    @staticmethod
    def get_recommendation_considering_linked_ideas(workspace_id: int, community_id: int,
                                                    base_recommendation_df: pd.DataFrame) -> pd.DataFrame:
        """
        Runs the linked ideas recommender on top of an existing recommendation.
        :param workspace_id: id of the workspace, passed to LinkedIdeasRecommender
        :param community_id: id of the community, passed to LinkedIdeasRecommender
        :param base_recommendation_df: recommendation dataframe handed to LinkedIdeasRecommender
        :return: whatever LinkedIdeasRecommender.recommend() returns
        """
        logger.info("Recommendation generated considering linked ideas")
        recommender_kwargs = {
            'workspace_id': workspace_id,
            'community_id': community_id,
            'base_recommendation_df': base_recommendation_df,
        }
        return LinkedIdeasRecommender(**recommender_kwargs).recommend()

    @staticmethod
    def filter_self_ideas(workspace_id: int, community_id: int, member_id: int, ideas_full_df: pd.DataFrame,
                          results_df: pd.DataFrame) -> pd.DataFrame:
        """ Filter the ideas id for a specific member id. """

        # Extract self idea id for the specific member
        self_ideas_id = list(ideas_full_df[(ideas_full_df.workspace_id == workspace_id) & (
                ideas_full_df['communityId'] == community_id) & (
                                                   ideas_full_df['memberId'] == member_id)]["contributionId"].values)

        # Remove self ideas from the result data frame.
        filtered_result_df = results_df[~results_df["Idea ID"].isin(self_ideas_id)].reset_index(drop=True)
        return filtered_result_df

    @staticmethod
    def filter_by_campaign_ids(ideas_full_df: pd.DataFrame, results_df: pd.DataFrame, campaign_ids: list):
        """
        Filter out the ideas which is not in the given campaign.
        """
        filtered_ideas_id = ideas_full_df[ideas_full_df.campaignId.isin(campaign_ids)].contributionId
        filtered_result_df = results_df[results_df["Idea ID"].isin(filtered_ideas_id)].reset_index(drop=True)
        return filtered_result_df