Untitled

Interview: Dhruv
mail@pastecode.io avatar
unknown
python
a year ago
1.8 kB
1
Indexable
Never
from collections import deque
from collections import defaultdict 

logs = [  
{ 'user': '1', 'page': 'A'},  
{ 'user': '2', 'page': 'B'},  
{ 'user': '1', 'page': 'B'},  
{ 'user': '1', 'page': 'D'},  
{ 'user': '2', 'page': 'A'},  
{ 'user': '3', 'page': 'B'},  
{ 'user': '3', 'page': 'D'},  
{ 'user': '1', 'page': 'C'},  
{ 'user': '3', 'page': 'C'},  
{ 'user': '1', 'page': 'C'},  
{ 'user': '2', 'page': 'C'},  
{ 'user': '3', 'page': 'B'},  
{ 'user': '1', 'page': 'A'},  
{ 'user': '3', 'page': 'C'}, 
{ 'user': '4', 'page': 'B'},  
{ 'user': '4', 'page': 'A'},  
{ 'user': '4', 'page': 'C'}, 
]

# We need to find the sequence of pages of length 3, that is the most popular.  

# The most popular is BDC, since user 1 and user 3 visit that sequence
# n = len(logs)
# (nlogn)
def find_popular_grp(logs):
    
    if len(logs) == 0:
        return None
    user = defaultdict(deque)
    count = defaultdict(int)
    # most_popular_grp = None
    highest_count = 0
    
    for log in logs:
        user_id = int(log['user'])
        page_id = log['page']
        user[user_id].append(page_id)
        if len(user[user_id]) > 3 :
            user[user_id].popleft()
        if len(user[user_id]) < 3:
            continue
        page_grp = "".join(user[user_id])
        count[page_grp] += 1
        if count[page_grp] > highest_count:
            highest_count = count[page_grp]
    

    count_lst = list(count.items()) 
    count_lst.sort(key = lambda pair: pair[1], reverse=True) 

    res = [count_lst[0][0]]
    for i in range(1, len(count_lst)):
        if count_lst[i][1] < count_lst[i-1][1]:
            break
        else:
            res.append(count_lst[i][0])
    if len(res) == len(count_lst):
        return None
    else:
        return res

print(find_popular_grp(logs))