Untitled
unknown
python
3 years ago
2.9 kB
5
Indexable
def _find_label_and_query(aids, types, target_type):
    """Return ``(label_aid, query_aid)`` for a session, or ``None``.

    The session is scanned from its newest event to its oldest. The first
    event whose type equals ``target_type`` supplies the label aid; the
    event immediately before it (in time) supplies the query aid. ``None``
    is returned when no such pair exists.
    """
    label = None
    for aid, event_type in zip(aids[::-1], types[::-1]):
        if label is None:
            if event_type == target_type:
                label = aid
            continue
        return label, aid
    return None


def gen_by_emb(df, max_candidates=40):
    """Build embedding dot-product features and labels for one session.

    Reads the module-level names ``embeds`` (iterable of mappings
    ``int aid -> 1-D np.ndarray``), ``cand_dict`` (session -> list of
    candidate aids), ``type_dict`` (event-type name -> code) and ``args``
    (``args.type``: target event-type name, ``args.mode``: run mode).

    Args:
        df: Indexable row where ``df[0]`` is the session id, ``df[1]`` the
            session's aids and ``df[2]`` the matching event types.
        max_candidates: Maximum number of candidates kept per session.

    Returns:
        A tuple ``(feas, truths, candidates)``:

        * ``feas`` -- array of shape ``(n_candidates, 3 * n_embeds)``; for
          every embedding the columns are the query/candidate dot product
          followed by the row-wise sum and max of the dot-product columns.
        * ``truths`` -- array of shape ``(n_candidates, 1)`` with ``1.0``
          where the candidate equals the label aid.
        * ``candidates`` -- the candidate list, or ``[]`` when
          ``args.mode == 'train'``.

        ``([], [], [])`` is returned when the session has no event of the
        target type that is preceded by another event.

    Raises:
        ValueError: If an embedding table is empty or its vectors do not
            expose a first dimension.
    """
    session, aids, types = df[0], df[1], df[2]

    # Session aids, most recent first and de-duplicated, are ranked ahead of
    # the pre-computed candidates for the session.
    recent_first = [str(aid) for aid in dict.fromkeys(aids[::-1])]
    candidates = (recent_first + cand_dict[session])[:max_candidates]
    n_cand = len(candidates)

    # The label/query pair depends only on the session, so find it once
    # instead of once per embedding.
    pair = _find_label_and_query(aids, types, type_dict[args.type])

    truths = []
    feas = []
    for embed in embeds:
        try:
            dim = next(iter(embed.values())).shape[0]
        except (StopIteration, AttributeError, IndexError) as err:
            raise ValueError(
                "embedding table is empty or its vectors are not 1-D arrays"
            ) from err

        if pair is None:
            return [], [], []
        label, query = pair

        # Candidates missing from the embedding get a zero vector.
        cand_emb = np.array([
            embed[int(cand)] if int(cand) in embed else np.zeros(dim)
            for cand in candidates
        ])

        if query in embed:
            dots = np.sum(embed[query] * cand_emb, axis=1).reshape((n_cand, 1))
            # Candidates were stringified above while the label is a raw
            # aid, so compare string forms; comparing them directly never
            # matches and yields all-zero labels.
            label_key = str(label)
            truth = [1. if str(cand) == label_key else 0. for cand in candidates]
        else:
            # NOTE(review): a query missing from this embedding zeroes the
            # labels as well as the features, and the labels from the last
            # embedding win -- kept as in the original; confirm intent.
            dots = np.full((n_cand, 1), 0.)
            truth = [0] * n_cand

        truths = np.expand_dims(np.hstack(truth), axis=1)

        # Horizontally combine the dot products with their row-wise sum/max.
        fea = np.hstack((
            dots,
            np.sum(dots, axis=1).reshape((n_cand, 1)),
            np.amax(dots, axis=1).reshape((n_cand, 1)),
        ))

        # The first embedding seeds the matrix; later ones append columns.
        feas = fea if not len(feas) else np.hstack((feas, fea))

    if args.mode != 'train':
        return feas, truths, candidates
    return feas, truths, []
Editor is loading...