Untitled
unknown
python
3 years ago
2.9 kB
6
Indexable
def gen_by_emb(df, max_candidates=40):
    """Build embedding dot-product features and labels for one session.

    Reads the module-level globals ``embeds`` (iterable of embedding tables,
    each a ``dict[int] -> np.array((dim,))``), ``cand_dict`` (session ->
    list of candidate ids), ``type_dict`` (event-type name -> code) and
    ``args`` (``args.type`` selects the target event type, ``args.mode``
    selects what is returned).

    The session is scanned from its most recent event backwards.  The first
    event whose type equals ``type_dict[args.type]`` supplies the *label*
    aid; the event immediately before it in time (of any type) supplies the
    *query* aid.  For every embedding table the query vector is dotted with
    every candidate vector.

    Args:
        df: Indexable record where ``df[0]`` is the session key, ``df[1]``
            the sequence of aids and ``df[2]`` the parallel sequence of
            event-type codes.  Aids are assumed to be ints (they are used
            directly as embedding keys) -- TODO confirm against the caller.
        max_candidates: Maximum number of candidates kept per session.
            Defaults to 40, the value that used to be hard-coded.

    Returns:
        ``(feas, truths, candidates)``:

        * ``feas``: array of shape ``(n_candidates, 3 * len(embeds))``;
          per embedding the columns are dot product, row-sum and row-max
          of that (single-column) dot-product block.
        * ``truths``: array of shape ``(n_candidates, 1)``; 1.0 where the
          candidate equals the label aid.
        * ``candidates``: the candidate id strings when
          ``args.mode != 'train'``, otherwise ``[]``.

        ``([], [], [])`` is returned when the session has no target-type
        event with an earlier event before it.

    Raises:
        ValueError: If an embedding table is empty, so its dimension
            cannot be inferred.
    """
    global embeds, cand_dict, type_dict

    session = df[0]
    aids = df[1]
    types = df[2]

    # Most-recent-first, de-duplicated session aids come first, followed by
    # the pre-computed candidates for this session.
    unique_aids = [str(aid) for aid in dict.fromkeys(aids[::-1])]
    candidates = (unique_aids + cand_dict[session])[:max_candidates]

    # The label/query pair does not depend on the embedding table, so it is
    # located once instead of once per table.
    pair = _find_label_and_query(aids, types, type_dict[args.type])
    if pair is None:
        return [], [], []
    label_aid, query_aid = pair

    # Candidate ids are carried as strings; normalise them to ints once so
    # the embedding lookup and the label comparison use the same key type.
    # (Comparing the string ids directly against the label aid never
    # matches when aids are ints.)
    cand_ids = [int(cand) for cand in candidates]

    truths = []
    blocks = []
    for embed in embeds:
        fea, query_known = _embed_features(embed, cand_ids, query_aid)
        blocks.append(fea)
        # NOTE(review): as before, a table that lacks the query aid yields
        # an all-zero label column, and the labels of the LAST table are
        # the ones returned -- confirm this is intended.
        if query_known:
            truth = [1.0 if cid == label_aid else 0.0 for cid in cand_ids]
        else:
            truth = [0.0] * len(cand_ids)
        truths = np.array(truth).reshape((-1, 1))

    # Horizontally combine the feature blocks of all embedding tables.
    feas = np.hstack(blocks) if blocks else []

    if args.mode != 'train':
        return feas, truths, candidates
    return feas, truths, []


def _find_label_and_query(aids, types, target_type):
    """Return ``(label_aid, query_aid)`` for a session, or ``None``.

    Walks the events newest-first: the first event of ``target_type`` is the
    label, and the next (older) event is the query.
    """
    label_aid = None
    for aid, event_type in zip(aids[::-1], types[::-1]):
        if label_aid is None:
            if event_type == target_type:
                label_aid = aid
            continue
        return label_aid, aid
    return None


def _embed_features(embed, cand_ids, query_aid):
    """Return ``(features, query_known)`` for one embedding table.

    ``features`` has shape ``(len(cand_ids), 3)``: dot product of the query
    vector with each candidate vector, followed by the row-sum and row-max
    of that column.  Candidates missing from ``embed`` use a zero vector;
    if the query aid itself is missing the dot-product column is all zeros
    and ``query_known`` is False.
    """
    if not embed:
        raise ValueError(
            "embedding table is empty; cannot infer its dimension"
        )
    dim = next(iter(embed.values())).shape[0]
    cand_emb = np.array([
        embed[cid] if cid in embed else np.zeros(dim)
        for cid in cand_ids
    ])
    n_cand = cand_emb.shape[0]

    query_known = query_aid in embed
    if query_known:
        # Row-wise dot product between the query and every candidate.
        dots = np.sum(embed[query_aid] * cand_emb, axis=1).reshape((n_cand, 1))
    else:
        dots = np.zeros((n_cand, 1))

    fea = np.hstack(
        (
            dots,
            np.sum(dots, axis=1).reshape((n_cand, 1)),
            np.amax(dots, axis=1).reshape((n_cand, 1)),
        )
    )
    return fea, query_known
Editor is loading...