Untitled

mail@pastecode.io avatar
unknown
plain_text
a year ago
821 B
2
Indexable
# Load the word-vector file into a token -> float32 vector lookup table.
# Each non-blank line is read as a token followed by whitespace-separated
# numeric components.
embeddings_dict = {}
with io.open(config.data_path + config.global_vector_name, 'r', encoding='utf-8') as f:
    for line in f:
        values = line.split()
        if not values:
            # Blank line (e.g. a trailing newline at end of file): there is
            # no token to index, so skip it instead of raising IndexError.
            continue
        token = values[0]
        vector = np.asarray(values[1:], "float32")
        embeddings_dict[token] = vector
        
def train_data_context(unique_train_data_word):
    """Pair each known word with its embedding vector in a DataFrame.

    Words that have no entry in the module-level ``embeddings_dict`` are
    left out of the result, as are items that cannot be used as dictionary
    keys at all.

    Args:
        unique_train_data_word: Iterable of words to look up.

    Returns:
        pandas.DataFrame with one row per word found, in first-seen order,
        and two columns: ``unique_train_data_word_embeed`` (the word) and
        ``unique_train_data_matrix`` (its vector as a plain Python list).
    """
    unique_train_data = {}
    for word in unique_train_data_word:
        try:
            vector = embeddings_dict[word]
        except (KeyError, TypeError):
            # KeyError: out-of-vocabulary word; TypeError: unhashable item.
            # Only lookup failures are skipped, so unrelated errors (and
            # Ctrl-C) are no longer silently swallowed.
            continue
        unique_train_data[word] = vector.tolist()
    # Materialise the items view so the constructor always receives a plain
    # list of (word, vector) pairs.
    Matching_data = pd.DataFrame(
        list(unique_train_data.items()),
        columns=['unique_train_data_word_embeed', 'unique_train_data_matrix'],
    )
    return Matching_data