Untitled
unknown
plain_text
a year ago
3.4 kB
2
Indexable
Never
# This is the reference code which I am using:
def event_prediction_tfidf(input_event, input_event_type):
    """Recommend stored events that resemble ``input_event``.

    The input is cleaned with ``input_data_preprocessing`` and scored twice,
    once against the TF-IDF matrix and once against the term-count matrix.
    Each score is halved, the two result sets are stacked, and for every
    ``event_id`` only the row with the highest halved score is kept.  Rows
    survive when their fuzzy-match score exceeds ``config.fuzzy_threshold``
    or their halved score reaches ``config.tf_threshold``; at most
    ``config.max_reccom`` rows are retained.

    Args:
        input_event: Raw event text; handed unchanged to
            ``input_data_preprocessing``.
        input_event_type: Event type used to rank same-type matches first.

    Returns:
        A tuple ``(event_id_list, event_title_list)``.  Titles are read from
        the ``config.target_column`` column.  Both lists are empty when
        neither scorer returns any row.
    """
    # tfidf_matrix, tf_count_matrix, tfidf_vector, count_vector and df_act are
    # module-level objects that are only read here, so no ``global``
    # declaration is required.
    input_processed_text = input_data_preprocessing(input_event)
    print(input_processed_text)

    # Each scorer returns a (predictions, input matrix) pair; only the
    # predictions are used below.
    tfidf_pred, _ = input_evalution(
        input_processed_text, tfidf_matrix, tfidf_vector, df_act
    )
    tf_count_pred, _ = input_evalution_count(
        input_processed_text, tf_count_matrix, count_vector, df_act
    )

    # Weight both scorers equally and remember where each row came from.
    tfidf_pred['score_new'] = tfidf_pred['score'] * 0.5
    tf_count_pred['score_new'] = tf_count_pred['score'] * 0.5
    tfidf_pred['flag'] = 'tfidf'
    tf_count_pred['flag'] = 'tf_count'

    overall_result = pd.concat([tfidf_pred, tf_count_pred])
    if len(overall_result) == 0:
        return [], []

    # Sort best-first so that drop_duplicates keeps each event's top row.
    overall_result = overall_result.sort_values(by='score_new', ascending=False)
    overall_result.drop_duplicates(subset='event_id', inplace=True)

    overall_result['fuzz_valid_score'] = overall_result.apply(
        lambda row: fuzz_score(input_processed_text, row['clean_text_event_title']),
        axis=1,
    )
    keep_mask = (
        (overall_result['fuzz_valid_score'] > config.fuzzy_threshold)
        | (overall_result['score_new'] >= config.tf_threshold)
    )
    overall_result = overall_result[keep_mask].head(config.max_reccom)

    same_type = overall_result[overall_result['event_type'] == input_event_type]
    if len(same_type) < 10:
        # Too few same-type matches: list them first, then the other types.
        # pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
        other_type = overall_result[overall_result['event_type'] != input_event_type]
        overall_result = pd.concat([same_type, other_type], ignore_index=True)
    else:
        overall_result = same_type

    event_id_list = overall_result['event_id'].tolist()
    event_title_list = overall_result[config.target_column].tolist()
    return event_id_list, event_title_list
# The difference here in the code is
# here they have only one column as input data and one target column, but in
# my case I have a few input columns, like:
# 'ticket_category', 'ticket_type', 'ticket_item', 'ticket_summary',
# 'ticket_desc', 'ticket_severity', 'resolution_sla_violated', 'role_name'.
# And this is the input data preprocessing function, which takes text as input:
def _combine_fields(fields):
    """Join several column values into one space-separated string.

    ``None`` and float NaN entries are skipped; every other value is
    converted with ``str``.  A dict contributes its values; any other
    iterable (list, tuple, pandas Series, ...) contributes its items.
    """
    if fields is None:
        return ''
    if isinstance(fields, dict):
        fields = fields.values()
    return ' '.join(
        str(value)
        for value in fields
        # ``value != value`` is true only for NaN.
        if value is not None and not (isinstance(value, float) and value != value)
    )


def input_data_preprocessing(text):
    """Clean raw ticket text before it is vectorised.

    Args:
        text: Either a single string, or a collection of column values
            (dict, list, tuple, pandas Series) such as the ``ticket_*``
            fields of one row.  A collection is first concatenated with
            single spaces, skipping ``None``/NaN entries; ``None`` is
            treated as empty text.

    Returns:
        The text upper-cased, with stopwords, digits and ``'.'`` removed.
    """
    if not isinstance(text, str):
        text = _combine_fields(text)

    text = text.upper()  # upper case
    # Punctuation stripping is disabled in the original:
    # text = text.translate(str.maketrans('', '', string.punctuation))
    # ``stopwords`` is defined elsewhere in the module; because the text is
    # upper-cased first, its entries presumably need to be upper-case too
    # (TODO confirm).
    text = ' '.join([word for word in text.split() if word not in stopwords])  # stopword removal
    text = ''.join(ch for ch in text if not ch.isdigit())  # digit removal
    text = text.replace('.', '')
    return text
# We want to concatenate these columns 'ticket_category', 'ticket_type',
# 'ticket_item', 'ticket_summary', 'ticket_desc', 'ticket_severity',
# 'resolution_sla_violated', 'role_name' and pass the result as the input,
# and thereby do the preprocessing. Is it possible, please? Can you show the
# changes in code?