This is the reference code I am using:
def event_prediction_tfidf(input_event, input_event_type):
    """Recommend stored events that resemble ``input_event``.

    The input is cleaned with ``input_data_preprocessing`` and scored twice:
    by ``input_evalution`` against the module-level TF-IDF matrix and by
    ``input_evalution_count`` against the module-level term-count matrix.
    Each score is halved into ``score_new``, both result frames are stacked,
    and only the highest ``score_new`` row per ``event_id`` is kept.  Rows
    survive when their fuzzy score exceeds ``config.fuzzy_threshold`` or
    their ``score_new`` reaches ``config.tf_threshold``; at most
    ``config.max_reccom`` rows are retained.

    Args:
        input_event: Raw event passed unchanged to
            ``input_data_preprocessing``.
        input_event_type: Event type used to prioritise recommendations
            whose ``event_type`` column matches it.

    Returns:
        A ``(event_id_list, event_title_list)`` tuple of plain lists taken
        from the ``event_id`` column and the ``config.target_column`` column.
        Both lists are empty when neither scorer returned any row.
    """
    # Input data preprocessing
    input_processed_text = input_data_preprocessing(input_event)
    print(input_processed_text)

    # TF-IDF prediction (the transformed input matrix is not needed here)
    tfidf_pred, _ = input_evalution(
        input_processed_text, tfidf_matrix, tfidf_vector, df_act
    )
    # Term-count prediction
    tf_count_pred, _ = input_evalution_count(
        input_processed_text, tf_count_matrix, count_vector, df_act
    )

    # NOTE(review): the halved scores are never summed; after de-duplication
    # each event keeps the larger of its two halved scores. Confirm this is
    # the intended way of combining the two models.
    tfidf_pred['score_new'] = tfidf_pred['score'] * 0.5
    tf_count_pred['score_new'] = tf_count_pred['score'] * 0.5
    tfidf_pred['flag'] = 'tfidf'
    tf_count_pred['flag'] = 'tf_count'

    overall_result = pd.concat([tfidf_pred, tf_count_pred])
    if len(overall_result) == 0:
        return [], []

    # Sorting first means drop_duplicates keeps the best row per event_id.
    overall_result = overall_result.sort_values(by='score_new', ascending=False)
    overall_result.drop_duplicates(subset='event_id', inplace=True)

    overall_result['fuzz_valid_score'] = overall_result.apply(
        lambda row: fuzz_score(input_processed_text, row['clean_text_event_title']),
        axis=1,
    )
    passes_fuzzy = overall_result['fuzz_valid_score'] > config.fuzzy_threshold
    passes_score = overall_result['score_new'] >= config.tf_threshold
    overall_result = overall_result[passes_fuzzy | passes_score]
    overall_result = overall_result.head(config.max_reccom)

    same_type = overall_result[overall_result['event_type'] == input_event_type]
    other_type = overall_result[overall_result['event_type'] != input_event_type]
    if len(same_type) < 10:
        # Too few matches of the requested type: list them first, then the
        # other types. DataFrame.append was removed in pandas 2.0, so the
        # frames are stacked with pd.concat instead.
        overall_result = pd.concat([same_type, other_type], ignore_index=True)
    else:
        overall_result = same_type

    event_id_list = overall_result['event_id'].tolist()
    event_title_list = overall_result[config.target_column].tolist()
    return event_id_list, event_title_list
The difference is that the reference code has only one input column and one target column, whereas in my case I have several input columns:
'ticket_category', 'ticket_type', 'ticket_item', 'ticket_summary',
'ticket_desc', 'ticket_severity', 'resolution_sla_violated',
'role_name',
And this is the input data preprocessing function, which takes text as input:
def input_data_preprocessing(text):
    """Normalise raw text before it is vectorised.

    Steps, in order: upper-case the text, drop every whitespace-separated
    word found in the module-level ``stopwords``, remove all digit
    characters, and remove every ``'.'``.

    Besides a single string, the function accepts several column values at
    once, e.g. ``[ticket_category, ticket_type, ticket_summary]``, a
    ``{column: value}`` dict, or a DataFrame row.  The values are converted
    with ``str`` and joined with single spaces before cleaning; ``None`` and
    float NaN values are skipped so missing cells add no tokens.

    Args:
        text: A string, a dict whose values are the fields to combine, or
            any other non-string iterable of field values.

    Returns:
        The cleaned, upper-cased string.
    """
    if isinstance(text, dict):
        # Only the values carry text; the keys are column names.
        text = text.values()
    if not isinstance(text, (str, bytes)) and hasattr(text, '__iter__'):
        text = ' '.join(
            str(value)
            for value in text
            # value != value is only true for NaN
            if value is not None
            and not (isinstance(value, float) and value != value)
        )

    text = text.upper()  # upper case
    # NOTE(review): words are compared after upper-casing, so `stopwords`
    # must contain upper-case entries for this filter to remove anything.
    text = ' '.join(word for word in text.split() if word not in stopwords)  # stopword removal
    text = ''.join(char for char in text if not char.isdigit())  # digit removal
    return text.replace('.', '')
We want to concatenate the columns 'ticket_category', 'ticket_type', 'ticket_item', 'ticket_summary',
'ticket_desc', 'ticket_severity', 'resolution_sla_violated' and
'role_name', pass the combined text as the input,
and then run the preprocessing on it.
Is this possible? Could you please show the required changes in the code?