Untitled

mail@pastecode.io avatar
unknown
plain_text
a year ago
3.4 kB
3
Indexable
This is the reference code which I am using -

def event_prediction_tfidf(input_event,input_event_type):
    """Recommend events for ``input_event`` from TF-IDF and term-count matches.

    The input is cleaned with ``input_data_preprocessing`` and scored twice,
    once through ``input_evalution`` (TF-IDF) and once through
    ``input_evalution_count`` (term counts). Each score is halved, the two
    result frames are stacked, and only the best-scoring row per ``event_id``
    is kept. Rows survive if their fuzzy score exceeds
    ``config.fuzzy_threshold`` or their halved score reaches
    ``config.tf_threshold``; at most ``config.max_reccom`` rows are retained.

    Args:
        input_event: Raw event text, forwarded to ``input_data_preprocessing``.
        input_event_type: Value compared against the ``event_type`` column to
            prioritise recommendations of the same type.

    Returns:
        A ``(event_id_list, event_title_list)`` tuple of plain lists taken
        from the ``event_id`` and ``config.target_column`` columns. Both lists
        are empty when neither scorer returns any rows.
    """
    # These module-level objects are only read here; the declaration is kept
    # to make the dependency on them explicit.
    global tfidf_matrix,tf_count_matrix,tfidf_vector,count_vector,df_act

    ## Input Data Preprocessing
    input_processed_text = input_data_preprocessing(input_event)
    print(input_processed_text)

    ## TFIDF Prediction (the transformed input matrix is not needed here)
    tfidf_pred, _ = input_evalution(input_processed_text,tfidf_matrix,tfidf_vector,df_act)

    ## TF_count Prediction (the transformed input matrix is not needed here)
    tf_count_pred, _ = input_evalution_count(input_processed_text,tf_count_matrix,count_vector,df_act)

    # Each scorer's score is halved before the two result sets are compared.
    tfidf_pred['score_new'] = tfidf_pred['score']*0.5
    tf_count_pred['score_new'] = tf_count_pred['score']*0.5

    # Remember which scorer produced each row.
    tfidf_pred['flag'] = 'tfidf'
    tf_count_pred['flag'] = 'tf_count'

    overall_result = pd.concat([tfidf_pred,tf_count_pred])
    if len(overall_result) == 0:
        return [], []

    # Sorting best-first means drop_duplicates (keep='first' by default)
    # retains the highest-scoring row for every event_id.
    overall_result = overall_result.sort_values(by='score_new',ascending=False)
    overall_result = overall_result.drop_duplicates(subset='event_id')

    overall_result['fuzz_valid_score'] = overall_result.apply(
        lambda row: fuzz_score(input_processed_text,row['clean_text_event_title']),
        axis=1,
    )
    passes_fuzzy = overall_result['fuzz_valid_score'] > config.fuzzy_threshold
    passes_score = overall_result['score_new'] >= config.tf_threshold
    overall_result = overall_result[passes_fuzzy | passes_score]
    overall_result = overall_result.head(config.max_reccom)

    same_type = overall_result[overall_result['event_type']==input_event_type]
    other_type = overall_result[overall_result['event_type']!=input_event_type]

    if len(same_type) < 10:
        # Too few same-type matches: list them first, then the other types.
        # pd.concat replaces DataFrame.append, which pandas 2.0 removed.
        overall_result = pd.concat([same_type, other_type], ignore_index=True)
    else:
        overall_result = same_type

    event_id_list = overall_result['event_id'].tolist()
    event_title_list = overall_result[config.target_column].tolist()

    return event_id_list,event_title_list


The difference is that the reference code has only one input column and one target column, whereas in my case I have several input columns, such as:

'ticket_category', 'ticket_type', 'ticket_item', 'ticket_summary',
       'ticket_desc', 'ticket_severity', 'resolution_sla_violated',
       'role_name',

And this is the input data preprocessing function, which takes text as input:

def input_data_preprocessing(text):
    """Clean free text, or several field values, into one normalised string.

    A plain string is processed exactly as before. Any other iterable of
    field values (for example a list, a tuple, or a pandas Series holding one
    row's columns) is first joined into a single space-separated string, so
    several input columns can be passed in one call. ``None`` and float NaN
    values are skipped instead of raising.

    The cleaning steps, in order: upper-case the text, drop tokens found in
    the module-level ``stopwords`` collection, remove every digit character,
    and remove every ``.``.

    Args:
        text: A string, ``None``, a single scalar, or an iterable of field
            values. Non-string values are converted with ``str``.

    Returns:
        The cleaned string; ``""`` when there is nothing to process.
    """
    if text is None:
        fields = []
    elif isinstance(text, str):
        fields = [text]
    else:
        try:
            fields = list(text)
        except TypeError:
            # A single non-iterable scalar (e.g. a number or NaN).
            fields = [text]

    # Skip missing values: None, and float NaN (the only float for which
    # ``value != value`` is true).
    text = ' '.join(
        str(value)
        for value in fields
        if not (value is None or (isinstance(value, float) and value != value))
    )

    text = text.upper() # upper case
    # NOTE(review): tokens are upper-cased before the stopword check, so
    # ``stopwords`` presumably holds upper-case words - confirm.
    text = ' '.join([word for word in text.split() if word not in stopwords]) # stopword removal
    text = ''.join(i for i in text if not i.isdigit()) # digit removal
    text = text.replace('.', '')
    return text

We want to concatenate these columns ('ticket_category', 'ticket_type', 'ticket_item', 'ticket_summary',
       'ticket_desc', 'ticket_severity', 'resolution_sla_violated',
       'role_name') into a single text value, pass that value as the input,

and then run the preprocessing on it.

Is this possible, please? Can you show the changes needed in the code?