Untitled
plain_text
a month ago
1.7 kB
1
Indexable
Never
def data_pre_processing(data):
    """Prepare a ticket DataFrame for downstream use, modifying it in place.

    Steps, in order:

    1. Parse ``created_date`` and ``ticket_resolution_date`` as datetimes.
    2. Add ``ticket_resolution_time``: hours elapsed between the two dates.
    3. Drop the two date columns.
    4. Lowercase every text column.
    5. Clean ``ticket_desc`` with :func:`preprocess_text`.

    Args:
        data: DataFrame containing at least the columns ``created_date``,
            ``ticket_resolution_date`` and ``ticket_desc``.

    Returns:
        The same DataFrame object that was passed in (mutated).

    Raises:
        KeyError: If one of the required columns is missing.
    """
    # Convert the 'created_date' and 'ticket_resolution_date' columns to datetime.
    data['created_date'] = pd.to_datetime(data['created_date'])
    data['ticket_resolution_date'] = pd.to_datetime(data['ticket_resolution_date'])

    # Vectorised subtraction instead of a row-wise apply: same values, no
    # Python-level loop, and it also works when the frame has no rows.
    elapsed = data['ticket_resolution_date'] - data['created_date']
    data['ticket_resolution_time'] = elapsed.dt.total_seconds() / 3600

    data.drop(columns=['ticket_resolution_date', 'created_date'], inplace=True)

    # Convert all text columns to lowercase.
    _lowercase_text_columns(data)

    data['ticket_desc'] = data['ticket_desc'].apply(preprocess_text)
    return data


def _lowercase_text_columns(data):
    """Lowercase string values of every text column of ``data`` in place."""
    for col in list(data.columns):
        dtype = data[col].dtype
        if isinstance(dtype, pd.StringDtype):
            # Dedicated string dtype: every non-missing value is a str.
            data[col] = data[col].str.lower()
        elif dtype == 'O':
            # Object columns may mix strings with other values; only touch the
            # strings so that non-string cells are kept rather than turned
            # into NaN (which is what ``.str.lower()`` would do to them).
            data[col] = data[col].map(
                lambda value: value.lower() if isinstance(value, str) else value
            )


def preprocess_text(text):
    """Clean a single piece of text; non-string values are returned unchanged.

    Applies, in order, the neattext helpers ``remove_userhandles``,
    ``remove_puncts``, ``remove_punctuations``, ``remove_numbers`` and
    ``remove_special_characters``, then :func:`remove_stopwords`.

    Args:
        text: The value to clean. Anything that is not a ``str`` (for example
            a missing value) is passed through untouched.

    Returns:
        The cleaned string, or ``text`` itself when it is not a ``str``.
    """
    if not isinstance(text, str):
        return text

    # Apply various text cleaning functions using neattext.
    text = nfx.remove_userhandles(text)
    text = nfx.remove_puncts(text)
    text = nfx.remove_punctuations(text)
    text = nfx.remove_numbers(text)
    text = nfx.remove_special_characters(text)

    # Remove stopwords using a custom function.
    text = remove_stopwords(text)
    return text


def remove_stopwords(text):
    """Drop whitespace-separated tokens whose lowercase form is a stopword.

    Relies on the module-level name ``stopwords``, which is defined outside
    this section; presumably a collection of lowercase words that supports
    ``in`` -- verify against its definition.

    Args:
        text: String to filter.

    Returns:
        The remaining tokens joined by single spaces.
    """
    tokens = text.split()
    filtered_tokens = [word for word in tokens if word.lower() not in stopwords]
    return ' '.join(filtered_tokens)


def input_data_preprocess(text):
    """Return ``text`` unchanged.

    NOTE(review): the original comment states that the same preprocessing as
    above should be used for input data as well, but no cleaning is applied
    here. Confirm whether this should delegate to ``preprocess_text`` so that
    input data is treated like the ``ticket_desc`` column.
    """
    return text