Untitled
unknown
plain_text
2 years ago
1.7 kB
13
Indexable
def data_pre_processing(data):
    """Prepare the raw ticket DataFrame for downstream use.

    The frame is modified in place and also returned. Steps:

    1. Parse ``created_date`` and ``ticket_resolution_date`` with
       ``pd.to_datetime``.
    2. Add ``ticket_resolution_time``: the gap between the two timestamps,
       expressed in hours as a float.
    3. Drop the two source date columns.
    4. Lowercase every object-dtype column.
    5. Clean ``ticket_desc`` with ``preprocess_text``.

    Args:
        data: pandas DataFrame containing at least the columns
            ``created_date``, ``ticket_resolution_date`` and ``ticket_desc``.

    Returns:
        The same DataFrame object that was passed in, after modification.
    """
    # Convert the 'created_date' and 'ticket_resolution_date' columns to datetime.
    # They are dropped below, so the parsed values are only kept as locals.
    created = pd.to_datetime(data['created_date'])
    resolved = pd.to_datetime(data['ticket_resolution_date'])

    # Vectorised timedelta arithmetic: avoids a Python-level loop over rows
    # and keeps working when the frame has no rows.
    elapsed = resolved - created
    data['ticket_resolution_time'] = elapsed.dt.total_seconds() / 3600

    data.drop(columns=['ticket_resolution_date', 'created_date'], inplace=True)

    # Convert all text (object-dtype) columns to lowercase.
    text_columns = [col for col in data.columns if data[col].dtype == 'O']
    for col in text_columns:
        data[col] = data[col].str.lower()

    data['ticket_desc'] = data['ticket_desc'].apply(preprocess_text)
    return data
def preprocess_text(text):
    """Clean a single text value; non-string values are returned untouched.

    A string is passed through the neattext (``nfx``) cleaning helpers in a
    fixed order and then through ``remove_stopwords``.

    Args:
        text: The value to clean. Anything that is not a ``str`` (for
            example a missing value) is returned as-is.

    Returns:
        The cleaned string, or the original object when it is not a string.
    """
    if isinstance(text, str):
        # Order matters: each step receives the output of the previous one.
        # NOTE(review): both remove_puncts and remove_punctuations are
        # applied; presumably they cover different character sets — confirm
        # before dropping either.
        cleaning_steps = (
            nfx.remove_userhandles,
            nfx.remove_puncts,
            nfx.remove_punctuations,
            nfx.remove_numbers,
            nfx.remove_special_characters,
            # Stopwords are stripped last, by the custom helper.
            remove_stopwords,
        )
        for step in cleaning_steps:
            text = step(text)
    return text
def remove_stopwords(text):
    """Drop every whitespace-separated token whose lowercase form is a stopword.

    Args:
        text: String to filter. It is split on whitespace.

    Returns:
        The surviving tokens, in their original case and order, joined by
        single spaces.
    """
    kept = []
    for token in text.split():
        # Membership is checked case-insensitively against the module-level
        # ``stopwords`` collection.
        if token.lower() in stopwords:
            continue
        kept.append(token)
    return ' '.join(kept)
def input_data_preprocess(text):
    """Return ``text`` unchanged (placeholder for input preprocessing).

    Args:
        text: The incoming value.

    Returns:
        The very same object that was passed in.
    """
    # TODO: the intent is to reuse the same preprocessing applied to the
    # dataset for input data as well; currently this is a pass-through.
    return text
Editor is loading...