Untitled

mail@pastecode.io avatar
unknown
plain_text
a year ago
1.8 kB
1
Indexable
Never
import pandas as pd
from sklearn.preprocessing import LabelEncoder


def feature_engineering(csv_path="ticket_data.csv") -> pd.DataFrame:
    """Load the ticket export and return a frame of engineered features.

    Steps performed:

    1. Read the CSV and keep only the columns used downstream.
    2. Derive ``ticket_resolution_time`` (hours between ``created_date`` and
       ``ticket_resolution_date``).
    3. Drop ``response_sla_violated`` and the two raw date columns.
    4. Label-encode the categorical columns into integer codes.

    ``reopen_count``, ``person_who_resolved``, ``owner_user_id`` and
    ``role_name`` are passed through unchanged.

    Args:
        csv_path: Location of the ticket CSV, forwarded as-is to
            ``pandas.read_csv``. Defaults to ``"ticket_data.csv"`` in the
            current working directory.

    Returns:
        A new DataFrame containing the encoded categorical columns, the
        pass-through columns and ``ticket_resolution_time`` as float hours
        (NaN where either date is missing).

    Raises:
        KeyError: If any of the expected columns is absent from the CSV.
    """
    selected_columns = [
        'ticket_category', 'ticket_type', 'ticket_item', 'ticket_summary',
        'ticket_severity', 'ticket_resolution_date',
        'response_sla_violated', 'resolution_sla_violated', 'created_date',
        'reopen_count', 'person_who_resolved', 'owner_user_id',
        'role_name',
    ]
    # Encoded in this order; each is replaced by integer class codes.
    categorical_columns = (
        'ticket_category', 'ticket_type', 'ticket_item',
        'ticket_severity', 'ticket_summary', 'resolution_sla_violated',
    )

    ## Reading data
    ticket_data = pd.read_csv(csv_path)

    ## Selecting only relevant columns
    # Take an explicit copy: the assignments below must write to an
    # independent frame, not to a slice of ``ticket_data`` (which triggers
    # SettingWithCopyWarning and has ambiguous write semantics).
    data = ticket_data[selected_columns].copy()

    ## Resolution time in hours, from 'created_date' to 'ticket_resolution_date'
    # Vectorised datetime arithmetic instead of a row-wise apply: same values,
    # far cheaper, and it also works when the frame has zero rows.
    created = pd.to_datetime(data['created_date'])
    resolved = pd.to_datetime(data['ticket_resolution_date'])
    data['ticket_resolution_time'] = (resolved - created).dt.total_seconds() / 3600

    ## removing three columns and getting a final dataframe for building
    data = data.drop(
        columns=['response_sla_violated', 'ticket_resolution_date', 'created_date']
    )

    ## Performing Encoding for Categorical Columns-
    # NOTE: a single encoder is re-fitted per column, so the value -> code
    # mappings are not retained once this function returns.
    label_enc = LabelEncoder()
    for column in categorical_columns:
        data[column] = label_enc.fit_transform(data[column])

    return data

# Call the function
# NOTE: this runs at module level, so the CSV is read and processed as soon
# as the file is executed or imported.
processed_data = feature_engineering()
# Bare expression: displays the frame only in an interactive session
# (REPL / notebook cell); it has no visible effect when run as a script.
processed_data