Untitled

mail@pastecode.io avatar
unknown
plain_text
a year ago
705 B
1
Indexable
There are two functions:

def data_pre_processing_context(data):
    """Lower-case the object-dtype columns and clean up ``ticket_desc``.

    Every column whose dtype is ``object`` is passed through ``.str.lower()``,
    then each value of ``data['ticket_desc']`` is run through
    ``preprocess_text`` (a helper that must already be in scope; its exact
    behaviour is not visible here).

    The DataFrame is modified in place and the same object is returned.
    """
    # Collect the names of the object-dtype columns.
    object_cols = []
    for name in data.columns:
        if data[name].dtype == 'O':
            object_cols.append(name)

    # Lower-case those columns in a single assignment.
    lowered = data[object_cols].apply(lambda series: series.str.lower())
    data[object_cols] = lowered

    # Run the ticket description through the text pre-processor.
    cleaned_desc = data['ticket_desc'].apply(preprocess_text)
    data['ticket_desc'] = cleaned_desc
    return data

def noun_extraction(text):
    """Return the nouns found in ``text`` as a list of words.

    The text is split with ``nltk.word_tokenize`` and tagged with
    ``nltk.pos_tag``; a word is kept when its tag starts with ``'NN'``
    (the noun tags of the Penn Treebank tagset that ``nltk.pos_tag`` uses
    by default).

    Parameters:
        text: The string to analyse. Any non-string value (for example
            ``None`` or the ``NaN`` found in missing DataFrame cells)
            yields an empty list instead of raising inside the tokenizer.

    Returns:
        list: The noun tokens, in the order they appear in ``text``.
    """
    # Missing cells reach this function as None/NaN when it is applied to a
    # DataFrame column; the tokenizer only accepts strings.
    if not isinstance(text, str):
        return []

    tagged_tokens = nltk.pos_tag(nltk.word_tokenize(text))
    return [word for word, pos in tagged_tokens if pos.startswith('NN')]


How can I generate a new column, data['Nouns'], using these two functions?