Untitled
unknown
plain_text
2 years ago
522 B
6
Indexable
def preprocess(input):
    """Extract date/time-like tokens and digit runs from a space-separated string.

    The text is split on single space characters and each piece is handled
    as follows:

    * If the piece contains a digit, then one of ``. : / -``, then another
      digit (for example ``12/05/2020``, ``10:30`` or ``3.14``), the whole
      piece is kept, with leading/trailing whitespace stripped.
    * Otherwise every non-digit character is removed, and the remaining
      digits are kept only if there are at least three of them.

    Args:
        input: The text to scan. The name shadows the builtin, but it is
            kept unchanged so existing keyword callers continue to work.

    Returns:
        A list of the kept tokens, in the order they appear in the text.
    """
    # Raw strings are required here: '\d' / '\D' in a plain literal are
    # invalid escape sequences and trigger a warning on modern Python.
    # Compiling once up front also keeps the pattern lookup out of the loop.
    date_time_pattern = re.compile(r"\d[.:/-]\d")
    non_digit_pattern = re.compile(r"\D")

    output_list = []
    # Split on a literal space (not on arbitrary whitespace) so that tokens
    # joined by tabs/newlines are treated exactly as before.
    for word in input.split(" "):
        if date_time_pattern.search(word):
            # Looks like a date, time or decimal: keep it verbatim.
            output_list.append(word.strip())
            continue

        # Dropping all non-digits subsumes the earlier "remove symbols"
        # pass, since every symbol is also a non-digit.
        digits = non_digit_pattern.sub("", word)
        if len(digits) >= 3:
            output_list.append(digits)
    return output_list
Editor is loading...