Untitled
unknown
plain_text
3 years ago
522 B
9
Indexable
def preprocess(input):
    """Extract date/time-like tokens and digit runs from a text string.

    The text is split on single space characters and each token is
    handled as follows:

    * If the token contains a digit, one of ``. : / -``, and another digit
      in sequence (e.g. ``12:30``, ``2021-05-06``, ``1.5``), the whole
      token is kept, with surrounding whitespace stripped.
    * Otherwise every non-digit character is removed, and the remaining
      digits are kept only if there are at least three of them.

    Args:
        input: The text to scan. The name shadows the builtin but is kept
            so existing keyword callers continue to work.

    Returns:
        A list of the kept tokens, in their original order.
    """
    # Imported locally so the function works even when the enclosing
    # module has no top-level ``import re``.
    import re

    output_list = []
    # Split on a single space (not arbitrary whitespace): tabs/newlines stay
    # attached to their token, and consecutive spaces yield empty tokens
    # that fall through the length check below.
    for word in input.split(' '):
        # Capture dates and times: digit, separator, digit.
        if re.search(r'\d[.:/-]\d', word):
            output_list.append(word.strip())
            continue

        # Drop symbols and letters in one pass; only digits survive.
        digits = re.sub(r'\D', '', word)
        if len(digits) >= 3:
            output_list.append(digits)
    return output_list