Untitled

 avatar
unknown
plain_text
2 years ago
769 B
6
Indexable
import re
import tokenize

def tokenize_paragraph(input_text: str) -> list:
    """Split a paragraph into tokens, keeping special entities in one piece.

    Email addresses, international phone numbers (``+`` followed by 9-15
    digits) and rupee amounts (``₹`` followed by digits) are each returned as
    a single token. Everything else is split into runs of word characters and
    individual punctuation characters; whitespace is discarded.

    Args:
        input_text: The text to tokenize.

    Returns:
        The tokens as a list of strings, in order of appearance. An empty
        list is returned when the text contains no tokens.
    """
    # Regex alternation is ordered: at any position the first alternative that
    # matches wins, so the special entities must come before the generic
    # word / punctuation fallbacks.
    token_patterns = (
        # Email. The TLD class must be [A-Za-z]: inside a character class '|'
        # is a literal, so [A-Z|a-z] would let a pipe be part of the TLD and
        # glue "a@b.com|next" into a single bogus address.
        r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b',
        # Phone number in international format.
        r'\+\d{9,15}',
        # Currency amount in rupees.
        r'₹\d+',
        # Fallbacks: a run of word characters, or one punctuation character.
        r'\w+',
        r'[^\w\s]',
    )

    # None of the alternatives contains a capture group, so findall returns
    # the full text of each match.
    return re.findall('|'.join(token_patterns), input_text)

# Demo run: one sentence that contains each special token type
# (an email address, a phone number and a rupee amount).
inputText = (
    "My email address is rhushabh11@hotmail.com , "
    "my phone number is +919920373816 "
    "and my bank balance is ₹29000"
)
outputText = tokenize_paragraph(inputText)
print(outputText)
Editor is loading...