Untitled

mail@pastecode.io avatar
unknown
python
a month ago
7.3 kB
13
Indexable
Never
import re
import matplotlib.pyplot as plt
import pandas as pd
import os
import shutil
import smtplib
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from email.mime.base import MIMEBase
from email import encoders

def find_all_words_in_file(filename):
    '''
        Input: filename (path of a text file, decoded as UTF-8)
        Output: list of words, in file order, duplicates kept
        Objective: Extract every word from the given file. A word is a run
                   of ASCII letters delimited by word boundaries, so tokens
                   that mix letters with digits or underscores are skipped
                   and case is preserved.
    '''
    word_pattern = re.compile(r'\b[A-Za-z]+\b')
    with open(filename, mode='r', encoding='utf-8') as source:
        contents = source.read()
    return word_pattern.findall(contents)

# ------------------------

def find_all_unique_words(words):
    '''
        Input: words (list)
        Output: (list of distinct words, number of distinct words)
        Objective: Drop duplicate words while keeping each word at the
                   position of its first occurrence. Comparison is exact,
                   so differently-cased spellings stay distinct.
    '''
    # dict keys are unique and remember insertion order, which gives an
    # order-preserving de-duplication in one pass.
    distinct_words = list(dict.fromkeys(words))
    return distinct_words, len(distinct_words)

# -------------------------------

def get_files_total_words_count_and_unique_words_count(words, novels):
    '''
        Input: words (not used by this function; kept so existing calls
               keep working), novels (list of file names)
        Output: (list of total word counts, list of unique word counts),
                both in the same order as novels
        Objective: Measure every novel so the files can be compared
    '''
    total_counts = []
    distinct_counts = []
    for path in novels:
        file_words = find_all_words_in_file(path)
        distinct_words, _ = find_all_unique_words(file_words)
        total_counts.append(len(file_words))
        distinct_counts.append(len(distinct_words))

    return total_counts, distinct_counts

# --------------------------------

def common_words_in_files(novels):
    '''
        Input: novels (list of file names)
        Output: set of words that occur in every listed file
                (an empty set when novels is empty)
        Objective: Intersect the vocabularies of all the given files
    '''
    # None marks "no file processed yet". Comparing against an empty
    # container cannot do that job, because an empty intersection is a
    # legitimate intermediate result.
    common_words = None
    for novel in novels:
        file_words = set(find_all_words_in_file(novel))
        if common_words is None:
            common_words = file_words
        else:
            common_words &= file_words
    # Always hand back a set so callers get one type for every input.
    return set() if common_words is None else common_words

# --------------------------------

def get_special_words_from_u_file(novels_names):
    '''
        Input: novels_names (list of file names; an entry equal to 'u.txt'
               is ignored so the novel is not compared with itself)
        Output: set of words that appear in ./u.txt and in none of the
                other listed files
        Objective: Find the words that are special to the u.txt novel
    '''
    # Start from the whole u.txt vocabulary and strip out whatever any
    # other novel also uses. The subtraction must accumulate across files:
    # recomputing the difference from scratch for each file would leave
    # only the comparison against the last one.
    words_onlyin_u_file = set(find_all_words_in_file('./u.txt'))
    for novel in novels_names:
        if novel == 'u.txt':
            continue
        words_onlyin_u_file -= set(find_all_words_in_file(novel))
    return words_onlyin_u_file
## -------------------Main Program Starts from here----------------
## -------------------*****************************----------------
## -------------------*********QUESTION-1**********----------------
## ------------Find total no of words in the novel u.txt-----------

# `words` keeps duplicates, so its length is the total word count of u.txt.
# The list is reused by the following sections.
words = find_all_words_in_file('./u.txt')
print(len(words))

## -------------------*****************************----------------
## -------------------*********QUESTION-2**********----------------
## ------Finding the number of unique words in the u text file-----

# De-duplicate the words read above and print how many distinct ones exist.
unique_words, unique_word_count = find_all_unique_words(words)
print(unique_word_count)

## -------------------*****************************----------------
## -------------------*********QUESTION-3**********----------------
## Look at the other novels and find the total words and unique words for comparison.

# Files to compare; u.txt is part of the list so it shows up in the chart too.
novels = ['l.txt', 'k.txt', 'b.txt', 'd.txt', 'w.txt', 'u.txt', 'm.txt']
(
    files_words_length,
    files_unique_words_counts,
) = get_files_total_words_count_and_unique_words_count(words, novels)

# One row per novel, one column per metric.
df = pd.DataFrame(
    {
        'Total Words Count': files_words_length,
        'Total Unique Words Count': files_unique_words_counts,
    }
)

# Grouped bar chart: two bars per novel, labelled with the file names.
ax = df.plot.bar()
ax.set_xticklabels(novels, rotation=0)
ax.set_xlabel("File Names")
ax.set_ylabel("Counts")
ax.set_title("Word Counts and Unique word Counts")
plt.show()


## --------*****************************----------------
## --------*********QUESTION-4**********----------------
## ------- Common Words - Find the common words which occur in every book, i.e. every text file (novel) available to you.
## --------Store those words in the file called commonwords.txt in a folder called Python_Evaluation_Result (on your local system).

common_words = common_words_in_files(novels)
# Create the output directory if needed. exist_ok=True removes the gap
# between a separate existence check and the creation call.
os.makedirs('Python_Evaluation_Result', exist_ok=True)
# Write one word per line. The words are sorted so that repeated runs
# produce the same file, and the encoding is pinned instead of depending
# on the platform default.
with open("Python_Evaluation_Result/commonwords.txt", 'w', encoding='utf-8') as fp:
    fp.write('\n'.join(sorted(common_words)))


## --------*****************************----------------
## --------*********QUESTION-5**********----------------
## ------- Find the special words used in u.txt novel by comparing with other novels. 
## ------- Special words are the words which are only used in u.txt novel. 
## ------- Store all the special words in a file called specialwords.txt in the same folder i.e. Python_Evaluation_Result

special_words = get_special_words_from_u_file(novels)
# Make sure the output directory exists even when this section runs on
# its own.
os.makedirs('Python_Evaluation_Result', exist_ok=True)
# Write one word per line. The words are sorted so that repeated runs
# produce the same file, and the encoding is pinned instead of depending
# on the platform default.
with open('Python_Evaluation_Result/specialwords.txt', 'w', encoding='utf-8') as fp2:
    fp2.write('\n'.join(sorted(special_words)))


## --------*****************************----------------
## --------*********QUESTION-6**********----------------
## ------- Create a folder called Send_Email (if it does not already exist).
## ------- Copy every file from the Python_Evaluation_Result folder into it,
## ------- i.e. the commonwords.txt and specialwords.txt files created above.
# Create the destination folder if needed. exist_ok=True removes the gap
# between a separate existence check and the creation call.
os.makedirs('Send_Email', exist_ok=True)
files = os.listdir('Python_Evaluation_Result')
for file in files:
    source_path = os.path.join('Python_Evaluation_Result', file)
    # os.listdir also returns sub-directories; shutil.copy handles regular
    # files only, so anything else is skipped instead of raising.
    if not os.path.isfile(source_path):
        continue
    target_path = os.path.join('Send_Email', file)
    shutil.copy(source_path, target_path)


## --------*****************************----------------
## --------*********QUESTION-7**********----------------
## -------Write a Python script to email the above created two files present in the Send_Email folder.
## -------Send the email to coe@celebaltech.com, keeping chayan.sharma@celebaltech.com in CC.

sender_email = "shankar@gmail.com"
receiver_email = "coe@celebaltech.com"
cc = ["chayan.sharma@celebaltech.com"]
# NOTE(security): a credential is committed in this file. Set the
# SMTP_APP_PASSWORD environment variable to override it; the literal is
# kept only as a fallback so existing runs keep working. It should be
# rotated and removed from source control.
password = os.environ.get("SMTP_APP_PASSWORD", "bnrx rkta wjvc nrts")

# Build a multipart message: headers first, then one part per attachment.
msg = MIMEMultipart()
msg["from"] = sender_email
msg["to"] = receiver_email
msg["Cc"] = ', '.join(cc)
msg["Subject"] = "COE Python Evaluation Files"
files = ["commonwords.txt", "specialwords.txt"]

for file_name in files:
    # Read inside a context manager so the file handle is closed right away.
    with open(os.path.join("Send_Email", file_name), 'rb') as attachment:
        payload = attachment.read()
    base = MIMEBase("application", 'octet-stream')
    base.set_payload(payload)
    encoders.encode_base64(base)
    # Let the email package format (and quote, if necessary) the filename
    # parameter instead of assembling the header value by hand.
    base.add_header("Content-Disposition", "attachment", filename=file_name)
    msg.attach(base)

text = msg.as_string()
# The context manager closes the connection even when starttls, login or
# sendmail raises.
with smtplib.SMTP("smtp.gmail.com", 587) as server:
    server.starttls()
    server.login(sender_email, password)
    # CC recipients have to be in the envelope recipient list as well;
    # the Cc header alone does not deliver the mail to them.
    server.sendmail(sender_email, [receiver_email] + cc, text)
print("Email sent")
Leave a Comment