Untitled
unknown
python
a year ago
7.3 kB
15
Indexable
# Word statistics for a set of novels (plain-text files): total/unique word
# counts, words common to every novel, words that occur only in u.txt, and
# an e-mail step that sends the two generated result files.
import os
import re
import shutil
import smtplib
from email import encoders
from email.mime.base import MIMEBase
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText  # kept from the original import list (currently unused)

# A "word" is a maximal run of ASCII letters; digits and punctuation split words.
WORD_PATTERN = re.compile(r'\b[A-Za-z]+\b')

RESULT_DIR = 'Python_Evaluation_Result'
EMAIL_DIR = 'Send_Email'
RESULT_FILES = ["commonwords.txt", "specialwords.txt"]


def find_all_words_in_file(filename):
    """
    Input: filename (.txt), read as UTF-8
    Output: list of words in order of appearance (duplicates kept)
    Objective: Finding all the words in a given file
    """
    with open(filename, 'r', encoding='utf-8') as fp:
        return WORD_PATTERN.findall(fp.read())


# ------------------------
def find_all_unique_words(words):
    """
    Input: words (list)
    Output: tuple (list of distinct words in first-seen order, their count)
    Objective: Removing duplicates in list of words (comparison is case-sensitive)
    """
    # dict.fromkeys de-duplicates while preserving first-seen order.
    unique_words = list(dict.fromkeys(words))
    return unique_words, len(unique_words)


# -------------------------------
def get_files_total_words_count_and_unique_words_count(words, novels):
    """
    Input: words (unused; kept so existing callers keep working),
           novels (list of file names)
    Output: tuple (list of total word counts, list of unique word counts),
            both in the same order as novels
    Objective: Collecting per-file word statistics for comparison
    """
    files_words_length = []
    files_unique_words_counts = []
    for novel in novels:
        novel_words = find_all_words_in_file(novel)
        files_words_length.append(len(novel_words))
        files_unique_words_counts.append(len(set(novel_words)))
    return files_words_length, files_unique_words_counts


# --------------------------------
def common_words_in_files(novels):
    """
    Input: novels (list of file names)
    Output: set of words that occur in every file (empty set for no files)
    Objective: Finding the words shared by all the novels
    """
    common_words = None  # None means "no file processed yet"
    for novel in novels:
        novel_vocabulary = set(find_all_words_in_file(novel))
        if common_words is None:
            common_words = novel_vocabulary
        else:
            common_words &= novel_vocabulary
    return common_words if common_words is not None else set()


# --------------------------------
def get_special_words_from_u_file(novels_names, u_file='u.txt'):
    """
    Input: novels_names (list of file names; an entry naming u_file is skipped),
           u_file (file whose special words are wanted, default 'u.txt')
    Output: set of words that occur in u_file and in none of the other files
    Objective: Finding the words which are only used in the u.txt novel
    """
    u_path = os.path.normpath(u_file)
    words_onlyin_u_file = set(find_all_words_in_file(u_file))
    for novel in novels_names:
        if os.path.normpath(novel) == u_path:
            continue
        # Subtract cumulatively: a word is special only if it is missing from
        # EVERY other novel, not just from the last one processed.
        words_onlyin_u_file.difference_update(find_all_words_in_file(novel))
    return words_onlyin_u_file


# --------------------------------
def plot_word_counts(novels, files_words_length, files_unique_words_counts):
    """
    Input: novels (list of file names), two lists of counts in the same order
    Output: None (shows a grouped bar chart; plt.show() blocks until closed)
    Objective: Comparing total and unique word counts across the novels
    """
    # Imported here so the text-analysis helpers can be imported and used
    # without the plotting stack installed.
    import matplotlib.pyplot as plt
    import pandas as pd

    df = pd.DataFrame({
        'Total Words Count': files_words_length,
        'Total Unique Words Count': files_unique_words_counts,
    })
    ax = df.plot.bar()
    ax.set_xticklabels(novels, rotation=0)
    plt.xlabel("File Names")
    plt.ylabel("Counts")
    plt.title("Word Counts and Unique word Counts")
    plt.show()


# --------------------------------
def write_words_file(path, words):
    """
    Input: path (output file), words (iterable of words)
    Output: None (writes one word per line)
    Objective: Storing a word collection on disk
    """
    # Sorted so the file content is reproducible; set order is arbitrary.
    with open(path, 'w', encoding='utf-8') as fp:
        fp.write('\n'.join(sorted(words)))


# --------------------------------
def copy_folder_files(source_dir, target_dir):
    """
    Input: source_dir, target_dir (folder names)
    Output: None (creates target_dir if needed and copies every entry of source_dir)
    Objective: Staging the result files for e-mailing
    """
    os.makedirs(target_dir, exist_ok=True)
    for file in os.listdir(source_dir):
        shutil.copy(os.path.join(source_dir, file), os.path.join(target_dir, file))


# --------------------------------
def build_result_email(sender_email, receiver_email, cc, file_names, folder=EMAIL_DIR):
    """
    Input: sender_email, receiver_email (str), cc (list of str),
           file_names (files inside folder to attach), folder
    Output: MIMEMultipart message with one attachment per file
    Objective: Building the e-mail that carries the result files
    """
    msg = MIMEMultipart()
    msg["From"] = sender_email
    msg["To"] = receiver_email
    msg["Cc"] = ', '.join(cc)
    msg["Subject"] = "COE Python Evaluation Files"

    for file_name in file_names:
        # `with` closes the attachment file even if reading fails.
        with open(os.path.join(folder, file_name), 'rb') as attachment:
            part = MIMEBase("application", 'octet-stream')
            part.set_payload(attachment.read())
        encoders.encode_base64(part)
        # Let the email package format the parameter; a hand-built
        # "filename = x" (spaces around '=') is not a well-formed header.
        part.add_header("Content-Disposition", "attachment", filename=file_name)
        msg.attach(part)
    return msg


# --------------------------------
def send_result_email(sender_email, password, receiver_email, cc, file_names, folder=EMAIL_DIR):
    """
    Input: sender_email, password (SMTP login), receiver_email, cc (list),
           file_names (files inside folder to attach), folder
    Output: None (sends the message through smtp.gmail.com:587 with STARTTLS)
    Objective: E-mailing the result files to the receiver and CC addresses
    """
    msg = build_result_email(sender_email, receiver_email, cc, file_names, folder)
    # The context manager issues QUIT and closes the socket even on errors.
    with smtplib.SMTP("smtp.gmail.com", 587) as server:
        server.starttls()
        server.login(sender_email, password)
        server.sendmail(sender_email, [receiver_email] + cc, msg.as_string())
    print("Email sent")


## -------------------Main Program Starts from here----------------
def main():
    """Run all the evaluation questions in order (needs the novel files in the working directory)."""
    ## -------------------*********QUESTION-1**********----------------
    ## ------------Find total no of words in the novel u.txt-----------
    words = find_all_words_in_file('./u.txt')
    print(len(words))

    ## -------------------*********QUESTION-2**********----------------
    ## ------Finding the number of unique words in the u text file-----
    unique_words, unique_word_count = find_all_unique_words(words)
    print(unique_word_count)

    ## -------------------*********QUESTION-3**********----------------
    ## Look at the other novels and find the total words and unique words for comparison.
    novels = ['l.txt', 'k.txt', 'b.txt', 'd.txt', 'w.txt', 'u.txt', 'm.txt']
    files_words_length, files_unique_words_counts = \
        get_files_total_words_count_and_unique_words_count(words, novels)
    plot_word_counts(novels, files_words_length, files_unique_words_counts)

    ## --------*********QUESTION-4**********----------------
    ## ------- Common Words - Find the common words which occur in every book,
    ## ------- i.e. every text file (novel) available to you. Store those words in
    ## ------- commonwords.txt in a folder called Python_Evaluation_Result.
    common_words = common_words_in_files(novels)
    os.makedirs(RESULT_DIR, exist_ok=True)
    write_words_file(os.path.join(RESULT_DIR, 'commonwords.txt'), common_words)

    ## --------*********QUESTION-5**********----------------
    ## ------- Find the special words used in u.txt novel by comparing with other novels.
    ## ------- Special words are the words which are only used in u.txt novel.
    ## ------- Store them in specialwords.txt in the same folder.
    special_words = get_special_words_from_u_file(novels)
    write_words_file(os.path.join(RESULT_DIR, 'specialwords.txt'), special_words)

    ## --------*********QUESTION-6**********----------------
    ## ------- Copy the result files from Python_Evaluation_Result into a
    ## ------- folder called Send_Email.
    copy_folder_files(RESULT_DIR, EMAIL_DIR)

    ## --------*********QUESTION-7**********----------------
    ## ------- Email the two files present in the Send_Email folder to
    ## ------- coe@celebaltech.com keeping chayan.sharma@celebaltech.com in CC.
    sender_email = os.environ.get("EMAIL_SENDER", "shankar@gmail.com")
    receiver_email = "coe@celebaltech.com"
    cc = ["chayan.sharma@celebaltech.com"]
    # SECURITY: the app password must never be committed to source. It is read
    # from the environment; any password previously stored in this file should
    # be revoked.
    password = os.environ.get("EMAIL_APP_PASSWORD")
    if not password:
        raise SystemExit("Set the EMAIL_APP_PASSWORD environment variable to send the email")
    send_result_email(sender_email, password, receiver_email, cc, RESULT_FILES)


if __name__ == "__main__":
    main()
Editor is loading...
Leave a Comment