Untitled
from collections import Counter

# Columns never shrink below this width, so output for short words keeps the
# historical 8-character layout.
_MIN_COLUMN_WIDTH = 8
# Width of the leading "Sentence N" label column.
_LABEL_WIDTH = 12


def _count_words_per_sentence(text):
    """Return one ``Counter`` of cleaned, lowercase words per sentence.

    Sentences are delimited by ``.``, ``?`` and ``!``. Within a sentence,
    whitespace-separated tokens are stripped of every non-alphanumeric
    character; tokens that end up empty are ignored. A sentence that
    contains no usable word still yields an (empty) ``Counter`` so that it
    keeps its row in the matrix.
    """
    # Treat every sentence terminator as a period so a single split suffices.
    normalized = text.lower().replace("?", ".").replace("!", ".")
    sentences = [part.strip() for part in normalized.split(".") if part.strip()]

    counts_per_sentence = []
    for sentence in sentences:
        cleaned_tokens = (
            "".join(ch for ch in token if ch.isalnum())
            for token in sentence.split()
        )
        counts_per_sentence.append(Counter(word for word in cleaned_tokens if word))
    return counts_per_sentence


def _format_matrix(counts_per_sentence):
    """Return the printable lines of the sentence-by-word frequency table."""
    # Sorted vocabulary gives a stable, reproducible column order.
    vocabulary = sorted(set().union(*counts_per_sentence))
    # Widen the columns when a word would not fit, otherwise the header and
    # the counts drift out of alignment.
    column_width = max(
        _MIN_COLUMN_WIDTH,
        max((len(word) for word in vocabulary), default=0),
    )

    header_cells = " ".join(word.ljust(column_width) for word in vocabulary)
    lines = [
        "\nWord Frequency Matrix:",
        "Sentence".ljust(_LABEL_WIDTH) + " " + header_cells,
    ]
    for number, counts in enumerate(counts_per_sentence, start=1):
        # Counter returns 0 for words absent from this sentence.
        cells = " ".join(str(counts[word]).ljust(column_width) for word in vocabulary)
        lines.append(f"Sentence {number}".ljust(_LABEL_WIDTH) + " " + cells)
    return lines


def sentence_word_frequency_matrix(filename):
    """Print a sentence-by-word frequency matrix for a UTF-8 text file.

    The text is lowercased and split into sentences on ``.``, ``?`` and
    ``!``. Each row of the printed table is one sentence and each column is
    one distinct word (sorted alphabetically); a cell holds the number of
    times that word occurs in that sentence.

    Args:
        filename: Path of the text file to read (decoded as UTF-8).

    Returns:
        None. The table is written to standard output.

    Raises:
        OSError: If the file cannot be opened or read.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    with open(filename, "r", encoding="utf-8") as file:
        text = file.read()

    print("\n".join(_format_matrix(_count_words_per_sentence(text))))


if __name__ == "__main__":
    # Example usage; guarded so importing this module has no side effects.
    sentence_word_frequency_matrix("sample.txt")  # Replace with your file name
Leave a Comment