# --- Paste-site page header (not program text) ---
# Untitled
# unknown
# plain_text
# 9 months ago
# 1.4 kB
# 4
# Indexable
def sentence_word_frequency_matrix(filename):
    """Print a sentence-by-word frequency matrix for a UTF-8 text file.

    The text is lower-cased and split into sentences at ``.``, ``?`` and
    ``!``. Within a sentence, each whitespace-separated token is reduced to
    its alphanumeric characters; tokens that end up empty are ignored. One
    row is printed per sentence and one column per distinct word (sorted),
    each cell holding the number of times that word occurs in that sentence.

    Args:
        filename: Path of the text file to read.

    Returns:
        None. The matrix is written to standard output.

    Raises:
        OSError: If the file cannot be opened or read.
        UnicodeDecodeError: If the file content is not valid UTF-8.
    """
    from collections import Counter

    with open(filename, 'r', encoding='utf-8') as file:
        text = file.read().lower()  # Case-insensitive counting

    # Treat '?' and '!' as '.', then split; blank fragments are dropped.
    normalized = text.replace("?", ".").replace("!", ".")
    sentences = [part.strip() for part in normalized.split(".") if part.strip()]

    # One Counter per sentence. A sentence whose tokens contain no
    # alphanumeric characters still gets an (all-zero) row.
    word_freq_list = []
    for sentence in sentences:
        cleaned = (
            ''.join(char for char in token if char.isalnum())
            for token in sentence.split()
        )
        word_freq_list.append(Counter(word for word in cleaned if word))

    # Sorted vocabulary gives a stable column order.
    unique_words = sorted(set().union(*word_freq_list))

    # Size each column to its header word (minimum 8) so that words longer
    # than 8 characters do not push the header out of line with the counts.
    widths = [max(8, len(word)) for word in unique_words]
    # Same idea for the row label once the sentence number gets long.
    label_width = max(12, len(f"Sentence {len(word_freq_list)}"))

    print("\nWord Frequency Matrix:")
    print(
        "Sentence".ljust(label_width),
        " ".join(word.ljust(width) for word, width in zip(unique_words, widths)),
    )
    for i, word_freq in enumerate(word_freq_list):
        row = [
            str(word_freq.get(word, 0)).ljust(width)
            for word, width in zip(unique_words, widths)
        ]
        print(f"Sentence {i+1}".ljust(label_width), " ".join(row))
# Example usage: runs only when this file is executed as a script, so that
# importing the module does not trigger file I/O or printing.
if __name__ == "__main__":
    sentence_word_frequency_matrix("sample.txt")  # Replace with your file name
# Leave a Comment  (paste-site page footer, not program text)