# Untitled
#
# avatar
# unknown
# plain_text
# 13 days ago
# 1.4 kB
# 1
# Indexable
def _split_sentences(text):
    """Return the non-blank, stripped pieces of *text* split on '.', '?' and '!'."""
    # Normalise every terminator to "." so a single split handles all three.
    normalised = text.replace("?", ".").replace("!", ".")
    return [piece.strip() for piece in normalised.split(".") if piece.strip()]


def _count_words(sentence):
    """Return a {word: count} dict for one sentence, keeping alphanumeric characters only."""
    counts = {}
    for token in sentence.split():
        # Drop punctuation and symbols inside the token ("don't" -> "dont").
        word = ''.join(char for char in token if char.isalnum())
        if word:  # A token made only of punctuation leaves nothing to count
            counts[word] = counts.get(word, 0) + 1
    return counts


def sentence_word_frequency_matrix(filename):
    """Print a sentence-by-word frequency matrix for a text file.

    The file is read as UTF-8 and lowercased. Sentences are the non-blank
    pieces obtained by splitting the text on ".", "?" and "!". Words are the
    whitespace-separated tokens of a sentence with every non-alphanumeric
    character removed; tokens that end up empty are ignored.

    Output goes to stdout: a title line, a header row listing every distinct
    word in sorted order, then one row per sentence holding that sentence's
    count for each word. Columns are at least 8 characters wide (12 for the
    label column) and grow to fit longer words or labels, so headers and
    counts always line up.

    Args:
        filename: Path of the text file to analyse.

    Returns:
        None. The matrix is only printed.

    Raises:
        OSError: If the file cannot be opened (e.g. FileNotFoundError).
        UnicodeDecodeError: If the file content is not valid UTF-8.
    """
    with open(filename, 'r', encoding='utf-8') as file:
        text = file.read().lower()  # Case-insensitive counting

    word_freq_list = [_count_words(sentence) for sentence in _split_sentences(text)]

    # Sorted so the column order is the same on every run.
    unique_words = sorted({word for counts in word_freq_list for word in counts})

    # Size each column to its header. Padding a long word with ljust(8) leaves
    # it wider than the 8-character count cells below it, which used to shift
    # every following column out of alignment.
    widths = [max(8, len(word)) for word in unique_words]
    labels = [f"Sentence {i}" for i in range(1, len(word_freq_list) + 1)]
    label_width = max([12, *map(len, labels)])

    print("\nWord Frequency Matrix:")
    print(
        "Sentence".ljust(label_width),
        " ".join(word.ljust(width) for word, width in zip(unique_words, widths)),
    )

    for label, word_freq in zip(labels, word_freq_list):
        row = [
            str(word_freq.get(word, 0)).ljust(width)
            for word, width in zip(unique_words, widths)
        ]
        print(label.ljust(label_width), " ".join(row))

# Example usage: runs only when this file is executed as a script, so that
# importing the module does not try to read "sample.txt".
if __name__ == "__main__":
    sentence_word_frequency_matrix("sample.txt")  # Replace with your file name
# Leave a Comment