# Untitled
#
# avatar
# unknown
# plain_text
# 18 days ago
# 596 B
# 4
# Indexable
import pandas as pd  # Importing pandas for matrix representation

def create_frequency_matrix(text: str) -> pd.DataFrame:
    """Count the overlapping 2-character tokens in *text*.

    Literal space characters are removed before tokenising, so a token may
    join the last letter of one word with the first letter of the next
    (e.g. "hello world" produces the token "ow"). Other whitespace such as
    tabs or newlines is left in place and participates in tokens.

    Args:
        text: The input string to analyse.

    Returns:
        A DataFrame with columns ``'Token'`` and ``'Frequency'``, holding one
        row per distinct token in order of first appearance. Inputs with
        fewer than two non-space characters yield an empty DataFrame that
        still carries both columns.
    """
    text = text.replace(" ", "")  # Removing spaces
    freq_matrix = {}

    # Slide a window of width 2 across the text; the last valid start index
    # is len(text) - 2, hence range(len(text) - 1).
    for i in range(len(text) - 1):
        token = text[i:i + 2]
        freq_matrix[token] = freq_matrix.get(token, 0) + 1

    # Convert dictionary to a pandas DataFrame
    return pd.DataFrame(list(freq_matrix.items()), columns=['Token', 'Frequency'])

# Example usage: build and print the bigram frequency table for a sample string.
text_corpus = "hello world"
print(create_frequency_matrix(text_corpus))

# Editor is loading...
# Leave a Comment