# Paste-site page metadata (not part of the script): "Untitled", plain text, 596 B, posted 8 months ago.
from collections import Counter

import pandas as pd  # Importing pandas for matrix representation
def create_frequency_matrix(text: str, token_size: int = 2) -> pd.DataFrame:
    """Count overlapping fixed-length character tokens in *text*.

    Space characters are removed first, so tokens may span what used to be
    a word boundary (``"lo wo"`` yields ``"lo"``, ``"ow"``, ``"wo"``). Only
    the ``" "`` character is stripped; tabs and newlines are kept.

    Args:
        text: The input string to tokenize.
        token_size: Number of characters per token. Defaults to 2.

    Returns:
        A DataFrame with one row per distinct token, in order of first
        appearance, and the columns ``Token`` and ``Frequency``. The frame
        is empty (but still has both columns) when the space-stripped text
        is shorter than ``token_size``.

    Raises:
        ValueError: If ``token_size`` is less than 1.
    """
    if token_size < 1:
        raise ValueError("token_size must be a positive integer")

    compact = text.replace(" ", "")  # Removing spaces

    # Slide a window of ``token_size`` characters across the text. The range
    # is empty when the text is too short, which gives an empty Counter.
    # Counter keeps keys in first-seen order, so the row order is stable.
    counts = Counter(
        compact[start:start + token_size]
        for start in range(len(compact) - token_size + 1)
    )

    # Convert the counts to a two-column pandas DataFrame.
    return pd.DataFrame(list(counts.items()), columns=['Token', 'Frequency'])
# Example usage: print the token/frequency table for a short sample string.
text_corpus = "hello world"
print(create_frequency_matrix(text_corpus))
# Paste-site page footer (not part of the script): "Editor is loading...", "Leave a Comment".