Untitled

mail@pastecode.io avatar
unknown
plain_text
7 months ago
1.7 kB
1
Indexable
Never
class Node:
    def __init__(self, symbol, frequency):
        self.symbol = symbol
        self.frequency = frequency
        self.left = None
        self.right = None


def compare(arg):
    return arg.frequency
def build_tree(symbols):
    if len(symbols) == 1:
        return symbols[0]

    symbols.sort(key=compare, reverse=True)
    total_frequency = sum([symbol.frequency for symbol in symbols])
    current_frequency = 0
    split_index = 0

    for i, symbol in enumerate(symbols):
        current_frequency += symbol.frequency
        if current_frequency > total_frequency / 2:
            split_index = i
            break

    node = Node(None, total_frequency)
    node.left = build_tree(symbols[:split_index])
    node.right = build_tree(symbols[split_index:])
    return node

def generate_codes(node, code, codes):
    if node.symbol is not None:
        codes[node.symbol] = code
    else:
        generate_codes(node.left, code + "0", codes)
        generate_codes(node.right, code + "1", codes)

def shannon_fano_encoding(text):
    symbol_frequencies = {}
    for symbol in text:
        if symbol in symbol_frequencies:
            symbol_frequencies[symbol] += 1
        else:
            symbol_frequencies[symbol] = 1

    symbols = [Node(symbol, frequency) for symbol, frequency in symbol_frequencies.items()]
    tree = build_tree(symbols)

    codes = {}
    generate_codes(tree, "", codes)

    encoded_text = "".join([codes[symbol] for symbol in text])
    return encoded_text, codes
Leave a Comment