# Untitled

unknown
python
a month ago
6.2 kB
31
Indexable
Never
```import heapq

# [1] Define functions that implement each of the four data compression algorithms:
#
# dc_run_length_encoding()
# dc_huffman_coding()
# dc_lempel_ziv_welch()
# dc_arithmetic_encoding()
def dc_run_length_encoding(bits):
print("DC Run Length Encoding")
print(f"Bits: {bits}")
n = len(bits)
count = 0
res = ""
for i in range(n):
if bits[i] == "0":
count += 1
else:
res += f"{count} "
count = 0
if count > 0:
res += f"{count}"
print("Result: ", res)
return res

class node:
def __init__(self, freq, symbol, left=None, right=None):
# frequency of symbol
self.freq = freq

# symbol name (character)
self.symbol = symbol

# node left of current node
self.left = left

# node right of current node
self.right = right

# tree direction (0/1)
self.huff = ''

def __lt__(self, nxt):
return self.freq < nxt.freq

# utility function to print huffman

# codes for all symbols in the newly
# created Huffman tree
def printNodes(dict_huffman, node, val='',):
# huffman code for current node
newVal = val + str(node.huff)

# if node is not an edge node
# then traverse inside it
if (node.left):
printNodes(dict_huffman, node.left, newVal)
if (node.right):
printNodes(dict_huffman, node.right, newVal)

# if node is edge node then
# display its huffman code
if (not node.left and not node.right):
dict_huffman[node.symbol] = newVal
print(f"{node.symbol} -> {newVal}")

# characters for huffman tree

def dc_huffman_coding(message, alphabet, frequency):

# list containing unused nodes
nodes = []

# converting characters and frequencies
# into huffman tree nodes
for i in range(len(alphabet)):
heapq.heappush(nodes, node(frequency[i], alphabet[i]))

while len(nodes) > 1:
# sort all the nodes in ascending order
# based on their frequency
left = heapq.heappop(nodes)
right = heapq.heappop(nodes)

# assign directional value to these nodes
left.huff = 0
right.huff = 1

# combine the 2 smallest nodes to create
# new node as their parent
newNode = node(left.freq + right.freq, left.symbol + right.symbol, left, right)

heapq.heappush(nodes, newNode)

# Huffman Tree is ready!
huffman_dict = {}
printNodes(huffman_dict, nodes[0])

print(f"Huffman Dictionary: {huffman_dict}")

encoded_message ="".join([huffman_dict[letter] for letter in message])

print(f"Encoded Message: {encoded_message}")

len_uncompressed = len(message)
len_compressed = len(encoded_message)
print(f"Uncompressed Length: {len_uncompressed}")
print(f"Compressed Length: {len_compressed}")
compression_rate = round(len_compressed / len_uncompressed, 2)
print(f"Compression Rate: {compression_rate}")

def dc_lempel_ziv_welch(message, alphabet):
"""Compress a string to a list of output symbols."""

# Build the dictionary.
dict_size = len(alphabet)
dictionary = {alphabet[i]: i for i in range(dict_size)}

w = ""
result = []
for c in message:
wc = w + c
if wc in dictionary:
w = wc
else:
result.append(dictionary[w])
# Add wc to the dictionary.
dictionary[wc] = dict_size
dict_size += 1
w = c

# Output the code for w.
if w:
result.append(dictionary[w])
return result

def dc_arithmetic_encoding(message, chars, freqs):
# see figure 8.7 on slide 8.2
terminating = chars[-1]
cum_freqs = compute_cum_freqs(freqs)

encode(message, chars, freqs, cum_freqs)

def compute_cum_freqs(freqs):
cum_freqs = [0]
cum_freq = 0
for i, freq in enumerate(freqs):
if i == len(freqs) - 1:
break
cum_freq += freq
cum_freqs.append(cum_freq)
print(cum_freqs)
return cum_freqs

def encode(message, chars, freqs, cum_freqs):
interval0 = 0
interval1 = 1
for char in message:
idx = chars.index(char)
freq = freqs[idx]
cum_freq = cum_freqs[idx]
interval_size = interval1 - interval0
interval0 += interval_size * cum_freq
interval1 = interval0 + freq * interval_size
return interval_to_binary(interval0, interval1)

def interval_to_binary(interval_start, interval_finish):
bin_str = ""
prob = 0
index = 0
while prob < interval_start:
index += 1
power = 1 / (2 ** index)
if prob + power <= interval_finish:
bin_str += "1"
prob += power
else:
bin_str += "0"
print("prob", prob)
print("bin_string", bin_str)
return bin_str

def main():
print("RLE")
bits = "0" * 12 + "1" + "0" * 3 + "1" + "0" * 0 + "1" + "0" * 8
print("Bits: ", bits)
dc_run_length_encoding(bits)
message = "BAABABBBAABBBBAA"
alphabet = ['A', 'B']
LZW_compressed = dc_lempel_ziv_welch(message, alphabet)
print("LZW")
print(f"Message: {message}")
print("LZW Compressed: ", LZW_compressed)

print()
print("Huffman")
huffman_message = "EAEBAECDDEA"
huffman_alphabet = ['A', 'B', 'C', 'D', 'E']
huffman_frequencies = [20, 10, 10, 30 , 30]

print(f"Huffman Message: {huffman_message}")
print(f"Huffman Alphabet: {huffman_alphabet}")
print(f"Huffman Frequencies: {huffman_frequencies}")

dc_huffman_coding(huffman_message, huffman_alphabet, huffman_frequencies)

message = "BBAB*"
chars = ["A", "B", "*"]
freqs = [.4, .5, .1]

print()
print("Arithmetic Encoding")
print(f"Message: {message}")
print(f"Chars: {chars}")
print(f"Freqs: {freqs}")

dc_arithmetic_encoding(message, chars, freqs)

if __name__ == "__main__":
main()```