Untitled
unknown
python
a year ago
6.2 kB
40
Indexable
import heapq
# [1] Define functions that implement each of the four data compression algorithms:
#
# dc_run_length_encoding()
# dc_huffman_coding()
# dc_lempel_ziv_welch()
# dc_arithmetic_encoding()
def dc_run_length_encoding(bits):
print("DC Run Length Encoding")
print(f"Bits: {bits}")
n = len(bits)
count = 0
res = ""
for i in range(n):
if bits[i] == "0":
count += 1
else:
res += f"{count} "
count = 0
if count > 0:
res += f"{count}"
print("Result: ", res)
return res
class node:
def __init__(self, freq, symbol, left=None, right=None):
# frequency of symbol
self.freq = freq
# symbol name (character)
self.symbol = symbol
# node left of current node
self.left = left
# node right of current node
self.right = right
# tree direction (0/1)
self.huff = ''
def __lt__(self, nxt):
return self.freq < nxt.freq
# utility function to print huffman
# codes for all symbols in the newly
# created Huffman tree
def printNodes(dict_huffman, node, val='',):
# huffman code for current node
newVal = val + str(node.huff)
# if node is not an edge node
# then traverse inside it
if (node.left):
printNodes(dict_huffman, node.left, newVal)
if (node.right):
printNodes(dict_huffman, node.right, newVal)
# if node is edge node then
# display its huffman code
if (not node.left and not node.right):
dict_huffman[node.symbol] = newVal
print(f"{node.symbol} -> {newVal}")
# characters for huffman tree
def dc_huffman_coding(message, alphabet, frequency):
# list containing unused nodes
nodes = []
# converting characters and frequencies
# into huffman tree nodes
for i in range(len(alphabet)):
heapq.heappush(nodes, node(frequency[i], alphabet[i]))
while len(nodes) > 1:
# sort all the nodes in ascending order
# based on their frequency
left = heapq.heappop(nodes)
right = heapq.heappop(nodes)
# assign directional value to these nodes
left.huff = 0
right.huff = 1
# combine the 2 smallest nodes to create
# new node as their parent
newNode = node(left.freq + right.freq, left.symbol + right.symbol, left, right)
heapq.heappush(nodes, newNode)
# Huffman Tree is ready!
huffman_dict = {}
printNodes(huffman_dict, nodes[0])
print(f"Huffman Dictionary: {huffman_dict}")
encoded_message ="".join([huffman_dict[letter] for letter in message])
print(f"Encoded Message: {encoded_message}")
len_uncompressed = len(message)
len_compressed = len(encoded_message)
print(f"Uncompressed Length: {len_uncompressed}")
print(f"Compressed Length: {len_compressed}")
compression_rate = round(len_compressed / len_uncompressed, 2)
print(f"Compression Rate: {compression_rate}")
def dc_lempel_ziv_welch(message, alphabet):
"""Compress a string to a list of output symbols."""
# Build the dictionary.
dict_size = len(alphabet)
dictionary = {alphabet[i]: i for i in range(dict_size)}
w = ""
result = []
for c in message:
wc = w + c
if wc in dictionary:
w = wc
else:
result.append(dictionary[w])
# Add wc to the dictionary.
dictionary[wc] = dict_size
dict_size += 1
w = c
# Output the code for w.
if w:
result.append(dictionary[w])
return result
def dc_arithmetic_encoding(message, chars, freqs):
# see figure 8.7 on slide 8.2
terminating = chars[-1]
cum_freqs = compute_cum_freqs(freqs)
encode(message, chars, freqs, cum_freqs)
def compute_cum_freqs(freqs):
cum_freqs = [0]
cum_freq = 0
for i, freq in enumerate(freqs):
if i == len(freqs) - 1:
break
cum_freq += freq
cum_freqs.append(cum_freq)
print(cum_freqs)
return cum_freqs
def encode(message, chars, freqs, cum_freqs):
interval0 = 0
interval1 = 1
for char in message:
idx = chars.index(char)
freq = freqs[idx]
cum_freq = cum_freqs[idx]
interval_size = interval1 - interval0
interval0 += interval_size * cum_freq
interval1 = interval0 + freq * interval_size
return interval_to_binary(interval0, interval1)
def interval_to_binary(interval_start, interval_finish):
bin_str = ""
prob = 0
index = 0
while prob < interval_start:
index += 1
power = 1 / (2 ** index)
if prob + power <= interval_finish:
bin_str += "1"
prob += power
else:
bin_str += "0"
print("prob", prob)
print("bin_string", bin_str)
return bin_str
def main():
print("RLE")
bits = "0" * 12 + "1" + "0" * 3 + "1" + "0" * 0 + "1" + "0" * 8
print("Bits: ", bits)
dc_run_length_encoding(bits)
message = "BAABABBBAABBBBAA"
alphabet = ['A', 'B']
LZW_compressed = dc_lempel_ziv_welch(message, alphabet)
print("LZW")
print(f"Message: {message}")
print("LZW Compressed: ", LZW_compressed)
print()
print("Huffman")
huffman_message = "EAEBAECDDEA"
huffman_alphabet = ['A', 'B', 'C', 'D', 'E']
huffman_frequencies = [20, 10, 10, 30 , 30]
print(f"Huffman Message: {huffman_message}")
print(f"Huffman Alphabet: {huffman_alphabet}")
print(f"Huffman Frequencies: {huffman_frequencies}")
dc_huffman_coding(huffman_message, huffman_alphabet, huffman_frequencies)
message = "BBAB*"
chars = ["A", "B", "*"]
freqs = [.4, .5, .1]
print()
print("Arithmetic Encoding")
print(f"Message: {message}")
print(f"Chars: {chars}")
print(f"Freqs: {freqs}")
dc_arithmetic_encoding(message, chars, freqs)
if __name__ == "__main__":
main()Editor is loading...
Leave a Comment