Assignment09.py
user_9560538
python
a year ago
6.3 kB
9
Indexable
# CSCI 355 Web Technology #Summer 2024 #Assignment 9: “Data Compression” #ACK - Class import heapq # [1] Define functions that implement each of the four data compression algorithms: # # dc_run_length_encoding() # dc_huffman_coding() # dc_lempel_ziv_welch() # dc_arithmetic_encoding() def dc_run_length_encoding(bits): print("DC Run Length Encoding") print(f"Bits: {bits}") n = len(bits) count = 0 res = "" for i in range(n): if bits[i] == "0": count += 1 else: res += f"{count} " count = 0 if count > 0: res += f"{count}" print("Result: ", res) return res class node: def __init__(self, freq, symbol, left=None, right=None): # frequency of symbol self.freq = freq # symbol name (character) self.symbol = symbol # node left of current node self.left = left # node right of current node self.right = right # tree direction (0/1) self.huff = '' def __lt__(self, nxt): return self.freq < nxt.freq # utility function to print huffman # codes for all symbols in the newly # created Huffman tree def printNodes(dict_huffman, node, val='',): # huffman code for current node newVal = val + str(node.huff) # if node is not an edge node # then traverse inside it if (node.left): printNodes(dict_huffman, node.left, newVal) if (node.right): printNodes(dict_huffman, node.right, newVal) # if node is edge node then # display its huffman code if (not node.left and not node.right): dict_huffman[node.symbol] = newVal print(f"{node.symbol} -> {newVal}") # characters for huffman tree def dc_huffman_coding(message, alphabet, frequency): # list containing unused nodes nodes = [] # converting characters and frequencies # into huffman tree nodes for i in range(len(alphabet)): heapq.heappush(nodes, node(frequency[i], alphabet[i])) while len(nodes) > 1: # sort all the nodes in ascending order # based on their frequency left = heapq.heappop(nodes) right = heapq.heappop(nodes) # assign directional value to these nodes left.huff = 0 right.huff = 1 # combine the 2 smallest nodes to create # new node as their parent newNode = node(left.freq + right.freq, left.symbol + right.symbol, left, right) heapq.heappush(nodes, newNode) # Huffman Tree is ready! huffman_dict = {} printNodes(huffman_dict, nodes[0]) print(f"Huffman Dictionary: {huffman_dict}") encoded_message ="".join([huffman_dict[letter] for letter in message]) print(f"Encoded Message: {encoded_message}") len_uncompressed = len(message) len_compressed = len(encoded_message) print(f"Uncompressed Length: {len_uncompressed}") print(f"Compressed Length: {len_compressed}") compression_rate = round(len_compressed / len_uncompressed, 2) print(f"Compression Rate: {compression_rate}") def dc_lempel_ziv_welch(message, alphabet): """Compress a string to a list of output symbols.""" # Build the dictionary. dict_size = len(alphabet) dictionary = {alphabet[i]: i for i in range(dict_size)} w = "" result = [] for c in message: wc = w + c if wc in dictionary: w = wc else: result.append(dictionary[w]) # Add wc to the dictionary. dictionary[wc] = dict_size dict_size += 1 w = c # Output the code for w. if w: result.append(dictionary[w]) return result def dc_arithmetic_encoding(message, chars, freqs): # see figure 8.7 on slide 8.2 terminating = chars[-1] cum_freqs = compute_cum_freqs(freqs) encode(message, chars, freqs, cum_freqs) def compute_cum_freqs(freqs): cum_freqs = [0] cum_freq = 0 for i, freq in enumerate(freqs): if i == len(freqs) - 1: break cum_freq += freq cum_freqs.append(cum_freq) print(cum_freqs) return cum_freqs def encode(message, chars, freqs, cum_freqs): interval0 = 0 interval1 = 1 for char in message: idx = chars.index(char) freq = freqs[idx] cum_freq = cum_freqs[idx] interval_size = interval1 - interval0 interval0 += interval_size * cum_freq interval1 = interval0 + freq * interval_size return interval_to_binary(interval0, interval1) def interval_to_binary(interval_start, interval_finish): bin_str = "" prob = 0 index = 0 while prob < interval_start: index += 1 power = 1 / (2 ** index) if prob + power <= interval_finish: bin_str += "1" prob += power else: bin_str += "0" print("prob", prob) print("bin_string", bin_str) return bin_str def main(): print("RLE") bits = "0" * 12 + "1" + "0" * 3 + "1" + "0" * 0 + "1" + "0" * 8 print("Bits: ", bits) dc_run_length_encoding(bits) message = "BAABABBBAABBBBAA" alphabet = ['A', 'B'] LZW_compressed = dc_lempel_ziv_welch(message, alphabet) print("LZW") print(f"Message: {message}") print("LZW Compressed: ", LZW_compressed) print() print("Huffman") huffman_message = "EAEBAECDDEA" huffman_alphabet = ['A', 'B', 'C', 'D', 'E'] huffman_frequencies = [20, 10, 10, 30 , 30] print(f"Huffman Message: {huffman_message}") print(f"Huffman Alphabet: {huffman_alphabet}") print(f"Huffman Frequencies: {huffman_frequencies}") dc_huffman_coding(huffman_message, huffman_alphabet, huffman_frequencies) message = "BBAB*" chars = ["A", "B", "*"] freqs = [.4, .5, .1] print() print("Arithmetic Encoding") print(f"Message: {message}") print(f"Chars: {chars}") print(f"Freqs: {freqs}") dc_arithmetic_encoding(message, chars, freqs) if __name__ == "__main__": main()
Editor is loading...
Leave a Comment