Algorithm
Huffman coding is a popular algorithm used for lossless data compression. It was developed by David A. Huffman in 1952. The main idea behind Huffman coding is to assign variable-length codes to input characters, with shorter codes assigned to more frequent characters. This results in a more efficient representation of the data.
Here's a basic overview of how Huffman coding works:
-
Frequency Analysis: The first step is to analyze the input data and determine the frequency of each symbol (character or group of characters). The symbols with higher frequencies will be assigned shorter codes.
-
Build a Huffman Tree: Based on the frequencies, a binary tree called the Huffman tree is constructed. The tree is built in a way that the most frequent symbols have shorter paths from the root of the tree.
-
Assign Codes: Traversing the Huffman tree, a unique binary code is assigned to each symbol. As you move to the left child, a '0' is added to the code, and as you move to the right child, a '1' is added.
-
Generate Huffman Code: Once the Huffman tree is constructed, the codes for each symbol can be obtained by reading the path from the root to the leaf corresponding to that symbol.
-
Compression: Replace each symbol in the original data with its corresponding Huffman code to create a compressed representation of the data.
The key advantage of Huffman coding is that it produces variable-length codes, where more frequent symbols have shorter codes, leading to a more compact representation of the data. This makes it particularly useful for compressing data with varying symbol frequencies.
Here's a simple example to illustrate the process:
Let's say we have the following input symbols and frequencies:
mathematica
Symbol Frequency
A 4
B 2
C 1
D 1
The Huffman tree would be built like this:
css
[A:4]
/ \
[B:2] [C:1]
/ \
[D:1] [ ]
The resulting Huffman codes might be:
makefile
A: 0
B: 10
C: 110
D: 111
So, in this example, the original data can be represented more efficiently using these variable-length codes.
Code Examples
#1 Huffman Coding implement in Python
Code -
Python Programming
import heapq
from collections import defaultdict
class HuffmanNode:
def __init__(self, char, frequency):
self.char = char
self.frequency = frequency
self.left = None
self.right = None
def __lt__(self, other):
return self.frequency < other.frequency
def build_huffman_tree(text):
frequency_map = defaultdict(int)
for char in text:
frequency_map[char] += 1
priority_queue = [HuffmanNode(char, freq) for char, freq in frequency_map.items()]
heapq.heapify(priority_queue)
while len(priority_queue) > 1:
left = heapq.heappop(priority_queue)
right = heapq.heappop(priority_queue)
merged_node = HuffmanNode(None, left.frequency + right.frequency)
merged_node.left = left
merged_node.right = right
heapq.heappush(priority_queue, merged_node)
return priority_queue[0]
def build_huffman_codes(node, code="", mapping=None):
if mapping is None:
mapping = {}
if node.char is not None:
mapping[node.char] = code
if node.left is not None:
build_huffman_codes(node.left, code + "0", mapping)
if node.right is not None:
build_huffman_codes(node.right, code + "1", mapping)
return mapping
def huffman_encode(text):
root = build_huffman_tree(text)
codes = build_huffman_codes(root)
encoded_text = ''.join(codes[char] for char in text)
return encoded_text, codes
def huffman_decode(encoded_text, codes):
reversed_codes = {code: char for char, code in codes.items()}
decoded_text = ""
current_code = ""
for bit in encoded_text:
current_code += bit
if current_code in reversed_codes:
decoded_text += reversed_codes[current_code]
current_code = ""
return decoded_text
# Example Usage:
text_to_encode = "hello, world!"
encoded_text, huffman_codes = huffman_encode(text_to_encode)
decoded_text = huffman_decode(encoded_text, huffman_codes)
print("Original text:", text_to_encode)
print("Encoded text:", encoded_text)
print("Decoded text:", decoded_text)
Copy The Code &
Try With Live Editor
#2 Huffman Coding implement in Java
Code -
Java Programming
import java.util.PriorityQueue;
import java.util.HashMap;
class HuffmanNode implements Comparable < HuffmanNode> {
char data;
int frequency;
HuffmanNode left, right;
public HuffmanNode(char data, int frequency) {
this.data = data;
this.frequency = frequency;
this.left = null;
this.right = null;
}
@Override
public int compareTo(HuffmanNode node) {
return this.frequency - node.frequency;
}
}
public class HuffmanCoding {
private static HashMap < Character, String> huffmanCodes = new HashMap<>();
public static void main(String[] args) {
String inputString = "Hello, Huffman Coding!";
System.out.println("Original string: " + inputString);
HashMap < Character, Integer> frequencies = calculateFrequencies(inputString);
HuffmanNode root = buildHuffmanTree(frequencies);
generateHuffmanCodes(root, "", huffmanCodes);
System.out.println("Huffman Codes:");
for (char c : huffmanCodes.keySet()) {
System.out.println(c + ": " + huffmanCodes.get(c));
}
String encodedString = encode(inputString);
System.out.println("Encoded string: " + encodedString);
String decodedString = decode(encodedString, root);
System.out.println("Decoded string: " + decodedString);
}
private static HashMap < Character, Integer> calculateFrequencies(String input) {
HashMap frequencies = new HashMap<>();
for (char c : input.toCharArray()) {
frequencies.put(c, frequencies.getOrDefault(c, 0) + 1);
}
return frequencies;
}
private static HuffmanNode buildHuffmanTree(HashMap < Character, Integer> frequencies) {
PriorityQueue priorityQueue = new PriorityQueue<>();
for (char c : frequencies.keySet()) {
priorityQueue.add(new HuffmanNode(c, frequencies.get(c)));
}
while (priorityQueue.size() > 1) {
HuffmanNode left = priorityQueue.poll();
HuffmanNode right = priorityQueue.poll();
HuffmanNode internalNode = new HuffmanNode('\0', left.frequency + right.frequency);
internalNode.left = left;
internalNode.right = right;
priorityQueue.add(internalNode);
}
return priorityQueue.poll();
}
private static void generateHuffmanCodes(HuffmanNode root, String code, HashMap < Character, String> huffmanCodes) {
if (root == null) {
return;
}
if (root.data != '\0') {
huffmanCodes.put(root.data, code);
}
generateHuffmanCodes(root.left, code + "0", huffmanCodes);
generateHuffmanCodes(root.right, code + "1", huffmanCodes);
}
private static String encode(String input) {
StringBuilder encodedString = new StringBuilder();
for (char c : input.toCharArray()) {
encodedString.append(huffmanCodes.get(c));
}
return encodedString.toString();
}
private static String decode(String encodedString, HuffmanNode root) {
StringBuilder decodedString = new StringBuilder();
HuffmanNode current = root;
for (char bit : encodedString.toCharArray()) {
if (bit == '0') {
current = current.left;
} else {
current = current.right;
}
if (current.data != '\0') {
decodedString.append(current.data);
current = root;
}
}
return decodedString.toString();
}
}
Copy The Code &
Try With Live Editor
#3 Huffman Coding implement in C
Code -
C Programming
#include <stdio.h>
#include <stdlib.h>
// Define a structure to represent a node in the Huffman tree
struct Node {
char data;
unsigned freq;
struct Node *left, *right;
};
// Function to create a new node with given character and frequency
struct Node* newNode(char data, unsigned freq) {
struct Node* node = (struct Node*)malloc(sizeof(struct Node));
node->left = node->right = NULL;
node->data = data;
node->freq = freq;
return node;
}
// Function to compare two nodes based on their frequencies
int compareNodes(const void* a, const void* b) {
return ((struct Node*)a)->freq - ((struct Node*)b)->freq;
}
// Function to build the Huffman tree and return the root
struct Node* buildHuffmanTree(char data[], int freq[], int size) {
struct Node *left, *right, *top;
// Create a min heap and populate it with nodes
int n = size;
struct Node* nodes = (struct Node*)malloc(n * sizeof(struct Node));
for (int i = 0; i < n; ++i)
nodes[i] = *newNode(data[i], freq[i]);
qsort(nodes, n, sizeof(struct Node), compareNodes);
// Build the Huffman tree
while (n > 1) {
left = &nodes[n - 1];
right = &nodes[n - 2];
top = newNode('$', left->freq + right->freq);
top->left = left;
top->right = right;
nodes[n - 2] = *top;
qsort(nodes, n-1, sizeof(struct Node), compareNodes);
n--;
}
// The root of the Huffman tree
return &nodes[0];
}
// Function to print Huffman codes from the root of the Huffman tree
void printCodes(struct Node* root, int arr[], int top) {
if (root->left) {
arr[top] = 0;
printCodes(root->left, arr, top + 1);
}
if (root->right) {
arr[top] = 1;
printCodes(root->right, arr, top + 1);
}
if (!(root->left) && !(root->right)) {
printf("%c: ", root->data);
for (int i = 0; i < top; ++i)
printf("%d", arr[i]);
printf("\n");
}
}
// Huffman coding function
void huffmanCodes(char data[], int freq[], int size) {
struct Node* root = buildHuffmanTree(data, freq, size);
int arr[size], top = 0;
printf("Huffman Codes:\n");
printCodes(root, arr, top);
}
int main() {
char data[] = {'a', 'b', 'c', 'd', 'e', 'f'};
int freq[] = {5, 9, 12, 13, 16, 45};
int size = sizeof(data) / sizeof(data[0]);
huffmanCodes(data, freq, size);
return 0;
}
Copy The Code &
Try With Live Editor
#4 Huffman Coding implement in C++
Code -
C++ Programming
#include <iostream>
#include <queue>
#include <unordered_map>
#include <vector>
using namespace std;
// Node structure for the Huffman tree
struct HuffmanNode {
char data;
unsigned frequency;
HuffmanNode* left;
HuffmanNode* right;
HuffmanNode(char data, unsigned frequency) : data(data), frequency(frequency), left(nullptr), right(nullptr) {}
};
// Comparison function for priority queue
struct CompareNodes {
bool operator()(HuffmanNode* left, HuffmanNode* right) {
return left->frequency > right->frequency;
}
};
// Function to build the Huffman tree
HuffmanNode* buildHuffmanTree(const string& text) {
// Count the frequency of each character
unordered_map < char, unsigned> frequencyMap;
for (char c : text) {
frequencyMap[c]++;
}
// Create a priority queue of Huffman nodes
priority_queue < HuffmanNode*, vector<HuffmanNode*>, CompareNodes> pq;
for (auto& entry : frequencyMap) {
pq.push(new HuffmanNode(entry.first, entry.second));
}
// Build the Huffman tree
while (pq.size() > 1) {
HuffmanNode* left = pq.top();
pq.pop();
HuffmanNode* right = pq.top();
pq.pop();
HuffmanNode* newNode = new HuffmanNode('\0', left->frequency + right->frequency);
newNode->left = left;
newNode->right = right;
pq.push(newNode);
}
// Return the root of the Huffman tree
return pq.top();
}
// Function to traverse the Huffman tree and generate codes
void generateHuffmanCodes(HuffmanNode* root, string code, unordered_map < char, string>& codes) {
if (root == nullptr) {
return;
}
if (root->data != '\0') {
codes[root->data] = code;
}
generateHuffmanCodes(root->left, code + "0", codes);
generateHuffmanCodes(root->right, code + "1", codes);
}
// Function to compress a string using Huffman coding
string compressHuffman(const string& text) {
HuffmanNode* root = buildHuffmanTree(text);
unordered_map < char, string> codes;
generateHuffmanCodes(root, "", codes);
string compressed;
for (char c : text) {
compressed += codes[c];
}
return compressed;
}
int main() {
string text = "hello world";
string compressed = compressHuffman(text);
cout << "Original text: " << text << endl;
cout << "Compressed text: " << compressed << endl;
return 0;
}
Copy The Code &
Try With Live Editor
Demonstration
Huffman Coding Data Structure and Algorithm