Implement Huffman encoding

This commit is contained in:
Felix Martin 2021-02-15 11:34:41 -05:00
parent 7dba9d03ab
commit 28d5f2c664
4 changed files with 109 additions and 22 deletions

View File

@ -1,5 +1,69 @@
use std::cmp::max;
use std::cmp::min;
/// Symbol frequencies that serve as input for Huffman tree construction.
#[derive(Debug)]
pub struct HuffmanAlphabet {
    /// Number of symbols the alphabet is declared to contain.
    pub length: usize,
    /// Frequency (weight) of each symbol, one entry per symbol.
    pub frequencies: Vec<u64>,
}
/// Node of the binary tree assembled by `build_huffman_tree`.
///
/// Leaves are created with both children set to `None`; merged nodes own the
/// two nodes they were built from.
#[derive(Debug)]
struct HuffmanTreeNode {
// Weight of the node: a symbol frequency for a leaf, the sum of both
// children's frequencies for a merged node.
frequency: u64,
// First of the two merged subtrees, `None` for a leaf.
left: Option<Box<HuffmanTreeNode>>,
// Second of the two merged subtrees, `None` for a leaf.
right: Option<Box<HuffmanTreeNode>>,
}
/// Returns the number of edges on the longest downward path starting at
/// `tree`.
///
/// A node without children has depth 0; a missing child contributes 0.
fn max_depth(tree: &HuffmanTreeNode) -> usize {
    let via_left = tree
        .left
        .as_deref()
        .map_or(0, |child| 1 + max_depth(child));
    let via_right = tree
        .right
        .as_deref()
        .map_or(0, |child| 1 + max_depth(child));
    max(via_left, via_right)
}
/// Returns the number of edges on the shortest path from `tree` down to a
/// leaf (a node without children).
///
/// A node with only one child is not a leaf, so the path continues through
/// that child instead of stopping with depth 0.
fn min_depth(tree: &HuffmanTreeNode) -> usize {
    match (tree.left.as_deref(), tree.right.as_deref()) {
        // A leaf terminates the path.
        (None, None) => 0,
        // Only one way down: the absent side must not count as depth 0.
        (Some(child), None) | (None, Some(child)) => 1 + min_depth(child),
        (Some(left), Some(right)) => 1 + min(min_depth(left), min_depth(right)),
    }
}
/// Builds a Huffman tree from the frequencies in `h` and returns
/// `(min_depth, max_depth)` of the resulting tree.
///
/// The two lightest nodes are merged repeatedly until a single root remains.
/// Among nodes of equal frequency the most recently added one is merged
/// first, and the lighter-popped node becomes the left child.
///
/// An empty alphabet yields `(0, 0)`, as does an alphabet with one symbol
/// (the tree is a lone leaf).
pub fn build_huffman_tree(h: &HuffmanAlphabet) -> (usize, usize) {
    // Keep the nodes ordered by descending frequency so that the lightest
    // node is always the last element and can be removed with `pop`.
    let mut nodes: Vec<HuffmanTreeNode> = h
        .frequencies
        .iter()
        .map(|&frequency| HuffmanTreeNode {
            frequency,
            left: None,
            right: None,
        })
        .collect();
    nodes.sort_by(|a, b| b.frequency.cmp(&a.frequency));

    loop {
        let a = match nodes.pop() {
            Some(node) => node,
            // No symbols at all: there is no tree to measure.
            None => return (0, 0),
        };
        let b = match nodes.pop() {
            Some(node) => node,
            // `a` was the last remaining node, so it is the root.
            None => return (min_depth(&a), max_depth(&a)),
        };
        let merged = HuffmanTreeNode {
            frequency: a.frequency + b.frequency,
            left: Some(Box::new(a)),
            right: Some(Box::new(b)),
        };
        // Insert behind every node whose frequency is >= the merged one.
        // This is exactly where a stable descending sort would move a node
        // appended at the end, without re-sorting the whole vector.
        let position = nodes
            .iter()
            .position(|node| node.frequency < merged.frequency)
            .unwrap_or(nodes.len());
        nodes.insert(position, merged);
    }
}

View File

@ -4,22 +4,21 @@ pub type ImpliciteGraph = Vec<u32>;
/// Returns every value that can be obtained from `a` by flipping at most `n`
/// of its 24 lowest bits, `a` itself included.
///
/// The result is sorted in ascending order and contains no duplicates.
fn neighbors(a: u32, n: usize) -> Vec<u32> {
    let mut r = vec![a];
    for _ in 0..n {
        // Each round keeps what is already known and adds all values that are
        // one further bit flip away.
        let mut r_new = r.clone();
        for value in r {
            r_new.extend((0..24).map(|bit| value ^ (1u32 << bit)));
        }
        r_new.sort_unstable();
        r_new.dedup();
        r = r_new;
    }
    r
}
pub fn k_clustering_big(g: &ImpliciteGraph) -> usize { pub fn k_clustering_big(g: &ImpliciteGraph) -> usize {
let mut node_id_to_cluster_id: Vec<usize> = (0..g.len()).collect(); let mut node_id_to_cluster_id: Vec<usize> = (0..g.len()).collect();
let mut clusters: Vec<Vec<usize>> = (0..g.len()).map(|x| vec![x]).collect(); let mut clusters: Vec<Vec<usize>> = (0..g.len()).map(|x| vec![x]).collect();
@ -35,13 +34,13 @@ pub fn k_clustering_big(g: &ImpliciteGraph) -> usize {
} }
for node_a_id in 0..g.len() { for node_a_id in 0..g.len() {
// Iterate over all nodes in the graph. Then, for each node compute all // Iterate over all nodes in the graph. Then, for each node compute all
// neighbors that are two or less bits away. // neighbors that are two or less bits away.
for node_b_value in neighbors(g[node_a_id], 2) { for node_b_value in neighbors(g[node_a_id], 2) {
// See if there exist nodes that match the neighbor. If such nodes // See if there exist nodes that match the neighbor. If such nodes
// exist iterate over them and merge the clusters if they are not // exist iterate over them and merge the clusters if they are not
// already the same. The key insight is that we have to cluster all // already the same. The key insight is that we have to cluster all
// nodes that are two or less (that includes zero) bits apart. // nodes that are two or less (that includes zero) bits apart.
if let Some(node_b_ids) = node_map.get(&node_b_value) { if let Some(node_b_ids) = node_map.get(&node_b_value) {
for node_b_id in node_b_ids { for node_b_id in node_b_ids {
let cluster_id_a = node_id_to_cluster_id[node_a_id]; let cluster_id_a = node_id_to_cluster_id[node_a_id];

View File

@ -1,5 +1,6 @@
mod dijkstra; mod dijkstra;
mod heap; mod heap;
mod huffman;
mod jobs; mod jobs;
mod k_clustering; mod k_clustering;
mod k_clustering_big; mod k_clustering_big;
@ -117,7 +118,9 @@ fn c3a2() {
/// Reads the Huffman alphabet from `data/c3a3_huffman.txt`, builds the tree
/// and prints the maximum depth as `r1` and the minimum depth as `r2`.
///
/// Panics if the input file cannot be read.
#[allow(dead_code)]
fn c3a3() {
    let h = util::read_huffman_alphabet("data/c3a3_huffman.txt").unwrap();
    // `build_huffman_tree` returns `(min_depth, max_depth)`.
    let r = huffman::build_huffman_tree(&h);
    println!("r1 = {} r2 = {}", r.1, r.0);
}
fn main() { fn main() {

View File

@ -1,3 +1,4 @@
use crate::huffman;
use crate::jobs; use crate::jobs;
use crate::k_clustering; use crate::k_clustering;
use crate::k_clustering_big; use crate::k_clustering_big;
@ -239,3 +240,23 @@ pub fn read_k_cluster_big(path: &str) -> Result<k_clustering_big::ImpliciteGraph
Ok(g) Ok(g)
} }
/// Reads a Huffman alphabet from the text file at `path`.
///
/// The first line holds the number of symbols; every following non-blank
/// line holds the frequency of one symbol.
///
/// # Errors
///
/// Returns the underlying `io::Error` if the file cannot be opened or read,
/// `ErrorKind::UnexpectedEof` if the file has no first line, and
/// `ErrorKind::InvalidData` if a line is not a valid number or the number of
/// frequencies does not match the declared symbol count.
pub fn read_huffman_alphabet(path: &str) -> Result<huffman::HuffmanAlphabet, io::Error> {
    // Wraps a description of malformed file content in an `io::Error`.
    fn invalid_data(message: String) -> io::Error {
        io::Error::new(io::ErrorKind::InvalidData, message)
    }

    let file = File::open(path)?;
    let mut lines = BufReader::new(file).lines();

    let header = lines.next().ok_or_else(|| {
        io::Error::new(io::ErrorKind::UnexpectedEof, "missing symbol count line")
    })??;
    let length: usize = header
        .trim()
        .parse()
        .map_err(|e| invalid_data(format!("invalid symbol count {:?}: {}", header, e)))?;

    // The declared count is not trusted for pre-allocation; it is only
    // checked against what was actually read.
    let mut frequencies: Vec<u64> = Vec::new();
    for line in lines {
        let line = line?;
        let entry = line.trim();
        if entry.is_empty() {
            continue;
        }
        let frequency = entry
            .parse()
            .map_err(|e| invalid_data(format!("invalid frequency {:?}: {}", entry, e)))?;
        frequencies.push(frequency);
    }

    if frequencies.len() != length {
        return Err(invalid_data(format!(
            "expected {} frequencies, found {}",
            length,
            frequencies.len()
        )));
    }
    Ok(huffman::HuffmanAlphabet {
        length,
        frequencies,
    })
}