From 28d5f2c664913ecebfcf6cac203dd0151e0c2c75 Mon Sep 17 00:00:00 2001 From: Felix Martin Date: Mon, 15 Feb 2021 11:34:41 -0500 Subject: [PATCH] Implement Huffman encoding --- src/huffman.rs | 70 +++++++++++++++++++++++++++++++++++++++-- src/k_clustering_big.rs | 35 ++++++++++----------- src/main.rs | 5 ++- src/util.rs | 21 +++++++++++++ 4 files changed, 109 insertions(+), 22 deletions(-) diff --git a/src/huffman.rs b/src/huffman.rs index ea9bb92..af88810 100644 --- a/src/huffman.rs +++ b/src/huffman.rs @@ -1,5 +1,69 @@ +use std::cmp::max; +use std::cmp::min; +#[derive(Debug)] pub struct HuffmanAlphabet { - pub length: u32, - pub frequencies: Vec, -} \ No newline at end of file + pub length: usize, + pub frequencies: Vec, +} + +#[derive(Debug)] +struct HuffmanTreeNode { + frequency: u64, + left: Option>, + right: Option>, +} + +fn max_depth(tree: &HuffmanTreeNode) -> usize { + let mut depth_left = 0; + if let Some(node_left) = &tree.left { + depth_left = 1 + max_depth(&node_left); + } + + let mut depth_right = 0; + if let Some(node_right) = &tree.right { + depth_right = 1 + max_depth(&node_right); + } + + max(depth_left, depth_right) +} + +fn min_depth(tree: &HuffmanTreeNode) -> usize { + let mut depth_left = 0; + if let Some(node_left) = &tree.left { + depth_left = 1 + min_depth(&node_left); + } + + let mut depth_right = 0; + if let Some(node_right) = &tree.right { + depth_right = 1 + min_depth(&node_right); + } + + min(depth_left, depth_right) +} + +pub fn build_huffman_tree(h: &HuffmanAlphabet) -> (usize, usize) { + let mut nodes: Vec = Vec::new(); + for f in &h.frequencies { + let n = HuffmanTreeNode { + frequency: *f, + left: None, + right: None, + }; + nodes.push(n); + } + nodes.sort_by(|a, b| b.frequency.cmp(&a.frequency)); + while nodes.len() > 1 { + let a = nodes.pop().unwrap(); + let b = nodes.pop().unwrap(); + let n = HuffmanTreeNode { + frequency: a.frequency + b.frequency, + left: Some(Box::new(a)), + right: Some(Box::new(b)), + }; + nodes.push(n); + nodes.sort_by(|a, b| b.frequency.cmp(&a.frequency)); + } + let tree = nodes.pop().unwrap(); + (min_depth(&tree), max_depth(&tree)) +} diff --git a/src/k_clustering_big.rs b/src/k_clustering_big.rs index f92c75f..19fa68f 100644 --- a/src/k_clustering_big.rs +++ b/src/k_clustering_big.rs @@ -4,22 +4,21 @@ pub type ImpliciteGraph = Vec; fn neighbors(a: u32, n: usize) -> Vec { let mut r = vec![a]; for _ in 0..n { - let mut r_new = r.clone(); - for a in r { - for i in 0..24 { - let m = 0x1 << i; - let neighbor = a ^ m; - r_new.push(neighbor); - } - } - r_new.sort(); - r_new.dedup(); - r = r_new; + let mut r_new = r.clone(); + for a in r { + for i in 0..24 { + let m = 0x1 << i; + let neighbor = a ^ m; + r_new.push(neighbor); + } + } + r_new.sort(); + r_new.dedup(); + r = r_new; } r } - pub fn k_clustering_big(g: &ImpliciteGraph) -> usize { let mut node_id_to_cluster_id: Vec = (0..g.len()).collect(); let mut clusters: Vec> = (0..g.len()).map(|x| vec![x]).collect(); @@ -35,13 +34,13 @@ pub fn k_clustering_big(g: &ImpliciteGraph) -> usize { } for node_a_id in 0..g.len() { - // Iterate over all nodes in the graph. Then, for each node compute all - // neighbors that are two or less bits away. + // Iterate over all nodes in the graph. Then, for each node compute all + // neighbors that are two or less bits away. for node_b_value in neighbors(g[node_a_id], 2) { - // See if there exist nodes that match the neighbor. If such nodes - // exist iterate over them and merge the clusters if they are not - // already the same. The key insight is that we have to cluster all - // nodes that are two or less (that includes zero) bits apart. + // See if there exist nodes that match the neighbor. If such nodes + // exist iterate over them and merge the clusters if they are not + // already the same. The key insight is that we have to cluster all + // nodes that are two or less (that includes zero) bits apart. if let Some(node_b_ids) = node_map.get(&node_b_value) { for node_b_id in node_b_ids { let cluster_id_a = node_id_to_cluster_id[node_a_id]; diff --git a/src/main.rs b/src/main.rs index 81134b2..0b43178 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,5 +1,6 @@ mod dijkstra; mod heap; +mod huffman; mod jobs; mod k_clustering; mod k_clustering_big; @@ -117,7 +118,9 @@ fn c3a2() { #[allow(dead_code)] fn c3a3() { - println!("continue here"); + let h = util::read_huffman_alphabet("data/c3a3_huffman.txt").unwrap(); + let r = huffman::build_huffman_tree(&h); + println!("r1 = {} r2 = {}", r.1, r.0); } fn main() { diff --git a/src/util.rs b/src/util.rs index d6293cb..3f55aa8 100644 --- a/src/util.rs +++ b/src/util.rs @@ -1,3 +1,4 @@ +use crate::huffman; use crate::jobs; use crate::k_clustering; use crate::k_clustering_big; @@ -239,3 +240,23 @@ pub fn read_k_cluster_big(path: &str) -> Result Result { + let file = File::open(path)?; + let mut lines = BufReader::new(file).lines(); + let line = lines.next().unwrap().unwrap(); + let length = line.parse().unwrap(); + + let mut h = huffman::HuffmanAlphabet { + length: length, + frequencies: Vec::new(), + }; + + for line in lines { + let line = line?; + let frequency = line.parse().unwrap(); + h.frequencies.push(frequency); + } + assert!(length == h.frequencies.len()); + Ok(h) +}