Implement Huffman encoding
This commit is contained in:
parent
7dba9d03ab
commit
28d5f2c664
@ -1,5 +1,69 @@
|
||||
use std::cmp::max;
|
||||
use std::cmp::min;
|
||||
|
||||
/// Symbol frequencies of an alphabet that is to be Huffman-encoded.
#[derive(Debug)]
pub struct HuffmanAlphabet {
    /// Number of symbols in the alphabet; expected to equal `frequencies.len()`.
    pub length: usize,
    /// Frequency (weight) of each symbol, one entry per symbol.
    pub frequencies: Vec<u64>,
}
|
||||
|
||||
/// A node of the binary tree assembled while building a Huffman code.
///
/// Leaves carry the frequency of a single symbol; a merged node carries the
/// sum of the frequencies of its two children.
#[derive(Debug)]
struct HuffmanTreeNode {
    /// Accumulated frequency of every symbol below (or at) this node.
    frequency: u64,
    /// Left subtree, `None` for a leaf.
    left: Option<Box<Self>>,
    /// Right subtree, `None` for a leaf.
    right: Option<Box<Self>>,
}
|
||||
|
||||
fn max_depth(tree: &HuffmanTreeNode) -> usize {
|
||||
let mut depth_left = 0;
|
||||
if let Some(node_left) = &tree.left {
|
||||
depth_left = 1 + max_depth(&node_left);
|
||||
}
|
||||
|
||||
let mut depth_right = 0;
|
||||
if let Some(node_right) = &tree.right {
|
||||
depth_right = 1 + max_depth(&node_right);
|
||||
}
|
||||
|
||||
max(depth_left, depth_right)
|
||||
}
|
||||
|
||||
fn min_depth(tree: &HuffmanTreeNode) -> usize {
|
||||
let mut depth_left = 0;
|
||||
if let Some(node_left) = &tree.left {
|
||||
depth_left = 1 + min_depth(&node_left);
|
||||
}
|
||||
|
||||
let mut depth_right = 0;
|
||||
if let Some(node_right) = &tree.right {
|
||||
depth_right = 1 + min_depth(&node_right);
|
||||
}
|
||||
|
||||
min(depth_left, depth_right)
|
||||
}
|
||||
|
||||
pub fn build_huffman_tree(h: &HuffmanAlphabet) -> (usize, usize) {
|
||||
let mut nodes: Vec<HuffmanTreeNode> = Vec::new();
|
||||
for f in &h.frequencies {
|
||||
let n = HuffmanTreeNode {
|
||||
frequency: *f,
|
||||
left: None,
|
||||
right: None,
|
||||
};
|
||||
nodes.push(n);
|
||||
}
|
||||
nodes.sort_by(|a, b| b.frequency.cmp(&a.frequency));
|
||||
while nodes.len() > 1 {
|
||||
let a = nodes.pop().unwrap();
|
||||
let b = nodes.pop().unwrap();
|
||||
let n = HuffmanTreeNode {
|
||||
frequency: a.frequency + b.frequency,
|
||||
left: Some(Box::new(a)),
|
||||
right: Some(Box::new(b)),
|
||||
};
|
||||
nodes.push(n);
|
||||
nodes.sort_by(|a, b| b.frequency.cmp(&a.frequency));
|
||||
}
|
||||
let tree = nodes.pop().unwrap();
|
||||
(min_depth(&tree), max_depth(&tree))
|
||||
}
|
@ -4,22 +4,21 @@ pub type ImpliciteGraph = Vec<u32>;
|
||||
/// Returns `a` together with every value that can be reached from it by
/// flipping at most `n` of its low 24 bits.
///
/// The result is sorted in ascending order and contains no duplicates.
/// `n == 0` yields just `[a]`.
fn neighbors(a: u32, n: usize) -> Vec<u32> {
    // Only bit positions 0..24 are ever flipped; presumably the node labels
    // are 24 bits wide (confirm against the input data).
    const BITS: u32 = 24;

    let mut reached = vec![a];
    for _ in 0..n {
        // Each round adds every value exactly one more bit flip away from
        // something already reached; the previous values are kept.
        let mut next = reached.clone();
        next.extend(
            reached
                .iter()
                .flat_map(|&value| (0..BITS).map(move |bit| value ^ (1u32 << bit))),
        );
        next.sort_unstable();
        next.dedup();
        reached = next;
    }
    reached
}
|
||||
|
||||
|
||||
pub fn k_clustering_big(g: &ImpliciteGraph) -> usize {
|
||||
let mut node_id_to_cluster_id: Vec<usize> = (0..g.len()).collect();
|
||||
let mut clusters: Vec<Vec<usize>> = (0..g.len()).map(|x| vec![x]).collect();
|
||||
@ -35,13 +34,13 @@ pub fn k_clustering_big(g: &ImpliciteGraph) -> usize {
|
||||
}
|
||||
|
||||
for node_a_id in 0..g.len() {
|
||||
// Iterate over all nodes in the graph. Then, for each node compute all
|
||||
// neighbors that are two or less bits away.
|
||||
// Iterate over all nodes in the graph. Then, for each node compute all
|
||||
// neighbors that are two or less bits away.
|
||||
for node_b_value in neighbors(g[node_a_id], 2) {
|
||||
// See if there exist nodes that match the neighbor. If such nodes
|
||||
// exist iterate over them and merge the clusters if they are not
|
||||
// already the same. The key insight is that we have to cluster all
|
||||
// nodes that are two or less (that includes zero) bits apart.
|
||||
// See if there exist nodes that match the neighbor. If such nodes
|
||||
// exist iterate over them and merge the clusters if they are not
|
||||
// already the same. The key insight is that we have to cluster all
|
||||
// nodes that are two or less (that includes zero) bits apart.
|
||||
if let Some(node_b_ids) = node_map.get(&node_b_value) {
|
||||
for node_b_id in node_b_ids {
|
||||
let cluster_id_a = node_id_to_cluster_id[node_a_id];
|
||||
|
@ -1,5 +1,6 @@
|
||||
mod dijkstra;
|
||||
mod heap;
|
||||
mod huffman;
|
||||
mod jobs;
|
||||
mod k_clustering;
|
||||
mod k_clustering_big;
|
||||
@ -117,7 +118,9 @@ fn c3a2() {
|
||||
|
||||
#[allow(dead_code)]
|
||||
fn c3a3() {
|
||||
println!("continue here");
|
||||
let h = util::read_huffman_alphabet("data/c3a3_huffman.txt").unwrap();
|
||||
let r = huffman::build_huffman_tree(&h);
|
||||
println!("r1 = {} r2 = {}", r.1, r.0);
|
||||
}
|
||||
|
||||
fn main() {
|
||||
|
21
src/util.rs
21
src/util.rs
@ -1,3 +1,4 @@
|
||||
use crate::huffman;
|
||||
use crate::jobs;
|
||||
use crate::k_clustering;
|
||||
use crate::k_clustering_big;
|
||||
@ -239,3 +240,23 @@ pub fn read_k_cluster_big(path: &str) -> Result<k_clustering_big::ImpliciteGraph
|
||||
|
||||
Ok(g)
|
||||
}
|
||||
|
||||
pub fn read_huffman_alphabet(path: &str) -> Result<huffman::HuffmanAlphabet, io::Error> {
|
||||
let file = File::open(path)?;
|
||||
let mut lines = BufReader::new(file).lines();
|
||||
let line = lines.next().unwrap().unwrap();
|
||||
let length = line.parse().unwrap();
|
||||
|
||||
let mut h = huffman::HuffmanAlphabet {
|
||||
length: length,
|
||||
frequencies: Vec::new(),
|
||||
};
|
||||
|
||||
for line in lines {
|
||||
let line = line?;
|
||||
let frequency = line.parse().unwrap();
|
||||
h.frequencies.push(frequency);
|
||||
}
|
||||
assert!(length == h.frequencies.len());
|
||||
Ok(h)
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user