Implement Huffman encoding
This commit is contained in:
parent
7dba9d03ab
commit
28d5f2c664
@ -1,5 +1,69 @@
|
|||||||
|
use std::cmp::max;
|
||||||
|
use std::cmp::min;
|
||||||
|
|
||||||
|
/// Symbol statistics from which a Huffman tree is built.
#[derive(Debug)]
pub struct HuffmanAlphabet {
    /// Number of symbols the input declared.
    pub length: usize,
    /// Weight of each symbol, one entry per symbol.
    pub frequencies: Vec<u64>,
}

/// Node of the binary tree assembled by [`build_huffman_tree`].
///
/// Leaves have no children; a merged node owns the two subtrees it was built from.
#[derive(Debug)]
struct HuffmanTreeNode {
    /// Weight of the symbol (leaf) or the summed weight of both subtrees (merged node).
    frequency: u64,
    left: Option<Box<HuffmanTreeNode>>,
    right: Option<Box<HuffmanTreeNode>>,
}

/// Returns the number of edges on the longest path from `tree` down to a leaf.
fn max_depth(tree: &HuffmanTreeNode) -> usize {
    let depth_left = tree.left.as_deref().map_or(0, |child| 1 + max_depth(child));
    let depth_right = tree.right.as_deref().map_or(0, |child| 1 + max_depth(child));
    max(depth_left, depth_right)
}

/// Returns the number of edges on the shortest path from `tree` down to a leaf.
///
/// A node with a single child is not a leaf, so the path continues through that child
/// instead of stopping with depth 0 at the missing side.
fn min_depth(tree: &HuffmanTreeNode) -> usize {
    match (tree.left.as_deref(), tree.right.as_deref()) {
        (None, None) => 0,
        (Some(only), None) | (None, Some(only)) => 1 + min_depth(only),
        (Some(left), Some(right)) => 1 + min(min_depth(left), min_depth(right)),
    }
}

/// Builds the Huffman tree for `h.frequencies` and returns `(shortest, longest)`: the
/// smallest and the largest leaf depth of that tree, measured in edges from the root.
///
/// An empty alphabet and a single-symbol alphabet both yield `(0, 0)`.
pub fn build_huffman_tree(h: &HuffmanAlphabet) -> (usize, usize) {
    let mut nodes: Vec<HuffmanTreeNode> = h
        .frequencies
        .iter()
        .map(|&frequency| HuffmanTreeNode {
            frequency,
            left: None,
            right: None,
        })
        .collect();

    // Descending order keeps the two lightest nodes at the tail, where `pop` is cheap.
    // The sort is stable, so equal weights keep their input order.
    nodes.sort_by(|a, b| b.frequency.cmp(&a.frequency));

    loop {
        let a = match nodes.pop() {
            Some(node) => node,
            // No symbols at all: there is no tree to measure.
            None => return (0, 0),
        };
        let b = match nodes.pop() {
            Some(node) => node,
            // `a` was the last node left, i.e. the finished root.
            None => return (min_depth(&a), max_depth(&a)),
        };

        let merged = HuffmanTreeNode {
            // Saturate instead of overflowing: a wrapped sum would sort as a tiny weight
            // and silently corrupt the tree.
            frequency: a.frequency.saturating_add(b.frequency),
            left: Some(Box::new(a)),
            right: Some(Box::new(b)),
        };

        // Insert behind every node that is at least as heavy. This is exactly where a
        // stable descending re-sort would place a node appended at the end, so ties are
        // resolved as before without sorting the whole vector on every merge.
        let slot = nodes
            .iter()
            .rposition(|node| node.frequency >= merged.frequency)
            .map_or(0, |index| index + 1);
        nodes.insert(slot, merged);
    }
}
|
||||||
|
@ -4,22 +4,21 @@ pub type ImpliciteGraph = Vec<u32>;
|
|||||||
/// Returns, sorted ascending and without duplicates, every value that can be reached
/// from `a` by flipping at most `n` of its 24 lowest bits (`a` itself included).
fn neighbors(a: u32, n: usize) -> Vec<u32> {
    let mut frontier = vec![a];
    for _ in 0..n {
        // Each round keeps the current values and adds all of their single-bit flips.
        let mut expanded: Vec<u32> = frontier
            .iter()
            .copied()
            .chain(
                frontier
                    .iter()
                    .flat_map(|&value| (0..24).map(move |bit| value ^ (0x1 << bit))),
            )
            .collect();
        expanded.sort();
        expanded.dedup();
        frontier = expanded;
    }
    frontier
}
|
||||||
|
|
||||||
|
|
||||||
pub fn k_clustering_big(g: &ImpliciteGraph) -> usize {
|
pub fn k_clustering_big(g: &ImpliciteGraph) -> usize {
|
||||||
let mut node_id_to_cluster_id: Vec<usize> = (0..g.len()).collect();
|
let mut node_id_to_cluster_id: Vec<usize> = (0..g.len()).collect();
|
||||||
let mut clusters: Vec<Vec<usize>> = (0..g.len()).map(|x| vec![x]).collect();
|
let mut clusters: Vec<Vec<usize>> = (0..g.len()).map(|x| vec![x]).collect();
|
||||||
@ -35,13 +34,13 @@ pub fn k_clustering_big(g: &ImpliciteGraph) -> usize {
|
|||||||
}
|
}
|
||||||
|
|
||||||
for node_a_id in 0..g.len() {
|
for node_a_id in 0..g.len() {
|
||||||
// Iterate over all nodes in the graph. Then, for each node compute all
|
// Iterate over all nodes in the graph. Then, for each node compute all
|
||||||
// neighbors that are two or less bits away.
|
// neighbors that are two or less bits away.
|
||||||
for node_b_value in neighbors(g[node_a_id], 2) {
|
for node_b_value in neighbors(g[node_a_id], 2) {
|
||||||
// See if there exist nodes that match the neighbor. If such nodes
|
// See if there exist nodes that match the neighbor. If such nodes
|
||||||
// exist iterate over them and merge the clusters if they are not
|
// exist iterate over them and merge the clusters if they are not
|
||||||
// already the same. The key insight is that we have to cluster all
|
// already the same. The key insight is that we have to cluster all
|
||||||
// nodes that are two or less (that includes zero) bits apart.
|
// nodes that are two or less (that includes zero) bits apart.
|
||||||
if let Some(node_b_ids) = node_map.get(&node_b_value) {
|
if let Some(node_b_ids) = node_map.get(&node_b_value) {
|
||||||
for node_b_id in node_b_ids {
|
for node_b_id in node_b_ids {
|
||||||
let cluster_id_a = node_id_to_cluster_id[node_a_id];
|
let cluster_id_a = node_id_to_cluster_id[node_a_id];
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
mod dijkstra;
|
mod dijkstra;
|
||||||
mod heap;
|
mod heap;
|
||||||
|
mod huffman;
|
||||||
mod jobs;
|
mod jobs;
|
||||||
mod k_clustering;
|
mod k_clustering;
|
||||||
mod k_clustering_big;
|
mod k_clustering_big;
|
||||||
@ -117,7 +118,9 @@ fn c3a2() {
|
|||||||
|
|
||||||
#[allow(dead_code)]
|
#[allow(dead_code)]
|
||||||
fn c3a3() {
|
fn c3a3() {
|
||||||
println!("continue here");
|
let h = util::read_huffman_alphabet("data/c3a3_huffman.txt").unwrap();
|
||||||
|
let r = huffman::build_huffman_tree(&h);
|
||||||
|
println!("r1 = {} r2 = {}", r.1, r.0);
|
||||||
}
|
}
|
||||||
|
|
||||||
fn main() {
|
fn main() {
|
||||||
|
21
src/util.rs
21
src/util.rs
@ -1,3 +1,4 @@
|
|||||||
|
use crate::huffman;
|
||||||
use crate::jobs;
|
use crate::jobs;
|
||||||
use crate::k_clustering;
|
use crate::k_clustering;
|
||||||
use crate::k_clustering_big;
|
use crate::k_clustering_big;
|
||||||
@ -239,3 +240,23 @@ pub fn read_k_cluster_big(path: &str) -> Result<k_clustering_big::ImpliciteGraph
|
|||||||
|
|
||||||
Ok(g)
|
Ok(g)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn read_huffman_alphabet(path: &str) -> Result<huffman::HuffmanAlphabet, io::Error> {
|
||||||
|
let file = File::open(path)?;
|
||||||
|
let mut lines = BufReader::new(file).lines();
|
||||||
|
let line = lines.next().unwrap().unwrap();
|
||||||
|
let length = line.parse().unwrap();
|
||||||
|
|
||||||
|
let mut h = huffman::HuffmanAlphabet {
|
||||||
|
length: length,
|
||||||
|
frequencies: Vec::new(),
|
||||||
|
};
|
||||||
|
|
||||||
|
for line in lines {
|
||||||
|
let line = line?;
|
||||||
|
let frequency = line.parse().unwrap();
|
||||||
|
h.frequencies.push(frequency);
|
||||||
|
}
|
||||||
|
assert!(length == h.frequencies.len());
|
||||||
|
Ok(h)
|
||||||
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user