From cc8a0443cb555084ecb7067772dfeb9db3cd148b Mon Sep 17 00:00:00 2001 From: Felix Martin Date: Sun, 14 Feb 2021 21:00:47 -0500 Subject: [PATCH] Finish course 3 week 2 assignment --- src/k_clustering_big.rs | 83 +++++++++++------------------------------ src/main.rs | 10 ++++- src/util.rs | 12 +----- 3 files changed, 31 insertions(+), 74 deletions(-) diff --git a/src/k_clustering_big.rs b/src/k_clustering_big.rs index 01a8906..f92c75f 100644 --- a/src/k_clustering_big.rs +++ b/src/k_clustering_big.rs @@ -1,47 +1,24 @@ use std::collections::HashMap; pub type ImpliciteGraph = Vec; -#[allow(dead_code)] -fn distance(a: &u32, b: &u32) -> u32 { - let mut r = 0; - for i in 0..24 { - let m = 0x1 << i; - if a & m != b & m { - r += 1; - } +fn neighbors(a: u32, n: usize) -> Vec { + let mut r = vec![a]; + for _ in 0..n { + let mut r_new = r.clone(); + for a in r { + for i in 0..24 { + let m = 0x1 << i; + let neighbor = a ^ m; + r_new.push(neighbor); + } + } + r_new.sort(); + r_new.dedup(); + r = r_new; } r } -#[allow(dead_code)] -fn neighbors_distance_1(a: u32) -> Vec { - let mut r = Vec::new(); - for i in 0..24 { - let m = 0x1 << i; - let neighbor = a ^ m; - r.push(neighbor); - } - r -} - -#[allow(dead_code)] -fn neighbors_distance_2(a: u32) -> Vec { - let mut r = Vec::new(); - for i in 0..24 { - let m = 0x1 << i; - let n1 = a ^ m; - for j in 0..24 { - let m = 0x1 << j; - let n2 = n1 ^ m; - if n2 != a { - r.push(n2); - } - } - } - r.sort(); - r.dedup(); - r -} pub fn k_clustering_big(g: &ImpliciteGraph) -> usize { let mut node_id_to_cluster_id: Vec = (0..g.len()).collect(); @@ -58,35 +35,19 @@ pub fn k_clustering_big(g: &ImpliciteGraph) -> usize { } for node_a_id in 0..g.len() { - for node_b_value in neighbors_distance_1(g[node_a_id]) { + // Iterate over all nodes in the graph. Then, for each node compute all + // neighbors that are two or less bits away. + for node_b_value in neighbors(g[node_a_id], 2) { + // See if there exist nodes that match the neighbor. If such nodes + // exist iterate over them and merge the clusters if they are not + // already the same. The key insight is that we have to cluster all + // nodes that are two or less (that includes zero) bits apart. if let Some(node_b_ids) = node_map.get(&node_b_value) { - // These node IDs have distance one meaning we want to merge them into - // the same cluster. for node_b_id in node_b_ids { let cluster_id_a = node_id_to_cluster_id[node_a_id]; let cluster_id_b = node_id_to_cluster_id[*node_b_id]; if cluster_id_a != cluster_id_b { - // Merge b into a because nodes have distance 1. - let mut cluster_b = std::mem::take(&mut clusters[cluster_id_b]); - for node_id in &cluster_b { - node_id_to_cluster_id[*node_id] = cluster_id_a; - } - clusters[cluster_id_a].append(&mut cluster_b); - cluster_count -= 1; - } - } - } - } - - for node_b_value in neighbors_distance_2(g[node_a_id]) { - if let Some(node_b_ids) = node_map.get(&node_b_value) { - // These node IDs have distance one meaning we want to merge them into - // the same cluster. - for node_b_id in node_b_ids { - let cluster_id_a = node_id_to_cluster_id[node_a_id]; - let cluster_id_b = node_id_to_cluster_id[*node_b_id]; - if cluster_id_a != cluster_id_b { - // Merge b into a because nodes have distance 2. + // Merge b into a. The code is the same as for k_clustering. let mut cluster_b = std::mem::take(&mut clusters[cluster_id_b]); for node_id in &cluster_b { node_id_to_cluster_id[*node_id] = cluster_id_a; diff --git a/src/main.rs b/src/main.rs index 9e53e6a..8c727a3 100644 --- a/src/main.rs +++ b/src/main.rs @@ -109,10 +109,15 @@ fn c3a1() { fn c3a2() { let mut graph = util::read_weighted_graph_clustering("data/c3a2_clustering.txt").unwrap(); let r1 = k_clustering(&mut graph); - println!("r1 = {:?}", r1); let graph = util::read_k_cluster_big("data/c3a2_clustering_big.txt").unwrap(); let r2 = k_clustering_big(&graph); - println!("r2 = {:?}", r2); + println!("r1 = {} r2 = {}", r1, r2); + // r1 = 106 r2 = 6118 +} + +#[allow(dead_code)] +fn c3a3() { + println!("continue here"); } fn main() { @@ -125,4 +130,5 @@ fn main() { // c2a4(); // c3a1(); c3a2(); + c3a3(); } diff --git a/src/util.rs b/src/util.rs index 18b0b9d..d6293cb 100644 --- a/src/util.rs +++ b/src/util.rs @@ -227,17 +227,7 @@ pub fn read_weighted_graph_clustering( pub fn read_k_cluster_big(path: &str) -> Result { let file = File::open(path)?; let mut lines = BufReader::new(file).lines(); - let line = lines.next().unwrap().unwrap(); - - let mut fields = line.split_whitespace(); - let total_nodes: usize = fields.next().unwrap().parse().unwrap(); - let bits_per_node: usize = fields.next().unwrap().parse().unwrap(); - - println!( - "total_nodes = {:?} bits_per_node = {:?}", - total_nodes, bits_per_node - ); - + lines.next(); let mut g = Vec::new(); for line in lines {