From cc8a0443cb555084ecb7067772dfeb9db3cd148b Mon Sep 17 00:00:00 2001
From: Felix Martin <mail@felixm.de>
Date: Sun, 14 Feb 2021 21:00:47 -0500
Subject: [PATCH] Finish course 3 week 2 assignment

---
 src/k_clustering_big.rs | 83 +++++++++++------------------------------
 src/main.rs             | 10 ++++-
 src/util.rs             | 12 +-----
 3 files changed, 31 insertions(+), 74 deletions(-)
diff --git a/src/k_clustering_big.rs b/src/k_clustering_big.rs
index 01a8906..f92c75f 100644
--- a/src/k_clustering_big.rs
+++ b/src/k_clustering_big.rs
@@ -1,47 +1,24 @@
 use std::collections::HashMap;
 pub type ImpliciteGraph = Vec<u32>;
 
-#[allow(dead_code)]
-fn distance(a: &u32, b: &u32) -> u32 {
-    let mut r = 0;
-    for i in 0..24 {
-        let m = 0x1 << i;
-        if a & m != b & m {
-            r += 1;
-        }
+/// Returns every value reachable from `a` by flipping at most `n` of its low
+/// 24 bits (including `a` itself), sorted ascending and free of duplicates.
+fn neighbors(a: u32, n: usize) -> Vec<u32> {
+    let mut r = vec![a];
+    for _ in 0..n {
+        // Each round keeps what was found so far and adds every single-bit
+        // flip of it, growing the reachable distance by one.
+        let mut r_new = r.clone();
+        for value in r {
+            r_new.extend((0..24).map(|i| value ^ (0x1 << i)));
+        }
+        r_new.sort_unstable();
+        r_new.dedup();
+        r = r_new;
     }
     r
 }
 
-#[allow(dead_code)]
-fn neighbors_distance_1(a: u32) -> Vec<u32> {
-    let mut r = Vec::new();
-    for i in 0..24 {
-        let m = 0x1 << i;
-        let neighbor = a ^ m;
-        r.push(neighbor);
-    }
-    r
-}
-
-#[allow(dead_code)]
-fn neighbors_distance_2(a: u32) -> Vec<u32> {
-    let mut r = Vec::new();
-    for i in 0..24 {
-        let m = 0x1 << i;
-        let n1 = a ^ m;
-        for j in 0..24 {
-            let m = 0x1 << j;
-            let n2 = n1 ^ m;
-            if n2 != a {
-                r.push(n2);
-            }
-        }
-    }
-    r.sort();
-    r.dedup();
-    r
-}
 
 pub fn k_clustering_big(g: &ImpliciteGraph) -> usize {
     let mut node_id_to_cluster_id: Vec<usize> = (0..g.len()).collect();
@@ -58,35 +35,19 @@ pub fn k_clustering_big(g: &ImpliciteGraph) -> usize {
     }
 
     for node_a_id in 0..g.len() {
-        for node_b_value in neighbors_distance_1(g[node_a_id]) {
+        // Iterate over all nodes in the graph. Then, for each node, compute all
+        // neighbors that are two or fewer bits away.
+        for node_b_value in neighbors(g[node_a_id], 2) {
+            // See if there exist nodes that match the neighbor. If such nodes
+            // exist, iterate over them and merge the clusters if they are not
+            // already the same. The key insight is that we have to cluster all
+            // nodes that are two or fewer (that includes zero) bits apart.
             if let Some(node_b_ids) = node_map.get(&node_b_value) {
-                // These node IDs have distance one meaning we want to merge them into
-                // the same cluster.
                 for node_b_id in node_b_ids {
                     let cluster_id_a = node_id_to_cluster_id[node_a_id];
                     let cluster_id_b = node_id_to_cluster_id[*node_b_id];
                     if cluster_id_a != cluster_id_b {
-                        // Merge b into a because nodes have distance 1.
-                        let mut cluster_b = std::mem::take(&mut clusters[cluster_id_b]);
-                        for node_id in &cluster_b {
-                            node_id_to_cluster_id[*node_id] = cluster_id_a;
-                        }
-                        clusters[cluster_id_a].append(&mut cluster_b);
-                        cluster_count -= 1;
-                    }
-                }
-            }
-        }
-
-        for node_b_value in neighbors_distance_2(g[node_a_id]) {
-            if let Some(node_b_ids) = node_map.get(&node_b_value) {
-                // These node IDs have distance one meaning we want to merge them into
-                // the same cluster.
-                for node_b_id in node_b_ids {
-                    let cluster_id_a = node_id_to_cluster_id[node_a_id];
-                    let cluster_id_b = node_id_to_cluster_id[*node_b_id];
-                    if cluster_id_a != cluster_id_b {
-                        // Merge b into a because nodes have distance 2.
+                        // Merge b into a. The code is the same as for k_clustering.
                         let mut cluster_b = std::mem::take(&mut clusters[cluster_id_b]);
                         for node_id in &cluster_b {
                             node_id_to_cluster_id[*node_id] = cluster_id_a;
diff --git a/src/main.rs b/src/main.rs
index 9e53e6a..8c727a3 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -109,10 +109,15 @@ fn c3a1() {
 fn c3a2() {
     let mut graph = util::read_weighted_graph_clustering("data/c3a2_clustering.txt").unwrap();
     let r1 = k_clustering(&mut graph);
-    println!("r1 = {:?}", r1);
     let graph = util::read_k_cluster_big("data/c3a2_clustering_big.txt").unwrap();
     let r2 = k_clustering_big(&graph);
-    println!("r2 = {:?}", r2);
+    println!("r1 = {} r2 = {}", r1, r2);
+    // r1 = 106 r2 = 6118
+}
+
+#[allow(dead_code)]
+fn c3a3() {
+    println!("continue here"); // Stub: only prints a marker message.
 }
 
 fn main() {
@@ -125,4 +130,5 @@ fn main() {
     // c2a4();
     // c3a1();
     c3a2();
+    c3a3();
 }
diff --git a/src/util.rs b/src/util.rs
index 18b0b9d..d6293cb 100644
--- a/src/util.rs
+++ b/src/util.rs
@@ -227,17 +227,7 @@ pub fn read_weighted_graph_clustering(
 pub fn read_k_cluster_big(path: &str) -> Result<k_clustering_big::ImpliciteGraph, io::Error> {
     let file = File::open(path)?;
     let mut lines = BufReader::new(file).lines();
-    let line = lines.next().unwrap().unwrap();
-
-    let mut fields = line.split_whitespace();
-    let total_nodes: usize = fields.next().unwrap().parse().unwrap();
-    let bits_per_node: usize = fields.next().unwrap().parse().unwrap();
-
-    println!(
-        "total_nodes = {:?} bits_per_node = {:?}",
-        total_nodes, bits_per_node
-    );
-
+    lines.next();
     let mut g = Vec::new();
 
     for line in lines {