use std::collections::HashMap;

/// A graph stored implicitly as a list of node labels.
///
/// No edges are stored. [`k_clustering_big`] treats two nodes as belonging
/// together when their labels differ in at most two of the low 24 bits.
pub type ImpliciteGraph = Vec<u32>;

/// Returns every value reachable from `a` by flipping at most `n` of its low
/// 24 bits, including `a` itself.
///
/// For `n >= 1` the result is sorted ascending and free of duplicates; for
/// `n == 0` it is just `[a]`.
fn neighbors(a: u32, n: usize) -> Vec<u32> {
    // Only bit positions 0..24 are ever flipped.
    const BITS: u32 = 24;

    let mut reachable = vec![a];
    for _ in 0..n {
        // Each round keeps everything found so far and adds all values that
        // are one more bit flip away.
        let mut next = Vec::with_capacity(reachable.len() * (BITS as usize + 1));
        next.extend_from_slice(&reachable);
        for &value in &reachable {
            next.extend((0..BITS).map(|bit| value ^ (1u32 << bit)));
        }
        // Flipping the same bit twice (or two bits in either order) yields
        // repeats, so collapse them before the next round.
        next.sort_unstable();
        next.dedup();
        reachable = next;
    }
    reachable
}

/// Counts the clusters that remain after merging all nodes of `g` whose
/// labels are two or fewer bit flips apart (within the low 24 bits).
///
/// Nodes with identical labels are zero bits apart and therefore always end
/// up in the same cluster. Merging is transitive: if `a` is close to `b` and
/// `b` is close to `c`, all three share a cluster. An empty graph yields `0`.
pub fn k_clustering_big(g: &ImpliciteGraph) -> usize {
    let node_count = g.len();
    let mut node_id_to_cluster_id: Vec<usize> = (0..node_count).collect();
    let mut clusters: Vec<Vec<usize>> = (0..node_count).map(|id| vec![id]).collect();
    let mut cluster_count = node_count;

    // Index from label to all node ids carrying that label, so candidate
    // neighbors can be looked up directly instead of comparing all pairs.
    let mut node_map: HashMap<u32, Vec<usize>> = HashMap::with_capacity(node_count);
    for (node_id, &value) in g.iter().enumerate() {
        node_map.entry(value).or_default().push(node_id);
    }

    // The labels within two bit flips of `v` are exactly `v ^ mask` for every
    // mask within two bit flips of zero, so the masks are computed once rather
    // than rebuilding the neighbor list for each node.
    let masks = neighbors(0, 2);

    for (node_a_id, &node_a_value) in g.iter().enumerate() {
        for &mask in &masks {
            // See if there exist nodes that match the neighbor. If such nodes
            // exist iterate over them and merge the clusters if they are not
            // already the same. The key insight is that we have to cluster all
            // nodes that are two or less (that includes zero) bits apart.
            let node_b_value = node_a_value ^ mask;
            if let Some(node_b_ids) = node_map.get(&node_b_value) {
                for &node_b_id in node_b_ids {
                    // Re-read on every iteration: an earlier merge in this
                    // loop may have relabelled either node.
                    let cluster_id_a = node_id_to_cluster_id[node_a_id];
                    let cluster_id_b = node_id_to_cluster_id[node_b_id];
                    if cluster_id_a == cluster_id_b {
                        continue;
                    }

                    // Fold the smaller cluster into the larger one so each
                    // node is relabelled at most O(log n) times overall.
                    let (kept, absorbed) =
                        if clusters[cluster_id_a].len() >= clusters[cluster_id_b].len() {
                            (cluster_id_a, cluster_id_b)
                        } else {
                            (cluster_id_b, cluster_id_a)
                        };

                    let mut moved = std::mem::take(&mut clusters[absorbed]);
                    for &node_id in &moved {
                        node_id_to_cluster_id[node_id] = kept;
                    }
                    clusters[kept].append(&mut moved);
                    cluster_count -= 1;
                }
            }
        }
    }

    cluster_count
}