use std::collections::HashMap;

/// Number of low-order bits of a node label that the helpers in this file inspect.
const LABEL_BITS: u32 = 24;

/// Implicit graph for the big k-clustering problem: one bit label per node.
///
/// No edges are stored; the cost between two nodes is the Hamming distance of
/// their labels. The loader (`read_k_cluster_big` in `src/util.rs`) builds each
/// label by stripping whitespace from an input line and parsing the remainder
/// as a base-2 `u32`.
pub type ImpliciteGraph = Vec<u32>;

/// Returns the Hamming distance between `a` and `b`, counting only the low
/// [`LABEL_BITS`] bits; any higher bits are ignored.
// Not called by the clustering routine; kept as a reference helper.
#[allow(dead_code)]
fn distance(a: &u32, b: &u32) -> u32 {
    let mask = (1u32 << LABEL_BITS) - 1;
    ((a ^ b) & mask).count_ones()
}

/// Returns every label obtained from `a` by flipping exactly one of the low
/// [`LABEL_BITS`] bits, ordered by the index of the flipped bit.
fn neighbors_distance_1(a: u32) -> Vec<u32> {
    (0..LABEL_BITS).map(|bit| a ^ (1u32 << bit)).collect()
}

/// Returns every label obtained from `a` by flipping exactly two distinct bits
/// among the low [`LABEL_BITS`] bits, in ascending order and without duplicates.
fn neighbors_distance_2(a: u32) -> Vec<u32> {
    let mut result = Vec::new();
    for first in 0..LABEL_BITS {
        // Starting at `first + 1` visits each unordered bit pair once, so the
        // output never contains `a` itself and never contains duplicates.
        for second in (first + 1)..LABEL_BITS {
            result.push(a ^ (1u32 << first) ^ (1u32 << second));
        }
    }
    result.sort_unstable();
    result
}

/// Returns the root of the set containing `node`, compressing the path so that
/// every node visited on the way points directly at the root afterwards.
fn find_root(parent: &mut [usize], mut node: usize) -> usize {
    let mut root = node;
    while parent[root] != root {
        root = parent[root];
    }
    while parent[node] != root {
        let next = parent[node];
        parent[node] = root;
        node = next;
    }
    root
}

/// Merges the sets containing `a` and `b`, attaching the smaller set under the
/// larger one. Returns `true` if two distinct sets were merged and `false` if
/// `a` and `b` already belonged to the same set.
fn union_clusters(parent: &mut [usize], size: &mut [usize], a: usize, b: usize) -> bool {
    let mut root_a = find_root(parent, a);
    let mut root_b = find_root(parent, b);
    if root_a == root_b {
        return false;
    }
    if size[root_a] < size[root_b] {
        std::mem::swap(&mut root_a, &mut root_b);
    }
    parent[root_b] = root_a;
    size[root_a] += size[root_b];
    true
}

/// Returns the number of clusters that remain after merging every pair of
/// nodes whose labels are at Hamming distance 0, 1 or 2.
///
/// Nodes that carry the same label (distance 0) are placed in the same cluster
/// even when no other node lies within distance 2 of them. An empty graph
/// yields 0.
pub fn k_clustering_big(g: &ImpliciteGraph) -> usize {
    let node_count = g.len();
    let mut parent: Vec<usize> = (0..node_count).collect();
    let mut size = vec![1usize; node_count];
    let mut cluster_count = node_count;

    // Map each distinct label to the first node that carries it. Every later
    // node with the same label is at distance 0 and is merged immediately, so
    // one representative per label is enough for the neighbour search below.
    let mut representative: HashMap<u32, usize> = HashMap::with_capacity(node_count);
    for (node_id, &label) in g.iter().enumerate() {
        let first = *representative.entry(label).or_insert(node_id);
        if first != node_id && union_clusters(&mut parent, &mut size, first, node_id) {
            cluster_count -= 1;
        }
    }

    // Enumerate the labels at distance 1 and 2 from each distinct label and
    // merge with whichever of them actually occur in the graph.
    for (&label, &node_id) in &representative {
        let candidates = neighbors_distance_1(label)
            .into_iter()
            .chain(neighbors_distance_2(label));
        for candidate in candidates {
            if let Some(&other_id) = representative.get(&candidate) {
                if union_clusters(&mut parent, &mut size, node_id, other_id) {
                    cluster_count -= 1;
                }
            }
        }
    }

    cluster_count
}