cvx_analytics/
topology.rs

1//! Topological features via persistent homology.
2//!
3//! Tracks topological changes in point clouds over time using Betti numbers
4//! from Vietoris-Rips persistent homology. Detects structural shifts like
5//! "the topic space fragmented" or "two clusters merged".
6//!
7//! # Betti Numbers
8//!
9//! - β₀ = number of connected components (clusters)
10//! - β₁ = number of loops (cyclic patterns)
11//!
12//! Changes in Betti numbers over time reveal structural evolution:
13//! - Increasing β₀ → fragmentation
14//! - Decreasing β₀ → convergence
15//! - β₁ appearing → cyclic behavior emerges
16//!
17//! # Implementation
18//!
19//! Uses Vietoris-Rips filtration on pairwise distances between points.
20//! Applied on **region centroids** (K~80 at L3), NOT raw points, for tractability.
21//!
22//! # References
23//!
24//! - Edelsbrunner, H. & Harer, J. (2010). *Computational Topology*. AMS.
25//! - Zigzag persistence for temporal networks. *EPJ Data Science*, 2023.
26
27/// A persistence interval: a topological feature that is "born" at one
28/// filtration radius and "dies" at another.
29#[derive(Debug, Clone, PartialEq)]
30pub struct PersistenceInterval {
31    /// Homology dimension (0 = component, 1 = loop).
32    pub dimension: usize,
33    /// Birth radius: when this feature appears.
34    pub birth: f64,
35    /// Death radius: when this feature disappears. f64::INFINITY for essential features.
36    pub death: f64,
37}
38
39impl PersistenceInterval {
40    /// Persistence = death - birth. Longer-lived features are more significant.
41    pub fn persistence(&self) -> f64 {
42        if self.death.is_infinite() {
43            f64::INFINITY
44        } else {
45            self.death - self.birth
46        }
47    }
48}
49
50/// Persistence diagram: collection of birth-death intervals.
51#[derive(Debug, Clone)]
52pub struct PersistenceDiagram {
53    /// All persistence intervals.
54    pub intervals: Vec<PersistenceInterval>,
55    /// Number of points in the input.
56    pub n_points: usize,
57}
58
59impl PersistenceDiagram {
60    /// Count features alive at a given radius (Betti number).
61    pub fn betti(&self, dimension: usize, radius: f64) -> usize {
62        self.intervals
63            .iter()
64            .filter(|iv| iv.dimension == dimension && iv.birth <= radius && iv.death > radius)
65            .count()
66    }
67
68    /// Compute Betti curve: β(r) for a range of radii.
69    pub fn betti_curve(&self, dimension: usize, radii: &[f64]) -> Vec<usize> {
70        radii.iter().map(|&r| self.betti(dimension, r)).collect()
71    }
72
73    /// Total persistence for a given dimension (sum of lifetimes, excluding infinite).
74    pub fn total_persistence(&self, dimension: usize) -> f64 {
75        self.intervals
76            .iter()
77            .filter(|iv| iv.dimension == dimension && iv.death.is_finite())
78            .map(|iv| iv.persistence())
79            .sum()
80    }
81
82    /// Number of features with persistence above a threshold.
83    pub fn n_significant(&self, dimension: usize, min_persistence: f64) -> usize {
84        self.intervals
85            .iter()
86            .filter(|iv| iv.dimension == dimension && iv.persistence() > min_persistence)
87            .count()
88    }
89}
90
91/// Compute Vietoris-Rips persistent homology (dimension 0 only).
92///
93/// Dimension 0 (connected components) tracks how clusters merge as the
94/// filtration radius grows. This is equivalent to single-linkage clustering.
95///
96/// # Algorithm
97///
98/// 1. Compute pairwise distance matrix.
99/// 2. Sort edges by distance (Kruskal's algorithm).
100/// 3. Use Union-Find to track component merges.
101/// 4. Each merge creates a death event for the younger component.
102///
103/// # Complexity
104///
105/// O(N² log N) for N points (dominated by sorting N² edges).
106///
107/// For region centroids (N~80), this is ~6,400 edges — instant.
108pub fn vietoris_rips_h0(points: &[&[f32]]) -> PersistenceDiagram {
109    let n = points.len();
110    if n == 0 {
111        return PersistenceDiagram {
112            intervals: vec![],
113            n_points: 0,
114        };
115    }
116
117    // All points born at radius 0
118    let mut birth_times = vec![0.0f64; n];
119
120    // Compute and sort all pairwise edges
121    let mut edges: Vec<(f64, usize, usize)> = Vec::with_capacity(n * (n - 1) / 2);
122    for i in 0..n {
123        for j in (i + 1)..n {
124            edges.push((l2_dist(points[i], points[j]), i, j));
125        }
126    }
127    edges.sort_by(|a, b| a.0.partial_cmp(&b.0).unwrap());
128
129    // Union-Find
130    let mut parent: Vec<usize> = (0..n).collect();
131    let mut rank = vec![0usize; n];
132
133    let find = |parent: &mut Vec<usize>, mut x: usize| -> usize {
134        while parent[x] != x {
135            parent[x] = parent[parent[x]]; // path compression
136            x = parent[x];
137        }
138        x
139    };
140
141    let mut intervals = Vec::new();
142
143    for (dist, i, j) in edges {
144        let ri = find(&mut parent, i);
145        let rj = find(&mut parent, j);
146        if ri == rj {
147            continue; // already in same component
148        }
149
150        // Merge: younger component dies (the one born later, or arbitrary if same)
151        let (survivor, dying) = if birth_times[ri] <= birth_times[rj] {
152            (ri, rj)
153        } else {
154            (rj, ri)
155        };
156
157        intervals.push(PersistenceInterval {
158            dimension: 0,
159            birth: birth_times[dying],
160            death: dist,
161        });
162
163        // Union by rank
164        if rank[survivor] < rank[dying] {
165            parent[survivor] = dying;
166            birth_times[dying] = birth_times[dying].min(birth_times[survivor]);
167        } else {
168            parent[dying] = survivor;
169            if rank[survivor] == rank[dying] {
170                rank[survivor] += 1;
171            }
172        }
173    }
174
175    // The last surviving component has infinite persistence
176    let final_root = find(&mut parent, 0);
177    intervals.push(PersistenceInterval {
178        dimension: 0,
179        birth: birth_times[final_root],
180        death: f64::INFINITY,
181    });
182
183    PersistenceDiagram {
184        intervals,
185        n_points: n,
186    }
187}
188
189/// Topological summary features extracted from a persistence diagram.
190#[derive(Debug, Clone)]
191pub struct TopologicalFeatures {
192    /// Number of significant components (persistence > threshold).
193    pub n_components: usize,
194    /// Total persistence of H₀ (sum of lifetimes).
195    pub total_persistence_h0: f64,
196    /// Max persistence (most prominent feature).
197    pub max_persistence: f64,
198    /// Mean persistence of finite intervals.
199    pub mean_persistence: f64,
200    /// Persistence entropy: -Σ (p_i / P) log(p_i / P) where p_i = persistence of interval i.
201    pub persistence_entropy: f64,
202    /// Betti curve at selected radii.
203    pub betti_curve: Vec<usize>,
204    /// Radii used for Betti curve.
205    pub radii: Vec<f64>,
206}
207
208/// Extract topological summary features from a point cloud.
209///
210/// Computes H₀ persistent homology and summarizes into a fixed-size feature vector.
211///
212/// # Arguments
213///
214/// * `points` - Point cloud (e.g., region centroids).
215/// * `n_radii` - Number of radii for Betti curve sampling (default 20).
216/// * `persistence_threshold` - Minimum persistence to count as significant (default 0.1).
217pub fn topological_summary(
218    points: &[&[f32]],
219    n_radii: usize,
220    persistence_threshold: f64,
221) -> TopologicalFeatures {
222    let diagram = vietoris_rips_h0(points);
223
224    let finite: Vec<f64> = diagram
225        .intervals
226        .iter()
227        .filter(|iv| iv.death.is_finite())
228        .map(|iv| iv.persistence())
229        .collect();
230
231    let max_persistence = finite.iter().cloned().fold(0.0f64, f64::max);
232    let total = finite.iter().sum::<f64>();
233    let mean_persistence = if finite.is_empty() {
234        0.0
235    } else {
236        total / finite.len() as f64
237    };
238
239    // Persistence entropy
240    let persistence_entropy = if total > 0.0 {
241        finite
242            .iter()
243            .map(|&p| {
244                let q = p / total;
245                if q > 0.0 { -q * q.ln() } else { 0.0 }
246            })
247            .sum()
248    } else {
249        0.0
250    };
251
252    // Betti curve: sample at evenly-spaced radii from 0 to max_death
253    let max_death = finite.iter().cloned().fold(0.0f64, f64::max);
254    let radii: Vec<f64> = (0..n_radii)
255        .map(|i| max_death * (i as f64 + 0.5) / n_radii as f64)
256        .collect();
257    let betti_curve = diagram.betti_curve(0, &radii);
258
259    let n_components = diagram.n_significant(0, persistence_threshold);
260
261    TopologicalFeatures {
262        n_components,
263        total_persistence_h0: total,
264        max_persistence,
265        mean_persistence,
266        persistence_entropy,
267        betti_curve,
268        radii,
269    }
270}
271
272/// L2 distance between two vectors.
273#[inline]
274fn l2_dist(a: &[f32], b: &[f32]) -> f64 {
275    a.iter()
276        .zip(b.iter())
277        .map(|(x, y)| {
278            let d = *x as f64 - *y as f64;
279            d * d
280        })
281        .sum::<f64>()
282        .sqrt()
283}
284
285#[cfg(test)]
286mod tests {
287    use super::*;
288
289    #[test]
290    fn single_point_one_component() {
291        let points: Vec<&[f32]> = vec![&[0.0f32, 0.0]];
292        let d = vietoris_rips_h0(&points);
293        assert_eq!(d.intervals.len(), 1);
294        assert!(d.intervals[0].death.is_infinite()); // one eternal component
295        assert_eq!(d.betti(0, 0.0), 1);
296    }
297
298    #[test]
299    fn two_distant_clusters() {
300        // Two well-separated clusters
301        let points: Vec<&[f32]> = vec![
302            &[0.0f32, 0.0],
303            &[0.1, 0.0],
304            &[0.0, 0.1], // cluster A
305            &[10.0, 10.0],
306            &[10.1, 10.0],
307            &[10.0, 10.1], // cluster B
308        ];
309        let d = vietoris_rips_h0(&points);
310
311        // At small radius: 2 components (β₀ = 2 for the two clusters)
312        assert_eq!(d.betti(0, 0.5), 2, "should see 2 clusters at r=0.5");
313
314        // At large radius: 1 component (everything connected)
315        assert_eq!(d.betti(0, 20.0), 1, "should see 1 component at r=20");
316    }
317
318    #[test]
319    fn three_equidistant_points() {
320        // Equilateral triangle: all pairwise distances = 1.0
321        let s = 3.0f32.sqrt() / 2.0;
322        let p0 = [0.0f32, 0.0];
323        let p1 = [1.0f32, 0.0];
324        let p2 = [0.5f32, s];
325        let points: Vec<&[f32]> = vec![&p0, &p1, &p2];
326        let d = vietoris_rips_h0(&points);
327
328        // 3 points → 2 death events + 1 infinite
329        assert_eq!(d.intervals.len(), 3);
330        assert_eq!(d.betti(0, 0.5), 3); // all separate
331        assert_eq!(d.betti(0, 1.5), 1); // all connected
332    }
333
334    #[test]
335    fn betti_curve_monotone_decreasing() {
336        let points: Vec<&[f32]> = vec![&[0.0f32], &[1.0], &[3.0], &[6.0], &[10.0]];
337        let d = vietoris_rips_h0(&points);
338        let radii: Vec<f64> = (0..20).map(|i| i as f64 * 0.6).collect();
339        let curve = d.betti_curve(0, &radii);
340
341        // β₀ should be non-increasing
342        for i in 1..curve.len() {
343            assert!(
344                curve[i] <= curve[i - 1],
345                "β₀ should be non-increasing: β₀[{}]={} > β₀[{}]={}",
346                i,
347                curve[i],
348                i - 1,
349                curve[i - 1]
350            );
351        }
352    }
353
354    #[test]
355    fn topological_summary_two_clusters() {
356        let points: Vec<&[f32]> = vec![&[0.0f32, 0.0], &[0.1, 0.0], &[10.0, 0.0], &[10.1, 0.0]];
357        let feat = topological_summary(&points, 20, 1.0);
358
359        // Two clusters with large gap → at least 1 significant component separation
360        assert!(feat.n_components >= 1, "should detect cluster structure");
361        assert!(
362            feat.max_persistence > 5.0,
363            "max persistence should reflect cluster gap"
364        );
365    }
366
367    #[test]
368    fn persistence_entropy_uniform() {
369        // Many points equally spaced → all intervals have similar persistence
370        let pts: Vec<[f32; 1]> = (0..10).map(|i| [i as f32]).collect();
371        let point_refs: Vec<&[f32]> = pts.iter().map(|p| p.as_slice()).collect();
372        let feat = topological_summary(&point_refs, 10, 0.01);
373
374        // Uniform persistence → high entropy
375        assert!(
376            feat.persistence_entropy > 1.0,
377            "uniform should have high entropy, got {}",
378            feat.persistence_entropy
379        );
380    }
381
382    #[test]
383    fn empty_points() {
384        let points: Vec<&[f32]> = vec![];
385        let d = vietoris_rips_h0(&points);
386        assert_eq!(d.intervals.len(), 0);
387        assert_eq!(d.n_points, 0);
388    }
389}