// cvx_analytics/explain.rs

1//! Interpretability layer: drift attribution, trajectory projection, dimension heatmaps.
2//!
3//! Provides human-readable explanations of temporal vector evolution:
4//! - **DriftAttribution**: per-dimension drift analysis with Pareto ranking
5//! - **TrajectoryProjector**: PCA-based 2D/3D projection
6//! - **DimensionHeatmap**: time × dimension change intensity matrix
7
8use cvx_core::error::AnalyticsError;
9
10// ─── Drift Attribution ──────────────────────────────────────────────
11
/// Per-dimension drift attribution.
#[derive(Debug, Clone)]
pub struct DriftAttribution {
    /// Total L2 drift magnitude.
    pub total_magnitude: f32,
    /// Per-dimension contributions, sorted by absolute change
    /// (highest first) and truncated to the requested `top_k`.
    pub dimensions: Vec<DimensionContribution>,
}

/// A single dimension's contribution to overall drift.
#[derive(Debug, Clone)]
pub struct DimensionContribution {
    /// Dimension index.
    pub index: usize,
    /// Absolute change in this dimension.
    pub absolute_change: f32,
    /// Fraction of total L2² explained by this dimension
    /// (0.0 when the total drift is zero).
    pub relative_contribution: f32,
}

/// Compute drift attribution between two vectors.
///
/// Returns dimensions sorted by contribution (highest first), truncated
/// to the `top_k` largest.
///
/// # Panics
///
/// Panics if `v1` and `v2` have different lengths.
pub fn drift_attribution(v1: &[f32], v2: &[f32], top_k: usize) -> DriftAttribution {
    assert_eq!(v1.len(), v2.len(), "dimension mismatch");

    let changes: Vec<f32> = v1.iter().zip(v2.iter()).map(|(a, b)| b - a).collect();
    let total_sq: f32 = changes.iter().map(|c| c * c).sum();
    let total_magnitude = total_sq.sqrt();

    let mut dims: Vec<DimensionContribution> = changes
        .iter()
        .enumerate()
        .map(|(i, &c)| DimensionContribution {
            index: i,
            absolute_change: c.abs(),
            // Guard against division by zero for identical vectors.
            relative_contribution: if total_sq > 0.0 {
                (c * c) / total_sq
            } else {
                0.0
            },
        })
        .collect();

    // `total_cmp` imposes a total order even on NaN, so the sort can never
    // panic (the previous `partial_cmp(...).unwrap()` would on NaN input).
    // Stability is irrelevant here, so the faster unstable sort is used.
    dims.sort_unstable_by(|a, b| b.absolute_change.total_cmp(&a.absolute_change));
    dims.truncate(top_k);

    DriftAttribution {
        total_magnitude,
        dimensions: dims,
    }
}
64
65// ─── Trajectory Projection (PCA) ───────────────────────────────────
66
/// A trajectory point projected onto the first two principal components.
#[derive(Debug, Clone)]
pub struct ProjectedPoint {
    /// Timestamp carried over from the source trajectory point.
    pub timestamp: i64,
    /// Coordinate along the first principal component (PC1).
    pub x: f32,
    /// Coordinate along the second principal component (PC2).
    pub y: f32,
}
77
78/// Project a trajectory to 2D using PCA.
79///
80/// Computes the two principal components of the trajectory vectors
81/// and projects each point onto them.
82pub fn project_trajectory_2d(
83    trajectory: &[(i64, &[f32])],
84) -> Result<Vec<ProjectedPoint>, AnalyticsError> {
85    if trajectory.len() < 2 {
86        return Err(AnalyticsError::InsufficientData {
87            needed: 2,
88            have: trajectory.len(),
89        });
90    }
91
92    let n = trajectory.len();
93    let dim = trajectory[0].1.len();
94
95    // Compute mean
96    let mut mean = vec![0.0f64; dim];
97    for (_, v) in trajectory {
98        for d in 0..dim {
99            mean[d] += v[d] as f64;
100        }
101    }
102    for m in &mut mean {
103        *m /= n as f64;
104    }
105
106    // Center the data
107    let centered: Vec<Vec<f64>> = trajectory
108        .iter()
109        .map(|(_, v)| (0..dim).map(|d| v[d] as f64 - mean[d]).collect())
110        .collect();
111
112    // Power iteration to find top 2 eigenvectors of covariance matrix
113    let pc1 = power_iteration(&centered, dim, None);
114    let pc2 = power_iteration(&centered, dim, Some(&pc1));
115
116    // Project
117    let projected: Vec<ProjectedPoint> = trajectory
118        .iter()
119        .zip(centered.iter())
120        .map(|((ts, _), c)| {
121            let x: f64 = c.iter().zip(pc1.iter()).map(|(a, b)| a * b).sum();
122            let y: f64 = c.iter().zip(pc2.iter()).map(|(a, b)| a * b).sum();
123            ProjectedPoint {
124                timestamp: *ts,
125                x: x as f32,
126                y: y as f32,
127            }
128        })
129        .collect();
130
131    Ok(projected)
132}
133
/// Power iteration for finding a principal component.
///
/// Repeatedly applies the (implicit, unnormalized) covariance operator
/// `v ← Σ x_i (x_i · v)` of the centered data rows and renormalizes,
/// converging to the dominant eigenvector. The omitted `1/n` factor only
/// scales eigenvalues; the normalized eigenvector is unaffected.
///
/// If `deflate_against` is provided, the component is orthogonalized
/// against it every step (Hotelling deflation) so that the result is the
/// second principal component.
fn power_iteration(data: &[Vec<f64>], dim: usize, deflate_against: Option<&[f64]>) -> Vec<f64> {
    let max_iter = 200;
    let tol = 1e-10;

    // Initialize with a vector not parallel to the deflation direction
    let mut v: Vec<f64> = if let Some(prev) = deflate_against {
        // Start with a vector orthogonal to the previous PC
        // Use the second data point or a unit vector in a different direction
        let mut init = if data.len() > 1 {
            data[1].clone()
        } else {
            let mut u = vec![0.0; dim];
            if dim > 1 {
                u[1] = 1.0;
            }
            u
        };
        // Orthogonalize against prev (single Gram-Schmidt step)
        let dot: f64 = init.iter().zip(prev.iter()).map(|(a, b)| a * b).sum();
        for d in 0..dim {
            init[d] -= dot * prev[d];
        }
        let norm: f64 = init.iter().map(|x| x * x).sum::<f64>().sqrt();
        if norm > tol {
            init.iter().map(|x| x / norm).collect()
        } else {
            // All data points parallel — try unit vectors
            let mut fallback = vec![0.0; dim];
            for d in 0..dim {
                fallback[d] = 1.0;
                let dot: f64 = fallback.iter().zip(prev.iter()).map(|(a, b)| a * b).sum();
                for dd in 0..dim {
                    fallback[dd] -= dot * prev[dd];
                }
                let norm: f64 = fallback.iter().map(|x| x * x).sum::<f64>().sqrt();
                if norm > tol {
                    return fallback.iter().map(|x| x / norm).collect();
                }
                // Basis vector e_d was parallel to `prev`; reset and try the
                // next axis.
                fallback = vec![0.0; dim];
            }
            // Only reached in degenerate cases (e.g. dim <= 1): returns the
            // zero vector, making subsequent projections onto it zero.
            fallback
        }
    } else if !data.is_empty() && data[0].iter().any(|&x| x != 0.0) {
        // First PC: seed with the first non-zero centered data point.
        let norm: f64 = data[0].iter().map(|x| x * x).sum::<f64>().sqrt();
        data[0].iter().map(|x| x / norm).collect()
    } else {
        // Degenerate data: fall back to the first standard basis vector.
        let mut init = vec![0.0; dim];
        if dim > 0 {
            init[0] = 1.0;
        }
        init
    };

    for _ in 0..max_iter {
        // Compute Cov * v = (1/n) Σ (x_i * (x_i · v))
        // (the 1/n factor is dropped; normalization below removes the scale)
        let mut new_v = vec![0.0f64; dim];
        for row in data {
            let dot: f64 = row.iter().zip(v.iter()).map(|(a, b)| a * b).sum();
            for d in 0..dim {
                new_v[d] += row[d] * dot;
            }
        }

        // Deflate against previous component if needed
        if let Some(prev) = deflate_against {
            let dot: f64 = new_v.iter().zip(prev.iter()).map(|(a, b)| a * b).sum();
            for d in 0..dim {
                new_v[d] -= dot * prev[d];
            }
        }

        // Normalize
        let norm: f64 = new_v.iter().map(|x| x * x).sum::<f64>().sqrt();
        if norm < tol {
            // Operator annihilated v (no variance left in this subspace):
            // return the previous iterate rather than a zero vector.
            return v;
        }
        for x in &mut new_v {
            *x /= norm;
        }

        // Check convergence (L2 distance between successive iterates)
        let diff: f64 = v
            .iter()
            .zip(new_v.iter())
            .map(|(a, b)| (a - b) * (a - b))
            .sum::<f64>()
            .sqrt();

        v = new_v;
        if diff < tol {
            break;
        }
    }

    v
}
234
235// ─── Dimension Heatmap ──────────────────────────────────────────────
236
/// Time × dimension change-intensity matrix.
#[derive(Debug, Clone)]
pub struct DimensionHeatmap {
    /// Number of time bins (rows).
    pub time_bins: usize,
    /// Number of vector dimensions (columns).
    pub dimensions: usize,
    /// Timestamp associated with each time bin.
    pub timestamps: Vec<i64>,
    /// Row-major flattened matrix: entry `[time_bin * dimensions + dim]`
    /// holds the change intensity for that bin/dimension pair.
    pub data: Vec<f32>,
}

impl DimensionHeatmap {
    /// Change intensity at `(time_bin, dim)`.
    pub fn get(&self, time_bin: usize, dim: usize) -> f32 {
        let flat = time_bin * self.dimensions + dim;
        self.data[flat]
    }
}
256
257/// Build a dimension heatmap from a trajectory.
258///
259/// Computes the absolute per-dimension change between consecutive points.
260pub fn dimension_heatmap(trajectory: &[(i64, &[f32])]) -> Result<DimensionHeatmap, AnalyticsError> {
261    if trajectory.len() < 2 {
262        return Err(AnalyticsError::InsufficientData {
263            needed: 2,
264            have: trajectory.len(),
265        });
266    }
267
268    let dim = trajectory[0].1.len();
269    let time_bins = trajectory.len() - 1;
270    let mut data = Vec::with_capacity(time_bins * dim);
271    let mut timestamps = Vec::with_capacity(time_bins);
272
273    for w in trajectory.windows(2) {
274        timestamps.push(w[1].0);
275        for d in 0..dim {
276            data.push((w[1].1[d] - w[0].1[d]).abs());
277        }
278    }
279
280    Ok(DimensionHeatmap {
281        time_bins,
282        dimensions: dim,
283        timestamps,
284        data,
285    })
286}
287
#[cfg(test)]
mod tests {
    use super::*;

    /// Borrow a list of owned points as the `(timestamp, slice)` pairs the
    /// public API expects.
    fn make_trajectory(points: &[(i64, Vec<f32>)]) -> Vec<(i64, &[f32])> {
        points.iter().map(|(t, v)| (*t, v.as_slice())).collect()
    }

    // ─── Drift Attribution ──────────────────────────────────────────

    #[test]
    fn attribution_identifies_top_dimensions() {
        let v1 = vec![0.0; 10];
        let mut v2 = vec![0.0; 10];
        v2[3] = 5.0; // dim 3 changed most
        v2[7] = 3.0; // dim 7 second

        let attr = drift_attribution(&v1, &v2, 5);
        assert_eq!(attr.dimensions[0].index, 3);
        assert_eq!(attr.dimensions[1].index, 7);
        assert!((attr.dimensions[0].absolute_change - 5.0).abs() < 1e-6);
    }

    #[test]
    fn attribution_relative_contributions_sum_to_1() {
        let v1 = vec![1.0, 2.0, 3.0, 4.0, 5.0];
        let v2 = vec![2.0, 3.0, 1.0, 6.0, 4.0];

        // top_k covers all dimensions, so the squared-contribution
        // fractions must add up to exactly 1.
        let attr = drift_attribution(&v1, &v2, 5);
        let total: f32 = attr
            .dimensions
            .iter()
            .map(|d| d.relative_contribution)
            .sum();
        assert!(
            (total - 1.0).abs() < 1e-5,
            "contributions should sum to 1.0, got {total}"
        );
    }

    #[test]
    fn attribution_zero_drift() {
        let v = vec![1.0, 2.0, 3.0];
        let attr = drift_attribution(&v, &v, 3);
        assert!(attr.total_magnitude < 1e-6);
    }

    // ─── PCA Projection ─────────────────────────────────────────────

    #[test]
    fn pca_projection_produces_2d() {
        // Helix-like 3D trajectory: two oscillating dims plus a slow drift.
        let points: Vec<(i64, Vec<f32>)> = (0..100)
            .map(|i| {
                let t = i as f32 * 0.1;
                (i as i64 * 1000, vec![t.cos(), t.sin(), t * 0.1])
            })
            .collect();
        let traj = make_trajectory(&points);

        let projected = project_trajectory_2d(&traj).unwrap();
        assert_eq!(projected.len(), 100);

        // All points should have finite coordinates
        for p in &projected {
            assert!(p.x.is_finite(), "x should be finite");
            assert!(p.y.is_finite(), "y should be finite");
        }
    }

    #[test]
    fn pca_projection_insufficient_data() {
        let points = vec![(0i64, vec![1.0, 2.0])];
        let traj = make_trajectory(&points);
        assert!(project_trajectory_2d(&traj).is_err());
    }

    #[test]
    fn pca_preserves_variance() {
        // 2D signal + noise: PC1 should capture the main direction
        let points: Vec<(i64, Vec<f32>)> = (0..50)
            .map(|i| {
                let t = i as f32;
                // Strong signal in dim 0, weak noise in dim 1, zero in dim 2
                (i as i64 * 1000, vec![t * 2.0, t * 0.1, 0.0])
            })
            .collect();
        let traj = make_trajectory(&points);

        let projected = project_trajectory_2d(&traj).unwrap();
        // Projections are mean-centered, so E[p²] is the variance.
        let var_x: f64 =
            projected.iter().map(|p| (p.x as f64).powi(2)).sum::<f64>() / projected.len() as f64;
        let var_y: f64 =
            projected.iter().map(|p| (p.y as f64).powi(2)).sum::<f64>() / projected.len() as f64;

        // PC1 should capture most variance
        assert!(
            var_x > var_y * 5.0,
            "PC1 variance ({var_x:.2}) should dominate PC2 ({var_y:.2})"
        );
    }

    // ─── Dimension Heatmap ──────────────────────────────────────────

    #[test]
    fn heatmap_dimensions_correct() {
        let points: Vec<(i64, Vec<f32>)> = (0..10)
            .map(|i| (i as i64 * 1000, vec![i as f32; 5]))
            .collect();
        let traj = make_trajectory(&points);

        let heatmap = dimension_heatmap(&traj).unwrap();
        assert_eq!(heatmap.time_bins, 9); // 10 points → 9 transitions
        assert_eq!(heatmap.dimensions, 5);
        assert_eq!(heatmap.data.len(), 9 * 5);
        assert_eq!(heatmap.timestamps.len(), 9);
    }

    #[test]
    fn heatmap_stationary_is_zero() {
        let points: Vec<(i64, Vec<f32>)> = (0..5)
            .map(|i| (i as i64 * 1000, vec![1.0, 2.0, 3.0]))
            .collect();
        let traj = make_trajectory(&points);

        let heatmap = dimension_heatmap(&traj).unwrap();
        for &v in &heatmap.data {
            assert!(v < 1e-6, "stationary entity heatmap should be zero");
        }
    }

    #[test]
    fn heatmap_detects_change_in_specific_dim() {
        let points = vec![
            (0i64, vec![0.0, 0.0, 0.0]),
            (1000, vec![0.0, 5.0, 0.0]), // only dim 1 changes
        ];
        let traj = make_trajectory(&points);

        let heatmap = dimension_heatmap(&traj).unwrap();
        assert!(heatmap.get(0, 0) < 1e-6); // dim 0 unchanged
        assert!((heatmap.get(0, 1) - 5.0).abs() < 1e-6); // dim 1 changed by 5
        assert!(heatmap.get(0, 2) < 1e-6); // dim 2 unchanged
    }

    #[test]
    fn heatmap_insufficient_data() {
        let points = vec![(0i64, vec![1.0])];
        let traj = make_trajectory(&points);
        assert!(dimension_heatmap(&traj).is_err());
    }
}