cvx_analytics/
signatures.rs

1//! Path signatures for trajectory characterization.
2//!
3//! Implements truncated path signatures from rough path theory — a **universal**
4//! and **order-aware** feature of sequential data. Any continuous function of a
5//! path can be approximated by a linear function of its signature.
6//!
7//! # Key Properties
8//!
9//! - **Universality**: sufficient statistics for trajectory classification
10//! - **Reparametrization invariance**: captures shape, not sampling rate
11//! - **Chen's Identity**: `S(α * β) = S(α) ⊗ S(β)` — incremental updates in O(K²)
12//! - **Hierarchical**: depth 1 = displacement, depth 2 = signed area (correlation/volatility)
13//!
14//! # Usage with CVX
15//!
16//! Path signatures operate on **region trajectories** (K~80 dims at L3), NOT on
17//! raw embeddings (D=768). The HNSW graph hierarchy provides the dimensionality
18//! reduction that makes signatures tractable:
19//!
20//! - Region trajectory at L3: K=80 → depth 2 signature = 80 + 6400 = 6,480 features
21//! - Raw embeddings: D=768 → depth 2 = 768 + 589,824 → intractable
22//!
23//! # References
24//!
25//! - Lyons, T.J. (1998). Differential equations driven by rough signals.
26//! - Chevyrev & Kormilitzin (2016). A primer on the signature method in ML.
27//! - Kidger & Lyons (2021). Signatory: differentiable computations of the signature.
28
29/// Configuration for signature computation.
30#[derive(Debug, Clone)]
31pub struct SignatureConfig {
32    /// Truncation depth (1-3). Higher = more expressive but larger output.
33    pub depth: usize,
34    /// Whether to augment with time as an extra dimension.
35    /// Breaks reparametrization invariance but captures speed information.
36    pub time_augmentation: bool,
37}
38
39impl Default for SignatureConfig {
40    fn default() -> Self {
41        Self {
42            depth: 2,
43            time_augmentation: false,
44        }
45    }
46}
47
48/// Computed path signature result.
49#[derive(Debug, Clone)]
50pub struct PathSignatureResult {
51    /// Truncation depth used.
52    pub depth: usize,
53    /// Input dimensionality (K for region trajectories).
54    pub input_dim: usize,
55    /// The signature vector (concatenation of all levels up to depth).
56    pub signature: Vec<f64>,
57    /// Total output dimensionality.
58    pub output_dim: usize,
59}
60
61/// Compute the truncated path signature of a trajectory.
62///
63/// The trajectory is a sequence of (timestamp, vector) pairs. The signature
64/// captures the ordered, multi-scale structure of the path.
65///
66/// # Arguments
67///
68/// * `trajectory` - Sequence of (timestamp, vector) pairs, ordered by time.
69/// * `config` - Truncation depth and options.
70///
71/// # Returns
72///
73/// The concatenated signature up to the specified depth:
74/// - Depth 1: K features (net displacement per dimension)
75/// - Depth 2: K + K² features (displacement + signed areas)
76/// - Depth 3: K + K² + K³ features
77///
78/// # Complexity
79///
80/// O(N × K^depth) where N = trajectory length, K = dimensionality.
81pub fn compute_signature(
82    trajectory: &[(i64, &[f32])],
83    config: &SignatureConfig,
84) -> Result<PathSignatureResult, SignatureError> {
85    if trajectory.len() < 2 {
86        return Err(SignatureError::InsufficientData {
87            got: trajectory.len(),
88            need: 2,
89        });
90    }
91
92    let k = trajectory[0].1.len();
93    if k == 0 {
94        return Err(SignatureError::ZeroDimension);
95    }
96
97    // Optionally augment with time dimension
98    let (points, dim) = if config.time_augmentation {
99        let t0 = trajectory[0].0 as f64;
100        let t_range = (trajectory.last().unwrap().0 - trajectory[0].0).max(1) as f64;
101        let augmented: Vec<Vec<f64>> = trajectory
102            .iter()
103            .map(|(ts, vec)| {
104                let mut p = Vec::with_capacity(k + 1);
105                p.push((*ts as f64 - t0) / t_range); // normalized time [0, 1]
106                p.extend(vec.iter().map(|&v| v as f64));
107                p
108            })
109            .collect();
110        (augmented, k + 1)
111    } else {
112        let points: Vec<Vec<f64>> = trajectory
113            .iter()
114            .map(|(_, vec)| vec.iter().map(|&v| v as f64).collect())
115            .collect();
116        (points, k)
117    };
118
119    // Compute increments: dx[i] = points[i+1] - points[i]
120    let n = points.len();
121    let increments: Vec<Vec<f64>> = (0..n - 1)
122        .map(|i| (0..dim).map(|d| points[i + 1][d] - points[i][d]).collect())
123        .collect();
124
125    let mut signature = Vec::new();
126    let mut output_dim = 0;
127
128    // Depth 1: S^i = sum of increments = net displacement
129    if config.depth >= 1 {
130        let mut level1 = vec![0.0f64; dim];
131        for dx in &increments {
132            for d in 0..dim {
133                level1[d] += dx[d];
134            }
135        }
136        output_dim += dim;
137        signature.extend_from_slice(&level1);
138    }
139
140    // Depth 2: S^{i,j} = sum_{s<t} dx_s^i * dx_t^j (iterated integral approximation)
141    if config.depth >= 2 {
142        let mut level2 = vec![0.0f64; dim * dim];
143        // Running sum for Chen's identity-style incremental computation:
144        // S^{i,j} = sum_t (cumsum_i[t] * dx_t^j)
145        let mut cumsum = vec![0.0f64; dim];
146        for dx in &increments {
147            // Accumulate: level2[i*dim + j] += cumsum[i] * dx[j]
148            for i in 0..dim {
149                for j in 0..dim {
150                    level2[i * dim + j] += cumsum[i] * dx[j];
151                }
152            }
153            // Update cumulative sum
154            for d in 0..dim {
155                cumsum[d] += dx[d];
156            }
157        }
158        output_dim += dim * dim;
159        signature.extend_from_slice(&level2);
160    }
161
162    // Depth 3: S^{i,j,k} (optional, expensive for large dim)
163    if config.depth >= 3 {
164        let mut level3 = vec![0.0f64; dim * dim * dim];
165        // S^{i,j,k} = sum_t cumsum2[i,j] * dx_t[k]
166        // where cumsum2[i,j] = running sum of level2 contributions up to t
167        let mut cumsum1 = vec![0.0f64; dim];
168        let mut cumsum2 = vec![0.0f64; dim * dim];
169        for dx in &increments {
170            // Accumulate depth 3
171            for i in 0..dim {
172                for j in 0..dim {
173                    for kk in 0..dim {
174                        level3[i * dim * dim + j * dim + kk] += cumsum2[i * dim + j] * dx[kk];
175                    }
176                }
177            }
178            // Update cumsum2: cumsum2[i,j] += cumsum1[i] * dx[j]
179            for i in 0..dim {
180                for j in 0..dim {
181                    cumsum2[i * dim + j] += cumsum1[i] * dx[j];
182                }
183            }
184            // Update cumsum1
185            for d in 0..dim {
186                cumsum1[d] += dx[d];
187            }
188        }
189        output_dim += dim * dim * dim;
190        signature.extend_from_slice(&level3);
191    }
192
193    Ok(PathSignatureResult {
194        depth: config.depth,
195        input_dim: dim,
196        signature,
197        output_dim,
198    })
199}
200
201/// Compute the log-signature (compact alternative via antisymmetric part).
202///
203/// For depth 2, the log-signature extracts the antisymmetric part of the
204/// level-2 signature: `L^{i,j} = (S^{i,j} - S^{j,i}) / 2`.
205/// This removes redundant symmetric components.
206///
207/// Dimension at depth 2: K + K(K-1)/2 (vs K + K² for full signature).
208pub fn compute_log_signature(
209    trajectory: &[(i64, &[f32])],
210    config: &SignatureConfig,
211) -> Result<PathSignatureResult, SignatureError> {
212    // First compute full signature
213    let full = compute_signature(trajectory, config)?;
214    let dim = full.input_dim;
215
216    let mut log_sig = Vec::new();
217
218    // Level 1: same as signature (displacement)
219    log_sig.extend_from_slice(&full.signature[..dim]);
220
221    // Level 2: antisymmetric part only (upper triangle of S^{i,j} - S^{j,i})
222    if config.depth >= 2 {
223        let level2_start = dim;
224        for i in 0..dim {
225            for j in (i + 1)..dim {
226                let s_ij = full.signature[level2_start + i * dim + j];
227                let s_ji = full.signature[level2_start + j * dim + i];
228                log_sig.push((s_ij - s_ji) / 2.0);
229            }
230        }
231    }
232
233    // Level 3 log-signature is more complex (Dynkin map / BCH formula).
234    // For now, we include the full level 3 if requested.
235    if config.depth >= 3 {
236        let level3_start = dim + dim * dim;
237        let level3_len = dim * dim * dim;
238        if full.signature.len() >= level3_start + level3_len {
239            log_sig.extend_from_slice(&full.signature[level3_start..level3_start + level3_len]);
240        }
241    }
242
243    let output_dim = log_sig.len();
244    Ok(PathSignatureResult {
245        depth: config.depth,
246        input_dim: dim,
247        signature: log_sig,
248        output_dim,
249    })
250}
251
252/// Incrementally update a signature when a new point is appended.
253///
254/// Uses Chen's Identity: S(α * β) = S(α) ⊗ S(β)
255/// where β is the single-segment path from the last point to the new point.
256///
257/// Cost: O(K²) for depth 2 (vs O(N × K²) for full recomputation).
258///
259/// # Arguments
260///
261/// * `existing` - The current signature (will be modified in-place).
262/// * `last_point` - The last point of the existing trajectory.
263/// * `new_point` - The new point being appended.
264pub fn update_signature_incremental(
265    existing: &mut PathSignatureResult,
266    last_point: &[f32],
267    new_point: &[f32],
268) -> Result<(), SignatureError> {
269    let dim = existing.input_dim;
270    if last_point.len() != dim || new_point.len() != dim {
271        return Err(SignatureError::DimensionMismatch {
272            expected: dim,
273            got: new_point.len(),
274        });
275    }
276
277    // Increment: dx = new_point - last_point
278    let dx: Vec<f64> = (0..dim)
279        .map(|d| new_point[d] as f64 - last_point[d] as f64)
280        .collect();
281
282    // Chen's Identity for depth ≤ 2:
283    // S(α * β)^i = S(α)^i + dx^i
284    // S(α * β)^{i,j} = S(α)^{i,j} + S(α)^i * dx^j + dx^i * dx^j / 2
285    //   (the last term is the self-integral of the single segment)
286
287    // Update level 1: displacement
288    for (sig, &delta) in existing.signature.iter_mut().zip(dx.iter()).take(dim) {
289        *sig += delta;
290    }
291
292    // Update level 2: signed area
293    if existing.depth >= 2 {
294        let level2_start = dim;
295        // Read current level 1 BEFORE the update we just did
296        // Actually, we already updated level 1. We need the value BEFORE update.
297        // So: old_s1[d] = existing.signature[d] - dx[d]
298        for i in 0..dim {
299            let old_s1_i = existing.signature[i] - dx[i];
300            for (j, &dxj) in dx.iter().enumerate().take(dim) {
301                existing.signature[level2_start + i * dim + j] += old_s1_i * dxj;
302            }
303        }
304    }
305
306    Ok(())
307}
308
309/// Compute L2 distance between two signatures.
310///
311/// This is a fast trajectory similarity measure: O(output_dim) per comparison,
312/// capturing all order-dependent temporal dynamics.
313pub fn signature_distance(a: &PathSignatureResult, b: &PathSignatureResult) -> f64 {
314    if a.signature.len() != b.signature.len() {
315        return f64::INFINITY;
316    }
317    a.signature
318        .iter()
319        .zip(b.signature.iter())
320        .map(|(x, y)| (x - y) * (x - y))
321        .sum::<f64>()
322        .sqrt()
323}
324
325/// Error types for signature computation.
326#[derive(Debug, thiserror::Error)]
327pub enum SignatureError {
328    /// Not enough data points.
329    #[error("insufficient data: got {got} points, need at least {need}")]
330    InsufficientData {
331        /// Number of points provided.
332        got: usize,
333        /// Minimum required.
334        need: usize,
335    },
336    /// Zero-dimensional vectors.
337    #[error("zero-dimensional input vectors")]
338    ZeroDimension,
339    /// Dimension mismatch in incremental update.
340    #[error("dimension mismatch: expected {expected}, got {got}")]
341    DimensionMismatch {
342        /// Expected dimension.
343        expected: usize,
344        /// Actual dimension.
345        got: usize,
346    },
347}
348
349#[cfg(test)]
350mod tests {
351    use super::*;
352
353    fn make_trajectory<'a>(points: &'a [&'a [f32]]) -> Vec<(i64, &'a [f32])> {
354        points
355            .iter()
356            .enumerate()
357            .map(|(i, p)| (i as i64, *p))
358            .collect()
359    }
360
361    #[test]
362    fn depth1_is_displacement() {
363        let traj = make_trajectory(&[&[0.0, 0.0], &[1.0, 0.0], &[1.0, 2.0]]);
364        let config = SignatureConfig {
365            depth: 1,
366            time_augmentation: false,
367        };
368        let result = compute_signature(&traj, &config).unwrap();
369
370        assert_eq!(result.output_dim, 2);
371        // Net displacement: (1.0, 2.0)
372        assert!((result.signature[0] - 1.0).abs() < 1e-10);
373        assert!((result.signature[1] - 2.0).abs() < 1e-10);
374    }
375
376    #[test]
377    fn depth2_captures_rotation() {
378        // Path going right then up: (0,0) → (1,0) → (1,1)
379        let traj_a = make_trajectory(&[&[0.0, 0.0], &[1.0, 0.0], &[1.0, 1.0]]);
380        // Path going up then right: (0,0) → (0,1) → (1,1)
381        let traj_b = make_trajectory(&[&[0.0, 0.0], &[0.0, 1.0], &[1.0, 1.0]]);
382
383        let config = SignatureConfig {
384            depth: 2,
385            time_augmentation: false,
386        };
387        let sig_a = compute_signature(&traj_a, &config).unwrap();
388        let sig_b = compute_signature(&traj_b, &config).unwrap();
389
390        // Same displacement (depth 1)
391        assert!((sig_a.signature[0] - sig_b.signature[0]).abs() < 1e-10);
392        assert!((sig_a.signature[1] - sig_b.signature[1]).abs() < 1e-10);
393
394        // Different signed area (depth 2) — this is the key:
395        // path A and B have opposite signed areas
396        let area_a = sig_a.signature[2 + 1]; // S^{0,1} for path A
397        let area_b = sig_b.signature[2 + 1]; // S^{0,1} for path B
398        assert!(
399            (area_a - area_b).abs() > 0.1,
400            "Depth 2 should distinguish rotation: area_a={area_a}, area_b={area_b}"
401        );
402    }
403
404    #[test]
405    fn incremental_matches_full() {
406        let points: Vec<[f32; 3]> = vec![
407            [0.0, 0.0, 0.0],
408            [1.0, 0.5, -0.3],
409            [1.5, 1.0, 0.2],
410            [2.0, 0.8, 0.5],
411        ];
412
413        let config = SignatureConfig {
414            depth: 2,
415            time_augmentation: false,
416        };
417
418        // Full signature on all 4 points
419        let traj_full: Vec<(i64, &[f32])> = points
420            .iter()
421            .enumerate()
422            .map(|(i, p)| (i as i64, p.as_slice()))
423            .collect();
424        let full = compute_signature(&traj_full, &config).unwrap();
425
426        // Incremental: compute on first 3, then update with 4th
427        let traj_3: Vec<(i64, &[f32])> = points[..3]
428            .iter()
429            .enumerate()
430            .map(|(i, p)| (i as i64, p.as_slice()))
431            .collect();
432        let mut incremental = compute_signature(&traj_3, &config).unwrap();
433        update_signature_incremental(&mut incremental, &points[2], &points[3]).unwrap();
434
435        // Should match within numerical precision
436        for (i, (a, b)) in full
437            .signature
438            .iter()
439            .zip(incremental.signature.iter())
440            .enumerate()
441        {
442            assert!(
443                (a - b).abs() < 1e-8,
444                "Mismatch at index {i}: full={a}, incremental={b}"
445            );
446        }
447    }
448
449    #[test]
450    fn log_signature_is_smaller() {
451        let traj = make_trajectory(&[&[0.0, 0.0, 0.0], &[1.0, 0.5, -0.3], &[1.5, 1.0, 0.2]]);
452        let config = SignatureConfig {
453            depth: 2,
454            time_augmentation: false,
455        };
456
457        let full = compute_signature(&traj, &config).unwrap();
458        let log = compute_log_signature(&traj, &config).unwrap();
459
460        // K=3: full depth 2 = 3 + 9 = 12, log depth 2 = 3 + 3 = 6
461        assert_eq!(full.output_dim, 12);
462        assert_eq!(log.output_dim, 6);
463    }
464
465    #[test]
466    fn signature_distance_works() {
467        let traj_a = make_trajectory(&[&[0.0, 0.0], &[1.0, 0.0], &[1.0, 1.0]]);
468        let traj_b = make_trajectory(&[&[0.0, 0.0], &[0.0, 1.0], &[1.0, 1.0]]);
469        let traj_c = make_trajectory(&[&[0.0, 0.0], &[1.0, 0.0], &[1.0, 1.0]]); // same as A
470
471        let config = SignatureConfig {
472            depth: 2,
473            time_augmentation: false,
474        };
475        let sig_a = compute_signature(&traj_a, &config).unwrap();
476        let sig_b = compute_signature(&traj_b, &config).unwrap();
477        let sig_c = compute_signature(&traj_c, &config).unwrap();
478
479        assert!(signature_distance(&sig_a, &sig_c) < 1e-10); // identical
480        assert!(signature_distance(&sig_a, &sig_b) > 0.1); // different paths
481    }
482
483    #[test]
484    fn insufficient_data_error() {
485        let traj = make_trajectory(&[&[1.0, 2.0]]);
486        let config = SignatureConfig::default();
487        assert!(compute_signature(&traj, &config).is_err());
488    }
489}