// cvx_analytics/point_process.rs

//! Temporal point process features extracted from event timestamps.
//!
//! The *when* of events is a signal independent of *what* the vectors contain.
//! This module extracts features from inter-event intervals that characterize
//! temporal patterns: regularity, burstiness, self-excitation, circadian rhythms.
//!
//! # Features
//!
//! | Feature | Range | Interpretation |
//! |---------|-------|---------------|
//! | `burstiness` | [−1, 1] | −1 = perfectly regular, 0 = Poisson, +1 = maximally bursty |
//! | `memory` | [−1, 1] | Autocorrelation of consecutive gaps |
//! | `temporal_entropy` | [0, ln 10] | Entropy of gap distribution over 10 bins (higher = more irregular) |
//! | `intensity_trend` | ℝ | Slope of event rate over time (positive = accelerating) |
//! | `circadian_strength` | [0, 1] | Amplitude of 24h periodicity |
//!
//! # References
//!
//! - Goh, K.-I. & Barabási, A.-L. (2008). Burstiness and memory. *EPL*, 81(4).
//! - Hawkes, A.G. (1971). Self-exciting point processes. *Biometrika*, 58(1).
/// Features extracted from event timestamps.
///
/// Produced by [`extract_event_features`]; all fields are derived from the
/// inter-event intervals ("gaps") of a sorted timestamp sequence.
#[derive(Debug, Clone)]
pub struct EventFeatures {
    /// Number of events.
    pub n_events: usize,
    /// Total time span (last - first timestamp), in the caller's time units.
    pub span: f64,
    /// Mean inter-event interval.
    pub mean_gap: f64,
    /// Standard deviation of inter-event intervals (population form: divides by n).
    pub std_gap: f64,
    /// Burstiness parameter B = (σ - μ) / (σ + μ), per Goh & Barabási (2008).
    /// B = −1: perfectly regular. B = 0: Poisson. B → +1: bursty.
    pub burstiness: f64,
    /// Memory coefficient: lag-1 correlation between consecutive gaps.
    /// M > 0: short gaps follow short gaps (clustering).
    /// M < 0: short gaps follow long gaps (alternating).
    /// M ≈ 0: no memory (Poisson-like).
    pub memory: f64,
    /// Shannon entropy (nats) of the gap distribution (binned into 10 bins).
    /// Higher = more irregular/unpredictable event timing.
    pub temporal_entropy: f64,
    /// Slope of event rate over time (positive = accelerating).
    pub intensity_trend: f64,
    /// Coefficient of variation of gaps (std/mean).
    pub gap_cv: f64,
    /// Maximum gap (longest silence).
    pub max_gap: f64,
    /// Strength of 24h circadian rhythm (0 = no rhythm, 1 = strong).
    /// Only meaningful if timestamps are in seconds (period constant is 86,400).
    pub circadian_strength: f64,
}
54
55/// Extract temporal point process features from a sequence of timestamps.
56///
57/// Timestamps should be sorted in ascending order. Units are arbitrary
58/// but consistent (seconds, days, etc.).
59///
60/// # Arguments
61///
62/// * `timestamps` - Sorted event timestamps (at least 3 for meaningful features).
63///
64/// # Returns
65///
66/// [`EventFeatures`] struct with all computed features.
67pub fn extract_event_features(timestamps: &[i64]) -> Result<EventFeatures, PointProcessError> {
68    let n = timestamps.len();
69    if n < 3 {
70        return Err(PointProcessError::InsufficientEvents { got: n, need: 3 });
71    }
72
73    // Inter-event intervals
74    let gaps: Vec<f64> = timestamps
75        .windows(2)
76        .map(|w| (w[1] - w[0]) as f64)
77        .collect();
78    let n_gaps = gaps.len();
79
80    let span = (timestamps[n - 1] - timestamps[0]) as f64;
81    let mean_gap = gaps.iter().sum::<f64>() / n_gaps as f64;
82    let std_gap = (gaps.iter().map(|g| (g - mean_gap).powi(2)).sum::<f64>() / n_gaps as f64).sqrt();
83    let max_gap = gaps.iter().cloned().fold(f64::NEG_INFINITY, f64::max);
84
85    // Burstiness: B = (σ - μ) / (σ + μ)
86    let burstiness = if mean_gap + std_gap > 0.0 {
87        (std_gap - mean_gap) / (std_gap + mean_gap)
88    } else {
89        0.0
90    };
91
92    // Gap CV
93    let gap_cv = if mean_gap > 0.0 {
94        std_gap / mean_gap
95    } else {
96        0.0
97    };
98
99    // Memory coefficient: autocorrelation of consecutive gaps
100    let memory = if n_gaps >= 3 {
101        let mut num = 0.0;
102        let mut den1 = 0.0;
103        let mut den2 = 0.0;
104        for i in 0..n_gaps - 1 {
105            let a = gaps[i] - mean_gap;
106            let b = gaps[i + 1] - mean_gap;
107            num += a * b;
108            den1 += a * a;
109            den2 += b * b;
110        }
111        let denom = (den1 * den2).sqrt();
112        if denom > 1e-15 { num / denom } else { 0.0 }
113    } else {
114        0.0
115    };
116
117    // Temporal entropy: bin gaps into 10 bins, compute Shannon entropy
118    let temporal_entropy = {
119        let n_bins = 10;
120        let min_gap = gaps.iter().cloned().fold(f64::INFINITY, f64::min);
121        let range = max_gap - min_gap;
122        if range > 0.0 {
123            let mut counts = vec![0usize; n_bins];
124            for &g in &gaps {
125                let bin = (((g - min_gap) / range) * (n_bins - 1) as f64).round() as usize;
126                counts[bin.min(n_bins - 1)] += 1;
127            }
128            let total = n_gaps as f64;
129            counts
130                .iter()
131                .filter(|&&c| c > 0)
132                .map(|&c| {
133                    let p = c as f64 / total;
134                    -p * p.ln()
135                })
136                .sum()
137        } else {
138            0.0 // all gaps identical → zero entropy
139        }
140    };
141
142    // Intensity trend: divide timeline into 5 windows, fit slope of event count
143    let intensity_trend = {
144        let n_windows = 5;
145        let window_size = span / n_windows as f64;
146        if window_size > 0.0 {
147            let counts: Vec<f64> = (0..n_windows)
148                .map(|w| {
149                    let start = timestamps[0] as f64 + w as f64 * window_size;
150                    let end = start + window_size;
151                    timestamps
152                        .iter()
153                        .filter(|&&t| (t as f64) >= start && (t as f64) < end)
154                        .count() as f64
155                })
156                .collect();
157            // Linear regression slope
158            let x_mean = (n_windows - 1) as f64 / 2.0;
159            let y_mean = counts.iter().sum::<f64>() / n_windows as f64;
160            let mut num = 0.0;
161            let mut den = 0.0;
162            for (i, &c) in counts.iter().enumerate() {
163                let x = i as f64 - x_mean;
164                num += x * (c - y_mean);
165                den += x * x;
166            }
167            if den > 0.0 { num / den } else { 0.0 }
168        } else {
169            0.0
170        }
171    };
172
173    // Circadian strength: amplitude of 24h Fourier component
174    // Assumes timestamps are in seconds
175    let circadian_strength = {
176        let period = 86400.0; // 24 hours in seconds
177        let mut sin_sum = 0.0;
178        let mut cos_sum = 0.0;
179        for &t in timestamps {
180            let phase = 2.0 * std::f64::consts::PI * (t as f64 % period) / period;
181            sin_sum += phase.sin();
182            cos_sum += phase.cos();
183        }
184
185        ((sin_sum / n as f64).powi(2) + (cos_sum / n as f64).powi(2)).sqrt() // Range [0, 1], higher = stronger circadian pattern
186    };
187
188    Ok(EventFeatures {
189        n_events: n,
190        span,
191        mean_gap,
192        std_gap,
193        burstiness,
194        memory,
195        temporal_entropy,
196        intensity_trend,
197        gap_cv,
198        max_gap,
199        circadian_strength,
200    })
201}
202
/// Error types for point process analysis.
#[derive(Debug, thiserror::Error)]
pub enum PointProcessError {
    /// Not enough events: at least `need` timestamps are required to form
    /// meaningful inter-event statistics.
    #[error("insufficient events: got {got}, need at least {need}")]
    InsufficientEvents {
        /// Number of timestamps provided.
        got: usize,
        /// Minimum number of timestamps required.
        need: usize,
    },
}
215
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a timestamp sequence starting at `start` from a list of gaps.
    fn from_gaps(start: i64, gaps: &[i64]) -> Vec<i64> {
        let mut ts = vec![start];
        let mut t = start;
        for &g in gaps {
            t += g;
            ts.push(t);
        }
        ts
    }

    #[test]
    fn regular_events_low_burstiness() {
        // Evenly spaced events: every gap is exactly 10.
        let ts: Vec<i64> = (0..100).map(|i| i * 10).collect();
        let feats = extract_event_features(&ts).unwrap();
        assert!(
            feats.burstiness < -0.9,
            "regular events should have burstiness ≈ -1, got {}",
            feats.burstiness
        );
        assert!(feats.temporal_entropy < 0.1, "regular → low entropy");
    }

    #[test]
    fn poisson_events_zero_burstiness() {
        // Deterministic pseudo-exponential gaps standing in for Poisson arrivals.
        let ts = from_gaps(
            0,
            &[
                8, 12, 7, 15, 9, 11, 13, 6, 14, 10, 8, 12, 11, 9, 7, 13, 10, 14, 8, 12,
            ],
        );
        let feats = extract_event_features(&ts).unwrap();
        // With only 20 samples variance is high; just require a non-extreme
        // value between regular (-1) and bursty (+1).
        assert!(
            feats.burstiness > -0.8 && feats.burstiness < 0.8,
            "Poisson-like should have moderate burstiness, got {}",
            feats.burstiness
        );
    }

    #[test]
    fn bursty_events_high_burstiness() {
        // Five clusters of 10 rapid events, separated by long silences.
        let ts: Vec<i64> = (0..5)
            .flat_map(|burst| (0..10).map(move |i| burst * 1000 + i))
            .collect();
        let feats = extract_event_features(&ts).unwrap();
        assert!(
            feats.burstiness > 0.5,
            "bursty events should have B > 0.5, got {}",
            feats.burstiness
        );
    }

    #[test]
    fn memory_positive_for_clustered() {
        // Short gaps followed by short gaps, long by long.
        let ts = from_gaps(0, &[1, 1, 1, 50, 50, 50, 1, 1, 1, 50, 50, 50]);
        let feats = extract_event_features(&ts).unwrap();
        assert!(
            feats.memory > 0.2,
            "clustered gaps should have positive memory, got {}",
            feats.memory
        );
    }

    #[test]
    fn accelerating_positive_trend() {
        // Gaps shrink (100/i) so events get more frequent over time.
        let ts: Vec<i64> = (1..50)
            .scan(0i64, |t, i| {
                *t += 100 / i;
                Some(*t)
            })
            .collect();
        let feats = extract_event_features(&ts).unwrap();
        assert!(
            feats.intensity_trend > 0.0,
            "accelerating events should have positive trend, got {}",
            feats.intensity_trend
        );
    }

    #[test]
    fn insufficient_events_error() {
        // Fewer than 3 events must be rejected, whatever the count.
        for ts in [vec![1i64, 2], vec![1], vec![]] {
            assert!(extract_event_features(&ts).is_err());
        }
    }

    #[test]
    fn features_are_finite() {
        let ts: Vec<i64> = (0..50).map(|i| i * 100 + (i * 7) % 13).collect();
        let feats = extract_event_features(&ts).unwrap();
        assert!(feats.burstiness.is_finite());
        assert!(feats.memory.is_finite());
        assert!(feats.temporal_entropy.is_finite());
        assert!(feats.intensity_trend.is_finite());
        assert!(feats.circadian_strength.is_finite());
    }
}