1use cvx_core::error::AnalyticsError;
19
20#[derive(Debug, Clone)]
24pub struct Motif {
25 pub canonical_index: usize,
27 pub occurrences: Vec<MotifOccurrence>,
29 pub period: Option<usize>,
31 pub mean_match_distance: f32,
33}
34
35#[derive(Debug, Clone)]
37pub struct MotifOccurrence {
38 pub start_index: usize,
40 pub timestamp: i64,
42 pub distance: f32,
44}
45
46#[derive(Debug, Clone)]
48pub struct Discord {
49 pub start_index: usize,
51 pub timestamp: i64,
53 pub nn_distance: f32,
55}
56
57pub fn matrix_profile(
79 trajectory: &[(i64, &[f32])],
80 window: usize,
81 exclusion_zone: f32,
82) -> Result<(Vec<f32>, Vec<usize>), AnalyticsError> {
83 let n = trajectory.len();
84 if n < 2 * window {
85 return Err(AnalyticsError::InsufficientData {
86 needed: 2 * window,
87 have: n,
88 });
89 }
90
91 let n_subs = n - window + 1;
92 let excl = ((window as f32) * exclusion_zone).ceil() as usize;
93
94 let mut profile = vec![f32::MAX; n_subs];
95 let mut profile_index = vec![0usize; n_subs];
96
97 for i in 0..n_subs {
98 for j in 0..n_subs {
99 if i.abs_diff(j) < excl {
101 continue;
102 }
103
104 let dist = subsequence_distance(trajectory, i, j, window);
105
106 if dist < profile[i] {
107 profile[i] = dist;
108 profile_index[i] = j;
109 }
110 }
111 }
112
113 Ok((profile, profile_index))
114}
115
116fn subsequence_distance(trajectory: &[(i64, &[f32])], i: usize, j: usize, window: usize) -> f32 {
118 let mut sum_sq = 0.0f64;
119 for step in 0..window {
120 let vi = trajectory[i + step].1;
121 let vj = trajectory[j + step].1;
122 for (a, b) in vi.iter().zip(vj.iter()) {
123 let diff = (*a as f64) - (*b as f64);
124 sum_sq += diff * diff;
125 }
126 }
127 (sum_sq / window as f64).sqrt() as f32
128}
129
130pub fn discover_motifs(
149 trajectory: &[(i64, &[f32])],
150 window: usize,
151 max_motifs: usize,
152 exclusion_zone: f32,
153) -> Result<Vec<Motif>, AnalyticsError> {
154 let (profile, profile_index) = matrix_profile(trajectory, window, exclusion_zone)?;
155 let n_subs = profile.len();
156 let excl = ((window as f32) * exclusion_zone).ceil() as usize;
157
158 let mut indices: Vec<usize> = (0..n_subs).collect();
160 indices.sort_by(|&a, &b| profile[a].partial_cmp(&profile[b]).unwrap());
161
162 let mut motifs: Vec<Motif> = Vec::new();
163 let mut used = vec![false; n_subs]; for &candidate in &indices {
166 if motifs.len() >= max_motifs {
167 break;
168 }
169 if used[candidate] || profile[candidate] == f32::MAX {
170 continue;
171 }
172
173 let threshold = (profile[candidate] * 2.0).max(1e-6);
175 let mut occurrences: Vec<MotifOccurrence> = Vec::new();
176
177 occurrences.push(MotifOccurrence {
179 start_index: candidate,
180 timestamp: trajectory[candidate].0,
181 distance: 0.0,
182 });
183
184 let nn = profile_index[candidate];
186 occurrences.push(MotifOccurrence {
187 start_index: nn,
188 timestamp: trajectory[nn].0,
189 distance: profile[candidate],
190 });
191
192 for k in 0..n_subs {
194 if k == candidate || k == nn {
195 continue;
196 }
197 if k.abs_diff(candidate) < excl || k.abs_diff(nn) < excl {
198 continue;
199 }
200 let dist = subsequence_distance(trajectory, candidate, k, window);
201 if dist <= threshold {
202 occurrences.push(MotifOccurrence {
203 start_index: k,
204 timestamp: trajectory[k].0,
205 distance: dist,
206 });
207 }
208 }
209
210 for occ in &occurrences {
212 let start = occ.start_index.saturating_sub(excl);
213 let end = (occ.start_index + excl).min(n_subs);
214 for flag in used.iter_mut().take(end).skip(start) {
215 *flag = true;
216 }
217 }
218
219 occurrences.sort_by_key(|o| o.start_index);
221
222 let period = detect_period(&occurrences);
224
225 let mean_match_distance = if occurrences.len() > 1 {
226 occurrences.iter().map(|o| o.distance).sum::<f32>() / occurrences.len() as f32
227 } else {
228 0.0
229 };
230
231 motifs.push(Motif {
232 canonical_index: candidate,
233 occurrences,
234 period,
235 mean_match_distance,
236 });
237 }
238
239 Ok(motifs)
240}
241
242pub fn discover_discords(
259 trajectory: &[(i64, &[f32])],
260 window: usize,
261 max_discords: usize,
262) -> Result<Vec<Discord>, AnalyticsError> {
263 let (profile, _) = matrix_profile(trajectory, window, 0.5)?;
264 let n_subs = profile.len();
265 let excl = ((window as f32) * 0.5).ceil() as usize;
266
267 let mut indices: Vec<usize> = (0..n_subs).collect();
269 indices.sort_by(|&a, &b| profile[b].partial_cmp(&profile[a]).unwrap());
270
271 let mut discords: Vec<Discord> = Vec::new();
272 let mut used = vec![false; n_subs];
273
274 for &candidate in &indices {
275 if discords.len() >= max_discords {
276 break;
277 }
278 if used[candidate] || profile[candidate] == f32::MAX {
279 continue;
280 }
281
282 discords.push(Discord {
283 start_index: candidate,
284 timestamp: trajectory[candidate].0,
285 nn_distance: profile[candidate],
286 });
287
288 let start = candidate.saturating_sub(excl);
290 let end = (candidate + excl).min(n_subs);
291 for flag in used.iter_mut().take(end).skip(start) {
292 *flag = true;
293 }
294 }
295
296 Ok(discords)
297}
298
299fn detect_period(occurrences: &[MotifOccurrence]) -> Option<usize> {
306 if occurrences.len() < 3 {
307 return None;
308 }
309
310 let gaps: Vec<usize> = occurrences
311 .windows(2)
312 .map(|w| w[1].start_index - w[0].start_index)
313 .collect();
314
315 if gaps.is_empty() {
316 return None;
317 }
318
319 let mut sorted_gaps = gaps.clone();
320 sorted_gaps.sort();
321 let median_gap = sorted_gaps[sorted_gaps.len() / 2];
322
323 if median_gap == 0 {
324 return None;
325 }
326
327 let tolerance = (median_gap as f64 * 0.2).max(1.0) as usize;
329 let is_regular = gaps.iter().all(|&g| g.abs_diff(median_gap) <= tolerance);
330
331 if is_regular { Some(median_gap) } else { None }
332}
333
334#[cfg(test)]
337#[allow(clippy::needless_range_loop)]
338mod tests {
339 use super::*;
340
341 fn as_refs(points: &[(i64, Vec<f32>)]) -> Vec<(i64, &[f32])> {
342 points.iter().map(|(t, v)| (*t, v.as_slice())).collect()
343 }
344
345 fn trajectory_with_planted_motif() -> Vec<(i64, Vec<f32>)> {
347 let dim = 4;
348 let n = 100;
349 let mut traj = Vec::with_capacity(n);
350
351 for i in 0..n {
352 let t = i as i64 * 1000;
353 let base: Vec<f32> = (0..dim).map(|d| (i as f32 * 0.01 + d as f32)).collect();
355 traj.push((t, base));
356 }
357
358 let motif_pattern: Vec<Vec<f32>> = (0..5)
360 .map(|step| {
361 (0..dim)
362 .map(|d| 10.0 + (step as f32 * 0.5) + d as f32 * 0.1)
363 .collect()
364 })
365 .collect();
366
367 for &start in &[10, 40, 70] {
368 for (step, pattern) in motif_pattern.iter().enumerate() {
369 traj[start + step].1 = pattern.clone();
370 }
371 }
372
373 traj
374 }
375
376 #[test]
379 fn matrix_profile_insufficient_data() {
380 let owned = vec![(0i64, vec![1.0f32]), (1, vec![2.0])];
381 let traj = as_refs(&owned);
382 let result = matrix_profile(&traj, 3, 0.5);
383 assert!(result.is_err());
384 }
385
386 #[test]
387 fn matrix_profile_basic() {
388 let owned: Vec<(i64, Vec<f32>)> = (0..10)
390 .map(|i| (i as i64 * 1000, vec![i as f32 * 0.1]))
391 .collect();
392 let traj = as_refs(&owned);
393
394 let (profile, index) = matrix_profile(&traj, 3, 0.5).unwrap();
395 assert_eq!(profile.len(), 8); assert_eq!(index.len(), 8);
397
398 for &val in &profile {
400 assert!(val.is_finite());
401 assert!(val >= 0.0);
402 }
403 }
404
405 #[test]
406 fn matrix_profile_identical_subsequences() {
407 let mut owned: Vec<(i64, Vec<f32>)> =
409 (0..10).map(|i| (i as i64 * 1000, vec![i as f32])).collect();
410
411 owned[5].1 = vec![0.0];
413 owned[6].1 = vec![1.0];
414 owned[7].1 = vec![2.0];
415
416 let traj = as_refs(&owned);
417 let (profile, index) = matrix_profile(&traj, 3, 0.5).unwrap();
418
419 assert!(
421 profile[0] < 0.01,
422 "identical subsequences should have ~0 profile, got {}",
423 profile[0]
424 );
425 assert_eq!(index[0], 5);
426 }
427
428 #[test]
431 fn motifs_planted_pattern() {
432 let owned = trajectory_with_planted_motif();
433 let traj = as_refs(&owned);
434
435 let motifs = discover_motifs(&traj, 5, 3, 0.5).unwrap();
436
437 assert!(!motifs.is_empty(), "should find at least one motif");
438
439 let best = &motifs[0];
440 assert!(
441 best.occurrences.len() >= 2,
442 "best motif should have at least 2 occurrences, got {}",
443 best.occurrences.len()
444 );
445
446 let planted = [10usize, 40, 70];
448 let found_planted = best
449 .occurrences
450 .iter()
451 .any(|o| planted.iter().any(|&p| o.start_index.abs_diff(p) <= 2));
452 assert!(
453 found_planted,
454 "should find at least one planted position, got indices: {:?}",
455 best.occurrences
456 .iter()
457 .map(|o| o.start_index)
458 .collect::<Vec<_>>()
459 );
460 }
461
462 #[test]
463 fn motifs_period_detection() {
464 let owned = trajectory_with_planted_motif();
465 let traj = as_refs(&owned);
466
467 let motifs = discover_motifs(&traj, 5, 1, 0.5).unwrap();
468 assert!(!motifs.is_empty());
469
470 let best = &motifs[0];
471 if best.occurrences.len() >= 3 {
472 if let Some(period) = best.period {
474 assert!(
475 period.abs_diff(30) <= 6,
476 "expected period ~30, got {period}"
477 );
478 }
479 }
480 }
481
482 #[test]
483 fn motifs_stationary_trajectory() {
484 let owned: Vec<(i64, Vec<f32>)> =
486 (0..30).map(|i| (i as i64 * 1000, vec![1.0, 2.0])).collect();
487 let traj = as_refs(&owned);
488
489 let motifs = discover_motifs(&traj, 5, 3, 0.5).unwrap();
490 assert!(!motifs.is_empty());
491 assert!(
493 motifs[0].mean_match_distance < 0.01,
494 "constant trajectory should have ~0 match distance"
495 );
496 }
497
498 #[test]
499 fn motifs_max_motifs_respected() {
500 let owned: Vec<(i64, Vec<f32>)> = (0..50)
501 .map(|i| (i as i64 * 1000, vec![(i as f32).sin()]))
502 .collect();
503 let traj = as_refs(&owned);
504
505 let motifs = discover_motifs(&traj, 5, 2, 0.5).unwrap();
506 assert!(motifs.len() <= 2);
507 }
508
509 #[test]
512 fn discords_planted_anomaly() {
513 let dim = 2;
514 let mut owned: Vec<(i64, Vec<f32>)> =
515 (0..50).map(|i| (i as i64 * 1000, vec![1.0; dim])).collect();
516
517 for step in 25..30 {
519 owned[step].1 = vec![100.0; dim];
520 }
521
522 let traj = as_refs(&owned);
523 let discords = discover_discords(&traj, 5, 3).unwrap();
524
525 assert!(!discords.is_empty(), "should detect planted anomaly");
526
527 let top = &discords[0];
529 assert!(
530 top.start_index.abs_diff(25) <= 5,
531 "top discord should be near position 25, got {}",
532 top.start_index
533 );
534 assert!(
535 top.nn_distance > 1.0,
536 "discord should have high nn_distance, got {}",
537 top.nn_distance
538 );
539 }
540
541 #[test]
542 fn discords_constant_trajectory() {
543 let owned: Vec<(i64, Vec<f32>)> = (0..30).map(|i| (i as i64 * 1000, vec![1.0])).collect();
545 let traj = as_refs(&owned);
546
547 let discords = discover_discords(&traj, 5, 3).unwrap();
548 for d in &discords {
550 assert!(
551 d.nn_distance < 0.01,
552 "constant trajectory should have ~0 discord distance"
553 );
554 }
555 }
556
557 #[test]
558 fn discords_max_discords_respected() {
559 let owned: Vec<(i64, Vec<f32>)> = (0..50)
560 .map(|i| (i as i64 * 1000, vec![(i as f32 * 0.1).sin()]))
561 .collect();
562 let traj = as_refs(&owned);
563
564 let discords = discover_discords(&traj, 5, 1).unwrap();
565 assert!(discords.len() <= 1);
566 }
567
568 #[test]
571 fn period_regular_spacing() {
572 let occs = vec![
573 MotifOccurrence {
574 start_index: 10,
575 timestamp: 10000,
576 distance: 0.0,
577 },
578 MotifOccurrence {
579 start_index: 30,
580 timestamp: 30000,
581 distance: 0.1,
582 },
583 MotifOccurrence {
584 start_index: 50,
585 timestamp: 50000,
586 distance: 0.1,
587 },
588 MotifOccurrence {
589 start_index: 70,
590 timestamp: 70000,
591 distance: 0.05,
592 },
593 ];
594 let period = detect_period(&occs);
595 assert_eq!(period, Some(20));
596 }
597
598 #[test]
599 fn period_irregular_spacing() {
600 let occs = vec![
601 MotifOccurrence {
602 start_index: 5,
603 timestamp: 5000,
604 distance: 0.0,
605 },
606 MotifOccurrence {
607 start_index: 10,
608 timestamp: 10000,
609 distance: 0.1,
610 },
611 MotifOccurrence {
612 start_index: 50,
613 timestamp: 50000,
614 distance: 0.1,
615 },
616 ];
617 let period = detect_period(&occs);
618 assert!(period.is_none(), "irregular gaps should return None");
619 }
620
621 #[test]
622 fn period_too_few_occurrences() {
623 let occs = vec![
624 MotifOccurrence {
625 start_index: 0,
626 timestamp: 0,
627 distance: 0.0,
628 },
629 MotifOccurrence {
630 start_index: 10,
631 timestamp: 10000,
632 distance: 0.1,
633 },
634 ];
635 assert!(detect_period(&occs).is_none());
636 }
637
638 #[test]
641 fn subseq_distance_identical() {
642 let owned: Vec<(i64, Vec<f32>)> = (0..10).map(|i| (i as i64, vec![1.0, 2.0])).collect();
643 let traj = as_refs(&owned);
644 let dist = subsequence_distance(&traj, 0, 5, 3);
645 assert!(
646 dist < 1e-6,
647 "identical subsequences should have ~0 distance"
648 );
649 }
650
651 #[test]
652 fn subseq_distance_different() {
653 let mut owned: Vec<(i64, Vec<f32>)> = (0..10).map(|i| (i as i64, vec![0.0])).collect();
654 for i in 5..8 {
656 owned[i].1 = vec![10.0];
657 }
658 let traj = as_refs(&owned);
659 let dist = subsequence_distance(&traj, 0, 5, 3);
660 assert!(
661 dist > 1.0,
662 "different subsequences should have large distance"
663 );
664 }
665}