cvx_ingest/
validation.rs

//! Input validation for the ingestion pipeline.
//!
//! Validates incoming temporal points before they enter the storage and index layers.
//! Checks: dimension consistency, non-empty vector, timestamp range, finite components
//! (no NaN/Inf), and optionally a non-zero norm.
5
6use cvx_core::error::IngestError;
7use cvx_core::types::TemporalPoint;
8
/// Limits applied to every incoming point by the validation functions in this module.
///
/// The [`Default`] configuration accepts any dimensionality and any timestamp,
/// and rejects zero vectors.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ValidationConfig {
    /// Expected vector dimensionality. `0` means any dimensionality is accepted
    /// by the dimension check.
    pub expected_dim: usize,
    /// Minimum allowed timestamp in microseconds (inclusive).
    pub min_timestamp: i64,
    /// Maximum allowed timestamp in microseconds (inclusive).
    pub max_timestamp: i64,
    /// Whether to reject zero-norm vectors.
    pub reject_zero_vectors: bool,
}

impl Default for ValidationConfig {
    /// Builds the permissive configuration: no dimension constraint, the full
    /// `i64` timestamp range, and zero vectors rejected.
    fn default() -> Self {
        Self {
            expected_dim: 0,
            min_timestamp: i64::MIN,
            max_timestamp: i64::MAX,
            reject_zero_vectors: true,
        }
    }
}
32
33/// Validate a single temporal point.
34pub fn validate_point(point: &TemporalPoint, config: &ValidationConfig) -> Result<(), IngestError> {
35    // Dimension check
36    if config.expected_dim > 0 && point.dim() != config.expected_dim {
37        return Err(IngestError::DimensionMismatch {
38            entity_id: point.entity_id(),
39            expected: config.expected_dim,
40            got: point.dim(),
41        });
42    }
43
44    // Empty vector check
45    if point.dim() == 0 {
46        return Err(IngestError::ValidationFailed {
47            reason: "vector must have at least one dimension".into(),
48        });
49    }
50
51    // Timestamp range check
52    if point.timestamp() < config.min_timestamp || point.timestamp() > config.max_timestamp {
53        return Err(IngestError::ValidationFailed {
54            reason: format!(
55                "timestamp {} outside allowed range [{}, {}]",
56                point.timestamp(),
57                config.min_timestamp,
58                config.max_timestamp
59            ),
60        });
61    }
62
63    // NaN/Inf check
64    for (i, &v) in point.vector().iter().enumerate() {
65        if v.is_nan() {
66            return Err(IngestError::ValidationFailed {
67                reason: format!("NaN at dimension {i}"),
68            });
69        }
70        if v.is_infinite() {
71            return Err(IngestError::ValidationFailed {
72                reason: format!("Infinity at dimension {i}"),
73            });
74        }
75    }
76
77    // Zero vector check
78    if config.reject_zero_vectors {
79        let norm_sq: f32 = point.vector().iter().map(|v| v * v).sum();
80        if norm_sq == 0.0 {
81            return Err(IngestError::ValidationFailed {
82                reason: "zero vector not allowed".into(),
83            });
84        }
85    }
86
87    Ok(())
88}
89
90/// Validate a batch of points. Returns the index of the first invalid point.
91pub fn validate_batch(
92    points: &[TemporalPoint],
93    config: &ValidationConfig,
94) -> Result<(), (usize, IngestError)> {
95    for (i, point) in points.iter().enumerate() {
96        validate_point(point, config).map_err(|e| (i, e))?;
97    }
98    Ok(())
99}
100
#[cfg(test)]
mod tests {
    use super::*;

    /// A well-formed three-dimensional point used as the baseline input.
    fn valid_point() -> TemporalPoint {
        TemporalPoint::new(1, 1000, vec![0.1, 0.2, 0.3])
    }

    /// Default configuration narrowed to three-dimensional vectors.
    fn config_dim3() -> ValidationConfig {
        ValidationConfig {
            expected_dim: 3,
            ..Default::default()
        }
    }

    /// Validates `point` and returns the `ValidationFailed` reason,
    /// panicking on success or on any other error variant.
    fn failure_reason(point: &TemporalPoint, config: &ValidationConfig) -> String {
        match validate_point(point, config) {
            Err(IngestError::ValidationFailed { reason }) => reason,
            _ => panic!("expected ValidationFailed"),
        }
    }

    #[test]
    fn valid_point_passes() {
        assert!(validate_point(&valid_point(), &config_dim3()).is_ok());
    }

    #[test]
    fn wrong_dimension_rejected() {
        let point = TemporalPoint::new(1, 1000, vec![0.1, 0.2]);
        let err = validate_point(&point, &config_dim3()).unwrap_err();
        assert!(matches!(
            err,
            IngestError::DimensionMismatch {
                expected: 3,
                got: 2,
                ..
            }
        ));
    }

    #[test]
    fn empty_vector_rejected() {
        let point = TemporalPoint::new(1, 1000, vec![]);
        let config = ValidationConfig::default();
        let err = validate_point(&point, &config).unwrap_err();
        assert!(matches!(err, IngestError::ValidationFailed { .. }));
    }

    #[test]
    fn nan_rejected() {
        let point = TemporalPoint::new(1, 1000, vec![0.1, f32::NAN, 0.3]);
        let reason = failure_reason(&point, &config_dim3());
        assert!(reason.contains("NaN"));
        // The offending component index is part of the message.
        assert!(reason.contains("dimension 1"));
    }

    #[test]
    fn infinity_rejected() {
        let point = TemporalPoint::new(1, 1000, vec![0.1, f32::INFINITY, 0.3]);
        let reason = failure_reason(&point, &config_dim3());
        assert!(reason.contains("Infinity"));
        assert!(reason.contains("dimension 1"));
    }

    #[test]
    fn negative_infinity_rejected() {
        let point = TemporalPoint::new(1, 1000, vec![0.1, 0.2, f32::NEG_INFINITY]);
        let reason = failure_reason(&point, &config_dim3());
        assert!(reason.contains("Infinity"));
        assert!(reason.contains("dimension 2"));
    }

    #[test]
    fn zero_vector_rejected_by_default() {
        let point = TemporalPoint::new(1, 1000, vec![0.0, 0.0, 0.0]);
        let reason = failure_reason(&point, &config_dim3());
        assert!(reason.contains("zero"));
    }

    #[test]
    fn zero_vector_allowed_when_configured() {
        let point = TemporalPoint::new(1, 1000, vec![0.0, 0.0, 0.0]);
        let config = ValidationConfig {
            expected_dim: 3,
            reject_zero_vectors: false,
            ..Default::default()
        };
        assert!(validate_point(&point, &config).is_ok());
    }

    #[test]
    fn timestamp_out_of_range() {
        let config = ValidationConfig {
            min_timestamp: 0,
            max_timestamp: 10_000,
            ..Default::default()
        };
        let point = TemporalPoint::new(1, -100, vec![1.0]);
        assert!(validate_point(&point, &config).is_err());

        let point2 = TemporalPoint::new(1, 20_000, vec![1.0]);
        assert!(validate_point(&point2, &config).is_err());

        let point3 = TemporalPoint::new(1, 5000, vec![1.0]);
        assert!(validate_point(&point3, &config).is_ok());
    }

    #[test]
    fn timestamp_bounds_are_inclusive() {
        let config = ValidationConfig {
            min_timestamp: 0,
            max_timestamp: 10_000,
            ..Default::default()
        };
        let at_min = TemporalPoint::new(1, 0, vec![1.0]);
        let at_max = TemporalPoint::new(1, 10_000, vec![1.0]);
        assert!(validate_point(&at_min, &config).is_ok());
        assert!(validate_point(&at_max, &config).is_ok());

        // One step outside either bound is rejected.
        let below = TemporalPoint::new(1, -1, vec![1.0]);
        let above = TemporalPoint::new(1, 10_001, vec![1.0]);
        assert!(validate_point(&below, &config).is_err());
        assert!(validate_point(&above, &config).is_err());
    }

    #[test]
    fn any_dim_accepted_when_expected_dim_is_zero() {
        let config = ValidationConfig::default();
        let p1 = TemporalPoint::new(1, 100, vec![1.0]);
        let p2 = TemporalPoint::new(1, 100, vec![1.0; 768]);
        assert!(validate_point(&p1, &config).is_ok());
        assert!(validate_point(&p2, &config).is_ok());
    }

    #[test]
    fn batch_validation() {
        let config = config_dim3();
        let points = vec![
            TemporalPoint::new(1, 100, vec![1.0, 2.0, 3.0]),
            TemporalPoint::new(2, 200, vec![4.0, 5.0, 6.0]),
            TemporalPoint::new(3, 300, vec![7.0, 8.0]), // wrong dim
        ];
        let (idx, err) = validate_batch(&points, &config).unwrap_err();
        assert_eq!(idx, 2);
        assert!(matches!(err, IngestError::DimensionMismatch { .. }));
    }

    #[test]
    fn batch_reports_first_of_several_invalid_points() {
        let config = config_dim3();
        let points = vec![
            TemporalPoint::new(1, 100, vec![1.0, 2.0, 3.0]),
            TemporalPoint::new(2, 200, vec![4.0, f32::NAN, 6.0]), // first failure
            TemporalPoint::new(3, 300, vec![7.0, 8.0]),           // also invalid
        ];
        let (idx, err) = validate_batch(&points, &config).unwrap_err();
        assert_eq!(idx, 1);
        assert!(matches!(err, IngestError::ValidationFailed { .. }));
    }

    #[test]
    fn batch_all_valid() {
        let config = config_dim3();
        let points = vec![
            TemporalPoint::new(1, 100, vec![1.0, 2.0, 3.0]),
            TemporalPoint::new(2, 200, vec![4.0, 5.0, 6.0]),
        ];
        assert!(validate_batch(&points, &config).is_ok());
    }

    #[test]
    fn empty_batch_is_ok() {
        assert!(validate_batch(&[], &config_dim3()).is_ok());
    }
}