cvx_core/traits/
mod.rs

1//! Core trait definitions for ChronosVector subsystems.
2//!
3//! These traits define the contracts between subsystems. Each crate implements
4//! the relevant traits, enabling loose coupling and testability via mock implementations.
5
6pub mod quantizer;
7
8use crate::error::{AnalyticsError, IndexError, QueryError, StorageError};
9use crate::types::{ChangePoint, CpdMethod, ScoredResult, TemporalFilter, TemporalPoint};
10
/// Algebraic operations available on an embedding vector.
///
/// Describes the vector-space structure that embeddings live in: a zero
/// element, component-wise addition, and scalar multiplication.
/// Layer 0 only needs these signatures; it does not require implementations.
pub trait VectorSpace: Clone + Send + Sync {
    /// Returns the number of components of this vector.
    fn dim(&self) -> usize;

    /// Builds the zero vector for a space of `dim` components.
    fn zero(dim: usize) -> Self;

    /// Returns the component-wise sum of `self` and `other`.
    fn add(&self, other: &Self) -> Self;

    /// Returns `self` with every component multiplied by `factor`.
    fn scale(&self, factor: f32) -> Self;

    /// Borrows the components as a slice of `f32`.
    fn as_slice(&self) -> &[f32];
}
31
/// A distance function between two vectors.
///
/// Every implementation is expected to uphold these properties:
/// - Non-negativity: $d(a, b) \geq 0$
/// - Identity: $d(a, a) = 0$
/// - Symmetry: $d(a, b) = d(b, a)$
///
/// The triangle inequality is welcome but optional, since cosine distance
/// does not satisfy it.
pub trait DistanceMetric: Send + Sync {
    /// Returns the distance between `a` and `b`.
    ///
    /// # Panics
    ///
    /// Implementations should panic if `a.len() != b.len()`.
    fn distance(&self, a: &[f32], b: &[f32]) -> f32;

    /// Returns a human-readable identifier for the metric
    /// (e.g., `"cosine"`, `"l2"`).
    fn name(&self) -> &str;
}
51
52/// Persistent storage backend for temporal points.
53///
54/// Abstracts over the underlying storage engine (in-memory, RocksDB, etc.).
55pub trait StorageBackend: Send + Sync {
56    /// Retrieve a single point by entity, space, and timestamp.
57    fn get(
58        &self,
59        entity_id: u64,
60        space_id: u32,
61        timestamp: i64,
62    ) -> Result<Option<TemporalPoint>, StorageError>;
63
64    /// Store a temporal point.
65    fn put(&self, space_id: u32, point: &TemporalPoint) -> Result<(), StorageError>;
66
67    /// Retrieve all points for an entity in a time range, ordered by timestamp.
68    fn range(
69        &self,
70        entity_id: u64,
71        space_id: u32,
72        start: i64,
73        end: i64,
74    ) -> Result<Vec<TemporalPoint>, StorageError>;
75
76    /// Delete a specific point.
77    fn delete(&self, entity_id: u64, space_id: u32, timestamp: i64) -> Result<(), StorageError>;
78}
79
80// ─── Segregated traits (RFC-012 P6) ────────────────────────────
81
82/// Core search operations on a temporal index.
83pub trait TemporalSearch: Send + Sync {
84    /// Search with temporal filtering, returning (node_id, score) pairs.
85    fn search_raw(
86        &self,
87        query: &[f32],
88        k: usize,
89        filter: TemporalFilter,
90        alpha: f32,
91        query_timestamp: i64,
92    ) -> Vec<(u32, f32)>;
93}
94
95/// Access to individual points and trajectories.
96pub trait TrajectoryAccess: Send + Sync {
97    /// Retrieve trajectory for an entity: (timestamp, node_id) pairs.
98    fn trajectory(&self, entity_id: u64, filter: TemporalFilter) -> Vec<(i64, u32)>;
99
100    /// Get the vector for a node. Returns owned vec for thread safety.
101    fn vector(&self, node_id: u32) -> Vec<f32>;
102
103    /// Get the entity_id for a node.
104    fn entity_id(&self, node_id: u32) -> u64;
105
106    /// Get the timestamp for a node.
107    fn timestamp(&self, node_id: u32) -> i64;
108
109    /// Number of points in the index.
110    fn len(&self) -> usize;
111
112    /// Whether the index is empty.
113    fn is_empty(&self) -> bool {
114        self.len() == 0
115    }
116}
117
118/// Low-level temporal index access combining search + trajectory + regions.
119///
120/// Implements all operations. The segregated traits (`TemporalSearch`,
121/// `TrajectoryAccess`) are automatically implemented for any type that
122/// implements `TemporalIndexAccess` via blanket impls.
123pub trait TemporalIndexAccess: Send + Sync {
124    /// Search with temporal filtering, returning (node_id, score) pairs.
125    fn search_raw(
126        &self,
127        query: &[f32],
128        k: usize,
129        filter: TemporalFilter,
130        alpha: f32,
131        query_timestamp: i64,
132    ) -> Vec<(u32, f32)>;
133
134    /// Retrieve trajectory for an entity: (timestamp, node_id) pairs.
135    fn trajectory(&self, entity_id: u64, filter: TemporalFilter) -> Vec<(i64, u32)>;
136
137    /// Get the vector for a node. Returns owned vec for thread safety.
138    fn vector(&self, node_id: u32) -> Vec<f32>;
139
140    /// Get the entity_id for a node.
141    fn entity_id(&self, node_id: u32) -> u64;
142
143    /// Get the timestamp for a node.
144    fn timestamp(&self, node_id: u32) -> i64;
145
146    /// Number of points in the index.
147    fn len(&self) -> usize;
148
149    /// Whether the index is empty.
150    fn is_empty(&self) -> bool {
151        self.len() == 0
152    }
153
154    /// Get semantic regions at a given HNSW level (RFC-004).
155    /// Returns `(hub_node_id, hub_vector, n_assigned)` per region.
156    fn regions(&self, _level: usize) -> Vec<(u32, Vec<f32>, usize)> {
157        Vec::new()
158    }
159
160    /// Get points belonging to a specific region, optionally time-filtered (RFC-005).
161    /// Returns `(node_id, entity_id, timestamp)` per member.
162    fn region_members(
163        &self,
164        _region_hub: u32,
165        _level: usize,
166        _filter: TemporalFilter,
167    ) -> Vec<(u32, u64, i64)> {
168        Vec::new()
169    }
170
171    /// Assign all nodes to regions in a single O(N) pass, optionally time-filtered.
172    /// Returns HashMap<hub_id, Vec<(entity_id, timestamp)>>.
173    fn region_assignments(
174        &self,
175        _level: usize,
176        _filter: TemporalFilter,
177    ) -> std::collections::HashMap<u32, Vec<(u64, i64)>> {
178        std::collections::HashMap::new()
179    }
180
181    /// Smoothed region-distribution trajectory for an entity (RFC-004).
182    fn region_trajectory(
183        &self,
184        _entity_id: u64,
185        _level: usize,
186        _window_days: i64,
187        _alpha: f32,
188    ) -> Vec<(i64, Vec<f32>)> {
189        Vec::new()
190    }
191
192    /// Get metadata for a node. Returns empty map if not available.
193    fn metadata(&self, _node_id: u32) -> std::collections::HashMap<String, String> {
194        std::collections::HashMap::new()
195    }
196
197    /// Search with metadata filtering (post-filter on search results).
198    /// Default: ignores metadata filter and delegates to search_raw.
199    fn search_with_metadata(
200        &self,
201        query: &[f32],
202        k: usize,
203        filter: TemporalFilter,
204        alpha: f32,
205        query_timestamp: i64,
206        metadata_filter: &crate::types::MetadataFilter,
207    ) -> Vec<(u32, f32)> {
208        if metadata_filter.is_empty() {
209            return self.search_raw(query, k, filter, alpha, query_timestamp);
210        }
211        // Over-fetch and post-filter
212        let overfetch = k * 4;
213        let candidates = self.search_raw(query, overfetch, filter, alpha, query_timestamp);
214        candidates
215            .into_iter()
216            .filter(|&(nid, _)| metadata_filter.matches(&self.metadata(nid)))
217            .take(k)
218            .collect()
219    }
220}
221
222// Blanket implementations: any TemporalIndexAccess automatically
223// implements the segregated traits.
224impl<T: TemporalIndexAccess> TemporalSearch for T {
225    fn search_raw(
226        &self,
227        query: &[f32],
228        k: usize,
229        filter: TemporalFilter,
230        alpha: f32,
231        query_timestamp: i64,
232    ) -> Vec<(u32, f32)> {
233        TemporalIndexAccess::search_raw(self, query, k, filter, alpha, query_timestamp)
234    }
235}
236
237impl<T: TemporalIndexAccess> TrajectoryAccess for T {
238    fn trajectory(&self, entity_id: u64, filter: TemporalFilter) -> Vec<(i64, u32)> {
239        TemporalIndexAccess::trajectory(self, entity_id, filter)
240    }
241    fn vector(&self, node_id: u32) -> Vec<f32> {
242        TemporalIndexAccess::vector(self, node_id)
243    }
244    fn entity_id(&self, node_id: u32) -> u64 {
245        TemporalIndexAccess::entity_id(self, node_id)
246    }
247    fn timestamp(&self, node_id: u32) -> i64 {
248        TemporalIndexAccess::timestamp(self, node_id)
249    }
250    fn len(&self) -> usize {
251        TemporalIndexAccess::len(self)
252    }
253}
254
255/// Index backend for approximate nearest neighbor search.
256///
257/// Abstracts over the indexing structure (HNSW, brute-force, etc.).
258pub trait IndexBackend: Send + Sync {
259    /// Insert a point into the index.
260    fn insert(&self, entity_id: u64, vector: &[f32], timestamp: i64) -> Result<u32, IndexError>;
261
262    /// Search for the k nearest neighbors with temporal filtering.
263    ///
264    /// `alpha` controls the semantic vs temporal weight:
265    /// - `alpha = 1.0`: pure semantic distance
266    /// - `alpha = 0.0`: pure temporal distance
267    ///
268    /// `query_timestamp` is the reference time for temporal distance computation.
269    fn search(
270        &self,
271        query: &[f32],
272        k: usize,
273        filter: TemporalFilter,
274        alpha: f32,
275        query_timestamp: i64,
276    ) -> Result<Vec<ScoredResult>, QueryError>;
277
278    /// Remove a point from the index.
279    fn remove(&self, point_id: u64) -> Result<(), IndexError>;
280
281    /// Number of points in the index.
282    fn len(&self) -> usize;
283
284    /// Whether the index is empty.
285    fn is_empty(&self) -> bool {
286        self.len() == 0
287    }
288}
289
290/// Analytics backend for temporal analysis operations.
291///
292/// Provides prediction, change point detection, and differential calculus.
293pub trait AnalyticsBackend: Send + Sync {
294    /// Predict a future vector state using the learned trajectory model.
295    fn predict(
296        &self,
297        trajectory: &[TemporalPoint],
298        target_timestamp: i64,
299    ) -> Result<TemporalPoint, AnalyticsError>;
300
301    /// Detect change points in a trajectory.
302    fn detect_changepoints(
303        &self,
304        trajectory: &[TemporalPoint],
305        method: CpdMethod,
306    ) -> Result<Vec<ChangePoint>, AnalyticsError>;
307
308    /// Compute the velocity vector at a given timestamp.
309    fn velocity(
310        &self,
311        trajectory: &[TemporalPoint],
312        timestamp: i64,
313    ) -> Result<Vec<f32>, AnalyticsError>;
314}
315
316// ─── Embedder trait (RFC-009) ───────────────────────────────────────
317
318/// Error type for embedding operations.
319#[derive(Debug, thiserror::Error)]
320pub enum EmbedError {
321    /// Model not loaded or unavailable.
322    #[error("model not available: {0}")]
323    ModelNotAvailable(String),
324    /// Input text is empty or invalid.
325    #[error("invalid input: {0}")]
326    InvalidInput(String),
327    /// Backend-specific error.
328    #[error("embedding error: {0}")]
329    BackendError(String),
330}
331
332/// Trait for converting text to embedding vectors.
333///
334/// Implementations may use local models (ONNX, TorchScript) or
335/// remote APIs (OpenAI, Cohere).
336pub trait Embedder: Send + Sync {
337    /// Embed a single text string into a vector.
338    fn embed(&self, text: &str) -> Result<Vec<f32>, EmbedError>;
339
340    /// Embed multiple texts in a batch (more efficient for APIs).
341    fn embed_batch(&self, texts: &[&str]) -> Result<Vec<Vec<f32>>, EmbedError> {
342        texts.iter().map(|t| self.embed(t)).collect()
343    }
344
345    /// Output dimensionality of the embedding model.
346    fn dimension(&self) -> usize;
347
348    /// Name of the embedding model.
349    fn model_name(&self) -> &str;
350}