laminar_db/ai/
cache.rs

//! Per-row inference result cache, keyed `(content_hash, model_id, task, params_version)`.
//!
//! The composite key ensures different models, tasks, and parameter sets never collide on
//! the same input. Backed by `quick_cache::sync::Cache` with S3-FIFO eviction.
5
6use quick_cache::sync::{Cache, DefaultLifecycle};
7use quick_cache::{DefaultHashBuilder, Weighter};
8
9use crate::ai::provider::InferenceParams;
10use crate::ai::registry::Task;
11
12/// Cache key. All fields are `Copy`; lookups need no allocation.
13#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
14pub struct AiCacheKey {
15    /// xxh3-128 of the input text.
16    pub content_hash: u128,
17    /// Stable per-model integer assigned by the registry.
18    pub model_id: u32,
19    /// Task is part of the key because the same model returns different outputs
20    /// for classify vs sentiment on the same input.
21    pub task: Task,
22    /// Hash of the request parameters (see [`params_version`]).
23    pub params_version: u64,
24}
25
/// The inference result stored for a single row.
#[derive(Clone, Debug, PartialEq)]
pub enum CachedOutput {
    /// Textual result such as a label, completion, or summary.
    Text(String),
    /// Embedding represented as a vector of `f32` components.
    Vector(Vec<f32>),
    /// Scalar sentiment score in the range `[-1, 1]`.
    Score(f64),
}
36
37/// xxh3-128 of the input content.
38#[must_use]
39pub fn content_hash(input: &str) -> u128 {
40    xxhash_rust::xxh3::xxh3_128(input.as_bytes())
41}
42
43/// Hash of the parameters that affect model output (currently the label set).
44///
45/// Fold every output-affecting field of [`InferenceParams`] here explicitly —
46/// non-`Hash` fields like `f32` temperature must use `to_bits()`.
47#[must_use]
48pub fn params_version(params: &InferenceParams) -> u64 {
49    use std::hash::{Hash, Hasher};
50    let mut hasher = xxhash_rust::xxh3::Xxh3::new();
51    params.labels.hash(&mut hasher);
52    hasher.finish()
53}
54
/// Configuration for [`AiResultCache`].
///
/// Derives `PartialEq`/`Eq` in addition to `Copy` so two configurations can be
/// compared directly (for example in assertions).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct AiResultCacheConfig {
    /// Memory budget in bytes. Entries are weighted by payload size, not
    /// count, so large embeddings and small labels are both bounded correctly.
    pub capacity_bytes: usize,
}

impl Default for AiResultCacheConfig {
    /// Default configuration: a 64 MiB budget.
    fn default() -> Self {
        Self {
            capacity_bytes: 64 * 1024 * 1024,
        }
    }
}
70
71/// Payload bytes plus key/overhead so tiny entries still count against the budget.
72#[derive(Debug, Clone)]
73struct OutputWeighter;
74
75impl Weighter<AiCacheKey, CachedOutput> for OutputWeighter {
76    fn weight(&self, _key: &AiCacheKey, value: &CachedOutput) -> u64 {
77        let payload = match value {
78            CachedOutput::Text(s) => s.len(),
79            CachedOutput::Vector(v) => v.len() * std::mem::size_of::<f32>(),
80            CachedOutput::Score(_) => std::mem::size_of::<f64>(),
81        };
82        (payload + std::mem::size_of::<AiCacheKey>() + 32) as u64
83    }
84}
85
86/// `quick_cache`-backed in-memory cache of per-row inference results.
87pub struct AiResultCache {
88    cache: Cache<AiCacheKey, CachedOutput, OutputWeighter>,
89}
90
91impl AiResultCache {
92    /// Create a cache with the given configuration.
93    #[must_use]
94    pub fn new(config: AiResultCacheConfig) -> Self {
95        // Rough item count for internal table sizing; ~256 B/entry assumed.
96        let estimated_items = (config.capacity_bytes / 256).max(64);
97        let cache = Cache::with(
98            estimated_items,
99            config.capacity_bytes as u64,
100            OutputWeighter,
101            DefaultHashBuilder::default(),
102            DefaultLifecycle::default(),
103        );
104        Self { cache }
105    }
106
107    /// Create a cache with default configuration (64 MiB).
108    #[must_use]
109    pub fn with_defaults() -> Self {
110        Self::new(AiResultCacheConfig::default())
111    }
112
113    /// Look up a cached result.
114    #[must_use]
115    pub fn get(&self, key: &AiCacheKey) -> Option<CachedOutput> {
116        self.cache.get(key)
117    }
118
119    /// Insert a result.
120    pub fn insert(&self, key: AiCacheKey, value: CachedOutput) {
121        self.cache.insert(key, value);
122    }
123
124    /// Number of cached entries.
125    #[must_use]
126    pub fn len(&self) -> usize {
127        self.cache.len()
128    }
129
130    /// Whether the cache holds no entries.
131    #[must_use]
132    pub fn is_empty(&self) -> bool {
133        self.len() == 0
134    }
135}
136
137impl std::fmt::Debug for AiResultCache {
138    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
139        f.debug_struct("AiResultCache")
140            .field("len", &self.len())
141            .finish()
142    }
143}
144
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a sentiment-task key for `content` under the given model and label set.
    fn key(content: &str, model_id: u32, labels: Option<Vec<String>>) -> AiCacheKey {
        let version = params_version(&InferenceParams { labels });
        AiCacheKey {
            content_hash: content_hash(content),
            model_id,
            task: Task::Sentiment,
            params_version: version,
        }
    }

    #[test]
    fn params_version_separates_label_sets() {
        let with_labels = |names: &[&str]| InferenceParams {
            labels: Some(names.iter().map(|name| (*name).to_string()).collect()),
        };
        let two_labels = with_labels(&["pos", "neg"]);
        let three_labels = with_labels(&["pos", "neg", "neutral"]);
        let unlabeled = InferenceParams::default();

        let two_version = params_version(&two_labels);
        // Hashing the same parameters twice must give the same version.
        assert_eq!(two_version, params_version(&two_labels));
        // A different label set, or no explicit labels, must give a different one.
        assert_ne!(two_version, params_version(&three_labels));
        assert_ne!(two_version, params_version(&unlabeled));
    }

    #[test]
    fn same_text_different_model_does_not_collide() {
        let cache = AiResultCache::with_defaults();
        // Same input text under two model ids, each with its own expected label.
        let entries = [
            (key("flat quarter", 1, None), "neutral"),
            (key("flat quarter", 2, None), "negative"),
        ];

        for (cache_key, label) in &entries {
            cache.insert(*cache_key, CachedOutput::Text((*label).into()));
        }
        for (cache_key, label) in &entries {
            assert_eq!(
                cache.get(cache_key),
                Some(CachedOutput::Text((*label).into()))
            );
        }
    }
}
laminar_db/ai/cache.rs

laminar_db/ai/
cache.rs