sketches_ddsketch/
ddsketch.rs

1use std::error;
2use std::fmt;
3
4use crate::config::Config;
5use crate::store::Store;
6
7#[cfg(feature = "use_serde")]
8use serde::{Deserialize, Serialize};
9
10type Result<T> = std::result::Result<T, DDSketchError>;
11
12/// General error type for DDSketch, represents either an invalid quantile or an
13/// incompatible merge operation.
14///
15#[derive(Debug, Clone)]
16pub enum DDSketchError {
17    Quantile,
18    Merge,
19}
20impl fmt::Display for DDSketchError {
21    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
22        match self {
23            DDSketchError::Quantile => {
24                write!(f, "Invalid quantile, must be between 0 and 1 (inclusive)")
25            }
26            DDSketchError::Merge => write!(f, "Can not merge sketches with different configs"),
27        }
28    }
29}
30impl error::Error for DDSketchError {
31    fn source(&self) -> Option<&(dyn error::Error + 'static)> {
32        // Generic
33        None
34    }
35}
36
37/// This struct represents a [DDSketch](https://arxiv.org/pdf/1908.10693.pdf)
38#[derive(Clone)]
39#[cfg_attr(feature = "use_serde", derive(Serialize, Deserialize))]
40pub struct DDSketch {
41    config: Config,
42    store: Store,
43    negative_store: Store,
44    min: f64,
45    max: f64,
46    sum: f64,
47    zero_count: u64,
48}
49
50impl Default for DDSketch {
51    fn default() -> Self {
52        Self::new(Default::default())
53    }
54}
55
56// XXX: functions should return Option<> in the case of empty
57impl DDSketch {
58    /// Construct a `DDSketch`. Requires a `Config` specifying the parameters of the sketch
59    pub fn new(config: Config) -> Self {
60        DDSketch {
61            config,
62            store: Store::new(config.max_num_bins as usize),
63            negative_store: Store::new(config.max_num_bins as usize),
64            min: f64::INFINITY,
65            max: f64::NEG_INFINITY,
66            sum: 0.0,
67            zero_count: 0,
68        }
69    }
70
71    /// Add the sample to the sketch
72    pub fn add(&mut self, v: f64) {
73        if v > self.config.min_possible() {
74            let key = self.config.key(v);
75            self.store.add(key);
76        } else if v < -self.config.min_possible() {
77            let key = self.config.key(-v);
78            self.negative_store.add(key);
79        } else {
80            self.zero_count += 1;
81        }
82
83        if v < self.min {
84            self.min = v;
85        }
86        if self.max < v {
87            self.max = v;
88        }
89        self.sum += v;
90    }
91
92    /// Return the quantile value for quantiles between 0.0 and 1.0. Result is an error, represented
93    /// as DDSketchError::Quantile if the requested quantile is outside of that range.
94    ///
95    /// If the sketch is empty the result is None, else Some(v) for the quantile value.
96    pub fn quantile(&self, q: f64) -> Result<Option<f64>> {
97        if q < 0.0 || q > 1.0 {
98            return Err(DDSketchError::Quantile);
99        }
100
101        if self.empty() {
102            return Ok(None);
103        }
104
105        if q == 0.0 {
106            return Ok(Some(self.min));
107        } else if q == 1.0 {
108            return Ok(Some(self.max));
109        }
110
111        let rank = (q * (self.count() as f64 - 1.0)) as u64;
112        let quantile;
113        if rank < self.negative_store.count() {
114            let reversed_rank = self.negative_store.count() - rank - 1;
115            let key = self.negative_store.key_at_rank(reversed_rank);
116            quantile = -self.config.value(key);
117        } else if rank < self.zero_count + self.negative_store.count() {
118            quantile = 0.0;
119        } else {
120            let key = self
121                .store
122                .key_at_rank(rank - self.zero_count - self.negative_store.count());
123            quantile = self.config.value(key);
124        }
125
126        Ok(Some(quantile))
127    }
128
129    /// Returns the minimum value seen, or None if sketch is empty
130    pub fn min(&self) -> Option<f64> {
131        if self.empty() {
132            None
133        } else {
134            Some(self.min)
135        }
136    }
137
138    /// Returns the maximum value seen, or None if sketch is empty
139    pub fn max(&self) -> Option<f64> {
140        if self.empty() {
141            None
142        } else {
143            Some(self.max)
144        }
145    }
146
147    /// Returns the sum of values seen, or None if sketch is empty
148    pub fn sum(&self) -> Option<f64> {
149        if self.empty() {
150            None
151        } else {
152            Some(self.sum)
153        }
154    }
155
156    /// Returns the number of values added to the sketch
157    pub fn count(&self) -> usize {
158        (self.store.count() + self.zero_count + self.negative_store.count()) as usize
159    }
160
161    /// Returns the length of the underlying `Store`. This is mainly only useful for understanding
162    /// how much the sketch has grown given the inserted values.
163    pub fn length(&self) -> usize {
164        self.store.length() as usize
165    }
166
167    /// Merge the contents of another sketch into this one. The sketch that is merged into this one
168    /// is unchanged after the merge.
169    pub fn merge(&mut self, o: &DDSketch) -> Result<()> {
170        if self.config != o.config {
171            return Err(DDSketchError::Merge);
172        }
173
174        let was_empty = self.store.count() == 0;
175
176        // Merge the stores
177        self.store.merge(&o.store);
178        self.negative_store.merge(&o.negative_store);
179        self.zero_count += o.zero_count;
180
181        // Need to ensure we don't override min/max with initializers
182        // if either store were empty
183        if was_empty {
184            self.min = o.min;
185            self.max = o.max;
186        } else if o.store.count() > 0 {
187            if o.min < self.min {
188                self.min = o.min
189            }
190            if o.max > self.max {
191                self.max = o.max;
192            }
193        }
194        self.sum += o.sum;
195
196        Ok(())
197    }
198
199    fn empty(&self) -> bool {
200        self.count() == 0
201    }
202}
203
204#[cfg(test)]
205mod tests {
206    use approx::assert_relative_eq;
207
208    use crate::Config;
209    use crate::DDSketch;
210
211    #[test]
212    fn test_add_zero() {
213        let alpha = 0.01;
214        let c = Config::new(alpha, 2048, 10e-9);
215        let mut dd = DDSketch::new(c);
216        dd.add(0.0);
217    }
218
219    #[test]
220    fn test_quartiles() {
221        let alpha = 0.01;
222        let c = Config::new(alpha, 2048, 10e-9);
223        let mut dd = DDSketch::new(c);
224
225        // Initialize sketch with {1.0, 2.0, 3.0, 4.0}
226        for i in 1..5 {
227            dd.add(i as f64);
228        }
229
230        // We expect the following mappings from quantile to value:
231        // [0,0.33]: 1.0, (0.34,0.66]: 2.0, (0.67,0.99]: 3.0, (0.99, 1.0]: 4.0
232        let test_cases = vec![
233            (0.0, 1.0),
234            (0.25, 1.0),
235            (0.33, 1.0),
236            (0.34, 2.0),
237            (0.5, 2.0),
238            (0.66, 2.0),
239            (0.67, 3.0),
240            (0.75, 3.0),
241            (0.99, 3.0),
242            (1.0, 4.0),
243        ];
244
245        for (q, val) in test_cases {
246            assert_relative_eq!(dd.quantile(q).unwrap().unwrap(), val, max_relative = alpha);
247        }
248    }
249
250    #[test]
251    fn test_neg_quartiles() {
252        let alpha = 0.01;
253        let c = Config::new(alpha, 2048, 10e-9);
254        let mut dd = DDSketch::new(c);
255
256        // Initialize sketch with {1.0, 2.0, 3.0, 4.0}
257        for i in 1..5 {
258            dd.add(-i as f64);
259        }
260
261        let test_cases = vec![
262            (0.0, -4.0),
263            (0.25, -4.0),
264            (0.5, -3.0),
265            (0.75, -2.0),
266            (1.0, -1.0),
267        ];
268
269        for (q, val) in test_cases {
270            assert_relative_eq!(dd.quantile(q).unwrap().unwrap(), val, max_relative = alpha);
271        }
272    }
273
274    #[test]
275    fn test_simple_quantile() {
276        let c = Config::defaults();
277        let mut dd = DDSketch::new(c);
278
279        for i in 1..101 {
280            dd.add(i as f64);
281        }
282
283        assert_eq!(dd.quantile(0.95).unwrap().unwrap().ceil(), 95.0);
284
285        assert!(dd.quantile(-1.01).is_err());
286        assert!(dd.quantile(1.01).is_err());
287    }
288
289    #[test]
290    fn test_empty_sketch() {
291        let c = Config::defaults();
292        let dd = DDSketch::new(c);
293
294        assert_eq!(dd.quantile(0.98).unwrap(), None);
295        assert_eq!(dd.max(), None);
296        assert_eq!(dd.min(), None);
297        assert_eq!(dd.sum(), None);
298        assert_eq!(dd.count(), 0);
299
300        assert!(dd.quantile(1.01).is_err());
301    }
302
303    #[test]
304    fn test_basic_histogram_data() {
305        let values = &[
306            0.754225035,
307            0.752900282,
308            0.752812246,
309            0.752602367,
310            0.754310155,
311            0.753525981,
312            0.752981082,
313            0.752715536,
314            0.751667941,
315            0.755079054,
316            0.753528150,
317            0.755188464,
318            0.752508723,
319            0.750064549,
320            0.753960428,
321            0.751139298,
322            0.752523560,
323            0.753253428,
324            0.753498342,
325            0.751858358,
326            0.752104636,
327            0.753841300,
328            0.754467374,
329            0.753814334,
330            0.750881719,
331            0.753182556,
332            0.752576884,
333            0.753945708,
334            0.753571911,
335            0.752314573,
336            0.752586651,
337        ];
338
339        let c = Config::defaults();
340        let mut dd = DDSketch::new(c);
341
342        for value in values {
343            dd.add(*value);
344        }
345
346        assert_eq!(dd.max(), Some(0.755188464));
347        assert_eq!(dd.min(), Some(0.750064549));
348        assert_eq!(dd.count(), 31);
349        assert_eq!(dd.sum(), Some(23.343630625000003));
350
351        assert!(dd.quantile(0.25).unwrap().is_some());
352        assert!(dd.quantile(0.5).unwrap().is_some());
353        assert!(dd.quantile(0.75).unwrap().is_some());
354    }
355}