@@ -89,6 +89,7 @@ class tdigest {
8989 using vector_t = std::vector<T, Allocator>;
9090 using vector_centroid = std::vector<centroid, typename std::allocator_traits<Allocator>::template rebind_alloc<centroid>>;
9191 using vector_bytes = std::vector<uint8_t , typename std::allocator_traits<Allocator>::template rebind_alloc<uint8_t >>;
92+ using vector_double = std::vector<double , typename std::allocator_traits<Allocator>::template rebind_alloc<double >>; // vector of double using Allocator rebound to double; return type of get_PMF() and get_CDF()
9293
9394 struct centroid_cmp {
9495 centroid_cmp () {}
@@ -142,20 +143,67 @@ class tdigest {
142143 */
143144 uint64_t get_total_weight () const ;
144145
146+ /* *
147+ * Returns an instance of the allocator for this t-Digest.
148+ * @return allocator
149+ */
150+ Allocator get_allocator () const ;
151+
145152 /* *
146153 * Compute approximate normalized rank of the given value.
154+ *
155+ * <p>If the sketch is empty this throws std::runtime_error.
156+ *
147157 * @param value to be ranked
148158 * @return normalized rank (from 0 to 1 inclusive)
149159 */
150160 double get_rank (T value) const ;
151161
152162 /* *
153163 * Compute approximate quantile value corresponding to the given normalized rank.
164+ *
165+ * <p>If the sketch is empty this throws std::runtime_error.
166+ *
154167 * @param rank normalized rank (from 0 to 1 inclusive)
155168 * @return quantile value corresponding to the given rank
156169 */
157170 T get_quantile (double rank) const ;
158171
172+ /* *
173+ * Returns an approximation to the Probability Mass Function (PMF) of the input stream
174+ * given a set of split points.
175+ *
176+ * <p>If the sketch is empty this throws std::runtime_error.
177+ *
178+ * @param split_points an array of <i>m</i> unique, monotonically increasing values
179+ * that divide the input domain into <i>m+1</i> consecutive disjoint intervals (bins).
180+ *
181+ * @param size the number of split points in the array
182+ *
183+ * @return an array of m+1 doubles each of which is an approximation
184+ * to the fraction of the input stream values (the mass) that fall into one of those intervals.
185+ */
186+ vector_double get_PMF (const T* split_points, uint32_t size) const ;
187+
188+ /* *
189+ * Returns an approximation to the Cumulative Distribution Function (CDF), which is the
190+ * cumulative analog of the PMF, of the input stream given a set of split points.
191+ *
192+ * <p>If the sketch is empty this throws std::runtime_error.
193+ *
194+ * @param split_points an array of <i>m</i> unique, monotonically increasing values
195+ * that divide the input domain into <i>m+1</i> consecutive disjoint intervals.
196+ *
197+ * @param size the number of split points in the array
198+ *
199+ * @return an array of m+1 doubles, which are a consecutive approximation to the CDF
200+ * of the input stream given the split_points. The value at array position j of the returned
201+ * CDF array is the sum of the returned values in positions 0 through j of the returned PMF
202+ * array. This can be viewed as an array of ranks of the given split points plus one more value
203+ * that is always 1.
204+ */
205+ vector_double get_CDF (const T* split_points, uint32_t size) const ;
206+
159207 /* *
160208 * @return parameter k (compression) that was used to configure this t-Digest
161209 */
@@ -245,6 +293,8 @@ class tdigest {
245293 // for compatibility with format of the reference implementation
246294 static tdigest deserialize_compat (std::istream& is, const Allocator& allocator = Allocator());
247295 static tdigest deserialize_compat (const void * bytes, size_t size, const Allocator& allocator = Allocator());
296+
297+ static inline void check_split_points (const T* values, uint32_t size); // NOTE(review): presumably validates the split points passed to get_PMF() / get_CDF() (documented there as unique and monotonically increasing); definition not visible here -- confirm
248298};
249299
250300} /* namespace datasketches */
0 commit comments