GDAL
raster_stats.h
1/******************************************************************************
2*
3 * Project: GDAL
4 * Purpose: GDALZonalStats implementation
5 * Author: Dan Baston
6 *
7 ******************************************************************************
8 * Copyright (c) 2018-2025, ISciences LLC
9 *
10 * SPDX-License-Identifier: MIT
11 ****************************************************************************/
12
13#pragma once
14
15#include <algorithm>
16#include <cmath>
17#include <limits>
18#include <optional>
19#include <unordered_map>
20
22
23namespace gdal
24{
25struct RasterStatsOptions
26{
27 static constexpr float min_coverage_fraction_default =
28 std::numeric_limits<float>::min(); // ~1e-38
29
30 float min_coverage_fraction = min_coverage_fraction_default;
31 bool calc_variance = false;
32 bool store_histogram = false;
33 bool store_values = false;
34 bool store_weights = false;
35 bool store_coverage_fraction = false;
36 bool store_xy = false;
37 bool include_nodata = false;
38 double default_weight = std::numeric_limits<double>::quiet_NaN();
39
40 bool operator==(const RasterStatsOptions &other) const
41 {
42 return min_coverage_fraction == other.min_coverage_fraction &&
43 calc_variance == other.calc_variance &&
44 store_histogram == other.store_histogram &&
45 store_values == other.store_values &&
46 store_weights == other.store_weights &&
47 store_coverage_fraction == other.store_coverage_fraction &&
48 store_xy == other.store_xy &&
49 include_nodata == other.include_nodata &&
50 (default_weight == other.default_weight ||
51 (std::isnan(default_weight) &&
52 std::isnan(other.default_weight)));
53 }
54
55 bool operator!=(const RasterStatsOptions &other) const
56 {
57 return !(*this == other);
58 }
59};
60
61class WestVariance
62{
68
69 private:
70 double sum_w = 0;
71 double mean = 0;
72 double t = 0;
73
74 public:
80 void process(double x, double w)
81 {
82 if (w == 0)
83 {
84 return;
85 }
86
87 double mean_old = mean;
88
89 sum_w += w;
90 mean += (w / sum_w) * (x - mean_old);
91 t += w * (x - mean_old) * (x - mean);
92 }
93
96 constexpr double variance() const
97 {
98 return t / sum_w;
99 }
100
103 double stdev() const
104 {
105 return std::sqrt(variance());
106 }
107
110 double coefficent_of_variation() const
111 {
112 return stdev() / mean;
113 }
114};
115
116template <typename ValueType> class RasterStats
117{
118 public:
124 explicit RasterStats(const RasterStatsOptions &options)
125 : m_min{std::numeric_limits<ValueType>::max()},
126 m_max{std::numeric_limits<ValueType>::lowest()}, m_sum_ciwi{0},
127 m_sum_ci{0}, m_sum_xici{0}, m_sum_xiciwi{0}, m_options{options}
128 {
129 }
130
131 // All pixels covered 100%
132 void process(const ValueType *pValues, const GByte *pabyMask,
133 const double *padfWeights, const GByte *pabyWeightsMask,
134 const double *padfX, const double *padfY, size_t nX, size_t nY)
135 {
136 for (size_t i = 0; i < nX * nY; i++)
137 {
138 if (pabyMask[i] == 255)
139 {
140 if (padfX && padfY)
141 {
142 process_location(padfX[i % nX], padfY[i / nX]);
143 }
144 const double dfWeight =
145 pabyWeightsMask
146 ? (pabyWeightsMask[i] == 255
147 ? padfWeights[i]
148 : std::numeric_limits<double>::quiet_NaN())
149 : 1.0;
150 process_value(pValues[i], 1.0, dfWeight);
151 }
152 }
153 }
154
155 // Pixels covered 0% or 100%
156 void process(const ValueType *pValues, const GByte *pabyMask,
157 const double *padfWeights, const GByte *pabyWeightsMask,
158 const GByte *pabyCov, const double *pdfX, const double *pdfY,
159 size_t nX, size_t nY)
160 {
161 for (size_t i = 0; i < nX * nY; i++)
162 {
163 if (pabyMask[i] == 255 && pabyCov[i])
164 {
165 if (pdfX && pdfY)
166 {
167 process_location(pdfX[i % nX], pdfY[i / nX]);
168 }
169 const double dfWeight =
170 pabyWeightsMask
171 ? (pabyWeightsMask[i] == 255
172 ? padfWeights[i]
173 : std::numeric_limits<double>::quiet_NaN())
174 : 1.0;
175 process_value(pValues[i], 1.0, dfWeight);
176 }
177 }
178 }
179
180 // Pixels fractionally covered
181 void process(const ValueType *pValues, const GByte *pabyMask,
182 const double *padfWeights, const GByte *pabyWeightsMask,
183 const float *pfCov, const double *pdfX, const double *pdfY,
184 size_t nX, size_t nY)
185 {
186 for (size_t i = 0; i < nX * nY; i++)
187 {
188 if (pabyMask[i] == 255 &&
189 pfCov[i] >= m_options.min_coverage_fraction)
190 {
191 if (pdfX && pdfY)
192 {
193 process_location(pdfX[i % nX], pdfY[i / nX]);
194 }
195 const double dfWeight =
196 pabyWeightsMask
197 ? (pabyWeightsMask[i] == 255
198 ? padfWeights[i]
199 : std::numeric_limits<double>::quiet_NaN())
200 : 1.0;
201 process_value(pValues[i], pfCov[i], dfWeight);
202 }
203 }
204 }
205
206 void process_location(double x, double y)
207 {
208 if (m_options.store_xy)
209 {
210 m_cell_x.push_back(x);
211 m_cell_y.push_back(y);
212 }
213 }
214
215 void process_value(const ValueType &val, float coverage, double weight)
216 {
217 if (m_options.store_coverage_fraction)
218 {
219 m_cell_cov.push_back(coverage);
220 }
221
222 m_sum_ci += static_cast<double>(coverage);
223 m_sum_xici += static_cast<double>(val) * static_cast<double>(coverage);
224
225 double ciwi = static_cast<double>(coverage) * weight;
226 m_sum_ciwi += ciwi;
227 m_sum_xiciwi += static_cast<double>(val) * ciwi;
228
229 if (m_options.calc_variance)
230 {
231 m_variance.process(static_cast<double>(val),
232 static_cast<double>(coverage));
233 m_weighted_variance.process(static_cast<double>(val), ciwi);
234 }
235
236 if (val < m_min)
237 {
238 m_min = val;
239 if (m_options.store_xy)
240 {
241 m_min_xy = {m_cell_x.back(), m_cell_y.back()};
242 }
243 }
244
245 if (val > m_max)
246 {
247 m_max = val;
248 if (m_options.store_xy)
249 {
250 m_max_xy = {m_cell_x.back(), m_cell_y.back()};
251 }
252 }
253
254 if (m_options.store_histogram)
255 {
256 auto &entry = m_freq[val];
257 entry.m_sum_ci += static_cast<double>(coverage);
258 entry.m_sum_ciwi += ciwi;
259 }
260
261 if (m_options.store_values)
262 {
263 m_cell_values.push_back(val);
264 }
265
266 if (m_options.store_weights)
267 {
268 m_cell_weights.push_back(weight);
269 }
270 }
271
276 double mean() const
277 {
278 if (count() > 0)
279 {
280 return sum() / count();
281 }
282 else
283 {
284 return std::numeric_limits<double>::quiet_NaN();
285 }
286 }
287
297 double weighted_mean() const
298 {
299 if (weighted_count() > 0)
300 {
301 return weighted_sum() / weighted_count();
302 }
303 else
304 {
305 return std::numeric_limits<double>::quiet_NaN();
306 }
307 }
308
313 double weighted_fraction() const
314 {
315 return weighted_sum() / sum();
316 }
317
324 std::optional<ValueType> mode() const
325 {
326 auto it = std::max_element(
327 m_freq.cbegin(), m_freq.cend(),
328 [](const auto &a, const auto &b)
329 {
330 return a.second.m_sum_ci < b.second.m_sum_ci ||
331 (a.second.m_sum_ci == b.second.m_sum_ci &&
332 a.first < b.first);
333 });
334 if (it == m_freq.end())
335 {
336 return std::nullopt;
337 }
338 return it->first;
339 }
340
345 std::optional<ValueType> min() const
346 {
347 if (m_sum_ci == 0)
348 {
349 return std::nullopt;
350 }
351 return m_min;
352 }
353
356 std::optional<std::pair<double, double>> min_xy() const
357 {
358 if (m_sum_ci == 0)
359 {
360 return std::nullopt;
361 }
362 return m_min_xy;
363 }
364
369 std::optional<ValueType> max() const
370 {
371 if (m_sum_ci == 0)
372 {
373 return std::nullopt;
374 }
375 return m_max;
376 }
377
380 std::optional<std::pair<double, double>> max_xy() const
381 {
382 if (m_sum_ci == 0)
383 {
384 return std::nullopt;
385 }
386 return m_max_xy;
387 }
388
393 double sum() const
394 {
395 return m_sum_xici;
396 }
397
406 double weighted_sum() const
407 {
408 return m_sum_xiciwi;
409 }
410
416 double count() const
417 {
418 return m_sum_ci;
419 }
420
426 std::optional<double> count(const ValueType &value) const
427 {
428 const auto &entry = m_freq.find(value);
429
430 if (entry == m_freq.end())
431 {
432 return std::nullopt;
433 }
434
435 return entry->second.m_sum_ci;
436 }
437
443 std::optional<double> frac(const ValueType &value) const
444 {
445 auto count_for_value = count(value);
446
447 if (!count_for_value.has_value())
448 {
449 return count_for_value;
450 }
451
452 return count_for_value.value() / count();
453 }
454
459 std::optional<double> weighted_frac(const ValueType &value) const
460 {
461 auto count_for_value = weighted_count(value);
462
463 if (!count_for_value.has_value())
464 {
465 return count_for_value;
466 }
467
468 return count_for_value.value() / weighted_count();
469 }
470
476 double variance() const
477 {
478 return m_variance.variance();
479 }
480
486 double weighted_variance() const
487 {
488 return m_weighted_variance.variance();
489 }
490
497 double stdev() const
498 {
499 return m_variance.stdev();
500 }
501
507 double weighted_stdev() const
508 {
509 return m_weighted_variance.stdev();
510 }
511
521 double weighted_count() const
522 {
523 return m_sum_ciwi;
524 }
525
535 std::optional<double> weighted_count(const ValueType &value) const
536 {
537 const auto &entry = m_freq.find(value);
538
539 if (entry == m_freq.end())
540 {
541 return std::nullopt;
542 }
543
544 return entry->second.m_sum_ciwi;
545 }
546
555 std::optional<ValueType> minority() const
556 {
557 auto it = std::min_element(
558 m_freq.cbegin(), m_freq.cend(),
559 [](const auto &a, const auto &b)
560 {
561 return a.second.m_sum_ci < b.second.m_sum_ci ||
562 (a.second.m_sum_ci == b.second.m_sum_ci &&
563 a.first < b.first);
564 });
565 if (it == m_freq.end())
566 {
567 return std::nullopt;
568 }
569 return it->first;
570 }
571
576 std::uint64_t variety() const
577 {
578 return m_freq.size();
579 }
580
581 const std::vector<ValueType> &values() const
582 {
583 return m_cell_values;
584 }
585
586 const std::vector<bool> &values_defined() const
587 {
588 return m_cell_values_defined;
589 }
590
591 const std::vector<float> &coverage_fractions() const
592 {
593 return m_cell_cov;
594 }
595
596 const std::vector<double> &weights() const
597 {
598 return m_cell_weights;
599 }
600
601 const std::vector<bool> &weights_defined() const
602 {
603 return m_cell_weights_defined;
604 }
605
606 const std::vector<double> &center_x() const
607 {
608 return m_cell_x;
609 }
610
611 const std::vector<double> &center_y() const
612 {
613 return m_cell_y;
614 }
615
616 const auto &freq() const
617 {
618 return m_freq;
619 }
620
621 private:
622 ValueType m_min{};
623 ValueType m_max{};
624 std::pair<double, double> m_min_xy{
625 std::numeric_limits<double>::quiet_NaN(),
626 std::numeric_limits<double>::quiet_NaN()};
627 std::pair<double, double> m_max_xy{
628 std::numeric_limits<double>::quiet_NaN(),
629 std::numeric_limits<double>::quiet_NaN()};
630
631 // ci: coverage fraction of pixel i
632 // wi: weight of pixel i
633 // xi: value of pixel i
634 double m_sum_ciwi{0};
635 double m_sum_ci{0};
636 double m_sum_xici{0};
637 double m_sum_xiciwi{0};
638
639 WestVariance m_variance{};
640 WestVariance m_weighted_variance{};
641
642 struct ValueFreqEntry
643 {
644 double m_sum_ci = 0;
645 double m_sum_ciwi = 0;
646 };
647
648 std::unordered_map<ValueType, ValueFreqEntry> m_freq{};
649
650 std::vector<float> m_cell_cov{};
651 std::vector<ValueType> m_cell_values{};
652 std::vector<double> m_cell_weights{};
653 std::vector<double> m_cell_x{};
654 std::vector<double> m_cell_y{};
655 std::vector<bool> m_cell_values_defined{};
656 std::vector<bool> m_cell_weights_defined{};
657
658 RasterStatsOptions m_options;
659};
660
661template <typename T>
662std::ostream &operator<<(std::ostream &os, const RasterStats<T> &stats)
663{
664 os << "{" << std::endl;
665 os << " \"count\" : " << stats.count() << "," << std::endl;
666
667 os << " \"min\" : ";
668 if (stats.min().has_value())
669 {
670 os << stats.min().value();
671 }
672 else
673 {
674 os << "null";
675 }
676 os << "," << std::endl;
677
678 os << " \"max\" : ";
679 if (stats.max().has_value())
680 {
681 os << stats.max().value();
682 }
683 else
684 {
685 os << "null";
686 }
687 os << "," << std::endl;
688
689 os << " \"mean\" : " << stats.mean() << "," << std::endl;
690 os << " \"sum\" : " << stats.sum() << "," << std::endl;
691 os << " \"weighted_mean\" : " << stats.weighted_mean() << "," << std::endl;
692 os << " \"weighted_sum\" : " << stats.weighted_sum();
693 if (stats.stores_values())
694 {
695 os << "," << std::endl;
696 os << " \"mode\" : ";
697 if (stats.mode().has_value())
698 {
699 os << stats.mode().value();
700 }
701 else
702 {
703 os << "null";
704 }
705 os << "," << std::endl;
706
707 os << " \"minority\" : ";
708 if (stats.minority().has_value())
709 {
710 os << stats.minority().value();
711 }
712 else
713 {
714 os << "null";
715 }
716 os << "," << std::endl;
717
718 os << " \"variety\" : " << stats.variety() << std::endl;
719 }
720 else
721 {
722 os << std::endl;
723 }
724 os << "}" << std::endl;
725 return os;
726}
727
728} // namespace gdal
729
unsigned char GByte
Unsigned byte type.
Definition cpl_port.h:175