001/* 002 * This file is part of McIDAS-V 003 * 004 * Copyright 2007-2025 005 * Space Science and Engineering Center (SSEC) 006 * University of Wisconsin - Madison 007 * 1225 W. Dayton Street, Madison, WI 53706, USA 008 * https://www.ssec.wisc.edu/mcidas/ 009 * 010 * All Rights Reserved 011 * 012 * McIDAS-V is built on Unidata's IDV and SSEC's VisAD libraries, and 013 * some McIDAS-V source code is based on IDV and VisAD source code. 014 * 015 * McIDAS-V is free software; you can redistribute it and/or modify 016 * it under the terms of the GNU Lesser Public License as published by 017 * the Free Software Foundation; either version 3 of the License, or 018 * (at your option) any later version. 019 * 020 * McIDAS-V is distributed in the hope that it will be useful, 021 * but WITHOUT ANY WARRANTY; without even the implied warranty of 022 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 023 * GNU Lesser Public License for more details. 024 * 025 * You should have received a copy of the GNU Lesser Public License 026 * along with this program. If not, see https://www.gnu.org/licenses/. 027 */ 028package edu.wisc.ssec.mcidasv.data.hydra; 029 030import static edu.wisc.ssec.mcidasv.data.StatsTable.fmtMe; 031import static java.util.Arrays.asList; 032 033import java.rmi.RemoteException; 034import java.util.ArrayList; 035import java.util.Collection; 036import java.util.Collections; 037import java.util.EnumSet; 038import java.util.HashSet; 039import java.util.List; 040import java.util.Set; 041 042import org.apache.commons.math3.random.EmpiricalDistribution; 043import org.apache.commons.math3.stat.correlation.PearsonsCorrelation; 044import org.apache.commons.math3.stat.descriptive.DescriptiveStatistics; 045import org.apache.commons.math3.stat.StatUtils; 046import org.apache.commons.math3.stat.descriptive.SummaryStatistics; 047 048import visad.Data; 049import visad.FlatField; 050import visad.FunctionType; 051import visad.MathType; 052import visad.Real; 053import visad.RealTuple; 054import visad.RealTupleType; 055import visad.RealType; 056import visad.TupleType; 057import visad.VisADException; 058 059/** 060 * Used to obtain various commonly used statistics for VisAD 061 * {@link FlatField FlatFields}. 062 */ 063public class Statistics { 064 065 /** 066 * Various types of statistics reported by the 067 * {@link #describe(Object...)} {@literal "function"}. 068 */ 069 enum DescribeParams { 070 HISTOGRAM, 071 LENGTH, 072 MIN, 073 MAX, 074 RANGE, 075 Q1, 076 Q2, 077 Q3, 078 IQR, 079 MEAN, 080 MODE, 081 KURTOSIS, 082 SKEWNESS, 083 STDDEV, 084 VARIANCE, 085 GOODPTS 086 } 087 088 /** 089 * Characters used to create {@literal "sparklines"}. 090 */ 091 private static final List<Character> CHARS = 092 asList('\u2581', '\u2582', '\u2583', '\u2584', '\u2585', '\u2586', 093 '\u2587', '\u2588'); 094 095 DescriptiveStatistics[] descriptiveStats = null; 096 097 double[][] values_x; 098 double[][] rngVals; 099 100 int rngTupLen; 101 102 int numPoints; 103 104 int[] numGoodPoints; 105 106 MathType statType; 107 108 PearsonsCorrelation pCorrelation = null; 109 110 public Statistics(FlatField fltFld) throws VisADException { 111 rngVals = fltFld.getValues(false); 112 rngTupLen = rngVals.length; 113 numPoints = fltFld.getDomainSet().getLength(); 114 numGoodPoints = new int[rngTupLen]; 115 116 values_x = new double[rngTupLen][]; 117 118 for (int k = 0; k < rngTupLen; k++) { 119 values_x[k] = removeMissing(rngVals[k]); 120 numGoodPoints[k] = values_x[k].length; 121 } 122 123 descriptiveStats = new DescriptiveStatistics[rngTupLen]; 124 for (int k = 0; k < rngTupLen; k++) { 125 descriptiveStats[k] = new DescriptiveStatistics(values_x[k]); 126 } 127 128 MathType rangeType = ((FunctionType) fltFld.getType()).getRange(); 129 130 if (rangeType instanceof RealTupleType) { 131 RealType[] rttypes = ((TupleType) rangeType).getRealComponents(); 132 if (rngTupLen > 1) { 133 statType = new RealTupleType(rttypes); 134 } else { 135 statType = rttypes[0]; 136 } 137 } else if (rangeType instanceof RealType) { 138 statType = rangeType; 139 } else { 140 throw new VisADException("fltFld must be RealTupleType or RealType"); 141 } 142 143 pCorrelation = new PearsonsCorrelation(); 144 } 145 146 /** 147 * Get the number of points in the domain of the {@link FlatField}. 148 * 149 * @return Number of points. 150 */ 151 public int numPoints() { 152 return numPoints; 153 } 154 155 /** 156 * Get the number of non-missing points in each range component. 157 * 158 * @return Number of non-missing points. 159 */ 160 public int[] getNumGoodPoints() { 161 return numGoodPoints; 162 } 163 164 /** 165 * Get the original range values. 166 * 167 * @return Original range values. 168 */ 169 public double[][] getRngVals() { 170 return rngVals; 171 } 172 173 /** 174 * Get the range values actually used (missing removed). 175 * 176 * @return Range values used. 177 */ 178 public double[][] getValues() { 179 return values_x; 180 } 181 182 private double[] removeMissing(double[] vals) { 183 int num = vals.length; 184 int cnt = 0; 185 int[] good = new int[num]; 186 for (int k = 0; k < num; k++) { 187 if (!(Double.isNaN(vals[k]))) { 188 good[cnt] = k; 189 cnt++; 190 } 191 } 192 193 if (cnt == num) { 194 return vals; 195 } 196 197 double[] newVals = new double[cnt]; 198 for (int k = 0; k < cnt; k++) { 199 newVals[k] = vals[good[k]]; 200 } 201 202 return newVals; 203 } 204 205 private double[][] removeMissing(double[][] vals) { 206 int tupLen = vals.length; 207 double[][] newVals = new double[tupLen][]; 208 for (int k = 0; k < tupLen; k++) { 209 newVals[k] = removeMissing(vals[k]); 210 } 211 return newVals; 212 } 213 214 public Data mean() throws VisADException, RemoteException { 215 double[] stats = new double[rngTupLen]; 216 for (int k = 0; k < rngTupLen; k++) { 217 stats[k] = descriptiveStats[k].getMean(); 218 } 219 return makeStat(stats); 220 } 221 222 public Data geometricMean() throws VisADException, RemoteException { 223 double[] stats = new double[rngTupLen]; 224 for (int k = 0; k < rngTupLen; k++) { 225 stats[k] = descriptiveStats[k].getGeometricMean(); 226 } 227 return makeStat(stats); 228 } 229 230 231 public Data max() throws VisADException, RemoteException { 232 double[] stats = new double[rngTupLen]; 233 for (int k = 0; k < rngTupLen; k++) { 234 stats[k] = descriptiveStats[k].getMax(); 235 } 236 return makeStat(stats); 237 } 238 239 public Data min() throws VisADException, RemoteException { 240 double[] stats = new double[rngTupLen]; 241 for (int k = 0; k < rngTupLen; k++) { 242 stats[k] = descriptiveStats[k].getMin(); 243 } 244 return makeStat(stats); 245 } 246 247 public Data median() throws VisADException, RemoteException { 248 double[] stats = new double[rngTupLen]; 249 for (int k = 0; k < rngTupLen; k++) { 250 stats[k] = descriptiveStats[k].getPercentile(50.0); 251 } 252 return makeStat(stats); 253 } 254 255 public Data percentile(double p) throws VisADException, RemoteException { 256 double[] stats = new double[rngTupLen]; 257 for (int k = 0; k < rngTupLen; k++) { 258 stats[k] = descriptiveStats[k].getPercentile(p); 259 } 260 return makeStat(stats); 261 } 262 263 public Data variance() throws VisADException, RemoteException { 264 double[] stats = new double[rngTupLen]; 265 for (int k = 0; k < rngTupLen; k++) { 266 stats[k] = descriptiveStats[k].getVariance(); 267 } 268 return makeStat(stats); 269 } 270 271 public Data kurtosis() throws VisADException, RemoteException { 272 double[] stats = new double[rngTupLen]; 273 for (int k = 0; k < rngTupLen; k++) { 274 stats[k] = descriptiveStats[k].getKurtosis(); 275 } 276 return makeStat(stats); 277 } 278 279 public Data standardDeviation() throws VisADException, RemoteException { 280 double[] stats = new double[rngTupLen]; 281 for (int k = 0; k < rngTupLen; k++) { 282 stats[k] = descriptiveStats[k].getStandardDeviation(); 283 } 284 return makeStat(stats); 285 } 286 287 public Data skewness() throws VisADException, RemoteException { 288 double[] stats = new double[rngTupLen]; 289 for (int k = 0; k < rngTupLen; k++) { 290 stats[k] = descriptiveStats[k].getSkewness(); 291 } 292 return makeStat(stats); 293 } 294 295 public Data correlation(FlatField fltFld) 296 throws VisADException, RemoteException { 297 double[][] values_x = this.rngVals; 298 double[][] values_y = fltFld.getValues(false); 299 300 if (values_y.length != rngTupLen) { 301 throw new VisADException("fields must have same range tuple length"); 302 } 303 304 double[] stats = new double[rngTupLen]; 305 306 for (int k = 0; k < rngTupLen; k++) { 307 double[][] newVals = removeMissingAND(values_x[k], values_y[k]); 308 stats[k] = pCorrelation.correlation(newVals[0], newVals[1]); 309 } 310 311 return makeStat(stats); 312 } 313 314 private Data makeStat(double[] stats) 315 throws VisADException, RemoteException { 316 if (statType instanceof RealType) { 317 return new Real((RealType) statType, stats[0]); 318 } else if (statType instanceof RealTupleType) { 319 return new RealTuple((RealTupleType) statType, stats); 320 } 321 return null; 322 } 323 324 private double[][] removeMissingAND(double[] vals_x, double[] vals_y) { 325 int cnt = 0; 326 int[] good = new int[vals_x.length]; 327 for (int k = 0; k < vals_x.length; k++) { 328 if (!(Double.isNaN(vals_x[k])) && !(Double.isNaN(vals_y[k]))) { 329 good[cnt] = k; 330 cnt++; 331 } 332 } 333 334 if (cnt == vals_x.length) { 335 return new double[][]{vals_x, vals_y}; 336 } else { 337 double[] newVals_x = new double[cnt]; 338 double[] newVals_y = new double[cnt]; 339 for (int k = 0; k < cnt; k++) { 340 newVals_x[k] = vals_x[good[k]]; 341 newVals_y[k] = vals_y[good[k]]; 342 } 343 return new double[][]{newVals_x, newVals_y}; 344 } 345 } 346 347 /** 348 * Creates a {@literal "description"} of any {@link FlatField FlatFields} 349 * in {@code params}. 350 * 351 * <p>This is mostly useful from within the Jython Shell.</p> 352 * 353 * <p>Some notes about {@code params}: 354 * <ul> 355 * <li>Understands {@code FlatField} and {@code String} objects.</li> 356 * <li>Strings must be found within {@link DescribeParams}.</li> 357 * <li>Strings control descriptions returned for all fields in 358 * {@code params}.</li> 359 * <li>{@code FlatField} and {@code String} objects may appear in any order.</li> 360 * </ul> 361 * </p> 362 * 363 * @param params See description of this method. If {@code null} or empty, 364 * nothing will happen. 365 * 366 * @return String descriptions of any {@code FlatField} objects in 367 * {@code params}, with relevant strings in {@code params} 368 * controlling what shows up in all descriptions. 369 * 370 * @throws VisADException if VisAD had problems. 371 * @throws RemoteException if VisAD had problems. 372 */ 373 public static String describe(Object... params) 374 throws VisADException, RemoteException { 375 String result = ""; 376 if (params != null) { 377 List<FlatField> fields = new ArrayList<>(params.length); 378 List<String> descs = new ArrayList<>(params.length); 379 for (Object param : params) { 380 if (param instanceof FlatField) { 381 fields.add((FlatField) param); 382 } else if (param instanceof String) { 383 descs.add((String) param); 384 } 385 } 386 387 // 350 is typical size of a single, "complete" describe call with 388 // one field. 389 StringBuilder buf = new StringBuilder(350 * descs.size()); 390 for (FlatField field : fields) { 391 Description d = new Description(field, descs); 392 buf.append(d.makeDescription()); 393 } 394 result = buf.toString(); 395 } 396 return result; 397 } 398 399 /** 400 * Creates a {@literal "binned sparkline"} of the given 401 * {@link FlatField FlatFields}. 402 * 403 * @param fields {@code FlatField} objects to {@literal "visualize"} with 404 * sparklines. 405 * 406 * @return String sparkline for each {@code FlatField} in {@code fields}. 407 * 408 * @throws VisADException if VisAD had problems. 409 * @throws RemoteException if VisAD had problems. 410 * 411 * @see <a href="https://en.wikipedia.org/wiki/Sparkline" target="_top">Sparkline Wikipedia Article</a> 412 */ 413 public static String sparkline(FlatField... fields) 414 throws VisADException, RemoteException 415 { 416 // assuming sparkline is only using 20 bins 417 StringBuilder sb = new StringBuilder(25 * fields.length); 418 for (FlatField field : fields) { 419 Statistics s = new Statistics(field); 420 sb.append(sparkline(field, s)).append('\n'); 421 } 422 return sb.toString(); 423 } 424 425 public static Long[] histogram(FlatField field, int bins) 426 throws VisADException { 427 Long[] histogram = new Long[bins]; 428 EmpiricalDistribution distribution = new EmpiricalDistribution(bins); 429 distribution.load(field.getValues(false)[0]); 430 int k = 0; 431 for (SummaryStatistics stats : distribution.getBinStats()) { 432 histogram[k++] = stats.getN(); 433 } 434 return histogram; 435 } 436 437 public static String sparkline(FlatField field, Statistics s) 438 throws VisADException, RemoteException 439 { 440 Long[] values = histogram(field, 20); 441 Real sMin = (Real) s.min(); 442 Real sMax = (Real) s.max(); 443 Collection<Long> collection = asList(values); 444 long max = Collections.max(collection); 445 long min = Collections.min(collection); 446 float scale = (max - min) / 7f; 447 final StringBuilder buf = new StringBuilder(values.length); 448 449 // TJJ Mar 2018 - sandwich with min/max 450 // http://mcidas.ssec.wisc.edu/inquiry-v/?inquiry=2548 451 buf.append(fmtMe((sMin).getValue())); 452 for (Long value : values) { 453 int index = Math.round((value - min) / scale); 454 buf.append(CHARS.get(index)); 455 } 456 buf.append(fmtMe((sMax).getValue())); 457 458 return buf.toString(); 459 } 460 461 private static EnumSet<DescribeParams> parseParams(List<String> ps) { 462 Set<DescribeParams> params = Collections.emptySet(); 463 if (ps != null) { 464 params = new HashSet<>(ps.size()); 465 for (String p : ps) { 466 params.add(DescribeParams.valueOf(p.toUpperCase())); 467 } 468 } 469 return EnumSet.copyOf(params); 470 } 471 472 public static class Description { 473 474 private final FlatField field; 475 476 private final EnumSet<DescribeParams> params; 477 478 public Description(FlatField field, List<String> params) { 479 this.field = field; 480 if ((params == null) || params.isEmpty()) { 481 this.params = EnumSet.allOf(DescribeParams.class); 482 } else { 483 this.params = parseParams(params); 484 } 485 } 486 487 public String makeDescription() 488 throws VisADException, RemoteException { 489 StringBuilder sb = new StringBuilder(1024); 490 Statistics s = new Statistics(field); 491 double max = ((Real) s.max()).getValue(); 492 double min = ((Real) s.min()).getValue(); 493 double q1 = ((Real) s.percentile(25.0)).getValue(); 494 double q3 = ((Real) s.percentile(75.0)).getValue(); 495 double[] modes = StatUtils.mode(field.getValues(false)[0]); 496 497 StringBuilder tmp = new StringBuilder(128); 498 for (int i = 0; i < modes.length; i++) { 499 tmp.append(fmtMe(modes[i])); 500 if ((i + 1) < modes.length) { 501 tmp.append(", "); 502 } 503 } 504 505 String temp; 506 char endl = '\n'; 507 if (params.contains(DescribeParams.HISTOGRAM)) { 508 temp = sparkline(field, s); 509 sb.append("Histogram : ").append(temp).append(endl); 510 } 511 if (params.contains(DescribeParams.LENGTH)) { 512 temp = String.format("%d", s.numPoints()); 513 sb.append("Length : ").append(temp).append(endl); 514 } 515 if (params.contains(DescribeParams.MIN)) { 516 temp = fmtMe(((Real) s.min()).getValue()); 517 sb.append("Min : ").append(temp).append(endl); 518 } 519 if (params.contains(DescribeParams.MAX)) { 520 temp = fmtMe(((Real) s.max()).getValue()); 521 sb.append("Max : ").append(temp).append(endl); 522 } 523 if (params.contains(DescribeParams.RANGE)) { 524 temp = fmtMe(max - min); 525 sb.append("Range : ").append(temp).append(endl); 526 } 527 if (params.contains(DescribeParams.Q1)) { 528 sb.append("Q1 : ").append(fmtMe(q1)).append(endl); 529 } 530 if (params.contains(DescribeParams.Q2)) { 531 temp = fmtMe(((Real) s.percentile(50.0)).getValue()); 532 sb.append("Q2 : ").append(temp).append(endl); 533 } 534 if (params.contains(DescribeParams.Q3)) { 535 sb.append("Q3 : ").append(fmtMe(q3)).append(endl); 536 } 537 if (params.contains(DescribeParams.IQR)) { 538 temp = fmtMe(q3 - q1); 539 sb.append("IQR : ").append(temp).append(endl); 540 } 541 if (params.contains(DescribeParams.MEAN)) { 542 temp = fmtMe(((Real) s.mean()).getValue()); 543 sb.append("Mean : ").append(temp).append(endl); 544 } 545 if (params.contains(DescribeParams.MODE)) { 546 sb.append("Mode : ").append(tmp).append(endl); 547 } 548 if (params.contains(DescribeParams.KURTOSIS)) { 549 temp = fmtMe(((Real) s.kurtosis()).getValue()); 550 sb.append("Kurtosis : ").append(temp).append(endl); 551 } 552 if (params.contains(DescribeParams.SKEWNESS)) { 553 temp = fmtMe(((Real) s.skewness()).getValue()); 554 sb.append("Skewness : ").append(temp).append(endl); 555 } 556 if (params.contains(DescribeParams.STDDEV)) { 557 temp = fmtMe(((Real) s.standardDeviation()).getValue()); 558 sb.append("Std Dev : ").append(temp).append(endl); 559 } 560 if (params.contains(DescribeParams.VARIANCE)) { 561 temp = fmtMe(((Real) s.variance()).getValue()); 562 sb.append("Variance : ").append(temp).append(endl); 563 } 564 if (params.contains(DescribeParams.GOODPTS)) { 565 temp = String.format("%d", s.getNumGoodPoints()[0]); 566 sb.append("# Good Pts: ").append(temp).append(endl); 567 } 568 return sb.toString(); 569 } 570 } 571}