001/* 002 * This file is part of McIDAS-V 003 * 004 * Copyright 2007-2016 005 * Space Science and Engineering Center (SSEC) 006 * University of Wisconsin - Madison 007 * 1225 W. Dayton Street, Madison, WI 53706, USA 008 * https://www.ssec.wisc.edu/mcidas 009 * 010 * All Rights Reserved 011 * 012 * McIDAS-V is built on Unidata's IDV and SSEC's VisAD libraries, and 013 * some McIDAS-V source code is based on IDV and VisAD source code. 014 * 015 * McIDAS-V is free software; you can redistribute it and/or modify 016 * it under the terms of the GNU Lesser Public License as published by 017 * the Free Software Foundation; either version 3 of the License, or 018 * (at your option) any later version. 019 * 020 * McIDAS-V is distributed in the hope that it will be useful, 021 * but WITHOUT ANY WARRANTY; without even the implied warranty of 022 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 023 * GNU Lesser Public License for more details. 024 * 025 * You should have received a copy of the GNU Lesser Public License 026 * along with this program. If not, see http://www.gnu.org/licenses. 027 */ 028package edu.wisc.ssec.mcidasv.data.hydra; 029 030import static edu.wisc.ssec.mcidasv.data.StatsTable.fmtMe; 031import static java.util.Arrays.asList; 032 033import java.rmi.RemoteException; 034import java.util.Arrays; 035import java.util.Collection; 036import java.util.Collections; 037import java.util.List; 038 039import org.apache.commons.math3.random.EmpiricalDistribution; 040import org.apache.commons.math3.stat.correlation.PearsonsCorrelation; 041import org.apache.commons.math3.stat.descriptive.DescriptiveStatistics; 042import org.apache.commons.math3.stat.StatUtils; 043 044import org.apache.commons.math3.stat.descriptive.SummaryStatistics; 045import org.slf4j.Logger; 046import org.slf4j.LoggerFactory; 047import visad.Data; 048import visad.FlatField; 049import visad.FunctionType; 050import visad.MathType; 051import visad.Real; 052import visad.RealTuple; 053import visad.RealTupleType; 054import visad.RealType; 055import visad.TupleType; 056import visad.VisADException; 057 058public class Statistics { 059 060 private static final List<Character> CHARS = asList('\u2581', '\u2582', '\u2583', '\u2584', '\u2585', '\u2586', '\u2587', '\u2588'); 061 062 DescriptiveStatistics[] descriptiveStats = null; 063 double[][] values_x; 064 double[][] rngVals; 065 int rngTupLen; 066 int numPoints; 067 int[] numGoodPoints; 068 MathType statType; 069 070 PearsonsCorrelation pCorrelation = null; 071 072 073 public Statistics(FlatField fltFld) throws VisADException, RemoteException { 074 rngVals = fltFld.getValues(false); 075 rngTupLen = rngVals.length; 076 numPoints = fltFld.getDomainSet().getLength(); 077 numGoodPoints = new int[rngTupLen]; 078 079 values_x = new double[rngTupLen][]; 080 081 for (int k=0; k<rngTupLen; k++) { 082 values_x[k] = removeMissing(rngVals[k]); 083 numGoodPoints[k] = values_x[k].length; 084 } 085 086 descriptiveStats = new DescriptiveStatistics[rngTupLen]; 087 for (int k=0; k<rngTupLen; k++) { 088 descriptiveStats[k] = new DescriptiveStatistics(values_x[k]); 089 } 090 091 MathType rangeType = ((FunctionType)fltFld.getType()).getRange(); 092 093 if (rangeType instanceof RealTupleType) { 094 RealType[] rttypes = ((TupleType)rangeType).getRealComponents(); 095 if (rngTupLen > 1) { 096 statType = new RealTupleType(rttypes); 097 } 098 else { 099 statType = (RealType) rttypes[0]; 100 } 101 } 102 else if (rangeType instanceof RealType) { 103 statType = (RealType) rangeType; 104 } 105 else { 106 throw new VisADException("incoming type must be RealTupleType or RealType"); 107 } 108 109 pCorrelation = new PearsonsCorrelation(); 110 } 111 112 /** get the number of points in the domain of the FlatField 113 * 114 * @return number of points 115 */ 116 public int numPoints() { 117 return numPoints; 118 } 119 120 /** get the number of non-missing points in each range component 121 * 122 * @return number of non-missing points 123 */ 124 public int[] getNumGoodPoints() { 125 return numGoodPoints; 126 } 127 128 /* get the original range values 129 * 130 *@return the original range values 131 */ 132 public double[][] getRngVals() { 133 return rngVals; 134 } 135 136 /* get the range values actually used (missing removed) 137 * 138 *@return range values used 139 */ 140 141 public double[][] getValues() { 142 return values_x; 143 } 144 145 private double[] removeMissing(double[] vals) { 146 int num = vals.length; 147 int cnt = 0; 148 int[] good = new int[num]; 149 for (int k=0; k<num; k++) { 150 if ( !(Double.isNaN(vals[k])) ) { 151 good[cnt] = k; 152 cnt++; 153 } 154 } 155 156 if (cnt == num) { 157 return vals; 158 } 159 160 double[] newVals = new double[cnt]; 161 for (int k=0; k<cnt; k++) { 162 newVals[k] = vals[good[k]]; 163 } 164 165 return newVals; 166 } 167 168 private double[][] removeMissing(double[][] vals) { 169 int tupLen = vals.length; 170 double[][] newVals = new double[tupLen][]; 171 for (int k=0; k < tupLen; k++) { 172 newVals[k] = removeMissing(vals[k]); 173 } 174 return newVals; 175 } 176 177 public Data mean() throws VisADException, RemoteException { 178 double[] stats = new double[rngTupLen]; 179 for (int k=0; k<rngTupLen; k++) { 180 stats[k] = descriptiveStats[k].getMean(); 181 } 182 return makeStat(stats); 183 } 184 185 public Data geometricMean() throws VisADException, RemoteException { 186 double[] stats = new double[rngTupLen]; 187 for (int k=0; k<rngTupLen; k++) { 188 stats[k] = descriptiveStats[k].getGeometricMean(); 189 } 190 return makeStat(stats); 191 } 192 193 194 public Data max() throws VisADException, RemoteException { 195 double[] stats = new double[rngTupLen]; 196 for (int k=0; k<rngTupLen; k++) { 197 stats[k] = descriptiveStats[k].getMax(); 198 } 199 return makeStat(stats); 200 } 201 202 public Data min() throws VisADException, RemoteException { 203 double[] stats = new double[rngTupLen]; 204 for (int k=0; k<rngTupLen; k++) { 205 stats[k] = descriptiveStats[k].getMin(); 206 } 207 return makeStat(stats); 208 } 209 210 public Data median() throws VisADException, RemoteException { 211 double[] stats = new double[rngTupLen]; 212 for (int k=0; k<rngTupLen; k++) { 213 stats[k] = descriptiveStats[k].getPercentile(50.0); 214 } 215 return makeStat(stats); 216 } 217 218 public Data percentile(double p) throws VisADException, RemoteException { 219 double[] stats = new double[rngTupLen]; 220 for (int k=0; k<rngTupLen; k++) { 221 stats[k] = descriptiveStats[k].getPercentile(p); 222 } 223 return makeStat(stats); 224 } 225 226 public Data variance() throws VisADException, RemoteException { 227 double[] stats = new double[rngTupLen]; 228 for (int k=0; k<rngTupLen; k++) { 229 stats[k] = descriptiveStats[k].getVariance(); 230 } 231 return makeStat(stats); 232 } 233 234 public Data kurtosis() throws VisADException, RemoteException { 235 double[] stats = new double[rngTupLen]; 236 for (int k=0; k<rngTupLen; k++) { 237 stats[k] = descriptiveStats[k].getKurtosis(); 238 } 239 return makeStat(stats); 240 } 241 242 public Data standardDeviation() throws VisADException, RemoteException { 243 double[] stats = new double[rngTupLen]; 244 for (int k=0; k<rngTupLen; k++) { 245 stats[k] = descriptiveStats[k].getStandardDeviation(); 246 } 247 return makeStat(stats); 248 } 249 250 public Data skewness() throws VisADException, RemoteException { 251 double[] stats = new double[rngTupLen]; 252 for (int k=0; k<rngTupLen; k++) { 253 stats[k] = descriptiveStats[k].getSkewness(); 254 } 255 return makeStat(stats); 256 } 257 258 public Data correlation(FlatField fltFld) throws VisADException, RemoteException { 259 double[][] values_x = this.rngVals; 260 double[][] values_y = fltFld.getValues(false); 261 262 if (values_y.length != rngTupLen) { 263 throw new VisADException("both fields must have same range tuple length"); 264 } 265 266 double[] stats = new double[rngTupLen]; 267 268 for (int k=0; k<rngTupLen; k++) { 269 double[][] newVals = removeMissingAND(values_x[k], values_y[k]); 270 stats[k] = pCorrelation.correlation(newVals[0], newVals[1]); 271 } 272 273 return makeStat(stats); 274 } 275 276 private Data makeStat(double[] stats) throws VisADException, RemoteException { 277 if (statType instanceof RealType) { 278 return new Real((RealType)statType, stats[0]); 279 } 280 else if (statType instanceof RealTupleType) { 281 return new RealTuple((RealTupleType)statType, stats); 282 } 283 return null; 284 } 285 286 private double[][] removeMissingAND(double[] vals_x, double[] vals_y) { 287 int cnt = 0; 288 int[] good = new int[vals_x.length]; 289 for (int k=0; k<vals_x.length; k++) { 290 if ( !(Double.isNaN(vals_x[k])) && !(Double.isNaN(vals_y[k])) ) { 291 good[cnt] = k; 292 cnt++; 293 } 294 } 295 296 if (cnt == vals_x.length) { 297 return new double[][] {vals_x, vals_y}; 298 } 299 else { 300 double[] newVals_x = new double[cnt]; 301 double[] newVals_y = new double[cnt]; 302 for (int k=0; k<cnt; k++) { 303 newVals_x[k] = vals_x[good[k]]; 304 newVals_y[k] = vals_y[good[k]]; 305 } 306 return new double[][] {newVals_x, newVals_y}; 307 } 308 } 309 310 public static Long[] histogram(FlatField field, int bins) throws VisADException, RemoteException { 311 Long[] histogram = new Long[bins]; 312 EmpiricalDistribution distribution = new EmpiricalDistribution(bins); 313 distribution.load(field.getValues(false)[0]); 314 int k = 0; 315 for (SummaryStatistics stats: distribution.getBinStats()) { 316 histogram[k++] = stats.getN(); 317 } 318 return histogram; 319 } 320 321 private static final Logger logger = LoggerFactory.getLogger(Statistics.class); 322 323 public static String describe(FlatField field) throws VisADException, RemoteException { 324 StringBuilder sb = new StringBuilder(1024); 325 Statistics s = new Statistics(field); 326 double max = ((Real)s.max()).getValue(); 327 double min = ((Real)s.min()).getValue(); 328 double q1 = ((Real)s.percentile(25.0)).getValue(); 329 double q3 = ((Real)s.percentile(75.0)).getValue(); 330 double[] modes = StatUtils.mode(field.getValues(false)[0]); 331 332 StringBuilder tmp = new StringBuilder(128); 333 for (int i = 0; i < modes.length; i++) { 334 tmp.append(fmtMe(modes[i])); 335 if ((i+1) < modes.length) { 336 tmp.append(", "); 337 } 338 } 339 340 char endl = '\n'; 341 sb.append("Histogram : ").append(sparkline(field)).append(endl) 342 .append("Length : ").append(String.format("%d", s.numPoints())).append(endl) 343 .append("Min : ").append(fmtMe(((Real) s.min()).getValue())).append(endl) 344 .append("Max : ").append(fmtMe(((Real) s.max()).getValue())).append(endl) 345 .append("Range : ").append(fmtMe(max - min)).append(endl) 346 .append("Q1 : ").append(fmtMe(q1)).append(endl) 347 .append("Q2 : ").append(fmtMe(((Real)s.percentile(50.0)).getValue())).append(endl) 348 .append("Q3 : ").append(fmtMe(q3)).append(endl) 349 .append("IQR : ").append(fmtMe(q3 - q1)).append(endl) 350 .append("Mean : ").append(fmtMe(((Real)s.mean()).getValue())).append(endl) 351 .append("Mode : ").append(tmp.toString()).append(endl) 352 .append("Kurtosis : ").append(fmtMe(((Real)s.kurtosis()).getValue())).append(endl) 353 .append("Skewness : ").append(fmtMe(((Real)s.skewness()).getValue())).append(endl) 354 .append("Std Dev : ").append(fmtMe(((Real)s.standardDeviation()).getValue())).append(endl) 355 .append("Variance : ").append(fmtMe(((Real)s.variance()).getValue())).append(endl); 356 return sb.toString(); 357 } 358 359 public static String describe(FlatField... fields) throws VisADException, RemoteException { 360 // 350 is just slightly more than required 361 StringBuilder buf = new StringBuilder(350 * fields.length); 362 for (FlatField field : fields) { 363 buf.append(describe(field)).append('\n'); 364 } 365 return buf.toString(); 366 } 367 368 public static String sparkline(FlatField field) throws VisADException, RemoteException { 369 Long[] values = histogram(field, 20); 370 Collection<Long> collection = asList(values); 371 long max = Collections.max(collection); 372 long min = Collections.min(collection); 373 float scale = (max - min) / 7f; 374 final StringBuilder buf = new StringBuilder(values.length); 375 for (Long value : values) { 376 int index = Math.round((value - min) / scale); 377 buf.append(CHARS.get(index)); 378 } 379 return buf.toString(); 380 } 381 382 public static String sparkline(FlatField... fields) throws VisADException, RemoteException { 383 // assumming sparkline is only using 20 bins 384 StringBuilder sb = new StringBuilder(25 * fields.length); 385 for (FlatField field : fields) { 386 sb.append(sparkline(field)).append('\n'); 387 } 388 return sb.toString(); 389 } 390}