001/*
002 * This file is part of McIDAS-V
003 *
004 * Copyright 2007-2016
005 * Space Science and Engineering Center (SSEC)
006 * University of Wisconsin - Madison
007 * 1225 W. Dayton Street, Madison, WI 53706, USA
008 * https://www.ssec.wisc.edu/mcidas
009 * 
010 * All Rights Reserved
011 * 
012 * McIDAS-V is built on Unidata's IDV and SSEC's VisAD libraries, and
013 * some McIDAS-V source code is based on IDV and VisAD source code.  
014 * 
015 * McIDAS-V is free software; you can redistribute it and/or modify
016 * it under the terms of the GNU Lesser Public License as published by
017 * the Free Software Foundation; either version 3 of the License, or
018 * (at your option) any later version.
019 * 
020 * McIDAS-V is distributed in the hope that it will be useful,
021 * but WITHOUT ANY WARRANTY; without even the implied warranty of
022 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
023 * GNU Lesser Public License for more details.
024 * 
025 * You should have received a copy of the GNU Lesser Public License
026 * along with this program.  If not, see http://www.gnu.org/licenses.
027 */
028package edu.wisc.ssec.mcidasv.data.hydra;
029
030import static edu.wisc.ssec.mcidasv.data.StatsTable.fmtMe;
031import static java.util.Arrays.asList;
032
033import java.rmi.RemoteException;
034import java.util.Arrays;
035import java.util.Collection;
036import java.util.Collections;
037import java.util.List;
038
039import org.apache.commons.math3.random.EmpiricalDistribution;
040import org.apache.commons.math3.stat.correlation.PearsonsCorrelation;
041import org.apache.commons.math3.stat.descriptive.DescriptiveStatistics;
042import org.apache.commons.math3.stat.StatUtils;
043
044import org.apache.commons.math3.stat.descriptive.SummaryStatistics;
045import org.slf4j.Logger;
046import org.slf4j.LoggerFactory;
047import visad.Data;
048import visad.FlatField;
049import visad.FunctionType;
050import visad.MathType;
051import visad.Real;
052import visad.RealTuple;
053import visad.RealTupleType;
054import visad.RealType;
055import visad.TupleType;
056import visad.VisADException;
057
058public class Statistics {
059
060    private static final List<Character> CHARS = asList('\u2581', '\u2582', '\u2583', '\u2584', '\u2585', '\u2586', '\u2587', '\u2588');
061
062   DescriptiveStatistics[] descriptiveStats = null;
063   double[][] values_x;
064   double[][] rngVals;
065   int rngTupLen;
066   int numPoints;
067   int[] numGoodPoints;
068   MathType statType;
069
070   PearsonsCorrelation pCorrelation = null;
071
072
073   public Statistics(FlatField fltFld) throws VisADException, RemoteException {
074      rngVals = fltFld.getValues(false);
075      rngTupLen = rngVals.length;
076      numPoints = fltFld.getDomainSet().getLength();
077      numGoodPoints = new int[rngTupLen];
078
079      values_x = new double[rngTupLen][];
080
081      for (int k=0; k<rngTupLen; k++) {
082        values_x[k] = removeMissing(rngVals[k]);
083        numGoodPoints[k] = values_x[k].length;
084      }
085
086      descriptiveStats = new DescriptiveStatistics[rngTupLen];
087      for (int k=0; k<rngTupLen; k++) {
088        descriptiveStats[k] = new DescriptiveStatistics(values_x[k]);
089      }
090
091      MathType rangeType = ((FunctionType)fltFld.getType()).getRange();
092
093      if (rangeType instanceof RealTupleType) {
094        RealType[] rttypes = ((TupleType)rangeType).getRealComponents();
095        if (rngTupLen > 1) {
096          statType = new RealTupleType(rttypes);
097        }
098        else {
099          statType = (RealType) rttypes[0];
100        }
101      }
102      else if (rangeType instanceof RealType) {
103        statType = (RealType) rangeType;
104      }
105      else {
106         throw new VisADException("incoming type must be RealTupleType or RealType");
107      }
108
109      pCorrelation = new PearsonsCorrelation();
110   }
111
112   /** get the number of points in the domain of the FlatField
113   *
114   * @return number of points
115   */
116   public int numPoints() {
117     return numPoints;
118   }
119
120   /** get the number of non-missing points in each range component
121   *
122   * @return number of non-missing points
123   */
124   public int[] getNumGoodPoints() {
125     return numGoodPoints;
126   }
127
128   /* get the original range values
129   *
130   *@return the original range values
131   */
132   public double[][] getRngVals() {
133     return rngVals;
134   }
135
136   /* get the range values actually used (missing removed)
137   *
138   *@return range values used
139   */
140
141   public double[][] getValues() {
142     return values_x;
143   }
144
145   private double[] removeMissing(double[] vals) {
146     int num = vals.length;
147     int cnt = 0;
148     int[] good = new int[num];
149     for (int k=0; k<num; k++) {
150        if ( !(Double.isNaN(vals[k])) ) {
151          good[cnt] = k;
152          cnt++;
153        }
154     }
155
156     if (cnt == num) {
157        return vals;
158     }
159
160     double[] newVals = new double[cnt];
161     for (int k=0; k<cnt; k++) {
162       newVals[k] = vals[good[k]];
163     }
164
165     return newVals;
166   }
167
168   private double[][] removeMissing(double[][] vals) {
169     int tupLen = vals.length;
170     double[][] newVals = new double[tupLen][];
171     for (int k=0; k < tupLen; k++) {
172        newVals[k] = removeMissing(vals[k]);
173     }
174     return newVals;
175   }
176
177   public Data mean() throws VisADException, RemoteException {
178     double[] stats = new double[rngTupLen];
179     for (int k=0; k<rngTupLen; k++) {
180       stats[k] = descriptiveStats[k].getMean();
181     }
182     return makeStat(stats);
183   }
184
185   public Data geometricMean() throws VisADException, RemoteException {
186     double[] stats = new double[rngTupLen];
187     for (int k=0; k<rngTupLen; k++) {
188       stats[k] = descriptiveStats[k].getGeometricMean();
189     }
190     return makeStat(stats);
191   }
192
193
194   public Data max() throws VisADException, RemoteException {
195     double[] stats = new double[rngTupLen];
196     for (int k=0; k<rngTupLen; k++) {
197       stats[k] = descriptiveStats[k].getMax();
198     }
199     return makeStat(stats);
200   }
201
202   public Data min() throws VisADException, RemoteException {
203     double[] stats = new double[rngTupLen];
204     for (int k=0; k<rngTupLen; k++) {
205       stats[k] = descriptiveStats[k].getMin();
206     }
207     return makeStat(stats);
208   }
209
210   public Data median() throws VisADException, RemoteException {
211     double[] stats = new double[rngTupLen];
212     for (int k=0; k<rngTupLen; k++) {
213       stats[k] = descriptiveStats[k].getPercentile(50.0);
214     }
215     return makeStat(stats);
216   }
217
218    public Data percentile(double p) throws VisADException, RemoteException {
219        double[] stats = new double[rngTupLen];
220        for (int k=0; k<rngTupLen; k++) {
221            stats[k] = descriptiveStats[k].getPercentile(p);
222        }
223        return makeStat(stats);
224    }
225
226   public Data variance() throws VisADException, RemoteException {
227     double[] stats = new double[rngTupLen];
228     for (int k=0; k<rngTupLen; k++) {
229       stats[k] = descriptiveStats[k].getVariance();
230     }
231     return makeStat(stats);
232   }
233
234   public Data kurtosis() throws VisADException, RemoteException {
235     double[] stats = new double[rngTupLen];
236     for (int k=0; k<rngTupLen; k++) {
237       stats[k] = descriptiveStats[k].getKurtosis();
238     }
239     return makeStat(stats);
240   }
241
242   public Data standardDeviation() throws VisADException, RemoteException {
243     double[] stats = new double[rngTupLen];
244     for (int k=0; k<rngTupLen; k++) {
245       stats[k] = descriptiveStats[k].getStandardDeviation();
246     }
247     return makeStat(stats);
248   }
249
250   public Data skewness() throws VisADException, RemoteException {
251     double[] stats = new double[rngTupLen];
252     for (int k=0; k<rngTupLen; k++) {
253       stats[k] = descriptiveStats[k].getSkewness();
254     }
255     return makeStat(stats);
256   }
257
258   public Data correlation(FlatField fltFld) throws VisADException, RemoteException {
259     double[][] values_x = this.rngVals;
260     double[][] values_y = fltFld.getValues(false);
261
262     if (values_y.length != rngTupLen) {
263       throw new VisADException("both fields must have same range tuple length");
264     }
265
266     double[] stats = new double[rngTupLen];
267     
268     for (int k=0; k<rngTupLen; k++) {
269       double[][] newVals = removeMissingAND(values_x[k], values_y[k]);
270       stats[k] = pCorrelation.correlation(newVals[0], newVals[1]);
271     }
272
273     return makeStat(stats);
274   }
275
276   private Data makeStat(double[] stats) throws VisADException, RemoteException {
277     if (statType instanceof RealType) {
278       return new Real((RealType)statType, stats[0]);
279     }
280     else if (statType instanceof RealTupleType) {
281       return new RealTuple((RealTupleType)statType, stats);
282     }
283     return null;
284   }
285
286   private double[][] removeMissingAND(double[] vals_x, double[] vals_y) {
287     int cnt = 0;
288     int[] good = new int[vals_x.length];
289     for (int k=0; k<vals_x.length; k++) {
290       if ( !(Double.isNaN(vals_x[k])) && !(Double.isNaN(vals_y[k]))  ) {
291         good[cnt] = k;
292         cnt++;
293       }
294     }
295
296     if (cnt == vals_x.length) {
297       return new double[][] {vals_x, vals_y};
298     }
299     else {
300       double[] newVals_x = new double[cnt];
301       double[] newVals_y = new double[cnt];
302       for (int k=0; k<cnt; k++) {
303         newVals_x[k] = vals_x[good[k]];
304         newVals_y[k] = vals_y[good[k]];
305       }
306       return new double[][] {newVals_x, newVals_y};
307     }
308   }
309
310    public static Long[] histogram(FlatField field, int bins) throws VisADException, RemoteException {
311        Long[] histogram = new Long[bins];
312        EmpiricalDistribution distribution = new EmpiricalDistribution(bins);
313        distribution.load(field.getValues(false)[0]);
314        int k = 0;
315        for (SummaryStatistics stats: distribution.getBinStats()) {
316            histogram[k++] = stats.getN();
317        }
318        return histogram;
319    }
320
321    private static final Logger logger = LoggerFactory.getLogger(Statistics.class);
322
323    public static String describe(FlatField field) throws VisADException, RemoteException {
324        StringBuilder sb = new StringBuilder(1024);
325        Statistics s = new Statistics(field);
326        double max = ((Real)s.max()).getValue();
327        double min = ((Real)s.min()).getValue();
328        double q1 = ((Real)s.percentile(25.0)).getValue();
329        double q3 = ((Real)s.percentile(75.0)).getValue();
330        double[] modes = StatUtils.mode(field.getValues(false)[0]);
331
332        StringBuilder tmp = new StringBuilder(128);
333        for (int i = 0; i < modes.length; i++) {
334            tmp.append(fmtMe(modes[i]));
335            if ((i+1) < modes.length) {
336                tmp.append(", ");
337            }
338        }
339
340        char endl = '\n';
341        sb.append("Histogram :  ").append(sparkline(field)).append(endl)
342            .append("Length    :  ").append(String.format("%d", s.numPoints())).append(endl)
343            .append("Min       :  ").append(fmtMe(((Real) s.min()).getValue())).append(endl)
344            .append("Max       :  ").append(fmtMe(((Real) s.max()).getValue())).append(endl)
345            .append("Range     :  ").append(fmtMe(max - min)).append(endl)
346            .append("Q1        :  ").append(fmtMe(q1)).append(endl)
347            .append("Q2        :  ").append(fmtMe(((Real)s.percentile(50.0)).getValue())).append(endl)
348            .append("Q3        :  ").append(fmtMe(q3)).append(endl)
349            .append("IQR       :  ").append(fmtMe(q3 - q1)).append(endl)
350            .append("Mean      :  ").append(fmtMe(((Real)s.mean()).getValue())).append(endl)
351            .append("Mode      :  ").append(tmp.toString()).append(endl)
352            .append("Kurtosis  :  ").append(fmtMe(((Real)s.kurtosis()).getValue())).append(endl)
353            .append("Skewness  :  ").append(fmtMe(((Real)s.skewness()).getValue())).append(endl)
354            .append("Std Dev   :  ").append(fmtMe(((Real)s.standardDeviation()).getValue())).append(endl)
355            .append("Variance  :  ").append(fmtMe(((Real)s.variance()).getValue())).append(endl);
356        return sb.toString();
357    }
358
359    public static String describe(FlatField... fields) throws VisADException, RemoteException {
360        // 350 is just slightly more than required
361        StringBuilder buf = new StringBuilder(350 * fields.length);
362        for (FlatField field : fields) {
363            buf.append(describe(field)).append('\n');
364        }
365        return buf.toString();
366    }
367
368    public static String sparkline(FlatField field) throws VisADException, RemoteException {
369        Long[] values = histogram(field, 20);
370        Collection<Long> collection = asList(values);
371        long max = Collections.max(collection);
372        long min = Collections.min(collection);
373        float scale = (max - min) / 7f;
374        final StringBuilder buf = new StringBuilder(values.length);
375        for (Long value : values) {
376            int index = Math.round((value - min) / scale);
377            buf.append(CHARS.get(index));
378        }
379        return buf.toString();
380    }
381
382    public static String sparkline(FlatField... fields) throws VisADException, RemoteException {
383        // assumming sparkline is only using 20 bins
384        StringBuilder sb = new StringBuilder(25 * fields.length);
385        for (FlatField field : fields) {
386            sb.append(sparkline(field)).append('\n');
387        }
388        return sb.toString();
389    }
390}