/*
 * Decompiled with CFR 0.152.
 */
package org.apache.datasketches.hive.tuple;

import java.util.ArrayList;
import java.util.List;
import org.apache.commons.math3.stat.descriptive.StatisticalSummary;
import org.apache.commons.math3.stat.descriptive.SummaryStatistics;
import org.apache.commons.math3.stat.inference.TTest;
import org.apache.datasketches.hive.common.BytesWritableHelper;
import org.apache.datasketches.hive.tuple.ArrayOfDoublesSketchStats;
import org.apache.datasketches.memory.Memory;
import org.apache.datasketches.tuple.ArrayOfDoublesSketch;
import org.apache.datasketches.tuple.ArrayOfDoublesSketches;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDF;
import org.apache.hadoop.io.BytesWritable;

/**
 * Hive UDF that runs a t-test between two serialized ArrayOfDoublesSketches
 * and reports one p-value per double value kept in the sketches.
 */
@Description(name="ArrayOfDoublesSketchesTTest", value="_FUNC_(sketchA, sketchB)", extended="Performs t-test and returns a list of p-values given two ArrayOfDoublesSketches. The result will be N double values, where N is the number of double values kept in the sketch per key. The resulting p-values are probabilities that differences in means are due to chance")
public class ArrayOfDoublesSketchesTTestUDF
extends UDF {
    /**
     * Computes t-test p-values between the corresponding value positions of two sketches.
     *
     * @param serializedSketchA first ArrayOfDoublesSketch in serialized form
     * @param serializedSketchB second ArrayOfDoublesSketch in serialized form
     * @return a list with one p-value per value position (its size equals the
     *         number of values reported by the sketches), or {@code null} when
     *         either argument is {@code null} or either sketch retains fewer
     *         than two entries
     * @throws IllegalArgumentException if the two sketches report a different number of values
     */
    public List<Double> evaluate(BytesWritable serializedSketchA, BytesWritable serializedSketchB) {
        if (serializedSketchA == null || serializedSketchB == null) {
            return null;
        }

        final Memory memoryA = BytesWritableHelper.wrapAsMemory(serializedSketchA);
        final ArrayOfDoublesSketch first = ArrayOfDoublesSketches.wrapSketch(memoryA);
        final Memory memoryB = BytesWritableHelper.wrapAsMemory(serializedSketchB);
        final ArrayOfDoublesSketch second = ArrayOfDoublesSketches.wrapSketch(memoryB);

        // Values are compared position by position, so both sketches must agree on the count.
        final int numValues = first.getNumValues();
        if (numValues != second.getNumValues()) {
            throw new IllegalArgumentException("Both sketches must have the same number of values");
        }

        // With fewer than two retained entries on either side no result is produced.
        if (first.getRetainedEntries() < 2 || second.getRetainedEntries() < 2) {
            return null;
        }

        final SummaryStatistics[] statsA = ArrayOfDoublesSketchStats.sketchToSummaryStatistics(first);
        final SummaryStatistics[] statsB = ArrayOfDoublesSketchStats.sketchToSummaryStatistics(second);

        final TTest test = new TTest();
        final List<Double> result = new ArrayList<>(numValues);
        for (int index = 0; index < numValues; index++) {
            final double pValue = test.tTest(statsA[index], statsB[index]);
            result.add(pValue);
        }
        return result;
    }
}

