/*
 * Decompiled with CFR 0.152.
 */
package org.apache.kylin.engine.spark;

import java.io.IOException;
import java.io.Serializable;
import java.util.Set;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.cli.Options;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.kylin.common.KylinConfig;
import org.apache.kylin.common.util.AbstractApplication;
import org.apache.kylin.common.util.HadoopUtil;
import org.apache.kylin.common.util.OptionsHelper;
import org.apache.kylin.common.util.Pair;
import org.apache.kylin.cube.CubeInstance;
import org.apache.kylin.cube.CubeManager;
import org.apache.kylin.cube.CubeSegment;
import org.apache.kylin.cube.common.RowKeySplitter;
import org.apache.kylin.engine.mr.common.AbstractHadoopJob;
import org.apache.kylin.engine.mr.common.MapReduceUtil;
import org.apache.kylin.engine.mr.common.SerializableConfiguration;
import org.apache.kylin.engine.spark.KylinSparkJobListener;
import org.apache.kylin.engine.spark.SparkUtil;
import org.apache.kylin.shaded.com.google.common.base.Preconditions;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.scheduler.SparkListenerInterface;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import scala.Tuple2;

/**
 * Spark application that reads the cuboid data of the segment being optimized
 * (a sequence file of {@code Text} key/value pairs) and writes two filtered copies of it:
 * <ul>
 *   <li>rows whose cuboid id equals the cube's base cuboid id, written under the
 *       {@code "base_cuboid"} sub-path of the output path;</li>
 *   <li>rows whose cuboid id is contained in the cube's recommended cuboid set, written under
 *       the {@code "old"} sub-path of the output path.</li>
 * </ul>
 * The output path is deleted before anything is written.
 *
 * <p>All five command line options ({@code cubename}, {@code segmentId}, {@code input},
 * {@code output}, {@code metaUrl}) are required.
 */
public class SparkFilterRecommendCuboidDataJob extends AbstractApplication implements Serializable {
    protected static final Logger logger = LoggerFactory.getLogger(SparkFilterRecommendCuboidDataJob.class);

    // Declaration order matters: OptionBuilder is a stateful static builder, so the options are
    // created one after another in the same order the original static initializer used.
    public static final Option OPTION_CUBE_NAME = requiredOption("cubename", null);
    public static final Option OPTION_SEGMENT_ID = requiredOption("segmentId", null);
    public static final Option OPTION_INPUT_PATH = requiredOption("input", null);
    public static final Option OPTION_OUTPUT_PATH = requiredOption("output", null);
    public static final Option OPTION_META_URL = requiredOption("metaUrl", "HDFS metadata url");

    private final Options options = new Options();

    /** Registers every supported command line option. */
    public SparkFilterRecommendCuboidDataJob() {
        this.options.addOption(OPTION_CUBE_NAME);
        this.options.addOption(OPTION_INPUT_PATH);
        this.options.addOption(OPTION_OUTPUT_PATH);
        this.options.addOption(OPTION_SEGMENT_ID);
        this.options.addOption(OPTION_META_URL);
    }

    /**
     * Builds a required, single-argument option whose argument name equals the option name.
     *
     * @param name        option name, also used as the argument name
     * @param description human readable description, or {@code null} for none
     * @return the newly created option
     */
    private static Option requiredOption(String name, String description) {
        OptionBuilder.withArgName(name);
        OptionBuilder.hasArg();
        OptionBuilder.isRequired(true);
        if (description != null) {
            OptionBuilder.withDescription(description);
        }
        return OptionBuilder.create(name);
    }

    /**
     * Removes a single trailing {@code '*'} from the given path so that the directory in front of
     * the wildcard can be checked for existence. Paths without a trailing wildcard are returned
     * unchanged (previously the last character was always dropped, which broke such paths).
     * NOTE(review): callers presumably pass a glob such as {@code <cuboid root>/*} - confirm.
     */
    private static String stripTrailingWildcard(String path) {
        return path.endsWith("*") ? path.substring(0, path.length() - 1) : path;
    }

    @Override
    protected Options getOptions() {
        return this.options;
    }

    /**
     * Runs the filter job.
     *
     * @param optionsHelper parsed command line holding the option values
     * @throws IOException          if the input location (without its trailing wildcard) does not exist
     * @throws NullPointerException if the cube, the segment, the original segment to optimize or
     *                              the recommended cuboid set cannot be resolved
     * @throws Exception            any other failure raised by Spark, Hadoop or Kylin
     */
    @Override
    protected void execute(OptionsHelper optionsHelper) throws Exception {
        String cubeName = optionsHelper.getOptionValue(OPTION_CUBE_NAME);
        String segmentId = optionsHelper.getOptionValue(OPTION_SEGMENT_ID);
        String inputPath = optionsHelper.getOptionValue(OPTION_INPUT_PATH);
        String outputPath = optionsHelper.getOptionValue(OPTION_OUTPUT_PATH);
        String metaUrl = optionsHelper.getOptionValue(OPTION_META_URL);

        SparkConf sparkConf = SparkUtil.setKryoSerializerInConf();
        sparkConf.setAppName("Kylin_Filter_Recommend_Cuboid_Data_" + cubeName + "_With_Spark");
        KylinSparkJobListener jobListener = new KylinSparkJobListener();

        try (JavaSparkContext sc = new JavaSparkContext(sparkConf)) {
            sc.sc().addSparkListener(jobListener);
            SerializableConfiguration sConf = new SerializableConfiguration(sc.hadoopConfiguration());
            KylinConfig config = AbstractHadoopJob.loadKylinConfigFromHdfs(sConf, metaUrl);

            // Start from a clean output location.
            HadoopUtil.deletePath(HadoopUtil.getCurrentConfiguration(), new Path(outputPath));

            CubeManager cubeManager = CubeManager.getInstance(config);
            CubeInstance cube = cubeManager.getCube(cubeName);
            Preconditions.checkNotNull(cube, "Cube " + cubeName + " could not be found");
            CubeSegment optSegment = cube.getSegmentById(segmentId);
            Preconditions.checkNotNull(optSegment,
                    "Segment " + segmentId + " could not be found in cube " + cubeName);
            CubeSegment originalSegment = cube.getOriginalSegmentToOptimize(optSegment);
            Preconditions.checkNotNull(originalSegment,
                    "The original segment to optimize could not be found for segment " + segmentId);

            boolean enableSharding = originalSegment.isEnableSharding();
            long baseCuboid = cube.getCuboidScheduler().getBaseCuboidId();
            Set<Long> recommendCuboids = cube.getCuboidsRecommend();
            Preconditions.checkNotNull(recommendCuboids, "The recommend cuboid map could not be null");

            // Resolve the file system from the path itself so that fully qualified inputs living on
            // a file system other than the default one can be checked as well.
            Path inputDir = new Path(stripTrailingWildcard(inputPath));
            FileSystem inputFs = inputDir.getFileSystem(sc.hadoopConfiguration());
            if (!inputFs.exists(inputDir)) {
                throw new IOException("OldCuboIdFilePath " + inputPath + " does not exists");
            }

            JavaPairRDD<Text, Text> inputRDD = sc.sequenceFile(inputPath, Text.class, Text.class);

            logger.info("start to calculate nBaseReduceTasks");
            Pair<Integer, Integer> taskNums = MapReduceUtil.getConvergeCuboidDataReduceTaskNums(originalSegment);
            // First value: partition count for the recommended cuboid output;
            // second value: partition count for the base cuboid output.
            int reduceTasks = taskNums.getFirst();
            int nBaseReduceTasks = taskNums.getSecond();
            logger.info("nBaseReduceTasks is {}", nBaseReduceTasks);

            Job job = Job.getInstance(sConf.get());
            SparkUtil.setHadoopConfForCuboid(job, originalSegment, metaUrl);

            // Pass 1: rows of the base cuboid. The same Job is re-pointed at a new output location
            // before each save, so the two writes must stay strictly sequential.
            JavaPairRDD<Text, Text> baseCuboIdRDD =
                    inputRDD.filter(new BaseCuboidFilter(enableSharding, baseCuboid));
            SparkUtil.configConvergeCuboidDataReduceOut(job, SparkUtil.generateFilePath("base_cuboid", outputPath));
            baseCuboIdRDD.coalesce(nBaseReduceTasks).saveAsNewAPIHadoopDataset(job.getConfiguration());

            // Pass 2: rows of every recommended cuboid.
            JavaPairRDD<Text, Text> reuseCuboIdRDD =
                    inputRDD.filter(new RecommendCuboidFilter(enableSharding, recommendCuboids));
            SparkUtil.configConvergeCuboidDataReduceOut(job, SparkUtil.generateFilePath("old", outputPath));
            reuseCuboIdRDD.coalesce(reduceTasks).saveAsNewAPIHadoopDataset(job.getConfiguration());
        }
    }

    /**
     * Keeps the rows whose cuboid id equals the base cuboid id. Declared as a static nested class
     * so the Spark closure does not hold a reference to the enclosing job instance.
     */
    private static final class BaseCuboidFilter implements Function<Tuple2<Text, Text>, Boolean> {
        private static final long serialVersionUID = 1L;

        private final boolean enableSharding;
        private final long baseCuboidId;

        BaseCuboidFilter(boolean enableSharding, long baseCuboidId) {
            this.enableSharding = enableSharding;
            this.baseCuboidId = baseCuboidId;
        }

        @Override
        public Boolean call(Tuple2<Text, Text> row) throws Exception {
            long cuboidId = RowKeySplitter.getCuboidId(row._1().getBytes(), enableSharding);
            return cuboidId == baseCuboidId;
        }
    }

    /**
     * Keeps the rows whose cuboid id is contained in the recommended cuboid set. Declared as a
     * static nested class so the Spark closure does not hold a reference to the enclosing job
     * instance.
     */
    private static final class RecommendCuboidFilter implements Function<Tuple2<Text, Text>, Boolean> {
        private static final long serialVersionUID = 1L;

        private final boolean enableSharding;
        private final Set<Long> recommendCuboids;

        RecommendCuboidFilter(boolean enableSharding, Set<Long> recommendCuboids) {
            this.enableSharding = enableSharding;
            this.recommendCuboids = recommendCuboids;
        }

        @Override
        public Boolean call(Tuple2<Text, Text> row) throws Exception {
            long cuboidId = RowKeySplitter.getCuboidId(row._1().getBytes(), enableSharding);
            return recommendCuboids.contains(cuboidId);
        }
    }
}

