/*
 * Decompiled with CFR 0.152.
 */
package org.apache.iceberg.mr.mapreduce;

import java.io.IOException;
import java.io.UncheckedIOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.function.BiFunction;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.common.FileUtils;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.llap.LlapHiveUtils;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.metadata.AuthorizationException;
import org.apache.hadoop.hive.ql.metadata.HiveUtils;
import org.apache.hadoop.hive.ql.plan.MapWork;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.iceberg.CombinedScanTask;
import org.apache.iceberg.DataFile;
import org.apache.iceberg.DataTableScan;
import org.apache.iceberg.DataTask;
import org.apache.iceberg.FileFormat;
import org.apache.iceberg.FileScanTask;
import org.apache.iceberg.IncrementalAppendScan;
import org.apache.iceberg.MetadataColumns;
import org.apache.iceberg.PartitionSpec;
import org.apache.iceberg.Partitioning;
import org.apache.iceberg.Scan;
import org.apache.iceberg.Schema;
import org.apache.iceberg.SchemaParser;
import org.apache.iceberg.SnapshotRef;
import org.apache.iceberg.StructLike;
import org.apache.iceberg.Table;
import org.apache.iceberg.TableScan;
import org.apache.iceberg.avro.Avro;
import org.apache.iceberg.common.DynMethods;
import org.apache.iceberg.data.GenericDeleteFilter;
import org.apache.iceberg.data.IdentityPartitionConverters;
import org.apache.iceberg.data.InternalRecordWrapper;
import org.apache.iceberg.data.avro.DataReader;
import org.apache.iceberg.data.orc.GenericOrcReader;
import org.apache.iceberg.data.parquet.GenericParquetReaders;
import org.apache.iceberg.encryption.EncryptedFiles;
import org.apache.iceberg.expressions.Evaluator;
import org.apache.iceberg.expressions.Expression;
import org.apache.iceberg.expressions.Expressions;
import org.apache.iceberg.hive.HiveVersion;
import org.apache.iceberg.io.CloseableIterable;
import org.apache.iceberg.io.CloseableIterator;
import org.apache.iceberg.io.InputFile;
import org.apache.iceberg.mapping.NameMappingParser;
import org.apache.iceberg.mr.Catalogs;
import org.apache.iceberg.mr.InputFormatConfig;
import org.apache.iceberg.mr.hive.HiveIcebergInputFormat;
import org.apache.iceberg.mr.hive.HiveIcebergStorageHandler;
import org.apache.iceberg.mr.hive.IcebergAcidUtil;
import org.apache.iceberg.mr.mapreduce.HiveIdentityPartitionConverters;
import org.apache.iceberg.mr.mapreduce.IcebergInternalRecordWrapper;
import org.apache.iceberg.mr.mapreduce.IcebergSplit;
import org.apache.iceberg.orc.ORC;
import org.apache.iceberg.parquet.Parquet;
import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
import org.apache.iceberg.relocated.com.google.common.collect.Lists;
import org.apache.iceberg.types.Type;
import org.apache.iceberg.types.TypeUtil;
import org.apache.iceberg.types.Types;
import org.apache.iceberg.util.PartitionUtil;
import org.apache.iceberg.util.SerializationUtil;

public class IcebergInputFormat<T>
extends InputFormat<Void, T> {
    /**
     * Registers {@link IcebergInputFormat} as the input format of the given job and hands back a
     * builder operating on that job's configuration.
     *
     * @param job the MapReduce job to set up
     * @return a config builder wrapping {@code job.getConfiguration()}
     */
    public static InputFormatConfig.ConfigBuilder configure(Job job) {
        job.setInputFormatClass(IcebergInputFormat.class);
        Configuration jobConfiguration = job.getConfiguration();
        return new InputFormatConfig.ConfigBuilder(jobConfiguration);
    }

    /**
     * Builds a table scan and narrows it according to the snapshot-related settings in the configuration.
     *
     * <p>{@code iceberg.mr.snapshot.id} is first read as a number; when it does not parse as a long, its
     * raw value is looked up as a key in {@code table.refs()} and the referenced snapshot id is used.
     * Afterwards the snapshot ref, the snapshot id and the as-of time are applied, in that order, each
     * only when configured.
     *
     * @param table the table to scan
     * @param conf job configuration holding the snapshot settings
     * @return the configured scan
     * @throws RuntimeException if the snapshot setting is neither numeric nor a known reference name
     */
    private static TableScan createTableScan(Table table, Configuration conf) {
        TableScan tableScan = table.newScan();

        long snapshotId;
        try {
            snapshotId = conf.getLong("iceberg.mr.snapshot.id", -1L);
        } catch (NumberFormatException notNumeric) {
            // Not a number: treat the configured value as a reference name instead.
            String version = conf.get("iceberg.mr.snapshot.id");
            SnapshotRef namedRef = table.refs().get(version);
            if (namedRef == null) {
                throw new RuntimeException("Cannot find matching snapshot ID or reference name for version " + version);
            }
            snapshotId = namedRef.snapshotId();
        }

        String refName = conf.get("iceberg.mr.output.table.snapshot.ref");
        if (StringUtils.isNotEmpty(refName)) {
            tableScan = tableScan.useRef(HiveUtils.getTableSnapshotRef(refName));
        }

        if (snapshotId != -1L) {
            tableScan = tableScan.useSnapshot(snapshotId);
        }

        long asOfTime = conf.getLong("iceberg.mr.as.of.time", -1L);
        if (asOfTime != -1L) {
            tableScan = tableScan.asOfTime(asOfTime);
        }
        return tableScan;
    }

    /**
     * Creates an incremental append scan that starts (exclusively) at the snapshot id configured under
     * {@code iceberg.mr.snapshot.id.interval.from}; {@code -1} is passed through when the key is unset.
     *
     * @param table the table to scan
     * @param conf job configuration holding the interval start
     * @return the incremental append scan
     */
    private static IncrementalAppendScan createIncrementalAppendScan(Table table, Configuration conf) {
        long fromSnapshotId = conf.getLong("iceberg.mr.snapshot.id.interval.from", -1L);
        IncrementalAppendScan appendScan = table.newIncrementalAppendScan();
        return (IncrementalAppendScan) appendScan.fromSnapshotExclusive(fromSnapshotId);
    }

    /**
     * Applies the generic read settings from the configuration (case sensitivity, split sizing,
     * projection, column selection and filter) to the given scan.
     *
     * @param conf job configuration; it is cast to {@link JobConf} when looking up the map work
     * @param scanToConfigure the scan to refine
     * @param <T> the concrete scan type
     * @return the refined scan
     */
    private static <T extends Scan<T, FileScanTask, CombinedScanTask>> Scan<T, FileScanTask, CombinedScanTask> applyConfig(Configuration conf, Scan<T, FileScanTask, CombinedScanTask> scanToConfigure) {
        boolean caseSensitive = conf.getBoolean("iceberg.mr.case.sensitive", true);
        Scan<T, FileScanTask, CombinedScanTask> configured = scanToConfigure.caseSensitive(caseSensitive);

        long splitSize = conf.getLong("iceberg.mr.split.size", 0L);
        if (splitSize > 0L) {
            configured = configured.option("read.split.target-size", String.valueOf(splitSize));
        }

        MapWork mapWork = LlapHiveUtils.findMapWork((JobConf) conf);
        if (mapWork != null && mapWork.getCacheAffinity()) {
            // With cache affinity the open-file cost is set to the split size, or to
            // 0x8000000 (134217728 bytes) when no split size is configured.
            long openFileCost = splitSize > 0L ? splitSize : 0x8000000L;
            configured = configured.option("read.split.open-file-cost", String.valueOf(openFileCost));
        }

        // NOTE(review): the results of project(...) and select(...) below are discarded. If these
        // refinement methods return a new scan rather than mutating the receiver, neither setting
        // reaches the returned scan. Kept as-is on purpose; confirm the intended behaviour (and that
        // projection and selection may be combined) before assigning the results.
        String schemaJson = conf.get("iceberg.mr.read.schema");
        if (schemaJson != null) {
            configured.project(SchemaParser.fromJson(schemaJson));
        }

        String[] selectedColumns = conf.getStrings("iceberg.mr.selected.columns");
        if (selectedColumns != null) {
            configured.select(selectedColumns);
        }

        Expression filter = (Expression) SerializationUtil.deserializeFromBase64(conf.get("iceberg.mr.filter.expression"));
        if (filter != null) {
            configured = configured.filter(filter).ignoreResiduals();
        }
        return configured;
    }

    /**
     * Plans the scan for the configured table and wraps every combined scan task in an
     * {@link IcebergSplit}.
     *
     * <p>The table is taken from {@code HiveIcebergStorageHandler.table(...)} when available; otherwise
     * it is loaded through {@link Catalogs} and its name and serialized form are stored back into the
     * configuration. An incremental append scan is used when
     * {@code iceberg.mr.snapshot.id.interval.from} is set, a regular table scan otherwise.
     *
     * @param context the job context supplying the configuration
     * @return one split per planned combined scan task
     * @throws UnsupportedOperationException if residual filtering is required for the HIVE or PIG
     *     model and a task still carries a residual expression
     * @throws AuthorizationException if location validation is enabled and a data file lies outside
     *     the table location
     * @throws UncheckedIOException if closing the planned task iterable fails
     */
    public List<InputSplit> getSplits(JobContext context) {
        Configuration conf = context.getConfiguration();
        Table table = Optional.ofNullable(HiveIcebergStorageHandler.table(conf, conf.get("iceberg.mr.table.identifier"))).orElseGet(() -> {
            Table loaded = Catalogs.loadTable(conf);
            conf.set("iceberg.mr.table.identifier", loaded.name());
            conf.set("iceberg.mr.serialized.table." + loaded.name(), SerializationUtil.serializeToBase64(loaded));
            return loaded;
        });

        List<InputSplit> splits = Lists.newArrayList();
        boolean applyResidual = !conf.getBoolean("skip.residual.filtering", false);
        InputFormatConfig.InMemoryDataModel model = conf.getEnum("iceberg.mr.in.memory.data.model", InputFormatConfig.InMemoryDataModel.GENERIC);
        boolean mustCheckResiduals = applyResidual
                && (model == InputFormatConfig.InMemoryDataModel.HIVE || model == InputFormatConfig.InMemoryDataModel.PIG);

        // A wildcard scan type is needed here: the two branches produce scans of different concrete types.
        long fromVersion = conf.getLong("iceberg.mr.snapshot.id.interval.from", -1L);
        Scan<?, FileScanTask, CombinedScanTask> scan;
        if (fromVersion != -1L) {
            scan = IcebergInputFormat.applyConfig(conf, IcebergInputFormat.createIncrementalAppendScan(table, conf));
        } else {
            scan = IcebergInputFormat.applyConfig(conf, IcebergInputFormat.createTableScan(table, conf));
        }

        boolean allowDataFilesWithinTableLocationOnly = conf.getBoolean(HiveConf.ConfVars.HIVE_ICEBERG_ALLOW_DATAFILES_IN_TABLE_LOCATION_ONLY.varname, HiveConf.ConfVars.HIVE_ICEBERG_ALLOW_DATAFILES_IN_TABLE_LOCATION_ONLY.defaultBoolVal);
        // Resolve the table location only when it is actually validated against: constructing a Path
        // from an unset (null) property throws, which would otherwise break planning for jobs that
        // never configure "iceberg.mr.table.location" and do not use the validation.
        Path tableLocation = allowDataFilesWithinTableLocationOnly
                ? new Path(conf.get("iceberg.mr.table.location"))
                : null;

        try (CloseableIterable<CombinedScanTask> plannedTasks = scan.planTasks()) {
            for (CombinedScanTask task : plannedTasks) {
                if (mustCheckResiduals) {
                    IcebergInputFormat.checkResiduals(task);
                }
                if (allowDataFilesWithinTableLocationOnly) {
                    IcebergInputFormat.validateFileLocations(task, tableLocation);
                }
                splits.add(new IcebergSplit(conf, task));
            }
        } catch (IOException e) {
            throw new UncheckedIOException(String.format("Failed to close table scan: %s", scan), e);
        }

        if (scan instanceof DataTableScan) {
            HiveIcebergStorageHandler.checkAndSkipIoConfigSerialization(conf, table);
        }
        return splits;
    }

    /**
     * Verifies that every data file referenced by the split lies within the table location subtree.
     *
     * @param split the combined scan task whose files are checked
     * @param tableLocation root location the files must be under
     * @throws AuthorizationException on the first file found outside {@code tableLocation}
     */
    private static void validateFileLocations(CombinedScanTask split, Path tableLocation) {
        for (FileScanTask fileTask : split.files()) {
            Path dataFilePath = new Path(fileTask.file().path().toString());
            boolean insideTable = FileUtils.isPathWithinSubtree(dataFilePath, tableLocation);
            if (!insideTable) {
                throw new AuthorizationException("The table contains paths which are outside the table location");
            }
        }
    }

    /**
     * Rejects a combined task if any of its file tasks still carries a residual expression other than
     * {@code alwaysTrue()}.
     *
     * @param task the combined scan task to inspect
     * @throws UnsupportedOperationException for the first file task with a non-trivial residual
     */
    private static void checkResiduals(CombinedScanTask task) {
        for (FileScanTask fileTask : task.files()) {
            Expression residual = fileTask.residual();
            boolean fullySatisfied = residual == null || residual.equals(Expressions.alwaysTrue());
            if (!fullySatisfied) {
                throw new UnsupportedOperationException(String.format("Filter expression %s is not completely satisfied. Additional rows can be returned not satisfied by the filter expression", residual));
            }
        }
    }

    /**
     * Creates a fresh, uninitialized record reader; neither argument is consulted here.
     *
     * @param split the split to be read (unused at creation time)
     * @param context the task attempt context (unused at creation time)
     * @return a new {@code IcebergRecordReader}
     */
    public RecordReader<Void, T> createRecordReader(InputSplit split, TaskAttemptContext context) {
        return new IcebergRecordReader<>();
    }

    private static final class IcebergRecordReader<T>
    extends RecordReader<Void, T> {
        // Fully qualified name of the vectorized reader implementation. Not referenced elsewhere in the
        // visible code; the builder below repeats the same literal.
        private static final String HIVE_VECTORIZED_READER_CLASS = "org.apache.iceberg.mr.hive.vector.HiveVectorizedReader";
        // Reflective handle on the static "reader" method of the vectorized reader; null unless
        // HiveVersion.min(HIVE_3) holds.
        private static final DynMethods.StaticMethod HIVE_VECTORIZED_READER_BUILDER = HiveVersion.min(HiveVersion.HIVE_3) ? DynMethods.builder("reader").impl("org.apache.iceberg.mr.hive.vector.HiveVectorizedReader", Table.class, Path.class, FileScanTask.class, Map.class, TaskAttemptContext.class, Expression.class, Schema.class).buildStatic() : null;
        // All instance fields below are populated by initialize(...).
        private TaskAttemptContext context;
        private Configuration conf;
        // Schema the reader is expected to produce, computed by readSchema(...).
        private Schema expectedSchema;
        // Value of the "schema.name-mapping.default" table property, or null when absent.
        private String nameMapping;
        private boolean reuseContainers;
        private boolean caseSensitive;
        private InputFormatConfig.InMemoryDataModel inMemoryDataModel;
        // Remaining file scan tasks of the split being read.
        private Iterator<FileScanTask> tasks;
        // Last record returned by the current iterator; exposed via getCurrentValue().
        private T current;
        // Iterator over the records of the file scan task currently being read.
        private CloseableIterator<T> currentIterator;
        private Table table;
        private boolean fetchVirtualColumns;

        // Instances are created by the enclosing input format only; state is set up in initialize(...).
        private IcebergRecordReader() {
        }

        /**
         * Prepares the reader for the given split: restores the serialized table from the
         * configuration, reads the relevant settings and opens the first file scan task.
         *
         * @param split the split to read; must be an {@link IcebergSplit}
         * @param newContext the task attempt context supplying the configuration
         */
        public void initialize(InputSplit split, TaskAttemptContext newContext) {
            CombinedScanTask combinedTask = ((IcebergSplit) split).task();
            this.context = newContext;
            this.conf = newContext.getConfiguration();

            // The table is stored in the configuration under a key derived from its identifier.
            String tableIdentifier = this.conf.get("iceberg.mr.table.identifier");
            String serializedTable = this.conf.get("iceberg.mr.serialized.table." + tableIdentifier);
            this.table = (Table) SerializationUtil.deserializeFromBase64(serializedTable);
            HiveIcebergStorageHandler.checkAndSetIoConfig(this.conf, this.table);

            this.tasks = combinedTask.files().iterator();
            this.nameMapping = this.table.properties().get("schema.name-mapping.default");
            this.caseSensitive = this.conf.getBoolean("iceberg.mr.case.sensitive", true);
            this.expectedSchema = IcebergRecordReader.readSchema(this.conf, this.table, this.caseSensitive);
            this.reuseContainers = this.conf.getBoolean("iceberg.mr.reuse.containers", false);
            this.inMemoryDataModel = this.conf.getEnum("iceberg.mr.in.memory.data.model", InputFormatConfig.InMemoryDataModel.GENERIC);
            this.fetchVirtualColumns = InputFormatConfig.fetchVirtualColumns(this.conf);

            // Open the first task eagerly so nextKeyValue() always has a current iterator.
            this.currentIterator = this.nextTask();
        }

        /**
         * Opens the next file scan task and returns an iterator over its records.
         *
         * <p>The plain iterator is returned when virtual columns are not requested or the read is
         * vectorized; otherwise it is wrapped in {@code IcebergAcidUtil.VirtualColumnAwareIterator}.
         *
         * @return iterator over the records of the next task
         * @throws java.util.NoSuchElementException if no task is left (callers check
         *     {@code tasks.hasNext()} first, except for the initial call from {@code initialize})
         */
        private CloseableIterator<T> nextTask() {
            // Declared with its real type: a raw java.util.Iterator cannot be returned as CloseableIterator<T>.
            CloseableIterator<T> taskRecords = this.open(this.tasks.next(), this.expectedSchema).iterator();
            if (!this.fetchVirtualColumns || Utilities.getIsVectorized(this.conf)) {
                return taskRecords;
            }
            // The wrapper's declaration is not visible from this file, so it is instantiated exactly as
            // before (raw) and the unchecked conversion is confined to this one local.
            @SuppressWarnings({"unchecked", "rawtypes"})
            CloseableIterator<T> withVirtualColumns = new IcebergAcidUtil.VirtualColumnAwareIterator(taskRecords, this.expectedSchema, this.conf);
            return withVirtualColumns;
        }

        /**
         * Advances to the next record, moving on to the following file scan task whenever the current
         * iterator is exhausted.
         *
         * @return {@code true} if a record is available via {@code getCurrentValue()}, {@code false}
         *     once all tasks are exhausted (the last iterator is closed in that case)
         * @throws IOException if closing an exhausted iterator fails
         */
        public boolean nextKeyValue() throws IOException {
            while (!this.currentIterator.hasNext()) {
                boolean moreTasks = this.tasks.hasNext();
                // The exhausted iterator is closed regardless of whether another task follows.
                this.currentIterator.close();
                if (!moreTasks) {
                    return false;
                }
                this.currentIterator = this.nextTask();
            }
            this.current = this.currentIterator.next();
            return true;
        }

        /**
         * Records carry no key.
         *
         * @return always {@code null}
         */
        public Void getCurrentKey() {
            return null;
        }

        /**
         * Returns the record fetched by the most recent successful {@code nextKeyValue()} call.
         *
         * @return the current record, or {@code null} if none has been fetched yet
         */
        public T getCurrentValue() {
            return current;
        }

        /**
         * Reports progress by delegating to the task attempt context supplied to {@code initialize}.
         *
         * @return the progress value reported by the context
         */
        public float getProgress() {
            return context.getProgress();
        }

        /**
         * Closes the iterator of the task currently being read, if there is one.
         *
         * <p>{@code currentIterator} is only assigned by {@code initialize} and {@code nextKeyValue};
         * when the reader is closed without a successful {@code initialize} the field is still
         * {@code null}, and closing must then be a no-op rather than fail with a
         * {@link NullPointerException} that would hide the original failure.
         *
         * @throws IOException if the underlying iterator fails to close
         */
        public void close() throws IOException {
            if (this.currentIterator != null) {
                this.currentIterator.close();
            }
        }

        /**
         * Opens a file scan task through the reflectively resolved Hive vectorized reader and applies
         * residual filtering on top of it.
         *
         * @param task the file scan task to read
         * @param readSchema the schema to read with
         * @return the (possibly filtered) vectorized iterable
         * @throws IllegalArgumentException if the file is Avro or the Hive version is below 3
         */
        private CloseableIterable<T> openVectorized(FileScanTask task, Schema readSchema) {
            DataFile dataFile = task.file();
            Preconditions.checkArgument(!dataFile.format().equals(FileFormat.AVRO), "Vectorized execution is not yet supported for Iceberg avro tables. Please turn off vectorization and retry the query.");
            // The reflective builder is null below Hive 3, so this check must precede its use.
            Preconditions.checkArgument(HiveVersion.min(HiveVersion.HIVE_3), "Vectorized read is unsupported for Hive 2 integration.");

            Path dataFilePath = new Path(dataFile.path().toString());
            Map<Integer, ?> idToConstant = this.constantsMap(task, HiveIdentityPartitionConverters::convertConstant);
            Expression residual = HiveIcebergInputFormat.residualForTask(task, this.context.getConfiguration());

            CloseableIterable<T> vectorizedRows = HIVE_VECTORIZED_READER_BUILDER.invoke(this.table, dataFilePath, task, idToConstant, this.context, residual, readSchema);
            return this.applyResidualFiltering(vectorizedRows, residual, readSchema);
        }

        /**
         * Opens a file scan task with the generic readers.
         *
         * <p>Data tasks are served from their in-memory rows through an
         * {@link IcebergInternalRecordWrapper}; all other tasks are read from the decrypted data file
         * with the reader matching its format.
         *
         * @param task the file scan task to read
         * @param readSchema the schema to read with
         * @return an iterable over the task's records
         * @throws UnsupportedOperationException if the file format is not AVRO, ORC or PARQUET
         */
        private CloseableIterable<T> openGeneric(FileScanTask task, Schema readSchema) {
            if (task.isDataTask()) {
                IcebergInternalRecordWrapper wrapper = new IcebergInternalRecordWrapper(this.table.schema().asStruct(), readSchema.asStruct());
                return CloseableIterable.transform(((DataTask)task).rows(), row -> wrapper.wrap((StructLike)row));
            }

            DataFile file = task.file();
            InputFile inputFile = this.table.encryption().decrypt(
                    EncryptedFiles.encryptedInput(
                            this.table.io().newInputFile(file.path().toString()),
                            file.keyMetadata()));

            switch (file.format()) {
                case AVRO:
                    return this.newAvroIterable(inputFile, task, readSchema);
                case ORC:
                    return this.newOrcIterable(inputFile, task, readSchema);
                case PARQUET:
                    return this.newParquetIterable(inputFile, task, readSchema);
                default:
                    throw new UnsupportedOperationException(String.format("Cannot read %s file: %s", file.format().name(), file.path()));
            }
        }

        /**
         * Opens a file scan task according to the configured in-memory data model.
         *
         * <p>PIG is rejected, HIVE is routed to the vectorized reader, and GENERIC reads with the
         * schema required by the delete filter and then applies that filter.
         *
         * @param currentTask the file scan task to open
         * @param readSchema the schema requested by the caller
         * @return an iterable over the task's records
         * @throws UnsupportedOperationException for the PIG model or any model not handled here
         */
        private CloseableIterable<T> open(FileScanTask currentTask, Schema readSchema) {
            switch (this.inMemoryDataModel) {
                case PIG:
                    throw new UnsupportedOperationException("Pig and Hive object models are not supported.");
                case HIVE:
                    return this.openVectorized(currentTask, readSchema);
                case GENERIC: {
                    GenericDeleteFilter deleteFilter = new GenericDeleteFilter(this.table.io(), currentTask, this.table.schema(), readSchema);
                    // The delete filter determines which schema must actually be read from the file.
                    Schema schemaForDeletes = deleteFilter.requiredSchema();
                    return deleteFilter.filter(this.openGeneric(currentTask, schemaForDeletes));
                }
                default:
                    throw new UnsupportedOperationException("Unsupported memory model");
            }
        }

        /**
         * Wraps the iterable in a row-level filter for the residual expression, unless residual
         * filtering is disabled via {@code skip.residual.filtering} or there is nothing to filter.
         *
         * @param iter the records to filter
         * @param residual the residual expression; {@code null} or {@code alwaysTrue()} means no filtering
         * @param readSchema schema used to bind and evaluate the residual
         * @return {@code iter} itself, or a filtered view of it
         */
        private CloseableIterable<T> applyResidualFiltering(CloseableIterable<T> iter, Expression residual, Schema readSchema) {
            boolean skipResidual = this.context.getConfiguration().getBoolean("skip.residual.filtering", false);
            if (skipResidual || residual == null || residual == Expressions.alwaysTrue()) {
                return iter;
            }
            InternalRecordWrapper wrapper = new InternalRecordWrapper(readSchema.asStruct());
            Evaluator evaluator = new Evaluator(readSchema.asStruct(), residual, this.caseSensitive);
            return CloseableIterable.filter(iter, record -> evaluator.eval(wrapper.wrap((StructLike)record)));
        }

        /**
         * Builds an Avro reader for the task's split range and applies residual filtering to its output.
         *
         * @param inputFile the data file to read
         * @param task the file scan task supplying split offsets and partition constants
         * @param readSchema the projection schema
         * @return the (possibly filtered) Avro iterable
         */
        private CloseableIterable<T> newAvroIterable(InputFile inputFile, FileScanTask task, Schema readSchema) {
            Expression residual = HiveIcebergInputFormat.residualForTask(task, this.context.getConfiguration());

            Avro.ReadBuilder builder = Avro.read(inputFile)
                    .project(readSchema)
                    .split(task.start(), task.length());
            if (this.reuseContainers) {
                builder.reuseContainers();
            }
            if (this.nameMapping != null) {
                builder.withNameMapping(NameMappingParser.fromJson(this.nameMapping));
            }
            // The constants map is computed inside the reader function, i.e. when the reader is created.
            builder.createReaderFunc((icebergSchema, avroSchema) ->
                    DataReader.create(icebergSchema, avroSchema, this.constantsMap(task, IdentityPartitionConverters::convertConstant)));

            return this.applyResidualFiltering(builder.build(), residual, readSchema);
        }

        /**
         * Builds a Parquet reader for the task's split range, handing the residual to the reader as a
         * filter and additionally applying residual filtering to its output.
         *
         * @param inputFile the data file to read
         * @param task the file scan task supplying split offsets and partition constants
         * @param readSchema the projection schema
         * @return the (possibly filtered) Parquet iterable
         */
        private CloseableIterable<T> newParquetIterable(InputFile inputFile, FileScanTask task, Schema readSchema) {
            Expression residual = HiveIcebergInputFormat.residualForTask(task, this.context.getConfiguration());

            Parquet.ReadBuilder builder = Parquet.read(inputFile)
                    .project(readSchema)
                    .filter(residual)
                    .caseSensitive(this.caseSensitive)
                    .split(task.start(), task.length());
            if (this.reuseContainers) {
                builder.reuseContainers();
            }
            if (this.nameMapping != null) {
                builder.withNameMapping(NameMappingParser.fromJson(this.nameMapping));
            }
            builder.createReaderFunc(fileSchema ->
                    GenericParquetReaders.buildReader(readSchema, fileSchema, this.constantsMap(task, IdentityPartitionConverters::convertConstant)));

            return this.applyResidualFiltering(builder.build(), residual, readSchema);
        }

        /**
         * Builds an ORC reader for the task's split range and applies residual filtering to its output.
         *
         * <p>The file is projected with a schema stripped of constant, metadata and partition-struct
         * fields, while the reader function is still built against the full {@code readSchema} together
         * with the constants map.
         *
         * @param inputFile the data file to read
         * @param task the file scan task supplying split offsets and partition constants
         * @param readSchema the projection schema
         * @return the (possibly filtered) ORC iterable
         */
        private CloseableIterable<T> newOrcIterable(InputFile inputFile, FileScanTask task, Schema readSchema) {
            Map<Integer, ?> idToConstant = this.constantsMap(task, IdentityPartitionConverters::convertConstant);
            Schema fileProjection = IcebergRecordReader.schemaWithoutConstantsAndMeta(readSchema, idToConstant);
            Expression residual = HiveIcebergInputFormat.residualForTask(task, this.context.getConfiguration());

            ORC.ReadBuilder builder = ORC.read(inputFile)
                    .project(fileProjection)
                    .filter(residual)
                    .caseSensitive(this.caseSensitive)
                    .split(task.start(), task.length());
            if (this.nameMapping != null) {
                builder.withNameMapping(NameMappingParser.fromJson(this.nameMapping));
            }
            builder.createReaderFunc(fileSchema -> GenericOrcReader.buildReader(readSchema, fileSchema, idToConstant));

            return this.applyResidualFiltering(builder.build(), residual, readSchema);
        }

        /**
         * Computes the field-id to constant-value map for a task.
         *
         * <p>Constants are only produced when the expected schema contains field id
         * {@code 0x7FFFFFFA} or projects at least one identity partition source column of the task's
         * spec; otherwise an empty map is returned.
         *
         * @param task the file scan task whose partition values are used
         * @param converter converts partition values to the reader's representation
         * @return the constants map, possibly empty
         */
        private Map<Integer, ?> constantsMap(FileScanTask task, BiFunction<Type, Object, Object> converter) {
            PartitionSpec spec = task.spec();
            Set<Integer> identitySourceIds = spec.identitySourceIds();
            Schema projectedIdentityColumns = TypeUtil.select(this.expectedSchema, identitySourceIds);
            boolean projectsIdentityPartitionColumns = !projectedIdentityColumns.columns().isEmpty();

            // 0x7FFFFFFA == Integer.MAX_VALUE - 5; presumably the inlined value of
            // MetadataColumns.PARTITION_COLUMN_ID -- TODO confirm against the Iceberg version in use.
            boolean projectsPartitionColumn = this.expectedSchema.findField(0x7FFFFFFA) != null;

            if (!projectsPartitionColumn && !projectsIdentityPartitionColumns) {
                return Collections.emptyMap();
            }
            Types.StructType partitionType = Partitioning.partitionType(this.table);
            return PartitionUtil.constantsMap(task, partitionType, converter);
        }

        /**
         * Determines the schema to read with.
         *
         * <p>A schema configured explicitly (via {@code InputFormatConfig.readSchema}) wins and is
         * returned unchanged. Otherwise the table schema is used, narrowed to the selected columns when
         * any are configured, and extended with virtual columns when those are requested.
         *
         * @param conf job configuration
         * @param table the table being read
         * @param caseSensitive whether column selection is case sensitive
         * @return the schema records are expected to have
         */
        private static Schema readSchema(Configuration conf, Table table, boolean caseSensitive) {
            Schema configuredSchema = InputFormatConfig.readSchema(conf);
            if (configuredSchema != null) {
                return configuredSchema;
            }

            String[] selectedColumns = InputFormatConfig.selectedColumns(conf);
            Schema schema = table.schema();
            if (selectedColumns != null) {
                if (caseSensitive) {
                    schema = schema.select(selectedColumns);
                } else {
                    schema = schema.caseInsensitiveSelect(selectedColumns);
                }
            }

            return InputFormatConfig.fetchVirtualColumns(conf)
                    ? IcebergAcidUtil.createFileReadSchemaWithVirtualColums(schema.columns(), table)
                    : schema;
        }

        /**
         * Returns {@code readSchema} without the fields that are not read from the file itself: the
         * ids present in the constants map, the metadata field ids, and the ids of the fields nested
         * in the struct found under field id {@code 0x7FFFFFFA} (if the schema has one).
         *
         * @param readSchema the full read schema
         * @param idToConstant constants keyed by field id
         * @return the schema with those field ids removed
         */
        private static Schema schemaWithoutConstantsAndMeta(Schema readSchema, Map<Integer, ?> idToConstant) {
            // 0x7FFFFFFA == Integer.MAX_VALUE - 5; presumably the inlined value of
            // MetadataColumns.PARTITION_COLUMN_ID -- TODO confirm against the Iceberg version in use.
            Types.NestedField partitionField = readSchema.findField(0x7FFFFFFA);
            Set<Integer> partitionFieldIds;
            if (partitionField == null) {
                partitionFieldIds = Collections.emptySet();
            } else {
                partitionFieldIds = partitionField.type().asStructType().fields().stream()
                        .map(Types.NestedField::fieldId)
                        .collect(Collectors.toSet());
            }

            Set<Integer> excludedIds = Stream.of(idToConstant.keySet(), MetadataColumns.metadataFieldIds(), partitionFieldIds)
                    .flatMap(Collection::stream)
                    .collect(Collectors.toSet());
            return TypeUtil.selectNot(readSchema, excludedIds);
        }
    }
}

