package org.apache.rya.indexing.pcj.fluo.app.batch;
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
import java.util.HashSet;
import java.util.Iterator;
import java.util.Optional;
import java.util.Set;

import org.apache.fluo.api.client.TransactionBase;
import org.apache.fluo.api.client.scanner.ColumnScanner;
import org.apache.fluo.api.client.scanner.RowScanner;
import org.apache.fluo.api.data.Bytes;
import org.apache.fluo.api.data.Column;
import org.apache.fluo.api.data.ColumnValue;
import org.apache.fluo.api.data.RowColumn;
import org.apache.fluo.api.data.Span;
import org.apache.log4j.Logger;
import org.apache.rya.api.function.join.IterativeJoin;
import org.apache.rya.api.function.join.LazyJoiningIterator.Side;
import org.apache.rya.api.function.join.LeftOuterJoin;
import org.apache.rya.api.function.join.NaturalJoin;
import org.apache.rya.api.model.VisibilityBindingSet;
import org.apache.rya.indexing.pcj.fluo.app.batch.BatchInformation.Task;
import org.apache.rya.indexing.pcj.fluo.app.query.FluoQueryColumns;
import org.apache.rya.indexing.pcj.fluo.app.query.JoinMetadata;
import org.apache.rya.indexing.pcj.fluo.app.util.BindingHashShardingFunction;
import org.apache.rya.indexing.pcj.storage.accumulo.VariableOrder;
import org.apache.rya.indexing.pcj.storage.accumulo.VisibilityBindingSetSerDe;

import com.google.common.base.Preconditions;

/**
 * Batch updater for join nodes. It joins the {@link VisibilityBindingSet} carried by a
 * {@link JoinBatchInformation} against a size-limited scan of the batch's Span and applies
 * the batch's {@link Task} to every join result in the
 * {@link FluoQueryColumns#JOIN_BINDING_SET} column.
 */
public class JoinBatchBindingSetUpdater extends AbstractBatchBindingSetUpdater {

    private static final Logger log = Logger.getLogger(JoinBatchBindingSetUpdater.class);
    private static final VisibilityBindingSetSerDe BS_SERDE = new VisibilityBindingSetSerDe();

    /**
     * Processes a {@link JoinBatchInformation} for the node whose id is encoded in {@code row}.
     * <p>
     * At most {@link JoinBatchInformation#getBatchSize()} BindingSets are read from the Span of
     * the batch and joined with {@link JoinBatchInformation#getBs()}. Each join result is then
     * added or deleted as dictated by {@link JoinBatchInformation#getTask()}. If the scan stopped
     * because the batch size was reached, the Span of the batch is advanced using the row of the
     * first unprocessed BindingSet and the batch is registered again so that the remaining
     * BindingSets are handled by a follow-up job.
     *
     * @param tx - Fluo transaction in which the batch operation is performed
     * @param row - batch row from which the node id is extracted
     * @param batch - batch to process; must be a {@link JoinBatchInformation}
     * @throws Exception
     */
    @Override
    public void processBatch(final TransactionBase tx, final Bytes row, final BatchInformation batch) throws Exception {
        super.processBatch(tx, row, batch);
        final String nodeId = BatchRowKeyUtil.getNodeId(row);
        Preconditions.checkArgument(batch instanceof JoinBatchInformation);
        final JoinBatchInformation joinBatch = (JoinBatchInformation) batch;
        final Task task = joinBatch.getTask();

        final IterativeJoin joinAlgorithm = chooseJoinAlgorithm(joinBatch);

        // Collect up to one batch of scanned BindingSets; a present value marks where to resume.
        final Set<VisibilityBindingSet> scannedResults = new HashSet<>();
        final Optional<RowColumn> resumePoint = fillSiblingBatch(tx, joinBatch, scannedResults);

        // The BindingSet of the batch goes on the side of the join it is declared for.
        final VisibilityBindingSet bs = joinBatch.getBs();
        final Iterator<VisibilityBindingSet> joinResults = joinBatch.getSide() == Side.LEFT
                ? joinAlgorithm.newLeftResult(bs, scannedResults.iterator())
                : joinAlgorithm.newRightResult(scannedResults.iterator(), bs);

        // The variable order of the join node is needed to build the row key of each result.
        final JoinMetadata joinMetadata = CACHE.readJoinMetadata(tx, nodeId);
        final VariableOrder joinVarOrder = joinMetadata.getVariableOrder();
        while (joinResults.hasNext()) {
            final VisibilityBindingSet joinResult = joinResults.next();
            final Bytes serializedResult = BS_SERDE.serialize(joinResult);
            final Bytes resultRow = BindingHashShardingFunction.addShard(nodeId, joinVarOrder, joinResult);
            processTask(tx, task, resultRow, FluoQueryColumns.JOIN_BINDING_SET, serializedResult);
        }

        // Batch limit was met: narrow the span and register the batch again for the remainder.
        if (resumePoint.isPresent()) {
            joinBatch.setSpan(getNewSpan(resumePoint.get(), joinBatch.getSpan()));
            BatchInformationDAO.addBatch(tx, nodeId, joinBatch);
        }
    }

    /**
     * Picks the join implementation matching the join type of the batch.
     *
     * @param joinBatch - batch whose join type is inspected
     * @return a new {@link NaturalJoin} or {@link LeftOuterJoin}
     * @throws RuntimeException if the join type is neither of the two supported types
     */
    private IterativeJoin chooseJoinAlgorithm(final JoinBatchInformation joinBatch) {
        switch (joinBatch.getJoinType()) {
        case NATURAL_JOIN:
            return new NaturalJoin();
        case LEFT_OUTER_JOIN:
            return new LeftOuterJoin();
        default:
            throw new RuntimeException("Unsupported JoinType: " + joinBatch.getJoinType());
        }
    }

    /**
     * Applies a single task to one row/column of the Fluo table. {@code Add} writes the value,
     * {@code Delete} removes the column; any other task only produces a trace log entry and
     * leaves the table untouched.
     *
     * @param tx - Fluo transaction used for the write or delete
     * @param task - operation to perform
     * @param row - row to operate on
     * @param column - column to operate on
     * @param value - value written when the task is {@code Add}
     */
    private void processTask(final TransactionBase tx, final Task task, final Bytes row, final Column column, final Bytes value) {
        switch (task) {
        case Add:
            tx.set(row, column, value);
            return;
        case Delete:
            tx.delete(row, column);
            return;
        case Update:
            log.trace("The Task Update is not supported for JoinBatchBindingSetUpdater.  Batch will not be processed.");
            return;
        default:
            log.trace("Invalid Task type.  Aborting batch operation.");
            return;
        }
    }

    /**
     * Scans the Span and Column specified by the {@link JoinBatchInformation} and adds the
     * deserialized BindingSets to {@code bsSet} until either the scan is exhausted or the set
     * already holds the batch size while more entries remain.
     *
     * @param tx - Fluo transaction in which batch operation is performed
     * @param batch - batch order to be processed
     * @param bsSet - set that batch results are added to
     * @return the row (paired with the scanned column) holding the first unprocessed entry
     *         when the batch limit was met, otherwise an empty Optional
     * @throws Exception
     */
    private Optional<RowColumn> fillSiblingBatch(final TransactionBase tx, final JoinBatchInformation batch, final Set<VisibilityBindingSet> bsSet) throws Exception {

        final Span span = batch.getSpan();
        final Column column = batch.getColumn();
        final int limit = batch.getBatchSize();

        final RowScanner rowScanner = tx.scanner().over(span).fetch(column).byRow().build();
        final Iterator<ColumnScanner> rows = rowScanner.iterator();

        Bytes currentRow = span.getStart().getRow();
        boolean limitReached = false;
        while (rows.hasNext() && !limitReached) {
            final ColumnScanner rowEntries = rows.next();
            currentRow = rowEntries.getRow();
            limitReached = drainRowIntoBatch(rowEntries.iterator(), bsSet, limit);
        }

        return limitReached ? Optional.of(new RowColumn(currentRow, column)) : Optional.<RowColumn>empty();
    }

    /**
     * Moves the entries of one row into {@code bsSet}, stopping before an entry is consumed
     * once the set has reached {@code limit}.
     *
     * @param values - remaining column values of the row
     * @param bsSet - set that deserialized BindingSets are added to
     * @param limit - maximum size of {@code bsSet}
     * @return true if the limit was reached while the row still had an entry left
     * @throws Exception
     */
    private boolean drainRowIntoBatch(final Iterator<ColumnValue> values, final Set<VisibilityBindingSet> bsSet, final int limit) throws Exception {
        while (values.hasNext()) {
            if (bsSet.size() >= limit) {
                return true;
            }
            bsSet.add(BS_SERDE.deserialize(values.next().getValue()));
        }
        return false;
    }
}
