/*
 * Decompiled with CFR 0.152.
 */
package com.wcohen.ss;

import com.wcohen.ss.AbstractTokenizedStringDistance;
import com.wcohen.ss.BagOfTokens;
import com.wcohen.ss.api.StringWrapper;
import com.wcohen.ss.api.StringWrapperIterator;
import com.wcohen.ss.api.Token;
import com.wcohen.ss.api.Tokenizer;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import org.apache.log4j.Logger;

/**
 * Base class for tokenized string distances that rely on corpus statistics.
 *
 * <p>Training accumulates, per token, the number of training strings in which
 * that token was seen (its document frequency), together with the number of
 * strings processed and the total number of tokens iterated over.
 *
 * <p>This class performs no synchronization of its own.
 */
public abstract class AbstractStatisticalTokenDistance
extends AbstractTokenizedStringDistance {
    // NOTE(review): the logger is registered under the superclass name, which
    // looks like a copy-paste slip. It is kept as-is so that existing log
    // configuration keeps matching; confirm before renaming the category.
    private static final Logger log = Logger.getLogger(AbstractTokenizedStringDistance.class);

    /** Maximum number of "not yet trained" warnings emitted per instance. */
    private static final int MAX_UNTRAINED_WARNINGS = 10;

    /** Maps each token seen in training to an Integer count of the strings it appeared in. */
    protected Map documentFrequency = new HashMap();

    /** Number of strings consumed by {@link #train(StringWrapperIterator)} so far. */
    protected int collectionSize = 0;

    /** Number of tokens iterated over during training, repeats included. */
    protected int totalTokenCount = 0;

    /** How many "not yet trained" warnings have been emitted; never exceeds the cap. */
    private int warningCounter = 0;

    /**
     * Creates a distance that uses the given tokenizer.
     *
     * @param tokenizer tokenizer handed to the superclass constructor
     */
    public AbstractStatisticalTokenDistance(Tokenizer tokenizer) {
        super(tokenizer);
    }

    /** Creates a distance using the superclass no-argument constructor. */
    public AbstractStatisticalTokenDistance() {
    }

    /**
     * Accumulates statistics from the supplied strings.
     *
     * <p>Statistics are added to whatever has been collected already; nothing
     * is reset, so repeated calls are cumulative.
     *
     * @param i iterator over the training strings
     */
    public void train(StringWrapperIterator i) {
        // Tokens already counted for the current string, so that a token
        // contributes at most once per string to its document frequency.
        HashSet<Token> countedInThisString = new HashSet<Token>();
        while (i.hasNext()) {
            BagOfTokens bag = this.asBagOfTokens(i.nextStringWrapper());
            countedInThisString.clear();
            Iterator tokens = bag.tokenIterator();
            while (tokens.hasNext()) {
                ++this.totalTokenCount;
                Token token = (Token)tokens.next();
                // add() returns false when the token was already present.
                if (!countedInThisString.add(token)) {
                    continue;
                }
                Integer previous = (Integer)this.documentFrequency.get(token);
                int updated = previous == null ? 1 : previous.intValue() + 1;
                // Integer.valueOf shares instances for small values, so no
                // hand-maintained constants are needed to save allocations.
                this.documentFrequency.put(token, Integer.valueOf(updated));
            }
            ++this.collectionSize;
        }
    }

    /**
     * Logs a warning when a similarity is requested before any training string
     * has been processed. At most {@value #MAX_UNTRAINED_WARNINGS} warnings are
     * logged per instance; the last one is followed by a notice saying so.
     *
     * @param s first string of the comparison, used only in the log message
     * @param t second string of the comparison, used only in the log message
     */
    protected void checkTrainingHasHappened(StringWrapper s, StringWrapper t) {
        if (this.collectionSize != 0) {
            return;
        }
        // Stop counting once the cap is reached so the counter cannot overflow
        // and start warning again after a very large number of calls.
        if (this.warningCounter >= MAX_UNTRAINED_WARNINGS) {
            return;
        }
        ++this.warningCounter;
        log.warn((Object)(this.getClass() + " not yet trained for sim('" + s + "','" + t + "')"));
        if (this.warningCounter == MAX_UNTRAINED_WARNINGS) {
            log.warn((Object)"(By the way, that's the last warning you'll get about this.)");
        }
    }

    /**
     * Returns the number of training strings in which the token was seen.
     *
     * @param tok token to look up
     * @return the recorded document frequency, or 0 if the token has no entry
     */
    public int getDocumentFrequency(Token tok) {
        Integer frequency = (Integer)this.documentFrequency.get(tok);
        return frequency == null ? 0 : frequency.intValue();
    }

    /**
     * Returns an iterator over every token that has a document-frequency entry.
     *
     * <p>The iterator is taken directly from the key set of the underlying map.
     *
     * @return iterator over the keys of {@link #documentFrequency}
     */
    public Iterator tokenIterator() {
        return this.documentFrequency.keySet().iterator();
    }
}

