/*
 * Decompiled with CFR 0.152.
 */
package com.hankcs.lucene;

import com.hankcs.hanlp.collection.trie.bintrie.BinTrie;
import com.hankcs.hanlp.corpus.tag.Nature;
import com.hankcs.hanlp.seg.Segment;
import com.hankcs.hanlp.seg.common.Term;
import com.hankcs.hanlp.utility.TextUtility;
import com.hankcs.lucene.PorterStemmer;
import com.hankcs.lucene.SegmentWrapper;
import java.io.BufferedReader;
import java.io.IOException;
import java.util.Set;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;

public class HanLPTokenizer
extends Tokenizer {
    private final CharTermAttribute termAtt = (CharTermAttribute)this.addAttribute(CharTermAttribute.class);
    private final OffsetAttribute offsetAtt = (OffsetAttribute)this.addAttribute(OffsetAttribute.class);
    private final PositionIncrementAttribute positionAttr = (PositionIncrementAttribute)this.addAttribute(PositionIncrementAttribute.class);
    private TypeAttribute typeAtt = (TypeAttribute)this.addAttribute(TypeAttribute.class);
    private SegmentWrapper segment;
    private BinTrie<String> filter;
    private boolean enablePorterStemming;
    private final PorterStemmer stemmer = new PorterStemmer();
    private int totalOffset = 0;

    public HanLPTokenizer(Segment segment, Set<String> filter, boolean enablePorterStemming) {
        this.segment = new SegmentWrapper(this.input, segment);
        if (filter != null && filter.size() > 0) {
            this.filter = new BinTrie();
            for (String stopWord : filter) {
                this.filter.put(stopWord, null);
            }
        }
        this.enablePorterStemming = enablePorterStemming;
    }

    public final boolean incrementToken() throws IOException {
        Term term;
        this.clearAttributes();
        int position = 0;
        boolean un_increased = true;
        while ((term = this.segment.next()) != null) {
            if (!TextUtility.isBlank((CharSequence)term.word)) {
                if (this.enablePorterStemming && term.nature == Nature.nx) {
                    term.word = this.stemmer.stem(term.word);
                }
                if (this.filter == null || !this.filter.containsKey(term.word)) {
                    ++position;
                    un_increased = false;
                }
            }
            if (un_increased) continue;
        }
        if (term != null) {
            this.positionAttr.setPositionIncrement(position);
            this.termAtt.setEmpty().append(term.word);
            this.offsetAtt.setOffset(this.correctOffset(this.totalOffset + term.offset), this.correctOffset(this.totalOffset + term.offset + term.word.length()));
            this.typeAtt.setType(term.nature == null ? "null" : term.nature.toString());
            return true;
        }
        this.totalOffset += this.segment.offset;
        return false;
    }

    public void end() throws IOException {
        super.end();
        this.offsetAtt.setOffset(this.totalOffset, this.totalOffset);
        this.totalOffset = 0;
    }

    public void reset() throws IOException {
        super.reset();
        this.segment.reset(new BufferedReader(this.input));
    }
}

