package com.hankcs.hanlp.seg;

import com.hankcs.hanlp.algorithm.Viterbi;
import com.hankcs.hanlp.collection.AhoCorasick.AhoCorasickDoubleArrayTrie;
import com.hankcs.hanlp.collection.trie.DoubleArrayTrie;
import com.hankcs.hanlp.corpus.tag.Nature;
import com.hankcs.hanlp.dictionary.CoreDictionary;
import com.hankcs.hanlp.dictionary.CoreDictionaryTransformMatrixDictionary;
import com.hankcs.hanlp.dictionary.other.CharType;
import com.hankcs.hanlp.seg.NShort.Path.AtomNode;
import com.hankcs.hanlp.seg.common.Graph;
import com.hankcs.hanlp.seg.common.Term;
import com.hankcs.hanlp.seg.common.Vertex;
import com.hankcs.hanlp.seg.common.WordNet;
import com.hankcs.hanlp.utility.TextUtility;
import com.tencent.supersonic.common.pojo.Constants;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.ListIterator;

/* loaded from: input_file:com/hankcs/hanlp/seg/WordBasedSegment.class */
/**
 * Shared building blocks for segmenters that work on a {@link WordNet} of {@link Vertex}
 * candidates: filling the word net from the dictionaries, applying rule-based fixes to a chosen
 * vertex path, and turning vertices into {@link Term}s.
 */
public abstract class WordBasedSegment extends Segment {

    /** Single-character words that turn a preceding number into a time expression. */
    private static final String DATE_UNITS = "月日时分秒";

    /** Characters that are split off when they trail a numeric word. */
    private static final String TRAILING_DELIMITERS = "∶·．／./";

    /**
     * Applies the rule-based fixes to {@code list} and then adds the fixed vertices to
     * {@code wordNet}.
     *
     * @param list    vertex path to fix; modified in place
     * @param wordNet word net that receives the fixed vertices
     */
    protected static void generateWord(List<Vertex> list, WordNet wordNet) {
        fixResultByRule(list);
        wordNet.addAll(list);
    }

    /**
     * Runs the rule-based fixes in a fixed order: merge adjacent numbers, re-tag dash
     * delimiters, split {@code number-number} words, then recognise date/time elements.
     * The order matters because later rules see the output of earlier ones.
     *
     * @param list vertex path to fix; modified in place
     */
    protected static void fixResultByRule(List<Vertex> list) {
        mergeContinueNumIntoOne(list);
        changeDelimiterPOS(list);
        splitMiddleSlashFromDigitalWords(list);
        checkDateElements(list);
    }

    /**
     * Confirms nature {@link Nature#w} on every vertex whose word is one of the dash
     * delimiters {@code "－－"}, {@code "—"} or {@link Constants#MINUS}.
     *
     * @param list vertex path; vertices are re-tagged in place
     */
    static void changeDelimiterPOS(List<Vertex> list) {
        for (Vertex vertex : list) {
            String word = vertex.realWord;
            if (word.equals("－－") || word.equals("—") || word.equals(Constants.MINUS)) {
                vertex.confirmNature(Nature.w);
            }
        }
    }

    /**
     * Splits a word of the form {@code <number><MINUS><number>} into three vertices (number,
     * punctuation, number) when the word is tagged {@link Nature#nx} and the following vertex
     * can be {@link Nature#q} or {@link Nature#n}.
     *
     * <p>{@link Constants#MINUS} is used both as the split regex and as the punctuation text;
     * it is presumably {@code "-"} — confirm against {@code Constants}.
     *
     * @param list vertex path; modified in place
     */
    private static void splitMiddleSlashFromDigitalWords(List<Vertex> list) {
        if (list.size() < 2) {
            return;
        }
        ListIterator<Vertex> it = list.listIterator();
        Vertex current = it.next();
        while (it.hasNext()) {
            Vertex next = it.next();
            if (current.getNature() == Nature.nx && (next.hasNature(Nature.q) || next.hasNature(Nature.n))) {
                // Limit 2 yields at most two parts. A limit of 1 never splits at all, which
                // made the length check below unsatisfiable and this whole rule dead code.
                String[] parts = current.realWord.split(Constants.MINUS, 2);
                if (parts.length == 2
                        && !parts[0].isEmpty() && !parts[1].isEmpty() // never hand "" to isAllNum
                        && TextUtility.isAllNum(parts[0]) && TextUtility.isAllNum(parts[1])) {
                    Vertex head = current.copy();
                    head.realWord = parts[0];
                    head.confirmNature(Nature.m);
                    it.previous(); // back over `next`
                    it.previous(); // back over `current`
                    it.set(head);
                    it.next(); // cursor now sits between `head` and `next`
                    it.add(Vertex.newPunctuationInstance(Constants.MINUS));
                    it.add(Vertex.newNumberInstance(parts[1]));
                    // Step over `next` again so the following iteration pairs it with its
                    // real successor instead of with itself.
                    it.next();
                }
            }
            current = next;
        }
    }

    /**
     * Walks adjacent vertex pairs and, whenever the first word is numeric (Arabic or Chinese
     * numerals as judged by {@link TextUtility}), applies the first matching rule:
     * <ol>
     *   <li>next word is one of 月/日/时/分/秒 or 月份: merge both into one time vertex;</li>
     *   <li>next word is 年: merge if the number passes {@link TextUtility#isYearTime},
     *       otherwise confirm {@link Nature#m};</li>
     *   <li>the number ends with 点: confirm {@link Nature#t};</li>
     *   <li>the number ends with none of {@code ∶·．／./}: confirm {@link Nature#m};</li>
     *   <li>otherwise, if longer than one character: split the trailing delimiter off into its
     *       own punctuation vertex.</li>
     * </ol>
     *
     * @param list vertex path; modified in place
     */
    private static void checkDateElements(List<Vertex> list) {
        if (list.size() < 2) {
            return;
        }
        ListIterator<Vertex> it = list.listIterator();
        Vertex current = it.next();
        while (it.hasNext()) {
            Vertex next = it.next();
            if (isArabicOrChineseNumber(current.realWord)) {
                String nextWord = next.realWord;
                if ((nextWord.length() == 1 && DATE_UNITS.contains(nextWord)) || nextWord.equals("月份")) {
                    mergeDate(it, next, current);
                } else if (nextWord.equals("年")) {
                    if (TextUtility.isYearTime(current.realWord)) {
                        mergeDate(it, next, current);
                    } else {
                        current.confirmNature(Nature.m);
                    }
                } else if (current.realWord.endsWith("点")) {
                    current.confirmNature(Nature.t, true);
                } else {
                    String word = current.realWord;
                    char last = word.charAt(word.length() - 1);
                    if (TRAILING_DELIMITERS.indexOf(last) < 0) {
                        current.confirmNature(Nature.m, true);
                    } else if (word.length() > 1) {
                        Vertex number = Vertex.newNumberInstance(word.substring(0, word.length() - 1));
                        it.previous(); // back over `next`
                        it.previous(); // back over `current`
                        it.set(number);
                        it.next(); // cursor now sits between `number` and `next`
                        it.add(Vertex.newPunctuationInstance(String.valueOf(last)));
                        // Step over `next` again; without this the following iteration would
                        // compare `next` with itself and, if it is also a delimiter-terminated
                        // number, overwrite the punctuation just inserted.
                        it.next();
                    }
                }
            }
            // After a merge `next` has been removed from the list; it is still used as the
            // left-hand word of the following pair, exactly as before.
            current = next;
        }
    }

    /**
     * Replaces the pair ({@code vertex2}, {@code vertex}) with one time vertex whose word is
     * {@code vertex2.realWord + vertex.realWord}.
     *
     * <p>Precondition: the iterator's last {@code next()} call returned {@code vertex} and the
     * element before it is {@code vertex2}. On return the cursor sits right after the merged
     * vertex.
     *
     * @param listIterator iterator positioned just after {@code vertex}
     * @param vertex       second vertex of the pair (the unit word); removed from the list
     * @param vertex2      first vertex of the pair (the number); replaced by the merged vertex
     */
    private static void mergeDate(ListIterator<Vertex> listIterator, Vertex vertex, Vertex vertex2) {
        Vertex merged = Vertex.newTimeInstance(vertex2.realWord + vertex.realWord);
        listIterator.previous();
        listIterator.previous();
        listIterator.set(merged);
        listIterator.next();
        listIterator.next();
        listIterator.remove();
    }

    /**
     * Converts a vertex path to terms by delegating to {@code Segment.convert(list, false)}.
     *
     * @param list vertex path
     * @return the converted terms
     */
    protected static List<Term> convert(List<Vertex> list) {
        return Segment.convert(list, false);
    }

    /**
     * Builds the graph for a word net by delegating to {@link WordNet#toGraph()}.
     *
     * @param wordNet source word net
     * @return the graph produced by {@code wordNet}
     */
    protected static Graph generateBiGraph(WordNet wordNet) {
        return wordNet.toGraph();
    }

    /**
     * Cuts {@code str.substring(i, i2)} into atom nodes based on per-character
     * {@link CharType} codes.
     *
     * <p>The numeric codes below are compared literally; they presumably correspond to the
     * {@code CharType.CT_*} constants (5 single, 6 delimiter, 7 Chinese, 8 letter, 9 number,
     * 10 index, 17 other) — confirm against {@code CharType}.
     *
     * @param str source text
     * @param i   start offset, inclusive
     * @param i2  end offset, exclusive
     * @return atom nodes in text order
     * @throws IllegalArgumentException if {@code i2 < i}
     */
    private static List<AtomNode> atomSegment(String str, int i, int i2) {
        if (i2 < i) {
            // The message now states the condition that actually triggered the failure.
            throw new IllegalArgumentException("start=" + i + " > end=" + i2);
        }
        List<AtomNode> atoms = new ArrayList<>();
        char[] chars = str.substring(i, i2).toCharArray();
        int[] types = new int[chars.length];

        // Pass 1: classify every character, with special handling for '.' and code 8.
        for (int k = 0; k < chars.length; k++) {
            char c = chars[k];
            types[k] = CharType.get(c);
            boolean hasFollower = k < chars.length - 1;
            if (c == '.' && hasFollower && CharType.get(chars[k + 1]) == 9) {
                types[k] = 9; // '.' before a type-9 character joins that run
            } else if (c == '.' && hasFollower && chars[k + 1] >= '0' && chars[k + 1] <= '9') {
                types[k] = 5;
            } else if (types[k] == 8) {
                types[k] = 5; // code 8 is folded into code 5
            }
        }

        // Pass 2: emit atoms. Codes 5 and 9 form runs; everything else is one atom per char.
        StringBuilder run = new StringBuilder();
        int pos = 0;
        while (pos < chars.length) {
            int type = types[pos];
            if (type == 7 || type == 10 || type == 6 || type == 17) {
                atoms.add(new AtomNode(String.valueOf(chars[pos]), type));
                pos++;
            } else if (pos < chars.length - 1 && (type == 5 || type == 9)) {
                run.setLength(0);
                run.append(chars[pos]);
                boolean reachedEnd = true;
                while (pos < chars.length - 1) {
                    pos++;
                    if (types[pos] != type) {
                        // Leave pos on the first character of the next atom.
                        reachedEnd = false;
                        break;
                    }
                    run.append(chars[pos]);
                }
                atoms.add(new AtomNode(run.toString(), type));
                if (reachedEnd) {
                    pos++;
                }
            } else {
                atoms.add(new AtomNode(chars[pos], type));
                pos++;
            }
        }
        return atoms;
    }

    /**
     * Merges every run of adjacent numeric vertices (Arabic or Chinese numerals as judged by
     * {@link TextUtility}) into a single number vertex whose word is the concatenation.
     *
     * @param list vertex path; modified in place
     */
    private static void mergeContinueNumIntoOne(List<Vertex> list) {
        if (list.size() < 2) {
            return;
        }
        ListIterator<Vertex> it = list.listIterator();
        Vertex previous = it.next();
        while (it.hasNext()) {
            Vertex current = it.next();
            if (isArabicOrChineseNumber(previous.realWord) && isArabicOrChineseNumber(current.realWord)) {
                // Keep the merged vertex as `previous` so a third number extends the same run.
                previous = Vertex.newNumberInstance(previous.realWord + current.realWord);
                it.previous();
                it.previous();
                it.set(previous);
                it.next();
                it.next();
                it.remove();
            } else {
                previous = current;
            }
        }
    }

    /** Returns whether {@code word} passes {@code isAllNum} or {@code isAllChineseNum}. */
    private static boolean isArabicOrChineseNumber(String word) {
        return TextUtility.isAllNum(word) || TextUtility.isAllChineseNum(word);
    }

    /**
     * Fills {@code wordNet} with candidate vertices for its character array.
     * <ol>
     *   <li>Every match reported by the core-dictionary trie searcher is added at row
     *       {@code begin + 1}.</li>
     *   <li>If {@code config.forceCustomDictionary} is set, every custom-dictionary hit is
     *       added the same way.</li>
     *   <li>Rows that are still empty are covered with {@code Segment.quickAtomSegment} over
     *       the uncovered character span.</li>
     * </ol>
     *
     * @param wordNet word net to populate; rows are 1-based relative to the character array
     */
    protected void generateWordNet(final WordNet wordNet) {
        final char[] cArr = wordNet.charArray;
        DoubleArrayTrie.Searcher searcher = CoreDictionary.trie.getSearcher(cArr, 0);
        while (searcher.next()) {
            wordNet.add(searcher.begin + 1, new Vertex(new String(cArr, searcher.begin, searcher.length), (CoreDictionary.Attribute) searcher.value, searcher.index));
        }
        if (this.config.forceCustomDictionary) {
            this.customDictionary.parseText(cArr, new AhoCorasickDoubleArrayTrie.IHit<CoreDictionary.Attribute>() {
                @Override
                public void hit(int i, int i2, CoreDictionary.Attribute attribute) {
                    wordNet.add(i + 1, new Vertex(new String(cArr, i, i2 - i), attribute));
                }
            });
        }
        LinkedList<Vertex>[] vertexes = wordNet.getVertexes();
        int row = 1;
        while (row < vertexes.length) {
            if (vertexes[row].isEmpty()) {
                // Extend the gap while the following rows are empty, or while the character
                // there has CharType 11 (presumably CT_CNUM, Chinese numeral — confirm).
                int end = row + 1;
                while (end < vertexes.length - 1 && (vertexes[end].isEmpty() || CharType.get(cArr[end - 1]) == 11)) {
                    end++;
                }
                wordNet.add(row, Segment.quickAtomSegment(cArr, row - 1, end - 1));
                row = end;
            } else {
                // Jump past the row's last vertex.
                row += vertexes[row].getLast().realWord.length();
            }
        }
    }

    /**
     * Produces the index-mode term list for a vertex path. Every vertex except the first and
     * the last contributes its own terms; for vertices longer than two characters, shorter
     * vertices from {@code wordNet} that lie fully inside the vertex are added as well.
     *
     * <p>A shorter vertex is included when it is at least {@code config.indexMode} characters
     * long, or when the main term's nature is {@link Nature#mq} and the shorter vertex can be
     * {@link Nature#q}. Included vertices are also inserted into {@code list}.
     *
     * @param list    vertex path, expected to start and end with boundary vertices; modified
     *                in place
     * @param wordNet full word net used to look up the shorter vertices
     * @return terms with offsets relative to the start of the text; empty if {@code list} is
     *         empty
     */
    protected List<Term> decorateResultForIndexMode(List<Vertex> list, WordNet wordNet) {
        List<Term> terms = new LinkedList<>();
        if (list.isEmpty()) {
            // Nothing to skip over; avoids NoSuchElementException from next() below.
            return terms;
        }
        int line = 1;
        ListIterator<Vertex> listIterator = list.listIterator();
        listIterator.next(); // skip the leading boundary vertex
        int size = list.size() - 2; // captured up front: the list grows inside the loop
        for (int n = 0; n < size; n++) {
            Vertex main = listIterator.next();
            Term mainTerm = Segment.convert(main);
            addTerms(terms, main, line - 1);
            mainTerm.offset = line - 1;
            int mainLength = main.realWord.length();
            if (mainLength > 2) {
                for (int row = line; row < line + mainLength; row++) {
                    Iterator<Vertex> shorter = wordNet.descendingIterator(row);
                    while (shorter.hasNext()) {
                        Vertex candidate = shorter.next();
                        boolean eligible = (mainTerm.nature == Nature.mq && candidate.hasNature(Nature.q))
                                || candidate.realWord.length() >= this.config.indexMode;
                        if (eligible
                                && candidate != main // do not add the main vertex twice
                                && row + candidate.realWord.length() <= line + mainLength) { // stay inside main
                            listIterator.add(candidate);
                            addTerms(terms, candidate, row - 1);
                        }
                    }
                }
            }
            line += mainLength;
        }
        return terms;
    }

    /**
     * Runs part-of-speech tagging over {@code list} by delegating to {@link Viterbi#compute}
     * with the core dictionary transform matrix.
     *
     * @param list vertex path to tag
     */
    protected static void speechTagging(List<Vertex> list) {
        Viterbi.compute(list, CoreDictionaryTransformMatrixDictionary.transformMatrixDictionary);
    }

    /**
     * Appends one term per nature in {@code vertex.attribute}, each carrying the matching
     * frequency and the given offset.
     *
     * @param list   receives the new terms
     * @param vertex vertex whose word and attribute are expanded
     * @param i      offset assigned to every term created
     */
    protected void addTerms(List<Term> list, Vertex vertex, int i) {
        for (int k = 0; k < vertex.attribute.nature.length; k++) {
            Term term = new Term(vertex.realWord, vertex.attribute.nature[k]);
            term.setFrequency(vertex.attribute.frequency[k]);
            term.offset = i;
            list.add(term);
        }
    }
}
