package com.tencent.supersonic.headless.chat.knowledge.helper;

import com.google.common.collect.Lists;
import com.hankcs.hanlp.HanLP;
import com.hankcs.hanlp.corpus.tag.Nature;
import com.hankcs.hanlp.dictionary.CoreDictionary;
import com.hankcs.hanlp.dictionary.DynamicCustomDictionary;
import com.hankcs.hanlp.seg.Segment;
import com.hankcs.hanlp.seg.common.Term;
import com.tencent.supersonic.headless.api.pojo.response.S2Term;
import com.tencent.supersonic.headless.chat.knowledge.DatabaseMapResult;
import com.tencent.supersonic.headless.chat.knowledge.DictWord;
import com.tencent.supersonic.headless.chat.knowledge.EmbeddingResult;
import com.tencent.supersonic.headless.chat.knowledge.HadoopFileIOAdapter;
import com.tencent.supersonic.headless.chat.knowledge.HanlpMapResult;
import com.tencent.supersonic.headless.chat.knowledge.MapResult;
import com.tencent.supersonic.headless.chat.knowledge.MultiCustomDictionary;
import com.tencent.supersonic.headless.chat.knowledge.SearchService;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.BeanUtils;
import org.springframework.util.CollectionUtils;
import org.springframework.util.ResourceUtils;

/* loaded from: input_file:com/tencent/supersonic/headless/chat/knowledge/helper/HanlpHelper.class */
/**
 * Static helper around the HanLP segmenter and the project's custom dictionary.
 *
 * <p>The class lazily creates one shared {@link Segment} and one shared
 * {@link DynamicCustomDictionary} (a {@link MultiCustomDictionary}), offers add/remove/reload
 * operations on that dictionary, and converts HanLP {@link Term}s into {@link S2Term}s.
 *
 * <p>When the class is loaded, the HanLP path settings are re-based onto the directory that
 * contains {@code hanlp.properties} (see the static initializer at the bottom of the class).
 */
public class HanlpHelper {
    private static final Logger log = LoggerFactory.getLogger(HanlpHelper.class);

    /** Platform file separator used when composing dictionary/model paths. */
    public static final String FILE_SPILT = File.separator;

    /** Separator constant exposed to callers; not used inside this class. */
    public static final String SPACE_SPILT = "#";

    /** Lazily created shared dictionary; always read it through {@link #getDynamicCustomDictionary()}. */
    private static volatile DynamicCustomDictionary customDictionary;

    /** Lazily created shared segmenter; always read it through {@link #getSegment()}. */
    private static volatile Segment segment;

    /**
     * Returns the shared segmenter, creating and configuring it on first use.
     *
     * <p>Creation is guarded by a null check both outside and inside a lock on this class so the
     * segmenter is built at most once.
     *
     * @return the shared {@link Segment}
     */
    public static Segment getSegment() {
        if (segment == null) {
            synchronized (HanlpHelper.class) {
                if (segment == null) {
                    // The call sequence is kept exactly as it was, including repeated switches.
                    segment = HanLP.newSegment()
                            .enableIndexMode(true)
                            .enableIndexMode(4)
                            .enableCustomDictionary(true)
                            .enableCustomDictionaryForcing(true)
                            .enableOffset(true)
                            .enableJapaneseNameRecognize(false)
                            .enableNameRecognize(false)
                            .enableAllNamedEntityRecognize(false)
                            .enableJapaneseNameRecognize(false)
                            .enableNumberQuantifierRecognize(false)
                            .enablePlaceRecognize(false)
                            .enableOrganizationRecognize(false)
                            .enableCustomDictionary(getDynamicCustomDictionary());
                }
            }
        }
        return segment;
    }

    /**
     * Returns the shared custom dictionary, creating it from
     * {@code HanLP.Config.CustomDictionaryPath} on first use.
     *
     * @return the shared {@link DynamicCustomDictionary}
     */
    public static DynamicCustomDictionary getDynamicCustomDictionary() {
        if (customDictionary == null) {
            synchronized (HanlpHelper.class) {
                if (customDictionary == null) {
                    customDictionary = new MultiCustomDictionary(HanLP.Config.CustomDictionaryPath);
                }
            }
        }
        return customDictionary;
    }

    /**
     * Deletes the dictionary cache files, resets the custom dictionary path, clears the search
     * service and reloads the custom dictionary.
     *
     * @return {@code false} when no custom dictionary path is configured; otherwise the result of
     *         {@link DynamicCustomDictionary#reload()}
     * @throws IOException declared for the file helpers invoked here
     */
    public static boolean reloadCustomDictionary() throws IOException {
        final long startMillis = System.currentTimeMillis();
        if (HanLP.Config.CustomDictionaryPath == null
                || HanLP.Config.CustomDictionaryPath.length == 0) {
            return false;
        }
        // Cache files are removed before the dictionary is touched; keep this order.
        if (HanLP.Config.IOAdapter instanceof HadoopFileIOAdapter) {
            HdfsFileHelper.deleteCacheFile(HanLP.Config.CustomDictionaryPath);
            HdfsFileHelper.resetCustomPath(getDynamicCustomDictionary());
        } else {
            FileHelper.deleteCacheFile(HanLP.Config.CustomDictionaryPath);
            FileHelper.resetCustomPath(getDynamicCustomDictionary());
        }
        SearchService.clear();
        boolean reloaded = getDynamicCustomDictionary().reload();
        if (reloaded) {
            log.info("Custom dictionary has been reloaded in {} milliseconds",
                    Long.valueOf(System.currentTimeMillis() - startMillis));
        }
        return reloaded;
    }

    /**
     * Prefixes every HanLP path setting with the directory that holds {@code hanlp.properties}.
     *
     * <p>Does nothing when the configured IO adapter is a {@link HadoopFileIOAdapter}.
     *
     * @throws FileNotFoundException if {@code hanlp.properties} cannot be resolved to a file
     */
    private static void resetHanlpConfig() throws FileNotFoundException {
        if (HanLP.Config.IOAdapter instanceof HadoopFileIOAdapter) {
            return;
        }
        final String baseDir = getHanlpPropertiesPath();

        // Guard against a missing path array so class initialization cannot fail with an NPE.
        if (HanLP.Config.CustomDictionaryPath != null) {
            HanLP.Config.CustomDictionaryPath = Arrays.stream(HanLP.Config.CustomDictionaryPath)
                    .map(path -> underBaseDir(baseDir, path))
                    .toArray(String[]::new);
        }
        log.info("hanlpPropertiesPath:{},CustomDictionaryPath:{}", baseDir,
                HanLP.Config.CustomDictionaryPath);

        // Fixed: this used to be derived from BiGramDictionaryPath, unlike every other setting,
        // which is re-based from its own current value.
        HanLP.Config.CoreDictionaryPath = underBaseDir(baseDir, HanLP.Config.CoreDictionaryPath);
        HanLP.Config.CoreDictionaryTransformMatrixDictionaryPath =
                underBaseDir(baseDir, HanLP.Config.CoreDictionaryTransformMatrixDictionaryPath);
        HanLP.Config.BiGramDictionaryPath =
                underBaseDir(baseDir, HanLP.Config.BiGramDictionaryPath);
        HanLP.Config.CoreStopWordDictionaryPath =
                underBaseDir(baseDir, HanLP.Config.CoreStopWordDictionaryPath);
        HanLP.Config.CoreSynonymDictionaryDictionaryPath =
                underBaseDir(baseDir, HanLP.Config.CoreSynonymDictionaryDictionaryPath);
        HanLP.Config.PersonDictionaryPath =
                underBaseDir(baseDir, HanLP.Config.PersonDictionaryPath);
        HanLP.Config.PersonDictionaryTrPath =
                underBaseDir(baseDir, HanLP.Config.PersonDictionaryTrPath);
        HanLP.Config.PinyinDictionaryPath =
                underBaseDir(baseDir, HanLP.Config.PinyinDictionaryPath);
        HanLP.Config.TranslatedPersonDictionaryPath =
                underBaseDir(baseDir, HanLP.Config.TranslatedPersonDictionaryPath);
        HanLP.Config.JapanesePersonDictionaryPath =
                underBaseDir(baseDir, HanLP.Config.JapanesePersonDictionaryPath);
        HanLP.Config.PlaceDictionaryPath =
                underBaseDir(baseDir, HanLP.Config.PlaceDictionaryPath);
        HanLP.Config.PlaceDictionaryTrPath =
                underBaseDir(baseDir, HanLP.Config.PlaceDictionaryTrPath);
        HanLP.Config.OrganizationDictionaryPath =
                underBaseDir(baseDir, HanLP.Config.OrganizationDictionaryPath);
        HanLP.Config.OrganizationDictionaryTrPath =
                underBaseDir(baseDir, HanLP.Config.OrganizationDictionaryTrPath);
        HanLP.Config.CharTypePath = underBaseDir(baseDir, HanLP.Config.CharTypePath);
        HanLP.Config.CharTablePath = underBaseDir(baseDir, HanLP.Config.CharTablePath);
        HanLP.Config.PartOfSpeechTagDictionary =
                underBaseDir(baseDir, HanLP.Config.PartOfSpeechTagDictionary);
        HanLP.Config.WordNatureModelPath =
                underBaseDir(baseDir, HanLP.Config.WordNatureModelPath);
        HanLP.Config.MaxEntModelPath = underBaseDir(baseDir, HanLP.Config.MaxEntModelPath);
        HanLP.Config.NNParserModelPath = underBaseDir(baseDir, HanLP.Config.NNParserModelPath);
        HanLP.Config.PerceptronParserModelPath =
                underBaseDir(baseDir, HanLP.Config.PerceptronParserModelPath);
        HanLP.Config.CRFSegmentModelPath =
                underBaseDir(baseDir, HanLP.Config.CRFSegmentModelPath);
        HanLP.Config.HMMSegmentModelPath =
                underBaseDir(baseDir, HanLP.Config.HMMSegmentModelPath);
        HanLP.Config.CRFCWSModelPath = underBaseDir(baseDir, HanLP.Config.CRFCWSModelPath);
        HanLP.Config.CRFPOSModelPath = underBaseDir(baseDir, HanLP.Config.CRFPOSModelPath);
        HanLP.Config.CRFNERModelPath = underBaseDir(baseDir, HanLP.Config.CRFNERModelPath);
        HanLP.Config.PerceptronCWSModelPath =
                underBaseDir(baseDir, HanLP.Config.PerceptronCWSModelPath);
        HanLP.Config.PerceptronPOSModelPath =
                underBaseDir(baseDir, HanLP.Config.PerceptronPOSModelPath);
        HanLP.Config.PerceptronNERModelPath =
                underBaseDir(baseDir, HanLP.Config.PerceptronNERModelPath);
    }

    /** Joins {@code baseDir} and {@code relativePath} with {@link #FILE_SPILT}. */
    private static String underBaseDir(String baseDir, String relativePath) {
        return baseDir + FILE_SPILT + relativePath;
    }

    /**
     * Returns the parent directory of the {@code hanlp.properties} classpath resource.
     *
     * @return the directory path as reported by {@link File#getParent()}
     * @throws FileNotFoundException if the resource cannot be resolved to a file
     */
    public static String getHanlpPropertiesPath() throws FileNotFoundException {
        File propertiesFile = ResourceUtils.getFile("classpath:hanlp.properties");
        return propertiesFile.getParent();
    }

    /**
     * Inserts a word with its nature/frequency string into the shared custom dictionary.
     *
     * @param dictWord the word to insert
     * @return the result of {@link DynamicCustomDictionary#insert(String, String)}
     */
    public static boolean addToCustomDictionary(DictWord dictWord) {
        log.debug("dictWord:{}", dictWord);
        return getDynamicCustomDictionary().insert(dictWord.getWord(),
                dictWord.getNatureWithFrequency());
    }

    /**
     * Removes one nature of a word from the shared custom dictionary.
     *
     * <p>The word is removed entirely and then re-added with every nature/frequency pair whose
     * nature differs from {@code dictWord.getNature()}. If no pair remains, the word stays
     * removed. The search service is notified with the remaining natures. Nothing happens when
     * the word is not present in the dictionary.
     *
     * @param dictWord the word and nature to remove
     */
    public static void removeFromCustomDictionary(DictWord dictWord) {
        log.debug("dictWord:{}", dictWord);
        CoreDictionary.Attribute attribute = getDynamicCustomDictionary().get(dictWord.getWord());
        if (attribute == null) {
            return;
        }
        log.info("get attribute:{}", attribute);
        getDynamicCustomDictionary().remove(dictWord.getWord());

        StringBuilder kept = new StringBuilder();
        List<Nature> keptNatures = new ArrayList<>();
        for (int i = 0; i < attribute.nature.length; i++) {
            Nature nature = attribute.nature[i];
            if (nature.toString().equals(dictWord.getNature())) {
                continue;
            }
            kept.append(nature.toString()).append(" ");
            kept.append(attribute.frequency[i]).append(" ");
            keptNatures.add(nature);
        }

        String natureWithFrequency = kept.toString();
        log.info("filtered natureWithFrequency:{}", natureWithFrequency);
        if (StringUtils.isNotBlank(natureWithFrequency)) {
            // Drop the trailing separator before handing the string back to the dictionary.
            getDynamicCustomDictionary().add(dictWord.getWord(),
                    natureWithFrequency.substring(0, natureWithFrequency.length() - 1));
        }
        SearchService.remove(dictWord, keptNatures.toArray(new Nature[0]));
    }

    /**
     * Rewrites {@code list} in place, replacing lower-letter entries found in the custom
     * dictionary with the entries produced by {@link #addLetterOriginal}.
     *
     * <p>An entry is kept unchanged when its name is not a lower-letter name (per
     * {@link MultiCustomDictionary#isLowerLetter}), is not in the dictionary, or yields no
     * originals. A {@code null} or empty list is left untouched.
     *
     * @param list the results to rewrite; must be modifiable
     * @param <T> the concrete result type
     */
    public static <T extends MapResult> void transLetterOriginal(List<T> list) {
        if (CollectionUtils.isEmpty(list)) {
            return;
        }
        List<T> rewritten = new ArrayList<>();
        for (T item : list) {
            boolean replaced = false;
            String name = item.getName();
            if (MultiCustomDictionary.isLowerLetter(name)) {
                // Go through the accessor: the backing field is null until first use.
                DynamicCustomDictionary dictionary = getDynamicCustomDictionary();
                if (dictionary.contains(name)) {
                    CoreDictionary.Attribute attribute = dictionary.get(name);
                    if (attribute != null) {
                        replaced = addLetterOriginal(rewritten, item, attribute);
                    }
                }
            }
            if (!replaced) {
                rewritten.add(item);
            }
        }
        list.clear();
        list.addAll(rewritten);
    }

    /**
     * Appends to {@code list} one copy of {@code t} per "original" recorded in {@code attribute}.
     *
     * <ul>
     *   <li>{@link HanlpMapResult}: one entry per nature for which
     *       {@code attribute.getOriginal(...)} is non-null.</li>
     *   <li>{@link DatabaseMapResult} and {@link EmbeddingResult}: one entry per element of
     *       {@code attribute.getOriginals()}, copying the other fields from {@code t}.</li>
     * </ul>
     *
     * @param list the list to append to
     * @param t the result being expanded
     * @param attribute the dictionary attribute of {@code t}'s name; may be {@code null}
     * @param <T> the concrete result type
     * @return {@code true} if at least one entry was appended
     */
    @SuppressWarnings("unchecked") // each created object has the same runtime class as t
    public static <T extends MapResult> boolean addLetterOriginal(List<T> list, T t,
            CoreDictionary.Attribute attribute) {
        if (attribute == null) {
            return false;
        }
        boolean added = false;
        if (t instanceof HanlpMapResult) {
            HanlpMapResult source = (HanlpMapResult) t;
            for (String nature : source.getNatures()) {
                String original = attribute.getOriginal(Nature.fromString(nature));
                if (original != null) {
                    list.add((T) new HanlpMapResult(original, Arrays.asList(nature),
                            source.getDetectWord()));
                    added = true;
                }
            }
        } else if (t instanceof DatabaseMapResult) {
            List<String> originals = attribute.getOriginals();
            if (!CollectionUtils.isEmpty(originals)) {
                DatabaseMapResult source = (DatabaseMapResult) t;
                for (String original : originals) {
                    DatabaseMapResult copy = new DatabaseMapResult();
                    copy.setName(original);
                    copy.setSchemaElement(source.getSchemaElement());
                    copy.setDetectWord(t.getDetectWord());
                    list.add((T) copy);
                    added = true;
                }
            }
        } else if (t instanceof EmbeddingResult) {
            List<String> originals = attribute.getOriginals();
            if (!CollectionUtils.isEmpty(originals)) {
                EmbeddingResult source = (EmbeddingResult) t;
                for (String original : originals) {
                    EmbeddingResult copy = new EmbeddingResult();
                    copy.setName(original);
                    copy.setDetectWord(t.getDetectWord());
                    copy.setId(source.getId());
                    copy.setMetadata(source.getMetadata());
                    copy.setDistance(source.getDistance());
                    list.add((T) copy);
                    added = true;
                }
            }
        }
        return added;
    }

    /**
     * Segments the lower-cased text and converts the matching terms to {@link S2Term}s.
     *
     * <p>Only terms whose nature starts with {@code "_"} are kept; each is expanded through
     * {@link #transform2ApiTerm(Term, Map)}.
     *
     * @param str the text to segment; must not be {@code null}
     * @param map passed through to {@link #transform2ApiTerm(Term, Map)}
     * @return the converted terms, in segmentation order
     */
    public static List<S2Term> getTerms(String str, Map<Long, List<Long>> map) {
        List<Term> terms = getSegment().seg(str.toLowerCase());
        return terms.stream()
                .filter(term -> term.getNature().startsWith("_"))
                .map(term -> transform2ApiTerm(term, map))
                .flatMap(List::stream)
                .collect(Collectors.toList());
    }

    /**
     * Converts one HanLP term into zero or more {@link S2Term}s.
     *
     * <p>One {@link S2Term} is produced per nature string returned by
     * {@code NatureHelper.changeModel2DataSet}; the term's properties are bean-copied and the
     * nature and frequency are then set explicitly.
     *
     * @param term the source term
     * @param map passed through to {@code NatureHelper.changeModel2DataSet}
     * @return the converted terms; empty when no nature string is returned
     */
    public static List<S2Term> transform2ApiTerm(Term term, Map<Long, List<Long>> map) {
        List<S2Term> converted = Lists.newArrayList();
        List<String> natures = NatureHelper.changeModel2DataSet(String.valueOf(term.getNature()), map);
        for (String nature : natures) {
            S2Term apiTerm = new S2Term();
            BeanUtils.copyProperties(term, apiTerm);
            apiTerm.setNature(Nature.create(nature));
            apiTerm.setFrequency(term.getFrequency());
            converted.add(apiTerm);
        }
        return converted;
    }

    // Re-base the HanLP paths once at class-load time. A missing hanlp.properties is logged
    // rather than rethrown so that class initialization still succeeds.
    static {
        try {
            resetHanlpConfig();
        } catch (FileNotFoundException e) {
            log.error("resetHanlpConfig error", e);
        }
    }
}
