/*
 * Decompiled with CFR 0.152.
 */
package com.github.tjake.jlama.safetensors.tokenizer;

import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.github.tjake.jlama.safetensors.tokenizer.BPETokenizer;
import com.google.common.base.Preconditions;
import com.google.common.collect.BiMap;
import com.google.common.collect.HashBiMap;
import com.google.common.collect.ImmutableBiMap;
import com.google.common.collect.ImmutableList;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.regex.Matcher;
import java.util.stream.Collectors;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class TokenizerModel {
    // Class-wide SLF4J logger; the nested NormalizerItem also logs through it.
    private static final Logger logger = LoggerFactory.getLogger(TokenizerModel.class);
    // Value of the JSON "type" property.
    @JsonProperty(value="type")
    public final String type;
    // Value of the JSON "unk_token" property.
    @JsonProperty(value="unk_token")
    public final String unkToken;
    // Value of the JSON "fuse_unk" property.
    @JsonProperty(value="fuse_unk")
    public final boolean fuseUnk;
    // Value of the JSON "byte_fallback" property.
    @JsonProperty(value="byte_fallback")
    public final boolean byteFallback;
    // Bidirectional token-text <-> id map built from the JSON "vocab" property;
    // setAddedTokens also inserts every added token into it.
    @JsonProperty(value="vocab")
    public final BiMap<String, Long> vocabLookup;
    // Maps each entry of the JSON "merges" list to its index in that list (empty when the list is absent).
    @JsonProperty(value="merges")
    public final Map<String, Long> merges;
    // Optional pre-tokenizer; stays null until setPreTokenizer receives a non-null value.
    private PreTokenizer preTokenizer;
    // Optional normalizer; stays null until setNormalizer is called.
    private Normalizer normalizer;
    // Added tokens (content <-> id); replaced by an immutable copy in setAddedTokens.
    private BiMap<String, Long> addedTokens = HashBiMap.create();
    // Subset of the added tokens whose "special" flag is true; replaced by an immutable copy in setAddedTokens.
    private BiMap<String, Long> specialTokens = HashBiMap.create();
    // Alternation of all quoted added-token strings; null until setAddedTokens receives a non-empty list.
    private java.util.regex.Pattern addedTokenPattern;
    // Legacy flag; overwritten by setPreTokenizer (from PreTokenizer.isLegacy) and by setLegacy.
    private boolean legacy = false;
    // Prompt templates, empty until setPromptTemplates receives a non-null map.
    private Optional<Map<String, String>> promptTemplates = Optional.empty();
    // True when any prompt template contains "tools" (compared in lower case); see setPromptTemplates.
    private boolean hasToolSupport = false;
    // End-of-sequence token text; defaults to the empty string.
    private String eosToken = "";
    // Beginning-of-sequence token text; defaults to the empty string.
    private String bosToken = "";
    // True only when the JSON "ignore_merges" property was present and true.
    private final boolean ignoreMerges;

    @JsonCreator
    public TokenizerModel(@JsonProperty(value="type") String type, @JsonProperty(value="unk_token") String unkToken, @JsonProperty(value="fuse_unk") boolean fuseUnk, @JsonProperty(value="byte_fallback") boolean byteFallback, @JsonProperty(value="vocab") Map<String, Long> vocabLookup, @JsonProperty(value="ignore_merges") Boolean ignoreMerges, @JsonProperty(value="merges") List<String> merges) {
        this.type = type;
        this.unkToken = unkToken;
        this.fuseUnk = fuseUnk;
        this.byteFallback = byteFallback;
        this.vocabLookup = HashBiMap.create(vocabLookup);
        this.ignoreMerges = ignoreMerges != null && ignoreMerges != false;
        this.merges = new HashMap<String, Long>();
        if (merges != null) {
            for (int i = 0; i < merges.size(); ++i) {
                this.merges.put(merges.get(i), Long.valueOf(i));
            }
        }
    }

    /**
     * @return the configured pre-tokenizer, or {@code null} if {@link #setPreTokenizer} was never
     *         given a non-null value
     */
    public PreTokenizer preTokenizer() {
        return this.preTokenizer;
    }

    /**
     * Installs a pre-tokenizer and copies its {@code isLegacy} flag into this model.
     * A {@code null} argument leaves both the current pre-tokenizer and the legacy flag untouched.
     *
     * @param preTokenizer the pre-tokenizer to use, or {@code null} for no change
     */
    public void setPreTokenizer(PreTokenizer preTokenizer) {
        if (preTokenizer == null) {
            return;
        }
        this.legacy = preTokenizer.isLegacy;
        this.preTokenizer = preTokenizer;
    }

    /**
     * @return the configured normalizer, or {@code null} if none has been set
     */
    public Normalizer normalizer() {
        return this.normalizer;
    }

    /**
     * Replaces the normalizer unconditionally; passing {@code null} clears it.
     *
     * @param normalizer the normalizer to store
     */
    public void setNormalizer(Normalizer normalizer) {
        this.normalizer = normalizer;
    }

    /**
     * Registers added tokens and rebuilds the pattern used to find them in text.
     *
     * <p>Each entry is read through its {@code "content"} (String), {@code "id"} (any
     * {@link Number}) and optional {@code "special"} (Boolean) keys. Every token is recorded as
     * an added token and inserted into {@link #vocabLookup}; tokens whose {@code "special"} value
     * is {@code true} are also recorded as special tokens. Tokens registered by earlier calls
     * are kept. A {@code null} or empty list is a no-op.
     *
     * @param addedTokens token descriptors, may be {@code null}
     * @throws IllegalArgumentException if an id is already bound to a different token text
     *         (contract of {@link BiMap#put})
     */
    public void setAddedTokens(List<Map<String, Object>> addedTokens) {
        if (addedTokens == null || addedTokens.isEmpty()) {
            return;
        }

        // Work on mutable copies: after a previous call the fields hold ImmutableBiMaps,
        // which reject put().
        BiMap<String, Long> added = HashBiMap.create(this.addedTokens);
        BiMap<String, Long> special = HashBiMap.create(this.specialTokens);

        for (Map<String, Object> token : addedTokens) {
            String content = (String) token.get("content");
            // Accept any Number: a JSON parser may produce Integer or Long depending on magnitude.
            Long id = ((Number) token.get("id")).longValue();
            added.put(content, id);
            this.vocabLookup.put(content, id);
            if (Boolean.TRUE.equals(token.get("special"))) {
                special.put(content, id);
            }
        }

        // Regex alternation picks the first alternative that matches, so longer tokens must come
        // first or a token that is a prefix of another would always win. List.sort is stable,
        // which keeps insertion order among tokens of equal length.
        List<String> delimiters = new ArrayList<>(added.keySet());
        delimiters.sort((a, b) -> Integer.compare(b.length(), a.length()));
        String regex = delimiters.stream()
            .map(java.util.regex.Pattern::quote)
            .collect(Collectors.joining("|"));

        this.addedTokens = ImmutableBiMap.copyOf(added);
        this.specialTokens = ImmutableBiMap.copyOf(special);
        this.addedTokenPattern = java.util.regex.Pattern.compile(regex);
    }

    /**
     * @return {@code true} only when the {@code ignore_merges} property was present and true at construction
     */
    public boolean ignoreMerges() {
        return this.ignoreMerges;
    }

    /**
     * @return the added-token map (content to id). This is the internal map itself, not a copy:
     *         an empty mutable map initially, an immutable one once {@link #setAddedTokens}
     *         has processed a non-empty list
     */
    public Map<String, Long> addedTokens() {
        return this.addedTokens;
    }

    /**
     * @return the pattern matching any added token, or {@code null} if {@link #setAddedTokens}
     *         has not yet processed a non-empty list
     */
    public java.util.regex.Pattern addedTokenPattern() {
        return this.addedTokenPattern;
    }

    /**
     * @return the legacy flag; {@code false} by default, overwritten by {@link #setPreTokenizer}
     *         and {@link #setLegacy}
     */
    public boolean isLegacy() {
        return this.legacy;
    }

    /**
     * Overrides the legacy flag. A later {@link #setPreTokenizer} call with a non-null
     * argument overwrites it again.
     *
     * @param legacy the new flag value
     */
    public void setLegacy(boolean legacy) {
        this.legacy = legacy;
    }

    /**
     * @return the prompt templates, or an empty {@link Optional} if {@link #setPromptTemplates}
     *         has not been given a non-null map
     */
    public Optional<Map<String, String>> promptTemplates() {
        return this.promptTemplates;
    }

    /**
     * Stores the prompt templates and derives the tool-support flag from them: the flag becomes
     * {@code true} when any template text, lower-cased, contains {@code "tools"}.
     * A {@code null} map leaves the current state unchanged.
     *
     * @param promptTemplates the templates to store, or {@code null} for no change
     */
    public void setPromptTemplates(Map<String, String> promptTemplates) {
        if (promptTemplates == null) {
            return;
        }
        boolean mentionsTools = false;
        for (String template : promptTemplates.values()) {
            if (template.toLowerCase().contains("tools")) {
                mentionsTools = true;
                break;
            }
        }
        this.hasToolSupport = mentionsTools;
        this.promptTemplates = Optional.of(promptTemplates);
    }

    /**
     * @return the flag computed by {@link #setPromptTemplates}; {@code false} until then
     */
    public boolean hasToolSupport() {
        return this.hasToolSupport;
    }

    /**
     * Sets the end-of-sequence token text; the value is stored as given, without validation.
     *
     * @param eosToken the token text
     */
    public void setEosToken(String eosToken) {
        this.eosToken = eosToken;
    }

    /**
     * @return the end-of-sequence token text; the empty string unless {@link #setEosToken} changed it
     */
    public String eosToken() {
        return this.eosToken;
    }

    /**
     * Sets the beginning-of-sequence token text; the value is stored as given, without validation.
     *
     * @param bosToken the token text
     */
    public void setBosToken(String bosToken) {
        this.bosToken = bosToken;
    }

    /**
     * @return the beginning-of-sequence token text; the empty string unless {@link #setBosToken} changed it
     */
    public String bosToken() {
        return this.bosToken;
    }

    /**
     * Tells whether a token id belongs to an added token flagged as special.
     *
     * @param token the token id
     * @return {@code true} if the id is registered as a special token
     */
    public boolean isSpecialToken(long token) {
        Long id = Long.valueOf(token);
        return specialTokens.containsValue(id);
    }

    /**
     * Tells whether a token text belongs to an added token flagged as special.
     *
     * @param token the token text
     * @return {@code true} if the text is registered as a special token
     */
    public boolean isSpecialToken(String token) {
        return specialTokens.containsKey(token);
    }

    /**
     * Splits {@code input} around matches of {@code p}, optionally keeping the matched
     * delimiters as separate elements of the result.
     *
     * <p>The {@code limit} argument follows the same convention as
     * {@link java.util.regex.Pattern#split(CharSequence, int)}:
     * <ul>
     *   <li>{@code limit > 0}: at most {@code limit - 1} matches are used for splitting; the
     *       remainder of the input is returned as the final element;</li>
     *   <li>{@code limit == 0}: every match is used and trailing empty strings are dropped;</li>
     *   <li>{@code limit < 0}: every match is used and trailing empty strings are kept.</li>
     * </ul>
     * A zero-width match at the very beginning never produces a leading empty string.
     *
     * @param p the delimiter pattern
     * @param input the text to split
     * @param limit the result threshold described above
     * @param withDelimiters when {@code true}, each delimiter used for splitting is added after
     *        the piece that precedes it
     * @return the pieces in order; a single-element array holding {@code input} when nothing was split
     */
    static String[] split(java.util.regex.Pattern p, CharSequence input, int limit, boolean withDelimiters) {
        int matchCount = 0;
        int index = 0;
        boolean matchLimited = limit > 0;
        List<String> matchList = new ArrayList<>();
        Matcher m = p.matcher(input);

        while (m.find()) {
            if (!matchLimited || matchCount < limit - 1) {
                if (index == 0 && m.start() == 0 && m.end() == 0) {
                    // Zero-width match at the start: do not emit an empty leading piece.
                    continue;
                }
                matchList.add(input.subSequence(index, m.start()).toString());
                index = m.end();
                if (withDelimiters) {
                    matchList.add(input.subSequence(m.start(), index).toString());
                }
                ++matchCount;
            } else if (matchCount == limit - 1) {
                // Limit reached: everything that is left becomes the last piece.
                matchList.add(input.subSequence(index, input.length()).toString());
                index = m.end();
                ++matchCount;
            }
        }

        // Nothing was split off.
        if (index == 0) {
            return new String[]{input.toString()};
        }

        // Remaining segment after the last delimiter.
        if (!matchLimited || matchCount < limit) {
            matchList.add(input.subSequence(index, input.length()).toString());
        }

        // The size must be known for every limit; only limit == 0 trims trailing empties.
        int resultSize = matchList.size();
        if (limit == 0) {
            while (resultSize > 0 && matchList.get(resultSize - 1).isEmpty()) {
                --resultSize;
            }
        }
        return matchList.subList(0, resultSize).toArray(new String[resultSize]);
    }

    /**
     * Pre-tokenizer configuration read from the tokenizer JSON.
     *
     * <p>Two modes are implemented by {@link #pretokenize(String)}: a {@code MetaSpace} type,
     * which rewrites whitespace, and a {@code Sequence} type, which pipes the text through the
     * configured {@link PretokenizerItem}s in order.
     */
    public static class PreTokenizer {
        /** Runs of spaces and tabs; compiled once instead of on every call. */
        private static final java.util.regex.Pattern WHITESPACE_RUN = java.util.regex.Pattern.compile("[ \t]+");

        public final String type;
        public final String replacement;
        public final String prependScheme;
        /** True when at least one configured item has type {@code ByteLevel}. */
        public final boolean isLegacy;
        /** Configured items; never {@code null}, immutable copy of the constructor argument. */
        public final List<PretokenizerItem> pretokenizers;

        /**
         * @param type value of {@code type}
         * @param replacement value of {@code replacement}, the text substituted for whitespace runs in MetaSpace mode
         * @param prependScheme value of {@code prepend_scheme}
         * @param pretokenizers value of {@code pretokenizers}; {@code null} is treated as an empty list
         */
        @JsonCreator
        public PreTokenizer(@JsonProperty(value="type") String type, @JsonProperty(value="replacement") String replacement, @JsonProperty(value="prepend_scheme") String prependScheme, @JsonProperty(value="pretokenizers") List<PretokenizerItem> pretokenizers) {
            this.type = type;
            this.replacement = replacement;
            this.prependScheme = prependScheme;
            this.pretokenizers = pretokenizers == null ? Collections.emptyList() : ImmutableList.copyOf(pretokenizers);
            // Constant-first equals so an item without a type cannot cause an NPE here.
            this.isLegacy = this.pretokenizers.stream().anyMatch(item -> "ByteLevel".equals(item.type));
        }

        /**
         * Splits a sentence into pre-token pieces.
         *
         * <ul>
         *   <li>Type {@code MetaSpace} (case-insensitive): a space is prepended when
         *       {@code prependScheme} equals {@code "first"} (case-insensitive), then every run of
         *       spaces/tabs is replaced by {@code replacement}; the result is a single piece.</li>
         *   <li>No items configured: the sentence is returned unchanged as a single piece.</li>
         *   <li>Otherwise the type must be {@code Sequence}; each item is applied in order to
         *       every piece produced by the previous item.</li>
         * </ul>
         *
         * @param sentence the text to pre-tokenize
         * @return the resulting pieces
         * @throws IllegalArgumentException if items are configured but the type is not {@code Sequence}
         */
        public List<String> pretokenize(String sentence) {
            if (this.type.equalsIgnoreCase("MetaSpace")) {
                String text = this.prependScheme.equalsIgnoreCase("first") ? " " + sentence : sentence;
                // The replacement is literal text: quote it so '$' and '\' are not read as
                // group references / escapes by the regex engine.
                String literal = Matcher.quoteReplacement(this.replacement);
                return Collections.singletonList(WHITESPACE_RUN.matcher(text).replaceAll(literal));
            }
            if (this.pretokenizers.isEmpty()) {
                return Collections.singletonList(sentence);
            }
            Preconditions.checkArgument(this.type.equalsIgnoreCase("Sequence"), (Object) ("Invalid pre-tokenizer type: " + this.type));

            List<String> pieces = List.of(sentence);
            for (PretokenizerItem item : this.pretokenizers) {
                List<String> next = new ArrayList<>();
                for (String piece : pieces) {
                    next.addAll(item.pretokenize(piece));
                }
                pieces = next;
            }
            return pieces;
        }
    }

    /**
     * Normalizer configuration read from the tokenizer JSON: a typed container around an
     * ordered list of {@link NormalizerItem}s.
     */
    public static class Normalizer {
        public final String type;
        /** Configured items; never {@code null}, immutable copy of the constructor argument. */
        public final List<NormalizerItem> normalizerItems;

        /**
         * @param type value of {@code type}
         * @param normalizerItems value of {@code normalizers}; {@code null} is treated as an empty list
         */
        @JsonCreator
        public Normalizer(@JsonProperty(value="type") String type, @JsonProperty(value="normalizers") List<NormalizerItem> normalizerItems) {
            this.type = type;
            if (normalizerItems == null) {
                this.normalizerItems = Collections.emptyList();
            } else {
                this.normalizerItems = ImmutableList.copyOf(normalizerItems);
            }
        }

        /**
         * Applies every configured item in order, feeding each item the previous item's output.
         * With no items the sentence is returned unchanged, whatever the type.
         *
         * @param sentence the text to normalize
         * @return the normalized text
         * @throws IllegalArgumentException if items are configured but the type is not {@code Sequence}
         */
        public String normalize(String sentence) {
            if (normalizerItems.isEmpty()) {
                return sentence;
            }
            boolean isSequence = type.equalsIgnoreCase("Sequence");
            Preconditions.checkArgument(isSequence, (Object) ("Invalid normalizer type: " + type));

            String current = sentence;
            for (NormalizerItem step : normalizerItems) {
                current = step.normalize(current);
            }
            return current;
        }
    }

    /**
     * Holder for a compiled regular expression read from a JSON object with a {@code Regex}
     * property. Inside {@code TokenizerModel} the simple name {@code Pattern} refers to this
     * class, which is why {@code java.util.regex.Pattern} is spelled out in full.
     */
    public static class Pattern {
        /** The compiled expression. */
        public final java.util.regex.Pattern regex;

        /**
         * @param regex the expression source; compiled with default flags
         */
        @JsonCreator
        public Pattern(@JsonProperty(value="Regex") String regex) {
            this.regex = java.util.regex.Pattern.compile(regex);
        }
    }

    /**
     * One step of a {@code Sequence} pre-tokenizer. The {@code type} selects the behaviour in
     * {@link #pretokenize(String)}; the remaining properties are stored as read from JSON.
     */
    public static class PretokenizerItem {
        /** Zero-width boundary between a digit and a non-digit; compiled once instead of per call. */
        private static final java.util.regex.Pattern DIGIT_BOUNDARY = java.util.regex.Pattern.compile("(?<=\\D)(?=\\d)|(?<=\\d)(?=\\D)");

        public final String type;
        public final Pattern pattern;
        public final String behavior;
        public final Boolean invert;
        public final Boolean individual_digits;
        public final Boolean add_prefix_space;
        public final Boolean trim_offsets;
        public final Boolean use_regex;

        /**
         * Stores every JSON property verbatim; absent properties are {@code null}.
         */
        @JsonCreator
        public PretokenizerItem(@JsonProperty(value="type") String type, @JsonProperty(value="pattern") Pattern pattern, @JsonProperty(value="behavior") String behavior, @JsonProperty(value="invert") Boolean invert, @JsonProperty(value="individual_digits") Boolean individual_digits, @JsonProperty(value="add_prefix_space") Boolean add_prefix_space, @JsonProperty(value="trim_offsets") Boolean trim_offsets, @JsonProperty(value="use_regex") Boolean use_regex) {
            this.type = type;
            this.pattern = pattern;
            this.behavior = behavior;
            this.invert = invert;
            this.individual_digits = individual_digits;
            this.add_prefix_space = add_prefix_space;
            this.trim_offsets = trim_offsets;
            this.use_regex = use_regex;
        }

        /**
         * Applies this step to one piece of text.
         *
         * <ul>
         *   <li>{@code Split}: cuts the text at every match of {@code pattern}, keeping the matches.</li>
         *   <li>{@code Digits}: cuts the text at every digit/non-digit boundary.</li>
         *   <li>{@code ByteLevel}: returns the text unchanged as a single piece.</li>
         * </ul>
         *
         * @param sentence the text to split
         * @return the resulting pieces
         * @throws IllegalArgumentException for any other type
         */
        public List<String> pretokenize(String sentence) {
            switch (this.type) {
                case "Split":
                    return this.splitRegex(sentence);
                case "Digits":
                    return this.splitDigits(sentence);
                case "ByteLevel":
                    return Collections.singletonList(sentence);
                default:
                    throw new IllegalArgumentException("Invalid pre-tokenizer type: " + this.type);
            }
        }

        /**
         * Maps every code point through {@code BPETokenizer.alteredBytes}, leaving unmapped code
         * points as they are, and returns the re-joined text as a single piece.
         * Not called by {@link #pretokenize(String)}, which passes {@code ByteLevel} text through unchanged.
         */
        // NOTE(review): assumes BPETokenizer.alteredBytes maps Integer to Integer — confirm against BPETokenizer.
        private List<String> byteLevel(String sentence) {
            String mapped = sentence.codePoints()
                .map(c -> BPETokenizer.alteredBytes.getOrDefault(c, c))
                .mapToObj(Character::toString)
                .collect(Collectors.joining());
            return List.of(mapped);
        }

        /**
         * Splits {@code s} at every match of {@code pattern}. Both the non-empty text between
         * matches and the matched text itself are emitted, in input order.
         */
        private List<String> splitRegex(String s) {
            Matcher m = this.pattern.regex.matcher(s);
            List<String> ret = new ArrayList<>();
            int start = 0;
            while (m.find()) {
                // Text between the previous match and this one, if any.
                if (m.start() > start) {
                    ret.add(s.substring(start, m.start()));
                }
                ret.add(m.group());
                start = m.end();
            }
            // Trailing text after the last match, if any.
            if (start < s.length()) {
                ret.add(s.substring(start));
            }
            return ret;
        }

        /**
         * Splits the text wherever a digit is adjacent to a non-digit; no characters are removed.
         * The returned list is immutable.
         */
        private List<String> splitDigits(String sentence) {
            return List.of(DIGIT_BOUNDARY.split(sentence));
        }
    }

    /**
     * One step of a {@code Sequence} normalizer. The {@code type} selects the behaviour in
     * {@link #normalize(String)}: {@code Replace} uses {@code pattern} and {@code content},
     * {@code Prepend} uses {@code prepend}.
     */
    public static class NormalizerItem {
        public final String type;
        public final String prepend;
        /** Pattern kind ({@code String} or {@code Regex}) mapped to the pattern text. */
        public final Map<String, String> pattern;
        public final String content;

        /**
         * Stores every JSON property verbatim; absent properties are {@code null}.
         */
        @JsonCreator
        public NormalizerItem(@JsonProperty(value="type") String type, @JsonProperty(value="prepend") String prepend, @JsonProperty(value="pattern") Map<String, String> pattern, @JsonProperty(value="content") String content) {
            this.type = type;
            this.prepend = prepend;
            this.pattern = pattern;
            this.content = content;
        }

        /**
         * Applies this step to the text.
         *
         * @param sentence the text to normalize
         * @return the normalized text
         * @throws IllegalArgumentException if the type is neither {@code Replace} nor {@code Prepend}
         */
        public String normalize(String sentence) {
            switch (this.type) {
                case "Replace":
                    return this.replace(sentence);
                case "Prepend":
                    return this.prepend(sentence);
                default:
                    throw new IllegalArgumentException("Invalid normalizer type: " + this.type);
            }
        }

        /**
         * Replaces every occurrence of each configured pattern with {@code content}.
         * The pattern kind (map key, case-insensitive) decides how the pattern text is read:
         * {@code String} is matched literally, {@code Regex} is matched as a regular expression.
         * Any other kind is logged and skipped, as the warning states.
         */
        private String replace(String sentence) {
            for (Map.Entry<String, String> entry : this.pattern.entrySet()) {
                String kind = entry.getKey();
                if ("String".equalsIgnoreCase(kind)) {
                    // Literal search text: must not be interpreted as a regex.
                    sentence = sentence.replace(entry.getValue(), this.content);
                } else if ("Regex".equalsIgnoreCase(kind)) {
                    sentence = sentence.replaceAll(entry.getValue(), this.content);
                } else {
                    logger.warn("Ignoring unknown pattern key: {}", kind);
                }
            }
            return sentence;
        }

        /**
         * Puts {@code prepend} in front of the text. A missing {@code prepend} value leaves the
         * text unchanged rather than prefixing the literal text "null".
         */
        private String prepend(String sentence) {
            return this.prepend == null ? sentence : this.prepend + sentence;
        }
    }
}

