/*
 * Decompiled with CFR 0.152.
 */
package org.languagetool.language.identifier;

import com.optimaize.langdetect.LanguageDetector;
import com.optimaize.langdetect.LanguageDetectorBuilder;
import com.optimaize.langdetect.ngram.NgramExtractor;
import com.optimaize.langdetect.ngram.NgramExtractors;
import com.optimaize.langdetect.profiles.LanguageProfile;
import com.optimaize.langdetect.profiles.LanguageProfileReader;
import com.optimaize.langdetect.text.RemoveMinorityScriptsTextFilter;
import com.optimaize.langdetect.text.TextFilter;
import com.optimaize.langdetect.text.TextObjectFactory;
import com.optimaize.langdetect.text.TextObjectFactoryBuilder;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.util.AbstractMap;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicInteger;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
import org.languagetool.DetectedLanguage;
import org.languagetool.JLanguageTool;
import org.languagetool.Language;
import org.languagetool.Languages;
import org.languagetool.language.identifier.LanguageIdentifier;
import org.languagetool.language.identifier.LanguageIdentifierService;
import org.languagetool.language.identifier.detector.FastTextDetector;
import org.languagetool.language.identifier.detector.NGramDetector;
import org.languagetool.noop.NoopLanguage;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Language identifier that scores text with an optional fastText process and/or
 * an optional ngram detector and, when neither is configured or the primary
 * detector failed, falls back to the bundled optimaize language detector.
 *
 * <p>fastText and ngram support are opt-in via {@link #enableFasttext(File, File)}
 * and {@link #enableNgrams(File)}.
 */
public class DefaultLanguageIdentifier extends LanguageIdentifier {
    private static final Logger logger = LoggerFactory.getLogger(DefaultLanguageIdentifier.class);
    /** Minimal confidence passed to the optimaize fallback detector. */
    private static final double MINIMAL_CONFIDENCE = 0.9;
    /** Texts up to this length are scored by the ngram detector (when available) instead of fastText. */
    private static final int SHORT_ALGO_THRESHOLD = 50;
    /** Texts up to this length are restricted to the preferred languages (when any are given). */
    private static final int CONSIDER_ONLY_PREFERRED_THRESHOLD = 50;
    /** Language codes for which no optimaize profile is loaded. */
    private static final List<String> ignoreLangCodes = Arrays.asList("ast", "gl");
    /** Language codes whose profiles are read from the resource directory instead of the optimaize built-ins. */
    private static final List<String> externalLangCodes = Arrays.asList("eo", "crh");
    /** fastText top scores below this value get boosted by common-word counts. */
    private static final float FASTTEXT_CONFIDENCE_THRESHOLD = 0.85f;
    private final LanguageDetector languageDetector;
    private final TextObjectFactory textObjectFactory;
    private final AtomicInteger fasttextInitCounter = new AtomicInteger(0);
    private FastTextDetector fastTextDetector;
    private NGramDetector ngram;

    /** Creates an identifier with a maximum text length of 1000. */
    DefaultLanguageIdentifier() {
        this(1000);
    }

    /**
     * Creates an identifier and sets up the optimaize fallback detector.
     *
     * @param maxLength maximum text length, forwarded to the superclass
     * @throws RuntimeException if the language profiles cannot be loaded
     */
    DefaultLanguageIdentifier(int maxLength) {
        super(maxLength);
        try {
            List<LanguageProfile> profiles = loadProfiles(getLanguageCodes());
            this.languageDetector = LanguageDetectorBuilder.create(NgramExtractors.standard())
                    .minimalConfidence(MINIMAL_CONFIDENCE)
                    .shortTextAlgorithm(SHORT_ALGO_THRESHOLD)
                    .withProfiles(profiles)
                    .build();
            this.textObjectFactory = new TextObjectFactoryBuilder()
                    .maxTextLength(10000)
                    .withTextFilter(LanguageIdentifier.REMOVE_URL_FILTER)
                    .withTextFilter(RemoveMinorityScriptsTextFilter.forThreshold(0.3))
                    .withTextFilter(LanguageIdentifier.REMOVE_EMAIL_SIGNATURE_FILTER)
                    .withTextFilter(LanguageIdentifier.REMOVE_MENTION_FILTER)
                    .withTextFilter(LanguageIdentifier.REMOVE_NON_BREAKING_SPACES_FILTER)
                    .build();
        } catch (IOException e) {
            throw new RuntimeException("Could not set up language identifier", e);
        }
    }

    /**
     * Starts a fastText detector if both files are given; otherwise only logs a warning.
     *
     * @param fasttextBinary the fastText executable, may be {@code null}
     * @param fasttextModel the fastText model file, may be {@code null}
     * @throws RuntimeException if the fastText detector cannot be created
     */
    void enableFasttext(File fasttextBinary, File fasttextModel) {
        if (fasttextBinary == null || fasttextModel == null) {
            logger.warn("fastText not configured - language detection performance will be degraded. See https://dev.languagetool.org/http-server#starting-from-command-line for instructions.");
            return;
        }
        try {
            this.fastTextDetector = new FastTextDetector(fasttextModel, fasttextBinary);
            logger.info("Started fastText process for language identification: Binary {} with model @ {}", fasttextBinary, fasttextModel);
        } catch (IOException e) {
            throw new RuntimeException("Could not start fasttext process for language identification @ " + fasttextBinary + " with model @ " + fasttextModel, e);
        }
    }

    /** Replaces the fastText detector; passing {@code null} disables fastText. */
    public void setFastTextDetector(FastTextDetector fastTextDetector) {
        this.fastTextDetector = fastTextDetector;
    }

    /** Returns the counter that tracks successful fastText restarts after failures. */
    public AtomicInteger getFasttextInitCounter() {
        return this.fasttextInitCounter;
    }

    /** Returns {@code true} if a fastText detector is currently set. */
    public boolean isFastTextEnabled() {
        return this.fastTextDetector != null;
    }

    /**
     * Loads ngram data from the given directory; does nothing if it is {@code null}.
     *
     * @param ngramDir directory with ngram data, may be {@code null}
     * @throws RuntimeException if the ngram data cannot be loaded
     */
    void enableNgrams(File ngramDir) {
        if (ngramDir == null) {
            return;
        }
        try {
            logger.info("Loading ngram data for language identification from {}...", ngramDir);
            this.ngram = new NGramDetector(ngramDir, 50);
            logger.info("Loaded ngram data for language identification from {}", ngramDir);
        } catch (IOException e) {
            throw new RuntimeException("Could not load ngram data language identification from " + ngramDir, e);
        }
    }

    /**
     * Collects the short codes of all non-variant languages that are neither ignored
     * nor external. {@code zh} is expanded to {@code zh-CN} and {@code zh-TW}.
     */
    private static List<String> getLanguageCodes() {
        List<String> langCodes = new ArrayList<>();
        for (Language lang : Languages.get()) {
            String langCode = lang.getShortCode();
            if (lang.isVariant() || ignoreLangCodes.contains(langCode) || externalLangCodes.contains(langCode)) {
                continue;
            }
            if ("zh".equals(langCode)) {
                langCodes.add("zh-CN");
                langCodes.add("zh-TW");
            } else if (!langCodes.contains(langCode)) {
                langCodes.add(langCode);
            }
        }
        return langCodes;
    }

    /**
     * Reads the optimaize profiles for the given codes and appends the external
     * profiles that exist in the resource directory.
     *
     * @param langCodes codes of the built-in profiles to read
     * @return the loaded profiles
     * @throws IOException if a profile cannot be read or its stream cannot be closed
     */
    private List<LanguageProfile> loadProfiles(List<String> langCodes) throws IOException {
        LanguageProfileReader profileReader = new LanguageProfileReader();
        List<LanguageProfile> profiles = profileReader.read(langCodes);
        for (String externalLangCode : externalLangCodes) {
            String profilePath = "/" + externalLangCode + "/" + externalLangCode + ".profile";
            if (!JLanguageTool.getDataBroker().resourceExists(profilePath)) {
                continue;
            }
            // try-with-resources closes the stream without masking a read failure
            // (a 'continue' inside 'finally' would silently discard the exception).
            try (InputStream profile = JLanguageTool.getDataBroker().getFromResourceDirAsStream(profilePath)) {
                if (profile == null) {
                    logger.warn("Could not open language profile {}, skipping it", profilePath);
                    continue;
                }
                profiles.add(new LanguageProfileReader().read(profile));
            }
        }
        return profiles;
    }

    /**
     * Detects the language of the text without noop or preferred languages.
     *
     * @return the detected language, or {@code null} if none was detected
     */
    @Override
    @Nullable
    public Language detectLanguage(String cleanText) {
        DetectedLanguage detectedLanguage = this.detectLanguage(cleanText, Collections.emptyList(), Collections.emptyList());
        return detectedLanguage == null ? null : detectedLanguage.getDetectedLanguage();
    }

    /** Same as the four-argument variant with {@code limitOnPreferredLangs} set to {@code false}. */
    @Override
    public DetectedLanguage detectLanguage(String cleanText, List<String> noopLangsTmp, List<String> preferredLangsTmp) {
        return this.detectLanguage(cleanText, noopLangsTmp, preferredLangsTmp, false);
    }

    /**
     * Returns the single best detection result.
     *
     * @return the first entry of {@link #getDetectedLanguageScores} called with a count of 1,
     *         or {@code null} if that list is empty
     */
    @Override
    @Nullable
    public DetectedLanguage detectLanguage(String cleanText, List<String> noopLangsTmp, List<String> preferredLangsTmp, boolean limitOnPreferredLangs) {
        List<DetectedLanguage> detectedLanguageScores = this.getDetectedLanguageScores(cleanText, noopLangsTmp, preferredLangsTmp, limitOnPreferredLangs, 1);
        return detectedLanguageScores.isEmpty() ? null : detectedLanguageScores.get(0);
    }

    /**
     * Scores the text and returns the detected languages.
     *
     * <p>Short texts (or all texts when fastText is not set) are scored by the ngram
     * detector if one is loaded; otherwise fastText is used. If that fails, or if
     * neither detector is set, the optimaize detector is used as fallback. If nothing
     * was detected, the first preferred language is returned with a score of 0.1,
     * provided it is supported.
     *
     * @param cleanText the text to analyze
     * @param noopLangsTmp additional language codes, parsed by the superclass
     * @param preferredLangsTmp preferred language codes, parsed by the superclass
     * @param limitOnPreferredLangs if {@code true}, languages not in the preferred list are removed
     *        from the scores regardless of text length
     * @param count if greater than 1, the ordered scores are returned; otherwise only the best one
     * @return the detected languages, possibly empty, never {@code null}
     */
    @Override
    @NotNull
    public List<DetectedLanguage> getDetectedLanguageScores(String cleanText, List<String> noopLangsTmp, List<String> preferredLangsTmp, boolean limitOnPreferredLangs, int count) {
        String text = cleanText;
        LanguageIdentifier.ParsedLanguageLists parsedLanguageLists = this.prepareDetectLanguage(text, noopLangsTmp, preferredLangsTmp);
        if (parsedLanguageLists == null) {
            return Collections.singletonList(new DetectedLanguage(null, new NoopLanguage()));
        }
        List<String> additionalLangs = parsedLanguageLists.getAdditionalLangs();
        List<String> preferredLangs = parsedLanguageLists.getPreferredLangs();
        Map<String, Double> scores = null;
        boolean fasttextFailed = false;
        // Records which detection steps contributed to the result.
        String source = "";
        if (this.fastTextDetector != null || this.ngram != null) {
            try {
                boolean usingFastText = false;
                if ((text.length() <= SHORT_ALGO_THRESHOLD || this.fastTextDetector == null) && this.ngram != null) {
                    scores = this.ngram.detectLanguages(text.trim(), additionalLangs);
                    source += "ngram";
                } else {
                    usingFastText = true;
                    scores = this.fastTextDetector.runFasttext(text, additionalLangs);
                    source += "fasttext";
                }
                Map.Entry<String, Double> topResult = this.getHighestScoringResult(scores);
                boolean lowFastTextConfidence = usingFastText && topResult.getValue().floatValue() < FASTTEXT_CONFIDENCE_THRESHOLD;
                // NOTE(review): a null key here raises an NPE that the generic catch below turns
                // into the fallback path - confirm whether getHighestScoringResult can return one.
                if (lowFastTextConfidence || topResult.getKey().equals("zz")) {
                    this.addCommonWordScores(text, scores);
                    source += "+commonwords";
                }
                if (preferredLangs.contains("no") && !preferredLangs.contains("da")) {
                    scores.keySet().removeIf(k -> k.equals("da"));
                }
                if (!preferredLangs.isEmpty() && (text.length() <= CONSIDER_ONLY_PREFERRED_THRESHOLD || limitOnPreferredLangs)) {
                    boolean wasRemoved = scores.keySet().removeIf(k -> !preferredLangs.contains(k));
                    if (wasRemoved && scores.isEmpty() && limitOnPreferredLangs) {
                        logger.warn("No language detected for text after remove all not preferred languages from score.");
                    }
                    source += "+prefLang(forced: " + limitOnPreferredLangs + ")";
                }
            } catch (FastTextDetector.FastTextException e) {
                fasttextFailed = true;
                if (e.isDisabled()) {
                    this.reinitFasttextAfterFailure(e);
                } else {
                    logger.error("Fasttext failed, fallback used", e);
                }
            } catch (Exception e) {
                fasttextFailed = true;
                this.reinitFasttextAfterFailure(e);
            }
        }
        if (this.fastTextDetector == null && this.ngram == null || fasttextFailed) {
            text = this.textObjectFactory.forText(text).toString();
            source += "+fallback";
            if (scores == null) {
                scores = new HashMap<>();
            }
            Map.Entry<String, Double> localResult = this.detectLanguageCode(text, preferredLangs, limitOnPreferredLangs);
            if (localResult != null) {
                scores.put(localResult.getKey(), localResult.getValue());
            }
            if (!additionalLangs.isEmpty()) {
                logger.warn("Cannot consider noopLanguages because not in fastText mode: {}", additionalLangs);
            }
        }
        List<DetectedLanguage> detectedLanguages = new ArrayList<>();
        if (count > 1) {
            Map<String, Double> orderedScores = this.getOrderedScores(scores, count);
            for (Map.Entry<String, Double> entry : orderedScores.entrySet()) {
                String code = entry.getKey();
                if (code == null || !LanguageIdentifierService.INSTANCE.canLanguageBeDetected(code, additionalLangs)) {
                    continue;
                }
                // Round the score to two decimal places.
                float rate = (float) Math.round(entry.getValue() * 100.0) / 100.0f;
                detectedLanguages.add(new DetectedLanguage(null, Languages.getLanguageForShortCode(code, additionalLangs), rate, source));
            }
        } else {
            Map.Entry<String, Double> highestScoringResult = this.getHighestScoringResult(scores);
            String code = highestScoringResult.getKey();
            if (code != null && LanguageIdentifierService.INSTANCE.canLanguageBeDetected(code, additionalLangs)) {
                float newScore;
                if (source.contains("fasttext")) {
                    // For fastText the reported score depends only on the text length:
                    // it grows linearly up to 0.99 at 30 characters.
                    newScore = (float) (0.99 / (30.0 / (double) Math.min(text.length(), 30)));
                } else {
                    newScore = highestScoringResult.getValue().floatValue();
                }
                detectedLanguages.add(new DetectedLanguage(null, Languages.getLanguageForShortCode(code, additionalLangs), newScore, source));
            }
        }
        if (detectedLanguages.isEmpty() && !preferredLangs.isEmpty()) {
            String firstPreferred = preferredLangs.get(0);
            if (firstPreferred != null && !firstPreferred.trim().isEmpty() && Languages.isLanguageSupported(firstPreferred)) {
                source += "+fallbackToPrefLang";
                detectedLanguages.add(new DetectedLanguage(null, Languages.getLanguageForShortCode(firstPreferred), 0.1f, source));
            }
        }
        return detectedLanguages;
    }

    /**
     * Adds the known-word count of each language to its score, creating the entry
     * if the language has no score yet. Each short code is handled only once.
     */
    private void addCommonWordScores(String text, Map<String, Double> scores) {
        Map<Language, Integer> lang2Count = COMMON_WORDS_LANG_IDENTIFIER.getKnownWordsPerLanguage(text);
        HashSet<String> handledCodes = new HashSet<>();
        for (Map.Entry<Language, Integer> entry : lang2Count.entrySet()) {
            String langCode = entry.getKey().getShortCode();
            if (!handledCodes.add(langCode)) {
                continue;
            }
            scores.merge(langCode, entry.getValue().doubleValue(), Double::sum);
        }
    }

    /**
     * Tries to restart the fastText process after a failure. The init counter is
     * incremented only if the process was actually restarted.
     *
     * @param e the failure that triggered the restart, logged for diagnosis
     */
    private void reinitFasttextAfterFailure(Exception e) {
        if (this.fastTextDetector == null) {
            return;
        }
        int newCounter = this.fasttextInitCounter.incrementAndGet();
        try {
            boolean wasRestarted = this.fastTextDetector.restartProcess();
            if (wasRestarted) {
                // The trailing throwable is logged as the cause, not as a placeholder value.
                logger.debug("Fasttext was new initialized after failure {}", newCounter, e);
            } else {
                this.fasttextInitCounter.decrementAndGet();
            }
        } catch (IOException ex) {
            logger.warn("Restarting fasttext failed {}", newCounter, ex);
        }
    }

    /**
     * Runs the optimaize fallback detector.
     *
     * @param text the text to analyze
     * @param preferredLangs preferred language codes, may be {@code null}
     * @param limitOnPreferredLangs if {@code true} and preferred languages are given, all other
     *        candidates are discarded
     * @return language code and probability of the first remaining candidate, or {@code null} if none is left
     */
    @Nullable
    private Map.Entry<String, Double> detectLanguageCode(String text, List<String> preferredLangs, boolean limitOnPreferredLangs) {
        List<com.optimaize.langdetect.DetectedLanguage> candidates = this.languageDetector.getProbabilities(text);
        if (limitOnPreferredLangs && preferredLangs != null && !preferredLangs.isEmpty()) {
            boolean wasRemoved = candidates.removeIf(l -> !preferredLangs.contains(l.getLocale().getLanguage()));
            if (wasRemoved && candidates.isEmpty()) {
                logger.warn("No language detected for text after remove all not preferred languages from score.");
            }
        }
        if (candidates.isEmpty()) {
            return null;
        }
        com.optimaize.langdetect.DetectedLanguage best = candidates.get(0);
        return new AbstractMap.SimpleImmutableEntry<String, Double>(best.getLocale().getLanguage(), best.getProbability());
    }
}

