package pl.edu.icm.synat.process.common.utils;

import com.cybozu.labs.langdetect.Detector;
import com.cybozu.labs.langdetect.DetectorFactory;
import com.cybozu.labs.langdetect.LangDetectException;
import com.cybozu.labs.langdetect.Language;
import com.google.common.collect.Sets;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import org.apache.commons.io.IOUtils;
import org.apache.solr.update.processor.LangDetectLanguageIdentifierUpdateProcessor;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import pl.edu.icm.model.bwmeta.y.YLanguage;
import pl.edu.icm.synat.application.exception.GeneralBusinessException;

/* loaded from: input_file:pl/edu/icm/synat/process/common/utils/LangDetectUtil.class */
/**
 * Static helpers that map text to a {@link YLanguage} using the langdetect
 * {@link DetectorFactory}.
 *
 * <p>Language profiles are loaded once, lazily, by {@link #loadData()}.
 */
public class LangDetectUtil {
    /** Guarded by the class monitor (only touched inside the synchronized {@link #loadData()}). */
    private static boolean loaded;
    /** A detected language with a probability above this value is accepted unconditionally. */
    private static final float MINIMUM_PROBABILITY = 0.95f;
    /** A detected language with a probability above this value is accepted if the caller listed it. */
    private static final float MINIMUM_PROBABILITY_FROM_SET = 0.75f;
    /** Value passed to {@link DetectorFactory#create(double)} for every detector built here. */
    private static final double DETECTOR_ALPHA = 0.5d;
    private static final Logger log = LoggerFactory.getLogger(LangDetectUtil.class);
    /** Languages whose profiles are loaded into the detector factory. */
    private static final Set<YLanguage> detectableLanguages = Sets.immutableEnumSet(YLanguage.Polish, new YLanguage[]{YLanguage.English, YLanguage.Spanish, YLanguage.French, YLanguage.German, YLanguage.Italian, YLanguage.Russian, YLanguage.Czech, YLanguage.Hungarian});
    /** Languages that {@link #processLanguage(Collection, YLanguage)} is allowed to return as a detection result. */
    private static final Set<YLanguage> supportedLanguages = Sets.immutableEnumSet(YLanguage.Polish, new YLanguage[]{YLanguage.English});
    /** Language values treated as "not known" by {@link #isUndefined(YLanguage)}. */
    private static final Set<YLanguage> unknownLanguages = Sets.immutableEnumSet(YLanguage.Undetermined, new YLanguage[]{YLanguage.NoLinguisticContent});

    /**
     * Loads the profiles of all detectable languages into {@link DetectorFactory}.
     * Once a load has succeeded, further calls return immediately.
     *
     * <p>The "loaded" flag is set only after the profiles were read and registered,
     * so a failed attempt is reported again on the next call instead of being
     * silently skipped.
     *
     * @throws GeneralBusinessException if a profile resource is missing or unreadable,
     *         or if the detector factory rejects the profiles
     */
    public static synchronized void loadData() {
        if (loaded) {
            return;
        }
        List<String> profiles = new ArrayList<>(detectableLanguages.size());
        try {
            Charset utf8 = Charset.forName("UTF-8");
            for (YLanguage language : detectableLanguages) {
                profiles.add(readProfile(language, utf8));
            }
            DetectorFactory.loadProfile(profiles);
            DetectorFactory.setSeed(System.currentTimeMillis());
        } catch (IOException | LangDetectException e) {
            throw new GeneralBusinessException(e);
        }
        loaded = true;
    }

    /**
     * Reads the profile of one language from the classpath, resolved relative to
     * {@link LangDetectLanguageIdentifierUpdateProcessor}.
     *
     * @throws IOException if the resource does not exist or cannot be read
     */
    private static String readProfile(YLanguage language, Charset charset) throws IOException {
        String resource = "langdetect-profiles/" + language.getShortCode();
        InputStream stream = LangDetectLanguageIdentifierUpdateProcessor.class.getResourceAsStream(resource);
        if (stream == null) {
            // getResourceAsStream signals "not found" with null; report it instead of failing with an NPE.
            throw new IOException("Language profile resource not found: " + resource);
        }
        // try-with-resources closes the reader (and the wrapped stream) even when reading fails.
        try (Reader reader = new BufferedReader(new InputStreamReader(stream, charset))) {
            return IOUtils.toString(reader);
        }
    }

    /**
     * @return {@code true} if the language belongs to the supported set (Polish, English)
     */
    public static boolean isSupported(YLanguage yLanguage) {
        return supportedLanguages.contains(yLanguage);
    }

    /**
     * @return {@code true} if the language is {@code Undetermined} or {@code NoLinguisticContent}
     */
    public static boolean isUndefined(YLanguage yLanguage) {
        return unknownLanguages.contains(yLanguage);
    }

    /**
     * Tries to replace an unknown language with one detected from the given texts.
     *
     * <p>Detection is attempted only when {@code yLanguage} has an empty short code,
     * is one of the unknown languages, and the collection holds at least one
     * non-empty string. The first candidate reported by the detector that is a
     * supported language is returned; in every other case, including a detector
     * failure, {@code yLanguage} is returned unchanged.
     *
     * @param collection text fragments to analyse; {@code null} and {@code null} elements are treated as no text
     * @param yLanguage  the language currently assigned to the content
     * @return the detected supported language, or {@code yLanguage}
     * @throws GeneralBusinessException if the language profiles cannot be loaded
     */
    public static YLanguage processLanguage(Collection<String> collection, YLanguage yLanguage) {
        loadData();
        boolean detectionNeeded = yLanguage.getShortCode().isEmpty() && unknownLanguages.contains(yLanguage);
        if (!detectionNeeded || !containsText(collection)) {
            return yLanguage;
        }
        try {
            Detector detector = DetectorFactory.create(DETECTOR_ALPHA);
            for (String text : collection) {
                if (text != null) {
                    detector.append(text);
                }
            }
            for (Language candidate : detector.getProbabilities()) {
                YLanguage detected = YLanguage.byCode(candidate.lang);
                if (isSupported(detected)) {
                    return detected;
                }
            }
        } catch (LangDetectException e) {
            log.debug("Couldn't determine content language", e);
        }
        return yLanguage;
    }

    /**
     * Tells whether at least one element of the collection is a non-empty string.
     * Stops at the first hit; every iteration consumes one element, so the scan
     * always terminates.
     */
    private static boolean containsText(Collection<String> texts) {
        if (texts == null) {
            return false;
        }
        for (String text : texts) {
            if (text != null && !text.isEmpty()) {
                return true;
            }
        }
        return false;
    }

    /**
     * Detects the language of a string.
     *
     * <p>NOTE(review): unlike {@link #processLanguage(Collection, YLanguage)} this method
     * does not call {@link #loadData()}; presumably callers are expected to have loaded
     * the profiles beforehand - confirm against callers.
     *
     * @param str text to analyse
     * @param set languages that are accepted with the lower probability threshold
     * @return the detected language, or {@code YLanguage.Undetermined} if none qualifies
     *         or the detector fails
     */
    public static YLanguage getLanguage(String str, Set<YLanguage> set) {
        try {
            Detector detector = DetectorFactory.create(DETECTOR_ALPHA);
            detector.append(str);
            return detectLanguage(set, detector);
        } catch (LangDetectException e) {
            log.debug("Couldn't determine content language", e);
            return YLanguage.Undetermined;
        }
    }

    /**
     * Detects the language of the text supplied by a reader. The reader is not closed here.
     *
     * <p>NOTE(review): unlike {@link #processLanguage(Collection, YLanguage)} this method
     * does not call {@link #loadData()}; presumably callers are expected to have loaded
     * the profiles beforehand - confirm against callers.
     *
     * @param reader source of the text to analyse
     * @param set    languages that are accepted with the lower probability threshold
     * @return the detected language, or {@code YLanguage.Undetermined} if none qualifies,
     *         the detector fails, or reading fails
     */
    public static YLanguage getLanguage(Reader reader, Set<YLanguage> set) {
        try {
            Detector detector = DetectorFactory.create(DETECTOR_ALPHA);
            detector.append(reader);
            return detectLanguage(set, detector);
        } catch (LangDetectException | IOException e) {
            log.debug("Couldn't determine content language", e);
            return YLanguage.Undetermined;
        }
    }

    /**
     * Picks a language from the detector's candidates, in the order the detector reports them.
     * A candidate is accepted if its probability exceeds {@link #MINIMUM_PROBABILITY}, or if it
     * exceeds {@link #MINIMUM_PROBABILITY_FROM_SET} and the language is contained in {@code set}.
     *
     * @return the first accepted candidate, or {@code YLanguage.Undetermined} if there is none
     */
    private static YLanguage detectLanguage(Set<YLanguage> set, Detector detector) throws LangDetectException {
        for (Language candidate : detector.getProbabilities()) {
            YLanguage detected = YLanguage.byCode(candidate.lang);
            if (candidate.prob > MINIMUM_PROBABILITY) {
                return detected;
            }
            if (candidate.prob > MINIMUM_PROBABILITY_FROM_SET && set != null && set.contains(detected)) {
                return detected;
            }
        }
        return YLanguage.Undetermined;
    }
}
