package pl.edu.icm.synat.process.common.harvesting.acs.impl;

import java.util.List;
import org.apache.commons.lang.StringUtils;
import org.jsoup.Jsoup;
import org.jsoup.select.Elements;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import pl.edu.icm.synat.process.common.harvesting.HarvestingResult;
import pl.edu.icm.synat.process.common.harvesting.MetadataHarvester;
import pl.edu.icm.synat.process.common.harvesting.metadata.utils.preapre.content.PrepareContentUtils;
import pl.edu.icm.synat.process.common.harvesting.metadata.utils.preapre.yelement.PrepareYElementUtils;
import pl.edu.icm.synat.process.common.model.api.Document;

/* loaded from: input_file:pl/edu/icm/synat/process/common/harvesting/acs/impl/AcsMetadataHarvester.class */
/**
 * {@link MetadataHarvester} that looks up journal-level data (site URL, cover image URL and,
 * optionally, a description) on the ACS Publications web site, using the first ISSN found
 * on the document.
 *
 * <p>The lookup first asks the ACS OpenURL resolver for the journal page, follows the
 * "About" link found there and then scrapes the About page.
 */
public class AcsMetadataHarvester implements MetadataHarvester {
    private static final String ACS_HOST = "http://pubs.acs.org";
    private static final String ISSN_ID_CLASS = "bwmeta1.id-class.ISSN";
    private static final String ABOUT_PAGE_SUFFIX = "/about.html";
    private static final String OPEN_URL_PREFIX = ACS_HOST + "/openurl?genre=journal&issn=";
    private static final String SITE_URL_PREFIX = ACS_HOST + "/loi/";
    private static final String COVER_URL_PREFIX = ACS_HOST + "/action/showCoverImage?journalCode=";

    private final PrepareContentUtils contentUtil = new PrepareContentUtils();
    private final Logger logger = LoggerFactory.getLogger(getClass());
    private final PrepareYElementUtils yelementUtil = new PrepareYElementUtils();

    /** Delegates the applicability decision to {@code PrepareYElementUtils} with the "acs" key. */
    @Override
    public boolean isApplicable(Document document) {
        return this.yelementUtil.isApplicableFor(document, "acs", false);
    }

    /** Harvests the site URL and cover URL only; the description is not parsed. */
    @Override
    public HarvestingResult findCover(Document document) {
        return fetchJournalDataByDocument(document, true);
    }

    /** Harvests the site URL, the cover URL and the journal description. */
    @Override
    public HarvestingResult findCoverAndMetadata(Document document) {
        return fetchJournalDataByDocument(document, false);
    }

    /**
     * Harvests journal data using the first ISSN of the document.
     *
     * @param document  document whose ISSN identifiers are read
     * @param coverOnly when {@code true} the description is skipped
     * @return the harvested data, or an empty result when the document has no ISSN
     */
    private HarvestingResult fetchJournalDataByDocument(Document document, boolean coverOnly) {
        List<?> issns = this.yelementUtil.prepareYElement(document).getIds(ISSN_ID_CLASS);
        if (issns.isEmpty()) {
            return new HarvestingResult();
        }
        return fetchJournalDataByIssn((String) issns.get(0), coverOnly);
    }

    /**
     * Harvests journal data for a single ISSN.
     *
     * @param issn      ISSN passed to the ACS OpenURL resolver
     * @param coverOnly when {@code true} the description is skipped
     * @return a result populated with whatever could be found; never {@code null}
     */
    private HarvestingResult fetchJournalDataByIssn(String issn, boolean coverOnly) {
        HarvestingResult result = new HarvestingResult();

        String aboutUrl = findAboutUrl(issn);
        if (aboutUrl == null) {
            this.logger.warn("ACS About site not found for issn: {}", issn);
            return result;
        }

        // The journal code is only derivable from URLs of the form .../<code>/about.html.
        // Without it no site URL is set, rather than building ".../loi/null".
        String journalCode = parseJId(aboutUrl);
        if (journalCode != null) {
            result.setSiteUrl(buildSiteUrl(journalCode));
        } else {
            this.logger.warn("ACS journal code could not be derived from About url: {}", aboutUrl);
        }

        String aboutPage = this.contentUtil.fetchRemoteContent(aboutUrl);
        if (aboutPage == null) {
            this.logger.warn("ACS About site for issn: {} does not exists [{}]", issn, aboutUrl);
            return result;
        }

        org.jsoup.nodes.Document parsedPage = Jsoup.parse(aboutPage);
        String coverUrl = parseCoverUrl(parsedPage, journalCode);
        if (coverUrl != null) {
            result.setCoverUrl(coverUrl);
        }
        if (!coverOnly) {
            result.setDesciption(parseDescription(parsedPage));
        }
        return result;
    }

    /** Builds the journal's "list of issues" URL for the given journal code. */
    private String buildSiteUrl(String journalCode) {
        return SITE_URL_PREFIX + journalCode;
    }

    /**
     * Extracts the HTML of the first paragraph following the "Journal Scope" heading.
     *
     * @return the paragraph's inner HTML, or {@code null} when no such paragraph exists
     */
    private String parseDescription(org.jsoup.nodes.Document document) {
        Elements paragraphs = document.select("div[id=textArea] :containsOwn(Journal Scope) ~ p");
        if (paragraphs.isEmpty()) {
            this.logger.warn("Description not found");
            return null;
        }
        return paragraphs.first().html();
    }

    /**
     * Resolves the cover URL: the image found in the page wins, otherwise the generic
     * cover-image action URL is built from the journal code.
     *
     * @param journalCode journal code, may be {@code null} when it could not be derived
     * @return the cover URL, or {@code null} when neither source is available
     */
    private String parseCoverUrl(org.jsoup.nodes.Document document, String journalCode) {
        String coverFromPage = parseCoverUrlInDocument(document, journalCode);
        if (coverFromPage != null) {
            return coverFromPage;
        }
        // Do not fabricate "...journalCode=null" when the code is unknown.
        return journalCode != null ? COVER_URL_PREFIX + journalCode : null;
    }

    /**
     * Looks for exactly one "Journal Cover" image inside the text area of the page.
     *
     * @return the absolute image URL, or {@code null} when there is not exactly one image
     *         or its {@code src} attribute is blank
     */
    private String parseCoverUrlInDocument(org.jsoup.nodes.Document document, String journalCode) {
        Elements images = document.select("div[id=textArea] img[alt=Journal Cover]");
        if (images.size() == 1) {
            String src = images.first().attr("src");
            // attr() yields an empty string for a missing attribute; that is not a usable URL.
            if (StringUtils.isNotBlank(src)) {
                return addAcsHostIfNeeded(src);
            }
        }
        this.logger.warn("Image not found");
        return null;
    }

    /** Prefixes the ACS host to the given URL unless it already starts with "http". */
    private String addAcsHostIfNeeded(String url) {
        return url.startsWith("http") ? url : ACS_HOST + url;
    }

    /**
     * Resolves the journal's About page URL through the ACS OpenURL resolver.
     *
     * @param issn ISSN appended to the resolver query
     * @return the absolute About URL, or {@code null} when the resolver page could not be
     *         fetched or contains no usable "#navAbout" link
     */
    private String findAboutUrl(String issn) {
        String resolverPage = this.contentUtil.fetchRemoteContent(OPEN_URL_PREFIX + issn);
        if (resolverPage == null) {
            return null;
        }
        Elements aboutLinks = Jsoup.parse(resolverPage).select("#navAbout > a");
        if (aboutLinks.isEmpty()) {
            return null;
        }
        String href = aboutLinks.first().attr("href");
        return StringUtils.isNotBlank(href) ? addAcsHostIfNeeded(href) : null;
    }

    /**
     * Extracts the journal code, i.e. the path segment directly preceding "/about.html".
     *
     * @param aboutUrl About page URL, may be {@code null}
     * @return the journal code, or {@code null} when the URL is {@code null}, does not end
     *         with "/about.html", or the preceding segment is blank
     */
    private String parseJId(String aboutUrl) {
        if (aboutUrl == null || !aboutUrl.endsWith(ABOUT_PAGE_SUFFIX)) {
            return null;
        }
        String[] segments = aboutUrl.split("/");
        if (segments.length < 2) {
            return null;
        }
        String journalCode = segments[segments.length - 2];
        return StringUtils.isNotBlank(journalCode) ? journalCode : null;
    }
}
