package pl.edu.icm.synat.process.common.harvesting.elsevier.impl;

import java.math.BigDecimal;
import java.util.Iterator;
import java.util.List;
import org.apache.commons.lang3.StringUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.TextNode;
import org.jsoup.select.Elements;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import pl.edu.icm.model.bwmeta.y.YElement;
import pl.edu.icm.synat.process.common.harvesting.HarvestingResult;
import pl.edu.icm.synat.process.common.harvesting.MetadataHarvester;
import pl.edu.icm.synat.process.common.harvesting.TitleHistory;
import pl.edu.icm.synat.process.common.harvesting.TitleHistoryElement;
import pl.edu.icm.synat.process.common.harvesting.metadata.utils.preapre.content.PrepareContentUtils;
import pl.edu.icm.synat.process.common.harvesting.metadata.utils.preapre.content.PrepareContentUtilsImpl;
import pl.edu.icm.synat.process.common.harvesting.metadata.utils.preapre.yelement.PrepareYElementUtils;
import pl.edu.icm.synat.process.common.model.api.Document;

/* loaded from: input_file:pl/edu/icm/synat/process/common/harvesting/elsevier/impl/ElsevierMetadataHarvester.class */
/**
 * {@link MetadataHarvester} that looks up journal cover and metadata by ISSN on the Elsevier
 * journal page and, when that page cannot be fetched, on the ScienceDirect journal page.
 *
 * <p>Remote pages are fetched through {@link PrepareContentUtils} and parsed with jsoup. The CSS
 * selectors used below are tied to the markup of those sites at the time this class was written.
 */
public class ElsevierMetadataHarvester implements MetadataHarvester {
    /** Prefix of ScienceDirect journal links; the remainder of such a link is taken as the ISSN. */
    private static final String SCIENCE_JOURNAL_LINK_PREFIX = "/science/journal/";
    private final Logger LOGGER = LoggerFactory.getLogger(getClass());
    /** Third argument passed to {@code isApplicableFor}. */
    private static final boolean WITHOUT_BOOKS = true;
    /** Index of the first element of a list. */
    private static final int FIRST = 0;
    private PrepareYElementUtils yelementUtil;
    private PrepareContentUtils contentUtil;
    /** A description paragraph starting with any of these phrases ends the harvested abstract. */
    private static final String[] TERMINATION_PHRASES = {"Availability", "Benefits to authors", "Authors should consult", "Hide full aims", "Type of Contributions", "Prospective authors", "Selection of topics to be reviewed"};

    /** Creates a harvester wired with the default helper implementations. */
    public ElsevierMetadataHarvester() {
        this.yelementUtil = new PrepareYElementUtils();
        this.contentUtil = new PrepareContentUtilsImpl();
    }

    /**
     * Replaces the helper used to fetch remote page content.
     *
     * @param prepareContentUtils the content helper to use from now on
     */
    public void setContentUtil(PrepareContentUtils prepareContentUtils) {
        this.contentUtil = prepareContentUtils;
    }

    /**
     * Delegates the applicability decision to {@code PrepareYElementUtils.isApplicableFor} with the
     * {@code "elsevier"} key.
     */
    @Override // pl.edu.icm.synat.process.common.harvesting.MetadataHarvester
    public boolean isApplicable(Document document) {
        return this.yelementUtil.isApplicableFor(document, "elsevier", WITHOUT_BOOKS);
    }

    /**
     * Harvests everything via {@link #findCoverAndMetadata(Document)} and then blanks every field
     * except the cover URL.
     *
     * @return the stripped result, or {@code null} when nothing could be harvested
     */
    @Override // pl.edu.icm.synat.process.common.harvesting.MetadataHarvester
    public HarvestingResult findCover(Document document) {
        HarvestingResult result = findCoverAndMetadata(document);
        if (result != null) {
            result.setDesciption(null);
            result.getCategories().clear();
            result.setImpactFactor((BigDecimal) null);
            result.setOtherTitles(null);
            result.setSiteUrl(null);
        }
        return result;
    }

    /**
     * Harvests cover URL and metadata for the journal described by {@code document}.
     *
     * @return the harvested data, or {@code null} when the document yields no element, has no
     *     ISSN, or neither remote page could be fetched
     */
    @Override // pl.edu.icm.synat.process.common.harvesting.MetadataHarvester
    public HarvestingResult findCoverAndMetadata(Document document) {
        YElement yElement = this.yelementUtil.prepareYElement(document);
        if (yElement == null) {
            return null;
        }
        String issn = getIssn(yElement);
        if (StringUtils.isBlank(issn)) {
            // Without an ISSN the page URLs would end in "null"; skip the pointless requests.
            return null;
        }
        HarvestingResult harvestingResult = new HarvestingResult();
        // NOTE(review): ScienceDirect is consulted only when the Elsevier page is unavailable, so
        // title history is filled in only on that fallback path - confirm this is intended.
        if (harvestFromElsevier(issn, harvestingResult) || harvestFromScienceDirect(issn, harvestingResult)) {
            return harvestingResult;
        }
        return null;
    }

    /**
     * Fills {@code harvestingResult} from the ScienceDirect journal page: the cover URL (only if
     * none is set yet) and the title history.
     *
     * @return {@code false} when the page content could not be fetched
     */
    private boolean harvestFromScienceDirect(String str, HarvestingResult harvestingResult) {
        String content = this.contentUtil.fetchRemoteContent(preparePageUrlScienceDirect(str));
        if (content == null) {
            return false;
        }
        org.jsoup.nodes.Document page = Jsoup.parse(content);
        if (StringUtils.isBlank(harvestingResult.getCoverUrl())) {
            harvestingResult.setCoverUrl(findCoverUrlScienceDirect(page));
        }
        harvestingResult.setOtherTitles(buildHistoryFromScienceDirect(page));
        return true;
    }

    /**
     * Builds the previous/next title lists from the first {@code div > div.info > p} paragraph.
     *
     * <p>Text nodes act as section markers ("Formerly known as" / "Continued as"); each element
     * node that follows is parsed as a title and added to the list selected by the latest marker.
     */
    private TitleHistory buildHistoryFromScienceDirect(org.jsoup.nodes.Document document) {
        TitleHistory titleHistory = new TitleHistory();
        Element info = document.select("div > div.info > p").first();
        if (info == null) {
            return titleHistory;
        }
        // Null until the first marker text is seen; elements before any marker are dropped.
        List<TitleHistoryElement> target = null;
        for (org.jsoup.nodes.Node node : info.childNodes()) {
            if (node instanceof TextNode) {
                String text = ((TextNode) node).text();
                if (text.startsWith("Formerly known as")) {
                    target = titleHistory.getPreviousTitles();
                } else if (text.contains("Continued as")) {
                    target = titleHistory.getNextTitles();
                }
            } else if (node instanceof Element) {
                TitleHistoryElement parsed = parseHistoryElement((Element) node);
                if (parsed != null && target != null) {
                    target.add(parsed);
                }
            }
        }
        return titleHistory;
    }

    /**
     * Parses a {@code <b>} element wrapping a journal link into a title history entry.
     *
     * @return the entry, or {@code null} when {@code element} is not a {@code <b>} tag or contains
     *     no {@code <a>} tag; the ISSN is left unset when the link does not start with
     *     {@link #SCIENCE_JOURNAL_LINK_PREFIX}
     */
    private TitleHistoryElement parseHistoryElement(Element element) {
        if (!"b".equals(element.tagName())) {
            return null;
        }
        Element link = element.getElementsByTag("a").first();
        if (link == null) {
            return null;
        }
        TitleHistoryElement titleHistoryElement = new TitleHistoryElement();
        titleHistoryElement.setTitleName(link.text());
        String href = link.attr("href");
        if (href != null && href.startsWith(SCIENCE_JOURNAL_LINK_PREFIX)) {
            titleHistoryElement.setIssn(href.substring(SCIENCE_JOURNAL_LINK_PREFIX.length()));
        } else {
            this.LOGGER.warn("Link to journal has different syntax {}", href);
        }
        return titleHistoryElement;
    }

    /**
     * Reads the {@code src} of the image matched by {@code div.coverImageDiv > img}.
     *
     * @return the attribute value, or {@code null} when no such image exists
     */
    private String findCoverUrlScienceDirect(org.jsoup.nodes.Document document) {
        Elements images = document.select("div.coverImageDiv > img");
        return images.isEmpty() ? null : images.attr("src");
    }

    /**
     * Fills {@code harvestingResult} from the Elsevier journal page: site URL, cover URL and
     * description.
     *
     * @return {@code false} when the page content could not be fetched
     */
    private boolean harvestFromElsevier(String str, HarvestingResult harvestingResult) {
        String pageUrl = preparePageUrlElsevier(str);
        String content = this.contentUtil.fetchRemoteContent(pageUrl);
        if (content == null) {
            return false;
        }
        this.LOGGER.debug("Processing {}", pageUrl);
        org.jsoup.nodes.Document page = Jsoup.parse(content);
        harvestingResult.setSiteUrl(pageUrl);
        harvestingResult.setCoverUrl(findCoverUrlElsevier(page));
        harvestingResult.setDesciption(parseAbstractElsevier(page));
        return true;
    }

    /**
     * Tries three cover image selectors in turn and returns the {@code src} of the first match.
     *
     * @return whatever {@code Elements.attr("src")} yields for the last selection tried
     */
    private String findCoverUrlElsevier(org.jsoup.nodes.Document document) {
        Elements images = document.select("img[id=cphContent_imgCoverLink]");
        if (images.isEmpty()) {
            images = document.select("img[id=cphContent_imgCover]");
        }
        if (images.isEmpty()) {
            images = document.select("[class=cover]").select("img");
        }
        return images.attr("src");
    }

    /**
     * Returns the first identifier of class {@code bwmeta1.id-class.ISSN}.
     *
     * @return the ISSN, or {@code null} (after logging) when the element has none
     */
    private String getIssn(YElement yElement) {
        List<?> ids = yElement.getIds("bwmeta1.id-class.ISSN");
        if (ids.isEmpty()) {
            this.LOGGER.info("No issn for {}", yElement.getId());
            return null;
        }
        return (String) ids.get(FIRST);
    }

    /** Builds the ScienceDirect journal page URL for the given ISSN. */
    private String preparePageUrlScienceDirect(String str) {
        return "http://www.sciencedirect.com/science/journal/" + str;
    }

    /** Builds the Elsevier journal page URL for the given ISSN. */
    private String preparePageUrlElsevier(String str) {
        return "http://www.elsevier.com/locate/issn/" + str;
    }

    /**
     * Concatenates the non-blank {@code div.full-scope > p} paragraphs, each re-wrapped in
     * {@code <p>} tags, stopping at the first paragraph that starts with a termination phrase.
     *
     * @return the collected HTML, or {@code null} when nothing non-blank was collected
     */
    private String parseAbstractElsevier(org.jsoup.nodes.Document document) {
        StringBuilder html = new StringBuilder();
        for (Element paragraph : document.select("div.full-scope > p")) {
            String text = paragraph.text();
            if (isTerminateParagraph(text)) {
                break;
            }
            if (StringUtils.isNotBlank(text)) {
                html.append("<p>").append(paragraph.html()).append("</p>");
            }
        }
        String result = html.toString();
        return StringUtils.isBlank(result) ? null : result;
    }

    /** Tells whether {@code str} starts with one of the {@link #TERMINATION_PHRASES}. */
    private static boolean isTerminateParagraph(String str) {
        for (String phrase : TERMINATION_PHRASES) {
            if (str.startsWith(phrase)) {
                return true;
            }
        }
        return false;
    }
}
