package pl.edu.icm.cermine.bibref.parsing.tools;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.io.FileUtils;
import org.jdom.Element;
import org.jdom.JDOMException;
import org.jdom.output.Format;
import org.jdom.output.XMLOutputter;
import org.xml.sax.InputSource;
import pl.edu.icm.cermine.bibref.model.BibEntry;
import pl.edu.icm.cermine.bibref.parsing.model.Citation;
import pl.edu.icm.cermine.bibref.parsing.model.CitationToken;
import pl.edu.icm.cermine.bibref.parsing.model.CitationTokenLabel;
import pl.edu.icm.cermine.bibref.transformers.BibEntryToNLMElementConverter;
import pl.edu.icm.cermine.exception.TransformationException;

/* loaded from: input_file:pl/edu/icm/cermine/bibref/parsing/tools/CoraRefToNLM.class */
/**
 * Command-line tool that loads citations through
 * {@code NlmCitationExtractor.extractCitations}, rewrites some of their token
 * labels, and appends every citation to three output files: the element
 * produced by {@link BibEntryToNLMElementConverter}, the entry's
 * {@code toBibTeX()} string and the entry's {@code getText()} string.
 *
 * <p>The class is not instantiable; everything is driven from
 * {@link #main(String[])}.
 */
public final class CoraRefToNLM {
    /** Default input file, used when no paths are given on the command line. */
    private static final String DEFAULT_INPUT = "/home/domin/cermine-tests/mixed.citations.xml";
    /** Default output file for the converted XML elements. */
    private static final String DEFAULT_OUT_NLM = "/home/domin/phd-metadata-extraction/results/citations/citations.nxml";
    /** Default output file for the BibTeX rendering. */
    private static final String DEFAULT_OUT_BIBTEX = "/home/domin/phd-metadata-extraction/results/citations/citations.bibtex";
    /** Default output file for the plain citation text. */
    private static final String DEFAULT_OUT_TEXT = "/home/domin/phd-metadata-extraction/results/citations/citations.txt";

    /** Value of the {@code id} attribute given to the first written element; later ones count up from it. */
    private static final int FIRST_CITATION_ID = 3421;

    /** Matches text whose last character is a digit. */
    private static final Pattern ENDS_WITH_DIGIT = Pattern.compile(".*\\d");
    /** Matches text made only of digits. */
    private static final Pattern DIGITS_ONLY = Pattern.compile("\\d+");

    /**
     * Author-name layouts recognised at the start of a normalised author
     * string. Declaration order is the order in which they are tried, so the
     * first format that matches wins. Every pattern ends with an optional space
     * followed by end of input, a comma, {@code and}, {@code AND} or {@code &}.
     */
    private enum NameFormat {
        /** One initial then a surname, e.g. "J Smith". */
        INITIAL_SURNAME("^[A-Z] [A-Z][-A-Za-z]+ ?($|,|and|AND|&)") {
            @Override
            void emit(String match, List<String> parts, List<CitationTokenLabel> labels) {
                addPart(match.substring(0, 1), parts, labels, CitationTokenLabel.GIVENNAME);
                addHyphenatedParts(match.substring(2).replaceAll("[^-A-Za-z].*", ""), parts, labels,
                        CitationTokenLabel.SURNAME);
            }
        },
        /** Two initials then a surname, e.g. "J K Smith". */
        TWO_INITIALS_SURNAME("^[A-Z] [A-Z] [A-Z][-A-Za-z]+ ?($|,|and|AND|&)") {
            @Override
            void emit(String match, List<String> parts, List<CitationTokenLabel> labels) {
                addPart(match.substring(0, 1), parts, labels, CitationTokenLabel.GIVENNAME);
                addPart(match.substring(2, 3), parts, labels, CitationTokenLabel.GIVENNAME);
                addHyphenatedParts(match.substring(4).replaceAll("[^-A-Za-z].*", ""), parts, labels,
                        CitationTokenLabel.SURNAME);
            }
        },
        /** Surname, comma, one initial, e.g. "Smith, J". */
        SURNAME_COMMA_INITIAL("^[A-Z][-A-Za-z]+, [A-Z] ?($|,|and|AND|&)") {
            @Override
            void emit(String match, List<String> parts, List<CitationTokenLabel> labels) {
                addHyphenatedParts(match.replaceAll(",.*", ""), parts, labels, CitationTokenLabel.SURNAME);
                addPart(match.replaceAll("[^, ]+, ", "").substring(0, 1), parts, labels,
                        CitationTokenLabel.GIVENNAME);
            }
        },
        /** Surname, comma, two initials, e.g. "Smith, J K" or "Smith, JK". */
        SURNAME_COMMA_TWO_INITIALS("^[A-Z][-A-Za-z]+, [A-Z] ?[A-Z] ?($|,|and|AND|&)") {
            @Override
            void emit(String match, List<String> parts, List<CitationTokenLabel> labels) {
                addHyphenatedParts(match.replaceAll(",.*", ""), parts, labels, CitationTokenLabel.SURNAME);
                String initials = match.replaceAll("[^, ]+, ", "");
                addPart(initials.substring(0, 1), parts, labels, CitationTokenLabel.GIVENNAME);
                addPart(initials.substring(1).trim().substring(0, 1), parts, labels,
                        CitationTokenLabel.GIVENNAME);
            }
        },
        /** Given name, one initial, surname, e.g. "John K Smith". */
        GIVEN_INITIAL_SURNAME("^[A-Z][-A-Za-z]+ [A-Z] [A-Z][-A-Za-z]+ ?($|,|and|AND|&)") {
            @Override
            void emit(String match, List<String> parts, List<CitationTokenLabel> labels) {
                addHyphenatedParts(match.replaceAll(" .*", ""), parts, labels, CitationTokenLabel.GIVENNAME);
                String afterGiven = match.replaceAll("^[-a-zA-Z]+ ", "");
                addPart(afterGiven.substring(0, 1), parts, labels, CitationTokenLabel.GIVENNAME);
                addHyphenatedParts(afterGiven.substring(1).trim().replaceAll("( |,|&).*", ""), parts, labels,
                        CitationTokenLabel.SURNAME);
            }
        },
        /** Given name, two initials, surname, e.g. "John K L Smith". */
        GIVEN_TWO_INITIALS_SURNAME("^[A-Z][-A-Za-z]+ [A-Z] [A-Z] [A-Z][-A-Za-z]+ ?($|,|and|AND|&)") {
            @Override
            void emit(String match, List<String> parts, List<CitationTokenLabel> labels) {
                addHyphenatedParts(match.replaceAll(" .*", ""), parts, labels, CitationTokenLabel.GIVENNAME);
                String afterGiven = match.replaceAll("^[-a-zA-Z]+ ", "");
                addPart(afterGiven.substring(0, 1), parts, labels, CitationTokenLabel.GIVENNAME);
                String afterFirstInitial = afterGiven.substring(1).trim();
                addPart(afterFirstInitial.substring(0, 1), parts, labels, CitationTokenLabel.GIVENNAME);
                addHyphenatedParts(afterFirstInitial.substring(1).trim().replaceAll("( |,|&).*", ""), parts,
                        labels, CitationTokenLabel.SURNAME);
            }
        },
        /** Surname then one initial, e.g. "Smith J". */
        SURNAME_INITIAL("^[A-Z][-A-Za-z]+ [A-Z] ?($|,|and|AND|&)") {
            @Override
            void emit(String match, List<String> parts, List<CitationTokenLabel> labels) {
                addHyphenatedParts(match.replaceAll(" .*", ""), parts, labels, CitationTokenLabel.SURNAME);
                addPart(match.replaceAll("^[^ ]+ ", "").substring(0, 1), parts, labels,
                        CitationTokenLabel.GIVENNAME);
            }
        },
        /** Surname then two initials, e.g. "Smith J K". */
        SURNAME_TWO_INITIALS("^[A-Z][-A-Za-z]+ [A-Z] [A-Z] ?($|,|and|AND|&)") {
            @Override
            void emit(String match, List<String> parts, List<CitationTokenLabel> labels) {
                addHyphenatedParts(match.replaceAll(" .*", ""), parts, labels, CitationTokenLabel.SURNAME);
                String initials = match.replaceAll("^[^ ]+ ", "");
                addPart(initials.substring(0, 1), parts, labels, CitationTokenLabel.GIVENNAME);
                addPart(initials.substring(1).trim().substring(0, 1), parts, labels,
                        CitationTokenLabel.GIVENNAME);
            }
        },
        /** Surname, comma, full given name, e.g. "Smith, John". */
        SURNAME_COMMA_GIVEN("^[A-Z][-A-Za-z]+, [A-Z][-A-Za-z]+ ?($|,|and|AND|&)") {
            @Override
            void emit(String match, List<String> parts, List<CitationTokenLabel> labels) {
                addHyphenatedParts(match.replaceAll(",.*", ""), parts, labels, CitationTokenLabel.SURNAME);
                addHyphenatedParts(match.replaceAll("^[^ ]+ ", "").replaceAll("( |,|&).*", ""), parts, labels,
                        CitationTokenLabel.GIVENNAME);
            }
        },
        /** Full given name then surname, e.g. "John Smith". */
        GIVEN_SURNAME("^[A-Z][-A-Za-z]+ [A-Z][-A-Za-z]+ ?($|,|and|AND|&)") {
            @Override
            void emit(String match, List<String> parts, List<CitationTokenLabel> labels) {
                addHyphenatedParts(match.replaceAll(" .*", ""), parts, labels, CitationTokenLabel.GIVENNAME);
                addHyphenatedParts(match.replaceAll("^[^ ]+ ", "").replaceAll("( |,|&).*", ""), parts, labels,
                        CitationTokenLabel.SURNAME);
            }
        };

        private final Pattern pattern;

        NameFormat(String regex) {
            this.pattern = Pattern.compile(regex);
        }

        /**
         * Returns the text this format matches at the very start of
         * {@code text}, or {@code null} when it does not match there.
         */
        String leadingMatch(String text) {
            Matcher matcher = pattern.matcher(text);
            return matcher.find() && matcher.start() == 0 ? matcher.group() : null;
        }

        /**
         * Appends the name parts found in {@code match} to {@code parts} and a
         * label for each of them, at the same index, to {@code labels}.
         */
        abstract void emit(String match, List<String> parts, List<CitationTokenLabel> labels);
    }

    /**
     * Runs the conversion.
     *
     * @param strArr when it holds at least four elements they are used, in
     *        order, as the input path, the XML output path, the BibTeX output
     *        path and the text output path; otherwise (including {@code null}
     *        or an empty array) the built-in default paths are used
     * @throws JDOMException declared by the original entry point; propagated from the project calls
     * @throws IOException if the input cannot be read or an output cannot be written
     * @throws TransformationException declared by the original entry point; propagated from the project calls
     */
    public static void main(String[] strArr) throws JDOMException, IOException, TransformationException {
        boolean pathsGiven = strArr != null && strArr.length >= 4;
        String input = pathsGiven ? strArr[0] : DEFAULT_INPUT;
        File nlmOut = new File(pathsGiven ? strArr[1] : DEFAULT_OUT_NLM);
        File bibtexOut = new File(pathsGiven ? strArr[2] : DEFAULT_OUT_BIBTEX);
        File textOut = new File(pathsGiven ? strArr[3] : DEFAULT_OUT_TEXT);

        List<Citation> citations = readCitations(input);
        for (Citation citation : citations) {
            fixTokenLabels(citation);
            inferVolumeAndIssue(citation);
            relabelAuthorTokens(citation);
        }
        appendOutputs(citations, nlmOut, bibtexOut, textOut);
    }

    /**
     * Extracts the citations from the file at {@code path}; the stream is
     * closed whether or not extraction succeeds.
     */
    private static List<Citation> readCitations(String path)
            throws JDOMException, IOException, TransformationException {
        FileInputStream in = new FileInputStream(new File(path));
        try {
            return NlmCitationExtractor.extractCitations(new InputSource(in));
        } finally {
            in.close();
        }
    }

    /**
     * First clean-up pass over a citation's tokens:
     * <ul>
     * <li>a YEAR token shorter than four characters becomes TEXT;</li>
     * <li>a "." or "," labelled ARTICLE_TITLE or SOURCE becomes TEXT when the
     *     following token does not carry the same label;</li>
     * <li>a PAGEF token not ending in a digit becomes TEXT, and the remaining
     *     PAGEF tokens alternate between PAGEF and PAGEL, starting with
     *     PAGEF.</li>
     * </ul>
     */
    private static void fixTokenLabels(Citation citation) {
        List<CitationToken> tokens = citation.getTokens();
        boolean nextPageIsFirst = true;
        for (int index = 0; index < tokens.size(); index++) {
            CitationToken token = tokens.get(index);
            if (CitationTokenLabel.YEAR.equals(token.getLabel()) && token.getText().length() < 4) {
                token.setLabel(CitationTokenLabel.TEXT);
            }
            demoteTrailingPunctuation(tokens, index, CitationTokenLabel.ARTICLE_TITLE);
            demoteTrailingPunctuation(tokens, index, CitationTokenLabel.SOURCE);
            if (CitationTokenLabel.PAGEF.equals(token.getLabel())) {
                if (!ENDS_WITH_DIGIT.matcher(token.getText()).matches()) {
                    token.setLabel(CitationTokenLabel.TEXT);
                } else {
                    token.setLabel(nextPageIsFirst ? CitationTokenLabel.PAGEF : CitationTokenLabel.PAGEL);
                    nextPageIsFirst = !nextPageIsFirst;
                }
            }
        }
    }

    /**
     * Relabels the token at {@code index} as TEXT when it carries
     * {@code label}, is not the last token, its text is "." or ",", and the
     * next token's label differs from {@code label}.
     */
    private static void demoteTrailingPunctuation(List<CitationToken> tokens, int index,
            CitationTokenLabel label) {
        CitationToken token = tokens.get(index);
        if (!label.equals(token.getLabel()) || index >= tokens.size() - 1) {
            return;
        }
        CitationToken next = tokens.get(index + 1);
        String text = token.getText();
        boolean punctuation = text.equals(".") || text.equals(",");
        if (punctuation && !label.equals(next.getLabel())) {
            token.setLabel(CitationTokenLabel.TEXT);
        }
    }

    /**
     * Second pass: a digits-only TEXT token becomes VOLUME when the most recent
     * non-TEXT label was SOURCE, and ISSUE when the most recent non-TEXT label
     * was VOLUME.
     */
    private static void inferVolumeAndIssue(Citation citation) {
        CitationTokenLabel lastNonText = CitationTokenLabel.TEXT;
        for (CitationToken token : citation.getTokens()) {
            // Both checks re-read the label on purpose: the first may already have changed it.
            if (CitationTokenLabel.SOURCE.equals(lastNonText) && CitationTokenLabel.TEXT.equals(token.getLabel())
                    && DIGITS_ONLY.matcher(token.getText()).matches()) {
                token.setLabel(CitationTokenLabel.VOLUME);
            }
            if (CitationTokenLabel.VOLUME.equals(lastNonText) && CitationTokenLabel.TEXT.equals(token.getLabel())
                    && DIGITS_ONLY.matcher(token.getText()).matches()) {
                token.setLabel(CitationTokenLabel.ISSUE);
            }
            if (!CitationTokenLabel.TEXT.equals(token.getLabel())) {
                lastNonText = token.getLabel();
            }
        }
    }

    /**
     * Third pass: parses every author field value of the citation into name
     * parts, then walks the tokens labelled SURNAME and gives each one the
     * label of the next expected name part when their texts are equal. A "."
     * directly after a GIVENNAME token becomes GIVENNAME; any other SURNAME
     * token becomes TEXT. Nothing is changed when no name part could be parsed.
     */
    private static void relabelAuthorTokens(Citation citation)
            throws JDOMException, IOException, TransformationException {
        BibEntry entry = CitationUtils.citationToBibref(citation);
        List<String> parts = new ArrayList<String>();
        List<CitationTokenLabel> labels = new ArrayList<CitationTokenLabel>();
        for (String author : entry.getAllFieldValues(BibEntry.FIELD_AUTHOR)) {
            collectNameParts(author, parts, labels);
        }
        if (parts.isEmpty()) {
            return;
        }

        int expected = 0;
        CitationToken previous = null;
        for (CitationToken token : citation.getTokens()) {
            if (CitationTokenLabel.SURNAME.equals(token.getLabel())) {
                if (expected < parts.size() && token.getText().equals(parts.get(expected))) {
                    token.setLabel(labels.get(expected));
                    expected++;
                } else if (previous != null && CitationTokenLabel.GIVENNAME.equals(previous.getLabel())
                        && token.getText().equals(".")) {
                    token.setLabel(CitationTokenLabel.GIVENNAME);
                } else {
                    token.setLabel(CitationTokenLabel.TEXT);
                }
            }
            previous = token;
        }
    }

    /**
     * Normalises one author field value (dots become spaces, commas are
     * followed by exactly one space, runs of spaces collapse) and repeatedly
     * strips the first recognisable name from its start, appending the name's
     * parts and their labels to the two lists. Parsing stops at the first
     * remainder that no {@link NameFormat} matches.
     */
    private static void collectNameParts(String author, List<String> parts, List<CitationTokenLabel> labels) {
        String rest = author.trim().replaceAll(" +,", ",").replaceAll("\\.", " ").replaceAll(",", ", ")
                .replaceAll(" +", " ");
        while (!rest.isEmpty()) {
            // Drop the separator left over from the previous name.
            rest = rest.trim().replaceAll("^, ", "").replaceAll("^and ", "").replaceAll("^& ", "");
            NameFormat matched = null;
            String match = null;
            for (NameFormat format : NameFormat.values()) {
                match = format.leadingMatch(rest);
                if (match != null) {
                    matched = format;
                    break;
                }
            }
            if (matched == null) {
                break;
            }
            matched.emit(match, parts, labels);
            // The match starts at index 0, so its length is also its end offset.
            rest = rest.substring(match.length());
        }
    }

    /**
     * Appends every citation to the three output files, each followed by a
     * newline. The XML element gets an {@code id} attribute counting up from
     * {@link #FIRST_CITATION_ID}.
     */
    private static void appendOutputs(List<Citation> citations, File nlmOut, File bibtexOut, File textOut)
            throws JDOMException, IOException, TransformationException {
        BibEntryToNLMElementConverter converter = new BibEntryToNLMElementConverter();
        XMLOutputter outputter = new XMLOutputter(Format.getRawFormat());
        int id = FIRST_CITATION_ID;
        for (Citation citation : citations) {
            BibEntry entry = CitationUtils.citationToBibref(citation);
            Element element = converter.convert(entry, new Object[0]);
            element.setAttribute("id", String.valueOf(id));
            id++;
            FileUtils.writeStringToFile(nlmOut, outputter.outputString(element), true);
            FileUtils.writeStringToFile(bibtexOut, entry.toBibTeX(), true);
            FileUtils.writeStringToFile(textOut, entry.getText(), true);
            FileUtils.writeStringToFile(nlmOut, "\n", true);
            FileUtils.writeStringToFile(bibtexOut, "\n", true);
            FileUtils.writeStringToFile(textOut, "\n", true);
        }
    }

    /** Appends one name part and its label to the two parallel lists. */
    private static void addPart(String part, List<String> parts, List<CitationTokenLabel> labels,
            CitationTokenLabel label) {
        parts.add(part);
        labels.add(label);
    }

    /**
     * Splits {@code name} at hyphens and appends every piece, including each
     * hyphen as its own "-" piece, with {@code label}. For example
     * "Jean-Pierre" yields "Jean", "-", "Pierre".
     *
     * <p>Every iteration consumes at least one character, so the loop ends for
     * any input, not only for strings made of letters and hyphens.
     */
    private static void addHyphenatedParts(String name, List<String> parts, List<CitationTokenLabel> labels,
            CitationTokenLabel label) {
        String rest = name;
        while (!rest.isEmpty()) {
            String piece;
            if (rest.charAt(0) == '-') {
                piece = "-";
            } else {
                int hyphen = rest.indexOf('-');
                piece = hyphen < 0 ? rest : rest.substring(0, hyphen);
            }
            addPart(piece, parts, labels, label);
            rest = rest.substring(piece.length());
        }
    }

    private CoraRefToNLM() {
    }
}
