package pl.edu.icm.cermine.bibref.parsing.tools;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.Writer;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Random;
import java.util.regex.Pattern;
import org.apache.commons.io.FileUtils;
import org.apache.commons.lang.StringUtils;
import org.jdom.Element;
import org.jdom.JDOMException;
import org.jdom.output.Format;
import org.jdom.output.XMLOutputter;
import org.xml.sax.InputSource;
import pl.edu.icm.cermine.bibref.parsing.model.Citation;
import pl.edu.icm.cermine.bibref.parsing.model.CitationToken;
import pl.edu.icm.cermine.bibref.transformers.BibEntryToNLMElementConverter;
import pl.edu.icm.cermine.exception.TransformationException;

/* loaded from: input_file:pl/edu/icm/cermine/bibref/parsing/tools/CrossMalletTrainingFileGenerator.class */
/**
 * Command-line tool that turns an NLM citation file into cross-validation
 * input files.
 *
 * <p>The citations read from {@code nlmFile} are shuffled with a fixed seed.
 * Every purely alphabetic token of every citation is appended, lower-cased and
 * one per line, to {@code outValidFile}. The first {@link #TRAINING_SIZE}
 * shuffled citations are then distributed round-robin over {@link #FOLDS}
 * folds, and for each fold {@code k} three files are written:
 * <ul>
 *   <li>{@code outFile + ".train." + k} - Mallet-format citations of all other folds,</li>
 *   <li>{@code outFile + ".test." + k} - Mallet-format citations of fold {@code k},</li>
 *   <li>{@code outFile + ".nlm." + k} - the citations of fold {@code k} as an NLM
 *       {@code <refs>} element.</li>
 * </ul>
 */
public final class CrossMalletTrainingFileGenerator {
    private static String nlmFile = "/home/domin/phd-metadata-extraction/results/citations/dataset/citations.nxml";
    private static String outFile = "/home/domin/phd-metadata-extraction/results/citations/training/training";
    private static String outValidFile = "/home/domin/phd-metadata-extraction/results/citations/training/validation";

    /** Number of cross-validation folds (and of train/test/nlm file triples). */
    private static final int FOLDS = 5;

    /** Maximum number of shuffled citations that are distributed over the folds. */
    private static final int TRAINING_SIZE = 2000;

    /** Fixed seed so that repeated runs produce the same shuffle. */
    private static final long SHUFFLE_SEED = 5394L;

    /** Matches tokens consisting solely of ASCII letters; compiled once instead of per token. */
    private static final Pattern ALPHABETIC = Pattern.compile("^[a-zA-Z]+$");

    /**
     * Runs the generator using the paths configured in the static fields.
     *
     * @param strArr command-line arguments; ignored
     * @throws JDOMException if the underlying citation extraction fails
     * @throws IOException if any input or output file cannot be read or written
     * @throws TransformationException if a citation cannot be converted to NLM
     */
    public static void main(String[] strArr) throws JDOMException, IOException, TransformationException {
        List<Citation> citations = readCitations(new File(nlmFile));
        Collections.shuffle(citations, new Random(SHUFFLE_SEED));

        // Clamp so that a dataset smaller than TRAINING_SIZE no longer fails with
        // IndexOutOfBoundsException. The former copy of citations 2000..3999 into a
        // list that was never read has been dropped, which also removes the implicit
        // requirement of at least 4000 citations.
        int trainingSize = Math.min(TRAINING_SIZE, citations.size());
        List<Citation> training = new ArrayList<Citation>(citations.subList(0, trainingSize));

        writeWordList(citations, new File(outValidFile));
        writeFolds(training);
    }

    /**
     * Extracts all citations from the given NLM file, always closing the input stream.
     *
     * @param source the NLM file to read
     * @return the list returned by {@code NlmCitationExtractor.extractCitations}
     * @throws IOException if the file cannot be opened, read or closed
     * @throws JDOMException declared to mirror {@code main}; the exact checked
     *         exceptions of the extractor are not visible in this file
     * @throws TransformationException declared to mirror {@code main}
     */
    private static List<Citation> readCitations(File source)
            throws JDOMException, IOException, TransformationException {
        FileInputStream input = new FileInputStream(source);
        try {
            return NlmCitationExtractor.extractCitations(new InputSource(input));
        } finally {
            input.close();
        }
    }

    /**
     * Appends every purely alphabetic token of the given citations, lower-cased and
     * followed by a newline, to {@code target}.
     *
     * <p>The text is collected in memory and appended with a single call, instead of
     * reopening the file twice per token. Nothing is written (and no file is created)
     * when no token matches.
     *
     * @param citations citations whose tokens are scanned
     * @param target file to append to
     * @throws IOException if the file cannot be written
     */
    private static void writeWordList(List<Citation> citations, File target) throws IOException {
        StringBuilder words = new StringBuilder();
        for (Citation citation : citations) {
            for (CitationToken token : citation.getTokens()) {
                String text = token.getText();
                if (ALPHABETIC.matcher(text).matches()) {
                    // Locale.ROOT keeps the result independent of the default locale
                    // (e.g. Turkish would otherwise map 'I' to a dotless i).
                    words.append(text.toLowerCase(Locale.ROOT)).append('\n');
                }
            }
        }
        if (words.length() > 0) {
            FileUtils.writeStringToFile(target, words.toString(), true);
        }
    }

    /**
     * Splits the training citations round-robin into {@link #FOLDS} folds and writes the
     * {@code .train.k}, {@code .test.k} and {@code .nlm.k} files for each fold.
     *
     * <p>All writers are closed in a {@code finally} block so that they do not leak when
     * a conversion or write fails part-way through.
     * NOTE(review): {@link FileWriter} uses the platform default charset, as before.
     *
     * @param training citations to distribute; citation {@code i} belongs to fold {@code i % FOLDS}
     * @throws IOException if an output file cannot be opened, written or flushed
     * @throws TransformationException if a citation cannot be converted to an NLM element
     * @throws JDOMException declared to mirror {@code main}; the exact checked
     *         exceptions of the project helpers are not visible in this file
     */
    private static void writeFolds(List<Citation> training)
            throws JDOMException, IOException, TransformationException {
        // Create every fold up front so that a fold is never null, even when there
        // are fewer citations than folds.
        List<List<Citation>> folds = new ArrayList<List<Citation>>(FOLDS);
        for (int fold = 0; fold < FOLDS; fold++) {
            folds.add(new ArrayList<Citation>());
        }
        for (int i = 0; i < training.size(); i++) {
            folds.get(i % FOLDS).add(training.get(i));
        }

        Writer[] trainWriters = new Writer[FOLDS];
        Writer[] testWriters = new Writer[FOLDS];
        Writer[] nlmWriters = new Writer[FOLDS];
        try {
            // Opened inside the try so that a failure on a later file still closes
            // the writers that were already opened.
            for (int fold = 0; fold < FOLDS; fold++) {
                trainWriters[fold] = new FileWriter(outFile + ".train." + fold);
                testWriters[fold] = new FileWriter(outFile + ".test." + fold);
                nlmWriters[fold] = new FileWriter(outFile + ".nlm." + fold);
            }

            for (int fold = 0; fold < FOLDS; fold++) {
                writeNlmFold(folds.get(fold), nlmWriters[fold]);
            }

            for (int i = 0; i < training.size(); i++) {
                String entry = StringUtils.join(
                        CitationUtils.citationToMalletInputFormat(training.get(i)), "\n") + "\n\n";
                int ownFold = i % FOLDS;
                for (int fold = 0; fold < FOLDS; fold++) {
                    // A citation is test data for its own fold and training data for all others.
                    Writer destination = (fold == ownFold) ? testWriters[fold] : trainWriters[fold];
                    destination.write(entry);
                }
            }

            // Flush explicitly on the success path so that write errors are reported
            // instead of being swallowed by the quiet close below.
            for (int fold = 0; fold < FOLDS; fold++) {
                trainWriters[fold].flush();
                testWriters[fold].flush();
                nlmWriters[fold].flush();
            }
        } finally {
            closeQuietly(trainWriters);
            closeQuietly(testWriters);
            closeQuietly(nlmWriters);
        }
    }

    /**
     * Writes the citations of one fold as a pretty-printed {@code <refs>} element whose
     * children carry consecutive {@code id} attributes starting at 1.
     *
     * @param fold citations of the fold
     * @param out writer receiving the XML text
     * @throws IOException if writing fails
     * @throws TransformationException if a citation cannot be converted to an NLM element
     * @throws JDOMException declared to mirror {@code main}
     */
    private static void writeNlmFold(List<Citation> fold, Writer out)
            throws JDOMException, IOException, TransformationException {
        XMLOutputter outputter = new XMLOutputter(Format.getPrettyFormat());
        BibEntryToNLMElementConverter converter = new BibEntryToNLMElementConverter();
        Element refs = new Element("refs");
        int id = 1;
        for (Citation citation : fold) {
            Element ref = converter.convert(CitationUtils.citationToBibref(citation), new Object[0]);
            ref.setAttribute("id", String.valueOf(id));
            refs.addContent(ref);
            id++;
        }
        out.write(outputter.outputString(refs));
    }

    /**
     * Closes every non-null writer in the array, ignoring failures on close.
     *
     * @param writers writers to close; entries may be {@code null} if opening failed early
     */
    private static void closeQuietly(Writer[] writers) {
        for (Writer writer : writers) {
            if (writer != null) {
                try {
                    writer.close();
                } catch (IOException ignored) {
                    // Best effort: data was already flushed on the success path, and on
                    // the failure path the original exception must not be masked.
                }
            }
        }
    }

    /** Not instantiable: this class only provides the {@code main} entry point. */
    private CrossMalletTrainingFileGenerator() {
    }
}
