/*
 * Decompiled with CFR 0.152.
 */
package opennlp.tools.formats.leipzig;

import java.io.File;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.Iterator;
import java.util.Map;
import java.util.stream.Collectors;
import opennlp.tools.langdetect.Language;
import opennlp.tools.langdetect.LanguageSample;
import opennlp.tools.util.InputStreamFactory;
import opennlp.tools.util.MarkableFileInputStreamFactory;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.PlainTextByLineStream;

public class LeipzigLanguageSampleStream
implements ObjectStream<LanguageSample> {
    private final int sentencesPerSample;
    private Map<String, Integer> langSampleCounts;
    private File[] sentencesFiles;
    private Iterator<File> sentencesFilesIt;
    private ObjectStream<LanguageSample> sampleStream;

    public LeipzigLanguageSampleStream(File leipzigFolder, int sentencesPerSample, int samplesPerLanguage) throws IOException {
        this.sentencesPerSample = sentencesPerSample;
        this.sentencesFiles = leipzigFolder.listFiles();
        Arrays.sort(this.sentencesFiles);
        Map<String, Integer> langCounts = Arrays.stream(this.sentencesFiles).map(file -> file.getName().substring(0, 3)).collect(Collectors.groupingBy(String::toString, Collectors.summingInt(v -> 1)));
        this.langSampleCounts = langCounts.entrySet().stream().collect(Collectors.toMap(Map.Entry::getKey, e -> samplesPerLanguage / (Integer)e.getValue()));
        this.reset();
    }

    @Override
    public LanguageSample read() throws IOException {
        LanguageSample sample;
        if (this.sampleStream != null && (sample = this.sampleStream.read()) != null) {
            return sample;
        }
        if (this.sentencesFilesIt.hasNext()) {
            File sentencesFile = this.sentencesFilesIt.next();
            System.out.println(sentencesFile);
            String lang = sentencesFile.getName().substring(0, 3);
            this.sampleStream = new LeipzigSentencesStream(lang, sentencesFile, this.sentencesPerSample, this.langSampleCounts.get(lang));
            return this.read();
        }
        return null;
    }

    @Override
    public void reset() throws IOException {
        this.sentencesFilesIt = Arrays.asList(this.sentencesFiles).iterator();
        this.sampleStream = null;
    }

    public static void main(String[] args) throws Exception {
        new LeipzigLanguageSampleStream(new File("/home/blue/opennlp-data-dir/leipzig-lang"), 10, 100000);
    }

    private class LeipzigSentencesStream
    implements ObjectStream<LanguageSample> {
        private final String lang;
        private int sentencesPerSample;
        private int numberOfSamples;
        private ObjectStream<String> lineStream;
        private int sampleCount;

        LeipzigSentencesStream(String lang, File sentencesFile, int sentencesPerSample, int numberOfSamples) throws IOException {
            this.lang = sentencesFile.getName().substring(0, 3);
            this.sentencesPerSample = sentencesPerSample;
            this.numberOfSamples = numberOfSamples;
            this.lineStream = new PlainTextByLineStream((InputStreamFactory)new MarkableFileInputStreamFactory(sentencesFile), StandardCharsets.UTF_8);
        }

        @Override
        public LanguageSample read() throws IOException {
            if (this.sampleCount < this.numberOfSamples) {
                String line;
                StringBuilder sampleString = new StringBuilder();
                for (int count = 0; count < this.sentencesPerSample && (line = this.lineStream.read()) != null; ++count) {
                    int textStart = line.indexOf(9) + 1;
                    sampleString.append(line.substring(textStart) + " ");
                }
                if (sampleString.length() > 0) {
                    ++this.sampleCount;
                    return new LanguageSample(new Language(this.lang), sampleString);
                }
            }
            return null;
        }
    }
}

