package org.languagetool.chunking;

import edu.washington.cs.knowitall.regex.Match;
import edu.washington.cs.knowitall.regex.RegularExpression;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;
import org.languagetool.AnalyzedTokenReadings;

/* loaded from: classes2.dex */
public class RussianChunker implements Chunker {
    /** Rules run in order by {@link #getBasicChunks(List)}; assigned in the static initializer. */
    private static final List<RegularExpressionWithPhraseType> REGEXES1;
    /** Rules run in order by {@link #addChunkTags(List)} on the result of the basic pass; assigned in the static initializer. */
    private static final List<RegularExpressionWithPhraseType> REGEXES2;
    /** Shorthand-to-expansion table that {@code build} substitutes into a pattern before compiling it. */
    private static final Map<String, String> SYNTAX_EXPANSION;
    /** When true, the chunker prints its input and every rule application that changed a tag to {@code System.out}. */
    private static boolean debug;
    /**
     * Tag names removed from a token's existing tags when a rule with {@code overwrite == true} matches it.
     * NOTE(review): tags created for NP/NPP/VP/ADJP/DPT carry a "B-"/"I-" prefix, so they presumably never
     * equal an entry of this set; confirm whether that is intended.
     */
    private static final Set<String> FILTER_TAGS = new HashSet<>(Arrays.asList("PP", "NPP", "NPS", "MayMissingYO", "VP", "SBAR", "ADJP", "DPT"));
    // NOTE(review): the meaning of the boolean flag is not visible here; confirm against TokenExpressionFactory.
    private static final TokenExpressionFactory FACTORY = new TokenExpressionFactory(false);

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: classes2.dex */
    /**
     * The token index ranges touched by one rule application. The debug printer
     * uses it to decide which token lines to highlight.
     */
    public static class AffectedSpans {
        final List<Span> spans;

        AffectedSpans(List<Span> list) {
            this.spans = list;
        }

        /**
         * Reports whether a token index falls inside any recorded span.
         *
         * @param i token index to look up
         * @return true if some span has {@code startIndex <= i < endIndex}
         */
        boolean isAffected(int i) {
            for (Span candidate : this.spans) {
                boolean beforeStart = i < candidate.startIndex;
                boolean atOrPastEnd = i >= candidate.endIndex;
                if (!beforeStart && !atOrPastEnd) {
                    return true;
                }
            }
            return false;
        }
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /* loaded from: classes2.dex */
    public enum PhraseType {
        NP,
        NPS,
        NPP,
        PP,
        MayMissingYO,
        VP,
        SBAR,
        ADJP,
        DPT
    }

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: classes2.dex */
    /**
     * One chunking rule: a compiled token pattern, the phrase type to assign to
     * the tokens it matches, and whether existing filterable tags are dropped first.
     */
    public static class RegularExpressionWithPhraseType {
        final RegularExpression<ChunkTaggedToken> expression;
        final PhraseType phraseType;
        final boolean overwrite;

        RegularExpressionWithPhraseType(RegularExpression<ChunkTaggedToken> regularExpression, PhraseType phraseType, boolean z) {
            this.expression = regularExpression;
            this.phraseType = phraseType;
            this.overwrite = z;
        }

        /** Renders the rule as {@code TYPE <= pattern (overwrite: flag)} for debug and error messages. */
        @Override
        public String toString() {
            StringBuilder text = new StringBuilder();
            text.append(this.phraseType);
            text.append(" <= ");
            text.append(this.expression);
            text.append(" (overwrite: ");
            text.append(this.overwrite);
            text.append(")");
            return text.toString();
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: classes2.dex */
    /** A token index range; callers in this file treat the start as inclusive and the end as exclusive. */
    public static class Span {
        final int startIndex;
        final int endIndex;

        Span(int start, int end) {
            startIndex = start;
            endIndex = end;
        }
    }

    /*
     * Builds the shorthand table and both rule lists. The table is assigned first
     * because build() reads SYNTAX_EXPANSION while compiling every pattern.
     */
    static {
        Map<String, String> expansions = new HashMap<>();
        SYNTAX_EXPANSION = expansions;
        // Each shorthand stands for one begin-tagged token followed by any number of inside-tagged tokens.
        // None of the patterns below currently contains one of these shorthands.
        expansions.put("<NP>", "<chunk=B-NP> <chunk=I-NP>*");
        expansions.put("<VP>", "<chunk=B-VP> <chunk=I-VP>*");
        expansions.put("<ADJP>", "<chunk=B-ADJP> <chunk=I-ADJP>*");
        expansions.put("<DPT>", "<chunk=B-DPT> <chunk=I-DPT>*");
        debug = false;

        // First pass: applied in this order to the freshly created "O"-tagged tokens.
        REGEXES1 = Arrays.asList(
                build("<posre='NN:(Name|Fam|Patr):.*'> <posre='NN:(Name|Fam|Patr):.*'>+ ", PhraseType.NP, true),
                build("<posre='NN:Fam:.*'> <regexCS=[А-ЯЁ]> <.> <regexCS=[А-ЯЁ]> <.> ", PhraseType.NP, true),
                build("<regexCS=[А-ЯЁ]> <.> <regexCS=[А-ЯЁ]> <.> <posre='NN:Fam:.*'> ", PhraseType.NP, true),
                build("<posre='VB:.*:.*' & !posre='NN:.*'>* ", PhraseType.VP, false),
                build("<если>", PhraseType.SBAR),
                build("<поэтому>", PhraseType.SBAR),
                build("<posre='ADJ:Posit:.*:.*'> <posre='NN:(Anim|Inanim):.*' & !posre='NN:(Anim|Inanim):.*:(R|D|T|P)'> ", PhraseType.NP, true),
                build("<posre='ADJ:Posit:.*:.*'> <posre='NN:(Anim|Inanim):.*' & !posre='NN:(Anim|Inanim):.*:(R|D|T|P)'> <posre='NN:(Anim|Inanim):.*'> ", PhraseType.NP, true),
                build("<posre='ADJ:Posit:.*:.*'> <posre='NN:(Anim|Inanim):.*' & !posre='NN:(Anim|Inanim):.*:(Nom|V)'> <posre='NN:(Anim|Inanim):.*:(Nom|V)' & !posre='NN:(Anim|Inanim):.*:(R|D|T|P)'> ", PhraseType.ADJP, true),
                build("<posre='DPT:.*:.*' & !pos='PREP'> ", PhraseType.DPT),
                build("<posre='DPT:.*:.*' & !pos='PREP'> <posre='NN:.*:.*:(R|D|T|P)' > ", PhraseType.DPT, true),
                build("<posre='DPT:.*:.*' & !pos='PREP'> <posre='PREP'> <posre='NN:.*:.*:(R|D|T|P)' > ", PhraseType.DPT, true),
                build("<posre='PT:.*:.*'> ", PhraseType.ADJP),
                build("<posre='PT:.*:.*'> <pos='ADV' > ", PhraseType.ADJP, true),
                build("<posre='PT:.*:.*'> <posre='NN:.*:.*:(R|D|T|P)' > ", PhraseType.ADJP, true),
                build("<posre='PT:.*:.*'> <posre='PREP'> <posre='NN:.*:.*:(R|D|T|P|V)' > ", PhraseType.ADJP, true),
                build("<posre='PT:.*:.*'> <posre='PREP'> <posre='ADJ:.*:.*:(R|D|T|P|V)' > <posre='NN:.*:.*:(R|D|T|P|V)' > ", PhraseType.ADJP, true),
                build("<posre='PT:.*:.*'> <posre='NN:(Anim|Inanim):.*' & !posre='NN:(Anim|Inanim):.*:(Nom|V)'> <posre='NN:(Anim|Inanim):.*:(Nom|V)' & !posre='NN:(Anim|Inanim):.*:(R|D|T|P)'> ", PhraseType.ADJP, true),
                // This pattern contains a line break after the '&'. It is written as an escape
                // because a string literal may not span source lines.
                build("<posre='PT:.*:.*'> <posre='PNN:.*' & !posre='PNN:.*:Nom:.*'> <posre='NN:(Anim|Inanim):.*:(Nom|V)' & \n!posre='NN:(Anim|Inanim):.*:(R|D|T|P)'> ", PhraseType.ADJP, true),
                build("<posre='PT:.*:.*'> <posre='ADJ:.*:.*' > ", PhraseType.ADJP, false),
                build("<тов>", PhraseType.NP));

        // Second pass: applied in this order on top of the first-pass tags.
        REGEXES2 = Arrays.asList(
                build("<posre=NN:Name:.*> <и> <posre=NN:Name:.*>", PhraseType.NPP, true),
                build("<posre=NN:Name:.*> <или> <posre=NN:Name:.*>", PhraseType.NPP, true),
                build("<не> <posre='VB:.*:.*' & !posre='NN:.*'>* ", PhraseType.VP, false));
    }

    /**
     * Runs one rule over the token list and, when the debug rendering of the list
     * changed, prints what the rule did.
     *
     * @param regularExpressionWithPhraseType rule to run
     * @param list tokens to tag; entries are replaced in place
     * @throws RuntimeException wrapping any exception raised while the rule is applied or reported
     */
    private void apply(RegularExpressionWithPhraseType regularExpressionWithPhraseType, List<ChunkTaggedToken> list) {
        final String before = getDebugString(list);
        try {
            final AffectedSpans affected = doApplyRegex(regularExpressionWithPhraseType, list);
            final String after = getDebugString(list);
            // With debugging off both snapshots are empty strings, so nothing is printed.
            if (!after.equals(before)) {
                printDebugInfo(regularExpressionWithPhraseType, affected, after);
            }
        } catch (Exception e) {
            throw new RuntimeException("Could not apply chunk regexp '" + regularExpressionWithPhraseType + "' to tokens: " + list, e);
        }
    }

    /**
     * Copies each token's chunk tags onto the readings object it wraps.
     * Tokens without readings are skipped.
     *
     * @param list tagged tokens whose tags should be written back
     */
    private void assignChunksToReadings(List<ChunkTaggedToken> list) {
        for (ChunkTaggedToken tagged : list) {
            AnalyzedTokenReadings target = tagged.getReadings();
            if (target == null) {
                continue;
            }
            target.setChunkTags(tagged.getChunkTags());
        }
    }

    /**
     * Compiles a rule that leaves existing tags in place (overwrite disabled).
     *
     * @param str pattern text, possibly containing shorthand tokens
     * @param phraseType phrase type the rule assigns
     * @return the compiled rule
     */
    private static RegularExpressionWithPhraseType build(String str, PhraseType phraseType) {
        final boolean overwrite = false;
        return build(str, phraseType, overwrite);
    }

    /**
     * Expands every shorthand from {@code SYNTAX_EXPANSION} in the pattern text,
     * compiles the result with the shared factory and wraps it as a rule.
     *
     * @param str pattern text, possibly containing shorthand tokens
     * @param phraseType phrase type the rule assigns
     * @param z whether the rule drops filterable tags from matched tokens first
     * @return the compiled rule
     */
    private static RegularExpressionWithPhraseType build(String str, PhraseType phraseType, boolean z) {
        String expanded = str;
        for (Map.Entry<String, String> shorthand : SYNTAX_EXPANSION.entrySet()) {
            expanded = expanded.replace(shorthand.getKey(), shorthand.getValue());
        }
        RegularExpression<ChunkTaggedToken> compiled = RegularExpression.compile(expanded, FACTORY);
        return new RegularExpressionWithPhraseType(compiled, phraseType, z);
    }

    /**
     * Finds every match of the rule in the token list and retags the matched tokens.
     * For each matched token: the current tags are copied; in overwrite mode the
     * tags named in {@code FILTER_TAGS} are dropped from the copy; the rule's tag is
     * added if it is not already present, and in that case the "O" tag is removed.
     * The token is then replaced in the list by a new token carrying the new tags.
     *
     * @param regularExpressionWithPhraseType rule to apply
     * @param list tokens to search; must support {@code set}, matched entries are replaced
     * @return the ranges of all matches, in match order
     */
    private AffectedSpans doApplyRegex(RegularExpressionWithPhraseType regularExpressionWithPhraseType, List<ChunkTaggedToken> list) {
        List<Match<ChunkTaggedToken>> matches = regularExpressionWithPhraseType.expression.findAll(list);
        List<Span> affected = new ArrayList<>();
        for (Match<ChunkTaggedToken> match : matches) {
            affected.add(new Span(match.startIndex(), match.endIndex()));
            for (int index = match.startIndex(); index < match.endIndex(); index++) {
                ChunkTaggedToken token = list.get(index);
                // Work on a private copy so the token's own tag list is never modified.
                List<ChunkTag> tags = new ArrayList<>(token.getChunkTags());
                if (regularExpressionWithPhraseType.overwrite) {
                    for (Iterator<ChunkTag> it = tags.iterator(); it.hasNext();) {
                        if (FILTER_TAGS.contains(it.next().getChunkTag())) {
                            it.remove();
                        }
                    }
                }
                ChunkTag newTag = getChunkTag(regularExpressionWithPhraseType, match, index);
                if (!tags.contains(newTag)) {
                    tags.add(newTag);
                    // A token that received a real tag is no longer "outside" any chunk.
                    tags.remove(new ChunkTag("O"));
                }
                list.set(index, new ChunkTaggedToken(token.getToken(), tags, token.getReadings()));
            }
        }
        return new AffectedSpans(affected);
    }

    /**
     * Produces the tag a rule assigns to the token at index {@code i} of a match.
     * NP, NPP, VP, ADJP and DPT rules yield a "B-" tag on the first token of the
     * match and an "I-" tag on every other token; all other phrase types yield a
     * tag equal to the phrase type's name.
     *
     * @param regularExpressionWithPhraseType rule that matched
     * @param match the match the token belongs to
     * @param i index of the token in the token list
     * @return the tag to add to that token
     */
    private ChunkTag getChunkTag(RegularExpressionWithPhraseType regularExpressionWithPhraseType, Match<ChunkTaggedToken> match, int i) {
        PhraseType type = regularExpressionWithPhraseType.phraseType;
        String label;
        switch (type) {
            case NP:
                label = "NP";
                break;
            case NPP:
                label = "NP-plural";
                break;
            case VP:
                label = "VP";
                break;
            case ADJP:
                label = "ADJP";
                break;
            case DPT:
                label = "DPT";
                break;
            default:
                // Remaining types are not split into begin/inside tags.
                return new ChunkTag(type.name());
        }
        String position = (i == match.startIndex()) ? "B-" : "I-";
        return new ChunkTag(position + label);
    }

    /**
     * Renders the token list for debug output, one line per token: two spaces,
     * the token, " -- ", then the token's readings with the leading copy of the
     * token text before the first '[' removed.
     *
     * @param list tokens to render
     * @return the rendering, or an empty string when debugging is off
     */
    private String getDebugString(List<ChunkTaggedToken> list) {
        if (!debug) {
            return "";
        }
        StringBuilder sb = new StringBuilder();
        for (ChunkTaggedToken token : list) {
            AnalyzedTokenReadings readings = token.getReadings();
            String readingsText;
            if (readings == null) {
                // Readings are treated as optional elsewhere in this class; keep one line per token
                // instead of failing with a NullPointerException.
                readingsText = "null";
            } else {
                readingsText = readings.toString().replaceFirst(Pattern.quote(token.getToken()) + "\\[", "[");
            }
            sb.append("  ").append(token).append(" -- ").append(readingsText).append('\n');
        }
        return sb.toString();
    }

    /**
     * Returns the current value of the class-wide debug flag.
     *
     * @return true if debug output is enabled
     */
    public static boolean isDebug() {
        return debug;
    }

    /**
     * Prints a report for one applied rule to {@code System.out}: a header naming
     * the rule, a note when the rule runs in overwrite mode, then the rendered
     * token lines. A line whose index is inside an affected span has its leading
     * two spaces replaced by " *".
     *
     * @param regularExpressionWithPhraseType rule that was applied
     * @param affectedSpans ranges the rule matched
     * @param str newline-separated token rendering taken after the rule ran
     */
    private void printDebugInfo(RegularExpressionWithPhraseType regularExpressionWithPhraseType, AffectedSpans affectedSpans, String str) {
        System.out.println("=== Applied " + regularExpressionWithPhraseType + " ===");
        if (regularExpressionWithPhraseType.overwrite) {
            System.out.println("Note: overwrite mode, replacing old " + FILTER_TAGS + " tags");
        }
        String[] lines = str.split("\n");
        for (int lineNo = 0; lineNo < lines.length; lineNo++) {
            String line = lines[lineNo];
            String shown = affectedSpans.isAffected(lineNo) ? line.replaceFirst("^  ", " *") : line;
            System.out.println(shown);
        }
        System.out.println();
    }

    /**
     * Sets the class-wide debug flag. The flag is a static field, so the change
     * applies to every instance of this class.
     *
     * @param z true to enable debug output, false to disable it
     */
    public static void setDebug(boolean z) {
        debug = z;
    }

    /**
     * Chunks a sentence: runs the basic pass, applies every second-pass rule in
     * order, then writes the resulting tags back onto the readings.
     *
     * @param list readings of the sentence to tag
     */
    public void addChunkTags(List<AnalyzedTokenReadings> list) {
        List<ChunkTaggedToken> tagged = getBasicChunks(list);
        for (RegularExpressionWithPhraseType rule : REGEXES2) {
            apply(rule, tagged);
        }
        assignChunksToReadings(tagged);
    }

    /**
     * Runs the first chunking pass. Every reading that is neither whitespace nor
     * already tagged "MayMissingYO" becomes a token tagged "O"; the first-pass
     * rules are then applied to those tokens in order. In debug mode the initial
     * token list is printed before any rule runs.
     *
     * @param list readings of the sentence
     * @return a new mutable list of tagged tokens
     */
    List<ChunkTaggedToken> getBasicChunks(List<AnalyzedTokenReadings> list) {
        List<ChunkTaggedToken> chunks = new ArrayList<>();
        // Same lookup key for every reading, so create it once.
        ChunkTag mayMissingYo = new ChunkTag("MayMissingYO");
        for (AnalyzedTokenReadings readings : list) {
            if (readings.isWhitespace() || readings.getChunkTags().contains(mayMissingYo)) {
                continue;
            }
            chunks.add(new ChunkTaggedToken(readings.getToken(), Collections.singletonList(new ChunkTag("O")), readings));
        }
        if (debug) {
            System.out.println("=============== CHUNKER INPUT ===============");
            System.out.println(getDebugString(chunks));
        }
        for (RegularExpressionWithPhraseType rule : REGEXES1) {
            apply(rule, chunks);
        }
        return chunks;
    }
}
