/*
 * Decompiled with CFR 0.152.
 */
package ext.lib.nlp;

import com.atilika.kuromoji.TokenizerBase;
import com.atilika.kuromoji.ipadic.Token;
import com.atilika.kuromoji.ipadic.Tokenizer;
import ext.base.core.BcUnicode;
import ext.lib.nlp.KrmjTokenWrap;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;

/**
 * Static helpers around the Kuromoji IPADIC tokenizer: tokenizing text, wrapping tokens in
 * {@link KrmjTokenWrap}, joining surfaces back into strings and producing diagnostic dumps.
 *
 * <p>This class is not instantiable.
 */
public class KuromojiUtil {
    /** Classpath resource (resolved relative to this class) that is loaded as the user dictionary. */
    private static final String USER_DICTIONARY_RESOURCE = "person_name_dic.csv";

    /** Level-1 part-of-speech values for which {@link #check(List)} increments the counter. */
    static List<String> speechLevel1 = new ArrayList<String>();

    /** Level-2 part-of-speech values for which {@link #check(List)} increments the counter. */
    static List<String> speechLevel2;

    static {
        // U+52A9 U+52D5 U+8A5E ("auxiliary verb").
        speechLevel1.add("\u52a9\u52d5\u8a5e");
        // NOTE(review): exact duplicate of the entry above; kept so the list contents stay
        // unchanged. Possibly a different category was intended here - confirm with the owner.
        speechLevel1.add("\u52a9\u52d5\u8a5e");

        speechLevel2 = new ArrayList<String>();
        speechLevel2.add("\u53e5\u70b9");             // U+53E5 U+70B9 ("period")
        speechLevel2.add("\u8aad\u70b9");             // U+8AAD U+70B9 ("comma")
        speechLevel2.add("\u4ee3\u540d\u8a5e");       // U+4EE3 U+540D U+8A5E ("pronoun")
        speechLevel2.add("\u526f\u8a5e\u53ef\u80fd"); // U+526F U+8A5E U+53EF U+80FD ("adverbial possible")
        speechLevel2.add("\u6570");                   // U+6570 ("number")
    }

    private KuromojiUtil() {
    }

    /**
     * Tokenizes {@code src} with a NORMAL-mode tokenizer that uses the bundled user dictionary.
     *
     * <p>A new tokenizer is built on every call.
     *
     * @param src text to tokenize
     * @return the tokens produced by the tokenizer, in order
     * @throws IllegalStateException if the user dictionary resource is not on the classpath
     * @throws Exception if the user dictionary cannot be read
     */
    public static List<Token> tokenize(String src) throws Exception {
        Tokenizer.Builder tokenizerBuilder = new Tokenizer.Builder().mode(TokenizerBase.Mode.NORMAL);
        // try-with-resources: the dictionary stream was previously never closed.
        try (InputStream is = KuromojiUtil.class.getResourceAsStream(USER_DICTIONARY_RESOURCE)) {
            if (is == null) {
                // Fail with a clear message instead of an opaque NullPointerException in the library.
                throw new IllegalStateException(
                        "User dictionary resource not found: " + USER_DICTIONARY_RESOURCE);
            }
            tokenizerBuilder.userDictionary(is);
            // Build while the stream is still open, in case the dictionary is consumed lazily.
            Tokenizer tokenizer = tokenizerBuilder.build();
            List<Token> tokenList = tokenizer.tokenize(src);
            return tokenList;
        }
    }

    /**
     * Wraps every token in a new {@link KrmjTokenWrap} whose {@code index} is the token's
     * position in {@code tokenList}.
     *
     * @param tokenList tokens to wrap
     * @return a new list of wrappers, in the same order as {@code tokenList}
     */
    public static List<KrmjTokenWrap> tokenWrapList(List<Token> tokenList) throws Exception {
        List<KrmjTokenWrap> tokenWrapList = new ArrayList<KrmjTokenWrap>(tokenList.size());
        for (int i = 0; i < tokenList.size(); i++) {
            KrmjTokenWrap tokenWrap = new KrmjTokenWrap();
            tokenWrap.index = i;
            tokenWrap.token = tokenList.get(i);
            tokenWrapList.add(tokenWrap);
        }
        return tokenWrapList;
    }

    /**
     * Concatenates the surface forms of all tokens without any separator.
     *
     * @param tokenList tokens to join
     * @return the concatenated surfaces; empty string for an empty list
     */
    public static String toString(List<Token> tokenList) throws Exception {
        StringBuilder sb = new StringBuilder();
        for (Token token : tokenList) {
            sb.append(token.getSurface());
        }
        return sb.toString();
    }

    /**
     * Same as {@link #toString(List)}, but for wrapped tokens. No tokens are filtered out.
     *
     * @param tokenWrapList wrapped tokens to join
     * @return the concatenated surfaces of the wrapped tokens
     */
    public static String toString2(List<KrmjTokenWrap> tokenWrapList) throws Exception {
        List<Token> tokenList = KuromojiUtil.toTokenList(tokenWrapList, false);
        return KuromojiUtil.toString(tokenList);
    }

    /**
     * Extracts the underlying tokens from a list of wrappers.
     *
     * @param tokenWrapList wrapped tokens
     * @param spaseExclusion when {@code true}, tokens whose surface is exactly one ASCII space
     *     ({@code " "}) are left out of the result
     * @return a new list with the (optionally filtered) tokens, in the original order
     */
    public static List<Token> toTokenList(List<KrmjTokenWrap> tokenWrapList, boolean spaseExclusion) throws Exception {
        List<Token> tokenList = new ArrayList<Token>();
        for (KrmjTokenWrap tokenWrap : tokenWrapList) {
            // Constant-first equals, consistent with the "=" comparison in check().
            boolean isSpace = spaseExclusion && " ".equals(tokenWrap.token.getSurface());
            if (!isSpace) {
                tokenList.add(tokenWrap.token);
            }
        }
        return tokenList;
    }

    /**
     * Increments {@code cnt} by one on every wrapper whose token matches at least one of:
     * its level-1 part of speech is in {@code speechLevel1}, its level-2 part of speech is in
     * {@code speechLevel2}, or its surface is {@code "="}.
     *
     * @param tokenWrapList wrappers to inspect; matching elements are modified in place
     */
    public static void check(List<KrmjTokenWrap> tokenWrapList) throws Exception {
        for (KrmjTokenWrap tokenWrap : tokenWrapList) {
            Token token = tokenWrap.token;
            // Short-circuit order matches the original if / else-if chain.
            if (speechLevel1.contains(token.getPartOfSpeechLevel1())
                    || speechLevel2.contains(token.getPartOfSpeechLevel2())
                    || "=".equals(token.getSurface())) {
                ++tokenWrap.cnt;
            }
        }
    }

    /**
     * Joins the surface forms of the wrapped tokens, inserting {@code BcUnicode.Unicode.SPACE}
     * between consecutive tokens.
     *
     * @param tokenWrapList wrapped tokens to join
     * @return the separated surfaces; empty string for an empty list
     */
    public static String toTokenizedString(List<KrmjTokenWrap> tokenWrapList) throws Exception {
        StringBuilder sb = new StringBuilder();
        boolean first = true;
        for (KrmjTokenWrap tokenWrap : tokenWrapList) {
            if (!first) {
                // The Object cast keeps the append(Object) overload the original code selected.
                sb.append((Object)BcUnicode.Unicode.SPACE);
            }
            sb.append(tokenWrap.token.getSurface());
            first = false;
        }
        return sb.toString();
    }

    /**
     * Builds a one-line diagnostic description of a token: position, surface, reading, the four
     * part-of-speech levels joined by {@code "-"}, and the known / user flags.
     *
     * @param token token to describe
     * @return the description line
     */
    public static String createInfo(Token token) throws Exception {
        StringBuilder sb = new StringBuilder();
        sb.append("pos=").append(token.getPosition());
        // Label was misspelled ", urface: " before.
        sb.append(", surface: ").append(token.getSurface());
        sb.append(", reading: ").append(token.getReading());
        sb.append(", getPartOfSpeechLevel: ")
                .append(token.getPartOfSpeechLevel1())
                .append("-")
                .append(token.getPartOfSpeechLevel2())
                .append("-")
                .append(token.getPartOfSpeechLevel3())
                .append("-")
                .append(token.getPartOfSpeechLevel4());
        sb.append(", isKnown: ").append(token.isKnown());
        sb.append(", user: ").append(token.isUser());
        return sb.toString();
    }

    /**
     * Builds a multi-line dump with one {@link #createInfo(Token)} line per token, separated by
     * {@code "\n"} (no trailing newline).
     *
     * @param tokenList tokens to describe
     * @return the dump; empty string for an empty list
     */
    public static String createInfo(List<Token> tokenList) throws Exception {
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < tokenList.size(); i++) {
            if (i > 0) {
                sb.append("\n");
            }
            sb.append(KuromojiUtil.createInfo(tokenList.get(i)));
        }
        return sb.toString();
    }

    /**
     * Same as {@link #createInfo(List)}, but for wrapped tokens.
     *
     * @param tokenWrapList wrapped tokens to describe
     * @return the dump; empty string for an empty list
     */
    public static String createInfo2(List<KrmjTokenWrap> tokenWrapList) throws Exception {
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < tokenWrapList.size(); i++) {
            if (i > 0) {
                sb.append("\n");
            }
            sb.append(KuromojiUtil.createInfo(tokenWrapList.get(i).token));
        }
        return sb.toString();
    }
}

