Ticket #29984: patch-mecab.diff
File patch-mecab.diff, 928 bytes (added by humem (humem), 13 years ago)
--- app/guts/pipes/DocumentPipe.java (old)
+++ app/guts/pipes/DocumentPipe.java (new)
@@ -16,6 +16,8 @@
 import cc.mallet.types.Instance;
 import cc.mallet.util.CharSequenceLexer;
 
+import dualist.ja.SimpleMecabPipe;
+
 public class DocumentPipe extends Pipe {
 
     private Pipe myPipe = new SerialPipes(new Pipe[] {
@@ -27,6 +29,9 @@
         new CharSequenceReplace(Pattern.compile("&(.*?);"), ""),
         new CharSequenceReplace(Pattern.compile("[0-9]+"), "00"),
         new CharSequenceLowercase(),
+        (System.getProperty("dualist.lang") != null &&
+         System.getProperty("dualist.lang").equals("ja")) ?
+        new SimpleMecabPipe() :
         new CharSequence2TokenSequence(CharSequenceLexer.LEX_WORD_CLASSES),
         new TokenSequenceRemoveStopwords(),
         new TokenSequence2FeatureSequence(),