Return-Path: X-Original-To: apmail-lucene-commits-archive@www.apache.org Delivered-To: apmail-lucene-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id E574D10ECC for ; Sun, 20 Oct 2013 08:22:04 +0000 (UTC) Received: (qmail 60897 invoked by uid 500); 20 Oct 2013 08:22:04 -0000 Mailing-List: contact commits-help@lucene.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@lucene.apache.org Delivered-To: mailing list commits@lucene.apache.org Received: (qmail 60890 invoked by uid 99); 20 Oct 2013 08:22:03 -0000 Received: from athena.apache.org (HELO athena.apache.org) (140.211.11.136) by apache.org (qpsmtpd/0.29) with ESMTP; Sun, 20 Oct 2013 08:22:03 +0000 X-ASF-Spam-Status: No, hits=-2000.0 required=5.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Sun, 20 Oct 2013 08:22:02 +0000 Received: from eris.apache.org (localhost [127.0.0.1]) by eris.apache.org (Postfix) with ESMTP id 1B0EA2388980; Sun, 20 Oct 2013 08:21:42 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Subject: svn commit: r1533858 - in /lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko: IndexWord.java KoreanFilter.java Token.java Date: Sun, 20 Oct 2013 08:21:42 -0000 To: commits@lucene.apache.org From: uschindler@apache.org X-Mailer: svnmailer-1.0.9 Message-Id: <20131020082142.1B0EA2388980@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org Author: uschindler Date: Sun Oct 20 08:21:41 2013 New Revision: 1533858 URL: http://svn.apache.org/r1533858 Log: LUCENE-4956: Rename IndexWord to Token like in Kuromoji! Added: lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/Token.java - copied, changed from r1533856, lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/IndexWord.java Removed: lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/IndexWord.java Modified: lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/KoreanFilter.java Modified: lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/KoreanFilter.java URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/KoreanFilter.java?rev=1533858&r1=1533857&r2=1533858&view=diff ============================================================================== --- lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/KoreanFilter.java (original) +++ lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/KoreanFilter.java Sun Oct 20 08:21:41 2013 @@ -44,7 +44,7 @@ import org.apache.lucene.analysis.tokena public final class KoreanFilter extends TokenFilter { - private final LinkedList morphQueue = new LinkedList();; + private final LinkedList morphQueue = new LinkedList();; private final MorphAnalyzer morph = new MorphAnalyzer(); private final WordSpaceAnalyzer wsAnal = new WordSpaceAnalyzer(); private final CompoundNounAnalyzer cnAnalyzer = new CompoundNounAnalyzer(); @@ -125,7 +125,7 @@ public final class KoreanFilter extends private void setAttributesFromQueue(boolean isFirst) { - final IndexWord iw = morphQueue.removeFirst(); + final Token iw = morphQueue.removeFirst(); final String word = iw.getWord(); final int ofs = iw.getOffset(); @@ -155,8 +155,8 @@ public final class KoreanFilter extends List outputs = morph.analyze(input); if(outputs.size()==0) return; - Map map = new LinkedHashMap(); - if(hasOrigin) map.put("0:"+input, new IndexWord(input,0)); + Map map = new LinkedHashMap(); + if(hasOrigin) map.put("0:"+input, new Token(input,0)); if(outputs.get(0).getScore()>=AnalysisOutput.SCORE_COMPOUNDS) { extractKeyword(outputs,offsetAtt.startOffset(), map, 0); @@ -170,7 +170,7 @@ public final class KoreanFilter extends if(list.size()>1 && wsAnal.getOutputScore(list)>AnalysisOutput.SCORE_ANALYSIS) { int offset = 0; for(AnalysisOutput o : list) { - if(hasOrigin) map.put(o.getSource(), new IndexWord(o.getSource(),offsetAtt.startOffset()+offset,1)); + if(hasOrigin) map.put(o.getSource(), new Token(o.getSource(),offsetAtt.startOffset()+offset,1)); results.addAll(morph.analyze(o.getSource())); offset += o.getSource().length(); } @@ -194,7 +194,7 @@ public final class KoreanFilter extends } - private void extractKeyword(List outputs, int startoffset, Map map, int position) { + private void extractKeyword(List outputs, int startoffset, Map map, int position) { int maxDecompounds = 0; int maxStem = 0; @@ -204,7 +204,7 @@ public final class KoreanFilter extends if(output.getPos()==PatternConstants.POS_VERB) continue; // extract keywords from only noun if(!originCNoun&&output.getCNounList().size()>0) continue; // except compound nound int inc = map.size()>0 ? 0 : 1; - map.put(position+":"+output.getStem(), new IndexWord(output.getStem(),startoffset,inc)); + map.put(position+":"+output.getStem(), new Token(output.getStem(),startoffset,inc)); if(output.getStem().length()>maxStem) maxStem = output.getStem().length(); if(output.getCNounList().size()>maxDecompounds) maxDecompounds = output.getCNounList().size(); @@ -226,7 +226,7 @@ public final class KoreanFilter extends int cStartoffset = getStartOffset(output, i) + startoffset; int inc = i==0 ? 0 : 1; map.put((cPosition)+":"+cEntry.getWord(), - new IndexWord(cEntry.getWord(),cStartoffset,inc)); + new Token(cEntry.getWord(),cStartoffset,inc)); if(bigrammable&&!cEntry.isExist()) cPosition = addBiagramToMap(cEntry.getWord(), cStartoffset, map, cPosition); @@ -245,7 +245,7 @@ public final class KoreanFilter extends } } - private int addBiagramToMap(String input, int startoffset, Map map, int position) { + private int addBiagramToMap(String input, int startoffset, Map map, int position) { int offset = 0; int strlen = input.length(); if(strlen<2) return position; @@ -256,12 +256,12 @@ public final class KoreanFilter extends if(isAlphaNumChar(input.charAt(offset))) { String text = findAlphaNumeric(input.substring(offset)); - map.put(position+":"+text, new IndexWord(text,startoffset+offset,inc)); + map.put(position+":"+text, new Token(text,startoffset+offset,inc)); offset += text.length(); } else { String text = input.substring(offset, offset+2>strlen?strlen:offset+2); - map.put(position+":"+text, new IndexWord(text,startoffset+offset,inc)); + map.put(position+":"+text, new Token(text,startoffset+offset,inc)); offset++; } @@ -303,7 +303,7 @@ public final class KoreanFilter extends */ private void analysisChinese(String term) { - morphQueue.add(new IndexWord(term,0)); + morphQueue.add(new Token(term,0)); if(term.length()<2) return; // 1글자 한자는 색인어로 한글을 추출하지 않는다. List candiList = new ArrayList(); @@ -350,7 +350,7 @@ public final class KoreanFilter extends if(candiList.size() cnounMap = new HashMap(); @@ -368,13 +368,13 @@ public final class KoreanFilter extends // 한글과 매치되는 한자를 짤라서 큐에 저장한다. // nocommit: this is avoiding AIOOBE, original code: // morphQueue.add(new IndexWord(term.substring(offset,pos),offset)); - morphQueue.add(new IndexWord(term.substring(offset,Math.min(pos, term.length())),offset)); + morphQueue.add(new Token(term.substring(offset,Math.min(pos, term.length())),offset)); cnounMap.put(entry.getWord(), entry.getWord()); if(entry.getWord().length()<2) continue; // 한글은 2글자 이상만 저장한다. // 분리된 한글을 큐에 저장한다. - morphQueue.add(new IndexWord(entry.getWord(),offset)); + morphQueue.add(new Token(entry.getWord(),offset)); offset = pos; } @@ -402,7 +402,7 @@ public final class KoreanFilter extends buffer[bufferLength-2] == '\'' && (buffer[bufferLength-1] == 's' || buffer[bufferLength-1] == 'S')) { // Strip last 2 characters off - morphQueue.add(new IndexWord(term.substring(0,bufferLength - 2),0)); + morphQueue.add(new Token(term.substring(0,bufferLength - 2),0)); } else if (type == ACRONYM_TYPE) { // remove dots int upto = 0; for(int i=0;i type private String type; - public IndexWord() { + public Token() { } - public IndexWord(String word, int pos) { + public Token(String word, int pos) { this.word = word; this.offset = pos; } - public IndexWord(String word, int pos, int inc) { + public Token(String word, int pos, int inc) { this(word, pos); this.increment = inc; } - public IndexWord(String word, int pos, int inc, String t) { + public Token(String word, int pos, int inc, String t) { this(word, pos, inc); this.type = t; }