lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From k...@apache.org
Subject svn commit: r1346439 - in /lucene/dev/branches/branch_4x: ./ dev-tools/ lucene/ lucene/analysis/ lucene/analysis/common/ lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/std31/ lucene/analysis/common/src/java/org/apache/lucene/analys...
Date Tue, 05 Jun 2012 15:48:46 GMT
Author: koji
Date: Tue Jun  5 15:48:44 2012
New Revision: 1346439

URL: http://svn.apache.org/viewvc?rev=1346439&view=rev
Log:
LUCENE-4113: FVH: FieldTermStack.TermInfo should provide term-weight, part of LUCENE-3440

Modified:
    lucene/dev/branches/branch_4x/   (props changed)
    lucene/dev/branches/branch_4x/dev-tools/   (props changed)
    lucene/dev/branches/branch_4x/lucene/   (props changed)
    lucene/dev/branches/branch_4x/lucene/BUILD.txt   (props changed)
    lucene/dev/branches/branch_4x/lucene/CHANGES.txt   (props changed)
    lucene/dev/branches/branch_4x/lucene/JRE_VERSION_MIGRATION.txt   (props changed)
    lucene/dev/branches/branch_4x/lucene/LICENSE.txt   (props changed)
    lucene/dev/branches/branch_4x/lucene/MIGRATE.txt   (props changed)
    lucene/dev/branches/branch_4x/lucene/NOTICE.txt   (props changed)
    lucene/dev/branches/branch_4x/lucene/README.txt   (props changed)
    lucene/dev/branches/branch_4x/lucene/analysis/   (props changed)
    lucene/dev/branches/branch_4x/lucene/analysis/common/   (props changed)
    lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/std31/package.html   (props changed)
    lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/std34/package.html   (props changed)
    lucene/dev/branches/branch_4x/lucene/backwards/   (props changed)
    lucene/dev/branches/branch_4x/lucene/benchmark/   (props changed)
    lucene/dev/branches/branch_4x/lucene/build.xml   (props changed)
    lucene/dev/branches/branch_4x/lucene/common-build.xml   (props changed)
    lucene/dev/branches/branch_4x/lucene/core/   (props changed)
    lucene/dev/branches/branch_4x/lucene/demo/   (props changed)
    lucene/dev/branches/branch_4x/lucene/facet/   (props changed)
    lucene/dev/branches/branch_4x/lucene/grouping/   (props changed)
    lucene/dev/branches/branch_4x/lucene/highlighter/   (props changed)
    lucene/dev/branches/branch_4x/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldPhraseList.java
    lucene/dev/branches/branch_4x/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldTermStack.java
    lucene/dev/branches/branch_4x/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldQueryTest.java
    lucene/dev/branches/branch_4x/lucene/ivy-settings.xml   (props changed)
    lucene/dev/branches/branch_4x/lucene/join/   (props changed)
    lucene/dev/branches/branch_4x/lucene/memory/   (props changed)
    lucene/dev/branches/branch_4x/lucene/misc/   (props changed)
    lucene/dev/branches/branch_4x/lucene/module-build.xml   (props changed)
    lucene/dev/branches/branch_4x/lucene/queries/   (props changed)
    lucene/dev/branches/branch_4x/lucene/queryparser/   (props changed)
    lucene/dev/branches/branch_4x/lucene/sandbox/   (props changed)
    lucene/dev/branches/branch_4x/lucene/site/   (props changed)
    lucene/dev/branches/branch_4x/lucene/spatial/   (props changed)
    lucene/dev/branches/branch_4x/lucene/suggest/   (props changed)
    lucene/dev/branches/branch_4x/lucene/test-framework/   (props changed)
    lucene/dev/branches/branch_4x/lucene/tools/   (props changed)
    lucene/dev/branches/branch_4x/solr/   (props changed)
    lucene/dev/branches/branch_4x/solr/CHANGES.txt   (props changed)
    lucene/dev/branches/branch_4x/solr/LICENSE.txt   (props changed)
    lucene/dev/branches/branch_4x/solr/NOTICE.txt   (props changed)
    lucene/dev/branches/branch_4x/solr/README.txt   (props changed)
    lucene/dev/branches/branch_4x/solr/build.xml   (props changed)
    lucene/dev/branches/branch_4x/solr/cloud-dev/   (props changed)
    lucene/dev/branches/branch_4x/solr/common-build.xml   (props changed)
    lucene/dev/branches/branch_4x/solr/contrib/   (props changed)
    lucene/dev/branches/branch_4x/solr/core/   (props changed)
    lucene/dev/branches/branch_4x/solr/dev-tools/   (props changed)
    lucene/dev/branches/branch_4x/solr/example/   (props changed)
    lucene/dev/branches/branch_4x/solr/lib/   (props changed)
    lucene/dev/branches/branch_4x/solr/lib/httpclient-LICENSE-ASL.txt   (props changed)
    lucene/dev/branches/branch_4x/solr/lib/httpclient-NOTICE.txt   (props changed)
    lucene/dev/branches/branch_4x/solr/lib/httpcore-LICENSE-ASL.txt   (props changed)
    lucene/dev/branches/branch_4x/solr/lib/httpcore-NOTICE.txt   (props changed)
    lucene/dev/branches/branch_4x/solr/lib/httpmime-LICENSE-ASL.txt   (props changed)
    lucene/dev/branches/branch_4x/solr/lib/httpmime-NOTICE.txt   (props changed)
    lucene/dev/branches/branch_4x/solr/scripts/   (props changed)
    lucene/dev/branches/branch_4x/solr/solrj/   (props changed)
    lucene/dev/branches/branch_4x/solr/test-framework/   (props changed)
    lucene/dev/branches/branch_4x/solr/testlogging.properties   (props changed)
    lucene/dev/branches/branch_4x/solr/webapp/   (props changed)

Modified: lucene/dev/branches/branch_4x/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldPhraseList.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldPhraseList.java?rev=1346439&r1=1346438&r2=1346439&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldPhraseList.java (original)
+++ lucene/dev/branches/branch_4x/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldPhraseList.java Tue Jun  5 15:48:44 2012
@@ -105,12 +105,17 @@ public class FieldPhraseList {
       }
     }
   }
-  
+
   public void addIfNoOverlap( WeightedPhraseInfo wpi ){
-    for( WeightedPhraseInfo existWpi : phraseList ){
-      if( existWpi.isOffsetOverlap( wpi ) ) return;
+    for( WeightedPhraseInfo existWpi : getPhraseList() ){
+      if( existWpi.isOffsetOverlap( wpi ) ) {
+        // WeightedPhraseInfo.addIfNoOverlap() dumps the second part of, for example, hyphenated words (social-economics).
+        // The result is that all informations in TermInfo are lost and not available for further operations.
+        existWpi.getTermsInfos().addAll( wpi.getTermsInfos() );
+        return;
+      }
     }
-    phraseList.add( wpi );
+    getPhraseList().add( wpi );
   }
   
   public static class WeightedPhraseInfo {
@@ -121,6 +126,8 @@ public class FieldPhraseList {
     private float boost;  // query boost
     private int seqnum;
     
+    private ArrayList<TermInfo> termsInfos;
+    
     /**
      * @return the text
      */
@@ -142,6 +149,13 @@ public class FieldPhraseList {
       return boost;
     }
 
+    /**
+     * @return the termInfos
+     */    
+    public List<TermInfo> getTermsInfos() {
+      return termsInfos;
+    }
+
     public WeightedPhraseInfo( LinkedList<TermInfo> terms, float boost ){
       this( terms, boost, 0 );
     }
@@ -149,6 +163,10 @@ public class FieldPhraseList {
     public WeightedPhraseInfo( LinkedList<TermInfo> terms, float boost, int seqnum ){
       this.boost = boost;
       this.seqnum = seqnum;
+      
+      // now we keep TermInfos for further operations
+      termsInfos = new ArrayList<TermInfo>( terms );
+      
       termsOffsets = new ArrayList<Toffs>( terms.size() );
       TermInfo ti = terms.get( 0 );
       termsOffsets.add( new Toffs( ti.getStartOffset(), ti.getEndOffset() ) );

Modified: lucene/dev/branches/branch_4x/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldTermStack.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldTermStack.java?rev=1346439&r1=1346438&r2=1346439&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldTermStack.java (original)
+++ lucene/dev/branches/branch_4x/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldTermStack.java Tue Jun  5 15:48:44 2012
@@ -94,6 +94,10 @@ public class FieldTermStack {
     final TermsEnum termsEnum = vector.iterator(null);
     DocsAndPositionsEnum dpEnum = null;
     BytesRef text;
+    
+    int numDocs = reader.numDocs() - reader.numDeletedDocs();
+    float weight = 0;
+    
     while ((text = termsEnum.next()) != null) {
       UnicodeUtil.UTF8toUTF16(text, spare);
       final String term = spare.toString();
@@ -112,7 +116,9 @@ public class FieldTermStack {
       
       for(int i = 0;i < freq;i++) {
         int pos = dpEnum.nextPosition();
-        termList.add(new TermInfo(term, dpEnum.startOffset(), dpEnum.endOffset(), pos));
+        // For weight look here: http://lucene.apache.org/core/3_6_0/api/core/org/apache/lucene/search/DefaultSimilarity.html
+        weight = ( float ) ( Math.log( numDocs / ( double ) ( reader.docFreq( fieldName, text ) + 1 ) ) + 1.0 );
+        termList.add( new TermInfo( term, dpEnum.startOffset(), dpEnum.endOffset(), pos, weight ) );
       }
     }
     
@@ -152,22 +158,27 @@ public class FieldTermStack {
   
   public static class TermInfo implements Comparable<TermInfo>{
 
-    final String text;
-    final int startOffset;
-    final int endOffset;
-    final int position;
+    private final String text;
+    private final int startOffset;
+    private final int endOffset;
+    private final int position;    
 
-    TermInfo( String text, int startOffset, int endOffset, int position ){
+    // IDF-weight of this term
+    private final float weight;
+
+    public TermInfo( String text, int startOffset, int endOffset, int position, float weight ){
       this.text = text;
       this.startOffset = startOffset;
       this.endOffset = endOffset;
       this.position = position;
+      this.weight = weight;
     }
     
     public String getText(){ return text; }
     public int getStartOffset(){ return startOffset; }
     public int getEndOffset(){ return endOffset; }
     public int getPosition(){ return position; }
+    public float getWeight(){ return weight; }
     
     @Override
     public String toString(){
@@ -176,7 +187,8 @@ public class FieldTermStack {
       return sb.toString();
     }
 
-    public int compareTo( TermInfo o ) {
+    @Override
+    public int compareTo( TermInfo o ){
       return ( this.position - o.position );
     }
   }

Modified: lucene/dev/branches/branch_4x/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldQueryTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldQueryTest.java?rev=1346439&r1=1346438&r2=1346439&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldQueryTest.java (original)
+++ lucene/dev/branches/branch_4x/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldQueryTest.java Tue Jun  5 15:48:44 2012
@@ -810,13 +810,13 @@ public class FieldQueryTest extends Abst
     
     // "a"
     List<TermInfo> phraseCandidate = new ArrayList<TermInfo>();
-    phraseCandidate.add( new TermInfo( "a", 0, 1, 0 ) );
+    phraseCandidate.add( new TermInfo( "a", 0, 1, 0, 1 ) );
     assertNull( fq.searchPhrase( F, phraseCandidate ) );
     // "a b"
-    phraseCandidate.add( new TermInfo( "b", 2, 3, 1 ) );
+    phraseCandidate.add( new TermInfo( "b", 2, 3, 1, 1 ) );
     assertNull( fq.searchPhrase( F, phraseCandidate ) );
     // "a b c"
-    phraseCandidate.add( new TermInfo( "c", 4, 5, 2 ) );
+    phraseCandidate.add( new TermInfo( "c", 4, 5, 2, 1 ) );
     assertNotNull( fq.searchPhrase( F, phraseCandidate ) );
     assertNull( fq.searchPhrase( "x", phraseCandidate ) );
 
@@ -832,13 +832,13 @@ public class FieldQueryTest extends Abst
     
     // "a"
     phraseCandidate.clear();
-    phraseCandidate.add( new TermInfo( "a", 0, 1, 0 ) );
+    phraseCandidate.add( new TermInfo( "a", 0, 1, 0, 1 ) );
     assertNotNull( fq.searchPhrase( F, phraseCandidate ) );
     // "a b"
-    phraseCandidate.add( new TermInfo( "b", 2, 3, 1 ) );
+    phraseCandidate.add( new TermInfo( "b", 2, 3, 1, 1 ) );
     assertNull( fq.searchPhrase( F, phraseCandidate ) );
     // "a b c"
-    phraseCandidate.add( new TermInfo( "c", 4, 5, 2 ) );
+    phraseCandidate.add( new TermInfo( "c", 4, 5, 2, 1 ) );
     assertNotNull( fq.searchPhrase( F, phraseCandidate ) );
     assertNull( fq.searchPhrase( "x", phraseCandidate ) );
   }
@@ -852,9 +852,9 @@ public class FieldQueryTest extends Abst
     
     // "a b c" w/ position-gap = 2
     List<TermInfo> phraseCandidate = new ArrayList<TermInfo>();
-    phraseCandidate.add( new TermInfo( "a", 0, 1, 0 ) );
-    phraseCandidate.add( new TermInfo( "b", 2, 3, 2 ) );
-    phraseCandidate.add( new TermInfo( "c", 4, 5, 4 ) );
+    phraseCandidate.add( new TermInfo( "a", 0, 1, 0, 1 ) );
+    phraseCandidate.add( new TermInfo( "b", 2, 3, 2, 1 ) );
+    phraseCandidate.add( new TermInfo( "c", 4, 5, 4, 1 ) );
     assertNull( fq.searchPhrase( F, phraseCandidate ) );
 
     // "a b c"~1
@@ -868,9 +868,9 @@ public class FieldQueryTest extends Abst
     
     // "a b c" w/ position-gap = 3
     phraseCandidate.clear();
-    phraseCandidate.add( new TermInfo( "a", 0, 1, 0 ) );
-    phraseCandidate.add( new TermInfo( "b", 2, 3, 3 ) );
-    phraseCandidate.add( new TermInfo( "c", 4, 5, 6 ) );
+    phraseCandidate.add( new TermInfo( "a", 0, 1, 0, 1 ) );
+    phraseCandidate.add( new TermInfo( "b", 2, 3, 3, 1 ) );
+    phraseCandidate.add( new TermInfo( "c", 4, 5, 6, 1 ) );
     assertNull( fq.searchPhrase( F, phraseCandidate ) );
   }
   
@@ -901,7 +901,7 @@ public class FieldQueryTest extends Abst
     assertNotNull (qpm);
     assertNull (fq.getFieldTermMap(F, "dog"));
     List<TermInfo> phraseCandidate = new ArrayList<TermInfo>();
-    phraseCandidate.add( new TermInfo( "defg", 0, 12, 0 ) );
+    phraseCandidate.add( new TermInfo( "defg", 0, 12, 0, 1 ) );
     assertNotNull (fq.searchPhrase(F, phraseCandidate));
   }
   



Mime
View raw message