lucene-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From bugzi...@apache.org
Subject DO NOT REPLY [Bug 30382] - Problem with Sort logic
Date Thu, 29 Jul 2004 13:55:51 GMT
DO NOT REPLY TO THIS EMAIL, BUT PLEASE POST YOUR BUG 
RELATED COMMENTS THROUGH THE WEB INTERFACE AVAILABLE AT
<http://issues.apache.org/bugzilla/show_bug.cgi?id=30382>.
ANY REPLY MADE TO THIS MESSAGE WILL NOT BE COLLECTED AND 
INSERTED IN THE BUG DATABASE.

http://issues.apache.org/bugzilla/show_bug.cgi?id=30382

Problem with Sort logic





------- Additional Comments From amordo@infosciences.com  2004-07-29 13:55 -------
I wrote a fix that sorts on the stored field values when the sort field is both
tokenized and stored, and otherwise falls back to the indexed Terms (for
example, when the sort field is a Keyword).

Index: FieldCacheImpl.java
===================================================================
RCS file: /home/cvspublic/jakarta-lucene/src/java/org/apache/lucene/search/FieldCacheImpl.java,v
retrieving revision 1.3
diff -u -r1.3 FieldCacheImpl.java
--- FieldCacheImpl.java	21 Jul 2004 19:05:46 -0000	1.3
+++ FieldCacheImpl.java	28 Jul 2004 17:45:41 -0000
@@ -25,6 +25,8 @@
 import java.util.Map;
 import java.util.WeakHashMap;
 import java.util.HashMap;
+import org.apache.lucene.document.Field;
+import java.util.Arrays;
 
 /**
  * Expert: The default cache implementation, storing all values in memory.
@@ -80,6 +82,29 @@
     }
   }
 
+  class FieldEntry implements Comparable {
+    String val;
+    int ind;
+    FieldEntry(int ind, String val)
+    {
+        this.ind = ind;
+        this.val = val;
+    }
+    public String getVal()
+    {
+        return val;
+    }
+    public int getInd()
+    {
+        return ind;
+    }
+    public int compareTo(Object obj)
+    {
+        return val.compareToIgnoreCase(((FieldEntry)obj).getVal());
+    }
+}
+
+
 
   /** The internal cache. Maps Entry to array of interpreted term values. **/
   final Map cache = new WeakHashMap();
@@ -240,54 +265,92 @@
     if (ret == null) {
       final int[] retArray = new int[reader.maxDoc()];
       String[] mterms = new String[reader.maxDoc()+1];
-      if (retArray.length > 0) {
-        TermDocs termDocs = reader.termDocs();
-        TermEnum termEnum = reader.terms (new Term (field, ""));
-        int t = 0;  // current term number
-
-        // an entry for documents that have no terms in this field
-        // should a document with no terms be at top or bottom?
-        // this puts them at the top - if it is changed, FieldDocSortedHitQueue
-        // needs to change as well.
-        mterms[t++] = null;
 
-        try {
-          if (termEnum.term() == null) {
-            throw new RuntimeException ("no terms in field " + field);
-          }
-          do {
-            Term term = termEnum.term();
-            if (term.field() != field) break;
-
-            // store term text
-            // we expect that there is at most one term per document
-            if (t >= mterms.length) throw new RuntimeException ("there are more terms than documents in field \"" + field + "\"");
-            mterms[t] = term.text();
-
-            termDocs.seek (termEnum);
-            while (termDocs.next()) {
-              retArray[termDocs.doc()] = t;
-            }
-
-            t++;
-          } while (termEnum.next());
-        } finally {
-          termDocs.close();
-          termEnum.close();
+      Field docField = reader.document(0).getField(field);
+      if (docField.isStored() && docField.isTokenized()) {
+          // Fill entries
+        FieldEntry[] entries = new FieldEntry[reader.maxDoc()];
+        for (int i=0; i<reader.maxDoc(); i++) {
+          String fieldValue;
+          if (!reader.isDeleted(i))
+            fieldValue = reader.document(i).get(field);
+          else
+            fieldValue = "";
+          entries[i] = new FieldEntry (i,fieldValue);
         }
 
-        if (t == 0) {
-          // if there are no terms, make the term array
-          // have a single null entry
-          mterms = new String[1];
-        } else if (t < mterms.length) {
-          // if there are less terms than documents,
-          // trim off the dead array space
-          String[] terms = new String[t];
-          System.arraycopy (mterms, 0, terms, 0, t);
-          mterms = terms;
+        Arrays.sort(entries);
+        for (int i=0;i<reader.maxDoc();i++)
+        {
+          int ind = entries[i].getInd();
+          retArray[ind] = i;
+          mterms[ind]=entries[i].getVal();
         }
       }
+      else
+      {
+          if (retArray.length > 0)
+          {
+              TermDocs termDocs = reader.termDocs();
+              TermEnum termEnum = reader.terms(new Term(field, ""));
+              int t = 0; // current term number
+
+              // an entry for documents that have no terms in this field
+              // should a document with no terms be at top or bottom?
+              // this puts them at the top - if it is changed, FieldDocSortedHitQueue
+              // needs to change as well.
+              mterms[t++] = null;
+
+              try
+              {
+                  if (termEnum.term() == null)
+                  {
+                      throw new RuntimeException("no terms in field " + field);
+                  }
+                  do
+                  {
+                      Term term = termEnum.term();
+                      if (term.field() != field)
+                          break;
+
+                      // store term text
+                      // we expect that there is at most one term per document
+                      if (t >= mterms.length)
+                          throw new RuntimeException("there are more terms than documents in field \"" + field +
+                                                     "\"");
+                      mterms[t] = term.text();
+                      termDocs.seek(termEnum);
+                      while (termDocs.next())
+                      {
+                          retArray[termDocs.doc()] = t;
+                      }
+
+                      t++;
+                  }
+                  while (termEnum.next());
+              }
+              finally
+              {
+                  termDocs.close();
+                  termEnum.close();
+              }
+
+              if (t == 0)
+              {
+                  // if there are no terms, make the term array
+                  // have a single null entry
+                  mterms = new String[1];
+              }
+              else if (t < mterms.length)
+              {
+                  // if there are less terms than documents,
+                  // trim off the dead array space
+                  String[] terms = new String[t];
+                  System.arraycopy(mterms, 0, terms, 0, t);
+                  mterms = terms;
+              }
+          }
+      }
       StringIndex value = new StringIndex (retArray, mterms);
       store (reader, field, STRING_INDEX, value);
       return value;
@@ -309,7 +372,7 @@
   // inherit javadocs
   public Object getAuto (IndexReader reader, String field)
   throws IOException {
-    field = field.intern();
+  field = field.intern();
     Object ret = lookup (reader, field, SortField.AUTO);
     if (ret == null) {
       TermEnum enumerator = reader.terms (new Term (field, ""));

---------------------------------------------------------------------
To unsubscribe, e-mail: lucene-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: lucene-dev-help@jakarta.apache.org


Mime
View raw message