lucene-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From "Aviran" <amo...@infosciences.com>
Subject RE: Problem with Sort logic ?
Date Wed, 28 Jul 2004 17:49:41 GMT
I've modified FieldCacheImpl to use the actual stored value when the field
is both tokenized and stored.
For keyword fields the behavior is unchanged.

Index: FieldCacheImpl.java
===================================================================
RCS file: /home/cvspublic/jakarta-lucene/src/java/org/apache/lucene/search/FieldCacheImpl.java,v
retrieving revision 1.3
diff -u -r1.3 FieldCacheImpl.java
--- FieldCacheImpl.java	21 Jul 2004 19:05:46 -0000	1.3
+++ FieldCacheImpl.java	28 Jul 2004 17:45:41 -0000
@@ -25,6 +25,8 @@
 import java.util.Map;
 import java.util.WeakHashMap;
 import java.util.HashMap;
+import org.apache.lucene.document.Field;
+import java.util.Arrays;
 
 /**
  * Expert: The default cache implementation, storing all values in memory.
@@ -80,6 +82,29 @@
     }
   }
 
+  class FieldEntry implements Comparable {
+    String val;
+    int ind;
+    FieldEntry(int ind, String val)
+    {
+        this.ind = ind;
+        this.val = val;
+    }
+    public String getVal()
+    {
+        return val;
+    }
+    public int getInd()
+    {
+        return ind;
+    }
+    public int compareTo(Object obj)
+    {
+        return val.compareToIgnoreCase(((FieldEntry)obj).getVal());
+    }
+}
+
+
 
   /** The internal cache. Maps Entry to array of interpreted term values. **/
   final Map cache = new WeakHashMap();
@@ -240,54 +265,92 @@
     if (ret == null) {
       final int[] retArray = new int[reader.maxDoc()];
       String[] mterms = new String[reader.maxDoc()+1];
-      if (retArray.length > 0) {
-        TermDocs termDocs = reader.termDocs();
-        TermEnum termEnum = reader.terms (new Term (field, ""));
-        int t = 0;  // current term number
-
-        // an entry for documents that have no terms in this field
-        // should a document with no terms be at top or bottom?
-        // this puts them at the top - if it is changed, FieldDocSortedHitQueue
-        // needs to change as well.
-        mterms[t++] = null;
 
-        try {
-          if (termEnum.term() == null) {
-            throw new RuntimeException ("no terms in field " + field);
-          }
-          do {
-            Term term = termEnum.term();
-            if (term.field() != field) break;
-
-            // store term text
-            // we expect that there is at most one term per document
-            if (t >= mterms.length) throw new RuntimeException ("there are more terms than documents in field \"" + field + "\"");
-            mterms[t] = term.text();
-
-            termDocs.seek (termEnum);
-            while (termDocs.next()) {
-              retArray[termDocs.doc()] = t;
-            }
-
-            t++;
-          } while (termEnum.next());
-        } finally {
-          termDocs.close();
-          termEnum.close();
+      Field docField = reader.document(0).getField(field);
+      if (docField.isStored() && docField.isTokenized()) {
+          // Fill entries
+        FieldEntry[] entries = new FieldEntry[reader.maxDoc()];
+        for (int i=0; i<reader.maxDoc(); i++) {
+          String fieldValue;
+          if (!reader.isDeleted(i))
+            fieldValue = reader.document(i).get(field);
+          else
+            fieldValue = "";
+          entries[i] = new FieldEntry (i,fieldValue);
         }
 
-        if (t == 0) {
-          // if there are no terms, make the term array
-          // have a single null entry
-          mterms = new String[1];
-        } else if (t < mterms.length) {
-          // if there are less terms than documents,
-          // trim off the dead array space
-          String[] terms = new String[t];
-          System.arraycopy (mterms, 0, terms, 0, t);
-          mterms = terms;
+        Arrays.sort(entries);
+        for (int i=0;i<reader.maxDoc();i++)
+        {
+          int ind = entries[i].getInd();
+          retArray[ind] = i;
+          mterms[ind]=entries[i].getVal();
         }
       }
+      else
+      {
+          if (retArray.length > 0)
+          {
+              TermDocs termDocs = reader.termDocs();
+              TermEnum termEnum = reader.terms(new Term(field, ""));
+              int t = 0; // current term number
+
+              // an entry for documents that have no terms in this field
+              // should a document with no terms be at top or bottom?
+              // this puts them at the top - if it is changed, FieldDocSortedHitQueue
+              // needs to change as well.
+              mterms[t++] = null;
+
+              try
+              {
+                  if (termEnum.term() == null)
+                  {
+                      throw new RuntimeException("no terms in field " + field);
+                  }
+                  do
+                  {
+                      Term term = termEnum.term();
+                      if (term.field() != field)
+                          break;
+
+                      // store term text
+                      // we expect that there is at most one term per document
+                      if (t >= mterms.length)
+                          throw new RuntimeException("there are more terms than documents in field \"" + field +
+                                                     "\"");
+                      mterms[t] = term.text();
+                      termDocs.seek(termEnum);
+                      while (termDocs.next())
+                      {
+                          retArray[termDocs.doc()] = t;
+                      }
+
+                      t++;
+                  }
+                  while (termEnum.next());
+              }
+              finally
+              {
+                  termDocs.close();
+                  termEnum.close();
+              }
+
+              if (t == 0)
+              {
+                  // if there are no terms, make the term array
+                  // have a single null entry
+                  mterms = new String[1];
+              }
+              else if (t < mterms.length)
+              {
+                  // if there are less terms than documents,
+                  // trim off the dead array space
+                  String[] terms = new String[t];
+                  System.arraycopy(mterms, 0, terms, 0, t);
+                  mterms = terms;
+              }
+          }
+      }
       StringIndex value = new StringIndex (retArray, mterms);
       store (reader, field, STRING_INDEX, value);
       return value;
@@ -309,7 +372,7 @@
   // inherit javadocs
   public Object getAuto (IndexReader reader, String field)
   throws IOException {
-    field = field.intern();
+  field = field.intern();
     Object ret = lookup (reader, field, SortField.AUTO);
     if (ret == null) {
       TermEnum enumerator = reader.terms (new Term (field, ""));


-----Original Message-----
From: Doug Cutting [mailto:cutting@apache.org] 
Sent: Monday, July 26, 2004 14:10 PM
To: Lucene Developers List
Subject: Re: Problem with Sort logic ?


Aviran wrote:
> Do you think that another FieldCache implementation will be 
> beneficial for those who want to sort on any field other than 
> keyword? I bet that a lot of developers will want to have the ability 
> to sort on ANY field, without having to duplicate all the fields as 
> keywords.

Yes, it would probably be useful to have the option to cache values of 
unindexed fields, or stored and tokenized fields.

Doug

---------------------------------------------------------------------
To unsubscribe, e-mail: lucene-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: lucene-dev-help@jakarta.apache.org




---------------------------------------------------------------------
To unsubscribe, e-mail: lucene-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: lucene-dev-help@jakarta.apache.org


Mime
View raw message