lucene-solr-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From gsing...@apache.org
Subject svn commit: r895640 - in /lucene/solr/trunk/contrib/clustering: ./ src/main/java/org/apache/solr/handler/clustering/carrot2/ src/test/java/org/apache/solr/handler/clustering/carrot2/
Date Mon, 04 Jan 2010 14:29:00 GMT
Author: gsingers
Date: Mon Jan  4 14:28:59 2010
New Revision: 895640

URL: http://svn.apache.org/viewvc?rev=895640&view=rev
Log:
SOLR-1692: fix produceSummary issue with Carrot2 clustering

Modified:
    lucene/solr/trunk/contrib/clustering/CHANGES.txt
    lucene/solr/trunk/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java
    lucene/solr/trunk/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/carrot2/CarrotParams.java
    lucene/solr/trunk/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngineTest.java

Modified: lucene/solr/trunk/contrib/clustering/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/contrib/clustering/CHANGES.txt?rev=895640&r1=895639&r2=895640&view=diff
==============================================================================
--- lucene/solr/trunk/contrib/clustering/CHANGES.txt (original)
+++ lucene/solr/trunk/contrib/clustering/CHANGES.txt Mon Jan  4 14:28:59 2010
@@ -12,6 +12,8 @@
 
 * SOLR-1684: Switch to use the SolrIndexSearcher.doc(int, Set<String>) method b/c it can use the document cache (gsingers)
 
+* SOLR-1692: Fix bug relating to carrot.produceSummary option (gsingers)
+
 ================== Release 1.4.0 ==================
 
 Solr Clustering will be released for the first time in Solr 1.4.  See http://wiki.apache.org/solr/ClusteringComponent

Modified: lucene/solr/trunk/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java?rev=895640&r1=895639&r2=895640&view=diff
==============================================================================
--- lucene/solr/trunk/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java (original)
+++ lucene/solr/trunk/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java Mon Jan  4 14:28:59 2010
@@ -113,7 +113,7 @@
    */
   private List<Document> getDocuments(DocList docList,
                                       Query query, final SolrQueryRequest sreq) throws IOException {
-    SolrHighlighter highligher = null;
+    SolrHighlighter highlighter = null;
     SolrParams solrParams = sreq.getParams();
     SolrCore core = sreq.getCore();
 
@@ -137,17 +137,25 @@
     SolrQueryRequest req = null;
     String[] snippetFieldAry = null;
     if (produceSummary == true) {
-      highligher = core.getHighlighter();
-      Map args = new HashMap();
-      snippetFieldAry = new String[]{snippetField};
-      args.put(HighlightParams.FIELDS, snippetFieldAry);
-      args.put(HighlightParams.HIGHLIGHT, "true");
-      req = new LocalSolrQueryRequest(core, query.toString(), "", 0, 1, args) {
-        @Override
-        public SolrIndexSearcher getSearcher() {
-          return sreq.getSearcher();
-        }
-      };
+      highlighter = core.getHighlighter();
+      if (highlighter != null){
+        Map args = new HashMap();
+        snippetFieldAry = new String[]{snippetField};
+        args.put(HighlightParams.FIELDS, snippetFieldAry);
+        args.put(HighlightParams.HIGHLIGHT, "true");
+        args.put(HighlightParams.SIMPLE_PRE, ""); //we don't care about actually highlighting the area
+        args.put(HighlightParams.SIMPLE_POST, "");
+        args.put(HighlightParams.FRAGSIZE, solrParams.getInt(CarrotParams.SUMMARY_FRAGSIZE, solrParams.getInt(HighlightParams.FRAGSIZE, 100)));
+        req = new LocalSolrQueryRequest(core, query.toString(), "", 0, 1, args) {
+          @Override
+          public SolrIndexSearcher getSearcher() {
+            return sreq.getSearcher();
+          }
+        };
+      } else {
+        log.warn("No highlighter configured, cannot produce summary");
+        produceSummary = false;
+      }
     }
 
     SolrIndexSearcher searcher = sreq.getSearcher();
@@ -165,11 +173,19 @@
       if (produceSummary == true) {
         docsHolder[0] = id.intValue();
         DocList docAsList = new DocSlice(0, 1, docsHolder, scores, 1, 1.0f);
-        highligher.doHighlighting(docAsList, theQuery, req, snippetFieldAry);
+        NamedList highlights = highlighter.doHighlighting(docAsList, theQuery, req, snippetFieldAry);
+        if (highlights != null && highlights.size() == 1) {//should only be one value given our setup
+          //should only be one document with one field
+          NamedList tmp = (NamedList) highlights.getVal(0);
+          String [] highlt = (String[]) tmp.get(snippetField);
+          if (highlt != null && highlt.length == 1) {
+            snippet = highlt[0];
+          }
+        }
       }
       Document carrotDocument = new Document(getValue(doc, titleField),
               snippet, doc.get(urlField));
-      carrotDocument.addField("solrId", doc.get(idFieldName));
+      carrotDocument.setField("solrId", doc.get(idFieldName));
       result.add(carrotDocument);
     }
 

Modified: lucene/solr/trunk/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/carrot2/CarrotParams.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/carrot2/CarrotParams.java?rev=895640&r1=895639&r2=895640&view=diff
==============================================================================
--- lucene/solr/trunk/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/carrot2/CarrotParams.java (original)
+++ lucene/solr/trunk/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/carrot2/CarrotParams.java Mon Jan  4 14:28:59 2010
@@ -3,6 +3,7 @@
 import java.util.Set;
 
 import com.google.common.collect.ImmutableSet;
+import org.apache.solr.common.params.HighlightParams;
 
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
@@ -33,8 +34,9 @@
   String PRODUCE_SUMMARY = CARROT_PREFIX + "produceSummary";
   String NUM_DESCRIPTIONS = CARROT_PREFIX + "numDescriptions";
   String OUTPUT_SUB_CLUSTERS = CARROT_PREFIX + "outputSubClusters";
+  String SUMMARY_FRAGSIZE = CARROT_PREFIX + "fragzise";
 
   public static final Set<String> CARROT_PARAM_NAMES = ImmutableSet.of(
           ALGORITHM, TITLE_FIELD_NAME, URL_FIELD_NAME, SNIPPET_FIELD_NAME,
-          PRODUCE_SUMMARY, NUM_DESCRIPTIONS, OUTPUT_SUB_CLUSTERS);
+          PRODUCE_SUMMARY, NUM_DESCRIPTIONS, OUTPUT_SUB_CLUSTERS, SUMMARY_FRAGSIZE);
 }

Modified: lucene/solr/trunk/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngineTest.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngineTest.java?rev=895640&r1=895639&r2=895640&view=diff
==============================================================================
--- lucene/solr/trunk/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngineTest.java (original)
+++ lucene/solr/trunk/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngineTest.java Mon Jan  4 14:28:59 2010
@@ -17,9 +17,11 @@
  * limitations under the License.
  */
 
+import org.apache.lucene.index.Term;
 import org.apache.lucene.search.MatchAllDocsQuery;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.Sort;
+import org.apache.lucene.search.TermQuery;
 import org.apache.solr.common.params.ModifiableSolrParams;
 import org.apache.solr.common.params.SolrParams;
 import org.apache.solr.common.util.NamedList;
@@ -43,6 +45,13 @@
     checkEngine(getClusteringEngine("default"), 10);
   }
 
+  public void testProduceSummary() throws Exception {
+    ModifiableSolrParams solrParams = new ModifiableSolrParams();
+    solrParams.add(CarrotParams.SNIPPET_FIELD_NAME, "snippet");
+    solrParams.add(CarrotParams.SUMMARY_FRAGSIZE, "200");//how do we validate this?
+    checkEngine(getClusteringEngine("default"), numberOfDocs -2 /*two don't have mining in the snippet*/, 16, new TermQuery(new Term("snippet", "mine")), solrParams);
+  }
+
   public void testCarrotStc() throws Exception {
     checkEngine(getClusteringEngine("stc"), 1);
   }
@@ -55,8 +64,7 @@
   public void testWithSubclusters() throws Exception {
     ModifiableSolrParams params = new ModifiableSolrParams();
     params.set(CarrotParams.OUTPUT_SUB_CLUSTERS, true);
-    checkClusters(checkEngine(getClusteringEngine("mock"), this.numberOfDocs,
-            params), 1, 1, 2);
+    checkClusters(checkEngine(getClusteringEngine("mock"), this.numberOfDocs), 1, 1, 2);
   }
 
   public void testNumDescriptions() throws Exception {
@@ -87,21 +95,27 @@
   }
 
   private List checkEngine(CarrotClusteringEngine engine,
-                           int expectedNumClusters) throws IOException {
-    return checkEngine(engine, expectedNumClusters, new ModifiableSolrParams());
+                            int expectedNumClusters) throws IOException {
+    return checkEngine(engine, numberOfDocs, expectedNumClusters, new MatchAllDocsQuery(), new ModifiableSolrParams());
   }
 
   private List checkEngine(CarrotClusteringEngine engine,
-                           int expectedNumClusters, SolrParams clusteringParams) throws IOException {
+                            int expectedNumClusters, SolrParams clusteringParams) throws IOException {
+    return checkEngine(engine, numberOfDocs, expectedNumClusters, new MatchAllDocsQuery(), clusteringParams);
+  }
+
+
+  private List checkEngine(CarrotClusteringEngine engine, int expectedNumDocs,
+                           int expectedNumClusters, Query query, SolrParams clusteringParams) throws IOException {
     // Get all documents to cluster
     RefCounted<SolrIndexSearcher> ref = h.getCore().getSearcher();
-    MatchAllDocsQuery query = new MatchAllDocsQuery();
+
     DocList docList;
     try {
       SolrIndexSearcher searcher = ref.get();
       docList = searcher.getDocList(query, (Query) null, new Sort(), 0,
               numberOfDocs);
-      assertEquals("docList size", this.numberOfDocs, docList.matches());
+      assertEquals("docList size", expectedNumDocs, docList.matches());
     } finally {
       ref.decref();
     }
@@ -114,7 +128,7 @@
     LocalSolrQueryRequest req = new LocalSolrQueryRequest(h.getCore(), solrParams);
     List results = (List) engine.cluster(query, docList, req);
     req.close();
-    assertEquals("number of clusters", expectedNumClusters, results.size());
+    assertEquals("number of clusters: " + results, expectedNumClusters, results.size());
     checkClusters(results, false);
     return results;
   }



Mime
View raw message