lucene-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Dawid Weiss <dawid.we...@gmail.com>
Subject Re: svn commit: r1153399 - in /lucene/dev/trunk/solr/contrib/clustering/src: java/org/apache/solr/handler/clustering/carrot2/ test-files/clustering/solr/conf/ test/org/apache/solr/handler/clustering/carrot2/
Date Wed, 03 Aug 2011 09:11:52 GMT
Staszek, add an entry to CHANGES as well?
Dawid

On Wed, Aug 3, 2011 at 11:08 AM, <stanislaw@apache.org> wrote:

> Author: stanislaw
> Date: Wed Aug  3 09:08:39 2011
> New Revision: 1153399
>
> URL: http://svn.apache.org/viewvc?rev=1153399&view=rev
> Log:
> SOLR-1692: CarrotClusteringEngine produce summary does nothing: improved
> unit tests
>
> Added:
>
>  lucene/dev/trunk/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/carrot2/EchoClusteringAlgorithm.java
> Modified:
>
>  lucene/dev/trunk/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java
>
>  lucene/dev/trunk/solr/contrib/clustering/src/test-files/clustering/solr/conf/solrconfig.xml
>
>  lucene/dev/trunk/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngineTest.java
>
> Modified:
> lucene/dev/trunk/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java
> URL:
> http://svn.apache.org/viewvc/lucene/dev/trunk/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java?rev=1153399&r1=1153398&r2=1153399&view=diff
>
> ==============================================================================
> ---
> lucene/dev/trunk/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java
> (original)
> +++
> lucene/dev/trunk/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java
> Wed Aug  3 09:08:39 2011
> @@ -264,7 +264,7 @@ public class CarrotClusteringEngine exte
>
>     SolrQueryRequest req = null;
>     String[] snippetFieldAry = null;
> -    if (produceSummary == true) {
> +    if (produceSummary) {
>       highlighter = HighlightComponent.getHighlighter(core);
>       if (highlighter != null){
>         Map<String, Object> args = Maps.newHashMap();
>
> Modified:
> lucene/dev/trunk/solr/contrib/clustering/src/test-files/clustering/solr/conf/solrconfig.xml
> URL:
> http://svn.apache.org/viewvc/lucene/dev/trunk/solr/contrib/clustering/src/test-files/clustering/solr/conf/solrconfig.xml?rev=1153399&r1=1153398&r2=1153399&view=diff
>
> ==============================================================================
> ---
> lucene/dev/trunk/solr/contrib/clustering/src/test-files/clustering/solr/conf/solrconfig.xml
> (original)
> +++
> lucene/dev/trunk/solr/contrib/clustering/src/test-files/clustering/solr/conf/solrconfig.xml
> Wed Aug  3 09:08:39 2011
> @@ -397,6 +397,10 @@
>       <str
> name="carrot.algorithm">org.apache.solr.handler.clustering.carrot2.MockClusteringAlgorithm</str>
>     </lst>
>     <lst name="engine">
> +      <str name="name">echo</str>
> +      <str
> name="carrot.algorithm">org.apache.solr.handler.clustering.carrot2.EchoClusteringAlgorithm</str>
> +    </lst>
> +    <lst name="engine">
>       <str name="name">lexical-resource-check</str>
>       <str
> name="carrot.algorithm">org.apache.solr.handler.clustering.carrot2.LexicalResourcesCheckClusteringAlgorithm</str>
>     </lst>
>
> Modified:
> lucene/dev/trunk/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngineTest.java
> URL:
> http://svn.apache.org/viewvc/lucene/dev/trunk/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngineTest.java?rev=1153399&r1=1153398&r2=1153399&view=diff
>
> ==============================================================================
> ---
> lucene/dev/trunk/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngineTest.java
> (original)
> +++
> lucene/dev/trunk/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngineTest.java
> Wed Aug  3 09:08:39 2011
> @@ -58,14 +58,52 @@ public class CarrotClusteringEngineTest
>
>   @Test
>   public void testProduceSummary() throws Exception {
> -    ModifiableSolrParams solrParams = new ModifiableSolrParams();
> -    solrParams.add(CarrotParams.SNIPPET_FIELD_NAME, "snippet");
> -    solrParams.add(CarrotParams.SUMMARY_FRAGSIZE, "200");//how do we
> validate this?
> +    // We'll make two queries, one with- and another one without summary
> +    // and assert that documents are shorter when highlighter is in use.
> +    final List<NamedList<Object>> noSummaryClusters = clusterWithHighlighting(false, 80);
> +    final List<NamedList<Object>> summaryClusters = clusterWithHighlighting(true, 80);
> +
> +    assertEquals("Equal number of clusters", noSummaryClusters.size(), summaryClusters.size());
> +    for (int i = 0; i < noSummaryClusters.size(); i++) {
> +      assertTrue("Summary shorter than original document",
> +          getLabels(noSummaryClusters.get(i)).get(1).length() >
> +          getLabels(summaryClusters.get(i)).get(1).length());
> +    }
> +  }
> +
> +  @Test
> +  public void testSummaryFragSize() throws Exception {
> +    // We'll make two queries, one short summaries and another one with longer
> +    // summaries and will check that the results differ.
> +    final List<NamedList<Object>> shortSummaryClusters = clusterWithHighlighting(true, 30);
> +    final List<NamedList<Object>> longSummaryClusters = clusterWithHighlighting(true, 80);
> +
> +    assertEquals("Equal number of clusters", shortSummaryClusters.size(), longSummaryClusters.size());
> +    for (int i = 0; i < shortSummaryClusters.size(); i++) {
> +      assertTrue("Summary shorter than original document",
> +          getLabels(shortSummaryClusters.get(i)).get(1).length() <
> +      getLabels(longSummaryClusters.get(i)).get(1).length());
> +    }
> +  }
> +
> +  private List<NamedList<Object>> clusterWithHighlighting(
> +      boolean enableHighlighting, int fragSize) throws IOException {
> +
> +    final TermQuery query = new TermQuery(new Term("snippet", "mine"));
> +    // Two documents don't have mining in the snippet
> +    int expectedNumDocuments = numberOfDocs - 2;
> +
> +    final ModifiableSolrParams summaryParams = new ModifiableSolrParams();
> +    summaryParams.add(CarrotParams.SNIPPET_FIELD_NAME, "snippet");
> +    summaryParams.add(CarrotParams.PRODUCE_SUMMARY,
> +        Boolean.toString(enableHighlighting));
> +    summaryParams
> +        .add(CarrotParams.SUMMARY_FRAGSIZE, Integer.toString(fragSize));
> +    final List<NamedList<Object>> summaryClusters = checkEngine(
> +        getClusteringEngine("echo"), expectedNumDocuments,
> +        expectedNumDocuments, query, summaryParams);
>
> -       // Note: the expected number of clusters may change after upgrading
> Carrot2
> -       // due to e.g. internal improvements or tuning of Carrot2
> clustering.
> -    final int expectedNumClusters = 15;
> -    checkEngine(getClusteringEngine("default"), numberOfDocs -2 /*two
> don't have mining in the snippet*/, expectedNumClusters, new TermQuery(new
> Term("snippet", "mine")), solrParams);
> +    return summaryClusters;
>   }
>
>   @Test
> @@ -227,7 +265,6 @@ public class CarrotClusteringEngineTest
>       assertEquals("docList size", expectedNumDocs, docList.matches());
>
>       ModifiableSolrParams solrParams = new ModifiableSolrParams();
> -      solrParams.add(CarrotParams.PRODUCE_SUMMARY, "true");
>       solrParams.add(clusteringParams);
>
>       // Perform clustering
>
> Added:
> lucene/dev/trunk/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/carrot2/EchoClusteringAlgorithm.java
> URL:
> http://svn.apache.org/viewvc/lucene/dev/trunk/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/carrot2/EchoClusteringAlgorithm.java?rev=1153399&view=auto
>
> ==============================================================================
> ---
> lucene/dev/trunk/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/carrot2/EchoClusteringAlgorithm.java
> (added)
> +++
> lucene/dev/trunk/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/carrot2/EchoClusteringAlgorithm.java
> Wed Aug  3 09:08:39 2011
> @@ -0,0 +1,62 @@
> +package org.apache.solr.handler.clustering.carrot2;
> +/**
> + * Licensed to the Apache Software Foundation (ASF) under one or more
> + * contributor license agreements.  See the NOTICE file distributed with
> + * this work for additional information regarding copyright ownership.
> + * The ASF licenses this file to You under the Apache License, Version 2.0
> + * (the "License"); you may not use this file except in compliance with
> + * the License.  You may obtain a copy of the License at
> + *
> + *     http://www.apache.org/licenses/LICENSE-2.0
> + *
> + * Unless required by applicable law or agreed to in writing, software
> + * distributed under the License is distributed on an "AS IS" BASIS,
> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
> + * See the License for the specific language governing permissions and
> + * limitations under the License.
> + */
> +import java.util.List;
> +
> +import org.carrot2.core.Cluster;
> +import org.carrot2.core.Document;
> +import org.carrot2.core.IClusteringAlgorithm;
> +import org.carrot2.core.ProcessingComponentBase;
> +import org.carrot2.core.ProcessingException;
> +import org.carrot2.core.attribute.AttributeNames;
> +import org.carrot2.core.attribute.Processing;
> +import org.carrot2.util.attribute.Attribute;
> +import org.carrot2.util.attribute.Bindable;
> +import org.carrot2.util.attribute.Input;
> +import org.carrot2.util.attribute.Output;
> +
> +import com.google.common.collect.Lists;
> +
> +/**
> + * A mock Carrot2 clustering algorithm that outputs input documents as clusters.
> + * Useful only in tests.
> + */
> +@Bindable(prefix = "EchoClusteringAlgorithm")
> +public class EchoClusteringAlgorithm extends ProcessingComponentBase implements
> +        IClusteringAlgorithm {
> +  @Input
> +  @Processing
> +  @Attribute(key = AttributeNames.DOCUMENTS)
> +  private List<Document> documents;
> +
> +  @Output
> +  @Processing
> +  @Attribute(key = AttributeNames.CLUSTERS)
> +  private List<Cluster> clusters;
> +
> +  @Override
> +  public void process() throws ProcessingException {
> +    clusters = Lists.newArrayListWithCapacity(documents.size());
> +
> +    for (Document document : documents) {
> +      final Cluster cluster = new Cluster();
> +      cluster.addPhrases(document.getTitle(), document.getSummary());
> +      cluster.addDocuments(document);
> +      clusters.add(cluster);
> +    }
> +  }
> +}
>
>
>

Mime
View raw message