lucene-solr-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From yo...@apache.org
Subject svn commit: r790599 - in /lucene/solr/trunk/contrib/clustering/src: main/java/org/apache/solr/handler/clustering/carrot2/ test/java/org/apache/solr/handler/clustering/ test/java/org/apache/solr/handler/clustering/carrot2/
Date Thu, 02 Jul 2009 14:08:37 GMT
Author: yonik
Date: Thu Jul  2 14:08:37 2009
New Revision: 790599

URL: http://svn.apache.org/viewvc?rev=790599&view=rev
Log:
reformat to remove tabs

Modified:
    lucene/solr/trunk/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java
    lucene/solr/trunk/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/carrot2/CarrotParams.java
    lucene/solr/trunk/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/AbstractClusteringTest.java
    lucene/solr/trunk/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/ClusteringComponentTest.java
    lucene/solr/trunk/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/MockDocumentClusteringEngine.java
    lucene/solr/trunk/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngineTest.java
    lucene/solr/trunk/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/carrot2/MockClusteringAlgorithm.java

Modified: lucene/solr/trunk/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java?rev=790599&r1=790598&r2=790599&view=diff
==============================================================================
--- lucene/solr/trunk/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java (original)
+++ lucene/solr/trunk/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java Thu Jul  2 14:08:37 2009
@@ -45,204 +45,206 @@
 
 /**
  * Search results clustering engine based on Carrot2 clustering algorithms.
- *
+ * <p/>
  * Output from this class is subject to change.
- * 
+ *
  * @link http://project.carrot2.org
  */
 @SuppressWarnings("unchecked")
 public class CarrotClusteringEngine extends SearchClusteringEngine {
-	private transient static Logger log = LoggerFactory
-			.getLogger(CarrotClusteringEngine.class);
+  private transient static Logger log = LoggerFactory
+          .getLogger(CarrotClusteringEngine.class);
 
-	/** Carrot2 controller that manages instances of clustering algorithms */
-	private CachingController controller = new CachingController();
-	private Class<? extends IClusteringAlgorithm> clusteringAlgorithmClass;
-	
-	private String idFieldName;
-
-	public Object cluster(Query query, DocList docList, SolrQueryRequest sreq) {
-		try {
-			// Prepare attributes for Carrot2 clustering call
-			Map<String, Object> attributes = new HashMap<String, Object>();
-			List<Document> documents = getDocuments(docList, query, sreq);
-			attributes.put(AttributeNames.DOCUMENTS, documents);
-			attributes.put(AttributeNames.QUERY, query.toString());
-			
-			// Pass extra overriding attributes from the request, if any
-			extractCarrotAttributes(sreq.getParams(), attributes);
-
-			// Perform clustering and convert to named list
-			return clustersToNamedList(controller.process(attributes,
-					clusteringAlgorithmClass).getClusters(), sreq.getParams());
-		} catch (Exception e) {
-			log.error("Carrot2 clustering failed", e);
-			throw new RuntimeException(e);
-		}
-	}
-
-	@Override
-	public String init(NamedList config, final SolrCore core) {
-		String result = super.init(config, core);
-		SolrParams initParams = SolrParams.toSolrParams(config);
-		
-		// Initialize Carrot2 controller. Pass initialization attributes, if any. 
-		HashMap<String, Object> initAttributes = new HashMap<String, Object>();
-		extractCarrotAttributes(initParams, initAttributes);
-		this.controller.init(initAttributes);
-		
-		this.idFieldName = core.getSchema().getUniqueKeyField().getName();
-
-		// Make sure the requested Carrot2 clustering algorithm class is available 
-		String carrotAlgorithmClassName = initParams.get(CarrotParams.ALGORITHM);
-		try {
-			Class<?> algorithmClass = Thread.currentThread().getContextClassLoader()
-					.loadClass(carrotAlgorithmClassName);
-			if (!IClusteringAlgorithm.class.isAssignableFrom(algorithmClass)) {
-				throw new IllegalArgumentException("Class provided as "
-						+ CarrotParams.ALGORITHM + " must implement "
-						+ IClusteringAlgorithm.class.getName());
-			}
-			this.clusteringAlgorithmClass = (Class<? extends IClusteringAlgorithm>) algorithmClass;
-		} catch (ClassNotFoundException e) {
-			throw new RuntimeException(
-					"Failed to load Carrot clustering algorithm class", e);
-		}
-
-		return result;
-	}
-
-	/**
-	 * Prepares Carrot2 documents for clustering.
-	 */
-	private List<Document> getDocuments(DocList docList,
-			Query query, final SolrQueryRequest sreq) throws IOException {
-		SolrHighlighter highligher = null;
-                SolrParams solrParams = sreq.getParams();
-                SolrCore core = sreq.getCore();
-
-		// Names of fields to deliver content for clustering
-		String urlField = solrParams.get(CarrotParams.URL_FIELD_NAME, "url");
-		String titleField = solrParams.get(CarrotParams.TITLE_FIELD_NAME, "title");
-		String snippetField = solrParams.get(CarrotParams.SNIPPET_FIELD_NAME,
-				titleField);
-		if (StringUtils.isBlank(snippetField)) {
-			throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, CarrotParams.SNIPPET_FIELD_NAME
-					+ " must not be blank.");
-		}
-		Set<String> fieldsToLoad = Sets.newHashSet(urlField, titleField,
-				snippetField, idFieldName);
-
-		// Get the documents
-		DocIterator docsIter = docList.iterator();
-		boolean produceSummary = solrParams.getBool(CarrotParams.PRODUCE_SUMMARY,
-				false);
-
-		SolrQueryRequest req = null;
-		String[] snippetFieldAry = null;
-		if (produceSummary == true) {
-			highligher = core.getHighlighter();
-			Map args = new HashMap();
-			snippetFieldAry = new String[] { snippetField };
-			args.put(HighlightParams.FIELDS, snippetFieldAry);
-			args.put(HighlightParams.HIGHLIGHT, "true");
-			req = new LocalSolrQueryRequest(core, query.toString(), "", 0, 1, args) {
-                          @Override
-                          public SolrIndexSearcher getSearcher() {
-                            return sreq.getSearcher();
-                          }
-                        };
-		}
-
-		SolrIndexSearcher searcher = sreq.getSearcher();
-		List<Document> result = new ArrayList<Document>(docList.size());
-			FieldSelector fieldSelector = new SetBasedFieldSelector(fieldsToLoad,
-					Collections.emptySet());
-			float[] scores = { 1.0f };
-			int[] docsHolder = new int[1];
-			Query theQuery = query;
-
-			while (docsIter.hasNext()) {
-				Integer id = docsIter.next();
-				org.apache.lucene.document.Document doc = searcher.doc(id,
-						fieldSelector);
-				String snippet = getValue(doc, snippetField);
-				if (produceSummary == true) {
-					docsHolder[0] = id.intValue();
-					DocList docAsList = new DocSlice(0, 1, docsHolder, scores, 1, 1.0f);
-					highligher.doHighlighting(docAsList, theQuery, req, snippetFieldAry);
-				}
-				Document carrotDocument = new Document(getValue(doc, titleField),
-						snippet, doc.get(urlField));
-				carrotDocument.addField("solrId", doc.get(idFieldName));
-				result.add(carrotDocument);
-			}
-
-		return result;
-	}
-
-	protected String getValue(org.apache.lucene.document.Document doc,
-			String field) {
-		StringBuilder result = new StringBuilder();
-		String[] vals = doc.getValues(field);
-		for (int i = 0; i < vals.length; i++) {
-			// Join multiple values with a period so that Carrot2 does not pick up
-			// phrases that cross field value boundaries (in most cases it would
-			// create useless phrases).
-			result.append(vals[i]).append(" . ");
-		}
-		return result.toString().trim();
-	}
-
-	private List clustersToNamedList(List<Cluster> carrotClusters,
-			SolrParams solrParams) {
-          List result = new ArrayList();
-		clustersToNamedList(carrotClusters, result, solrParams.getBool(
-				CarrotParams.OUTPUT_SUB_CLUSTERS, false), solrParams.getInt(
-				CarrotParams.NUM_DESCRIPTIONS, Integer.MAX_VALUE));
-		return result;
-	}
-
-	private void clustersToNamedList(List<Cluster> outputClusters,
-			List parent, boolean outputSubClusters, int maxLabels) {
-		for (Cluster outCluster : outputClusters) {
-			NamedList cluster = new SimpleOrderedMap();
-			parent.add(cluster);
-
-			List<String> labels = outCluster.getPhrases();
-                  if (labels.size() > maxLabels)
-                    labels = labels.subList(0,maxLabels);
-			cluster.add("labels", labels);
-
-			List<Document> docs = outCluster.getDocuments();
-			List docList = new ArrayList();
-			cluster.add("docs", docList);
-			for (Document doc : docs) {
-				docList.add(doc.getField("solrId"));
-			}
-
-			if (outputSubClusters) {
-				List subclusters = new ArrayList();
-				cluster.add("clusters",subclusters);
-				clustersToNamedList(outCluster.getSubclusters(), subclusters,
-						outputSubClusters, maxLabels);
-			}
-		}
-	}
-
-	/**
-	 * Extracts parameters that can possibly match some attributes of Carrot2 algorithms.
-	 */
-	private void extractCarrotAttributes(SolrParams solrParams,
-			Map<String, Object> attributes) {
-		// Extract all non-predefined parameters. This way, we'll be able to set all 
-		// parameters of Carrot2 algorithms without defining their names as constants.
-		for (Iterator<String> paramNames = solrParams.getParameterNamesIterator(); paramNames
-				.hasNext();) {
-			String paramName = paramNames.next();
-			if (!CarrotParams.CARROT_PARAM_NAMES.contains(paramName)) {
-				attributes.put(paramName, solrParams.get(paramName));
-			}
-		}
-	}
+  /**
+   * Carrot2 controller that manages instances of clustering algorithms
+   */
+  private CachingController controller = new CachingController();
+  private Class<? extends IClusteringAlgorithm> clusteringAlgorithmClass;
+
+  private String idFieldName;
+
+  public Object cluster(Query query, DocList docList, SolrQueryRequest sreq) {
+    try {
+      // Prepare attributes for Carrot2 clustering call
+      Map<String, Object> attributes = new HashMap<String, Object>();
+      List<Document> documents = getDocuments(docList, query, sreq);
+      attributes.put(AttributeNames.DOCUMENTS, documents);
+      attributes.put(AttributeNames.QUERY, query.toString());
+
+      // Pass extra overriding attributes from the request, if any
+      extractCarrotAttributes(sreq.getParams(), attributes);
+
+      // Perform clustering and convert to named list
+      return clustersToNamedList(controller.process(attributes,
+              clusteringAlgorithmClass).getClusters(), sreq.getParams());
+    } catch (Exception e) {
+      log.error("Carrot2 clustering failed", e);
+      throw new RuntimeException(e);
+    }
+  }
+
+  @Override
+  public String init(NamedList config, final SolrCore core) {
+    String result = super.init(config, core);
+    SolrParams initParams = SolrParams.toSolrParams(config);
+
+    // Initialize Carrot2 controller. Pass initialization attributes, if any.
+    HashMap<String, Object> initAttributes = new HashMap<String, Object>();
+    extractCarrotAttributes(initParams, initAttributes);
+    this.controller.init(initAttributes);
+
+    this.idFieldName = core.getSchema().getUniqueKeyField().getName();
+
+    // Make sure the requested Carrot2 clustering algorithm class is available
+    String carrotAlgorithmClassName = initParams.get(CarrotParams.ALGORITHM);
+    try {
+      Class<?> algorithmClass = Thread.currentThread().getContextClassLoader()
+              .loadClass(carrotAlgorithmClassName);
+      if (!IClusteringAlgorithm.class.isAssignableFrom(algorithmClass)) {
+        throw new IllegalArgumentException("Class provided as "
+                + CarrotParams.ALGORITHM + " must implement "
+                + IClusteringAlgorithm.class.getName());
+      }
+      this.clusteringAlgorithmClass = (Class<? extends IClusteringAlgorithm>) algorithmClass;
+    } catch (ClassNotFoundException e) {
+      throw new RuntimeException(
+              "Failed to load Carrot clustering algorithm class", e);
+    }
+
+    return result;
+  }
+
+  /**
+   * Prepares Carrot2 documents for clustering.
+   */
+  private List<Document> getDocuments(DocList docList,
+                                      Query query, final SolrQueryRequest sreq) throws IOException {
+    SolrHighlighter highligher = null;
+    SolrParams solrParams = sreq.getParams();
+    SolrCore core = sreq.getCore();
+
+    // Names of fields to deliver content for clustering
+    String urlField = solrParams.get(CarrotParams.URL_FIELD_NAME, "url");
+    String titleField = solrParams.get(CarrotParams.TITLE_FIELD_NAME, "title");
+    String snippetField = solrParams.get(CarrotParams.SNIPPET_FIELD_NAME,
+            titleField);
+    if (StringUtils.isBlank(snippetField)) {
+      throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, CarrotParams.SNIPPET_FIELD_NAME
+              + " must not be blank.");
+    }
+    Set<String> fieldsToLoad = Sets.newHashSet(urlField, titleField,
+            snippetField, idFieldName);
+
+    // Get the documents
+    DocIterator docsIter = docList.iterator();
+    boolean produceSummary = solrParams.getBool(CarrotParams.PRODUCE_SUMMARY,
+            false);
+
+    SolrQueryRequest req = null;
+    String[] snippetFieldAry = null;
+    if (produceSummary == true) {
+      highligher = core.getHighlighter();
+      Map args = new HashMap();
+      snippetFieldAry = new String[]{snippetField};
+      args.put(HighlightParams.FIELDS, snippetFieldAry);
+      args.put(HighlightParams.HIGHLIGHT, "true");
+      req = new LocalSolrQueryRequest(core, query.toString(), "", 0, 1, args) {
+        @Override
+        public SolrIndexSearcher getSearcher() {
+          return sreq.getSearcher();
+        }
+      };
+    }
+
+    SolrIndexSearcher searcher = sreq.getSearcher();
+    List<Document> result = new ArrayList<Document>(docList.size());
+    FieldSelector fieldSelector = new SetBasedFieldSelector(fieldsToLoad,
+            Collections.emptySet());
+    float[] scores = {1.0f};
+    int[] docsHolder = new int[1];
+    Query theQuery = query;
+
+    while (docsIter.hasNext()) {
+      Integer id = docsIter.next();
+      org.apache.lucene.document.Document doc = searcher.doc(id,
+              fieldSelector);
+      String snippet = getValue(doc, snippetField);
+      if (produceSummary == true) {
+        docsHolder[0] = id.intValue();
+        DocList docAsList = new DocSlice(0, 1, docsHolder, scores, 1, 1.0f);
+        highligher.doHighlighting(docAsList, theQuery, req, snippetFieldAry);
+      }
+      Document carrotDocument = new Document(getValue(doc, titleField),
+              snippet, doc.get(urlField));
+      carrotDocument.addField("solrId", doc.get(idFieldName));
+      result.add(carrotDocument);
+    }
+
+    return result;
+  }
+
+  protected String getValue(org.apache.lucene.document.Document doc,
+                            String field) {
+    StringBuilder result = new StringBuilder();
+    String[] vals = doc.getValues(field);
+    for (int i = 0; i < vals.length; i++) {
+      // Join multiple values with a period so that Carrot2 does not pick up
+      // phrases that cross field value boundaries (in most cases it would
+      // create useless phrases).
+      result.append(vals[i]).append(" . ");
+    }
+    return result.toString().trim();
+  }
+
+  private List clustersToNamedList(List<Cluster> carrotClusters,
+                                   SolrParams solrParams) {
+    List result = new ArrayList();
+    clustersToNamedList(carrotClusters, result, solrParams.getBool(
+            CarrotParams.OUTPUT_SUB_CLUSTERS, false), solrParams.getInt(
+            CarrotParams.NUM_DESCRIPTIONS, Integer.MAX_VALUE));
+    return result;
+  }
+
+  private void clustersToNamedList(List<Cluster> outputClusters,
+                                   List parent, boolean outputSubClusters, int maxLabels) {
+    for (Cluster outCluster : outputClusters) {
+      NamedList cluster = new SimpleOrderedMap();
+      parent.add(cluster);
+
+      List<String> labels = outCluster.getPhrases();
+      if (labels.size() > maxLabels)
+        labels = labels.subList(0, maxLabels);
+      cluster.add("labels", labels);
+
+      List<Document> docs = outCluster.getDocuments();
+      List docList = new ArrayList();
+      cluster.add("docs", docList);
+      for (Document doc : docs) {
+        docList.add(doc.getField("solrId"));
+      }
+
+      if (outputSubClusters) {
+        List subclusters = new ArrayList();
+        cluster.add("clusters", subclusters);
+        clustersToNamedList(outCluster.getSubclusters(), subclusters,
+                outputSubClusters, maxLabels);
+      }
+    }
+  }
+
+  /**
+   * Extracts parameters that can possibly match some attributes of Carrot2 algorithms.
+   */
+  private void extractCarrotAttributes(SolrParams solrParams,
+                                       Map<String, Object> attributes) {
+    // Extract all non-predefined parameters. This way, we'll be able to set all
+    // parameters of Carrot2 algorithms without defining their names as constants.
+    for (Iterator<String> paramNames = solrParams.getParameterNamesIterator(); paramNames
+            .hasNext();) {
+      String paramName = paramNames.next();
+      if (!CarrotParams.CARROT_PARAM_NAMES.contains(paramName)) {
+        attributes.put(paramName, solrParams.get(paramName));
+      }
+    }
+  }
 }

Modified: lucene/solr/trunk/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/carrot2/CarrotParams.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/carrot2/CarrotParams.java?rev=790599&r1=790598&r2=790599&view=diff
==============================================================================
--- lucene/solr/trunk/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/carrot2/CarrotParams.java (original)
+++ lucene/solr/trunk/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/carrot2/CarrotParams.java Thu Jul  2 14:08:37 2009
@@ -21,22 +21,20 @@
  * limitations under the License.
  */
 
-/**
- *
- */
+
 public interface CarrotParams {
 
-	String CARROT_PREFIX = "carrot.";
+  String CARROT_PREFIX = "carrot.";
 
-	String ALGORITHM = CARROT_PREFIX + "algorithm";
-	String TITLE_FIELD_NAME = CARROT_PREFIX + "title";
-	String URL_FIELD_NAME = CARROT_PREFIX + "url";
-	String SNIPPET_FIELD_NAME = CARROT_PREFIX + "snippet";
-	String PRODUCE_SUMMARY = CARROT_PREFIX + "produceSummary";
-	String NUM_DESCRIPTIONS = CARROT_PREFIX + "numDescriptions";
-	String OUTPUT_SUB_CLUSTERS = CARROT_PREFIX + "outputSubClusters";
+  String ALGORITHM = CARROT_PREFIX + "algorithm";
+  String TITLE_FIELD_NAME = CARROT_PREFIX + "title";
+  String URL_FIELD_NAME = CARROT_PREFIX + "url";
+  String SNIPPET_FIELD_NAME = CARROT_PREFIX + "snippet";
+  String PRODUCE_SUMMARY = CARROT_PREFIX + "produceSummary";
+  String NUM_DESCRIPTIONS = CARROT_PREFIX + "numDescriptions";
+  String OUTPUT_SUB_CLUSTERS = CARROT_PREFIX + "outputSubClusters";
 
-	public static final Set<String> CARROT_PARAM_NAMES = ImmutableSet.of(
-			ALGORITHM, TITLE_FIELD_NAME, URL_FIELD_NAME, SNIPPET_FIELD_NAME,
-			PRODUCE_SUMMARY, NUM_DESCRIPTIONS, OUTPUT_SUB_CLUSTERS);
+  public static final Set<String> CARROT_PARAM_NAMES = ImmutableSet.of(
+          ALGORITHM, TITLE_FIELD_NAME, URL_FIELD_NAME, SNIPPET_FIELD_NAME,
+          PRODUCE_SUMMARY, NUM_DESCRIPTIONS, OUTPUT_SUB_CLUSTERS);
 }

Modified: lucene/solr/trunk/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/AbstractClusteringTest.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/AbstractClusteringTest.java?rev=790599&r1=790598&r2=790599&view=diff
==============================================================================
--- lucene/solr/trunk/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/AbstractClusteringTest.java (original)
+++ lucene/solr/trunk/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/AbstractClusteringTest.java Thu Jul  2 14:08:37 2009
@@ -23,21 +23,21 @@
  *
  */
 public class AbstractClusteringTest extends AbstractSolrTestCase {
-	protected int numberOfDocs = 0;
-	
+  protected int numberOfDocs = 0;
+
   @Override
   public void setUp() throws Exception {
     super.setUp();
 
     numberOfDocs = 0;
     for (String[] doc : DOCUMENTS) {
-    	assertU("add failed", adoc("id", Integer.toString(numberOfDocs), "url", doc[0], "title", doc[1], "snippet", doc[2]));
-    	numberOfDocs++;
-		}
+      assertU("add failed", adoc("id", Integer.toString(numberOfDocs), "url", doc[0], "title", doc[1], "snippet", doc[2]));
+      numberOfDocs++;
+    }
     assertU("commit", commit());
   }
 
-	public String getSchemaFile() {
+  public String getSchemaFile() {
     return "schema.xml";
   }
 
@@ -45,154 +45,154 @@
     return "solrconfig.xml";
   }
 
-  final String [][] DOCUMENTS = new String[][] {
-  	{ "http://en.wikipedia.org/wiki/Data_mining",
-  		"Data Mining - Wikipedia",
-  		"Article about knowledge-discovery in databases (KDD), the practice of automatically searching large stores of data for patterns." },
+  final String[][] DOCUMENTS = new String[][]{
+          {"http://en.wikipedia.org/wiki/Data_mining",
+                  "Data Mining - Wikipedia",
+                  "Article about knowledge-discovery in databases (KDD), the practice of automatically searching large stores of data for patterns."},
 
 
-  	{ "http://en.wikipedia.org/wiki/Datamining",
-  		"Data mining - Wikipedia, the free encyclopedia",
-  		"Data mining is the entire process of applying computer-based methodology, ... Moreover, some data-mining systems such as neural networks are inherently geared ..." },
+          {"http://en.wikipedia.org/wiki/Datamining",
+                  "Data mining - Wikipedia, the free encyclopedia",
+                  "Data mining is the entire process of applying computer-based methodology, ... Moreover, some data-mining systems such as neural networks are inherently geared ..."},
 
 
-  	{ "http://www.statsoft.com/textbook/stdatmin.html",
-  		"Electronic Statistics Textbook: Data Mining Techniques",
-  		"Outlines the crucial concepts in data mining, defines the data warehousing process, and offers examples of computational and graphical exploratory data analysis techniques." },
+          {"http://www.statsoft.com/textbook/stdatmin.html",
+                  "Electronic Statistics Textbook: Data Mining Techniques",
+                  "Outlines the crucial concepts in data mining, defines the data warehousing process, and offers examples of computational and graphical exploratory data analysis techniques."},
 
 
-  	{ "http://www.thearling.com/text/dmwhite/dmwhite.htm",
-  		"An Introduction to Data Mining",
-  		"Data mining, the extraction of hidden predictive information from large ... Data mining tools predict future trends and behaviors, allowing businesses to ..." },
+          {"http://www.thearling.com/text/dmwhite/dmwhite.htm",
+                  "An Introduction to Data Mining",
+                  "Data mining, the extraction of hidden predictive information from large ... Data mining tools predict future trends and behaviors, allowing businesses to ..."},
 
 
-  	{ "http://www.anderson.ucla.edu/faculty/jason.frand/teacher/technologies/palace/datamining.htm",
-  		"Data Mining: What is Data Mining?",
-  		"Outlines what knowledge discovery, the process of analyzing data from different perspectives and summarizing it into useful information, can do and how it works." },
+          {"http://www.anderson.ucla.edu/faculty/jason.frand/teacher/technologies/palace/datamining.htm",
+                  "Data Mining: What is Data Mining?",
+                  "Outlines what knowledge discovery, the process of analyzing data from different perspectives and summarizing it into useful information, can do and how it works."},
 
 
-  	{ "http://www.spss.com/datamine",
-  		"Data Mining Software, Data Mining Applications and Data Mining Solutions",
-  		"The patterns uncovered using data mining help organizations make better and ... data mining customer ... Data mining applications, on the other hand, embed ..." },
+          {"http://www.spss.com/datamine",
+                  "Data Mining Software, Data Mining Applications and Data Mining Solutions",
+                  "The patterns uncovered using data mining help organizations make better and ... data mining customer ... Data mining applications, on the other hand, embed ..."},
 
 
-  	{ "http://www.kdnuggets.com/",
-  		"KD Nuggets",
-  		"Newsletter on the data mining and knowledge industries, offering information on data mining, knowledge discovery, text mining, and web mining software, courses, jobs, publications, and meetings." },
+          {"http://www.kdnuggets.com/",
+                  "KD Nuggets",
+                  "Newsletter on the data mining and knowledge industries, offering information on data mining, knowledge discovery, text mining, and web mining software, courses, jobs, publications, and meetings."},
 
 
-  	{ "http://www.answers.com/topic/data-mining",
-  		"data mining: Definition from Answers.com",
-  		"data mining n. The automatic extraction of useful, often previously unknown information from large databases or data ... Data Mining For Investing ..." },
+          {"http://www.answers.com/topic/data-mining",
+                  "data mining: Definition from Answers.com",
+                  "data mining n. The automatic extraction of useful, often previously unknown information from large databases or data ... Data Mining For Investing ..."},
 
 
-  	{ "http://www.statsoft.com/products/dataminer.htm",
-  		"STATISTICA Data Mining and Predictive Modeling Solutions",
-  		"GRC site-wide menuing system research and development. ... Contact a Data Mining Solutions Consultant. News and Success Stories. Events ..." },
+          {"http://www.statsoft.com/products/dataminer.htm",
+                  "STATISTICA Data Mining and Predictive Modeling Solutions",
+                  "GRC site-wide menuing system research and development. ... Contact a Data Mining Solutions Consultant. News and Success Stories. Events ..."},
 
 
-  	{ "http://datamining.typepad.com/",
-  		"Data Mining: Text Mining, Visualization and Social Media",
-  		"Commentary on text mining, data mining, social media and data visualization. ... While mining Twitter data for business and marketing intelligence (trend/buzz ..." },
+          {"http://datamining.typepad.com/",
+                  "Data Mining: Text Mining, Visualization and Social Media",
+                  "Commentary on text mining, data mining, social media and data visualization. ... While mining Twitter data for business and marketing intelligence (trend/buzz ..."},
 
 
-  	{ "http://www.twocrows.com/",
-  		"Two Crows Corporation",
-  		"Dedicated to the development, marketing, sales and support of tools for knowledge discovery to make data mining accessible and easy to use." },
+          {"http://www.twocrows.com/",
+                  "Two Crows Corporation",
+                  "Dedicated to the development, marketing, sales and support of tools for knowledge discovery to make data mining accessible and easy to use."},
 
 
-  	{ "http://www.thearling.com/",
-  		"Thearling.com",
-  		"Kurt Thearling's site dedicated to sharing information about data mining, the automated extraction of hidden predictive information from databases, and other analytic technologies." },
+          {"http://www.thearling.com/",
+                  "Thearling.com",
+                  "Kurt Thearling's site dedicated to sharing information about data mining, the automated extraction of hidden predictive information from databases, and other analytic technologies."},
 
 
-  	{ "http://www.ccsu.edu/datamining/",
-  		"CCSU - Data Mining",
-  		"Offers degrees and certificates in data mining. Allows students to explore cutting-edge data mining techniques and applications: market basket analysis, decision trees, neural networks, machine learning, web mining, and data modeling." },
+          {"http://www.ccsu.edu/datamining/",
+                  "CCSU - Data Mining",
+                  "Offers degrees and certificates in data mining. Allows students to explore cutting-edge data mining techniques and applications: market basket analysis, decision trees, neural networks, machine learning, web mining, and data modeling."},
 
 
-  	{ "http://www.oracle.com/technology/products/bi/odm",
-  		"Oracle Data Mining",
-  		"Oracle Data Mining Product Center ... New Oracle Data Mining Powers New Social CRM Application (more information ... Mining High-Dimensional Data for ..." },
+          {"http://www.oracle.com/technology/products/bi/odm",
+                  "Oracle Data Mining",
+                  "Oracle Data Mining Product Center ... New Oracle Data Mining Powers New Social CRM Application (more information ... Mining High-Dimensional Data for ..."},
 
 
-  	{ "http://databases.about.com/od/datamining/a/datamining.htm",
-  		"Data Mining: An Introduction",
-  		"About.com article on how businesses are discovering new trends and patterns of behavior that previously went unnoticed through data mining, automated statistical analysis techniques." },
+          {"http://databases.about.com/od/datamining/a/datamining.htm",
+                  "Data Mining: An Introduction",
+                  "About.com article on how businesses are discovering new trends and patterns of behavior that previously went unnoticed through data mining, automated statistical analysis techniques."},
 
 
-  	{ "http://www.dmoz.org/Computers/Software/Databases/Data_Mining/",
-  		"Open Directory - Computers: Software: Databases: Data Mining",
-  		"Data Mining and Knowledge Discovery - A peer-reviewed journal publishing ... Data mining creates information assets that an organization can leverage to ..." },
+          {"http://www.dmoz.org/Computers/Software/Databases/Data_Mining/",
+                  "Open Directory - Computers: Software: Databases: Data Mining",
+                  "Data Mining and Knowledge Discovery - A peer-reviewed journal publishing ... Data mining creates information assets that an organization can leverage to ..."},
 
 
-  	{ "http://www.cs.wisc.edu/dmi/",
-  		"DMI:Data Mining Institute",
-  		"Data Mining Institute at UW-Madison ... The Data Mining Institute (DMI) was started on June 1, 1999 at the Computer ... of the Data Mining Group of Microsoft ..." },
+          {"http://www.cs.wisc.edu/dmi/",
+                  "DMI:Data Mining Institute",
+                  "Data Mining Institute at UW-Madison ... The Data Mining Institute (DMI) was started on June 1, 1999 at the Computer ... of the Data Mining Group of Microsoft ..."},
 
 
-  	{ "http://www.the-data-mine.com/",
-  		"The Data Mine",
-  		"Provides information about data mining also known as knowledge discovery in databases (KDD) or simply knowledge discovery. List software, events, organizations, and people working in data mining." },
+          {"http://www.the-data-mine.com/",
+                  "The Data Mine",
+                  "Provides information about data mining also known as knowledge discovery in databases (KDD) or simply knowledge discovery. List software, events, organizations, and people working in data mining."},
 
 
-  	{ "http://www.statserv.com/datamining.html",
-  		"St@tServ - About Data Mining",
-  		"St@tServ Data Mining page ... Data mining in molecular biology, by Alvis Brazma. Graham Williams page. Knowledge Discovery and Data Mining Resources, ..." },
+          {"http://www.statserv.com/datamining.html",
+                  "St@tServ - About Data Mining",
+                  "St@tServ Data Mining page ... Data mining in molecular biology, by Alvis Brazma. Graham Williams page. Knowledge Discovery and Data Mining Resources, ..."},
 
 
-  	{ "http://ocw.mit.edu/OcwWeb/Sloan-School-of-Management/15-062Data-MiningSpring2003/CourseHome/index.htm",
-  		"MIT OpenCourseWare | Sloan School of Management | 15.062 Data Mining ...",
-  		"Introduces students to a class of methods known as data mining that assists managers in recognizing patterns and making intelligent use of massive amounts of ..." },
+          {"http://ocw.mit.edu/OcwWeb/Sloan-School-of-Management/15-062Data-MiningSpring2003/CourseHome/index.htm",
+                  "MIT OpenCourseWare | Sloan School of Management | 15.062 Data Mining ...",
+                  "Introduces students to a class of methods known as data mining that assists managers in recognizing patterns and making intelligent use of massive amounts of ..."},
 
 
-  	{ "http://www.pentaho.com/products/data_mining/",
-  		"Pentaho Commercial Open Source Business Intelligence: Data Mining",
-  		"For example, data mining can warn you there's a high probability a specific ... Pentaho Data Mining is differentiated by its open, standards-compliant nature, ..." },
+          {"http://www.pentaho.com/products/data_mining/",
+                  "Pentaho Commercial Open Source Business Intelligence: Data Mining",
+                  "For example, data mining can warn you there's a high probability a specific ... Pentaho Data Mining is differentiated by its open, standards-compliant nature, ..."},
 
 
-  	{ "http://www.investorhome.com/mining.htm",
-  		"Investor Home - Data Mining",
-  		"Data Mining or Data Snooping is the practice of searching for relationships and ... Data mining involves searching through databases for correlations and patterns ..." },
+          {"http://www.investorhome.com/mining.htm",
+                  "Investor Home - Data Mining",
+                  "Data Mining or Data Snooping is the practice of searching for relationships and ... Data mining involves searching through databases for correlations and patterns ..."},
 
 
-  	{ "http://www.datamining.com/",
-  		"Predictive Modeling and Predictive Analytics Solutions | Enterprise ...",
-  		"Insightful Enterprise Miner - Enterprise data mining for predictive modeling and predictive analytics." },
+          {"http://www.datamining.com/",
+                  "Predictive Modeling and Predictive Analytics Solutions | Enterprise ...",
+                  "Insightful Enterprise Miner - Enterprise data mining for predictive modeling and predictive analytics."},
 
 
-  	{ "http://www.sourcewatch.org/index.php?title=Data_mining",
-  		"Data mining - SourceWatch",
-  		"These agencies reported 199 data mining projects, of which 68 ... Office, \"DATA MINING. ... powerful technology known as data mining -- and how, in the ..." },
+          {"http://www.sourcewatch.org/index.php?title=Data_mining",
+                  "Data mining - SourceWatch",
+                  "These agencies reported 199 data mining projects, of which 68 ... Office, \"DATA MINING. ... powerful technology known as data mining -- and how, in the ..."},
 
 
-  	{ "http://www.autonlab.org/tutorials/",
-  		"Statistical Data Mining Tutorials",
-  		"Includes a set of tutorials on many aspects of statistical data mining, including the foundations of probability, the foundations of statistical data analysis, and most of the classic machine learning and data mining algorithms." },
+          {"http://www.autonlab.org/tutorials/",
+                  "Statistical Data Mining Tutorials",
+                  "Includes a set of tutorials on many aspects of statistical data mining, including the foundations of probability, the foundations of statistical data analysis, and most of the classic machine learning and data mining algorithms."},
 
 
-  	{ "http://www.microstrategy.com/data-mining/index.asp",
-  		"Data Mining",
-  		"With MicroStrategy, data mining scoring is fully integrated into mainstream ... The integration of data mining models from other applications is accomplished by ..." },
+          {"http://www.microstrategy.com/data-mining/index.asp",
+                  "Data Mining",
+                  "With MicroStrategy, data mining scoring is fully integrated into mainstream ... The integration of data mining models from other applications is accomplished by ..."},
 
 
-  	{ "http://www.datamininglab.com/",
-  		"Elder Research",
-  		"Provides consulting and short courses in data mining and pattern discovery patterns in data." },
+          {"http://www.datamininglab.com/",
+                  "Elder Research",
+                  "Provides consulting and short courses in data mining and pattern discovery patterns in data."},
 
 
-  	{ "http://www.sqlserverdatamining.com/",
-  		"SQL Server Data Mining > Home",
-  		"SQL Server Data Mining Portal ... Data Mining as an Application Platform (Whitepaper) Creating a Web Cross-sell Application with SQL Server 2005 Data Mining (Article) ..." },
+          {"http://www.sqlserverdatamining.com/",
+                  "SQL Server Data Mining > Home",
+                  "SQL Server Data Mining Portal ... Data Mining as an Application Platform (Whitepaper) Creating a Web Cross-sell Application with SQL Server 2005 Data Mining (Article) ..."},
 
 
-  	{ "http://databases.about.com/cs/datamining/g/dmining.htm",
-  		"Data Mining",
-  		"What is data mining? Find out here! ... Book Review: Data Mining and Statistical Analysis Using SQL. What is Data Mining, and What Does it Have to Do with ..." },
+          {"http://databases.about.com/cs/datamining/g/dmining.htm",
+                  "Data Mining",
+                  "What is data mining? Find out here! ... Book Review: Data Mining and Statistical Analysis Using SQL. What is Data Mining, and What Does it Have to Do with ..."},
 
 
-  	{ "http://www.sas.com/technologies/analytics/datamining/index.html",
-  		"Data Mining Software and Text Mining | SAS",
-  		"... raw data to smarter ... Data Mining is an iterative process of creating ... The knowledge gleaned from data and text mining can be used to fuel ..." }
+          {"http://www.sas.com/technologies/analytics/datamining/index.html",
+                  "Data Mining Software and Text Mining | SAS",
+                  "... raw data to smarter ... Data Mining is an iterative process of creating ... The knowledge gleaned from data and text mining can be used to fuel ..."}
   };
 }

Modified: lucene/solr/trunk/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/ClusteringComponentTest.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/ClusteringComponentTest.java?rev=790599&r1=790598&r2=790599&view=diff
==============================================================================
--- lucene/solr/trunk/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/ClusteringComponentTest.java (original)
+++ lucene/solr/trunk/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/ClusteringComponentTest.java Thu Jul  2 14:08:37 2009
@@ -16,18 +16,16 @@
  * limitations under the License.
  */
 
-import org.apache.solr.util.AbstractSolrTestCase;
-import org.apache.solr.core.SolrCore;
-import org.apache.solr.handler.component.SearchComponent;
-import org.apache.solr.handler.component.SpellCheckComponent;
-import org.apache.solr.handler.component.QueryComponent;
-import org.apache.solr.common.params.ModifiableSolrParams;
 import org.apache.solr.common.params.CommonParams;
+import org.apache.solr.common.params.ModifiableSolrParams;
 import org.apache.solr.common.util.NamedList;
 import org.apache.solr.common.util.SimpleOrderedMap;
-import org.apache.solr.request.SolrRequestHandler;
-import org.apache.solr.request.SolrQueryResponse;
+import org.apache.solr.core.SolrCore;
+import org.apache.solr.handler.component.QueryComponent;
+import org.apache.solr.handler.component.SearchComponent;
 import org.apache.solr.request.LocalSolrQueryRequest;
+import org.apache.solr.request.SolrQueryResponse;
+import org.apache.solr.request.SolrRequestHandler;
 
 
 /**
@@ -45,7 +43,7 @@
 
     params.add(ClusteringComponent.COMPONENT_NAME, "true");
     params.add(CommonParams.Q, "*:*");
-    
+
     params.add(ClusteringParams.USE_SEARCH_RESULTS, "true");
 
 
@@ -76,7 +74,7 @@
     //System.out.println("Clusters: " + clusters);
     assertTrue("clusters is null and it shouldn't be", clusters != null);
 
-    
+
   }
 
 }

Modified: lucene/solr/trunk/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/MockDocumentClusteringEngine.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/MockDocumentClusteringEngine.java?rev=790599&r1=790598&r2=790599&view=diff
==============================================================================
--- lucene/solr/trunk/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/MockDocumentClusteringEngine.java (original)
+++ lucene/solr/trunk/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/MockDocumentClusteringEngine.java Thu Jul  2 14:08:37 2009
@@ -1,7 +1,7 @@
 package org.apache.solr.handler.clustering;
 
-import org.apache.solr.common.util.NamedList;
 import org.apache.solr.common.params.SolrParams;
+import org.apache.solr.common.util.NamedList;
 import org.apache.solr.search.DocSet;
 
 
@@ -9,7 +9,7 @@
  *
  *
  **/
-public class MockDocumentClusteringEngine extends DocumentClusteringEngine{
+public class MockDocumentClusteringEngine extends DocumentClusteringEngine {
   public NamedList cluster(DocSet docs, SolrParams solrParams) {
     NamedList result = new NamedList();
     return result;

Modified: lucene/solr/trunk/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngineTest.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngineTest.java?rev=790599&r1=790598&r2=790599&view=diff
==============================================================================
--- lucene/solr/trunk/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngineTest.java (original)
+++ lucene/solr/trunk/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngineTest.java Thu Jul  2 14:08:37 2009
@@ -17,151 +17,153 @@
  * limitations under the License.
  */
 
-import java.io.IOException;
-import java.util.List;
-
-import org.apache.lucene.search.*;
+import org.apache.lucene.search.MatchAllDocsQuery;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.Sort;
 import org.apache.solr.common.params.ModifiableSolrParams;
 import org.apache.solr.common.params.SolrParams;
 import org.apache.solr.common.util.NamedList;
 import org.apache.solr.handler.clustering.AbstractClusteringTest;
 import org.apache.solr.handler.clustering.ClusteringComponent;
+import org.apache.solr.request.LocalSolrQueryRequest;
 import org.apache.solr.search.DocList;
 import org.apache.solr.search.SolrIndexSearcher;
 import org.apache.solr.util.RefCounted;
-import org.apache.solr.request.LocalSolrQueryRequest;
 import org.carrot2.util.attribute.AttributeUtils;
 
+import java.io.IOException;
+import java.util.List;
+
 /**
  *
  */
 @SuppressWarnings("unchecked")
 public class CarrotClusteringEngineTest extends AbstractClusteringTest {
-	public void testCarrotLingo() throws Exception {
-		checkEngine(getClusteringEngine("default"), 9);
-	}
-
-	public void testCarrotStc() throws Exception {
-		checkEngine(getClusteringEngine("stc"), 2);
-	}
-
-	public void testWithoutSubclusters() throws Exception {
-		checkClusters(checkEngine(getClusteringEngine("mock"), this.numberOfDocs),
-				1, 1, 0);
-	}
-
-	public void testWithSubclusters() throws Exception {
-		ModifiableSolrParams params = new ModifiableSolrParams();
-		params.set(CarrotParams.OUTPUT_SUB_CLUSTERS, true);
-		checkClusters(checkEngine(getClusteringEngine("mock"), this.numberOfDocs,
-				params), 1, 1, 2);
-	}
-	
-	public void testNumDescriptions() throws Exception {
-		ModifiableSolrParams params = new ModifiableSolrParams();
-		params.set(AttributeUtils.getKey(MockClusteringAlgorithm.class, "labels"), 5);
-		params.set(CarrotParams.NUM_DESCRIPTIONS, 3);
-		checkClusters(checkEngine(getClusteringEngine("mock"), this.numberOfDocs,
-				params), 1, 3, 0);
-	}
-	
-	public void testCarrotAttributePassing() throws Exception {
-		ModifiableSolrParams params = new ModifiableSolrParams();
-		params.set(AttributeUtils.getKey(MockClusteringAlgorithm.class, "depth"), 1);
-		params.set(AttributeUtils.getKey(MockClusteringAlgorithm.class, "labels"), 3);
-		checkClusters(checkEngine(getClusteringEngine("mock"), this.numberOfDocs,
-				params), 1, 3, 0);
-	}
-	
-	private CarrotClusteringEngine getClusteringEngine(String engineName) {
-		ClusteringComponent comp = (ClusteringComponent) h.getCore()
-				.getSearchComponent("clustering");
-		assertNotNull("clustering component should not be null", comp);
-		CarrotClusteringEngine engine = (CarrotClusteringEngine) comp
-				.getSearchClusteringEngines().get(engineName);
-		assertNotNull("clustering engine for name: " + engineName
-				+ " should not be null", engine);
-		return engine;
-	}
-
-	private List checkEngine(CarrotClusteringEngine engine,
-			int expectedNumClusters) throws IOException {
-		return checkEngine(engine, expectedNumClusters, new ModifiableSolrParams());
-	}
-
-	private List checkEngine(CarrotClusteringEngine engine,
-			int expectedNumClusters, SolrParams clusteringParams) throws IOException {
-		// Get all documents to cluster
-		RefCounted<SolrIndexSearcher> ref = h.getCore().getSearcher();
-		MatchAllDocsQuery query = new MatchAllDocsQuery();
-		DocList docList;
-		try {
-			SolrIndexSearcher searcher = ref.get();
-			docList = searcher.getDocList(query, (Query) null, new Sort(), 0,
-					numberOfDocs);
-			assertEquals("docList size", this.numberOfDocs, docList.matches());
-		} finally {
-			ref.decref();
-		}
-
-		ModifiableSolrParams solrParams = new ModifiableSolrParams();
-		solrParams.add(CarrotParams.PRODUCE_SUMMARY, "true");
-		solrParams.add(clusteringParams);
-
-		// Perform clustering
-                LocalSolrQueryRequest req = new LocalSolrQueryRequest(h.getCore(), solrParams);
-		List results = (List)engine.cluster(query, docList, req);
-                req.close();
-		assertEquals("number of clusters", expectedNumClusters, results.size());
-		checkClusters(results, false);
-		return results;
-	}
-
-	private void checkClusters(List results, int expectedDocCount,
-			int expectedLabelCount, int expectedSubclusterCount) {
-		for (int i = 0; i < results.size(); i++) {
-				NamedList cluster = (NamedList) results.get(i);
-				checkCluster(cluster, expectedDocCount, expectedLabelCount,
-						expectedSubclusterCount);
-		}
-	}
-
-	private void checkClusters(List results, boolean hasSubclusters) {
-		for (int i = 0; i < results.size(); i++) {
-                  checkCluster((NamedList)results.get(i), hasSubclusters );
-		}
-	}
-
-	private void checkCluster(NamedList cluster, boolean hasSubclusters) {
-		List docs = (List)cluster.get("docs");
-		assertNotNull("docs is null and it shouldn't be", docs);
-		for (int j = 0; j < docs.size(); j++) {
-			String id = (String) docs.get(j);
-			assertNotNull("id is null and it shouldn't be", id);
-		}
-
-		List labels = (List) cluster.get("labels");
-		assertNotNull("labels is null but it shouldn't be", labels);
-
-		if (hasSubclusters) {
-			List subclusters = (List) cluster.get("clusters");
-			assertNotNull("subclusters is null but it shouldn't be", subclusters);
-		}
-	}
-
-	private void checkCluster(NamedList cluster, int expectedDocCount,
-			int expectedLabelCount, int expectedSubclusterCount) {
-		checkCluster(cluster, expectedSubclusterCount > 0);
-		assertEquals("number of docs in cluster", expectedDocCount,
-				((List) cluster.get("docs")).size());
-		assertEquals("number of labels in cluster", expectedLabelCount,
-				((List) cluster.get("labels")).size());
-
-		if (expectedSubclusterCount > 0) {
-			List subclusters = (List) cluster.get("clusters");
-			assertEquals("numClusters", expectedSubclusterCount, subclusters.size());
-			assertEquals("number of subclusters in cluster",
-					expectedSubclusterCount, subclusters.size());
-		}
-	}
+  public void testCarrotLingo() throws Exception {
+    checkEngine(getClusteringEngine("default"), 9);
+  }
+
+  public void testCarrotStc() throws Exception {
+    checkEngine(getClusteringEngine("stc"), 2);
+  }
+
+  public void testWithoutSubclusters() throws Exception {
+    checkClusters(checkEngine(getClusteringEngine("mock"), this.numberOfDocs),
+            1, 1, 0);
+  }
+
+  public void testWithSubclusters() throws Exception {
+    ModifiableSolrParams params = new ModifiableSolrParams();
+    params.set(CarrotParams.OUTPUT_SUB_CLUSTERS, true);
+    checkClusters(checkEngine(getClusteringEngine("mock"), this.numberOfDocs,
+            params), 1, 1, 2);
+  }
+
+  public void testNumDescriptions() throws Exception {
+    ModifiableSolrParams params = new ModifiableSolrParams();
+    params.set(AttributeUtils.getKey(MockClusteringAlgorithm.class, "labels"), 5);
+    params.set(CarrotParams.NUM_DESCRIPTIONS, 3);
+    checkClusters(checkEngine(getClusteringEngine("mock"), this.numberOfDocs,
+            params), 1, 3, 0);
+  }
+
+  public void testCarrotAttributePassing() throws Exception {
+    ModifiableSolrParams params = new ModifiableSolrParams();
+    params.set(AttributeUtils.getKey(MockClusteringAlgorithm.class, "depth"), 1);
+    params.set(AttributeUtils.getKey(MockClusteringAlgorithm.class, "labels"), 3);
+    checkClusters(checkEngine(getClusteringEngine("mock"), this.numberOfDocs,
+            params), 1, 3, 0);
+  }
+
+  private CarrotClusteringEngine getClusteringEngine(String engineName) {
+    ClusteringComponent comp = (ClusteringComponent) h.getCore()
+            .getSearchComponent("clustering");
+    assertNotNull("clustering component should not be null", comp);
+    CarrotClusteringEngine engine = (CarrotClusteringEngine) comp
+            .getSearchClusteringEngines().get(engineName);
+    assertNotNull("clustering engine for name: " + engineName
+            + " should not be null", engine);
+    return engine;
+  }
+
+  private List checkEngine(CarrotClusteringEngine engine,
+                           int expectedNumClusters) throws IOException {
+    return checkEngine(engine, expectedNumClusters, new ModifiableSolrParams());
+  }
+
+  private List checkEngine(CarrotClusteringEngine engine,
+                           int expectedNumClusters, SolrParams clusteringParams) throws IOException {
+    // Get all documents to cluster
+    RefCounted<SolrIndexSearcher> ref = h.getCore().getSearcher();
+    MatchAllDocsQuery query = new MatchAllDocsQuery();
+    DocList docList;
+    try {
+      SolrIndexSearcher searcher = ref.get();
+      docList = searcher.getDocList(query, (Query) null, new Sort(), 0,
+              numberOfDocs);
+      assertEquals("docList size", this.numberOfDocs, docList.matches());
+    } finally {
+      ref.decref();
+    }
+
+    ModifiableSolrParams solrParams = new ModifiableSolrParams();
+    solrParams.add(CarrotParams.PRODUCE_SUMMARY, "true");
+    solrParams.add(clusteringParams);
+
+    // Perform clustering
+    LocalSolrQueryRequest req = new LocalSolrQueryRequest(h.getCore(), solrParams);
+    List results = (List) engine.cluster(query, docList, req);
+    req.close();
+    assertEquals("number of clusters", expectedNumClusters, results.size());
+    checkClusters(results, false);
+    return results;
+  }
+
+  private void checkClusters(List results, int expectedDocCount,
+                             int expectedLabelCount, int expectedSubclusterCount) {
+    for (int i = 0; i < results.size(); i++) {
+      NamedList cluster = (NamedList) results.get(i);
+      checkCluster(cluster, expectedDocCount, expectedLabelCount,
+              expectedSubclusterCount);
+    }
+  }
+
+  private void checkClusters(List results, boolean hasSubclusters) {
+    for (int i = 0; i < results.size(); i++) {
+      checkCluster((NamedList) results.get(i), hasSubclusters);
+    }
+  }
+
+  private void checkCluster(NamedList cluster, boolean hasSubclusters) {
+    List docs = (List) cluster.get("docs");
+    assertNotNull("docs is null and it shouldn't be", docs);
+    for (int j = 0; j < docs.size(); j++) {
+      String id = (String) docs.get(j);
+      assertNotNull("id is null and it shouldn't be", id);
+    }
+
+    List labels = (List) cluster.get("labels");
+    assertNotNull("labels is null but it shouldn't be", labels);
+
+    if (hasSubclusters) {
+      List subclusters = (List) cluster.get("clusters");
+      assertNotNull("subclusters is null but it shouldn't be", subclusters);
+    }
+  }
+
+  private void checkCluster(NamedList cluster, int expectedDocCount,
+                            int expectedLabelCount, int expectedSubclusterCount) {
+    checkCluster(cluster, expectedSubclusterCount > 0);
+    assertEquals("number of docs in cluster", expectedDocCount,
+            ((List) cluster.get("docs")).size());
+    assertEquals("number of labels in cluster", expectedLabelCount,
+            ((List) cluster.get("labels")).size());
+
+    if (expectedSubclusterCount > 0) {
+      List subclusters = (List) cluster.get("clusters");
+      assertEquals("numClusters", expectedSubclusterCount, subclusters.size());
+      assertEquals("number of subclusters in cluster",
+              expectedSubclusterCount, subclusters.size());
+    }
+  }
 }

Modified: lucene/solr/trunk/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/carrot2/MockClusteringAlgorithm.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/carrot2/MockClusteringAlgorithm.java?rev=790599&r1=790598&r2=790599&view=diff
==============================================================================
--- lucene/solr/trunk/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/carrot2/MockClusteringAlgorithm.java (original)
+++ lucene/solr/trunk/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/carrot2/MockClusteringAlgorithm.java Thu Jul  2 14:08:37 2009
@@ -1,69 +1,68 @@
 package org.apache.solr.handler.clustering.carrot2;
 
-import java.util.List;
-
+import com.google.common.collect.Lists;
 import org.carrot2.core.*;
 import org.carrot2.core.attribute.AttributeNames;
 import org.carrot2.core.attribute.Processing;
 import org.carrot2.util.attribute.*;
 import org.carrot2.util.attribute.constraint.IntRange;
 
-import com.google.common.collect.Lists;
+import java.util.List;
 
 @Bindable(prefix = "MockClusteringAlgorithm")
 public class MockClusteringAlgorithm extends ProcessingComponentBase implements
-		IClusteringAlgorithm {
-	@Input
-	@Processing
-	@Attribute(key = AttributeNames.DOCUMENTS)
-	private List<Document> documents;
-
-	@Output
-	@Processing
-	@Attribute(key = AttributeNames.CLUSTERS)
-	private List<Cluster> clusters;
-
-	@Input
-	@Processing
-	@Attribute
-	@IntRange(min = 1, max = 5)
-	private int depth = 2;
-
-	@Input
-	@Processing
-	@Attribute
-	@IntRange(min = 1, max = 5)
-	private int labels = 1;
-
-	@Override
-	public void process() throws ProcessingException {
-		clusters = Lists.newArrayList();
-		if (documents == null) {
-			return;
-		}
-
-		int documentIndex = 1;
-		for (Document document : documents) {
-			StringBuilder label = new StringBuilder("Cluster " + documentIndex);
-			Cluster cluster = createCluster(label.toString(), document);
-			clusters.add(cluster);
-			for (int i = 1; i <= depth; i++) {
-				label.append(".");
-				label.append(i);
-				Cluster newCluster = createCluster(label.toString(), document);
-				cluster.addSubclusters(createCluster(label.toString(), document), newCluster);
-				cluster = newCluster;
-			}
-			documentIndex++;
-		}
-	}
-
-	private Cluster createCluster(String labelBase, Document... documents) {
-		Cluster cluster = new Cluster();
-		for (int i = 0; i < labels; i++) {
-			cluster.addPhrases(labelBase + "#" + (i + 1));
-		}
-		cluster.addDocuments(documents);
-		return cluster;
-	}
+        IClusteringAlgorithm {
+  @Input
+  @Processing
+  @Attribute(key = AttributeNames.DOCUMENTS)
+  private List<Document> documents;
+
+  @Output
+  @Processing
+  @Attribute(key = AttributeNames.CLUSTERS)
+  private List<Cluster> clusters;
+
+  @Input
+  @Processing
+  @Attribute
+  @IntRange(min = 1, max = 5)
+  private int depth = 2;
+
+  @Input
+  @Processing
+  @Attribute
+  @IntRange(min = 1, max = 5)
+  private int labels = 1;
+
+  @Override
+  public void process() throws ProcessingException {
+    clusters = Lists.newArrayList();
+    if (documents == null) {
+      return;
+    }
+
+    int documentIndex = 1;
+    for (Document document : documents) {
+      StringBuilder label = new StringBuilder("Cluster " + documentIndex);
+      Cluster cluster = createCluster(label.toString(), document);
+      clusters.add(cluster);
+      for (int i = 1; i <= depth; i++) {
+        label.append(".");
+        label.append(i);
+        Cluster newCluster = createCluster(label.toString(), document);
+        cluster.addSubclusters(createCluster(label.toString(), document), newCluster);
+        cluster = newCluster;
+      }
+      documentIndex++;
+    }
+  }
+
+  private Cluster createCluster(String labelBase, Document... documents) {
+    Cluster cluster = new Cluster();
+    for (int i = 0; i < labels; i++) {
+      cluster.addPhrases(labelBase + "#" + (i + 1));
+    }
+    cluster.addDocuments(documents);
+    return cluster;
+  }
 }



Mime
View raw message