lucene-solr-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Apache Wiki <wikidi...@apache.org>
Subject [Solr Wiki] Update of "Solrj" by ShawnHeisey
Date Thu, 18 Aug 2011 05:10:41 GMT
Dear Wiki user,

You have subscribed to a wiki page or wiki category on "Solr Wiki" for change notification.

The "Solrj" page has been changed by ShawnHeisey:
http://wiki.apache.org/solr/Solrj?action=diff&rev1=60&rev2=61

  
  The most appropriate way to update Solr is from a single process, which avoids race
conditions when using commit and rollback. Ideally, the application should also use batch processing,
since commit and optimize can be expensive operations.
  
+ === Reading data from a database ===
+ 
+ /!\ This example class using SolrJ has not yet been tested, but hopefully it's complete
enough for community comment.
+ 
+ It will get tested eventually and updated here if there are problems.  The addResultSet
method takes a JDBC ResultSet and adds the documents to Solr in batches.  Managing the database
connection and constructing the query are not included here.
+ 
+ As written, it maps each database column to a Solr field with the same name, but the comments
include examples of how you could manually assign one or more fields.
+ 
+ {{{
+ import java.io.IOException;
+ import java.net.MalformedURLException;
+ import java.sql.ResultSet;
+ import java.sql.ResultSetMetaData;
+ import java.sql.SQLException;
+ import java.sql.Types;
+ import java.util.ArrayList;
+ import java.util.Collection;
+ 
+ import org.apache.solr.client.solrj.SolrServerException;
+ import org.apache.solr.client.solrj.impl.CommonsHttpSolrServer;
+ import org.apache.solr.common.SolrInputDocument;
+ 
+ public class Test
+ {
+     private static int fetchSize = 1000;
+     private static String url = "http://localhost:8983/solr/core1/";
+     private static CommonsHttpSolrServer solrCore;
+ 
+     public Test() throws MalformedURLException
+     {
+     	 solrCore = new CommonsHttpSolrServer(url);
+     }
+ 
+     /**
+      * Takes an SQL ResultSet and adds the documents to solr. Does it in batches
+      * of fetchSize.
+      * 
+      * @param rs
+      *            A ResultSet from the database.
+      * @return The number of documents added to solr.
+      * @throws SQLException
+      * @throws SolrServerException
+      * @throws IOException
+      */
+     private long addResultSet(ResultSet rs) throws SQLException,
+             SolrServerException, IOException
+     {
+         long count = 0;
+         int innerCount = 0;
+         Collection<SolrInputDocument> docs = new ArrayList<SolrInputDocument>();
+         ResultSetMetaData rsm = rs.getMetaData();
+         int numColumns = rsm.getColumnCount();
+         String[] colNames = new String[numColumns + 1];
+ 
+         /**
+          * JDBC numbers the columns starting at 1, so the normal java convention
+          * of starting at zero won't work.
+          */
+         for (int i = 1; i < (numColumns + 1); i++)
+         {
+             colNames[i] = rsm.getColumnName(i);
+             /**
+              * If there are fields that you want to handle manually, check for
+              * them here and change that entry in colNames to null. This will
+              * cause the loop in the next section to skip that database column.
+              */
+             // //Example:
+             // if (rsm.getColumnName(i) == "db_id")
+             // {
+             // colNames[i] = null;
+             // }
+         }
+ 
+         while (rs.next())
+         {
+             count++;
+             innerCount++;
+ 
+             SolrInputDocument doc = new SolrInputDocument();
+ 
+             /**
+              * At this point, take care of manual document field assignments for
+              * which you previously assigned the colNames entry to null.
+              */
+             // //Example:
+             // doc.addField("solr_db_id", rs.getLong("db_id"));
+ 
+             for (int j = 1; j < (numColumns + 1); j++)
+             {
+                 if (colNames[j] != null)
+                 {
+                     Object f;
+                     switch (rsm.getColumnType(j))
+                     {
+                         case Types.BIGINT:
+                         {
+                             f = rs.getLong(j);
+                             break;
+                         }
+                         case Types.INTEGER:
+                         {
+                             f = rs.getInt(j);
+                             break;
+                         }
+                         case Types.DATE:
+                         {
+                             f = rs.getDate(j);
+                             break;
+                         }
+                         case Types.FLOAT:
+                         {
+                             f = rs.getFloat(j);
+                             break;
+                         }
+                         case Types.DOUBLE:
+                         {
+                             f = rs.getDouble(j);
+                             break;
+                         }
+                         case Types.TIME:
+                         {
+                             f = rs.getDate(j);
+                             break;
+                         }
+                         case Types.BOOLEAN:
+                         {
+                             f = rs.getBoolean(j);
+                             break;
+                         }
+                         default:
+                         {
+                             f = rs.getString(j);
+                         }
+                     }
+                     doc.addField(colNames[j], f);
+                 }
+             }
+             docs.add(doc);
+ 
+             /**
+              * When we reach fetchSize, index the documents and reset the inner
+              * counter.
+              */
+             if (innerCount == fetchSize)
+             {
+                 solrCore.add(docs);
+                 docs.clear();
+                 innerCount = 0;
+             }
+         }
+ 
+         /**
+          * If the outer loop ended before the inner loop reset, index the
+          * remaining documents.
+          */
+         if (innerCount != 0)
+         {
+             solrCore.add(docs);
+         }
+         return count;
+     }
+ }
+ }}}
  == Setting the RequestWriter ==
  
  SolrJ lets you upload content in XML and Binary format. The default is set to be XML.  Use
the following to upload using Binary format. This is the same format which SolrJ uses to fetch
results, and can greatly improve performance as it reduces XML marshalling overhead.

Mime
View raw message