lucene-solr-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From yo...@apache.org
Subject svn commit: r524175 - in /lucene/solr/trunk: ./ example/exampledocs/ example/solr/conf/ lib/ src/java/org/apache/solr/handler/ src/java/org/apache/solr/request/ src/java/org/apache/solr/util/ src/test/org/apache/solr/handler/ src/test/test-files/solr/c...
Date Fri, 30 Mar 2007 16:59:59 GMT
Author: yonik
Date: Fri Mar 30 09:59:58 2007
New Revision: 524175

URL: http://svn.apache.org/viewvc?view=rev&rev=524175
Log:
CSV updates: SOLR-66

Added:
    lucene/solr/trunk/example/exampledocs/books.csv   (with props)
    lucene/solr/trunk/lib/commons-csv-0.1-SNAPSHOT.jar   (with props)
    lucene/solr/trunk/src/java/org/apache/solr/handler/CSVRequestHandler.java   (with props)
    lucene/solr/trunk/src/test/org/apache/solr/handler/TestCSVLoader.java   (with props)
Modified:
    lucene/solr/trunk/CHANGES.txt
    lucene/solr/trunk/example/solr/conf/solrconfig.xml
    lucene/solr/trunk/src/java/org/apache/solr/request/SolrParams.java
    lucene/solr/trunk/src/java/org/apache/solr/util/TestHarness.java
    lucene/solr/trunk/src/test/test-files/solr/conf/solrconfig.xml

Modified: lucene/solr/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/CHANGES.txt?view=diff&rev=524175&r1=524174&r2=524175
==============================================================================
--- lucene/solr/trunk/CHANGES.txt (original)
+++ lucene/solr/trunk/CHANGES.txt Fri Mar 30 09:59:58 2007
@@ -135,6 +135,8 @@
 19. SOLR-197: New parameters for input: stream.contentType for specifying
     or overriding the content type of input, and stream.file for reading
     local files. (Ryan McKinley via yonik)
+
+20. SOLR-66: CSV data format for document additions and updates. (yonik)
     
 Changes in runtime behavior
  1. Highlighting using DisMax will only pick up terms from the main 

Added: lucene/solr/trunk/example/exampledocs/books.csv
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/example/exampledocs/books.csv?view=auto&rev=524175
==============================================================================
--- lucene/solr/trunk/example/exampledocs/books.csv (added)
+++ lucene/solr/trunk/example/exampledocs/books.csv Fri Mar 30 09:59:58 2007
@@ -0,0 +1,11 @@
+id,cat,name,price,inStock,author_t,series_t,sequence_i,genre_s
+0553573403,book,A Game of Thrones,7.99,true,George R.R. Martin,"A Song of Ice and Fire",1,fantasy
+0553579908,book,A Clash of Kings,7.99,true,George R.R. Martin,"A Song of Ice and Fire",2,fantasy
+055357342X,book,A Storm of Swords,7.99,true,George R.R. Martin,"A Song of Ice and Fire",3,fantasy
+0553293354,book,Foundation,7.99,true,Isaac Asimov,Foundation Novels,1,scifi
+0812521390,book,The Black Company,6.99,false,Glen Cook,The Chronicles of The Black Company,1,fantasy
+0812550706,book,Ender's Game,6.99,true,Orson Scott Card,Ender,1,scifi
+0441385532,book,Jhereg,7.95,false,Steven Brust,Vlad Taltos,1,fantasy
+0380014300,book,Nine Princes In Amber,6.99,true,Roger Zelazny,the Chronicles of Amber,1,fantasy
+0805080481,book,The Book of Three,5.99,true,Lloyd Alexander,The Chronicles of Prydain,1,fantasy
+080508049X,book,The Black Cauldron,5.99,true,Lloyd Alexander,The Chronicles of Prydain,2,fantasy

Propchange: lucene/solr/trunk/example/exampledocs/books.csv
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/solr/trunk/example/exampledocs/books.csv
------------------------------------------------------------------------------
    svn:executable = *

Modified: lucene/solr/trunk/example/solr/conf/solrconfig.xml
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/example/solr/conf/solrconfig.xml?view=diff&rev=524175&r1=524174&r2=524175
==============================================================================
--- lucene/solr/trunk/example/solr/conf/solrconfig.xml (original)
+++ lucene/solr/trunk/example/solr/conf/solrconfig.xml Fri Mar 30 09:59:58 2007
@@ -384,6 +384,11 @@
   <!-- NOTE, /update is mapped to a servlet, we can have the filter handle requests off that! -->
   <requestHandler name="/update/commit" class="solr.CommitRequestHandler" />
 
+
+  <!-- CSV update handler, loaded on demand -->
+  <requestHandler name="/update/csv" class="solr.CSVRequestHandler" startup="lazy">
+  </requestHandler>
+
   
   <!-- queryResponseWriter plugins... query responses will be written using the
     writer specified by the 'wt' request parameter matching the name of a registered

Added: lucene/solr/trunk/lib/commons-csv-0.1-SNAPSHOT.jar
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/lib/commons-csv-0.1-SNAPSHOT.jar?view=auto&rev=524175
==============================================================================
Binary file - no diff available.

Propchange: lucene/solr/trunk/lib/commons-csv-0.1-SNAPSHOT.jar
------------------------------------------------------------------------------
    svn:executable = *

Propchange: lucene/solr/trunk/lib/commons-csv-0.1-SNAPSHOT.jar
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: lucene/solr/trunk/src/java/org/apache/solr/handler/CSVRequestHandler.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/handler/CSVRequestHandler.java?view=auto&rev=524175
==============================================================================
--- lucene/solr/trunk/src/java/org/apache/solr/handler/CSVRequestHandler.java (added)
+++ lucene/solr/trunk/src/java/org/apache/solr/handler/CSVRequestHandler.java Fri Mar 30 09:59:58 2007
@@ -0,0 +1,386 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.handler;
+
+import org.apache.solr.request.SolrQueryRequest;
+import org.apache.solr.request.SolrParams;
+import org.apache.solr.request.SolrQueryResponse;
+import org.apache.solr.util.ContentStream;
+import org.apache.solr.core.SolrException;
+import org.apache.solr.schema.IndexSchema;
+import org.apache.solr.schema.SchemaField;
+import org.apache.solr.util.StrUtils;
+import org.apache.solr.update.*;
+import org.apache.commons.csv.CSVStrategy;
+import org.apache.commons.csv.CSVParser;
+import org.apache.commons.io.IOUtils;
+
+import java.util.regex.Pattern;
+import java.util.List;
+import java.io.*;
+
+/**
+ * Request handler that adds or updates documents from CSV formatted
+ * content streams.  Every content stream attached to the request is read
+ * with a single {@link SingleThreadedCSVLoader} that is configured from
+ * the request parameters.
+ *
+ * @author yonik
+ * @version $Id$
+ */
+public class CSVRequestHandler extends RequestHandlerBase {
+
+  /**
+   * Loads each content stream of the request as CSV.
+   *
+   * @param req the request supplying the parameters and content streams
+   * @param rsp the response; this method does not write to it
+   * @throws SolrException with code 400 if the request has no content streams
+   */
+  public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) throws Exception {
+    // the loader is built first, so invalid parameters are reported
+    // before a missing content stream is
+    CSVLoader loader = new SingleThreadedCSVLoader(req);
+
+    Iterable<ContentStream> streams = req.getContentStreams();
+    if (streams == null) {
+      throw new SolrException(400, "missing content stream");
+    }
+
+    for (ContentStream stream : streams) {
+      Reader reader = stream.getReader();
+      try {
+        // name the current input in any error message the loader produces
+        loader.errHeader = "CSVLoader: input=" + stream.getSourceInfo();
+        loader.load(reader);
+      } finally {
+        IOUtils.closeQuietly(reader);
+      }
+    }
+  }
+
+  //////////////////////// SolrInfoMBeans methods //////////////////////
+  @Override
+  public String getDescription() {
+    return "Add/Update multiple documents with CSV formatted rows";
+  }
+
+  @Override
+  public String getVersion() {
+    return "$Revision:$";
+  }
+
+  @Override
+  public String getSourceId() {
+    return "$Id:$";
+  }
+
+  @Override
+  public String getSource() {
+    return "$URL:$";
+  }
+}
+
+
+/**
+ * Base class that turns CSV input into document additions.
+ * <p>
+ * The constructor reads the request parameters named by the constants below
+ * and builds the CSVStrategy used for parsing.  {@link #load} then reads the
+ * rows and hands each one to {@link #addDoc}, which subclasses implement.
+ */
+abstract class CSVLoader {
+  // names of the request parameters understood by the loader
+  static final String SEPARATOR="separator";
+  static final String FIELDNAMES="fieldnames";
+  static final String HEADER="header";
+  static final String SKIP="skip";
+  static final String MAP="map";
+  static final String TRIM="trim";
+  static final String EMPTY="keepEmpty";
+  static final String SPLIT="split";
+  static final String ENCAPSULATOR="encapsulator";
+  static final String COMMIT="commit";
+  static final String OVERWRITE="overwrite";
+
+  private static final Pattern colonSplit = Pattern.compile(":");
+  private static final Pattern commaSplit = Pattern.compile(",");
+
+  final IndexSchema schema;
+  final SolrParams params;
+  final UpdateHandler handler;
+  final CSVStrategy strategy;
+
+  // column names, taken from the "fieldnames" parameter or the CSV header line
+  String[] fieldnames;
+  // schema field for each column; a null entry marks a column to ignore
+  SchemaField[] fields;
+  // FieldAdder chain for each column, parallel to fields
+  CSVLoader.FieldAdder[] adders;
+
+  int skipLines;    // number of lines to skip at start of file
+
+  // add command reused for every row; its flags are set once from "overwrite"
+  final AddUpdateCommand templateAdd;
+
+  // prefix of input error messages; the request handler sets it per stream
+  String errHeader="CSVLoader:";
+
+
+  /** Add a field to a document unless it's zero length.
+   * The FieldAdder hierarchy handles all the complexity of
+   * further transforming or splitting field values to keep the
+   * main logic loop clean.  All implementations of add() must be
+   * MT-safe!
+   */
+  private class FieldAdder {
+    void add(DocumentBuilder builder, int line, int column, String val) {
+      if (val.length() > 0) {
+        builder.addField(fields[column].getName(),val,1.0f);
+      }
+    }
+  }
+
+  /** add zero length fields */
+  private class FieldAdderEmpty extends CSVLoader.FieldAdder {
+    void add(DocumentBuilder builder, int line, int column, String val) {
+      builder.addField(fields[column].getName(),val,1.0f);
+    }
+  }
+
+  /** trim fields */
+  private class FieldTrimmer extends CSVLoader.FieldAdder {
+    private final CSVLoader.FieldAdder base;
+    FieldTrimmer(CSVLoader.FieldAdder base) { this.base=base; }
+    void add(DocumentBuilder builder, int line, int column, String val) {
+      base.add(builder, line, column, val.trim());
+    }
+  }
+
+  /** map a single value.
+   * for just a couple of mappings, this is probably faster than
+   * using a HashMap.
+   */
+  private class FieldMapperSingle extends CSVLoader.FieldAdder {
+    private final String from;
+    private final String to;
+    private final CSVLoader.FieldAdder base;
+    FieldMapperSingle(String from, String to, CSVLoader.FieldAdder base) {
+      this.from=from;
+      this.to=to;
+      this.base=base;
+    }
+    void add(DocumentBuilder builder, int line, int column, String val) {
+      if (from.equals(val)) val=to;
+      base.add(builder,line,column,val);
+    }
+  }
+
+  /** Split a single value into multiple values based on
+   * a CSVStrategy.
+   */
+  private class FieldSplitter extends CSVLoader.FieldAdder {
+    private final CSVStrategy strategy;
+    private final CSVLoader.FieldAdder base;
+    FieldSplitter(CSVStrategy strategy, CSVLoader.FieldAdder base) {
+      this.strategy = strategy;
+      this.base = base;
+    }
+
+    void add(DocumentBuilder builder, int line, int column, String val) {
+      CSVParser parser = new CSVParser(new StringReader(val), strategy);
+      try {
+        String[] vals = parser.getLine();
+        if (vals!=null) {
+          for (String v: vals) base.add(builder,line,column,v);
+        } else {
+          // nothing was parsed: pass the unsplit value through
+          base.add(builder,line,column,val);
+        }
+      } catch (IOException e) {
+        // report where and why the split failed instead of an empty message
+        throw new SolrException(400, errHeader + ", line=" + line
+            + ",could not split value '" + val + "': " + e);
+      }
+    }
+  }
+
+
+  /**
+   * Configures the loader from the parameters of the request.
+   *
+   * @param req the request supplying parameters, schema and update handler
+   * @throws SolrException with code 400 if "separator" or "encapsulator" is
+   *         not exactly one character, or if "header" is false and no
+   *         "fieldnames" were given
+   */
+  CSVLoader(SolrQueryRequest req) {
+    this.params = req.getParams();
+    handler = req.getCore().getUpdateHandler();
+    schema = req.getSchema();
+
+    // "overwrite" defaults to true; when it is false, duplicates are
+    // allowed and neither committed nor pending documents are overwritten
+    boolean overwrite = params.getBool(OVERWRITE,true);
+    templateAdd = new AddUpdateCommand();
+    templateAdd.allowDups = !overwrite;
+    templateAdd.overwriteCommitted = overwrite;
+    templateAdd.overwritePending = overwrite;
+
+    strategy = new CSVStrategy(',', '"', CSVStrategy.COMMENTS_DISABLED, true,  false, true);
+    String sep = params.get(SEPARATOR);
+    if (sep!=null) {
+      if (sep.length()!=1) throw new SolrException(400,"Invalid separator:'"+sep+"'");
+      strategy.setDelimiter(sep.charAt(0));
+    }
+
+    String encapsulator = params.get(ENCAPSULATOR);
+    if (encapsulator!=null) {
+      if (encapsulator.length()!=1) throw new SolrException(400,"Invalid encapsulator:'"+encapsulator+"'");
+      strategy.setEncapsulator(encapsulator.charAt(0));
+    }
+
+    String fn = params.get(FIELDNAMES);
+    fieldnames = fn != null ? commaSplit.split(fn,-1) : null;
+
+    Boolean hasHeader = params.getBool(HEADER);
+
+    if (fieldnames==null) {
+      if (null == hasHeader) {
+        // assume the file has the headers if they aren't supplied in the args
+        hasHeader=true;
+      } else if (!hasHeader) {
+        // no fieldnames and explicitly no header: the columns cannot be named
+        throw new SolrException(400,"CSVLoader: must specify fieldnames=<fields>* or header=true");
+      }
+    } else {
+      // if the fieldnames were supplied and the file has a header, we need to
+      // skip over that header.
+      if (hasHeader!=null && hasHeader) skipLines=1;
+
+      prepareFields();
+    }
+  }
+
+  /** create the FieldAdders that control how each field  is indexed */
+  void prepareFields() {
+    // Possible future optimization: for really rapid incremental indexing
+    // from a POST, one could cache all of this setup info based on the params.
+    // The link from FieldAdder to this would need to be severed for that to happen.
+
+    fields = new SchemaField[fieldnames.length];
+    adders = new CSVLoader.FieldAdder[fieldnames.length];
+    String skipStr = params.get(SKIP);
+    List<String> skipFields = skipStr==null ? null : StrUtils.splitSmart(skipStr,',');
+
+    CSVLoader.FieldAdder adder = new CSVLoader.FieldAdder();
+    CSVLoader.FieldAdder adderKeepEmpty = new CSVLoader.FieldAdderEmpty();
+
+    for (int i=0; i<fields.length; i++) {
+      String fname = fieldnames[i];
+      // to skip a field, leave the entries in fields and adders null
+      if (fname.length()==0 || (skipFields!=null && skipFields.contains(fname))) continue;
+
+      fields[i] = schema.getField(fname);
+      boolean keepEmpty = params.getFieldBool(fname,EMPTY,false);
+      adders[i] = keepEmpty ? adderKeepEmpty : adder;
+
+      // Order that operations are applied: split -> trim -> map -> add
+      // so create in reverse order.
+      // Creation of FieldAdders could be optimized and shared among fields
+
+      String[] fmap = params.getFieldParams(fname,MAP);
+      if (fmap!=null) {
+        for (String mapRule : fmap) {
+          String[] mapArgs = colonSplit.split(mapRule,-1);
+          if (mapArgs.length!=2)
+            throw new SolrException(400, "Map rules must be of the form 'from:to' ,got '"+mapRule+"'");
+          adders[i] = new CSVLoader.FieldMapperSingle(mapArgs[0], mapArgs[1], adders[i]);
+        }
+      }
+
+      if (params.getFieldBool(fname,TRIM,false)) {
+        adders[i] = new CSVLoader.FieldTrimmer(adders[i]);
+      }
+
+      if (params.getFieldBool(fname,SPLIT,false)) {
+        // sub-values default to ',' as separator and '\'' as encapsulator
+        String sepStr = params.getFieldParam(fname,SEPARATOR);
+        char fsep = sepStr==null || sepStr.length()==0 ? ',' : sepStr.charAt(0);
+        String encStr = params.getFieldParam(fname,ENCAPSULATOR);
+        char fenc = encStr==null || encStr.length()==0 ? '\'' : encStr.charAt(0);
+
+        CSVStrategy fstrat = new CSVStrategy(fsep,fenc,CSVStrategy.COMMENTS_DISABLED);
+        adders[i] = new CSVLoader.FieldSplitter(fstrat, adders[i]);
+      }
+    }
+  }
+
+  /** Always throws a 400 SolrException naming the input, the line and its values. */
+  private void input_err(String msg, String[] line, int lineno) {
+    StringBuilder sb = new StringBuilder();
+    sb.append(errHeader+", line="+lineno + ","+msg+"\n\tvalues={");
+    for (String val: line) { sb.append("'"+val+"',"); }
+    sb.append('}');
+    throw new SolrException(400,sb.toString());
+  }
+
+  /**
+   * Load the CSV input: skip {@link #skipLines} leading lines, read the
+   * field names from the first row if none were supplied, add one document
+   * per remaining row, and finally commit unless "commit" is false.
+   *
+   * @param input the CSV text; it is not closed here
+   * @throws SolrException with code 400 if the field names are missing or
+   *         a row does not have one value per field
+   */
+  void load(Reader input) throws IOException {
+    Reader reader = input;
+    if (skipLines>0) {
+      if (!(reader instanceof BufferedReader)) {
+        reader = new BufferedReader(reader);
+      }
+      BufferedReader r = (BufferedReader)reader;
+      for (int i=0; i<skipLines; i++) {
+        r.readLine();
+      }
+    }
+
+    CSVParser parser = new CSVParser(reader, strategy);
+
+    // parse the fieldnames from the header of the file
+    if (fieldnames==null) {
+      fieldnames = parser.getLine();
+      if (fieldnames==null) {
+        throw new SolrException(400,"Expected fieldnames in CSV input");
+      }
+      prepareFields();
+    }
+
+    // read the rest of the CSV file
+    for(;;) {
+      int line = parser.getLineNumber();  // for error reporting in MT mode
+      String[] vals = parser.getLine();
+      if (vals==null) break;
+
+      if (vals.length != fields.length) {
+        input_err("expected "+fields.length+" values but got "+vals.length, vals, line);
+      }
+
+      addDoc(line,vals);
+    }
+
+    if (params.getBool(COMMIT,true)) {
+      handler.commit(new CommitUpdateCommand(false));
+    }
+  }
+
+  /** called for each line of values (document) */
+  abstract void addDoc(int line, String[] vals) throws IOException;
+
+  /** this must be MT safe... may be called concurrently from multiple threads. */
+  void doAdd(int line, String[] vals, DocumentBuilder builder, AddUpdateCommand template) throws IOException {
+    // the line number is passed simply for error reporting in MT mode.
+    // first, create the lucene document
+    builder.startDoc();
+    for (int i=0; i<vals.length; i++) {
+      if (fields[i]==null) continue;  // ignore this field
+      String val = vals[i];
+      adders[i].add(builder, line, i, val);
+    }
+    builder.endDoc();
+
+    template.doc = builder.getDoc();
+    handler.addDoc(template);
+  }
+
+}
+
+
+/**
+ * CSVLoader that adds every row directly on the calling thread, reusing
+ * one DocumentBuilder and the loader's template add command for all rows.
+ */
+class SingleThreadedCSVLoader extends CSVLoader {
+  protected DocumentBuilder builder;
+
+  /** Configures the loader from the request and creates the shared builder. */
+  SingleThreadedCSVLoader(SolrQueryRequest req) {
+    super(req);
+    this.builder = new DocumentBuilder(this.schema);
+  }
+
+  /** Adds one row as a document, using the shared builder and template command. */
+  void addDoc(int line, String[] vals) throws IOException {
+    final AddUpdateCommand cmd = templateAdd;
+    // clear the id left on the reused command before building the next
+    // document -- presumably so it is derived again; TODO confirm
+    cmd.indexedId = null;
+    doAdd(line, vals, this.builder, cmd);
+  }
+}
+

Propchange: lucene/solr/trunk/src/java/org/apache/solr/handler/CSVRequestHandler.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/solr/trunk/src/java/org/apache/solr/handler/CSVRequestHandler.java
------------------------------------------------------------------------------
    svn:executable = *

Modified: lucene/solr/trunk/src/java/org/apache/solr/request/SolrParams.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/request/SolrParams.java?view=diff&rev=524175&r1=524174&r2=524175
==============================================================================
--- lucene/solr/trunk/src/java/org/apache/solr/request/SolrParams.java (original)
+++ lucene/solr/trunk/src/java/org/apache/solr/request/SolrParams.java Fri Mar 30 09:59:58 2007
@@ -165,6 +165,14 @@
     return val!=null ? val : get(param);
   }
 
+  /** Returns all String values of the per-field parameter "f.field.param";
+   *  when that is not set, the values of the plain "param" are returned.
+   */
+  public String[] getFieldParams(String field, String param) {
+    String[] perField = getParams(fpname(field,param));
+    return perField==null ? getParams(param) : perField;
+  }
+
   /** Returns the Boolean value of the param, or null if not set */
   public Boolean getBool(String param) {
     String val = get(param);

Modified: lucene/solr/trunk/src/java/org/apache/solr/util/TestHarness.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/util/TestHarness.java?view=diff&rev=524175&r1=524174&r2=524175
==============================================================================
--- lucene/solr/trunk/src/java/org/apache/solr/util/TestHarness.java (original)
+++ lucene/solr/trunk/src/java/org/apache/solr/util/TestHarness.java Fri Mar 30 09:59:58 2007
@@ -197,19 +197,32 @@
    * @see LocalSolrQueryRequest
    */
   public String query(SolrQueryRequest req) throws IOException, Exception {
+    return query(req.getQueryType(), req);
+  }
 
+  /**
+   * Processes a "query" using a user constructed SolrQueryRequest
+   *
+   * @param handler the name of the request handler to process the request
+   * @param req the Query to process, will be closed.
+   * @return The XML response to the query
+   * @exception Exception any exception in the response.
+   * @exception IOException if there is a problem writing the XML
+   * @see LocalSolrQueryRequest
+   */
+  public String query(String handler, SolrQueryRequest req) throws IOException, Exception
{
     SolrQueryResponse rsp = new SolrQueryResponse();
-    core.execute(req,rsp);
+    core.execute(core.getRequestHandler(handler),req,rsp);
     if (rsp.getException() != null) {
       throw rsp.getException();
     }
-                
+
     StringWriter sw = new StringWriter(32000);
     QueryResponseWriter responseWriter = core.getQueryResponseWriter(req);
     responseWriter.write(sw,req,rsp);
 
     req.close();
-    
+
     return sw.toString();
   }
 

Added: lucene/solr/trunk/src/test/org/apache/solr/handler/TestCSVLoader.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/test/org/apache/solr/handler/TestCSVLoader.java?view=auto&rev=524175
==============================================================================
--- lucene/solr/trunk/src/test/org/apache/solr/handler/TestCSVLoader.java (added)
+++ lucene/solr/trunk/src/test/org/apache/solr/handler/TestCSVLoader.java Fri Mar 30 09:59:58 2007
@@ -0,0 +1,239 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.handler;
+
+import org.apache.solr.util.AbstractSolrTestCase;
+import org.apache.solr.util.ContentStream;
+import org.apache.solr.util.ContentStreamBase;
+import org.apache.solr.request.SolrQueryRequest;
+import org.apache.solr.request.LocalSolrQueryRequest;
+import org.apache.solr.core.SolrException;
+
+import java.io.*;
+import java.util.List;
+import java.util.ArrayList;
+
+/**
+ * Tests the "/update/csv" request handler.  Each test writes CSV text to a
+ * temporary file, loads it through the handler and checks the result with
+ * queries.
+ */
+public class TestCSVLoader extends AbstractSolrTestCase {
+
+  public String getSchemaFile() { return "schema.xml"; }
+  public String getSolrConfigFile() { return "solrconfig.xml"; }
+
+  // temporary CSV file written by makeFile() and removed in tearDown()
+  String filename = "solr_tmp.csv";
+  String def_charset = "UTF-8";
+  File file = new File(filename);
+
+  public void setUp() throws Exception {
+    // if you override setUp or tearDown, you better call
+    // the super classes version
+    super.setUp();
+  }
+  public void tearDown() throws Exception {
+    // if you override setUp or tearDown, you better call
+    // the super classes version
+    try {
+      super.tearDown();
+    } finally {
+      // remove the temporary file even if the super class tearDown fails
+      deleteFile();
+    }
+  }
+
+  /** Writes contents to the temporary file using the default charset. */
+  void makeFile(String contents) {
+    makeFile(contents,def_charset);
+  }
+
+  /**
+   * Writes contents to the temporary file in the given charset.
+   * Any failure is rethrown wrapped in a RuntimeException.
+   */
+  void makeFile(String contents, String charset) {
+    try {
+      OutputStream fos = new FileOutputStream(filename);
+      try {
+        Writer out = new OutputStreamWriter(fos, charset);
+        out.write(contents);
+        out.close();
+      } finally {
+        // releases the file handle when the charset is unsupported or the
+        // write fails; closing an already closed stream has no effect
+        fos.close();
+      }
+    } catch (Exception e) {
+      throw new RuntimeException(e);
+    }
+  }
+
+  void deleteFile() {
+    file.delete();
+  }
+
+  /** Deletes the documents with ids 100 to 110 and commits. */
+  void cleanup() {
+    assertU(delQ("id:[100 TO 110]"));
+    assertU(commit());
+  }
+
+  /**
+   * Sends the temporary file to the "/update/csv" handler.
+   *
+   * @param args alternating request parameter names and values
+   */
+  void loadLocal(String... args) throws Exception {
+    LocalSolrQueryRequest req =  (LocalSolrQueryRequest)req(args);
+
+    // TODO: stop using locally defined streams once stream.file and
+    // stream.body work everywhere
+    List<ContentStream> cs = new ArrayList<ContentStream>();
+    cs.add(new ContentStreamBase.FileStream(new File(filename)));
+    req.setContentStreams(cs);
+    h.query("/update/csv",req);
+  }
+
+  public void testCSVLoad() throws Exception {
+    makeFile("id\n100\n101\n102");
+    loadLocal("stream.file",filename);
+    // csv loader currently defaults to committing
+    // assertU(commit());
+    assertQ(req("id:[100 TO 110]"),"//*[@numFound='3']");
+  }
+
+  public void testCommitFalse() throws Exception {
+    makeFile("id\n100\n101\n102");
+    loadLocal("stream.file",filename,"commit","false");
+    assertQ(req("id:[100 TO 110]"),"//*[@numFound='0']");
+    assertU(commit());
+    assertQ(req("id:[100 TO 110]"),"//*[@numFound='3']");
+  }
+
+  public void testCommitTrue() throws Exception {
+    makeFile("id\n100\n101\n102");
+    loadLocal("stream.file",filename,"commit","true");
+    assertQ(req("id:[100 TO 110]"),"//*[@numFound='3']");
+  }
+
+  public void testCSV() throws Exception {
+    lrf.args.put("version","2.0");
+    
+    makeFile("id,str_s\n100,\"quoted\"\n101,\n102,\"\"\n103,");
+    loadLocal("stream.file",filename,"commit","true");
+    assertQ(req("id:[100 TO 110]"),"//*[@numFound='4']");
+    assertQ(req("id:100"),"//str[@name='str_s'][.='quoted']");
+    assertQ(req("id:101"),"count(//str[@name='str_s'])=0");
+    // 102 is a quoted zero length field ,"", as opposed to ,,
+    // but we can't distinguish this case (and it's debateable
+    // if we should).  Does CSV have a way to specify missing
+    // from zero-length?
+    assertQ(req("id:102"),"count(//str[@name='str_s'])=0");
+    assertQ(req("id:103"),"count(//str[@name='str_s'])=0");
+
+    // test overwrite by default
+    loadLocal("stream.file",filename, "commit","true");
+    assertQ(req("id:[100 TO 110]"),"//*[@numFound='4']");
+
+    // test no overwrites
+    loadLocal("stream.file",filename, "commit","true", "overwrite","false");
+    assertQ(req("id:[100 TO 110]"),"//*[@numFound='8']");
+
+    // test overwrite
+    loadLocal("stream.file",filename, "commit","true");
+    assertQ(req("id:[100 TO 110]"),"//*[@numFound='4']");
+
+    // test global value mapping
+    loadLocal("stream.file",filename, "commit","true", "map","quoted:QUOTED");
+    assertQ(req("id:[100 TO 110]"),"//*[@numFound='4']");
+    assertQ(req("id:100"),"//str[@name='str_s'][.='QUOTED']");
+    assertQ(req("id:101"),"count(//str[@name='str_s'])=0");
+    assertQ(req("id:102"),"count(//str[@name='str_s'])=0");
+    assertQ(req("id:103"),"count(//str[@name='str_s'])=0");
+
+    // test value mapping to empty (remove)
+    loadLocal("stream.file",filename, "commit","true", "map","quoted:");
+    assertQ(req("id:[100 TO 110]"),"//*[@numFound='4']");
+    assertQ(req("id:100"),"count(//str[@name='str_s'])=0");
+
+    // test value mapping from empty
+    loadLocal("stream.file",filename, "commit","true", "map",":EMPTY");
+    assertQ(req("id:[100 TO 110]"),"//*[@numFound='4']");
+    assertQ(req("id:100"),"//str[@name='str_s'][.='quoted']");
+    assertQ(req("id:101"),"//str[@name='str_s'][.='EMPTY']");
+    assertQ(req("id:102"),"//str[@name='str_s'][.='EMPTY']");
+    assertQ(req("id:103"),"//str[@name='str_s'][.='EMPTY']");
+
+    // test multiple map rules
+    loadLocal("stream.file",filename, "commit","true", "map",":EMPTY", "map","quoted:QUOTED");
+    assertQ(req("id:[100 TO 110]"),"//*[@numFound='4']");
+    assertQ(req("id:100"),"//str[@name='str_s'][.='QUOTED']");
+    assertQ(req("id:101"),"//str[@name='str_s'][.='EMPTY']");
+    assertQ(req("id:102"),"//str[@name='str_s'][.='EMPTY']");
+    assertQ(req("id:103"),"//str[@name='str_s'][.='EMPTY']");
+
+    // test indexing empty fields
+    loadLocal("stream.file",filename, "commit","true", "f.str_s.keepEmpty","true");
+    assertQ(req("id:[100 TO 110]"),"//*[@numFound='4']");
+    assertQ(req("id:100"),"//str[@name='str_s'][.='quoted']");
+    assertQ(req("id:101"),"//str[@name='str_s'][.='']");
+    assertQ(req("id:102"),"//str[@name='str_s'][.='']");
+    assertQ(req("id:103"),"//str[@name='str_s'][.='']");
+
+    // test overriding the name of fields
+    loadLocal("stream.file",filename, "commit","true",
+             "fieldnames","id,my_s", "header","true",
+             "f.my_s.map",":EMPTY");
+    assertQ(req("id:[100 TO 110]"),"//*[@numFound='4']");
+    assertQ(req("id:100"),"//str[@name='my_s'][.='quoted']");
+    assertQ(req("id:101"),"count(//str[@name='str_s'])=0");
+    assertQ(req("id:102"),"count(//str[@name='str_s'])=0");
+    assertQ(req("id:103"),"count(//str[@name='str_s'])=0");
+    assertQ(req("id:101"),"//str[@name='my_s'][.='EMPTY']");
+    assertQ(req("id:102"),"//str[@name='my_s'][.='EMPTY']");
+    assertQ(req("id:103"),"//str[@name='my_s'][.='EMPTY']");
+
+    // test that header in file was skipped
+    assertQ(req("id:id"),"//*[@numFound='0']");
+
+    // test loading file as if it didn't have a header
+    loadLocal("stream.file",filename, "commit","true",
+             "fieldnames","id,my_s", "header","false");
+    assertQ(req("id:id"),"//*[@numFound='1']");
+    assertQ(req("id:100"),"//str[@name='my_s'][.='quoted']");
+
+
+    // test multi-valued fields via field splitting w/ mapping of subvalues
+    makeFile("id,str_s\n"
+            +"100,\"quoted\"\n"
+            +"101,\"a,b,c\"\n"
+            +"102,\"a,,b\"\n"
+            +"103,\n");
+    loadLocal("stream.file",filename, "commit","true",
+              "f.str_s.map",":EMPTY",
+              "f.str_s.split","true");
+    assertQ(req("id:[100 TO 110]"),"//*[@numFound='4']");
+    assertQ(req("id:100"),"//str[@name='str_s'][.='quoted']");
+    assertQ(req("id:101"),"//arr[@name='str_s']/str[1][.='a']");
+    assertQ(req("id:101"),"//arr[@name='str_s']/str[2][.='b']");
+    assertQ(req("id:101"),"//arr[@name='str_s']/str[3][.='c']");
+    assertQ(req("id:102"),"//arr[@name='str_s']/str[2][.='EMPTY']");
+    assertQ(req("id:103"),"//str[@name='str_s'][.='EMPTY']");
+
+
+    // test alternate values for delimiters
+    makeFile("id|str_s\n"
+            +"100|^quoted^\n"
+            +"101|a;'b';c\n"
+            +"102|a;;b\n"
+            +"103|\n");
+
+    loadLocal("stream.file",filename, "commit","true",
+              "separator","|",
+              "encapsulator","^",
+              "f.str_s.map",":EMPTY",
+              "f.str_s.split","true",
+              "f.str_s.separator",";",
+              "f.str_s.encapsulator","'"
+    );
+    assertQ(req("id:[100 TO 110]"),"//*[@numFound='4']");
+    assertQ(req("id:100"),"//str[@name='str_s'][.='quoted']");
+    assertQ(req("id:101"),"//arr[@name='str_s']/str[1][.='a']");
+    assertQ(req("id:101"),"//arr[@name='str_s']/str[2][.='b']");
+    assertQ(req("id:101"),"//arr[@name='str_s']/str[3][.='c']");
+    assertQ(req("id:102"),"//arr[@name='str_s']/str[2][.='EMPTY']");
+    assertQ(req("id:103"),"//str[@name='str_s'][.='EMPTY']");
+  }
+
+  
+
+}

Propchange: lucene/solr/trunk/src/test/org/apache/solr/handler/TestCSVLoader.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/solr/trunk/src/test/org/apache/solr/handler/TestCSVLoader.java
------------------------------------------------------------------------------
    svn:executable = *

Modified: lucene/solr/trunk/src/test/test-files/solr/conf/solrconfig.xml
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/test/test-files/solr/conf/solrconfig.xml?view=diff&rev=524175&r1=524174&r2=524175
==============================================================================
--- lucene/solr/trunk/src/test/test-files/solr/conf/solrconfig.xml (original)
+++ lucene/solr/trunk/src/test/test-files/solr/conf/solrconfig.xml Fri Mar 30 09:59:58 2007
@@ -263,6 +263,9 @@
     </lst>
   </requestHandler>
 
+  <requestHandler name="/update/csv" class="solr.CSVRequestHandler" startup="lazy">
+  </requestHandler>
+
   <!-- enable streaming for testing... -->
   <requestParsers enableRemoteStreaming="true" multipartUploadLimitInKB="2048" />
 
@@ -274,6 +277,5 @@
   <!-- test getting system property -->
   <propTest attr1="${solr.test.sys.prop1}-$${literal}"
             attr2="${non.existent.sys.prop:default-from-config}">prefix-${solr.test.sys.prop2}-suffix</propTest>
-
 
 </config>



Mime
View raw message