lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From sar...@apache.org
Subject svn commit: r1586120 - in /lucene/dev/trunk/solr: ./ example/example-DIH/solr/db/conf/ example/example-DIH/solr/db/conf/clustering/ example/example-DIH/solr/db/conf/lang/ example/example-DIH/solr/db/conf/velocity/ example/example-DIH/solr/db/conf/xslt/...
Date Wed, 09 Apr 2014 20:05:27 GMT
Author: sarowe
Date: Wed Apr  9 20:05:25 2014
New Revision: 1586120

URL: http://svn.apache.org/r1586120
Log:
SOLR-5937: Modernize the DIH example config sets

Added:
    lucene/dev/trunk/solr/example/example-DIH/solr/db/conf/admin-extra.html
      - copied unchanged from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/admin-extra.html
    lucene/dev/trunk/solr/example/example-DIH/solr/db/conf/admin-extra.menu-bottom.html
      - copied unchanged from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/admin-extra.menu-bottom.html
    lucene/dev/trunk/solr/example/example-DIH/solr/db/conf/admin-extra.menu-top.html
      - copied unchanged from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/admin-extra.menu-top.html
    lucene/dev/trunk/solr/example/example-DIH/solr/db/conf/clustering/
      - copied from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/clustering/
    lucene/dev/trunk/solr/example/example-DIH/solr/db/conf/currency.xml
      - copied unchanged from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/currency.xml
    lucene/dev/trunk/solr/example/example-DIH/solr/db/conf/elevate.xml
      - copied unchanged from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/elevate.xml
    lucene/dev/trunk/solr/example/example-DIH/solr/db/conf/lang/
      - copied from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/lang/
    lucene/dev/trunk/solr/example/example-DIH/solr/db/conf/mapping-FoldToASCII.txt
      - copied unchanged from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/mapping-FoldToASCII.txt
    lucene/dev/trunk/solr/example/example-DIH/solr/db/conf/mapping-ISOLatin1Accent.txt
      - copied unchanged from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/mapping-ISOLatin1Accent.txt
    lucene/dev/trunk/solr/example/example-DIH/solr/db/conf/schema.xml
      - copied, changed from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/schema.xml
    lucene/dev/trunk/solr/example/example-DIH/solr/db/conf/solrconfig.xml
      - copied, changed from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/solrconfig.xml
    lucene/dev/trunk/solr/example/example-DIH/solr/db/conf/spellings.txt
      - copied unchanged from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/spellings.txt
    lucene/dev/trunk/solr/example/example-DIH/solr/db/conf/stopwords.txt
      - copied unchanged from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/stopwords.txt
    lucene/dev/trunk/solr/example/example-DIH/solr/db/conf/synonyms.txt
      - copied unchanged from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/synonyms.txt
    lucene/dev/trunk/solr/example/example-DIH/solr/db/conf/update-script.js
      - copied unchanged from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/update-script.js
    lucene/dev/trunk/solr/example/example-DIH/solr/db/conf/velocity/
      - copied from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/velocity/
    lucene/dev/trunk/solr/example/example-DIH/solr/db/conf/xslt/
      - copied from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/xslt/
    lucene/dev/trunk/solr/example/example-DIH/solr/mail/conf/admin-extra.html
      - copied unchanged from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/admin-extra.html
    lucene/dev/trunk/solr/example/example-DIH/solr/mail/conf/admin-extra.menu-bottom.html
      - copied unchanged from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/admin-extra.menu-bottom.html
    lucene/dev/trunk/solr/example/example-DIH/solr/mail/conf/admin-extra.menu-top.html
      - copied unchanged from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/admin-extra.menu-top.html
    lucene/dev/trunk/solr/example/example-DIH/solr/mail/conf/clustering/
      - copied from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/clustering/
    lucene/dev/trunk/solr/example/example-DIH/solr/mail/conf/currency.xml
      - copied unchanged from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/currency.xml
    lucene/dev/trunk/solr/example/example-DIH/solr/mail/conf/elevate.xml
      - copied unchanged from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/elevate.xml
    lucene/dev/trunk/solr/example/example-DIH/solr/mail/conf/lang/
      - copied from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/lang/
    lucene/dev/trunk/solr/example/example-DIH/solr/mail/conf/mapping-FoldToASCII.txt
      - copied unchanged from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/mapping-FoldToASCII.txt
    lucene/dev/trunk/solr/example/example-DIH/solr/mail/conf/mapping-ISOLatin1Accent.txt
      - copied unchanged from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/mapping-ISOLatin1Accent.txt
    lucene/dev/trunk/solr/example/example-DIH/solr/mail/conf/schema.xml
      - copied, changed from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/schema.xml
    lucene/dev/trunk/solr/example/example-DIH/solr/mail/conf/scripts.conf
      - copied unchanged from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/scripts.conf
    lucene/dev/trunk/solr/example/example-DIH/solr/mail/conf/solrconfig.xml
      - copied, changed from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/solrconfig.xml
    lucene/dev/trunk/solr/example/example-DIH/solr/mail/conf/spellings.txt
      - copied unchanged from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/spellings.txt
    lucene/dev/trunk/solr/example/example-DIH/solr/mail/conf/stopwords.txt
      - copied unchanged from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/stopwords.txt
    lucene/dev/trunk/solr/example/example-DIH/solr/mail/conf/synonyms.txt
      - copied unchanged from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/synonyms.txt
    lucene/dev/trunk/solr/example/example-DIH/solr/mail/conf/update-script.js
      - copied unchanged from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/update-script.js
    lucene/dev/trunk/solr/example/example-DIH/solr/mail/conf/velocity/
      - copied from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/velocity/
    lucene/dev/trunk/solr/example/example-DIH/solr/mail/conf/xslt/
      - copied from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/xslt/
    lucene/dev/trunk/solr/example/example-DIH/solr/rss/conf/admin-extra.html
      - copied unchanged from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/admin-extra.html
    lucene/dev/trunk/solr/example/example-DIH/solr/rss/conf/admin-extra.menu-bottom.html
      - copied unchanged from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/admin-extra.menu-bottom.html
    lucene/dev/trunk/solr/example/example-DIH/solr/rss/conf/admin-extra.menu-top.html
      - copied unchanged from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/admin-extra.menu-top.html
    lucene/dev/trunk/solr/example/example-DIH/solr/rss/conf/clustering/
      - copied from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/clustering/
    lucene/dev/trunk/solr/example/example-DIH/solr/rss/conf/currency.xml
      - copied unchanged from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/currency.xml
    lucene/dev/trunk/solr/example/example-DIH/solr/rss/conf/elevate.xml
      - copied unchanged from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/elevate.xml
    lucene/dev/trunk/solr/example/example-DIH/solr/rss/conf/lang/
      - copied from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/lang/
    lucene/dev/trunk/solr/example/example-DIH/solr/rss/conf/mapping-FoldToASCII.txt
      - copied unchanged from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/mapping-FoldToASCII.txt
    lucene/dev/trunk/solr/example/example-DIH/solr/rss/conf/mapping-ISOLatin1Accent.txt
      - copied unchanged from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/mapping-ISOLatin1Accent.txt
    lucene/dev/trunk/solr/example/example-DIH/solr/rss/conf/schema.xml
      - copied, changed from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/schema.xml
    lucene/dev/trunk/solr/example/example-DIH/solr/rss/conf/solrconfig.xml
      - copied, changed from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/solrconfig.xml
    lucene/dev/trunk/solr/example/example-DIH/solr/rss/conf/spellings.txt
      - copied unchanged from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/spellings.txt
    lucene/dev/trunk/solr/example/example-DIH/solr/rss/conf/stopwords.txt
      - copied unchanged from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/stopwords.txt
    lucene/dev/trunk/solr/example/example-DIH/solr/rss/conf/synonyms.txt
      - copied unchanged from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/synonyms.txt
    lucene/dev/trunk/solr/example/example-DIH/solr/rss/conf/update-script.js
      - copied unchanged from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/update-script.js
    lucene/dev/trunk/solr/example/example-DIH/solr/rss/conf/velocity/
      - copied from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/velocity/
    lucene/dev/trunk/solr/example/example-DIH/solr/rss/conf/xslt/
      - copied from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/xslt/
    lucene/dev/trunk/solr/example/example-DIH/solr/solr/conf/admin-extra.html
      - copied unchanged from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/admin-extra.html
    lucene/dev/trunk/solr/example/example-DIH/solr/solr/conf/admin-extra.menu-bottom.html
      - copied unchanged from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/admin-extra.menu-bottom.html
    lucene/dev/trunk/solr/example/example-DIH/solr/solr/conf/admin-extra.menu-top.html
      - copied unchanged from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/admin-extra.menu-top.html
    lucene/dev/trunk/solr/example/example-DIH/solr/solr/conf/clustering/
      - copied from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/clustering/
    lucene/dev/trunk/solr/example/example-DIH/solr/solr/conf/currency.xml
      - copied unchanged from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/currency.xml
    lucene/dev/trunk/solr/example/example-DIH/solr/solr/conf/elevate.xml
      - copied unchanged from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/elevate.xml
    lucene/dev/trunk/solr/example/example-DIH/solr/solr/conf/lang/
      - copied from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/lang/
    lucene/dev/trunk/solr/example/example-DIH/solr/solr/conf/mapping-FoldToASCII.txt
      - copied unchanged from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/mapping-FoldToASCII.txt
    lucene/dev/trunk/solr/example/example-DIH/solr/solr/conf/mapping-ISOLatin1Accent.txt
      - copied unchanged from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/mapping-ISOLatin1Accent.txt
    lucene/dev/trunk/solr/example/example-DIH/solr/solr/conf/schema.xml
      - copied, changed from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/schema.xml
    lucene/dev/trunk/solr/example/example-DIH/solr/solr/conf/solrconfig.xml
      - copied, changed from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/solrconfig.xml
    lucene/dev/trunk/solr/example/example-DIH/solr/solr/conf/spellings.txt
      - copied unchanged from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/spellings.txt
    lucene/dev/trunk/solr/example/example-DIH/solr/solr/conf/stopwords.txt
      - copied unchanged from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/stopwords.txt
    lucene/dev/trunk/solr/example/example-DIH/solr/solr/conf/synonyms.txt
      - copied unchanged from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/synonyms.txt
    lucene/dev/trunk/solr/example/example-DIH/solr/solr/conf/update-script.js
      - copied unchanged from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/update-script.js
    lucene/dev/trunk/solr/example/example-DIH/solr/solr/conf/velocity/
      - copied from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/velocity/
    lucene/dev/trunk/solr/example/example-DIH/solr/solr/conf/xslt/
      - copied from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/xslt/
    lucene/dev/trunk/solr/example/example-DIH/solr/tika/conf/admin-extra.html
      - copied unchanged from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/admin-extra.html
    lucene/dev/trunk/solr/example/example-DIH/solr/tika/conf/admin-extra.menu-bottom.html
      - copied unchanged from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/admin-extra.menu-bottom.html
    lucene/dev/trunk/solr/example/example-DIH/solr/tika/conf/admin-extra.menu-top.html
      - copied unchanged from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/admin-extra.menu-top.html
    lucene/dev/trunk/solr/example/example-DIH/solr/tika/conf/schema.xml
      - copied, changed from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/schema.xml
    lucene/dev/trunk/solr/example/example-DIH/solr/tika/conf/solrconfig.xml
      - copied, changed from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/solrconfig.xml
Removed:
    lucene/dev/trunk/solr/example/example-DIH/solr/mail/conf/data-config.xml
Modified:
    lucene/dev/trunk/solr/CHANGES.txt

Modified: lucene/dev/trunk/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/CHANGES.txt?rev=1586120&r1=1586119&r2=1586120&view=diff
==============================================================================
--- lucene/dev/trunk/solr/CHANGES.txt (original)
+++ lucene/dev/trunk/solr/CHANGES.txt Wed Apr  9 20:05:25 2014
@@ -295,6 +295,8 @@ Other Changes
 * SOLR-5773: CollapsingQParserPlugin should make elevated documents the 
   group head. (David Boychuck, Joel Bernstein)
 
+* SOLR-5937: Modernize the DIH example config sets. (Steve Rowe)
+
 ==================  4.7.1  ==================
 
 Versions of Major Components

Copied: lucene/dev/trunk/solr/example/example-DIH/solr/db/conf/schema.xml (from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/schema.xml)
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/example/example-DIH/solr/db/conf/schema.xml?p2=lucene/dev/trunk/solr/example/example-DIH/solr/db/conf/schema.xml&p1=lucene/dev/trunk/solr/example/solr/collection1/conf/schema.xml&r1=1586108&r2=1586120&rev=1586120&view=diff
==============================================================================
--- lucene/dev/trunk/solr/example/solr/collection1/conf/schema.xml (original)
+++ lucene/dev/trunk/solr/example/example-DIH/solr/db/conf/schema.xml Wed Apr  9 20:05:25 2014
@@ -45,7 +45,7 @@
     that avoids logging every request
 -->
 
-<schema name="example" version="1.5">
+<schema name="example-DIH-db" version="1.5">
   <!-- attribute "name" is the name of this schema and is only used for display purposes.
        version="x.y" is Solr's version number for the schema syntax and 
        semantics.  It should not normally be changed by applications.
@@ -290,17 +290,17 @@
 
    <!-- Create a string version of author for faceting -->
    <copyField source="author" dest="author_s"/>
-	
-   <!-- Above, multiple source fields are copied to the [text] field. 
-	  Another way to map multiple source fields to the same 
-	  destination field is to use the dynamic field syntax. 
+
+   <!-- Above, multiple source fields are copied to the [text] field.
+	  Another way to map multiple source fields to the same
+	  destination field is to use the dynamic field syntax.
 	  copyField also supports a maxChars to copy setting.  -->
-	   
+
    <!-- <copyField source="*_t" dest="text" maxChars="3000"/> -->
 
    <!-- copy name to alphaNameSort, a field designed for sorting by name -->
    <!-- <copyField source="name" dest="alphaNameSort"/> -->
- 
+
   
     <!-- field type definitions. The "name" attribute is
        just a label to be used by field definitions.  The "class"

Copied: lucene/dev/trunk/solr/example/example-DIH/solr/db/conf/solrconfig.xml (from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/solrconfig.xml)
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/example/example-DIH/solr/db/conf/solrconfig.xml?p2=lucene/dev/trunk/solr/example/example-DIH/solr/db/conf/solrconfig.xml&p1=lucene/dev/trunk/solr/example/solr/collection1/conf/solrconfig.xml&r1=1586108&r2=1586120&rev=1586120&view=diff
==============================================================================
--- lucene/dev/trunk/solr/example/solr/collection1/conf/solrconfig.xml (original)
+++ lucene/dev/trunk/solr/example/example-DIH/solr/db/conf/solrconfig.xml Wed Apr  9 20:05:25 2014
@@ -72,17 +72,19 @@
        The examples below can be used to load some solr-contribs along 
        with their external dependencies.
     -->
-  <lib dir="../../../contrib/extraction/lib" regex=".*\.jar" />
-  <lib dir="../../../dist/" regex="solr-cell-\d.*\.jar" />
+  <lib dir="../../../../dist/" regex="solr-dataimporthandler-.*\.jar" />
 
-  <lib dir="../../../contrib/clustering/lib/" regex=".*\.jar" />
-  <lib dir="../../../dist/" regex="solr-clustering-\d.*\.jar" />
+  <lib dir="../../../../contrib/extraction/lib" regex=".*\.jar" />
+  <lib dir="../../../../dist/" regex="solr-cell-\d.*\.jar" />
 
-  <lib dir="../../../contrib/langid/lib/" regex=".*\.jar" />
-  <lib dir="../../../dist/" regex="solr-langid-\d.*\.jar" />
+  <lib dir="../../../../contrib/clustering/lib/" regex=".*\.jar" />
+  <lib dir="../../../../dist/" regex="solr-clustering-\d.*\.jar" />
 
-  <lib dir="../../../contrib/velocity/lib" regex=".*\.jar" />
-  <lib dir="../../../dist/" regex="solr-velocity-\d.*\.jar" />
+  <lib dir="../../../../contrib/langid/lib/" regex=".*\.jar" />
+  <lib dir="../../../../dist/" regex="solr-langid-\d.*\.jar" />
+
+  <lib dir="../../../../contrib/velocity/lib" regex=".*\.jar" />
+  <lib dir="../../../../dist/" regex="solr-velocity-\d.*\.jar" />
 
   <!-- an exact 'path' can be used instead of a 'dir' to specify a 
        specific jar file.  This will cause a serious error to be logged 
@@ -803,6 +805,13 @@
        not be initialized until the first request that uses it.
 
     -->
+
+  <requestHandler name="/dataimport" class="solr.DataImportHandler">
+    <lst name="defaults">
+      <str name="config">db-data-config.xml</str>
+    </lst>
+  </requestHandler>
+
   <!-- SearchHandler
 
        http://wiki.apache.org/solr/SearchHandler

Copied: lucene/dev/trunk/solr/example/example-DIH/solr/mail/conf/schema.xml (from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/schema.xml)
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/example/example-DIH/solr/mail/conf/schema.xml?p2=lucene/dev/trunk/solr/example/example-DIH/solr/mail/conf/schema.xml&p1=lucene/dev/trunk/solr/example/solr/collection1/conf/schema.xml&r1=1586108&r2=1586120&rev=1586120&view=diff
==============================================================================
--- lucene/dev/trunk/solr/example/solr/collection1/conf/schema.xml (original)
+++ lucene/dev/trunk/solr/example/example-DIH/solr/mail/conf/schema.xml Wed Apr  9 20:05:25 2014
@@ -45,7 +45,7 @@
     that avoids logging every request
 -->
 
-<schema name="example" version="1.5">
+<schema name="example-DIH-mail" version="1.5">
   <!-- attribute "name" is the name of this schema and is only used for display purposes.
        version="x.y" is Solr's version number for the schema syntax and 
        semantics.  It should not normally be changed by applications.
@@ -111,89 +111,25 @@
    --> 
    <field name="_version_" type="long" indexed="true" stored="true"/>
    
-   <!-- points to the root document of a block of nested documents. Required for nested
-      document support, may be removed otherwise
-   -->
-   <field name="_root_" type="string" indexed="true" stored="false"/>
-
-   <!-- Only remove the "id" field if you have a very good reason to. While not strictly
-     required, it is highly recommended. A <uniqueKey> is present in almost all Solr 
-     installations. See the <uniqueKey> declaration below where <uniqueKey> is set to "id".
-   -->   
-   <field name="id" type="string" indexed="true" stored="true" required="true" multiValued="false" /> 
-        
-   <field name="sku" type="text_en_splitting_tight" indexed="true" stored="true" omitNorms="true"/>
-   <field name="name" type="text_general" indexed="true" stored="true"/>
-   <field name="manu" type="text_general" indexed="true" stored="true" omitNorms="true"/>
-   <field name="cat" type="string" indexed="true" stored="true" multiValued="true"/>
-   <field name="features" type="text_general" indexed="true" stored="true" multiValued="true"/>
-   <field name="includes" type="text_general" indexed="true" stored="true" termVectors="true" termPositions="true" termOffsets="true" />
-
-   <field name="weight" type="float" indexed="true" stored="true"/>
-   <field name="price"  type="float" indexed="true" stored="true"/>
-   <field name="popularity" type="int" indexed="true" stored="true" />
-   <field name="inStock" type="boolean" indexed="true" stored="true" />
-
-   <field name="store" type="location" indexed="true" stored="true"/>
-
-   <!-- Common metadata fields, named specifically to match up with
-     SolrCell metadata when parsing rich documents such as Word, PDF.
-     Some fields are multiValued only because Tika currently may return
-     multiple values for them. Some metadata is parsed from the documents,
-     but there are some which come from the client context:
-       "content_type": From the HTTP headers of incoming stream
-       "resourcename": From SolrCell request param resource.name
-   -->
-   <field name="title" type="text_general" indexed="true" stored="true" multiValued="true"/>
-   <field name="subject" type="text_general" indexed="true" stored="true"/>
-   <field name="description" type="text_general" indexed="true" stored="true"/>
-   <field name="comments" type="text_general" indexed="true" stored="true"/>
-   <field name="author" type="text_general" indexed="true" stored="true"/>
-   <field name="keywords" type="text_general" indexed="true" stored="true"/>
-   <field name="category" type="text_general" indexed="true" stored="true"/>
-   <field name="resourcename" type="text_general" indexed="true" stored="true"/>
-   <field name="url" type="text_general" indexed="true" stored="true"/>
-   <field name="content_type" type="string" indexed="true" stored="true" multiValued="true"/>
-   <field name="last_modified" type="date" indexed="true" stored="true"/>
-   <field name="links" type="string" indexed="true" stored="true" multiValued="true"/>
-
-   <!-- Main body of document extracted by SolrCell.
-        NOTE: This field is not indexed by default, since it is also copied to "text"
-        using copyField below. This is to save space. Use this field for returning and
-        highlighting document content. Use the "text" field to search the content. -->
-   <field name="content" type="text_general" indexed="false" stored="true" multiValued="true"/>
-   
+   <field name="content" type="text_general" indexed="true" stored="true" multiValued="true"/>
 
    <!-- catchall field, containing all other searchable text fields (implemented
         via copyField further on in this schema  -->
    <field name="text" type="text_general" indexed="true" stored="false" multiValued="true"/>
 
-   <!-- catchall text field that indexes tokens both normally and in reverse for efficient
-        leading wildcard queries. -->
-   <field name="text_rev" type="text_general_rev" indexed="true" stored="false" multiValued="true"/>
-
-   <!-- non-tokenized version of manufacturer to make it easier to sort or group
-        results by manufacturer.  copied from "manu" via copyField -->
-   <field name="manu_exact" type="string" indexed="true" stored="false"/>
-
-   <field name="payloads" type="payloads" indexed="true" stored="true"/>
-
-
-   <!--
-     Some fields such as popularity and manu_exact could be modified to
-     leverage doc values:
-     <field name="popularity" type="int" indexed="true" stored="true" docValues="true" />
-     <field name="manu_exact" type="string" indexed="false" stored="false" docValues="true" />
-     <field name="cat" type="string" indexed="true" stored="true" docValues="true" multiValued="true"/>
-
-
-     Although it would make indexing slightly slower and the index bigger, it
-     would also make the index faster to load, more memory-efficient and more
-     NRT-friendly.
-     -->
+   <field name="messageId" type="string" indexed="true" stored="true" required="true" multiValued="false"/>
+   <field name="subject" type="text_general" indexed="true" stored="true"/>
+   <field name="from" type="string" indexed="true" stored="true" omitNorms="true"/>
+   <field name="sentDate" type="date" indexed="true" stored="true"/>
+   <field name="xMailer" type="string" indexed="true" stored="true" omitNorms="true"/>
+
+   <field name="allTo" type="string" indexed="true" stored="true" omitNorms="true" multiValued="true"/>
+   <field name="flags" type="string" indexed="true" stored="true" omitNorms="true" multiValued="true"/>
+   <field name="attachment" type="text_general" indexed="true" stored="true" multiValued="true"/>
+   <field name="attachmentNames" type="string" indexed="true" stored="true" omitNorms="true" multiValued="true"/>
 
    <!-- Dynamic field definitions allow using convention over configuration
-       for fields via the specification of patterns to match field names. 
+       for fields via the specification of patterns to match field names.
        EXAMPLE:  name="*_i" will match any field ending in _i (like myid_i, z_i)
        RESTRICTION: the glob-like pattern in the name attribute must have
        a "*" only at the start or the end.  -->
@@ -247,7 +183,7 @@
  <!-- Field to use to determine and enforce document uniqueness. 
       Unless this field is marked with required="false", it will be a required field
    -->
- <uniqueKey>id</uniqueKey>
+ <uniqueKey>messageId</uniqueKey>
 
  <!-- DEPRECATED: The defaultSearchField is consulted by various query parsers when
   parsing a query string that isn't explicit about the field.  Machine (non-user)
@@ -268,29 +204,12 @@
         is added to the index.  It's used either to index the same field differently,
         or to add multiple fields to the same field for easier/faster searching.  -->
 
-   <copyField source="cat" dest="text"/>
-   <copyField source="name" dest="text"/>
-   <copyField source="manu" dest="text"/>
-   <copyField source="features" dest="text"/>
-   <copyField source="includes" dest="text"/>
-   <copyField source="manu" dest="manu_exact"/>
-
-   <!-- Copy the price into a currency enabled field (default USD) -->
-   <copyField source="price" dest="price_c"/>
-
-   <!-- Text fields from SolrCell to search by default in our catch-all field -->
-   <copyField source="title" dest="text"/>
-   <copyField source="author" dest="text"/>
-   <copyField source="description" dest="text"/>
-   <copyField source="keywords" dest="text"/>
-   <copyField source="content" dest="text"/>
-   <copyField source="content_type" dest="text"/>
-   <copyField source="resourcename" dest="text"/>
-   <copyField source="url" dest="text"/>
-
-   <!-- Create a string version of author for faceting -->
-   <copyField source="author" dest="author_s"/>
-	
+    <copyField source="content" dest="text"/>
+    <copyField source="attachmentNames" dest="text"/>
+    <copyField source="attachment" dest="text"/>
+    <copyField source="subject" dest="text"/>
+    <copyField source="allTo" dest="text"/>
+
    <!-- Above, multiple source fields are copied to the [text] field. 
 	  Another way to map multiple source fields to the same 
 	  destination field is to use the dynamic field syntax. 

Copied: lucene/dev/trunk/solr/example/example-DIH/solr/mail/conf/solrconfig.xml (from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/solrconfig.xml)
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/example/example-DIH/solr/mail/conf/solrconfig.xml?p2=lucene/dev/trunk/solr/example/example-DIH/solr/mail/conf/solrconfig.xml&p1=lucene/dev/trunk/solr/example/solr/collection1/conf/solrconfig.xml&r1=1586108&r2=1586120&rev=1586120&view=diff
==============================================================================
--- lucene/dev/trunk/solr/example/solr/collection1/conf/solrconfig.xml (original)
+++ lucene/dev/trunk/solr/example/example-DIH/solr/mail/conf/solrconfig.xml Wed Apr  9 20:05:25 2014
@@ -72,17 +72,20 @@
        The examples below can be used to load some solr-contribs along 
        with their external dependencies.
     -->
-  <lib dir="../../../contrib/extraction/lib" regex=".*\.jar" />
-  <lib dir="../../../dist/" regex="solr-cell-\d.*\.jar" />
+  <lib dir="../../../../contrib/dataimporthandler/lib/" regex=".*\.jar" />
+  <lib dir="../../../../dist/" regex="solr-dataimporthandler-.*\.jar" />
 
-  <lib dir="../../../contrib/clustering/lib/" regex=".*\.jar" />
-  <lib dir="../../../dist/" regex="solr-clustering-\d.*\.jar" />
+  <lib dir="../../../../contrib/extraction/lib" regex=".*\.jar" />
+  <lib dir="../../../../dist/" regex="solr-cell-\d.*\.jar" />
 
-  <lib dir="../../../contrib/langid/lib/" regex=".*\.jar" />
-  <lib dir="../../../dist/" regex="solr-langid-\d.*\.jar" />
+  <lib dir="../../../../contrib/clustering/lib/" regex=".*\.jar" />
+  <lib dir="../../../../dist/" regex="solr-clustering-\d.*\.jar" />
 
-  <lib dir="../../../contrib/velocity/lib" regex=".*\.jar" />
-  <lib dir="../../../dist/" regex="solr-velocity-\d.*\.jar" />
+  <lib dir="../../../../contrib/langid/lib/" regex=".*\.jar" />
+  <lib dir="../../../../dist/" regex="solr-langid-\d.*\.jar" />
+
+  <lib dir="../../../../contrib/velocity/lib" regex=".*\.jar" />
+  <lib dir="../../../../dist/" regex="solr-velocity-\d.*\.jar" />
 
   <!-- an exact 'path' can be used instead of a 'dir' to specify a 
        specific jar file.  This will cause a serious error to be logged 
@@ -803,6 +806,13 @@
        not be initialized until the first request that uses it.
 
     -->
+
+  <requestHandler name="/dataimport" class="solr.DataImportHandler">
+    <lst name="defaults">
+      <str name="config">mail-data-config.xml</str>
+    </lst>
+  </requestHandler>
+
   <!-- SearchHandler
 
        http://wiki.apache.org/solr/SearchHandler

Copied: lucene/dev/trunk/solr/example/example-DIH/solr/rss/conf/schema.xml (from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/schema.xml)
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/example/example-DIH/solr/rss/conf/schema.xml?p2=lucene/dev/trunk/solr/example/example-DIH/solr/rss/conf/schema.xml&p1=lucene/dev/trunk/solr/example/solr/collection1/conf/schema.xml&r1=1586108&r2=1586120&rev=1586120&view=diff
==============================================================================
--- lucene/dev/trunk/solr/example/solr/collection1/conf/schema.xml (original)
+++ lucene/dev/trunk/solr/example/example-DIH/solr/rss/conf/schema.xml Wed Apr  9 20:05:25 2014
@@ -45,7 +45,7 @@
     that avoids logging every request
 -->
 
-<schema name="example" version="1.5">
+<schema name="example-DIH-rss" version="1.5">
   <!-- attribute "name" is the name of this schema and is only used for display purposes.
        version="x.y" is Solr's version number for the schema syntax and 
        semantics.  It should not normally be changed by applications.
@@ -111,86 +111,25 @@
    --> 
    <field name="_version_" type="long" indexed="true" stored="true"/>
    
-   <!-- points to the root document of a block of nested documents. Required for nested
-      document support, may be removed otherwise
-   -->
-   <field name="_root_" type="string" indexed="true" stored="false"/>
-
-   <!-- Only remove the "id" field if you have a very good reason to. While not strictly
-     required, it is highly recommended. A <uniqueKey> is present in almost all Solr 
-     installations. See the <uniqueKey> declaration below where <uniqueKey> is set to "id".
-   -->   
-   <field name="id" type="string" indexed="true" stored="true" required="true" multiValued="false" /> 
-        
-   <field name="sku" type="text_en_splitting_tight" indexed="true" stored="true" omitNorms="true"/>
-   <field name="name" type="text_general" indexed="true" stored="true"/>
-   <field name="manu" type="text_general" indexed="true" stored="true" omitNorms="true"/>
-   <field name="cat" type="string" indexed="true" stored="true" multiValued="true"/>
-   <field name="features" type="text_general" indexed="true" stored="true" multiValued="true"/>
-   <field name="includes" type="text_general" indexed="true" stored="true" termVectors="true" termPositions="true" termOffsets="true" />
-
-   <field name="weight" type="float" indexed="true" stored="true"/>
-   <field name="price"  type="float" indexed="true" stored="true"/>
-   <field name="popularity" type="int" indexed="true" stored="true" />
-   <field name="inStock" type="boolean" indexed="true" stored="true" />
-
-   <field name="store" type="location" indexed="true" stored="true"/>
-
-   <!-- Common metadata fields, named specifically to match up with
-     SolrCell metadata when parsing rich documents such as Word, PDF.
-     Some fields are multiValued only because Tika currently may return
-     multiple values for them. Some metadata is parsed from the documents,
-     but there are some which come from the client context:
-       "content_type": From the HTTP headers of incoming stream
-       "resourcename": From SolrCell request param resource.name
-   -->
-   <field name="title" type="text_general" indexed="true" stored="true" multiValued="true"/>
    <field name="subject" type="text_general" indexed="true" stored="true"/>
-   <field name="description" type="text_general" indexed="true" stored="true"/>
-   <field name="comments" type="text_general" indexed="true" stored="true"/>
-   <field name="author" type="text_general" indexed="true" stored="true"/>
-   <field name="keywords" type="text_general" indexed="true" stored="true"/>
-   <field name="category" type="text_general" indexed="true" stored="true"/>
-   <field name="resourcename" type="text_general" indexed="true" stored="true"/>
-   <field name="url" type="text_general" indexed="true" stored="true"/>
-   <field name="content_type" type="string" indexed="true" stored="true" multiValued="true"/>
-   <field name="last_modified" type="date" indexed="true" stored="true"/>
-   <field name="links" type="string" indexed="true" stored="true" multiValued="true"/>
-
-   <!-- Main body of document extracted by SolrCell.
-        NOTE: This field is not indexed by default, since it is also copied to "text"
-        using copyField below. This is to save space. Use this field for returning and
-        highlighting document content. Use the "text" field to search the content. -->
-   <field name="content" type="text_general" indexed="false" stored="true" multiValued="true"/>
-   
 
    <!-- catchall field, containing all other searchable text fields (implemented
         via copyField further on in this schema) -->
    <field name="text" type="text_general" indexed="true" stored="false" multiValued="true"/>
 
-   <!-- catchall text field that indexes tokens both normally and in reverse for efficient
-        leading wildcard queries. -->
-   <field name="text_rev" type="text_general_rev" indexed="true" stored="false" multiValued="true"/>
-
-   <!-- non-tokenized version of manufacturer to make it easier to sort or group
-        results by manufacturer.  copied from "manu" via copyField -->
-   <field name="manu_exact" type="string" indexed="true" stored="false"/>
-
-   <field name="payloads" type="payloads" indexed="true" stored="true"/>
-
-
-   <!--
-     Some fields such as popularity and manu_exact could be modified to
-     leverage doc values:
-     <field name="popularity" type="int" indexed="true" stored="true" docValues="true" />
-     <field name="manu_exact" type="string" indexed="false" stored="false" docValues="true" />
-     <field name="cat" type="string" indexed="true" stored="true" docValues="true" multiValued="true"/>
+   <field name="source" type="text_general" indexed="true" stored="true" />
+   <field name="source-link" type="string" indexed="false" stored="true" />
 
+   <field name="title" type="text_general" indexed="true" stored="true" />
+   <field name="description" type="html" indexed="true" stored="true" />
 
-     Although it would make indexing slightly slower and the index bigger, it
-     would also make the index faster to load, more memory-efficient and more
-     NRT-friendly.
-     -->
+   <field name="link" type="string" indexed="true" stored="true" required="true" multiValued="false" />
+   <field name="creator" type="string" indexed="false" stored="true" />
+   <field name="item-subject" type="string" indexed="true" stored="false" />
+   <field name="date" type="date" indexed="true" stored="false" />
+   <field name="slash-department" type="string" indexed="true" stored="false" />
+   <field name="slash-section" type="string" indexed="true" stored="false" />
+   <field name="slash-comments" type="int" indexed="true" stored="true" />
 
    <!-- Dynamic field definitions allow using convention over configuration
        for fields via the specification of patterns to match field names. 
@@ -247,7 +186,7 @@
  <!-- Field to use to determine and enforce document uniqueness. 
       Unless this field is marked with required="false", it will be a required field
    -->
- <uniqueKey>id</uniqueKey>
+ <uniqueKey>link</uniqueKey>
 
  <!-- DEPRECATED: The defaultSearchField is consulted by various query parsers when
   parsing a query string that isn't explicit about the field.  Machine (non-user)
@@ -268,29 +207,13 @@
         is added to the index.  It's used either to index the same field differently,
         or to add multiple fields to the same field for easier/faster searching.  -->
 
-   <copyField source="cat" dest="text"/>
-   <copyField source="name" dest="text"/>
-   <copyField source="manu" dest="text"/>
-   <copyField source="features" dest="text"/>
-   <copyField source="includes" dest="text"/>
-   <copyField source="manu" dest="manu_exact"/>
-
-   <!-- Copy the price into a currency enabled field (default USD) -->
-   <copyField source="price" dest="price_c"/>
-
-   <!-- Text fields from SolrCell to search by default in our catch-all field -->
-   <copyField source="title" dest="text"/>
-   <copyField source="author" dest="text"/>
-   <copyField source="description" dest="text"/>
-   <copyField source="keywords" dest="text"/>
-   <copyField source="content" dest="text"/>
-   <copyField source="content_type" dest="text"/>
-   <copyField source="resourcename" dest="text"/>
-   <copyField source="url" dest="text"/>
-
-   <!-- Create a string version of author for faceting -->
-   <copyField source="author" dest="author_s"/>
-	
+    <copyField source="source" dest="text"/>
+    <copyField source="subject" dest="text"/>
+    <copyField source="title" dest="text"/>
+    <copyField source="description" dest="text"/>
+    <copyField source="creator" dest="text"/>
+    <copyField source="item-subject" dest="text"/>
+
    <!-- Above, multiple source fields are copied to the [text] field. 
 	  Another way to map multiple source fields to the same 
 	  destination field is to use the dynamic field syntax. 
@@ -310,6 +233,33 @@
        standard package such as org.apache.solr.analysis
     -->
 
+    <fieldtype name="html" stored="true" indexed="true" class="solr.TextField">
+      <analyzer type="index">
+        <charFilter class="solr.HTMLStripCharFilterFactory"/>
+        <tokenizer class="solr.StandardTokenizerFactory"/>
+        <!-- in this example, we will only use synonyms at query time
+        <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
+        -->
+        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
+        <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
+        <filter class="solr.LowerCaseFilterFactory"/>
+        <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
+        <filter class="solr.PorterStemFilterFactory"/>
+        <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
+      </analyzer>
+      <analyzer type="query">
+        <charFilter class="solr.HTMLStripCharFilterFactory"/>
+        <tokenizer class="solr.StandardTokenizerFactory"/>
+        <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
+        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
+        <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
+        <filter class="solr.LowerCaseFilterFactory"/>
+        <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
+        <filter class="solr.PorterStemFilterFactory"/>
+        <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
+      </analyzer>
+    </fieldtype>
+
     <!-- The StrField type is not analyzed, but indexed/stored verbatim.
        It supports doc values but in that case the field needs to be
        single-valued and either required or have a default value.

Copied: lucene/dev/trunk/solr/example/example-DIH/solr/rss/conf/solrconfig.xml (from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/solrconfig.xml)
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/example/example-DIH/solr/rss/conf/solrconfig.xml?p2=lucene/dev/trunk/solr/example/example-DIH/solr/rss/conf/solrconfig.xml&p1=lucene/dev/trunk/solr/example/solr/collection1/conf/solrconfig.xml&r1=1586108&r2=1586120&rev=1586120&view=diff
==============================================================================
--- lucene/dev/trunk/solr/example/solr/collection1/conf/solrconfig.xml (original)
+++ lucene/dev/trunk/solr/example/example-DIH/solr/rss/conf/solrconfig.xml Wed Apr  9 20:05:25 2014
@@ -72,17 +72,19 @@
        The examples below can be used to load some solr-contribs along 
        with their external dependencies.
     -->
-  <lib dir="../../../contrib/extraction/lib" regex=".*\.jar" />
-  <lib dir="../../../dist/" regex="solr-cell-\d.*\.jar" />
+  <lib dir="../../../../dist/" regex="solr-dataimporthandler-.*\.jar" />
 
-  <lib dir="../../../contrib/clustering/lib/" regex=".*\.jar" />
-  <lib dir="../../../dist/" regex="solr-clustering-\d.*\.jar" />
+  <lib dir="../../../../contrib/extraction/lib" regex=".*\.jar" />
+  <lib dir="../../../../dist/" regex="solr-cell-\d.*\.jar" />
 
-  <lib dir="../../../contrib/langid/lib/" regex=".*\.jar" />
-  <lib dir="../../../dist/" regex="solr-langid-\d.*\.jar" />
+  <lib dir="../../../../contrib/clustering/lib/" regex=".*\.jar" />
+  <lib dir="../../../../dist/" regex="solr-clustering-\d.*\.jar" />
 
-  <lib dir="../../../contrib/velocity/lib" regex=".*\.jar" />
-  <lib dir="../../../dist/" regex="solr-velocity-\d.*\.jar" />
+  <lib dir="../../../../contrib/langid/lib/" regex=".*\.jar" />
+  <lib dir="../../../../dist/" regex="solr-langid-\d.*\.jar" />
+
+  <lib dir="../../../../contrib/velocity/lib" regex=".*\.jar" />
+  <lib dir="../../../../dist/" regex="solr-velocity-\d.*\.jar" />
 
   <!-- an exact 'path' can be used instead of a 'dir' to specify a 
        specific jar file.  This will cause a serious error to be logged 
@@ -803,6 +805,13 @@
        not be initialized until the first request that uses it.
 
     -->
+
+  <requestHandler name="/dataimport" class="solr.DataImportHandler">
+    <lst name="defaults">
+      <str name="config">rss-data-config.xml</str>
+    </lst>
+  </requestHandler>
+
   <!-- SearchHandler
 
        http://wiki.apache.org/solr/SearchHandler

Copied: lucene/dev/trunk/solr/example/example-DIH/solr/solr/conf/schema.xml (from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/schema.xml)
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/example/example-DIH/solr/solr/conf/schema.xml?p2=lucene/dev/trunk/solr/example/example-DIH/solr/solr/conf/schema.xml&p1=lucene/dev/trunk/solr/example/solr/collection1/conf/schema.xml&r1=1586108&r2=1586120&rev=1586120&view=diff
==============================================================================
--- lucene/dev/trunk/solr/example/solr/collection1/conf/schema.xml (original)
+++ lucene/dev/trunk/solr/example/example-DIH/solr/solr/conf/schema.xml Wed Apr  9 20:05:25 2014
@@ -45,7 +45,7 @@
     that avoids logging every request
 -->
 
-<schema name="example" version="1.5">
+<schema name="example-DIH-solr" version="1.5">
   <!-- attribute "name" is the name of this schema and is only used for display purposes.
        version="x.y" is Solr's version number for the schema syntax and 
        semantics.  It should not normally be changed by applications.

Copied: lucene/dev/trunk/solr/example/example-DIH/solr/solr/conf/solrconfig.xml (from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/solrconfig.xml)
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/example/example-DIH/solr/solr/conf/solrconfig.xml?p2=lucene/dev/trunk/solr/example/example-DIH/solr/solr/conf/solrconfig.xml&p1=lucene/dev/trunk/solr/example/solr/collection1/conf/solrconfig.xml&r1=1586108&r2=1586120&rev=1586120&view=diff
==============================================================================
--- lucene/dev/trunk/solr/example/solr/collection1/conf/solrconfig.xml (original)
+++ lucene/dev/trunk/solr/example/example-DIH/solr/solr/conf/solrconfig.xml Wed Apr  9 20:05:25 2014
@@ -72,17 +72,19 @@
        The examples below can be used to load some solr-contribs along 
        with their external dependencies.
     -->
-  <lib dir="../../../contrib/extraction/lib" regex=".*\.jar" />
-  <lib dir="../../../dist/" regex="solr-cell-\d.*\.jar" />
+  <lib dir="../../../../dist/" regex="solr-dataimporthandler-.*\.jar" />
 
-  <lib dir="../../../contrib/clustering/lib/" regex=".*\.jar" />
-  <lib dir="../../../dist/" regex="solr-clustering-\d.*\.jar" />
+  <lib dir="../../../../contrib/extraction/lib" regex=".*\.jar" />
+  <lib dir="../../../../dist/" regex="solr-cell-\d.*\.jar" />
 
-  <lib dir="../../../contrib/langid/lib/" regex=".*\.jar" />
-  <lib dir="../../../dist/" regex="solr-langid-\d.*\.jar" />
+  <lib dir="../../../../contrib/clustering/lib/" regex=".*\.jar" />
+  <lib dir="../../../../dist/" regex="solr-clustering-\d.*\.jar" />
 
-  <lib dir="../../../contrib/velocity/lib" regex=".*\.jar" />
-  <lib dir="../../../dist/" regex="solr-velocity-\d.*\.jar" />
+  <lib dir="../../../../contrib/langid/lib/" regex=".*\.jar" />
+  <lib dir="../../../../dist/" regex="solr-langid-\d.*\.jar" />
+
+  <lib dir="../../../../contrib/velocity/lib" regex=".*\.jar" />
+  <lib dir="../../../../dist/" regex="solr-velocity-\d.*\.jar" />
 
   <!-- an exact 'path' can be used instead of a 'dir' to specify a 
        specific jar file.  This will cause a serious error to be logged 
@@ -803,6 +805,13 @@
        not be initialized until the first request that uses it.
 
     -->
+
+  <requestHandler name="/dataimport" class="solr.DataImportHandler">
+    <lst name="defaults">
+      <str name="config">solr-data-config.xml</str>
+    </lst>
+  </requestHandler>
+
   <!-- SearchHandler
 
        http://wiki.apache.org/solr/SearchHandler

Copied: lucene/dev/trunk/solr/example/example-DIH/solr/tika/conf/schema.xml (from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/schema.xml)
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/example/example-DIH/solr/tika/conf/schema.xml?p2=lucene/dev/trunk/solr/example/example-DIH/solr/tika/conf/schema.xml&p1=lucene/dev/trunk/solr/example/solr/collection1/conf/schema.xml&r1=1586108&r2=1586120&rev=1586120&view=diff
==============================================================================
--- lucene/dev/trunk/solr/example/solr/collection1/conf/schema.xml (original)
+++ lucene/dev/trunk/solr/example/example-DIH/solr/tika/conf/schema.xml Wed Apr  9 20:05:25 2014
@@ -45,7 +45,7 @@
     that avoids logging every request
 -->
 
-<schema name="example" version="1.5">
+<schema name="example-DIH-tika" version="1.5">
   <!-- attribute "name" is the name of this schema and is only used for display purposes.
        version="x.y" is Solr's version number for the schema syntax and 
        semantics.  It should not normally be changed by applications.
@@ -106,91 +106,13 @@
       trailing underscores (e.g. _version_) are reserved.
    -->
 
-   <!-- If you remove this field, you must _also_ disable the update log in solrconfig.xml
-      or Solr won't start. _version_ and update log are required for SolrCloud
-   --> 
-   <field name="_version_" type="long" indexed="true" stored="true"/>
-   
-   <!-- points to the root document of a block of nested documents. Required for nested
-      document support, may be removed otherwise
-   -->
-   <field name="_root_" type="string" indexed="true" stored="false"/>
-
-   <!-- Only remove the "id" field if you have a very good reason to. While not strictly
-     required, it is highly recommended. A <uniqueKey> is present in almost all Solr 
-     installations. See the <uniqueKey> declaration below where <uniqueKey> is set to "id".
-   -->   
-   <field name="id" type="string" indexed="true" stored="true" required="true" multiValued="false" /> 
-        
-   <field name="sku" type="text_en_splitting_tight" indexed="true" stored="true" omitNorms="true"/>
-   <field name="name" type="text_general" indexed="true" stored="true"/>
-   <field name="manu" type="text_general" indexed="true" stored="true" omitNorms="true"/>
-   <field name="cat" type="string" indexed="true" stored="true" multiValued="true"/>
-   <field name="features" type="text_general" indexed="true" stored="true" multiValued="true"/>
-   <field name="includes" type="text_general" indexed="true" stored="true" termVectors="true" termPositions="true" termOffsets="true" />
-
-   <field name="weight" type="float" indexed="true" stored="true"/>
-   <field name="price"  type="float" indexed="true" stored="true"/>
-   <field name="popularity" type="int" indexed="true" stored="true" />
-   <field name="inStock" type="boolean" indexed="true" stored="true" />
-
-   <field name="store" type="location" indexed="true" stored="true"/>
-
-   <!-- Common metadata fields, named specifically to match up with
-     SolrCell metadata when parsing rich documents such as Word, PDF.
-     Some fields are multiValued only because Tika currently may return
-     multiple values for them. Some metadata is parsed from the documents,
-     but there are some which come from the client context:
-       "content_type": From the HTTP headers of incoming stream
-       "resourcename": From SolrCell request param resource.name
-   -->
    <field name="title" type="text_general" indexed="true" stored="true" multiValued="true"/>
-   <field name="subject" type="text_general" indexed="true" stored="true"/>
-   <field name="description" type="text_general" indexed="true" stored="true"/>
-   <field name="comments" type="text_general" indexed="true" stored="true"/>
    <field name="author" type="text_general" indexed="true" stored="true"/>
-   <field name="keywords" type="text_general" indexed="true" stored="true"/>
-   <field name="category" type="text_general" indexed="true" stored="true"/>
-   <field name="resourcename" type="text_general" indexed="true" stored="true"/>
-   <field name="url" type="text_general" indexed="true" stored="true"/>
-   <field name="content_type" type="string" indexed="true" stored="true" multiValued="true"/>
-   <field name="last_modified" type="date" indexed="true" stored="true"/>
-   <field name="links" type="string" indexed="true" stored="true" multiValued="true"/>
-
-   <!-- Main body of document extracted by SolrCell.
-        NOTE: This field is not indexed by default, since it is also copied to "text"
-        using copyField below. This is to save space. Use this field for returning and
-        highlighting document content. Use the "text" field to search the content. -->
-   <field name="content" type="text_general" indexed="false" stored="true" multiValued="true"/>
-   
 
    <!-- catchall field, containing all other searchable text fields (implemented
         via copyField further on in this schema) -->
    <field name="text" type="text_general" indexed="true" stored="false" multiValued="true"/>
 
-   <!-- catchall text field that indexes tokens both normally and in reverse for efficient
-        leading wildcard queries. -->
-   <field name="text_rev" type="text_general_rev" indexed="true" stored="false" multiValued="true"/>
-
-   <!-- non-tokenized version of manufacturer to make it easier to sort or group
-        results by manufacturer.  copied from "manu" via copyField -->
-   <field name="manu_exact" type="string" indexed="true" stored="false"/>
-
-   <field name="payloads" type="payloads" indexed="true" stored="true"/>
-
-
-   <!--
-     Some fields such as popularity and manu_exact could be modified to
-     leverage doc values:
-     <field name="popularity" type="int" indexed="true" stored="true" docValues="true" />
-     <field name="manu_exact" type="string" indexed="false" stored="false" docValues="true" />
-     <field name="cat" type="string" indexed="true" stored="true" docValues="true" multiValued="true"/>
-
-
-     Although it would make indexing slightly slower and the index bigger, it
-     would also make the index faster to load, more memory-efficient and more
-     NRT-friendly.
-     -->
 
    <!-- Dynamic field definitions allow using convention over configuration
        for fields via the specification of patterns to match field names. 
@@ -228,8 +150,6 @@
    <dynamicField name="*_td" type="tdouble" indexed="true"  stored="true"/>
    <dynamicField name="*_tdt" type="tdate"  indexed="true"  stored="true"/>
 
-   <dynamicField name="*_c"   type="currency" indexed="true"  stored="true"/>
-
    <dynamicField name="ignored_*" type="ignored" multiValued="true"/>
    <dynamicField name="attr_*" type="text_general" indexed="true" stored="true" multiValued="true"/>
 
@@ -247,7 +167,7 @@
  <!-- Field to use to determine and enforce document uniqueness. 
       Unless this field is marked with required="false", it will be a required field
    -->
- <uniqueKey>id</uniqueKey>
+ <!-- <uniqueKey>id</uniqueKey> -->
 
  <!-- DEPRECATED: The defaultSearchField is consulted by various query parsers when
   parsing a query string that isn't explicit about the field.  Machine (non-user)
@@ -263,44 +183,6 @@
   The default is OR, which is generally assumed so it is not a good idea to change it
   globally here.  The "q.op" request parameter takes precedence over this.
  <solrQueryParser defaultOperator="OR"/> -->
-
-  <!-- copyField commands copy one field to another at the time a document
-        is added to the index.  It's used either to index the same field differently,
-        or to add multiple fields to the same field for easier/faster searching.  -->
-
-   <copyField source="cat" dest="text"/>
-   <copyField source="name" dest="text"/>
-   <copyField source="manu" dest="text"/>
-   <copyField source="features" dest="text"/>
-   <copyField source="includes" dest="text"/>
-   <copyField source="manu" dest="manu_exact"/>
-
-   <!-- Copy the price into a currency enabled field (default USD) -->
-   <copyField source="price" dest="price_c"/>
-
-   <!-- Text fields from SolrCell to search by default in our catch-all field -->
-   <copyField source="title" dest="text"/>
-   <copyField source="author" dest="text"/>
-   <copyField source="description" dest="text"/>
-   <copyField source="keywords" dest="text"/>
-   <copyField source="content" dest="text"/>
-   <copyField source="content_type" dest="text"/>
-   <copyField source="resourcename" dest="text"/>
-   <copyField source="url" dest="text"/>
-
-   <!-- Create a string version of author for faceting -->
-   <copyField source="author" dest="author_s"/>
-	
-   <!-- Above, multiple source fields are copied to the [text] field. 
-	  Another way to map multiple source fields to the same 
-	  destination field is to use the dynamic field syntax. 
-	  copyField also supports a maxChars to copy setting.  -->
-	   
-   <!-- <copyField source="*_t" dest="text" maxChars="3000"/> -->
-
-   <!-- copy name to alphaNameSort, a field designed for sorting by name -->
-   <!-- <copyField source="name" dest="alphaNameSort"/> -->
- 
   
     <!-- field type definitions. The "name" attribute is
        just a label to be used by field definitions.  The "class"
@@ -443,46 +325,26 @@
 
     <!-- A general text field that has reasonable, generic
          cross-language defaults: it tokenizes with StandardTokenizer,
-	 removes stop words from case-insensitive "stopwords.txt"
-	 (empty by default), and down cases.  At query time only, it
-	 also applies synonyms. -->
+         and down cases. -->
     <fieldType name="text_general" class="solr.TextField" positionIncrementGap="100">
       <analyzer type="index">
         <tokenizer class="solr.StandardTokenizerFactory"/>
-        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
-        <!-- in this example, we will only use synonyms at query time
-        <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
-        -->
         <filter class="solr.LowerCaseFilterFactory"/>
       </analyzer>
       <analyzer type="query">
         <tokenizer class="solr.StandardTokenizerFactory"/>
-        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
-        <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
         <filter class="solr.LowerCaseFilterFactory"/>
       </analyzer>
     </fieldType>
 
     <!-- A text field with defaults appropriate for English: it
-         tokenizes with StandardTokenizer, removes English stop words
-         (lang/stopwords_en.txt), down cases, protects words from protwords.txt, and
-         finally applies Porter's stemming.  The query time analyzer
-         also applies synonyms from synonyms.txt. -->
+         tokenizes with StandardTokenizer, down cases, and
+         finally applies Porter's stemming. -->
     <fieldType name="text_en" class="solr.TextField" positionIncrementGap="100">
       <analyzer type="index">
         <tokenizer class="solr.StandardTokenizerFactory"/>
-        <!-- in this example, we will only use synonyms at query time
-        <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
-        -->
-        <!-- Case insensitive stop word removal.
-        -->
-        <filter class="solr.StopFilterFactory"
-                ignoreCase="true"
-                words="lang/stopwords_en.txt"
-                />
         <filter class="solr.LowerCaseFilterFactory"/>
 	<filter class="solr.EnglishPossessiveFilterFactory"/>
-        <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
 	<!-- Optionally you may want to use this less aggressive stemmer instead of PorterStemFilterFactory:
         <filter class="solr.EnglishMinimalStemFilterFactory"/>
 	-->
@@ -490,14 +352,8 @@
       </analyzer>
       <analyzer type="query">
         <tokenizer class="solr.StandardTokenizerFactory"/>
-        <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
-        <filter class="solr.StopFilterFactory"
-                ignoreCase="true"
-                words="lang/stopwords_en.txt"
-                />
         <filter class="solr.LowerCaseFilterFactory"/>
 	<filter class="solr.EnglishPossessiveFilterFactory"/>
-        <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
 	<!-- Optionally you may want to use this less aggressive stemmer instead of PorterStemFilterFactory:
         <filter class="solr.EnglishMinimalStemFilterFactory"/>
 	-->
@@ -517,30 +373,14 @@
     <fieldType name="text_en_splitting" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
       <analyzer type="index">
         <tokenizer class="solr.WhitespaceTokenizerFactory"/>
-        <!-- in this example, we will only use synonyms at query time
-        <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
-        -->
-        <!-- Case insensitive stop word removal.
-        -->
-        <filter class="solr.StopFilterFactory"
-                ignoreCase="true"
-                words="lang/stopwords_en.txt"
-                />
         <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
         <filter class="solr.LowerCaseFilterFactory"/>
-        <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
         <filter class="solr.PorterStemFilterFactory"/>
       </analyzer>
       <analyzer type="query">
         <tokenizer class="solr.WhitespaceTokenizerFactory"/>
-        <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
-        <filter class="solr.StopFilterFactory"
-                ignoreCase="true"
-                words="lang/stopwords_en.txt"
-                />
         <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
         <filter class="solr.LowerCaseFilterFactory"/>
-        <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
         <filter class="solr.PorterStemFilterFactory"/>
       </analyzer>
     </fieldType>
@@ -550,11 +390,8 @@
     <fieldType name="text_en_splitting_tight" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
       <analyzer>
         <tokenizer class="solr.WhitespaceTokenizerFactory"/>
-        <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
-        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_en.txt"/>
         <filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
         <filter class="solr.LowerCaseFilterFactory"/>
-        <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
         <filter class="solr.EnglishMinimalStemFilterFactory"/>
         <!-- this filter can remove any duplicate tokens that appear at the same position - sometimes
              possible with WordDelimiterFilter in conjunction with stemming. -->
@@ -567,15 +404,12 @@
     <fieldType name="text_general_rev" class="solr.TextField" positionIncrementGap="100">
       <analyzer type="index">
         <tokenizer class="solr.StandardTokenizerFactory"/>
-        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
         <filter class="solr.LowerCaseFilterFactory"/>
         <filter class="solr.ReversedWildcardFilterFactory" withOriginal="true"
            maxPosAsterisk="3" maxPosQuestion="2" maxFractionAsterisk="0.33"/>
       </analyzer>
       <analyzer type="query">
         <tokenizer class="solr.StandardTokenizerFactory"/>
-        <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
-        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
         <filter class="solr.LowerCaseFilterFactory"/>
       </analyzer>
     </fieldType>
@@ -708,19 +542,7 @@
     <fieldType name="location_rpt" class="solr.SpatialRecursivePrefixTreeFieldType"
         geo="true" distErrPct="0.025" maxDistErr="0.000009" units="degrees" />
 
-   <!-- Money/currency field type. See http://wiki.apache.org/solr/MoneyFieldType
-        Parameters:
-          defaultCurrency: Specifies the default currency if none specified. Defaults to "USD"
-          precisionStep:   Specifies the precisionStep for the TrieLong field used for the amount
-          providerClass:   Lets you plug in other exchange provider backend:
-                           solr.FileExchangeRateProvider is the default and takes one parameter:
-                             currencyConfig: name of an xml file holding exchange rates
-                           solr.OpenExchangeRatesOrgProvider uses rates from openexchangerates.org:
-                             ratesFileLocation: URL or path to rates JSON file (default latest.json on the web)
-                             refreshInterval: Number of minutes between each rates fetch (default: 1440, min: 60)
-   -->
-    <fieldType name="currency" class="solr.CurrencyField" precisionStep="8" defaultCurrency="USD" currencyConfig="currency.xml" />
-             
+
 
 
    <!-- some examples for different languages (generally ordered by ISO code) -->
@@ -731,7 +553,6 @@
         <tokenizer class="solr.StandardTokenizerFactory"/>
         <!-- for any non-arabic -->
         <filter class="solr.LowerCaseFilterFactory"/>
-        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ar.txt" />
         <!-- normalizes ﻯ to ﻱ, etc -->
         <filter class="solr.ArabicNormalizationFilterFactory"/>
         <filter class="solr.ArabicStemFilterFactory"/>
@@ -743,8 +564,7 @@
       <analyzer> 
         <tokenizer class="solr.StandardTokenizerFactory"/> 
         <filter class="solr.LowerCaseFilterFactory"/>
-        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_bg.txt" /> 
-        <filter class="solr.BulgarianStemFilterFactory"/>       
+        <filter class="solr.BulgarianStemFilterFactory"/>
       </analyzer>
     </fieldType>
     
@@ -752,11 +572,8 @@
     <fieldType name="text_ca" class="solr.TextField" positionIncrementGap="100">
       <analyzer> 
         <tokenizer class="solr.StandardTokenizerFactory"/>
-        <!-- removes l', etc -->
-        <filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_ca.txt"/>
         <filter class="solr.LowerCaseFilterFactory"/>
-        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ca.txt" />
-        <filter class="solr.SnowballPorterFilterFactory" language="Catalan"/>       
+        <filter class="solr.SnowballPorterFilterFactory" language="Catalan"/>
       </analyzer>
     </fieldType>
     
@@ -779,7 +596,6 @@
         <filter class="solr.SoraniNormalizationFilterFactory"/>
         <!-- for any latin text -->
         <filter class="solr.LowerCaseFilterFactory"/>
-        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ckb.txt"/>
         <filter class="solr.SoraniStemFilterFactory"/>
       </analyzer>
     </fieldType>
@@ -789,8 +605,7 @@
       <analyzer> 
         <tokenizer class="solr.StandardTokenizerFactory"/>
         <filter class="solr.LowerCaseFilterFactory"/>
-        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_cz.txt" />
-        <filter class="solr.CzechStemFilterFactory"/>       
+        <filter class="solr.CzechStemFilterFactory"/>
       </analyzer>
     </fieldType>
     
@@ -799,8 +614,7 @@
       <analyzer> 
         <tokenizer class="solr.StandardTokenizerFactory"/>
         <filter class="solr.LowerCaseFilterFactory"/>
-        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_da.txt" format="snowball" />
-        <filter class="solr.SnowballPorterFilterFactory" language="Danish"/>       
+        <filter class="solr.SnowballPorterFilterFactory" language="Danish"/>
       </analyzer>
     </fieldType>
     
@@ -809,7 +623,6 @@
       <analyzer> 
         <tokenizer class="solr.StandardTokenizerFactory"/>
         <filter class="solr.LowerCaseFilterFactory"/>
-        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_de.txt" format="snowball" />
         <filter class="solr.GermanNormalizationFilterFactory"/>
         <filter class="solr.GermanLightStemFilterFactory"/>
         <!-- less aggressive: <filter class="solr.GermanMinimalStemFilterFactory"/> -->
@@ -823,7 +636,6 @@
         <tokenizer class="solr.StandardTokenizerFactory"/>
         <!-- greek specific lowercase for sigma -->
         <filter class="solr.GreekLowerCaseFilterFactory"/>
-        <filter class="solr.StopFilterFactory" ignoreCase="false" words="lang/stopwords_el.txt" />
         <filter class="solr.GreekStemFilterFactory"/>
       </analyzer>
     </fieldType>
@@ -833,7 +645,6 @@
       <analyzer> 
         <tokenizer class="solr.StandardTokenizerFactory"/>
         <filter class="solr.LowerCaseFilterFactory"/>
-        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_es.txt" format="snowball" />
         <filter class="solr.SpanishLightStemFilterFactory"/>
         <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="Spanish"/> -->
       </analyzer>
@@ -844,7 +655,6 @@
       <analyzer> 
         <tokenizer class="solr.StandardTokenizerFactory"/>
         <filter class="solr.LowerCaseFilterFactory"/>
-        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_eu.txt" />
         <filter class="solr.SnowballPorterFilterFactory" language="Basque"/>
       </analyzer>
     </fieldType>
@@ -858,7 +668,6 @@
         <filter class="solr.LowerCaseFilterFactory"/>
         <filter class="solr.ArabicNormalizationFilterFactory"/>
         <filter class="solr.PersianNormalizationFilterFactory"/>
-        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_fa.txt" />
       </analyzer>
     </fieldType>
     
@@ -867,7 +676,6 @@
       <analyzer> 
         <tokenizer class="solr.StandardTokenizerFactory"/>
         <filter class="solr.LowerCaseFilterFactory"/>
-        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_fi.txt" format="snowball" />
         <filter class="solr.SnowballPorterFilterFactory" language="Finnish"/>
         <!-- less aggressive: <filter class="solr.FinnishLightStemFilterFactory"/> -->
       </analyzer>
@@ -877,10 +685,7 @@
     <fieldType name="text_fr" class="solr.TextField" positionIncrementGap="100">
       <analyzer> 
         <tokenizer class="solr.StandardTokenizerFactory"/>
-        <!-- removes l', etc -->
-        <filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_fr.txt"/>
         <filter class="solr.LowerCaseFilterFactory"/>
-        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_fr.txt" format="snowball" />
         <filter class="solr.FrenchLightStemFilterFactory"/>
         <!-- less aggressive: <filter class="solr.FrenchMinimalStemFilterFactory"/> -->
         <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="French"/> -->
@@ -891,12 +696,7 @@
     <fieldType name="text_ga" class="solr.TextField" positionIncrementGap="100">
       <analyzer> 
         <tokenizer class="solr.StandardTokenizerFactory"/>
-        <!-- removes d', etc -->
-        <filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_ga.txt"/>
-        <!-- removes n-, etc. position increments is intentionally false! -->
-        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/hyphenations_ga.txt"/>
         <filter class="solr.IrishLowerCaseFilterFactory"/>
-        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ga.txt"/>
         <filter class="solr.SnowballPorterFilterFactory" language="Irish"/>
       </analyzer>
     </fieldType>
@@ -906,7 +706,6 @@
       <analyzer> 
         <tokenizer class="solr.StandardTokenizerFactory"/>
         <filter class="solr.LowerCaseFilterFactory"/>
-        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_gl.txt" />
         <filter class="solr.GalicianStemFilterFactory"/>
         <!-- less aggressive: <filter class="solr.GalicianMinimalStemFilterFactory"/> -->
       </analyzer>
@@ -921,7 +720,6 @@
         <filter class="solr.IndicNormalizationFilterFactory"/>
         <!-- normalizes variation in spelling -->
         <filter class="solr.HindiNormalizationFilterFactory"/>
-        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_hi.txt" />
         <filter class="solr.HindiStemFilterFactory"/>
       </analyzer>
     </fieldType>
@@ -931,7 +729,6 @@
       <analyzer> 
         <tokenizer class="solr.StandardTokenizerFactory"/>
         <filter class="solr.LowerCaseFilterFactory"/>
-        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_hu.txt" format="snowball" />
         <filter class="solr.SnowballPorterFilterFactory" language="Hungarian"/>
         <!-- less aggressive: <filter class="solr.HungarianLightStemFilterFactory"/> -->   
       </analyzer>
@@ -942,7 +739,6 @@
       <analyzer> 
         <tokenizer class="solr.StandardTokenizerFactory"/>
         <filter class="solr.LowerCaseFilterFactory"/>
-        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_hy.txt" />
         <filter class="solr.SnowballPorterFilterFactory" language="Armenian"/>
       </analyzer>
     </fieldType>
@@ -952,7 +748,6 @@
       <analyzer> 
         <tokenizer class="solr.StandardTokenizerFactory"/>
         <filter class="solr.LowerCaseFilterFactory"/>
-        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_id.txt" />
         <!-- for a less aggressive approach (only inflectional suffixes), set stemDerivational to false -->
         <filter class="solr.IndonesianStemFilterFactory" stemDerivational="true"/>
       </analyzer>
@@ -962,10 +757,7 @@
     <fieldType name="text_it" class="solr.TextField" positionIncrementGap="100">
       <analyzer> 
         <tokenizer class="solr.StandardTokenizerFactory"/>
-        <!-- removes l', etc -->
-        <filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_it.txt"/>
         <filter class="solr.LowerCaseFilterFactory"/>
-        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_it.txt" format="snowball" />
         <filter class="solr.ItalianLightStemFilterFactory"/>
         <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="Italian"/> -->
       </analyzer>
@@ -1001,22 +793,15 @@
                      userDictionary: user dictionary filename
              userDictionaryEncoding: user dictionary encoding (default is UTF-8)
 
-           See lang/userdict_ja.txt for a sample user dictionary file.
-
            Punctuation characters are discarded by default.  Use discardPunctuation="false" to keep them.
 
            See http://wiki.apache.org/solr/JapaneseLanguageSupport for more on Japanese language support.
         -->
         <tokenizer class="solr.JapaneseTokenizerFactory" mode="search"/>
-        <!--<tokenizer class="solr.JapaneseTokenizerFactory" mode="search" userDictionary="lang/userdict_ja.txt"/>-->
         <!-- Reduces inflected verbs and adjectives to their base/dictionary forms (辞書形) -->
         <filter class="solr.JapaneseBaseFormFilterFactory"/>
-        <!-- Removes tokens with certain part-of-speech tags -->
-        <filter class="solr.JapanesePartOfSpeechStopFilterFactory" tags="lang/stoptags_ja.txt" />
         <!-- Normalizes full-width romaji to half-width and half-width kana to full-width (Unicode NFKC subset) -->
         <filter class="solr.CJKWidthFilterFactory"/>
-        <!-- Removes common tokens typically not useful for search, but have a negative effect on ranking -->
-        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ja.txt" />
         <!-- Normalizes common katakana spelling variations by removing any last long sound character (U+30FC) -->
         <filter class="solr.JapaneseKatakanaStemFilterFactory" minimumLength="4"/>
         <!-- Lower-cases romaji characters -->
@@ -1029,7 +814,6 @@
       <analyzer> 
         <tokenizer class="solr.StandardTokenizerFactory"/>
         <filter class="solr.LowerCaseFilterFactory"/>
-        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_lv.txt" />
         <filter class="solr.LatvianStemFilterFactory"/>
       </analyzer>
     </fieldType>
@@ -1039,8 +823,6 @@
       <analyzer> 
         <tokenizer class="solr.StandardTokenizerFactory"/>
         <filter class="solr.LowerCaseFilterFactory"/>
-        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_nl.txt" format="snowball" />
-        <filter class="solr.StemmerOverrideFilterFactory" dictionary="lang/stemdict_nl.txt" ignoreCase="false"/>
         <filter class="solr.SnowballPorterFilterFactory" language="Dutch"/>
       </analyzer>
     </fieldType>
@@ -1050,7 +832,6 @@
       <analyzer> 
         <tokenizer class="solr.StandardTokenizerFactory"/>
         <filter class="solr.LowerCaseFilterFactory"/>
-        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_no.txt" format="snowball" />
         <filter class="solr.SnowballPorterFilterFactory" language="Norwegian"/>
         <!-- less aggressive: <filter class="solr.NorwegianLightStemFilterFactory" variant="nb"/> -->
         <!-- singular/plural: <filter class="solr.NorwegianMinimalStemFilterFactory" variant="nb"/> -->
@@ -1063,7 +844,6 @@
       <analyzer> 
         <tokenizer class="solr.StandardTokenizerFactory"/>
         <filter class="solr.LowerCaseFilterFactory"/>
-        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_pt.txt" format="snowball" />
         <filter class="solr.PortugueseLightStemFilterFactory"/>
         <!-- less aggressive: <filter class="solr.PortugueseMinimalStemFilterFactory"/> -->
         <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="Portuguese"/> -->
@@ -1076,7 +856,6 @@
       <analyzer> 
         <tokenizer class="solr.StandardTokenizerFactory"/>
         <filter class="solr.LowerCaseFilterFactory"/>
-        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ro.txt" />
         <filter class="solr.SnowballPorterFilterFactory" language="Romanian"/>
       </analyzer>
     </fieldType>
@@ -1086,7 +865,6 @@
       <analyzer> 
         <tokenizer class="solr.StandardTokenizerFactory"/>
         <filter class="solr.LowerCaseFilterFactory"/>
-        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ru.txt" format="snowball" />
         <filter class="solr.SnowballPorterFilterFactory" language="Russian"/>
         <!-- less aggressive: <filter class="solr.RussianLightStemFilterFactory"/> -->
       </analyzer>
@@ -1097,7 +875,6 @@
       <analyzer> 
         <tokenizer class="solr.StandardTokenizerFactory"/>
         <filter class="solr.LowerCaseFilterFactory"/>
-        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_sv.txt" format="snowball" />
         <filter class="solr.SnowballPorterFilterFactory" language="Swedish"/>
         <!-- less aggressive: <filter class="solr.SwedishLightStemFilterFactory"/> -->
       </analyzer>
@@ -1109,7 +886,6 @@
         <tokenizer class="solr.StandardTokenizerFactory"/>
         <filter class="solr.LowerCaseFilterFactory"/>
         <filter class="solr.ThaiWordFilterFactory"/>
-        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_th.txt" />
       </analyzer>
     </fieldType>
     
@@ -1119,7 +895,6 @@
         <tokenizer class="solr.StandardTokenizerFactory"/>
         <filter class="solr.ApostropheFilterFactory"/>
         <filter class="solr.TurkishLowerCaseFilterFactory"/>
-        <filter class="solr.StopFilterFactory" ignoreCase="false" words="lang/stopwords_tr.txt" />
         <filter class="solr.SnowballPorterFilterFactory" language="Turkish"/>
       </analyzer>
     </fieldType>

Copied: lucene/dev/trunk/solr/example/example-DIH/solr/tika/conf/solrconfig.xml (from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/solrconfig.xml)
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/example/example-DIH/solr/tika/conf/solrconfig.xml?p2=lucene/dev/trunk/solr/example/example-DIH/solr/tika/conf/solrconfig.xml&p1=lucene/dev/trunk/solr/example/solr/collection1/conf/solrconfig.xml&r1=1586108&r2=1586120&rev=1586120&view=diff
==============================================================================
--- lucene/dev/trunk/solr/example/solr/collection1/conf/solrconfig.xml (original)
+++ lucene/dev/trunk/solr/example/example-DIH/solr/tika/conf/solrconfig.xml Wed Apr  9 20:05:25 2014
@@ -72,17 +72,20 @@
        The examples below can be used to load some solr-contribs along 
        with their external dependencies.
     -->
-  <lib dir="../../../contrib/extraction/lib" regex=".*\.jar" />
-  <lib dir="../../../dist/" regex="solr-cell-\d.*\.jar" />
+  <lib dir="../../../../contrib/dataimporthandler/lib/" regex=".*\.jar" />
+  <lib dir="../../../../dist/" regex="solr-dataimporthandler-.*\.jar" />
 
-  <lib dir="../../../contrib/clustering/lib/" regex=".*\.jar" />
-  <lib dir="../../../dist/" regex="solr-clustering-\d.*\.jar" />
+  <lib dir="../../../../contrib/extraction/lib" regex=".*\.jar" />
+  <lib dir="../../../../dist/" regex="solr-cell-\d.*\.jar" />
 
-  <lib dir="../../../contrib/langid/lib/" regex=".*\.jar" />
-  <lib dir="../../../dist/" regex="solr-langid-\d.*\.jar" />
+  <lib dir="../../../../contrib/clustering/lib/" regex=".*\.jar" />
+  <lib dir="../../../../dist/" regex="solr-clustering-\d.*\.jar" />
 
-  <lib dir="../../../contrib/velocity/lib" regex=".*\.jar" />
-  <lib dir="../../../dist/" regex="solr-velocity-\d.*\.jar" />
+  <lib dir="../../../../contrib/langid/lib/" regex=".*\.jar" />
+  <lib dir="../../../../dist/" regex="solr-langid-\d.*\.jar" />
+
+  <lib dir="../../../../contrib/velocity/lib" regex=".*\.jar" />
+  <lib dir="../../../../dist/" regex="solr-velocity-\d.*\.jar" />
 
   <!-- an exact 'path' can be used instead of a 'dir' to specify a 
        specific jar file.  This will cause a serious error to be logged 
@@ -353,9 +356,11 @@
          is recommended (see below).
          "dir" - the target directory for transaction logs, defaults to the
                 solr data directory.  --> 
+    <!--
     <updateLog>
       <str name="dir">${solr.ulog.dir:}</str>
     </updateLog>
+    -->
  
     <!-- AutoCommit
 
@@ -803,6 +808,13 @@
        not be initialized until the first request that uses it.
 
     -->
+
+  <requestHandler name="/dataimport" class="solr.DataImportHandler">
+    <lst name="defaults">
+      <str name="config">tika-data-config.xml</str>
+    </lst>
+  </requestHandler>
+
   <!-- SearchHandler
 
        http://wiki.apache.org/solr/SearchHandler
@@ -1558,31 +1570,6 @@
   </requestHandler>
 
 
-  <!-- Query Elevation Component
-
-       http://wiki.apache.org/solr/QueryElevationComponent
-
-       a search component that enables you to configure the top
-       results for a given query regardless of the normal lucene
-       scoring.
-    -->
-  <searchComponent name="elevator" class="solr.QueryElevationComponent" >
-    <!-- pick a fieldType to analyze queries -->
-    <str name="queryFieldType">string</str>
-    <str name="config-file">elevate.xml</str>
-  </searchComponent>
-
-  <!-- A request handler for demonstrating the elevator component -->
-  <requestHandler name="/elevate" class="solr.SearchHandler" startup="lazy">
-    <lst name="defaults">
-      <str name="echoParams">explicit</str>
-      <str name="df">text</str>
-    </lst>
-    <arr name="last-components">
-      <str>elevator</str>
-    </arr>
-  </requestHandler>
-
   <!-- Highlighting Component
 
        http://wiki.apache.org/solr/HighlightingParameters



Mime
View raw message