lucene-solr-user mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Pierre Caserta <pierre.case...@gmail.com>
Subject DataImportHandler with a managed-schema only import id and version
Date Wed, 10 Aug 2016 07:12:34 GMT
Hi,
It seems that using the DataImportHandler with an XPathEntityProcessor config in a managed-schema
setup only imports the id and _version_ fields.

data-config.xml

<dataConfig>
    <!--
      DataImportHandler configuration: reads one XML file and emits one Solr
      document per /posts/row element. All values are taken from attributes
      of the row element.
    -->
    <dataSource type="FileDataSource" encoding="UTF-8" />
    <document>
        <!--
          url is resolved from the "dataurl" request parameter, so the file to
          import is chosen per request (for example ...&dataurl=/path/to/file.xml).
          The transformers listed here enable the regex/splitBy, dateTimeFormat
          and stripHTML attributes used on the fields below.
        -->
        <entity name="post"
                processor="XPathEntityProcessor"
                stream="true"
                forEach="/posts/row/"
                url="${dataimporter.request.dataurl}"
                transformer="RegexTransformer,DateFormatTransformer,HTMLStripTransformer">

            <field column="id"               xpath="/posts/row/@Id" />
            <field column="postTypeId"       xpath="/posts/row/@PostTypeId" />
            <field column="acceptedAnswerId" xpath="/posts/row/@AcceptedAnswerId" />

            <!--
              Date patterns use HH (hour of day, 0-23). The previous pattern used
              hh (clock hour, 1-12) without an am/pm marker, which reads an hour
              value of 12 as midnight.
              NOTE(review): assumes the source timestamps are 24-hour values such
              as 2016-08-10T19:12:34.000; confirm against the data file.
            -->
            <field column="creationDate"
                   xpath="/posts/row/@CreationDate"
                   dateTimeFormat="yyyy-MM-dd'T'HH:mm:ss.SSS" />

            <field column="postScore" xpath="/posts/row/@Score" />
            <field column="viewCount" xpath="/posts/row/@ViewCount" />

            <!-- stripHTML is handled by HTMLStripTransformer. -->
            <field column="body" xpath="/posts/row/@Body" stripHTML="true" />

            <field column="ownerUserId"           xpath="/posts/row/@OwnerUserId" />
            <field column="lastEditorUserId"      xpath="/posts/row/@LastEditorUserId" />
            <field column="lastEditorDisplayName" xpath="/posts/row/@LastEditorDisplayName" />
            <field column="lastActivityDate"
                   xpath="/posts/row/@LastActivityDate"
                   dateTimeFormat="yyyy-MM-dd'T'HH:mm:ss.SSS" />
            <field column="title" xpath="/posts/row/@Title" />

            <!--
              Tags are processed in two steps: the regex captures everything
              between the first "<" and the last ">" of the Tags attribute into
              trimmedTags, then splitBy breaks that value on "><" to produce the
              multi-valued tags column.
            -->
            <field column="trimmedTags" xpath="/posts/row/@Tags" regex="&lt;(.*)&gt;" />
            <field column="tags" sourceColName="trimmedTags" splitBy="&gt;&lt;" />

            <field column="answerCount"   xpath="/posts/row/@AnswerCount" />
            <field column="commentCount"  xpath="/posts/row/@CommentCount" />
            <field column="favoriteCount" xpath="/posts/row/@FavoriteCount" />
            <field column="communityOwnedDate"
                   xpath="/posts/row/@CommunityOwnedDate"
                   dateTimeFormat="yyyy-MM-dd'T'HH:mm:ss.SSS" />
        </entity>
    </document>
</dataConfig>


http://192.168.99.100:8999/solr/solrexchange/select?indent=on&q=*:*&wt=json
{
  "responseHeader":{
    "status":0,
    "QTime":0,
    "params":{
      "q":"*:*",
      "indent":"on",
      "wt":"json",
      "_":"1470811193595"}},
  "response":{"numFound":8,"start":0,"docs":[
      {
        "id":"38822",
        "_version_":1542258196375142400},
      {
        "id":"38836",
        "_version_":1542258196387725312},
      {
        "id":"63896",
        "_version_":1542258196388773888},
      {
        "id":"65406",
        "_version_":1542258196391919616},
      {
        "id":"1357173",
        "_version_":1542258196391919617},
      {
        "id":"5339763",
        "_version_":1542258196392968192},
      {
        "id":"9932722",
        "_version_":1542258196392968193},
      {
        "id":"9217299",
        "_version_":1542258196392968194}]
  }}
 
data_search.xml (8 rows)



the url I am hitting (with custom dataurl parameter)

curl 'http://192.168.99.100:8999/solr/solrexchange/dataimport?command=full-import&commit=true&dataurl=/code/solr/data/search/dih/data_search.xml'
<http://192.168.99.100:8999/solr/solrexchange/dataimport?command=full-import&commit=true&dataurl=/code/solr/data/search/dih/data_search.xml'>

I changed my data to use the <add> <doc> <field> format and used the bin/post tool, and
that works as expected.
Now I would like to make it work with the DataImportHandler.
How can I use the DataImportHandler to import my documents?

Thanks,
Pierre Caserta


Mime
View raw message