lucene-solr-user mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Bachan <balajichande...@gmail.com>
Subject How to index more than one entity in ApacheSolr
Date Thu, 05 Feb 2015 08:38:46 GMT
Only the mwikipage entity is indexed by Solr; all of the other mapped entities
are ignored during indexing. The configuration is as follows:

*data-config.xml*

<dataConfig>
    <!-- JDBC connection to the MediaWiki database queried by every entity below. -->
    <!-- NOTE(review): batchSize="1" is kept as posted; confirm the intended fetch size
         against the JDBC driver's documentation. -->
    <dataSource type="JdbcDataSource"
                driver="com.mysql.jdbc.Driver"
                url="jdbc:mysql://localhost:3306/cwiki"
                user="root"
                password="123"
                batchSize="1"/>

    <document name="mediawiki-doc">

        <!-- Root entity: each mwikipage row becomes one Solr document keyed by page_id.
             Every nested entity contributes extra fields to that same document. -->
        <entity name="mwikipage" query="SELECT * FROM mwikipage">
            <field column="page_id" name="id"/>
            <field column="page_title" name="page_title"/>
            <field column="page_latest" name="page_latest"/>
            <field column="page_counter" name="page_counter"/>

            <!-- Placeholders in the child queries are written as ${entity.column} with no
                 padding inside the braces. The padded form "${ mwikipage.page_id }" is
                 believed not to resolve, which leaves the WHERE clause comparing against
                 an empty value so that no child rows are returned. -->

            <!-- Links that originate from the current page. -->
            <entity name="mwikipagelinks"
                    query="SELECT * FROM mwikipagelinks WHERE pl_from = '${mwikipage.page_id}'"
                    deltaQuery="SELECT pl_from FROM mwikipagelinks WHERE last_modified &gt; '${dataimporter.last_index_time}'"
                    parentDeltaQuery="SELECT page_id FROM mwikipage WHERE page_id = '${mwikipagelinks.pl_from}'">
                <field column="pl_namespace" name="pl_namespace"/>
                <field column="pl_title" name="pl_title"/>
            </entity>

            <!-- Redirect target of the current page; the duplicated rd_title mapping
                 has been removed. -->
            <entity name="mwikiredirect"
                    query="SELECT * FROM mwikiredirect WHERE rd_from = '${mwikipage.page_id}'">
                <field column="rd_namespace" name="rd_namespace"/>
                <field column="rd_title" name="rd_title"/>
            </entity>

            <!-- Revision row selected by the page's page_latest value. -->
            <entity name="mwikirevision"
                    query="SELECT * FROM mwikirevision WHERE rev_id = '${mwikipage.page_latest}'">
                <field column="rev_id" name="rev_id"/>
                <field column="rev_page" name="rev_page"/>
                <field column="rev_text_id" name="rev_text_id"/>

                <!-- Text row selected by the revision's rev_text_id value. -->
                <entity name="mwikitext"
                        query="select * from mwikitext WHERE old_id = '${mwikirevision.rev_text_id}'">
                    <field column="old_id" name="old_id"/>
                    <field column="old_text" name="old_text"/>
                    <field column="old_flags" name="old_flags"/>
                </entity>
            </entity>

            <!-- Category links whose cl_from matches the current page. -->
            <entity name="mwikicategorylinks"
                    query="SELECT * FROM mwikicategorylinks WHERE cl_from = '${mwikipage.page_id}'">
                <field column="cl_from" name="cl_from"/>
                <field column="cl_to" name="cl_to"/>
            </entity>

            <!-- External links whose el_from matches the current page. -->
            <entity name="mwikiexternallinks"
                    query="SELECT * FROM mwikiexternallinks where el_from = '${mwikipage.page_id}'">
                <field column="el_from" name="el_from"/>
                <field column="el_to" name="el_to"/>
                <field column="el_index" name="el_index"/>
            </entity>
        </entity>
    </document>
</dataConfig>

*schema.xml*



<!-- Fields filled by the mwikipage entity in data-config.xml. -->
<field name="page_title" type="text_general" indexed="true" stored="true" multiValued="true"/>
<field name="page_latest" type="text_general" indexed="true" stored="true" multiValued="true"/>
<field name="page_counter" type="text_general" indexed="true" stored="true" multiValued="true"/>

<!-- Fields filled by the mwikipagelinks entity. -->
<field name="pl_namespace" type="text_general" indexed="true" stored="true" multiValued="true"/>
<field name="pl_title" type="text_general" indexed="true" stored="true" multiValued="true"/>

<!-- Fields filled by the mwikiredirect entity. -->
<field name="rd_namespace" type="text_general" indexed="true" stored="true" multiValued="true"/>
<field name="rd_title" type="text_general" indexed="true" stored="true" multiValued="true"/>

<!-- Fields filled by the mwikirevision entity. -->
<field name="rev_id" type="text_general" indexed="true" stored="true" multiValued="true"/>
<field name="rev_page" type="text_general" indexed="true" stored="true" multiValued="true"/>
<field name="rev_text_id" type="text_general" indexed="true" stored="true" multiValued="true"/>

<!-- Fields filled by the mwikitext entity. -->
<field name="old_id" type="text_general" indexed="true" stored="true" multiValued="true"/>
<field name="old_text" type="text_general" indexed="true" stored="true" multiValued="true"/>
<field name="old_flags" type="text_general" indexed="true" stored="true" multiValued="true"/>

<!-- Fields filled by the mwikicategorylinks entity. -->
<field name="cl_from" type="text_general" indexed="true" stored="true" multiValued="true"/>
<field name="cl_to" type="text_general" indexed="true" stored="true" multiValued="true"/>

<!-- Fields filled by the mwikiexternallinks entity. -->
<field name="el_from" type="text_general" indexed="true" stored="true" multiValued="true"/>
<field name="el_to" type="text_general" indexed="true" stored="true" multiValued="true"/>
<field name="el_index" type="text_general" indexed="true" stored="true" multiValued="true"/>


*document uniqueness enforcer*

 <!-- Declares "id" as the unique key field; data-config.xml fills it from the page_id column of mwikipage. -->
 <uniqueKey>id</uniqueKey> 

*Result*

  Last Update: 13:24:39 Indexing completed. 
 Added/Updated: 530 documents. Deleted 0 documents. (Duration: 1s)
 Requests: 2,651 (2,651/s), Fetched: 530 (530/s), Skipped: 0, Processed: 530
(530/s) 

When queried, only the fields of the *mwikipage* entity are indexed, and the
query result is as follows:


  "docs": [
  {
    "id": "1",
    "page_title": [
      "Main_Page"
    ],
    "page_latest": [
      "1247"
    ],
    "page_counter": [
      "5223"
    ],
    "_version_": 1492251847862255600
  }
 

The other entities are ignored. How can I index all of the entities into the document?

Thanks and Regards,
Bachan




--
View this message in context: http://lucene.472066.n3.nabble.com/How-to-index-more-than-one-entity-in-ApacheSolr-tp4184104.html
Sent from the Solr - User mailing list archive at Nabble.com.

Mime
View raw message