Dear Wiki user,
You have subscribed to a wiki page or wiki category on "Solr Wiki" for change notification.
The following page has been changed by BrianLucas:
http://wiki.apache.org/solr/SolrUpdate
The comment on the change is:
first attempt at solrUpdate
New page:
<?php
// VERSION SolrUpdate 0.100
// Written by Brian Lucas, use any which way you see fit.
// DESCRIPTION
//
// This class performs an update on an existing Solr repository
// It uses the handy ADODB database library, but you can substitute your own if you prefer.
// ADODB makes dealing with SQL much nicer than the php mysql_ calls and is handy if you switch
// databases.
// http://adodb.sourceforge.net/
// SQL TABLE Notice:
// Inside your table that stores the primary key for your data or each record, create a tinyint(1)
// (or enum) flag
// called "index_flag". Ex:
// ALTER TABLE `stories` ADD `index_flag` TINYINT( 1 ) NOT NULL ;
// This will allow the update process to continue where it left off in case there is an error.
// Example usages:
// From the command line
/*
#!/usr/local/bin/php
<?php
require_once("config.php"); // config file
require_once(PHYSICALDIR."/ops/lib/common.php");
$DB=createADODB();
$solr = new SolrUpdate;
$solr->updateIndex();
? >
*/
// From within a PHP function
/*
$solr = new SolrUpdate();
$array = array();
$array['story_id'] = $story_id;
$array['group_id'] = $group_id;
$array['lucene_date'] = $lucene_date;
$array['title'] = $title;
$solr->addIndex(array($array));
*/
// Host and port of the Solr server that SolrUpdate::sendUpdate() talks to.
// Change this to your server, or define SOLR_META_UPDATE before this file is
// loaded (e.g. in your config file) to override the default without editing it.
if (!defined('SOLR_META_UPDATE')) {
    define('SOLR_META_UPDATE', '127.0.0.1:8080');
}
/**
 * Pushes documents into a Solr index over HTTP.
 *
 * updateIndex() bulk-indexes database rows that are still flagged
 * index_flag=0 (through the global ADODB connection $DB); addIndex() indexes
 * rows handed in directly by the caller. Every request is POSTed to
 * http://SOLR_META_UPDATE/solr/update.
 */
class SolrUpdate
{
    /**
     * Update the Solr index with the rows stored in the database.
     *
     * Works in rounds: each round copies up to $limit un-indexed story ids
     * into a temporary table, sends the matching rows to Solr in batches of
     * $batch_interval, commits, and only then sets index_flag=1 on those
     * rows. A round that fails is retried; the number of rounds is capped so
     * a persistent error cannot loop forever. Progress is echoed.
     *
     * Use addIndex() to add data from a function call instead.
     *
     * @return void
     */
    public function updateIndex()
    {
        global $DB;

        $start = microtime(true);
        $current_count = 0;      // documents successfully sent to Solr
        $batch_interval = 100;   // documents per <add> request
        $limit = 5000;           // rows handled per round
        $loop_control = 0;
        $continue_index = true;

        echo "Preparing to build index...";
        while ($continue_index) {
            // loop infinity control, change if this aborts for you
            if ($loop_control++ > 50) {
                break;
            }
            $failed = false;
            $continue_index = false;
            // Start every round with an empty batch so rows from a failed
            // batch are not sent a second time alongside the re-fetched rows.
            $result_array = array();
            $round_count = 0;

            // Remove any temporary table left over from the previous round.
            $dropSql = "drop table if exists temp_story_keywords";
            $ok = $DB->Execute($dropSql);
            if (!$ok) {
                echo "ERROR".$DB->ErrorMsg()." sql:".$dropSql;
            }

            // Snapshot the ids of the next $limit rows that are not indexed yet.
            $createSql = "create table temp_story_keywords engine=memory select story_keywords.story_id"
                ." from story_keywords where story_keywords.index_flag=0 limit ".(int) $limit;
            $ok = $DB->Execute($createSql);
            if (!$ok) {
                echo "ERROR".$DB->ErrorMsg()." sql:".$createSql;
            }

            // This is the query we are going to use to populate our index.
            // NOTE(review): the table names here (temp_keywords, keywords) do not
            // match the temp_story_keywords/story_keywords tables used by the
            // other statements, and the selected columns must provide the keys
            // addIndex() reads (story_id, group_id, lucene_date, title).
            // Adapt this query to your schema.
            $selectSql = "select keywords story from (temp_keywords, keywords) where keywords.story_id"
                ." = temp_keywords.story_id";
            $rs = $DB->Execute($selectSql);
            if (!$rs) {
                // Without a result set there is nothing to iterate; stop
                // instead of calling FetchRow() on a non-object.
                echo "ERROR".$DB->ErrorMsg()." sql:".$selectSql;
                break;
            }

            echo "Building index...";
            while ($array = $rs->FetchRow()) {
                $continue_index = true;
                $result_array[] = $array;
                if (count($result_array) >= $batch_interval) {
                    echo ".";
                    if (!$this->addIndex($result_array)) {
                        $failed = true;
                        break;
                    }
                    $round_count += count($result_array);
                    $result_array = array();
                }
            } // end while

            // Send the final partial batch; otherwise its rows would be
            // flagged as indexed below without ever reaching Solr.
            if (!$failed && $result_array) {
                echo ".";
                if ($this->addIndex($result_array)) {
                    $round_count += count($result_array);
                    $result_array = array();
                } else {
                    $failed = true;
                }
            }

            if ($failed) {
                echo "Error, restarting...";
                sleep(1);
                continue;
            }
            if (!$continue_index) {
                // No rows were fetched: nothing left to commit or flag.
                break;
            }

            echo "Updating and committing ".$round_count." records...";
            // Commit first and flag the rows afterwards, so a failed commit
            // leaves the rows un-flagged and they are picked up again.
            if (!$this->sendCommit()) {
                echo "Failed.";
                continue;
            }
            $current_count += $round_count;

            // this is the query that updates the tables
            $updateSql = "update story_keywords, temp_story_keywords set story_keywords.index_flag=1"
                ." where story_keywords.story_id = temp_story_keywords.story_id";
            $ok = $DB->Execute($updateSql);
            if ($ok) {
                echo "Success.";
            } else {
                echo $DB->ErrorMsg()." sql:".$updateSql;
                echo "Failed.";
            }
        } // end while

        // Clean up the temporary table.
        $finalDropSql = "drop table if exists temp_story_keywords";
        $ok = $DB->Execute($finalDropSql);
        if (!$ok) {
            echo $DB->ErrorMsg()." sql:".$finalDropSql;
        }

        $retElapsed = (microtime(true) - $start) * 1000;
        $docsec = $retElapsed > 0 ? $current_count / ($retElapsed / 1000) : 0;
        echo "Done indexing. Took ".$retElapsed." ms to index $current_count documents (".$docsec." docs/sec)";

        echo "Optimizing index...";
        // The update handler expects well-formed XML, hence the self-closing tag.
        $this->sendUpdate('<optimize/>');
    } // end function

    /**
     * Add rows to the Solr index from a php call.
     *
     * Example usage:
     *
     *     $solr = new SolrUpdate();
     *     $array = array();
     *     $array['story_id'] = $story_id;
     *     $array['group_id'] = $group_id;
     *     $array['lucene_date'] = $lucene_date;
     *     $array['title'] = $title;
     *     $solr->addIndex(array($array));
     *
     * Each row becomes one <doc> with the fields story_id, group_id and
     * title; lucene_date is only emitted when the row carries a non-empty
     * value for it.
     *
     * @param array $resultarray list of rows, each an array keyed by field name
     * @return bool true when Solr reported success (or there was nothing to
     *              send), false otherwise
     */
    public function addIndex($resultarray)
    {
        if (!$resultarray) {
            // Nothing to index: do not post an empty <add/> to Solr.
            return true;
        }

        $dom = new DOMDocument();
        $root = $dom->appendChild($dom->createElement('add'));

        foreach ($resultarray as $row) {
            $doc = $root->appendChild($dom->createElement('doc'));
            foreach (array('story_id', 'group_id', 'lucene_date', 'title') as $name) {
                $value = isset($row[$name]) ? $row[$name] : null;
                // Basic error checking: skip an empty date rather than
                // sending a blank date field.
                if ($name === 'lucene_date' && $value == null) {
                    continue;
                }
                $field = $dom->createElement('field');
                $field->setAttribute('name', $name);
                // createTextNode() escapes XML special characters itself.
                $field->appendChild($dom->createTextNode((string) $value));
                $doc->appendChild($field);
            }
        } // end foreach

        $dom_string = $dom->saveXML();
        if (!$dom_string) {
            echo "Error with xml document";
            print_r($resultarray);
            return false;
        }

        return $this->sendUpdate($dom_string);
    }

    /**
     * Ask Solr to commit the pending changes.
     *
     * Choose what type of commit you want, which is non-blocking or blocking
     * and refreshing (flush) or not. For a non-blocking, non-refreshing
     * commit send '<commit waitFlush="false" waitSearcher="false"/>' instead.
     *
     * @return bool result of sendUpdate() for the commit message
     */
    public function sendCommit()
    {
        // blocking
        return $this->sendUpdate('<commit/>');
    }

    /**
     * Escape the five XML special characters in a string.
     *
     * @param string $string raw text
     * @return string text with & < > ' " replaced by their XML entities
     */
    public function escapeChars($string)
    {
        // strtr() replaces in a single pass, so the "&" of an entity that was
        // just produced is never escaped a second time.
        return strtr((string) $string, array(
            '&' => '&amp;',
            '<' => '&lt;',
            '>' => '&gt;',
            "'" => '&apos;',
            '"' => '&quot;',
        ));
    }

    /**
     * POST an XML update message to the Solr update handler.
     *
     * @param string $post_string XML message (<add>, <commit/>, <optimize/>, ...)
     * @return bool true when the response contains '<result status="0"></result>',
     *              false on a transport error or any other response
     */
    public function sendUpdate($post_string)
    {
        $url = "http://".SOLR_META_UPDATE."/solr/update";

        $ch = curl_init();
        if ($ch === false) {
            print "Unable to initialise curl";
            return false;
        }
        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
        curl_setopt($ch, CURLOPT_TIMEOUT, 15);
        // Let curl build the request line, Content-Length and body instead of
        // passing a hand-written HTTP request through CURLOPT_CUSTOMREQUEST.
        curl_setopt($ch, CURLOPT_POST, 1);
        curl_setopt($ch, CURLOPT_POSTFIELDS, $post_string);
        curl_setopt($ch, CURLOPT_HTTPHEADER, array('Content-Type: text/xml; charset=utf-8'));

        $data = curl_exec($ch);
        $failed = curl_errno($ch) !== 0;
        if ($failed) {
            print curl_error($ch);
        }
        // Release the handle on the error path as well.
        curl_close($ch);

        if ($failed || !is_string($data)) {
            return false;
        }

        return strpos($data, '<result status="0"></result>') !== false;
    } // end function send update
} // end class
// End of file. The closing PHP tag is intentionally omitted so that nothing after it is sent as output.