Mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Apache Wiki <wikidi...@apache.org>
Subject [Solr Wiki] Update of "SolrUpdate" by BrianLucas
Date Wed, 28 Jun 2006 21:15:54 GMT
Dear Wiki user,

You have subscribed to a wiki page or wiki category on "Solr Wiki" for change notification.

The following page has been changed by BrianLucas:
http://wiki.apache.org/solr/SolrUpdate

The comment on the change is:
first attempt at solrUpdate

New page:
<?
	
	// VERSION SolrUpdate 0.100
	// Written by Brian Lucas, use any which way you see fit.
	
	// DESCRIPTION
	// 
	//   This class performs an update on an existing Solr repository
	// It uses the handy ADODB database library, but you can substitute your own if you prefer.
	// ADODB makes dealing with SQL much nicer than the PHP mysql_ calls, and it is handy if you
	// switch databases.
	// http://adodb.sourceforge.net/

	
	// SQL TABLE Notice:
	
	// Inside the table that stores the primary key for your data (one row per record), create a
	// tinyint(1) (or enum) flag called "index_flag".  Ex:
	// ALTER TABLE `stories` ADD `index_flag` TINYINT( 1 ) NOT NULL ;
	// This will allow the update process to continue where it left off in case there is an error.
	
	
	// Example usages:
	
	// From the command line
	/* 
	#!/usr/local/bin/php
	<?php
	
	require_once("config.php"); // config file
	require_once(PHYSICALDIR."/ops/lib/common.php");
	$DB=createADODB();
	$solr = new SolrUpdate;
	$solr->updateIndex();
	? >
	*/
	
	// From within a PHP function

	/*
	$solr = new SolrUpdate();
	
	$array = array();
	$array['story_id'] = $story_id;
	$array['group_id'] = $group_id;
	$array['lucene_date'] = $lucene_date;
	$array['title'] = $title;
	$solr->addIndex(array($array));
	*/



// Host and port of the Solr server that receives the update requests.
// Give it without a scheme or path: sendUpdate() prepends "http://" and targets "/solr/update".
// Change this to your server.
define('SOLR_META_UPDATE', '127.0.0.1:8080');


/**
 * Pushes documents into a Solr index through Solr's XML update handler.
 *
 * Two entry points are offered:
 *  - updateIndex() walks the database (through the global ADODB connection $DB) and
 *    indexes every row whose index_flag is still 0.
 *  - addIndex() indexes rows handed over directly from PHP code.
 *
 * The server address comes from the SOLR_META_UPDATE constant ("host:port").
 */
class SolrUpdate {

	/**
	 * Updates the Solr index with the not-yet-indexed rows stored in the database.
	 *
	 * Rows are processed in chunks of $limit; each chunk is sent to Solr in batches of
	 * $batch_interval documents, then flagged as indexed and committed. If a batch is
	 * rejected, the chunk is retried from scratch (its rows are still flagged 0).
	 * Progress and errors are echoed. Use addIndex() to add data from a function call.
	 *
	 * @return void
	 */
	public function updateIndex() {
		global $DB;

		$start = microtime(true);

		$current_count = 0;   // documents Solr has accepted so far
		$batch_interval = 100; // documents per <add> request
		$limit = 5000;         // rows per database chunk

		$loop_control = 0;
		$continue_index = true;
		echo "Preparing to build index...";

		while ($continue_index) {

			if ($loop_control++ > 50) break; //loop infinity control, change if this aborts for you

			$failed = false;
			$continue_index = false;
			// Start every pass with an empty batch so a failed pass cannot leak rows into the retry.
			$result_array = array();

			// remove the temp table left over from the previous pass
			$dropSql = "drop table if exists temp_story_keywords";
			$ok = $DB->Execute($dropSql);
			if (!($ok)) echo "ERROR".$DB->ErrorMsg()." sql:".$dropSql;

			// snapshot the ids of the next chunk of un-indexed rows
			$createSql = "create table temp_story_keywords engine=memory select story_keywords.story_id from story_keywords where story_keywords.index_flag=0 limit $limit";
			$ok = $DB->Execute($createSql);
			if (!($ok)) echo "ERROR".$DB->ErrorMsg()." sql:".$createSql;

			// this is the query we are going to use to populate our index
			// NOTE(review): this query reads from temp_keywords/keywords while the temp table
			// created above is temp_story_keywords, and it does not obviously select the
			// story_id/group_id/lucene_date/title columns addIndex() reads -- adjust to your schema.
			$selectSql = "select keywords story from (temp_keywords, keywords) where keywords.story_id = temp_keywords.story_id";

			$rs = $DB->Execute($selectSql);
			if (!($rs)) {
				// Without a result set there is nothing to iterate; stop instead of crashing on FetchRow().
				echo "ERROR".$DB->ErrorMsg()." sql:".$selectSql;
				break;
			}

			echo "Building index...";

			while ($row = $rs->FetchRow()) {
				$continue_index = true;
				$result_array[] = $row;

				if (count($result_array) < $batch_interval) {
					continue;
				}

				echo ".";
				if (!$this->addIndex($result_array)) {
					$failed = true;
					break;
				}
				$current_count += count($result_array);
				$result_array = array();
			} // end while

			// Send the trailing partial batch; otherwise those rows would be flagged as
			// indexed below without ever having reached Solr.
			if (!$failed && count($result_array) > 0) {
				echo ".";
				if ($this->addIndex($result_array)) {
					$current_count += count($result_array);
				} else {
					$failed = true;
				}
			}

			if ($failed) {
				echo "Error, restarting...";
				sleep(1);
				continue;
			}

			// this is the query that updates the tables
			echo "Updating and committing ".$limit." records...";
			$updateSql = "update story_keywords, temp_story_keywords set story_keywords.index_flag=1 where story_keywords.story_id = temp_story_keywords.story_id";

			$ok = $DB->Execute($updateSql);
			if (!($ok)) echo $DB->ErrorMsg()." sql:".$updateSql;

			// commit the data; only report success when Solr acknowledged the commit
			if ($ok && $this->sendCommit()) {
				echo "Success.";
			} else {
				echo "Failed.";
			}
		} // end while

		// clean up the temp table
		$finalDropSql = "drop table if exists temp_story_keywords";
		$ok = $DB->Execute($finalDropSql);
		if (!($ok)) echo $DB->ErrorMsg()." sql:".$finalDropSql;

		$retElapsed = (microtime(true) - $start) * 1000;

		// Guard against a zero elapsed time so the rate never divides by zero.
		$docsec = ($retElapsed > 0) ? $current_count / ($retElapsed / 1000) : 0;

		echo "Done indexing. Took ".($retElapsed)." ms to index $current_count documents (".$docsec." docs/sec)";
		echo "Optimizing index...";
		// The update handler expects well-formed XML, hence the self-closing tag.
		$this->sendUpdate('<optimize/>');

	} // end function

	/**
	 * Adds rows to the index from a PHP call.
	 *
	 * Example usage:
	 *
	 *   $solr = new SolrUpdate();
	 *
	 *   $array = array();
	 *   $array['story_id'] = $story_id;
	 *   $array['group_id'] = $group_id;
	 *   $array['lucene_date'] = $lucene_date;
	 *   $array['title'] = $title;
	 *   $solr->addIndex(array($array));
	 *
	 * @param array $resultarray List of rows; each row is an array with the keys
	 *                           story_id, group_id, title and (optionally) lucene_date.
	 * @return bool True when Solr accepted the documents (or there was nothing to send),
	 *              false otherwise.
	 */
	public function addIndex($resultarray) {

		// Nothing to index: do not bother the server with an empty <add/>.
		if (empty($resultarray)) {
			return true;
		}

		$dom = new DOMDocument('1.0', 'UTF-8');
		$root = $dom->appendChild($dom->createElement('add'));

		foreach ($resultarray as $row) {
			$doc = $root->appendChild($dom->createElement('doc'));

			$this->appendField($dom, $doc, 'story_id', isset($row['story_id']) ? $row['story_id'] : '');
			$this->appendField($dom, $doc, 'group_id', isset($row['group_id']) ? $row['group_id'] : '');

			// Basic error checking: the date is optional, skip it when absent or empty
			if (isset($row['lucene_date']) && $row['lucene_date'] !== '') {
				$this->appendField($dom, $doc, 'lucene_date', $row['lucene_date']);
			}

			$this->appendField($dom, $doc, 'title', isset($row['title']) ? $row['title'] : '');
		} // end foreach

		$dom_string = $dom->saveXML();

		if (!$dom_string) {
			echo "Error with xml document";
			print_r($resultarray);
			return false;
		}

		return $this->sendUpdate($dom_string);
	}

	/**
	 * Appends <field name="$name">$value</field> to a <doc> element.
	 *
	 * The value goes in as a DOM text node, so the DOM serializer takes care of
	 * escaping XML special characters.
	 *
	 * @param DOMDocument $dom   Document that owns the nodes.
	 * @param DOMNode     $doc   The <doc> element to append to.
	 * @param string      $name  Value of the field's name attribute.
	 * @param mixed       $value Field content; cast to string.
	 * @return void
	 */
	private function appendField($dom, $doc, $name, $value) {
		$field = $dom->createElement('field');
		$field->setAttribute('name', $name);
		$field->appendChild($dom->createTextNode((string) $value));
		$doc->appendChild($field);
	}

	/**
	 * Asks Solr to commit the pending documents.
	 *
	 * Choose what type of commit you want, which is non-blocking or blocking and
	 * refreshing (flush) or not.
	 *
	 * @return bool True when Solr acknowledged the commit.
	 */
	public function sendCommit() {

		// non-blocking, non-refreshing index
		//		return $this->sendUpdate('<commit waitFlush="false" waitSearcher="false"/>');

		// blocking
		return $this->sendUpdate('<commit/>');
	}

	/**
	 * Escapes the five XML special characters in a string.
	 *
	 * Not needed for addIndex(), which builds its XML through DOM; useful when
	 * assembling update messages by hand.
	 *
	 * @param string $string Raw text.
	 * @return string Text with & < > ' " replaced by their XML entities.
	 */
	public function escapeChars($string) {
		// "&" must stay first so the ampersands of the other entities are not escaped again.
		return str_replace(
			array("&", "<", ">", "'", '"'),
			array("&amp;", "&lt;", "&gt;", "&apos;", "&quot;"),
			$string
		);
	}

	/**
	 * POSTs an XML update message to the Solr update handler.
	 *
	 * @param string $post_string XML message (<add>, <commit/>, <optimize/>, ...).
	 * @return bool True when the response reports status 0, false on a transport
	 *              error (the curl error is printed) or any other response.
	 */
	public function sendUpdate($post_string) {

		$url = "http://".SOLR_META_UPDATE."/solr/update";

		$ch = curl_init();
		curl_setopt($ch, CURLOPT_URL, $url);
		curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
		curl_setopt($ch, CURLOPT_TIMEOUT, 15);
		// Let curl build the POST request instead of smuggling a hand-written
		// request and headers through CURLOPT_CUSTOMREQUEST.
		curl_setopt($ch, CURLOPT_POST, 1);
		curl_setopt($ch, CURLOPT_POSTFIELDS, $post_string);
		curl_setopt($ch, CURLOPT_HTTPHEADER, array('Content-Type: text/xml; charset=utf-8'));

		$data = curl_exec($ch);

		// Read the error state before closing so the handle is released on every path.
		$errno = curl_errno($ch);
		$error = curl_error($ch);
		curl_close($ch);

		if ($errno) {
			print $error;
			return false;
		}

		if (!is_string($data)) {
			return false;
		}

		// Early Solr builds answer <result status="0"></result>; later ones report the
		// status inside the responseHeader list.
		return strpos($data, '<result status="0"></result>') !== false
			|| strpos($data, '<int name="status">0</int>') !== false;
	} // end function send update
} // end class


?>

Mime
View raw message