lucene-solr-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Apache Wiki <wikidi...@apache.org>
Subject [Solr Wiki] Update of "DataImportHandler" by MarkoBonaci
Date Fri, 10 Sep 2010 10:30:52 GMT
Dear Wiki user,

You have subscribed to a wiki page or wiki category on "Solr Wiki" for change notification.

The "DataImportHandler" page has been changed by MarkoBonaci.
http://wiki.apache.org/solr/DataImportHandler?action=diff&rev1=250&rev2=251

--------------------------------------------------

  == A screenshot ==
  {{attachment:interactive-dev-dataimporthandler.PNG}}
  
+ 
+ 
+ 
+ <<Anchor(scheduling)>>
+ = Scheduling DIH =
+  * Author Marko Bonaci, Croatia
+  * Version 1.0
+  * Proof of concept; needs an almost complete overhaul
+  * Tested on Apache Tomcat v6
+ 
+ Global /!\ :TODO: /!\
+  * add comments to methods
+  * try to use Solr's internal classes wherever possible
+  
+ 
+ == SolrDataImportProperties ==
+  * the class uses resource loading to get DIHScheduler settings
+  * requires dataimport.properties file in folder solr.home/conf/
+  * see constants for param names which should be added to dataimport.properties
+ /!\ :TODO: /!\
+  * uses system property for locating solr.home (use Solr's resource loader instead)
+ 
+ {{{
+ package hr.infodom.solr.dataimport;
+ 
+ import java.io.FileInputStream;
+ import java.io.FileNotFoundException;
+ import java.io.IOException;
+ import java.util.Properties;
+ 
+ 
+ /**
+  * Holds the DIH scheduler settings read from the
+  * <code>conf/dataimport.properties</code> file below the directory named by the
+  * <code>solr.solr.home</code> system property.
+  *
+  * The public constants are the keys the scheduler expects in that file.
+  */
+ public class SolrDataImportProperties {
+ 	private Properties properties;
+ 	public static final String SYNC_CORES	= "syncCores";
+ 	public static final String SERVER 	= "server";
+ 	public static final String PORT 	= "port";
+ 	public static final String WEBAPP 	= "webapp";
+ 	public static final String PARAMS 	= "params";
+ 
+ 	/** System property that points at the Solr home directory. */
+ 	private static final String SOLR_HOME_PROPERTY = "solr.solr.home";
+ 
+ 
+ 	/**
+ 	 * Creates the holder and immediately loads the properties file.
+ 	 */
+ 	public SolrDataImportProperties(){
+ 		loadProperties(true);
+ 	}
+ 
+ 	/**
+ 	 * (Re)loads <code>dataimport.properties</code>.
+ 	 *
+ 	 * Read failures are printed to the error stream and leave an empty
+ 	 * property set behind, so {@link #getProperty(String)} then returns null.
+ 	 *
+ 	 * @param force when true the file is always re-read; when false it is
+ 	 *              read only if nothing has been loaded yet
+ 	 */
+ 	public void loadProperties(boolean force){
+ 		if(!force && properties != null){
+ 			return;
+ 		}
+ 
+ 		properties = new Properties();
+ 
+ 		String solrHome = System.getProperty(SOLR_HOME_PROPERTY);
+ 		if(solrHome == null){
+ 			System.err.println("System property " + SOLR_HOME_PROPERTY + " is not set, cannot locate dataimport.properties");
+ 			return;
+ 		}
+ 
+ 		// use the platform separator; a hard-coded backslash only works on Windows
+ 		String separator = java.io.File.separator;
+ 		String dataImportPropertiesPath = solrHome + separator + "conf" + separator + "dataimport.properties";
+ 
+ 		FileInputStream fis = null;
+ 		try{
+ 			fis = new FileInputStream(dataImportPropertiesPath);
+ 			properties.load(fis);
+ 		}catch(FileNotFoundException e){
+ 			e.printStackTrace();
+ 		}catch(IOException ioe){
+ 			ioe.printStackTrace();
+ 		}finally{
+ 			// always release the file handle, even when loading fails
+ 			if(fis != null){
+ 				try{
+ 					fis.close();
+ 				}catch(IOException closeFailure){
+ 					closeFailure.printStackTrace();
+ 				}
+ 			}
+ 		}
+ 	}
+ 
+ 	/**
+ 	 * Looks up a single setting.
+ 	 *
+ 	 * @param key property name, e.g. one of the constants of this class
+ 	 * @return the value from the file, or null when the key is not present
+ 	 */
+ 	public String getProperty(String key){
+ 		return properties.getProperty(key);
+ 	}
+ 
+ }
+ }}}
+ 
+ 
+ == ApplicationListener ==
+  * the class implements [[http://download.oracle.com/javaee/6/api/javax/servlet/ServletContextListener.html|javax.servlet.ServletContextListener]] (listens to web app Initialize and Destroy events)
+  * uses HTTPPostScheduler, Timer and context attribute map to schedule periodic event
+ 
+ {{{
+ package hr.infodom.solr.scheduler;
+ 
+ import java.util.Calendar;
+ import java.util.Date;
+ import java.util.Timer;
+ 
+ import javax.servlet.ServletContext;
+ import javax.servlet.ServletContextEvent;
+ import javax.servlet.ServletContextListener;
+ 
+ /**
+  * Starts the periodic DIH import task when the web application is initialized
+  * and stops it again when the application is destroyed.
+  *
+  * The running {@link Timer} is kept in the servlet context attribute map so
+  * that it can be found and cancelled on shutdown.
+  */
+ public class ApplicationListener implements ServletContextListener {
+ 
+ 	/** Name of the servlet context attribute that holds the running timer. */
+ 	private static final String TIMER_ATTRIBUTE = "timer";
+ 
+ 	/** Time between two consecutive runs of the task: 10 minutes, in milliseconds. */
+ 	private static final long PERIOD_MILLIS = 1000L * 60 * 10;
+ 
+ 	/**
+ 	 * Cancels the timer stored by {@link #contextInitialized(ServletContextEvent)}
+ 	 * and removes it from the servlet context.
+ 	 */
+ 	@Override
+ 	public void contextDestroyed(ServletContextEvent servletContextEvent) {
+ 		ServletContext servletContext = servletContextEvent.getServletContext();
+ 
+ 		// get our timer from the app attribute map
+ 		Object stored = servletContext.getAttribute(TIMER_ATTRIBUTE);
+ 
+ 		// cancel all pending tasks in the timer's queue
+ 		if (stored instanceof Timer) {
+ 			((Timer) stored).cancel();
+ 		}
+ 
+ 		// remove the timer from the servlet context
+ 		servletContext.removeAttribute(TIMER_ATTRIBUTE);
+ 	}
+ 
+ 	/**
+ 	 * Creates the import task and schedules it at a fixed rate, starting now.
+ 	 * Any failure is written to the servlet log; the web application still starts.
+ 	 */
+ 	@Override
+ 	public void contextInitialized(ServletContextEvent servletContextEvent) {
+ 		ServletContext servletContext = servletContextEvent.getServletContext();
+ 		Timer timer = null;
+ 		try{
+ 			// build the task first: if its constructor fails no timer thread has been started yet
+ 			HTTPPostScheduler task = new HTTPPostScheduler(servletContext.getServletContextName());
+ 
+ 			timer = new Timer();
+ 
+ 			// first run immediately, then every PERIOD_MILLIS (10 minutes)
+ 			timer.scheduleAtFixedRate(task, new Date(), PERIOD_MILLIS);
+ 
+ 			// save our timer for later use
+ 			servletContext.setAttribute(TIMER_ATTRIBUTE, timer);
+ 
+ 		} catch (Exception e) {
+ 			// do not leave a timer thread behind that nobody can cancel
+ 			if (timer != null) {
+ 				timer.cancel();
+ 			}
+ 			servletContext.log("Problem initializing the task: " + e.getMessage(), e);
+ 		}
+ 	}
+ 
+ }
+ }}}
+ 
+ 
+ == HTTPPostScheduler ==
+  * the class extends [[http://download.oracle.com/javase/1.4.2/docs/api/java/util/TimerTask.html|java.util.TimerTask]], which implements [[http://download.oracle.com/javase/1.4.2/docs/api/java/lang/Runnable.html|java.lang.Runnable]]
+  * gets DIH params from ''SolrDataImportProperties'' and sets default values if empty
+ 
+ /!\ :TODO: /!\
+  * currently logs events to server console -> use logger
+  * make it core-aware (to work with and without cores)
+  * explode ''params'' to specific parameters
+ 
+ 
+ {{{
+ package hr.infodom.solr.scheduler;
+ 
+ import hr.infodom.solr.dataimport.SolrDataImportProperties;
+ 
+ import java.io.IOException;
+ import java.net.HttpURLConnection;
+ import java.net.MalformedURLException;
+ import java.net.URL;
+ import java.text.DateFormat;
+ import java.text.SimpleDateFormat;
+ import java.util.Date;
+ import java.util.Locale;
+ import java.util.TimerTask;
+ 
+ 
+ /**
+  * Timer task that triggers a data import on every configured core by sending
+  * an HTTP POST to <code>http://server:port/webapp/core</code> followed by the
+  * configured <code>params</code> string.
+  *
+  * Settings come from {@link SolrDataImportProperties}. Missing or empty values
+  * fall back to: server "localhost", port "80", webapp = the name passed to the
+  * constructor. <code>syncCores</code> and <code>params</code> have no default;
+  * while they are missing each run only reloads the properties file.
+  */
+ public class HTTPPostScheduler extends TimerTask {
+ 	private String[] syncCores;
+ 	private String server;
+ 	private String port;
+ 	private String webapp;
+ 	private String params;
+ 	private SolrDataImportProperties p;
+ 	// kept so the webapp default can be re-applied after a properties reload
+ 	private final String webAppName;
+ 
+ 
+ 	/**
+ 	 * @param webAppName name used for the web application part of the URL
+ 	 *                   when the properties file does not specify one
+ 	 */
+ 	public HTTPPostScheduler(String webAppName){
+ 		this.webAppName = webAppName;
+ 		//load properties from global dataimport.properties
+ 		p = new SolrDataImportProperties();
+ 		readSettings();
+ 	}
+ 
+ 	/** Copies the current property values into the fields, applying defaults. */
+ 	private void readSettings(){
+ 		syncCores	= parseCores(p.getProperty(SolrDataImportProperties.SYNC_CORES));
+ 		server 		= valueOrDefault(p.getProperty(SolrDataImportProperties.SERVER), "localhost");
+ 		port 		= valueOrDefault(p.getProperty(SolrDataImportProperties.PORT), "80");
+ 		webapp 		= valueOrDefault(p.getProperty(SolrDataImportProperties.WEBAPP), webAppName == null ? "" : webAppName);
+ 		params 		= valueOrDefault(p.getProperty(SolrDataImportProperties.PARAMS), "");
+ 	}
+ 
+ 	/** Re-reads the properties file and refreshes the fields from it. */
+ 	private void reloadSettings(){
+ 		p.loadProperties(true);
+ 		readSettings();
+ 	}
+ 
+ 	/** Returns the trimmed value, or the fallback when the value is null or blank. */
+ 	private static String valueOrDefault(String value, String fallback){
+ 		if(value == null) return fallback;
+ 		String trimmed = value.trim();
+ 		return trimmed.isEmpty() ? fallback : trimmed;
+ 	}
+ 
+ 	/** Splits a comma separated core list; null or blank input gives an empty array. */
+ 	private static String[] parseCores(String cores){
+ 		if(cores == null || cores.trim().isEmpty()) return new String[0];
+ 		// tolerate blanks around the commas, e.g. "coreHr, coreEn"
+ 		return cores.trim().split("\\s*,\\s*");
+ 	}
+ 
+ 	/**
+ 	 * Sends one import request per configured core. When the configuration is
+ 	 * incomplete nothing is sent; the properties file is reloaded instead so
+ 	 * that a corrected file is picked up on the next run.
+ 	 */
+ 	public void run() {
+ 		try{
+ 			if(syncCores.length < 1 || (syncCores.length == 1 && syncCores[0].isEmpty())){
+ 				System.out.println("INFO: <index update process> No cores scheduled for data import");
+ 				System.out.println("INFO: <index update process> Reloading global dataimport.properties");
+ 				reloadSettings();	//listen for change in properties file
+ 
+ 			}else if(server.isEmpty() || webapp.isEmpty() || params.isEmpty()){
+ 				System.out.println("INFO: <index update process> Insuficient info provided for data import");
+ 				System.out.println("INFO: <index update process> Reloading global dataimport.properties");
+ 				reloadSettings();	//listen for change in properties file
+ 
+ 			}else{
+ 				for(String core : syncCores){
+ 					// a stray comma in syncCores produces an empty name; skip it
+ 					if(core.isEmpty()) continue;
+ 					sendHttpPost(core);
+ 				}
+ 			}
+ 		}catch(Exception e){
+ 			// never let an exception escape: it would cancel all future runs of the timer
+ 			e.printStackTrace();
+ 		}
+ 	}
+ 
+ 	/** Builds the request URL for one core and posts to it. */
+ 	private void sendHttpPost(String coreName){
+ 		String coreUrl = "http://" + server + ":" + port + "/" + webapp + "/" + coreName + params;
+ 		sendHttpPost(coreUrl, coreName);
+ 	}
+ 
+ 	/**
+ 	 * Posts to the given URL and prints the progress and the response status
+ 	 * to the console. Failures are printed and otherwise ignored.
+ 	 *
+ 	 * @param completeUrl full request URL including the query string
+ 	 * @param core        core name, used only as a prefix in the console output
+ 	 */
+ 	private void sendHttpPost(String completeUrl, String core){
+ 		DateFormat df = new SimpleDateFormat("dd.MM.yyyy HH:mm:ss SSS", new Locale("hr", "HR"));
+ 		Date startTime = new Date();
+ 
+ 		System.out.println("INFO: [" + core + "] <index update process> Process started at .............. " + df.format(startTime));
+ 
+ 		try{
+ 			// Send HTTP POST
+ 			URL url = new URL(completeUrl);
+ 			HttpURLConnection conn = (HttpURLConnection)url.openConnection();
+ 
+ 			try{
+ 				conn.setRequestMethod("POST");
+ 				conn.setRequestProperty("type", "submit");
+ 				conn.setDoOutput(true);
+ 
+ 				conn.connect();
+ 				System.out.println("INFO: [" + core + "] <index update process> Request method\t\t\t" + conn.getRequestMethod());
+ 				System.out.println("INFO: [" + core + "] <index update process> Succesfully connected to server\t" + server);
+ 				System.out.println("INFO: [" + core + "] <index update process> Using port\t\t\t" + port);
+ 				System.out.println("INFO: [" + core + "] <index update process> Application name\t\t\t" + webapp);
+ 				System.out.println("INFO: [" + core + "] <index update process> URL params\t\t\t" + params);
+ 				System.out.println("INFO: [" + core + "] <index update process> Full URL\t\t\t\t" + conn.getURL());
+ 				System.out.println("INFO: [" + core + "] <index update process> Response message\t\t\t" + conn.getResponseMessage());
+ 				System.out.println("INFO: [" + core + "] <index update process> Response code\t\t\t" + conn.getResponseCode());
+ 			}finally{
+ 				// release the connection even when the request fails half way
+ 				conn.disconnect();
+ 			}
+ 
+ 			System.out.println("INFO: [" + core + "] <index update process> Disconnected from server\t\t" + server);
+ 			Date endTime = new Date();
+ 			System.out.println("INFO: [" + core + "] <index update process> Process ended at ................ " + df.format(endTime));
+ 		}catch(MalformedURLException mue){
+ 			mue.printStackTrace();
+ 		}catch(IOException ioe){
+ 			ioe.printStackTrace();
+ 		}catch(Exception e){
+ 			e.printStackTrace();
+ 		}
+ 	}
+ }
+ 
+ }}}
+ 
+ 
+ == dataimport.properties file example ==
+  * explains default values when param is omitted
+ 
+ {{{
+ #Tue Jul 20 15:12:52 CEST 2010
+ metadataObject.last_index_time=2010-07-20 15\:12\:47
+ last_index_time=2010-07-20 15\:12\:47
+ 
+ 
+ #################################################
+ #						#
+ #	dataimport scheduler properties		#
+ #						#
+ #################################################
+ 
+ #which cores you want to schedule [mandatory]
+ syncCores=coreHr,coreEn
+ 
+ #solr server name or IP address [defaults to localhost if empty]
+ server=
+ 
+ #solr server port [defaults to 80 if empty]
+ port=8080
+ 
+ #application name/context [defaults to ServletContextListener's context name (web app name)]
+ webapp=solrDIHSchedulerTest
+ 
+ #URL params [mandatory]
+ params=/select?clean=false&commit=true&command=delta-import&qt=/dataimport&handler=/dataimport
+ }}}
+ 
+ 
+ 
  = Where to find it? =
  DataImportHandler is a new addition to Solr. You can either:
   * Download a nightly build of Solr from [[http://lucene.apache.org/solr/|Solr website]], or

Mime
View raw message