Dear Wiki user,
You have subscribed to a wiki page or wiki category on "Solr Wiki" for change notification.
The "DataImportHandler" page has been changed by MarkoBonaci.
http://wiki.apache.org/solr/DataImportHandler?action=diff&rev1=250&rev2=251
--------------------------------------------------
== A screenshot ==
{{attachment:interactive-dev-dataimporthandler.PNG}}
+
+
+
+ <<Anchor(scheduling)>>
+ = Scheduling DIH =
+ * Author Marko Bonaci, Croatia
+ * Version 1.0
+ * Proof of concept; needs an almost complete overhaul
+ * Tested on Apache Tomcat v6
+
+ Global /!\ :TODO: /!\
+ * add comments to methods
+ * try to use Solr's internal classes wherever possible
+
+
+ == SolrDataImportProperties ==
+ * the class uses resource loading to get DIHScheduler settings
+ * requires dataimport.properties file in folder solr.home/conf/
+ * see constants for param names which should be added to dataimport.properties
+ /!\ :TODO: /!\
+ * uses system property for locating solr.home (use Solr's resource loader instead)
+
+ {{{
+ package hr.infodom.solr.dataimport;
+
+ import java.io.FileInputStream;
+ import java.io.FileNotFoundException;
+ import java.io.IOException;
+ import java.util.Properties;
+
+
+ /**
+  * Loads the DIH scheduler settings from
+  * <solr.solr.home>/conf/dataimport.properties, where solr.solr.home is
+  * read from the system property of the same name.
+  * The public constants are the keys looked up in that file.
+  */
+ public class SolrDataImportProperties {
+     public static final String SYNC_CORES = "syncCores";
+     public static final String SERVER = "server";
+     public static final String PORT = "port";
+     public static final String WEBAPP = "webapp";
+     public static final String PARAMS = "params";
+
+     private Properties properties;
+
+     /** Creates the instance and immediately loads the properties file. */
+     public SolrDataImportProperties(){
+         loadProperties(true);
+     }
+
+     /**
+      * (Re)loads dataimport.properties.
+      * I/O problems are printed to stderr and leave whatever could be read
+      * (possibly nothing) in place; they are not propagated to the caller.
+      *
+      * @param force when true the file is always re-read; when false it is
+      *              read only if nothing has been loaded yet
+      */
+     public void loadProperties(boolean force){
+         if(!force && properties != null){
+             return;
+         }
+
+         // Replace the old set up front so getProperty() never sees null
+         properties = new Properties();
+
+         // Forward slashes are accepted by java.io on Windows as well, so the
+         // path is no longer tied to the Windows-only "\\" separator.
+         String dataImportPropertiesPath =
+                 System.getProperty("solr.solr.home") + "/conf/dataimport.properties";
+
+         FileInputStream fis = null;
+         try{
+             fis = new FileInputStream(dataImportPropertiesPath);
+             properties.load(fis);
+         }catch(FileNotFoundException e){
+             e.printStackTrace();
+         }catch(IOException ioe){
+             ioe.printStackTrace();
+         }finally{
+             // Always release the file handle; this method is called repeatedly
+             if(fis != null){
+                 try{
+                     fis.close();
+                 }catch(IOException ignored){
+                     // nothing useful can be done if close fails
+                 }
+             }
+         }
+     }
+
+     /**
+      * @param key property name, normally one of the constants of this class
+      * @return the value from the last load, or null if the key is absent
+      */
+     public String getProperty(String key){
+         return properties.getProperty(key);
+     }
+
+ }
+ }}}
+
+
+ == ApplicationListener ==
+ * the class implements [[http://download.oracle.com/javaee/6/api/javax/servlet/ServletContextListener.html|javax.servlet.ServletContextListener]]
(listens to web app Initialize and Destroy events)
+ * uses HTTPPostScheduler, a Timer and the context attribute map to schedule a periodic event
+
+ {{{
+ package hr.infodom.solr.scheduler;
+
+ import java.util.Calendar;
+ import java.util.Date;
+ import java.util.Timer;
+
+ import javax.servlet.ServletContext;
+ import javax.servlet.ServletContextEvent;
+ import javax.servlet.ServletContextListener;
+
+ /**
+  * Servlet context listener that starts a periodic HTTPPostScheduler task
+  * when the web application is initialized and cancels it when the
+  * application is destroyed. The Timer is kept in the servlet context
+  * under the attribute name "timer".
+  */
+ public class ApplicationListener implements ServletContextListener {
+
+     /** Delay between two scheduled runs: 10 minutes, in milliseconds. */
+     private static final long IMPORT_PERIOD_MS = 1000L * 60 * 10;
+
+     /**
+      * Cancels the timer stored by contextInitialized (if any) and removes
+      * it from the servlet context.
+      */
+     @Override
+     public void contextDestroyed(ServletContextEvent servletContextEvent) {
+         ServletContext servletContext = servletContextEvent.getServletContext();
+
+         // get our timer from the app attribute map
+         Timer timer = (Timer)servletContext.getAttribute("timer");
+
+         // cancel all pending tasks in the timer's queue
+         if (timer != null) {
+             timer.cancel();
+         }
+
+         // remove the timer from the servlet context
+         servletContext.removeAttribute("timer");
+     }
+
+     /**
+      * Creates the scheduler task and a timer, schedules the task to run
+      * immediately and then every IMPORT_PERIOD_MS, and stores the timer in
+      * the servlet context. Failures are logged, not rethrown.
+      */
+     @Override
+     public void contextInitialized(ServletContextEvent servletContextEvent) {
+         ServletContext servletContext = servletContextEvent.getServletContext();
+         Timer timer = null;
+         try{
+             // Build the task first: if its constructor throws, no timer
+             // thread has been started yet and nothing is leaked.
+             HTTPPostScheduler task = new HTTPPostScheduler(servletContext.getServletContextName());
+             timer = new Timer();
+
+             // first run is "now", then every 10 minutes at a fixed rate
+             Date startTime = new Date();
+             timer.scheduleAtFixedRate(task, startTime, IMPORT_PERIOD_MS);
+
+             // save our timer so contextDestroyed can cancel it
+             servletContext.setAttribute("timer", timer);
+
+         } catch (Exception e) {
+             // the timer was never published to the context, so stop it here
+             if (timer != null) {
+                 timer.cancel();
+             }
+             e.printStackTrace();
+             servletContext.log("Problem initializing the task: " + e.getMessage ());
+         }
+     }
+
+ }
+ }}}
+
+
+ == HTTPPostScheduler ==
+ * the class extends [[http://download.oracle.com/javase/1.4.2/docs/api/java/util/TimerTask.html|java.util.TimerTask]],
which implements [[http://download.oracle.com/javase/1.4.2/docs/api/java/lang/Runnable.html|java.lang.Runnable]]
+ * gets DIH params from ''SolrDataImportProperties'' and sets default values if empty
+
+ /!\ :TODO: /!\
+ * currently logs events to server console -> use logger
+ * make it core-aware (to work with and without cores)
+ * explode ''params'' to specific parameters
+
+
+ {{{
+ package hr.infodom.solr.scheduler;
+
+ import hr.infodom.solr.dataimport.SolrDataImportProperties;
+
+ import java.io.IOException;
+ import java.net.HttpURLConnection;
+ import java.net.MalformedURLException;
+ import java.net.URL;
+ import java.text.DateFormat;
+ import java.text.SimpleDateFormat;
+ import java.util.Date;
+ import java.util.Locale;
+ import java.util.TimerTask;
+
+
+ /**
+  * Timer task that sends one HTTP POST per configured core to trigger a
+  * data import. Settings come from SolrDataImportProperties; empty or
+  * missing server, port and webapp values are replaced by defaults.
+  * Progress is written to System.out.
+  */
+ public class HTTPPostScheduler extends TimerTask {
+     private String[] syncCores;
+     private String server;
+     private String port;
+     private String webapp;
+     private String params;
+     private SolrDataImportProperties p;
+     // remembered so the webapp default can be re-applied after a reload
+     private String defaultWebapp;
+
+
+     /**
+      * @param webAppName used as the webapp value when dataimport.properties
+      *                   does not supply one
+      */
+     public HTTPPostScheduler(String webAppName){
+         defaultWebapp = webAppName;
+
+         //load properties from global dataimport.properties
+         p = new SolrDataImportProperties();
+         readSettings();
+     }
+
+     /** Copies the current property values into the fields and applies defaults. */
+     private void readSettings(){
+         String cores = p.getProperty(SolrDataImportProperties.SYNC_CORES);
+         // a missing key yields null; treat it like "no cores configured"
+         syncCores = (cores == null) ? new String[0] : cores.split(",");
+         server = p.getProperty(SolrDataImportProperties.SERVER);
+         port = p.getProperty(SolrDataImportProperties.PORT);
+         webapp = p.getProperty(SolrDataImportProperties.WEBAPP);
+         params = p.getProperty(SolrDataImportProperties.PARAMS);
+
+         fixParams(defaultWebapp);
+     }
+
+     /** Re-reads the properties file and refreshes the cached settings from it. */
+     private void reloadSettings(){
+         p.loadProperties(true);
+         // without this the reload would never reach the fields used by run()
+         readSettings();
+     }
+
+     private static boolean isNullOrEmpty(String value){
+         return value == null || value.isEmpty();
+     }
+
+     /** Replaces null/empty settings with defaults so later checks cannot hit null. */
+     private void fixParams(String webAppName){
+         if(isNullOrEmpty(server)) server = "localhost";
+         if(isNullOrEmpty(port)) port = "80";
+         if(isNullOrEmpty(webapp)) webapp = (webAppName == null) ? "" : webAppName;
+         if(params == null) params = "";
+     }
+
+     /**
+      * Posts to every configured core. If no cores are configured or
+      * required settings are empty, nothing is sent and the properties file
+      * is reloaded instead so that a later run can pick up changes.
+      * Exceptions are printed, never propagated.
+      */
+     @Override
+     public void run() {
+         try{
+             if(syncCores.length < 1 || (syncCores.length == 1 && syncCores[0].isEmpty())){
+                 System.out.println("INFO: <index update process> No cores scheduled for data import");
+                 System.out.println("INFO: <index update process> Reloading global dataimport.properties");
+                 reloadSettings(); //listen for change in properties file
+
+             }else if(server.isEmpty() || webapp.isEmpty() || params.isEmpty()){
+                 System.out.println("INFO: <index update process> Insuficient info provided for data import");
+                 System.out.println("INFO: <index update process> Reloading global dataimport.properties");
+                 reloadSettings(); //listen for change in properties file
+
+             }else{
+                 for(String core : syncCores){
+                     // tolerate "coreA, coreB" style lists
+                     sendHttpPost(core.trim());
+                 }
+             }
+         }catch(Exception e){
+             e.printStackTrace();
+         }
+     }
+
+     /** Builds the URL for the given core from the current settings and posts to it. */
+     private void sendHttpPost(String coreName){
+         String coreUrl = "http://" + server + ":" + port + "/" + webapp + "/" + coreName + params;
+         sendHttpPost(coreUrl, coreName);
+     }
+
+     /** Prints one progress line for the given core. */
+     private static void log(String core, String message){
+         System.out.println("INFO: [" + core + "] <index update process> " + message);
+     }
+
+     /**
+      * Sends an HTTP POST to completeUrl and prints request/response details.
+      *
+      * @param completeUrl full URL to post to
+      * @param core core name, used only to tag the output lines
+      */
+     private void sendHttpPost(String completeUrl, String core){
+         DateFormat df = new SimpleDateFormat("dd.MM.yyyy HH:mm:ss SSS", new Locale("hr", "HR"));
+         Date startTime = new Date();
+
+         log(core, "Process started at .............. " + df.format(startTime));
+
+         HttpURLConnection conn = null;
+         try{
+             // Send HTTP POST
+             URL url = new URL(completeUrl);
+             conn = (HttpURLConnection)url.openConnection();
+
+             conn.setRequestMethod("POST");
+             conn.setRequestProperty("type", "submit");
+             conn.setDoOutput(true);
+
+             conn.connect();
+             log(core, "Request method\t\t\t" + conn.getRequestMethod());
+             log(core, "Succesfully connected to server\t" + server);
+             log(core, "Using port\t\t\t" + port);
+             log(core, "Application name\t\t\t" + webapp);
+             log(core, "URL params\t\t\t" + params);
+             log(core, "Full URL\t\t\t\t" + conn.getURL());
+             log(core, "Response message\t\t\t" + conn.getResponseMessage());
+             log(core, "Response code\t\t\t" + conn.getResponseCode());
+         }catch(MalformedURLException mue){
+             mue.printStackTrace();
+         }catch(IOException ioe){
+             ioe.printStackTrace();
+         }catch(Exception e){
+             e.printStackTrace();
+         }finally{
+             // disconnect even when reading the response failed
+             if(conn != null){
+                 conn.disconnect();
+                 log(core, "Disconnected from server\t\t" + server);
+             }
+             Date endTime = new Date();
+             log(core, "Process ended at ................ " + df.format(endTime));
+         }
+     }
+ }
+
+ }}}
+
+
+ == dataimport.properties file example ==
+ * explains default values when param is omitted
+
+ {{{
+ #Tue Jul 20 15:12:52 CEST 2010
+ metadataObject.last_index_time=2010-07-20 15\:12\:47
+ last_index_time=2010-07-20 15\:12\:47
+
+
+ #################################################
+ # #
+ # dataimport scheduler properties #
+ # #
+ #################################################
+
+ #which cores you want to schedule [mandatory]
+ syncCores=coreHr,coreEn
+
+ #solr server name or IP address [defaults to localhost if empty]
+ server=
+
+ #solr server port [defaults to 80 if empty]
+ port=8080
+
+ #application name/context [defaults to ServletContextListener's context name (web app name)]
+ webapp=solrDIHSchedulerTest
+
+ #URL params [mandatory]
+ params=/select?clean=false&commit=true&command=delta-import&qt=/dataimport&handler=/dataimport
+ }}}
+
+
+
= Where to find it? =
DataImportHandler is a new addition to Solr. You can either:
* Download a nightly build of Solr from [[http://lucene.apache.org/solr/|Solr website]],
or
|