lucene-solr-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Apache Wiki <wikidi...@apache.org>
Subject [Solr Wiki] Update of "DataImportHandler" by MarkoBonaci
Date Thu, 16 Sep 2010 13:50:13 GMT
Dear Wiki user,

You have subscribed to a wiki page or wiki category on "Solr Wiki" for change notification.

The "DataImportHandler" page has been changed by MarkoBonaci.
http://wiki.apache.org/solr/DataImportHandler?action=diff&rev1=257&rev2=258

--------------------------------------------------

  = Scheduling =
  {i}
   * Data``Import``Handler``Scheduler
-  * Version 1.0
+  * Version 1.1
-  * Last revision: 10.09.2010.
+  * Last revision: 16.09.2010.
   * Author: Marko Bonaci, Zagreb, Croatia
   * Enables scheduling DIH delta and/or full imports
   * Proves the concept; needs an almost complete overhaul
   * Successfully tested on ''Apache Tomcat v6'' (should work on any servlet container)
   * Hasn't been committed to SVN (published only here)
+ 
  <<BR>>
  <!> Global TODO:
-  * add comments to methods
+  * make it ''core-aware'' (to work with and without cores)
+  * add ''cancel'' functionality (to be able to completely disable DIHScheduler background
thread, without stopping the app/server). Currently sync can be disabled by emptying ''syncCores''
param in dataimport.properties file (but the background thread remains active and reloads
the properties file on every run)
+  * parametrize the schedule interval in minutes (currently set to 10 minutes in ''Application``Listener's
contextInitialized'' method)
   * try to use Solr's internal classes wherever possible
-  * parametrize the schedule interval in minutes (currently set to 10 minutes in ''Application``Listener's
contextInitialized'' method)
+ 
  <<BR>>
  == Prereqs ==
   {1} working DIH configuration in place <<BR>>
   {2} ''dataimport.properties'' file in folder ''solr.home/conf/'' with mandatory params
inside (see below for an example of ''dataimport.properties'') <<BR>>
   {3} ''solr.home'' full path defined in OS System properties (Win: My Computer > Properties
> Advanced > Environment > System variables > Add ''solr.solr.home'' with full
path to solr folder)
+ 
  <<BR>>
  == SolrDataImportProperties ==
   * uses [[http://download.oracle.com/javase/6/docs/api/java/util/Properties.html|java.util.Properties]]
to load settings from ''dataimport.properties''
  
+ {OK} Revisions:
+  * v1.1:
+  * now using Solr``Resource``Loader to get solr.home (as opposed to System properties in
v1.0)
+  * logging done using slf4j
+ 
  <!> TODO:
-  * currently uses system property for locating ''solr.home'' (use Solr's resource loader
instead)
+  (./) currently uses system property for locating ''solr.home'' (use Solr's resource loader
instead)
+  * add javadoc comments
  
  {{{
  package hr.mbo.solr.dataimport;
@@ -1071, +1081 @@

  import java.io.IOException;
  import java.util.Properties;
  
+ import org.apache.commons.io.FilenameUtils;
+ import org.apache.solr.core.SolrResourceLoader;
+ import org.slf4j.Logger;
+ import org.slf4j.LoggerFactory;
  
  public class SolrDataImportProperties {
  	private Properties properties;
@@ -1080, +1094 @@

  	public static final String WEBAPP 	= "webapp";
  	public static final String PARAMS 	= "params";
  	
+ 	private static final Logger logger = LoggerFactory.getLogger(SolrDataImportProperties.class);
  	
  	public SolrDataImportProperties(){
  		loadProperties(true);
@@ -1087, +1102 @@

  	
  	public void loadProperties(boolean force){
  		try{
+ 			SolrResourceLoader loader = new SolrResourceLoader(null);
+ 			logger.info("SolrResourceLoader instance dir: " + loader.getInstanceDir());
+ 			
+ 			String configDir = loader.getConfigDir();
+ 			configDir = FilenameUtils.normalizeNoEndSeparator(configDir);
+ 			
  			if(force || properties == null){
  				properties = new Properties();
- 				String dataImportPropertiesPath = System.getProperty("solr.solr.home") + "\\conf\\dataimport.properties";
- 	
+ 							
+ 				String dataImportPropertiesPath = configDir + "\\dataimport.properties";
+ 				
  				FileInputStream fis = new FileInputStream(dataImportPropertiesPath);
  				properties.load(fis);	
  			}
- 		}catch(FileNotFoundException e){
+ 		}catch(FileNotFoundException fnfe){
- 			e.printStackTrace();
+ 			logger.error("Error locating DataImportScheduler dataimport.properties file", fnfe);
  		}catch(IOException ioe){
- 			ioe.printStackTrace();
+ 			logger.error("Error reading DataImportScheduler dataimport.properties file", ioe);
+ 		}catch(Exception e){
+ 			logger.error("Error loading DataImportScheduler properties", e);
- 		}		
+ 		}
  	}
  	
  	public String getProperty(String key){
@@ -1114, +1138 @@

   * uses ''HTTPPostScheduler'', [[http://download.oracle.com/javase/6/docs/api/java/util/Timer.html|java.util.Timer]]
and context attribute map to facilitate periodic method invocation (scheduling)
   * Timer is essentially a facility for threads to schedule tasks for future execution in
a background thread.
  
+ {OK} Revisions:
+  * v1.1:
+  * logging done using slf4j
+ 
  {{{
  package hr.mbo.solr.scheduler;
  
@@ -1125, +1153 @@

  import javax.servlet.ServletContextEvent;
  import javax.servlet.ServletContextListener;
  
+ import org.slf4j.Logger;
+ import org.slf4j.LoggerFactory;
+ 
  public class ApplicationListener implements ServletContextListener {
  
+ 	private static final Logger logger = LoggerFactory.getLogger(ApplicationListener.class);
+ 	
  	@Override
  	public void contextDestroyed(ServletContextEvent servletContextEvent) {
  		ServletContext servletContext = servletContextEvent.getServletContext();
  
- 		// get our timer from the app attribute map
+ 		// get our timer from the Context
  		Timer timer = (Timer)servletContext.getAttribute("timer");
  
  		// cancel all pending tasks in the timers queue
@@ -1147, +1180 @@

  	public void contextInitialized(ServletContextEvent servletContextEvent) {
  		ServletContext servletContext = servletContextEvent.getServletContext();
  		try{
- 			// create the timer and HTTPPostScheduler (TimerTask) objects
+ 			// create the timer and timer task objects
  			Timer timer = new Timer();
  			HTTPPostScheduler task = new HTTPPostScheduler(servletContext.getServletContextName());
  
@@ -1155, +1188 @@

  			Calendar calendar = Calendar.getInstance();
  			Date startTime = calendar.getTime();
  
- 			// schedule the task to run every ten minutes
+ 			// schedule the task to run every ten minutes
  			timer.scheduleAtFixedRate(task, startTime, 1000 * 60 * 10);
  
- 			// save our timer in context attribute map for later use
+ 			// save our timer for later use
  			servletContext.setAttribute("timer", timer);
  			
  		} catch (Exception e) {
+ 			logger.error("Problem initializing the scheduled task: ", e);
- 			e.printStackTrace();
- 			servletContext.log("Problem initializing the task: " + e.getMessage ());
  		}
  
  	}
@@ -1179, +1211 @@

   * uses those params to assemble complete URL
   * invokes URL using HTTP POST request
  
+ {OK} Revisions:
+  * v1.1:
+  * forces reloading of the properties file if the response code is not 200
+  * logging done using slf4j
+ 
  <!> TODO:
-  * make it ''core-aware'' (to work with and without cores)
-  * add ''cancel'' functionality (to be able to completely disable DIHScheduler background
thread, without stopping the app or server)
   * explode ''params'' to specific parameters
-  * currently logs events to server console -> use logger instead
+  (./) currently logs events to server console -> use logger instead
  
  {{{
  package hr.mbo.solr.scheduler;
@@ -1200, +1235 @@

  import java.util.Locale;
  import java.util.TimerTask;
  
+ import org.slf4j.Logger;
+ import org.slf4j.LoggerFactory;
+ 
  
  public class HTTPPostScheduler extends TimerTask {
  	private String[] syncCores;
@@ -1209, +1247 @@

  	private String params;
  	private SolrDataImportProperties p;
  	
+ 	private static final Logger logger = LoggerFactory.getLogger(HTTPPostScheduler.class);
  	
  	public HTTPPostScheduler(String webAppName){
  		//load properties from global dataimport.properties
@@ -1231, +1270 @@

  	
  	public void run() {
  		try{
+ 			
  			if(syncCores.length < 1 || (syncCores.length == 1 && syncCores[0].isEmpty())){
- 				System.out.println("INFO: <index update process> No cores scheduled for data import");
+ 				logger.warn("<index update process> No cores scheduled for data import");
- 				System.out.println("INFO: <index update process> Reloading global dataimport.properties");
+ 				logger.info("<index update process> Reloading global dataimport.properties");
  				p.loadProperties(true);	//listen for change in properties file
  				
  			}else if(server.isEmpty() || webapp.isEmpty() || params.isEmpty()){
- 				System.out.println("INFO: <index update process> Insuficient info provided for
data import");
+ 				logger.warn("<index update process> Insuficient info provided for data import");
- 				System.out.println("INFO: <index update process> Reloading global dataimport.properties");
+ 				logger.info("<index update process> Reloading global dataimport.properties");
  				p.loadProperties(true);	//listen for change in properties file
  				
  			}else{
@@ -1247, +1287 @@

  				}
  			}
  		}catch(Exception e){
- 			e.printStackTrace();
+ 			logger.error("Failed to prepare for sendHttpPost", e);
  		}
  	}
  	
@@ -1260, +1300 @@

  		DateFormat df = new SimpleDateFormat("dd.MM.yyyy HH:mm:ss SSS", new Locale("hr", "HR"));
  		Date startTime = new Date();
  		
- 		System.out.println("INFO: [" + core + "] <index update process> Process started
at .............. " + df.format(startTime));
+ 		logger.info("[" + core + "] <index update process> Process started at ..............
" + df.format(startTime));
  		
  		try{
  			// Send HTTP POST
- 			URL url = new URL(completeUrl);
+ 		    URL url = new URL(completeUrl);
- 			HttpURLConnection conn = (HttpURLConnection)url.openConnection();	
+ 		    HttpURLConnection conn = (HttpURLConnection)url.openConnection();	
  		    
- 			conn.setRequestMethod("POST");
+ 		    conn.setRequestMethod("POST");
- 			conn.setRequestProperty("type", "submit");
+ 		    conn.setRequestProperty("type", "submit");
- 			conn.setDoOutput(true);
+ 		    conn.setDoOutput(true);
  		    
- 			conn.connect();
+ 		    conn.connect();
- 			System.out.println("INFO: [" + core + "] <index update process> Request method\t\t\t"
+ conn.getRequestMethod());
+ 		    logger.info("[" + core + "] <index update process> Request method\t\t\t" + conn.getRequestMethod());
- 			System.out.println("INFO: [" + core + "] <index update process> Succesfully connected
to server\t" + server);		    
+ 		    logger.info("[" + core + "] <index update process> Succesfully connected to
server\t" + server);		    
- 			System.out.println("INFO: [" + core + "] <index update process> Using port\t\t\t"
+ port);
+ 		    logger.info("[" + core + "] <index update process> Using port\t\t\t" + port);
- 			System.out.println("INFO: [" + core + "] <index update process> Application name\t\t\t"
+ webapp);
+ 		    logger.info("[" + core + "] <index update process> Application name\t\t\t" +
webapp);
- 			System.out.println("INFO: [" + core + "] <index update process> URL params\t\t\t"
+ params);
+ 		    logger.info("[" + core + "] <index update process> URL params\t\t\t" + params);
- 			System.out.println("INFO: [" + core + "] <index update process> Full URL\t\t\t\t"
+ conn.getURL());
+ 		    logger.info("[" + core + "] <index update process> Full URL\t\t\t\t" + conn.getURL());
- 			System.out.println("INFO: [" + core + "] <index update process> Response message\t\t\t"
+ conn.getResponseMessage());
+ 		    logger.info("[" + core + "] <index update process> Response message\t\t\t" +
conn.getResponseMessage());
- 			System.out.println("INFO: [" + core + "] <index update process> Response code\t\t\t"
+ conn.getResponseCode());
+ 		    logger.info("[" + core + "] <index update process> Response code\t\t\t" + conn.getResponseCode());
  		    
+ 		    //force reloading the properties file if an error occurs 
+ 		    if(conn.getResponseCode() != 200) p.loadProperties(true);
+ 		    
- 			conn.disconnect();
+ 		    conn.disconnect();
- 			System.out.println("INFO: [" + core + "] <index update process> Disconnected from
server\t\t" + server);
+ 		    logger.info("[" + core + "] <index update process> Disconnected from server\t\t"
+ server);
- 			Date endTime = new Date();
+ 		    Date endTime = new Date();
- 			System.out.println("INFO: [" + core + "] <index update process> Process ended at
................ " + df.format(endTime));
+ 		    logger.info("[" + core + "] <index update process> Process ended at ................
" + df.format(endTime));
  		}catch(MalformedURLException mue){
- 			mue.printStackTrace();
+ 			logger.error("Failed to assemble URL for HTTP POST", mue);
  		}catch(IOException ioe){
- 			ioe.printStackTrace();
+ 			logger.error("Failed to connect to the specified URL while trying to send HTTP POST",
ioe);
  		}catch(Exception e){
- 			e.printStackTrace();
+ 			logger.error("Failed to send HTTP POST", e);
  		}
  	}
+ 
  }
- 
  }}}
  
  <<BR>>

Mime
View raw message