manifoldcf-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From kwri...@apache.org
Subject svn commit: r1455389 - in /manifoldcf/trunk: connectors/rss/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/rss/ connectors/rss/connector/src/test/java/org/apache/manifoldcf/crawler/connectors/rss/tests/ connectors/sharepoint/connector...
Date Tue, 12 Mar 2013 01:57:24 GMT
Author: kwright
Date: Tue Mar 12 01:57:23 2013
New Revision: 1455389

URL: http://svn.apache.org/r1455389
Log:
Centralize all date parsing into one core module.

Added:
    manifoldcf/trunk/framework/core/src/main/java/org/apache/manifoldcf/core/common/DateParser.java
  (with props)
    manifoldcf/trunk/framework/core/src/test/java/org/apache/manifoldcf/core/common/
    manifoldcf/trunk/framework/core/src/test/java/org/apache/manifoldcf/core/common/DateTest.java
      - copied, changed from r1455382, manifoldcf/trunk/connectors/rss/connector/src/test/java/org/apache/manifoldcf/crawler/connectors/rss/tests/DateTest.java
Removed:
    manifoldcf/trunk/connectors/rss/connector/src/test/java/org/apache/manifoldcf/crawler/connectors/rss/tests/DateTest.java
Modified:
    manifoldcf/trunk/connectors/rss/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/rss/RSSConnector.java
    manifoldcf/trunk/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SharePointRepository.java
    manifoldcf/trunk/connectors/wiki/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/wiki/BaseProcessingContext.java
    manifoldcf/trunk/connectors/wiki/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/wiki/SingleLevelContext.java
    manifoldcf/trunk/connectors/wiki/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/wiki/SingleLevelErrorContext.java
    manifoldcf/trunk/connectors/wiki/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/wiki/WikiConnector.java
    manifoldcf/trunk/framework/build.xml

Modified: manifoldcf/trunk/connectors/rss/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/rss/RSSConnector.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/rss/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/rss/RSSConnector.java?rev=1455389&r1=1455388&r2=1455389&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/rss/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/rss/RSSConnector.java
(original)
+++ manifoldcf/trunk/connectors/rss/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/rss/RSSConnector.java
Tue Mar 12 01:57:23 2013
@@ -25,6 +25,7 @@ import org.apache.manifoldcf.crawler.sys
 import org.apache.manifoldcf.crawler.system.ManifoldCF;
 
 import org.apache.manifoldcf.core.fuzzyml.*;
+import org.apache.manifoldcf.core.common.DateParser;
 
 import org.apache.http.conn.ConnectTimeoutException;
 import org.apache.http.client.RedirectException;
@@ -4006,13 +4007,13 @@ public class RSSConnector extends org.ap
         Date origDateDate = null;
         if (pubDateField != null && pubDateField.length() > 0)
         {
-          origDateDate = parseRFC822Date(pubDateField);
+          origDateDate = DateParser.parseRFC822Date(pubDateField);
           // Special for China Daily News
           if (origDateDate == null)
-            origDateDate = parseChinaDate(pubDateField);
+            origDateDate = DateParser.parseChinaDate(pubDateField);
           // Special for LL
           if (origDateDate == null)
-            origDateDate = parseISO8601Date(pubDateField);
+            origDateDate = DateParser.parseISO8601Date(pubDateField);
         }
         Long origDate;
         if (origDateDate != null)
@@ -4424,7 +4425,7 @@ public class RSSConnector extends org.ap
       {
         Date origDateDate = null;
         if (pubDateField != null && pubDateField.length() > 0)
-          origDateDate = parseISO8601Date(pubDateField);
+          origDateDate = DateParser.parseISO8601Date(pubDateField);
 
         Long origDate;
         if (origDateDate != null)
@@ -4827,7 +4828,7 @@ public class RSSConnector extends org.ap
       {
         Date origDateDate = null;
         if (pubDateField != null && pubDateField.length() > 0)
-          origDateDate = parseISO8601Date(pubDateField);
+          origDateDate = DateParser.parseISO8601Date(pubDateField);
 
         Long origDate;
         if (origDateDate != null)
@@ -5120,7 +5121,7 @@ public class RSSConnector extends org.ap
       {
         Date origDateDate = null;
         if (pubDateField != null && pubDateField.length() > 0)
-          origDateDate = parseISO8601Date(pubDateField);
+          origDateDate = DateParser.parseISO8601Date(pubDateField);
 
         Long origDate;
         if (origDateDate != null)
@@ -5168,293 +5169,6 @@ public class RSSConnector extends org.ap
     }
   }
 
-  /** Parse a China Daily News date */
-  protected static Date parseChinaDate(String dateValue)
-  {
-    dateValue = dateValue.trim();
-    // Format: 2007/12/30 11:01
-    int index;
-    index = dateValue.indexOf("/");
-    if (index == -1)
-      return null;
-    String year = dateValue.substring(0,index);
-    dateValue = dateValue.substring(index+1);
-    index = dateValue.indexOf("/");
-    if (index == -1)
-      return null;
-    String month = dateValue.substring(0,index);
-    dateValue = dateValue.substring(index+1);
-    index = dateValue.indexOf(" ");
-    String day;
-    String hour = null;
-    String minute = null;
-    String second = null;
-    if (index == -1)
-      day = dateValue;
-    else
-    {
-      day = dateValue.substring(0,index);
-      dateValue = dateValue.substring(index+1);
-      index = dateValue.indexOf(":");
-      if (index == -1)
-        return null;
-      hour = dateValue.substring(0,index);
-      dateValue = dateValue.substring(index+1);
-      index = dateValue.indexOf(":");
-      if (index != -1)
-      {
-        minute = dateValue.substring(0,index);
-        dateValue = dateValue.substring(index+1);
-        second = dateValue;
-      }
-      else
-        minute = dateValue;
-    }
-    TimeZone tz = TimeZone.getTimeZone("GMT");
-    Calendar c = new GregorianCalendar(tz);
-    try
-    {
-      int value = Integer.parseInt(year);
-      if (value < 1900)
-        value += 1900;
-      c.set(Calendar.YEAR,value);
-
-      value = Integer.parseInt(month);
-      c.set(Calendar.MONTH,value-1);
-
-      value = Integer.parseInt(day);
-      c.set(Calendar.DAY_OF_MONTH,value);
-
-      if (hour != null)
-        value = Integer.parseInt(hour);
-      else
-        value = 0;
-      c.set(Calendar.HOUR_OF_DAY,value);
-
-      if (minute != null)
-        value = Integer.parseInt(minute);
-      else
-        value = 0;
-      c.set(Calendar.MINUTE,value);
-
-      if (second != null)
-        value = Integer.parseInt(second);
-      else
-        value = 0;
-      c.set(Calendar.SECOND,value);
-
-      c.set(Calendar.MILLISECOND,0);
-      return new Date(c.getTimeInMillis());
-    }
-    catch (NumberFormatException e)
-    {
-      return null;
-    }
-
-  }
-
-  /** Parse ISO 8601 dates, and their common variants.
-  */
-  protected static Date parseISO8601Date(String isoDateValue)
-  {
-    // There are a number of variations on the basic format.
-    // We'll look for key characters to help is determine which is which.
-    StringBuilder isoFormatString = new StringBuilder("yy");
-    if (isoDateValue.length() > 2 && isoDateValue.charAt(2) != '-')
-      isoFormatString.append("yy");
-    isoFormatString.append("-MM-dd'T'HH:mm:ss");
-    if (isoDateValue.indexOf(".") != -1)
-      isoFormatString.append(".SSS");
-    if (isoDateValue.endsWith("Z"))
-      isoFormatString.append("'Z'");
-    else
-      isoFormatString.append("Z");      // RFC 822 time, including general time zones
-    java.text.DateFormat iso8601Format = new java.text.SimpleDateFormat(isoFormatString.toString());
-    try
-    {
-      return iso8601Format.parse(isoDateValue);
-    }
-    catch (java.text.ParseException e)
-    {
-      System.out.println("Date value: '"+isoDateValue+"'");
-      e.printStackTrace();
-      return null;
-    }
-  }
-  
-  /** Timezone mapping from RFC822 timezones to ones understood by Java */
-  
-  // Month map
-  protected static HashMap monthMap = new HashMap();
-  static
-  {
-    monthMap.put("jan",new Integer(1));
-    monthMap.put("feb",new Integer(2));
-    monthMap.put("mar",new Integer(3));
-    monthMap.put("apr",new Integer(4));
-    monthMap.put("may",new Integer(5));
-    monthMap.put("jun",new Integer(6));
-    monthMap.put("jul",new Integer(7));
-    monthMap.put("aug",new Integer(8));
-    monthMap.put("sep",new Integer(9));
-    monthMap.put("oct",new Integer(10));
-    monthMap.put("nov",new Integer(11));
-    monthMap.put("dec",new Integer(12));
-  }
-
-  protected static final HashMap milTzMap;
-  static
-  {
-    milTzMap = new HashMap();
-    milTzMap.put("Z","GMT");
-    milTzMap.put("UT","GMT");
-    milTzMap.put("A","GMT-01:00");
-    milTzMap.put("M","GMT-12:00");
-    milTzMap.put("N","GMT+01:00");
-    milTzMap.put("Y","GMT+12:00");
-  }
-
-  /** Parse RFC822 date */
-  protected static Date parseRFC822Date(String dateValue)
-  {
-    dateValue = dateValue.trim();
-    // See http://www.faqs.org/rfcs/rfc822.html for legal formats
-    // Format: [day of week,] day mo year hh24:mm:ss tz
-    int commaIndex = dateValue.indexOf(",");
-    String usable;
-    if (commaIndex == -1)
-      usable = dateValue;
-    else
-      usable = dateValue.substring(commaIndex+1).trim();
-    int index;
-
-    index = usable.indexOf(" ");
-    if (index == -1)
-      return null;
-    String day = usable.substring(0,index);
-    usable = usable.substring(index+1).trim();
-
-    index = usable.indexOf(" ");
-    if (index == -1)
-      return null;
-    String month = usable.substring(0,index).toLowerCase();
-    usable = usable.substring(index+1).trim();
-
-    String year;
-    String hour = null;
-    String minute = null;
-    String second = null;
-    String timezone = null;
-
-    index = usable.indexOf(" ");
-    if (index != -1)
-    {
-      year = usable.substring(0,index);
-      usable = usable.substring(index+1).trim();
-
-      index = usable.indexOf(":");
-      if (index == -1)
-        return null;
-      hour = usable.substring(0,index);
-      usable = usable.substring(index+1).trim();
-
-      index = usable.indexOf(":");
-      if (index != -1)
-      {
-        minute = usable.substring(0,index);
-        usable = usable.substring(index+1).trim();
-
-        index = usable.indexOf(" ");
-        if (index == -1)
-          second = usable;
-        else
-        {
-          second = usable.substring(0,index);
-          timezone = usable.substring(index+1).trim();
-        }
-      }
-      else
-      {
-        index = usable.indexOf(" ");
-        if (index == -1)
-          minute = usable;
-        else
-        {
-          minute = usable.substring(0,index);
-          timezone = usable.substring(index+1).trim();
-        }
-      }
-    }
-    else
-      year = usable;
-
-    // Now construct a calendar object from this
-    TimeZone tz;
-    if (timezone != null && timezone.length() > 0)
-    {
-      if (timezone.startsWith("+") || timezone.startsWith("-"))
-      {
-        if (timezone.indexOf(":") == -1 && timezone.length() > 3)
-          timezone = timezone.substring(0,timezone.length()-2) + ":" + timezone.substring(timezone.length()-2);
-        timezone = "GMT"+timezone;
-      }
-      else
-      {
-        // Map special timezones to java timezones
-        if (milTzMap.get(timezone) != null)
-          timezone = (String)milTzMap.get(timezone);
-      }
-
-    }
-    else
-      timezone = "GMT";
-
-
-    tz = TimeZone.getTimeZone(timezone);
-
-    Calendar c = new GregorianCalendar(tz);
-    try
-    {
-      int value = Integer.parseInt(year);
-      if (value < 1900)
-        value += 1900;
-      c.set(Calendar.YEAR,value);
-
-      Integer x = (Integer)monthMap.get(month);
-      if (x == null)
-        return null;
-      c.set(Calendar.MONTH,x.intValue()-1);
-
-      value = Integer.parseInt(day);
-      c.set(Calendar.DAY_OF_MONTH,value);
-
-      if (hour != null)
-        value = Integer.parseInt(hour);
-      else
-        value = 0;
-      c.set(Calendar.HOUR_OF_DAY,value);
-
-      if (minute != null)
-        value = Integer.parseInt(minute);
-      else
-        value = 0;
-      c.set(Calendar.MINUTE,value);
-
-      if (second != null)
-        value = Integer.parseInt(second);
-      else
-        value = 0;
-      c.set(Calendar.SECOND,value);
-
-      c.set(Calendar.MILLISECOND,0);
-      return new Date(c.getTimeInMillis());
-    }
-    catch (NumberFormatException e)
-    {
-      return null;
-    }
-
-  }
 
   /** Get the maximum number of documents to amalgamate together into one batch, for this
connector.
   *@return the maximum number. 0 indicates "unlimited".

Modified: manifoldcf/trunk/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SharePointRepository.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SharePointRepository.java?rev=1455389&r1=1455388&r2=1455389&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SharePointRepository.java
(original)
+++ manifoldcf/trunk/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SharePointRepository.java
Tue Mar 12 01:57:23 2013
@@ -781,8 +781,8 @@ public class SharePointRepository extend
                   {
                     // Item has a modified date so we presume it exists.
                     
-                    Date modifiedDateValue = parseDate(modifiedDate);
-                    Date createdDateValue = parseDate(createdDate);
+                    Date modifiedDateValue = DateParser.parseISO8601Date(modifiedDate);
+                    Date createdDateValue = DateParser.parseISO8601Date(createdDate);
                     
                     // Build version string
                     String versionToken = modifiedDate;
@@ -939,8 +939,8 @@ public class SharePointRepository extend
                   if (modifyDate != null)
                   {
                     // Item has a modified date, so we presume it exists
-                    Date modifiedDateValue = parseDate(modifiedDate);
-                    Date createdDateValue = parseDate(createdDate);
+                    Date modifiedDateValue = DateParser.parseISO8601Date(modifiedDate);
+                    Date createdDateValue = DateParser.parseISO8601Date(createdDate);
 
                     // Build version string
                     String versionToken = modifyDate;
@@ -1101,21 +1101,6 @@ public class SharePointRepository extend
     return index;
   }
   
-  protected static Date parseDate(String dateTimeValue)
-  {
-    if (dateTimeValue == null)
-      return null;
-    java.text.DateFormat iso8601Format = new java.text.SimpleDateFormat ("yyyy-MM-dd'T'HH:mm:ss'Z'");
-    try
-    {
-      return iso8601Format.parse(dateTimeValue);
-    }
-    catch (java.text.ParseException e)
-    {
-      return null;
-    }
-  }
-  
   protected String[] lookupAccessTokensSorted(String encodedSitePath, String guid, Map<String,String[]>
ACLmap)
     throws ManifoldCFException, ServiceInterruption
   {

Modified: manifoldcf/trunk/connectors/wiki/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/wiki/BaseProcessingContext.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/wiki/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/wiki/BaseProcessingContext.java?rev=1455389&r1=1455388&r2=1455389&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/wiki/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/wiki/BaseProcessingContext.java
(original)
+++ manifoldcf/trunk/connectors/wiki/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/wiki/BaseProcessingContext.java
Tue Mar 12 01:57:23 2013
@@ -42,12 +42,14 @@ public abstract class BaseProcessingCont
     super(theStream);
   }
 
+  @Override
   protected XMLContext beginTag(String namespaceURI, String localName, String qName, Attributes
atts)
     throws ManifoldCFException, ServiceInterruption
   {
     return super.beginTag(namespaceURI,localName,qName,atts);
   }
-    
+  
+  @Override
   protected void endTag()
     throws ManifoldCFException, ServiceInterruption
   {

Modified: manifoldcf/trunk/connectors/wiki/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/wiki/SingleLevelContext.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/wiki/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/wiki/SingleLevelContext.java?rev=1455389&r1=1455388&r2=1455389&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/wiki/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/wiki/SingleLevelContext.java
(original)
+++ manifoldcf/trunk/connectors/wiki/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/wiki/SingleLevelContext.java
Tue Mar 12 01:57:23 2013
@@ -46,6 +46,7 @@ public abstract class SingleLevelContext
     this.nodeName = nodeName;
   }
 
+  @Override
   protected XMLContext beginTag(String namespaceURI, String localName, String qName, Attributes
atts)
     throws ManifoldCFException, ServiceInterruption
   {
@@ -56,6 +57,7 @@ public abstract class SingleLevelContext
   
   protected abstract BaseProcessingContext createChild(String namespaceURI, String localName,
String qName, Attributes atts);
   
+  @Override
   protected void endTag()
     throws ManifoldCFException, ServiceInterruption
   {

Modified: manifoldcf/trunk/connectors/wiki/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/wiki/SingleLevelErrorContext.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/wiki/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/wiki/SingleLevelErrorContext.java?rev=1455389&r1=1455388&r2=1455389&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/wiki/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/wiki/SingleLevelErrorContext.java
(original)
+++ manifoldcf/trunk/connectors/wiki/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/wiki/SingleLevelErrorContext.java
Tue Mar 12 01:57:23 2013
@@ -55,6 +55,7 @@ public abstract class SingleLevelErrorCo
     return errorType != null && errorType.equals(ERROR_TYPE_LOGIN_NEEDED);
   }
   
+  @Override
   protected XMLContext beginTag(String namespaceURI, String localName, String qName, Attributes
atts)
     throws ManifoldCFException, ServiceInterruption
   {
@@ -70,6 +71,7 @@ public abstract class SingleLevelErrorCo
   
   protected abstract BaseProcessingContext createChild(String namespaceURI, String localName,
String qName, Attributes atts);
   
+  @Override
   protected void endTag()
     throws ManifoldCFException, ServiceInterruption
   {

Modified: manifoldcf/trunk/connectors/wiki/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/wiki/WikiConnector.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/wiki/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/wiki/WikiConnector.java?rev=1455389&r1=1455388&r2=1455389&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/wiki/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/wiki/WikiConnector.java
(original)
+++ manifoldcf/trunk/connectors/wiki/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/wiki/WikiConnector.java
Tue Mar 12 01:57:23 2013
@@ -23,9 +23,10 @@ import org.apache.manifoldcf.agents.inte
 import org.apache.manifoldcf.crawler.interfaces.*;
 import org.apache.manifoldcf.crawler.system.Logging;
 
+import org.apache.manifoldcf.core.common.*;
+
 import org.xml.sax.Attributes;
 
-import org.apache.manifoldcf.core.common.XMLDoc;
 import org.apache.manifoldcf.agents.common.XMLStream;
 import org.apache.manifoldcf.agents.common.XMLContext;
 import org.apache.manifoldcf.agents.common.XMLStringContext;
@@ -446,10 +447,12 @@ public class WikiConnector extends org.a
       this.result = result;
     }
 
+    @Override
     protected BaseProcessingContext createChild(String namespaceURI, String localName, String
qName, Attributes atts) {
       return new WikiLoginAPIResultAPIContext(theStream, namespaceURI, localName, qName,
atts, result);
     }
 
+    @Override
     protected void finishChild(BaseProcessingContext child)
       throws ManifoldCFException {
       token = ((WikiLoginAPIResultAPIContext)child).getToken();
@@ -475,6 +478,7 @@ public class WikiConnector extends org.a
       this.result = result;
     }
 
+    @Override
     protected XMLContext beginTag(String namespaceURI, String localName, String qName, Attributes
atts)
       throws ManifoldCFException, ServiceInterruption {
       if (qName.equals("login")) {
@@ -587,10 +591,12 @@ public class WikiConnector extends org.a
       this.result = result;
     }
 
+    @Override
     protected BaseProcessingContext createChild(String namespaceURI, String localName, String
qName, Attributes atts) {
       return new WikiTokenLoginAPIResultAPIContext(theStream, namespaceURI, localName, qName,
atts, result);
     }
 
+    @Override
     protected void finishChild(BaseProcessingContext child)
       throws ManifoldCFException {
     }
@@ -610,6 +616,7 @@ public class WikiConnector extends org.a
       this.result = result;
     }
 
+    @Override
     protected XMLContext beginTag(String namespaceURI, String localName, String qName, Attributes
atts)
       throws ManifoldCFException, ServiceInterruption {
       if (qName.equals("login")) {
@@ -1927,11 +1934,13 @@ public class WikiConnector extends org.a
       super(theStream,"api");
     }
 
+    @Override
     protected BaseProcessingContext createChild(String namespaceURI, String localName, String
qName, Attributes atts)
     {
       return new WikiCheckQueryContext(theStream,namespaceURI,localName,qName,atts);
     }
     
+    @Override
     protected void finishChild(BaseProcessingContext child)
       throws ManifoldCFException
     {
@@ -1961,11 +1970,13 @@ public class WikiConnector extends org.a
       super(theStream,namespaceURI,localName,qName,atts,"query");
     }
 
+    @Override
     protected BaseProcessingContext createChild(String namespaceURI, String localName, String
qName, Attributes atts)
     {
       return new WikiCheckAllPagesContext(theStream,namespaceURI,localName,qName,atts);
     }
 
+    @Override
     protected void finishChild(BaseProcessingContext child)
       throws ManifoldCFException
     {
@@ -1989,11 +2000,13 @@ public class WikiConnector extends org.a
       super(theStream,namespaceURI,localName,qName,atts,"allpages");
     }
 
+    @Override
     protected BaseProcessingContext createChild(String namespaceURI, String localName, String
qName, Attributes atts)
     {
       return new WikiCheckPContext(theStream,namespaceURI,localName,qName,atts);
     }
 
+    @Override
     protected void finishChild(BaseProcessingContext child)
       throws ManifoldCFException
     {
@@ -2307,11 +2320,13 @@ public class WikiConnector extends org.a
       this.startPageTitle = startPageTitle;
     }
 
+    @Override
     protected BaseProcessingContext createChild(String namespaceURI, String localName, String
qName, Attributes atts)
     {
       return new WikiListPagesQueryContext(theStream,namespaceURI,localName,qName,atts,buffer,startPageTitle);
     }
     
+    @Override
     protected void finishChild(BaseProcessingContext child)
       throws ManifoldCFException
     {
@@ -2346,11 +2361,13 @@ public class WikiConnector extends org.a
       this.startPageTitle = startPageTitle;
     }
 
+    @Override
     protected BaseProcessingContext createChild(String namespaceURI, String localName, String
qName, Attributes atts)
     {
       return new WikiListPagesAllPagesContext(theStream,namespaceURI,localName,qName,atts,buffer,startPageTitle);
     }
 
+    @Override
     protected void finishChild(BaseProcessingContext child)
       throws ManifoldCFException
     {
@@ -2379,12 +2396,14 @@ public class WikiConnector extends org.a
       this.startPageTitle = startPageTitle;
     }
 
+    @Override
     protected BaseProcessingContext createChild(String namespaceURI, String localName, String
qName, Attributes atts)
     {
       // When we recognize allpages, we need to look for <p> records.
       return new WikiListPagesPContext(theStream,namespaceURI,localName,qName,atts,buffer,startPageTitle);
     }
 
+    @Override
     protected void finishChild(BaseProcessingContext child)
       throws ManifoldCFException
     {
@@ -2669,11 +2688,13 @@ public class WikiConnector extends org.a
       this.urls = urls;
     }
 
+    @Override
     protected BaseProcessingContext createChild(String namespaceURI, String localName, String
qName, Attributes atts)
     {
       return new WikiGetDocURLsQueryContext(theStream,namespaceURI,localName,qName,atts,urls);
     }
     
+    @Override
     protected void finishChild(BaseProcessingContext child)
       throws ManifoldCFException
     {
@@ -2699,11 +2720,13 @@ public class WikiConnector extends org.a
       this.urls = urls;
     }
 
+    @Override
     protected BaseProcessingContext createChild(String namespaceURI, String localName, String
qName, Attributes atts)
     {
       return new WikiGetDocURLsPagesContext(theStream,namespaceURI,localName,qName,atts,urls);
     }
 
+    @Override
     protected void finishChild(BaseProcessingContext child)
       throws ManifoldCFException
     {
@@ -2723,11 +2746,13 @@ public class WikiConnector extends org.a
       this.urls = urls;
     }
 
+    @Override
     protected BaseProcessingContext createChild(String namespaceURI, String localName, String
qName, Attributes atts)
     {
       return new WikiGetDocURLsPageContext(theStream,namespaceURI,localName,qName,atts,urls);
     }
 
+    @Override
     protected void finishChild(BaseProcessingContext child)
       throws ManifoldCFException
     {
@@ -2991,11 +3016,13 @@ public class WikiConnector extends org.a
       this.versions = versions;
     }
 
+    @Override
     protected BaseProcessingContext createChild(String namespaceURI, String localName, String
qName, Attributes atts)
     {
       return new WikiGetTimestampQueryContext(theStream,namespaceURI,localName,qName,atts,versions);
     }
     
+    @Override
     protected void finishChild(BaseProcessingContext child)
       throws ManifoldCFException
     {
@@ -3020,11 +3047,13 @@ public class WikiConnector extends org.a
       this.versions = versions;
     }
 
+    @Override
     protected BaseProcessingContext createChild(String namespaceURI, String localName, String
qName, Attributes atts)
     {
       return new WikiGetTimestampPagesContext(theStream,namespaceURI,localName,qName,atts,versions);
     }
 
+    @Override
     protected void finishChild(BaseProcessingContext child)
       throws ManifoldCFException
     {
@@ -3044,11 +3073,13 @@ public class WikiConnector extends org.a
       this.versions = versions;
     }
 
+    @Override
     protected BaseProcessingContext createChild(String namespaceURI, String localName, String
qName, Attributes atts)
     {
       return new WikiGetTimestampPageContext(theStream,namespaceURI,localName,qName,atts,versions);
     }
 
+    @Override
     protected void finishChild(BaseProcessingContext child)
       throws ManifoldCFException
     {
@@ -3106,11 +3137,13 @@ public class WikiConnector extends org.a
       super(theStream,namespaceURI,localName,qName,atts,"revisions");
     }
 
+    @Override
     protected BaseProcessingContext createChild(String namespaceURI, String localName, String
qName, Attributes atts)
     {
       return new WikiGetTimestampRevContext(theStream,namespaceURI,localName,qName,atts);
     }
 
+    @Override
     protected void finishChild(BaseProcessingContext child)
       throws ManifoldCFException
     {
@@ -3364,12 +3397,13 @@ public class WikiConnector extends org.a
       this.namespaces = namespaces;
     }
 
-
+    @Override
     protected BaseProcessingContext createChild(String namespaceURI, String localName, String
qName, Attributes atts)
     {
       return new WikiGetNamespacesQueryContext(theStream,namespaceURI,localName,qName,atts,namespaces);
     }
     
+    @Override
     protected void finishChild(BaseProcessingContext child)
       throws ManifoldCFException
     {
@@ -3395,11 +3429,13 @@ public class WikiConnector extends org.a
       this.namespaces = namespaces;
     }
 
+    @Override
     protected BaseProcessingContext createChild(String namespaceURI, String localName, String
qName, Attributes atts)
     {
       return new WikiGetNamespacesNamespacesContext(theStream,namespaceURI,localName,qName,atts,namespaces);
     }
 
+    @Override
     protected void finishChild(BaseProcessingContext child)
       throws ManifoldCFException
     {
@@ -3419,11 +3455,13 @@ public class WikiConnector extends org.a
       this.namespaces = namespaces;
     }
 
+    @Override
     protected BaseProcessingContext createChild(String namespaceURI, String localName, String
qName, Attributes atts)
     {
       return new WikiGetNamespacesNsContext(theStream,namespaceURI,localName,qName,atts,namespaces);
     }
 
+    @Override
     protected void finishChild(BaseProcessingContext child)
       throws ManifoldCFException
     {
@@ -3546,7 +3584,7 @@ public class WikiConnector extends org.a
                 if (lastModified != null)
                 {
                   rd.addField("last-modified",lastModified);
-                  rd.setModifiedDate(parseISODate(lastModified));
+                  rd.setModifiedDate(DateParser.parseISO8601Date(lastModified));
                 }
 
                 if (allowACL != null && allowACL.length > 0) {
@@ -3659,19 +3697,6 @@ public class WikiConnector extends org.a
     }
   }
   
-  protected static Date parseISODate(String isoDateValue)
-  {
-    java.text.DateFormat iso8601Format = new java.text.SimpleDateFormat ("yyyy-MM-dd'T'HH:mm:ss'Z'");
-    try
-    {
-      return iso8601Format.parse(isoDateValue);
-    }
-    catch (java.text.ParseException e)
-    {
-      return null;
-    }
-  }
-  
   /** Thread to execute a "get doc info" operation.  This thread both executes the operation
and parses the result. */
   protected static class ExecuteGetDocInfoThread extends Thread
   {
@@ -3868,11 +3893,13 @@ public class WikiConnector extends org.a
       super(theStream,"api");
     }
 
+    @Override
     protected BaseProcessingContext createChild(String namespaceURI, String localName, String
qName, Attributes atts)
     {
       return new WikiGetDocInfoQueryContext(theStream,namespaceURI,localName,qName,atts);
     }
     
+    @Override
     protected void finishChild(BaseProcessingContext child)
       throws ManifoldCFException
     {
@@ -3950,11 +3977,13 @@ public class WikiConnector extends org.a
       super(theStream,namespaceURI,localName,qName,atts,"query");
     }
 
+    @Override
     protected BaseProcessingContext createChild(String namespaceURI, String localName, String
qName, Attributes atts)
     {
       return new WikiGetDocInfoPagesContext(theStream,namespaceURI,localName,qName,atts);
     }
 
+    @Override
     protected void finishChild(BaseProcessingContext child)
       throws ManifoldCFException
     {
@@ -4026,11 +4055,13 @@ public class WikiConnector extends org.a
       super(theStream,namespaceURI,localName,qName,atts,"pages");
     }
 
+    @Override
     protected BaseProcessingContext createChild(String namespaceURI, String localName, String
qName, Attributes atts)
     {
       return new WikiGetDocInfoPageContext(theStream,namespaceURI,localName,qName,atts);
     }
     
+    @Override
     protected void finishChild(BaseProcessingContext child)
       throws ManifoldCFException
     {
@@ -4184,11 +4215,13 @@ public class WikiConnector extends org.a
       super(theStream,namespaceURI,localName,qName,atts,"revisions");
     }
 
+    @Override
     protected BaseProcessingContext createChild(String namespaceURI, String localName, String
qName, Attributes atts)
     {
       return new WikiGetDocInfoRevContext(theStream,namespaceURI,localName,qName,atts);
     }
 
+    @Override
     protected void finishChild(BaseProcessingContext child)
       throws ManifoldCFException
     {

Modified: manifoldcf/trunk/framework/build.xml
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/framework/build.xml?rev=1455389&r1=1455388&r2=1455389&view=diff
==============================================================================
--- manifoldcf/trunk/framework/build.xml (original)
+++ manifoldcf/trunk/framework/build.xml Tue Mar 12 01:57:23 2013
@@ -1265,6 +1265,21 @@
 
     <target name="build-tests" depends="jar-tests"/>
 
+    <target name="run-core-tests" depends="compile-core,compile-core-tests">
+        <mkdir dir="test-output"/>
+        <junit fork="true" maxmemory="128m" dir="test-output" outputtoformatters="true"
showoutput="true" haltonfailure="true">
+            <classpath>
+                <path refid="framework-classpath"/>
+                <pathelement location="build/core/classes"/>
+                <pathelement location="build/core-tests/classes"/>
+            </classpath>
+            <formatter type="brief" usefile="false"/>
+
+            <test name="org.apache.manifoldcf.core.common.DateTest" todir="test-output"/>
+            
+        </junit>
+    </target>
+
     <target name="run-script-engine-tests" depends="compile-core,compile-script-engine,compile-script-engine-tests">
         <mkdir dir="test-output"/>
         <junit fork="true" maxmemory="128m" dir="test-output" outputtoformatters="true"
showoutput="true" haltonfailure="true">
@@ -1298,7 +1313,7 @@
         </junit>
     </target>
 
-    <target name="run-tests" depends="compile-tests,run-script-engine-tests"/>
+    <target name="run-tests" depends="compile-tests,run-core-tests,run-script-engine-tests"/>
 
     <target name="run-tests-derby" depends="compile-tests">
         <mkdir dir="test-derby-output"/>

Added: manifoldcf/trunk/framework/core/src/main/java/org/apache/manifoldcf/core/common/DateParser.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/framework/core/src/main/java/org/apache/manifoldcf/core/common/DateParser.java?rev=1455389&view=auto
==============================================================================
--- manifoldcf/trunk/framework/core/src/main/java/org/apache/manifoldcf/core/common/DateParser.java
(added)
+++ manifoldcf/trunk/framework/core/src/main/java/org/apache/manifoldcf/core/common/DateParser.java
Tue Mar 12 01:57:23 2013
@@ -0,0 +1,322 @@
+/* $Id$ */
+
+/**
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+package org.apache.manifoldcf.core.common;
+
+import java.util.*;
+
+/** Class to parse and format common dates.
+*/
+public class DateParser
+{
+  public static final String _rcsid = "@(#)$Id$";
+
+  /** Parse ISO 8601 dates, and their common variants.
+  */
+  public static Date parseISO8601Date(String isoDateValue)
+  {
+    if (isoDateValue == null)
+      return null;
+    // There are a number of variations on the basic format.
+    // We'll look for key characters to help us determine which is which.
+    StringBuilder isoFormatString = new StringBuilder("yy");
+    if (isoDateValue.length() > 2 && isoDateValue.charAt(2) != '-')
+      isoFormatString.append("yy");
+    isoFormatString.append("-MM-dd'T'HH:mm:ss");
+    if (isoDateValue.indexOf(".") != -1)
+      isoFormatString.append(".SSS");
+    if (isoDateValue.endsWith("Z"))
+      isoFormatString.append("'Z'");
+    else
+      isoFormatString.append("Z");      // RFC 822 time, including general time zones
+    java.text.DateFormat iso8601Format = new java.text.SimpleDateFormat(isoFormatString.toString());
+    try
+    {
+      return iso8601Format.parse(isoDateValue);
+    }
+    catch (java.text.ParseException e)
+    {
+      return null;
+    }
+  }
+  
+  /** Lookup tables for RFC822 parsing: month names, and RFC822-to-Java timezone mapping */
+  
+  // Month map
+  protected static HashMap monthMap = new HashMap();
+  static
+  {
+    monthMap.put("jan",new Integer(1));
+    monthMap.put("feb",new Integer(2));
+    monthMap.put("mar",new Integer(3));
+    monthMap.put("apr",new Integer(4));
+    monthMap.put("may",new Integer(5));
+    monthMap.put("jun",new Integer(6));
+    monthMap.put("jul",new Integer(7));
+    monthMap.put("aug",new Integer(8));
+    monthMap.put("sep",new Integer(9));
+    monthMap.put("oct",new Integer(10));
+    monthMap.put("nov",new Integer(11));
+    monthMap.put("dec",new Integer(12));
+  }
+
+  protected static final HashMap milTzMap;
+  static
+  {
+    milTzMap = new HashMap();
+    milTzMap.put("Z","GMT");
+    milTzMap.put("UT","GMT");
+    milTzMap.put("A","GMT-01:00");
+    milTzMap.put("M","GMT-12:00");
+    milTzMap.put("N","GMT+01:00");
+    milTzMap.put("Y","GMT+12:00");
+  }
+
+  /** Parse RFC822 date */
+  public static Date parseRFC822Date(String dateValue)
+  {
+    if (dateValue == null)
+      return null;
+    dateValue = dateValue.trim();
+    // See http://www.faqs.org/rfcs/rfc822.html for legal formats
+    // Format: [day of week,] day mo year hh24:mm:ss tz
+    int commaIndex = dateValue.indexOf(",");
+    String usable;
+    if (commaIndex == -1)
+      usable = dateValue;
+    else
+      usable = dateValue.substring(commaIndex+1).trim();
+    int index;
+
+    index = usable.indexOf(" ");
+    if (index == -1)
+      return null;
+    String day = usable.substring(0,index);
+    usable = usable.substring(index+1).trim();
+
+    index = usable.indexOf(" ");
+    if (index == -1)
+      return null;
+    String month = usable.substring(0,index).toLowerCase();
+    usable = usable.substring(index+1).trim();
+
+    String year;
+    String hour = null;
+    String minute = null;
+    String second = null;
+    String timezone = null;
+
+    index = usable.indexOf(" ");
+    if (index != -1)
+    {
+      year = usable.substring(0,index);
+      usable = usable.substring(index+1).trim();
+
+      index = usable.indexOf(":");
+      if (index == -1)
+        return null;
+      hour = usable.substring(0,index);
+      usable = usable.substring(index+1).trim();
+
+      index = usable.indexOf(":");
+      if (index != -1)
+      {
+        minute = usable.substring(0,index);
+        usable = usable.substring(index+1).trim();
+
+        index = usable.indexOf(" ");
+        if (index == -1)
+          second = usable;
+        else
+        {
+          second = usable.substring(0,index);
+          timezone = usable.substring(index+1).trim();
+        }
+      }
+      else
+      {
+        index = usable.indexOf(" ");
+        if (index == -1)
+          minute = usable;
+        else
+        {
+          minute = usable.substring(0,index);
+          timezone = usable.substring(index+1).trim();
+        }
+      }
+    }
+    else
+      year = usable;
+
+    // Now construct a calendar object from this
+    TimeZone tz;
+    if (timezone != null && timezone.length() > 0)
+    {
+      if (timezone.startsWith("+") || timezone.startsWith("-"))
+      {
+        if (timezone.indexOf(":") == -1 && timezone.length() > 3)
+          timezone = timezone.substring(0,timezone.length()-2) + ":" + timezone.substring(timezone.length()-2);
+        timezone = "GMT"+timezone;
+      }
+      else
+      {
+        // Map special timezones to java timezones
+        if (milTzMap.get(timezone) != null)
+          timezone = (String)milTzMap.get(timezone);
+      }
+
+    }
+    else
+      timezone = "GMT";
+
+
+    tz = TimeZone.getTimeZone(timezone);
+
+    Calendar c = new GregorianCalendar(tz);
+    try
+    {
+      int value = Integer.parseInt(year);
+      if (value < 1900)
+        value += 1900;
+      c.set(Calendar.YEAR,value);
+
+      Integer x = (Integer)monthMap.get(month);
+      if (x == null)
+        return null;
+      c.set(Calendar.MONTH,x.intValue()-1);
+
+      value = Integer.parseInt(day);
+      c.set(Calendar.DAY_OF_MONTH,value);
+
+      if (hour != null)
+        value = Integer.parseInt(hour);
+      else
+        value = 0;
+      c.set(Calendar.HOUR_OF_DAY,value);
+
+      if (minute != null)
+        value = Integer.parseInt(minute);
+      else
+        value = 0;
+      c.set(Calendar.MINUTE,value);
+
+      if (second != null)
+        value = Integer.parseInt(second);
+      else
+        value = 0;
+      c.set(Calendar.SECOND,value);
+
+      c.set(Calendar.MILLISECOND,0);
+      return new Date(c.getTimeInMillis());
+    }
+    catch (NumberFormatException e)
+    {
+      return null;
+    }
+
+  }
+
+  /** Parse a China Daily News date */
+  public static Date parseChinaDate(String dateValue)
+  {
+    if (dateValue == null)
+      return null;
+    dateValue = dateValue.trim();
+    // Format: 2007/12/30 11:01
+    int index;
+    index = dateValue.indexOf("/");
+    if (index == -1)
+      return null;
+    String year = dateValue.substring(0,index);
+    dateValue = dateValue.substring(index+1);
+    index = dateValue.indexOf("/");
+    if (index == -1)
+      return null;
+    String month = dateValue.substring(0,index);
+    dateValue = dateValue.substring(index+1);
+    index = dateValue.indexOf(" ");
+    String day;
+    String hour = null;
+    String minute = null;
+    String second = null;
+    if (index == -1)
+      day = dateValue;
+    else
+    {
+      day = dateValue.substring(0,index);
+      dateValue = dateValue.substring(index+1);
+      index = dateValue.indexOf(":");
+      if (index == -1)
+        return null;
+      hour = dateValue.substring(0,index);
+      dateValue = dateValue.substring(index+1);
+      index = dateValue.indexOf(":");
+      if (index != -1)
+      {
+        minute = dateValue.substring(0,index);
+        dateValue = dateValue.substring(index+1);
+        second = dateValue;
+      }
+      else
+        minute = dateValue;
+    }
+    TimeZone tz = TimeZone.getTimeZone("GMT");
+    Calendar c = new GregorianCalendar(tz);
+    try
+    {
+      int value = Integer.parseInt(year);
+      if (value < 1900)
+        value += 1900;
+      c.set(Calendar.YEAR,value);
+
+      value = Integer.parseInt(month);
+      c.set(Calendar.MONTH,value-1);
+
+      value = Integer.parseInt(day);
+      c.set(Calendar.DAY_OF_MONTH,value);
+
+      if (hour != null)
+        value = Integer.parseInt(hour);
+      else
+        value = 0;
+      c.set(Calendar.HOUR_OF_DAY,value);
+
+      if (minute != null)
+        value = Integer.parseInt(minute);
+      else
+        value = 0;
+      c.set(Calendar.MINUTE,value);
+
+      if (second != null)
+        value = Integer.parseInt(second);
+      else
+        value = 0;
+      c.set(Calendar.SECOND,value);
+
+      c.set(Calendar.MILLISECOND,0);
+      return new Date(c.getTimeInMillis());
+    }
+    catch (NumberFormatException e)
+    {
+      return null;
+    }
+
+  }
+
+
+}

Propchange: manifoldcf/trunk/framework/core/src/main/java/org/apache/manifoldcf/core/common/DateParser.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: manifoldcf/trunk/framework/core/src/main/java/org/apache/manifoldcf/core/common/DateParser.java
------------------------------------------------------------------------------
    svn:keywords = Id

Copied: manifoldcf/trunk/framework/core/src/test/java/org/apache/manifoldcf/core/common/DateTest.java
(from r1455382, manifoldcf/trunk/connectors/rss/connector/src/test/java/org/apache/manifoldcf/crawler/connectors/rss/tests/DateTest.java)
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/framework/core/src/test/java/org/apache/manifoldcf/core/common/DateTest.java?p2=manifoldcf/trunk/framework/core/src/test/java/org/apache/manifoldcf/core/common/DateTest.java&p1=manifoldcf/trunk/connectors/rss/connector/src/test/java/org/apache/manifoldcf/crawler/connectors/rss/tests/DateTest.java&r1=1455382&r2=1455389&rev=1455389&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/rss/connector/src/test/java/org/apache/manifoldcf/crawler/connectors/rss/tests/DateTest.java
(original)
+++ manifoldcf/trunk/framework/core/src/test/java/org/apache/manifoldcf/core/common/DateTest.java
Tue Mar 12 01:57:23 2013
@@ -16,29 +16,28 @@
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
-package org.apache.manifoldcf.crawler.connectors.rss.tests;
+package org.apache.manifoldcf.core.common;
 
 import java.util.*;
-import org.apache.manifoldcf.crawler.connectors.rss.RSSConnector;
 import org.junit.*;
 import static org.junit.Assert.*;
 
-public class DateTest extends RSSConnector
+public class DateTest
 {
 
   @Test
   public void iso8601()
     throws Exception
   {
-    Date d = RSSConnector.parseISO8601Date("96-11-15T01:32:33.344GMT");
+    Date d = DateParser.parseISO8601Date("96-11-15T01:32:33.344GMT");
     assertNotNull(d);
-    d = RSSConnector.parseISO8601Date("2012-11-15T01:32:33.344Z");
+    d = DateParser.parseISO8601Date("2012-11-15T01:32:33.344Z");
     assertNotNull(d);
-    d = RSSConnector.parseISO8601Date("2012-11-15T01:32:33Z");
+    d = DateParser.parseISO8601Date("2012-11-15T01:32:33Z");
     assertNotNull(d);
-    d = RSSConnector.parseISO8601Date("2012-11-15T01:32:33+0100");
+    d = DateParser.parseISO8601Date("2012-11-15T01:32:33+0100");
     assertNotNull(d);
-    d = RSSConnector.parseISO8601Date("2012-11-15T01:32:33GMT-03:00");
+    d = DateParser.parseISO8601Date("2012-11-15T01:32:33GMT-03:00");
     assertNotNull(d);
   }
 



Mime
View raw message