manifoldcf-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From kwri...@apache.org
Subject svn commit: r1430565 - in /manifoldcf/trunk: CHANGES.txt connectors/rss/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/rss/RSSConnector.java
Date Tue, 08 Jan 2013 22:01:06 GMT
Author: kwright
Date: Tue Jan  8 22:01:06 2013
New Revision: 1430565

URL: http://svn.apache.org/viewvc?rev=1430565&view=rev
Log:
Fix for CONNECTORS-600.

Modified:
    manifoldcf/trunk/CHANGES.txt
    manifoldcf/trunk/connectors/rss/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/rss/RSSConnector.java

Modified: manifoldcf/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/CHANGES.txt?rev=1430565&r1=1430564&r2=1430565&view=diff
==============================================================================
--- manifoldcf/trunk/CHANGES.txt (original)
+++ manifoldcf/trunk/CHANGES.txt Tue Jan  8 22:01:06 2013
@@ -3,6 +3,10 @@ $Id$
 
 ======================= 1.1-dev =====================
 
+CONNECTORS-600: Add a field to the RSS connector that contains
+document origination date in ISO 8601 format.
+(David Morana, Karl Wright)
+
 CONNECTORS-598: Add an RSS connector mode that allows just
 metadata to be consumed, in conjunction with content from description
 or content fields.

Modified: manifoldcf/trunk/connectors/rss/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/rss/RSSConnector.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/rss/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/rss/RSSConnector.java?rev=1430565&r1=1430564&r2=1430565&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/rss/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/rss/RSSConnector.java (original)
+++ manifoldcf/trunk/connectors/rss/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/rss/RSSConnector.java Tue Jan  8 22:01:06 2013
@@ -41,6 +41,7 @@ import org.apache.http.HttpException;
 import java.io.*;
 import java.util.*;
 import java.net.*;
+import java.text.*;
 import java.util.regex.*;
 
 /** This is the RSS implementation of the IRepositoryConnector interface.
@@ -1449,24 +1450,33 @@ public class RSSConnector extends org.ap
             // The pubdates are a ms since epoch value; we want the minimum one for the origination time.
             Long minimumOrigTime = null;
             String[] pubDateValues = new String[pubDates.size()];
+            String[] pubDateValuesISO = new String[pubDates.size()];
+            TimeZone tz = TimeZone.getTimeZone("UTC");
+            DateFormat df = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm'Z'");
+            df.setTimeZone(tz);
             k = 0;
             while (k < pubDates.size())
             {
               String pubDate = (String)pubDates.get(k);
-              pubDateValues[k++] = pubDate;
+              pubDateValues[k] = pubDate;
               try
               {
                 Long pubDateLong = new Long(pubDate);
                 if (minimumOrigTime == null || pubDateLong.longValue() < minimumOrigTime.longValue())
                   minimumOrigTime = pubDateLong;
+                pubDateValuesISO[k] = df.format(new Date(pubDateLong.longValue()));
               }
               catch (NumberFormatException e)
               {
                 // Do nothing; the version string seems to not mean anything
               }
+              k++;
             }
             if (k > 0)
+            {
               rd.addField("pubdate",pubDateValues);
+              rd.addField("pubdateiso",pubDateValuesISO);
+            }
 
             if (minimumOrigTime != null)
               activities.setDocumentOriginationTime(urlValue,minimumOrigTime);



Mime
View raw message