nutch-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jnio...@apache.org
Subject svn commit: r1126421 - in /nutch/trunk: CHANGES.txt conf/schema.xml src/plugin/feed/src/java/org/apache/nutch/indexer/feed/FeedIndexingFilter.java src/plugin/index-more/src/java/org/apache/nutch/indexer/more/MoreIndexingFilter.java
Date Mon, 23 May 2011 10:34:32 GMT
Author: jnioche
Date: Mon May 23 10:34:32 2011
New Revision: 1126421

URL: http://svn.apache.org/viewvc?rev=1126421&view=rev
Log:
NUTCH-999 Normalise String representation for Dates in IndexingFilters

Modified:
    nutch/trunk/CHANGES.txt
    nutch/trunk/conf/schema.xml
    nutch/trunk/src/plugin/feed/src/java/org/apache/nutch/indexer/feed/FeedIndexingFilter.java
    nutch/trunk/src/plugin/index-more/src/java/org/apache/nutch/indexer/more/MoreIndexingFilter.java

Modified: nutch/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1126421&r1=1126420&r2=1126421&view=diff
==============================================================================
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Mon May 23 10:34:32 2011
@@ -2,6 +2,8 @@ Nutch Change Log
 
 Release 2.0 - Current Development
 
+* NUTCH-999 Normalise String representation for Dates in IndexingFilters (jnioche)
+
 * NUTCH-996 Indexer adds solr.commit.size+1 docs (markus)
 
 * NUTCH-983 Upgrade SolrJ to 3.1 (markus, jnioche)

Modified: nutch/trunk/conf/schema.xml
URL: http://svn.apache.org/viewvc/nutch/trunk/conf/schema.xml?rev=1126421&r1=1126420&r2=1126421&view=diff
==============================================================================
--- nutch/trunk/conf/schema.xml (original)
+++ nutch/trunk/conf/schema.xml Mon May 23 10:34:32 2011
@@ -84,9 +84,9 @@
             multiValued="true"/>
         <field name="contentLength" type="long" stored="true"
             indexed="false"/>
-        <field name="lastModified" type="long" stored="true"
+        <field name="lastModified" type="date" stored="true"
             indexed="false"/>
-        <field name="date" type="string" stored="true" indexed="true"/>
+        <field name="date" type="date" stored="true" indexed="true"/>
 
         <!-- fields for languageidentifier plugin -->
         <field name="lang" type="string" stored="true" indexed="true"/>
@@ -99,9 +99,9 @@
         <field name="author" type="string" stored="true" indexed="true"/>
         <field name="tag" type="string" stored="true" indexed="true"/>
         <field name="feed" type="string" stored="true" indexed="true"/>
-        <field name="publishedDate" type="string" stored="true"
+        <field name="publishedDate" type="date" stored="true"
             indexed="true"/>
-        <field name="updatedDate" type="string" stored="true"
+        <field name="updatedDate" type="date" stored="true"
             indexed="true"/>
     </fields>
     <uniqueKey>id</uniqueKey>

Modified: nutch/trunk/src/plugin/feed/src/java/org/apache/nutch/indexer/feed/FeedIndexingFilter.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/feed/src/java/org/apache/nutch/indexer/feed/FeedIndexingFilter.java?rev=1126421&r1=1126420&r2=1126421&view=diff
==============================================================================
--- nutch/trunk/src/plugin/feed/src/java/org/apache/nutch/indexer/feed/FeedIndexingFilter.java (original)
+++ nutch/trunk/src/plugin/feed/src/java/org/apache/nutch/indexer/feed/FeedIndexingFilter.java Mon May 23 10:34:32 2011
@@ -34,6 +34,7 @@ import org.apache.nutch.metadata.Feed;
 import org.apache.nutch.metadata.Metadata;
 import org.apache.nutch.parse.Parse;
 import org.apache.nutch.parse.ParseData;
+import org.apache.solr.common.util.DateUtil;
 
 /**
  * @author dogacan
@@ -96,17 +97,15 @@ public class FeedIndexingFilter implemen
     if (feed != null)
       doc.add(Feed.FEED, feed);
     
-    SimpleDateFormat sdf = new SimpleDateFormat(dateFormatStr);
-    sdf.setTimeZone(TimeZone.getTimeZone("GMT"));
     if (published != null) {
       Date date = new Date(Long.parseLong(published));
-      String dateString = sdf.format(date);
+      String dateString =  DateUtil.getThreadLocalDateFormat().format(date);
       doc.add(PUBLISHED_DATE, dateString);
     }
     
     if (updated != null) {
       Date date = new Date(Long.parseLong(updated));
-      String dateString = sdf.format(date);
+      String dateString = DateUtil.getThreadLocalDateFormat().format(date);
       doc.add(UPDATED_DATE, dateString);
     }
         

Modified: nutch/trunk/src/plugin/index-more/src/java/org/apache/nutch/indexer/more/MoreIndexingFilter.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/index-more/src/java/org/apache/nutch/indexer/more/MoreIndexingFilter.java?rev=1126421&r1=1126420&r2=1126421&view=diff
==============================================================================
--- nutch/trunk/src/plugin/index-more/src/java/org/apache/nutch/indexer/more/MoreIndexingFilter.java (original)
+++ nutch/trunk/src/plugin/index-more/src/java/org/apache/nutch/indexer/more/MoreIndexingFilter.java Mon May 23 10:34:32 2011
@@ -43,6 +43,7 @@ import org.apache.oro.text.regex.Perl5Co
 import org.apache.oro.text.regex.Perl5Matcher;
 import org.apache.oro.text.regex.Perl5Pattern;
 import org.apache.tika.mime.MimeType;
+import org.apache.solr.common.util.DateUtil;
 
 /**
  * Add (or reset) a few metaData properties as respective fields (if they are
@@ -88,8 +89,9 @@ public class MoreIndexingFilter implemen
     // String lastModified = data.getMeta(Metadata.LAST_MODIFIED);
     if (lastModified != null) { // try parse last-modified
       time = getTime(lastModified.toString(), url); // use as time
+      String formlastModified = DateUtil.getThreadLocalDateFormat().format(new Date(time));
       // store as string
-      doc.add("lastModified", Long.toString(time));
+      doc.add("lastModified", formlastModified);
     }
 
     if (time == -1) { // if no last-modified
@@ -97,11 +99,7 @@ public class MoreIndexingFilter implemen
       time = page.getFetchTime(); // use fetch time
     }
 
-    // add support for query syntax date:
-    // query filter is implemented in DateQueryFilter.java
-    SimpleDateFormat sdf = new SimpleDateFormat("yyyyMMdd");
-    sdf.setTimeZone(TimeZone.getTimeZone("GMT"));
-    String dateString = sdf.format(new Date(time));
+    String dateString = DateUtil.getThreadLocalDateFormat().format(new Date(time));
 
     // un-stored, indexed and un-tokenized
     doc.add("date", dateString);



Mime
View raw message