manifoldcf-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From kwri...@apache.org
Subject svn commit: r1429250 - in /manifoldcf/trunk: ./ connectors/rss/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/rss/ connectors/rss/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/rss/ framework/core/src/main/ja...
Date Sat, 05 Jan 2013 10:46:02 GMT
Author: kwright
Date: Sat Jan  5 10:46:02 2013
New Revision: 1429250

URL: http://svn.apache.org/viewvc?rev=1429250&view=rev
Log:
Fix for CONNECTORS-598, minus Japanese translations required in the RSS connector.

Added:
    manifoldcf/trunk/framework/core/src/main/java/org/apache/manifoldcf/core/interfaces/NullCharacterInput.java
  (with props)
Modified:
    manifoldcf/trunk/CHANGES.txt
    manifoldcf/trunk/connectors/rss/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/rss/RSSConnector.java
    manifoldcf/trunk/connectors/rss/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/rss/common_en_US.properties
    manifoldcf/trunk/connectors/rss/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/rss/common_ja_JP.properties
    manifoldcf/trunk/framework/core/src/main/java/org/apache/manifoldcf/core/interfaces/TempFileCharacterInput.java

Modified: manifoldcf/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/CHANGES.txt?rev=1429250&r1=1429249&r2=1429250&view=diff
==============================================================================
--- manifoldcf/trunk/CHANGES.txt (original)
+++ manifoldcf/trunk/CHANGES.txt Sat Jan  5 10:46:02 2013
@@ -3,6 +3,11 @@ $Id$
 
 ======================= 1.1-dev =====================
 
+CONNECTORS-598: Add an RSS connector mode that allows just
+metadata to be consumed, in conjunction with content from description
+or content fields.
+(David Morana, Karl Wright)
+
 CONNECTORS-596: RSS and Web connectors need to peel off any
 namespace qualifies from tag names when processing XML feeds.
 (David Morana, Karl Wright)

Modified: manifoldcf/trunk/connectors/rss/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/rss/RSSConnector.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/rss/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/rss/RSSConnector.java?rev=1429250&r1=1429249&r2=1429250&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/rss/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/rss/RSSConnector.java (original)
+++ manifoldcf/trunk/connectors/rss/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/rss/RSSConnector.java Sat Jan  5 10:46:02 2013
@@ -92,11 +92,13 @@ public class RSSConnector extends org.ap
   /** Dechromed content mode - content field */
   public static final int DECHROMED_CONTENT = 2;
 
-  /** Chromed suppression mode - use chromed content */
+  /** Chromed suppression mode - use chromed content if dechromed content not available */
   public static final int CHROMED_USE = 0;
-  /** Chromed suppression mode - skip all chromed content */
+  /** Chromed suppression mode - skip documents if dechromed content not available */
   public static final int CHROMED_SKIP = 1;
-
+  /** Chromed suppression mode - index metadata only if dechromed content not available */
+  public static final int CHROMED_METADATA_ONLY = 2;
+  
   /** Robots usage flag */
   protected int robotsUsage = ROBOTS_ALL;
 
@@ -2406,6 +2408,9 @@ public class RSSConnector extends org.ap
 "  <tr>\n"+
 "    <td class=\"value\"><nobr><input type=\"radio\" name=\"chromedmode\"
value=\"skip\" "+(chromedMode.equals("skip")?"checked=\"true\"":"")+"/>"+Messages.getBodyString(locale,"RSSConnector.NeverUseChromedContent")+"</nobr></td>\n"+
 "  </tr>\n"+
+"  <tr>\n"+
+"    <td class=\"value\"><nobr><input type=\"radio\" name=\"chromedmode\"
value=\"metadata\" "+(chromedMode.equals("metadata")?"checked=\"true\"":"")+"/>"+Messages.getBodyString(locale,"RSSConnector.NoContentMetadataOnly")+"</nobr></td>\n"+
+"  </tr>\n"+
 "</table>\n"
       );
     }
@@ -3939,7 +3944,7 @@ public class RSSConnector extends org.ap
               ((origDate==null)?"null":origDate.toString()));
             if (filter.isLegalURL(newIdentifier))
             {
-              if (contentsFile == null)
+              if (contentsFile == null && filter.getChromedContentMode() != CHROMED_METADATA_ONLY)
               {
                 // It's a reference!  Add it.
                 String[] dataNames = new String[]{"pubdate","title","source","category","description"};
@@ -3988,19 +3993,37 @@ public class RSSConnector extends org.ap
 
                 if (descriptionField != null)
                   dataValues[5] = new String[]{descriptionField};
-
-                CharacterInput ci = new TempFileCharacterInput(contentsFile);
-                try
+                  
+                if (contentsFile == null)
                 {
-                  contentsFile = null;
-                  dataValues[4] = new Object[]{ci};
+                  CharacterInput ci = new NullCharacterInput();
+                  try
+                  {
+                    dataValues[4] = new Object[]{ci};
 
-                  // Add document reference, including the data to pass down, and the dechromed content too
-                  activities.addDocumentReference(newIdentifier,documentIdentifier,null,dataNames,dataValues,origDate);
+                    // Add document reference, including the data to pass down, and the dechromed content too
+                    activities.addDocumentReference(newIdentifier,documentIdentifier,null,dataNames,dataValues,origDate);
+                  }
+                  finally
+                  {
+                    ci.discard();
+                  }
                 }
-                finally
+                else
                 {
-                  ci.discard();
+                  CharacterInput ci = new TempFileCharacterInput(contentsFile);
+                  try
+                  {
+                    contentsFile = null;
+                    dataValues[4] = new Object[]{ci};
+
+                    // Add document reference, including the data to pass down, and the dechromed content too
+                    activities.addDocumentReference(newIdentifier,documentIdentifier,null,dataNames,dataValues,origDate);
+                  }
+                  finally
+                  {
+                    ci.discard();
+                  }
                 }
               }
             }
@@ -4323,7 +4346,7 @@ public class RSSConnector extends org.ap
               ((origDate==null)?"null":origDate.toString()));
             if (filter.isLegalURL(newIdentifier))
             {
-              if (contentsFile == null)
+              if (contentsFile == null && filter.getChromedContentMode() != CHROMED_METADATA_ONLY)
               {
                 // It's a reference!  Add it.
                 String[] dataNames = new String[]{"pubdate","title","source","description"};
@@ -4356,18 +4379,37 @@ public class RSSConnector extends org.ap
                 dataValues[2] = new String[]{documentIdentifier};
                 if (descriptionField != null)
                   dataValues[4] = new String[]{descriptionField};
-                CharacterInput ci = new TempFileCharacterInput(contentsFile);
-                try
+                  
+                if (contentsFile == null)
                 {
-                  contentsFile = null;
-                  dataValues[3] = new Object[]{ci};
+                  CharacterInput ci = new NullCharacterInput();
+                  try
+                  {
+                    dataValues[3] = new Object[]{ci};
 
-                  // Add document reference, including the data to pass down, and the dechromed content too
-                  activities.addDocumentReference(newIdentifier,documentIdentifier,null,dataNames,dataValues,origDate);
+                    // Add document reference, including the data to pass down, and the dechromed content too
+                    activities.addDocumentReference(newIdentifier,documentIdentifier,null,dataNames,dataValues,origDate);
+                  }
+                  finally
+                  {
+                    ci.discard();
+                  }
                 }
-                finally
+                else
                 {
-                  ci.discard();
+                  CharacterInput ci = new TempFileCharacterInput(contentsFile);
+                  try
+                  {
+                    contentsFile = null;
+                    dataValues[3] = new Object[]{ci};
+
+                    // Add document reference, including the data to pass down, and the dechromed content too
+                    activities.addDocumentReference(newIdentifier,documentIdentifier,null,dataNames,dataValues,origDate);
+                  }
+                  finally
+                  {
+                    ci.discard();
+                  }
                 }
               }
             }
@@ -4699,7 +4741,7 @@ public class RSSConnector extends org.ap
                 ((origDate==null)?"null":origDate.toString()));
               if (filter.isLegalURL(newIdentifier))
               {
-                if (contentsFile == null)
+                if (contentsFile == null && filter.getChromedContentMode() != CHROMED_METADATA_ONLY)
                 {
                   // It's a reference!  Add it.
                   String[] dataNames = new String[]{"pubdate","title","source","category","description"};
@@ -4746,20 +4788,38 @@ public class RSSConnector extends org.ap
                   }
                   if (descriptionField != null)
                     dataValues[5] = new String[]{descriptionField};
-                    
-                  CharacterInput ci = new TempFileCharacterInput(contentsFile);
-                  try
+                  
+                  if (contentsFile == null)
                   {
-                    contentsFile = null;
-
-                    dataValues[4] = new Object[]{ci};
+                    CharacterInput ci = new NullCharacterInput();
+                    try
+                    {
+                      dataValues[4] = new Object[]{ci};
 
-                    // Add document reference, including the data to pass down, and the dechromed content too
-                    activities.addDocumentReference(newIdentifier,documentIdentifier,null,dataNames,dataValues,origDate);
+                      // Add document reference, including the data to pass down, and the dechromed content too
+                      activities.addDocumentReference(newIdentifier,documentIdentifier,null,dataNames,dataValues,origDate);
+                    }
+                    finally
+                    {
+                      ci.discard();
+                    }
                   }
-                  finally
+                  else
                   {
-                    ci.discard();
+                    CharacterInput ci = new TempFileCharacterInput(contentsFile);
+                    try
+                    {
+                      contentsFile = null;
+
+                      dataValues[4] = new Object[]{ci};
+
+                      // Add document reference, including the data to pass down, and the dechromed content too
+                      activities.addDocumentReference(newIdentifier,documentIdentifier,null,dataNames,dataValues,origDate);
+                    }
+                    finally
+                    {
+                      ci.discard();
+                    }
                   }
                 }
               }
@@ -6124,6 +6184,8 @@ public class RSSConnector extends org.ap
               chromedContentMode = CHROMED_USE;
             else if (mode.equals("skip"))
               chromedContentMode = CHROMED_SKIP;
+            else if (mode.equals("metadata"))
+              chromedContentMode = CHROMED_METADATA_ONLY;
           }
         }
       }

Modified: manifoldcf/trunk/connectors/rss/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/rss/common_en_US.properties
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/rss/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/rss/common_en_US.properties?rev=1429250&r1=1429249&r2=1429250&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/rss/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/rss/common_en_US.properties (original)
+++ manifoldcf/trunk/connectors/rss/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/rss/common_en_US.properties Sat Jan  5 10:46:02 2013
@@ -76,7 +76,8 @@ RSSConnector.NoDechromedContent=No dechr
 RSSConnector.DechromedContentIfPresentInDescriptionField=Dechromed content, if present, in 'description' field
 RSSConnector.DechromedContentIfPresentInContentField=Dechromed content, if present, in 'content' field
 RSSConnector.UseChromedContentIfNoDechromedContentFound=Use chromed content if no dechromed content found
-RSSConnector.NeverUseChromedContent=Never use chromed content
+RSSConnector.NeverUseChromedContent=Skip documents if dechromed content unavailable
+RSSConnector.NoContentMetadataOnly=Include only metadata if dechromed content unavailable
 RSSConnector.DeleteToken=Delete token #
 RSSConnector.AddAccessToken=Add access token
 RSSConnector.DeleteMetadata=Delete metadata #

Modified: manifoldcf/trunk/connectors/rss/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/rss/common_ja_JP.properties
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/rss/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/rss/common_ja_JP.properties?rev=1429250&r1=1429249&r2=1429250&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/rss/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/rss/common_ja_JP.properties (original)
+++ manifoldcf/trunk/connectors/rss/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/rss/common_ja_JP.properties Sat Jan  5 10:46:02 2013
@@ -76,7 +76,9 @@ RSSConnector.NoDechromedContent=デ
 RSSConnector.DechromedContentIfPresentInDescriptionField=デクロムコンテンツが項目「切見え」にある場合
 RSSConnector.DechromedContentIfPresentInContentField=デクロムコンテンツがある項目「コンテンツ」にある場合
 RSSConnector.UseChromedContentIfNoDechromedContentFound=クロムコンテンツが見つからない場合はクロムコンテンツを使う
-RSSConnector.NeverUseChromedContent=クロムコンテンツは使わない
+#RSSConnector.NeverUseChromedContent=クロムコンテンツは使わない
+RSSConnector.NeverUseChromedContent=Skip documents if dechromed content unavailable
+RSSConnector.NoContentMetadataOnly=Include only metadata if dechromed content unavailable
 RSSConnector.DeleteToken=トークンを削除: #
 RSSConnector.AddAccessToken=アクセストークンを追加
 RSSConnector.DeleteMetadata=メタデータを削除: #

Added: manifoldcf/trunk/framework/core/src/main/java/org/apache/manifoldcf/core/interfaces/NullCharacterInput.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/framework/core/src/main/java/org/apache/manifoldcf/core/interfaces/NullCharacterInput.java?rev=1429250&view=auto
==============================================================================
--- manifoldcf/trunk/framework/core/src/main/java/org/apache/manifoldcf/core/interfaces/NullCharacterInput.java (added)
+++ manifoldcf/trunk/framework/core/src/main/java/org/apache/manifoldcf/core/interfaces/NullCharacterInput.java Sat Jan  5 10:46:02 2013
@@ -0,0 +1,117 @@
+/* $Id$ */
+
+/**
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+package org.apache.manifoldcf.core.interfaces;
+
+import java.io.*;
+
+import org.apache.manifoldcf.core.system.ManifoldCF;
+
+/** This class represents a null character stream, which has no characters.
+*/
+public class NullCharacterInput extends CharacterInput
+{
+  public static final String _rcsid = "@(#)$Id$";
+
+  /** Construct from nothing.
+  */
+  public NullCharacterInput()
+  {
+    super();
+  }
+
+  @Override
+  public Reader getStream()
+    throws ManifoldCFException
+  {
+    return new StringReader("");
+  }
+
+  @Override
+  public void doneWithStream()
+    throws ManifoldCFException
+  {
+  }
+
+  @Override
+  public long getCharacterLength()
+    throws ManifoldCFException
+  {
+    return 0L;
+  }
+
+  @Override
+  public String getHashValue()
+    throws ManifoldCFException
+  {
+    return ManifoldCF.getHashValue(ManifoldCF.startHash());
+  }
+
+  /** Open a Utf8 stream directly */
+  @Override
+  public InputStream getUtf8Stream()
+    throws ManifoldCFException
+  {
+    return new ByteArrayInputStream(new byte[]{});
+  }
+
+  /** Transfer to a new object; this causes the current object to become "already discarded" */
+  @Override
+  public CharacterInput transfer()
+  {
+    return new NullCharacterInput();
+  }
+
+  /** Discard this object permanently */
+  @Override
+  public void discard()
+    throws ManifoldCFException
+  {
+  }
+
+  // Protected methods
+
+  /** Open a reader, for use by a caller, until closeStream is called */
+  @Override
+  protected void openStream()
+    throws ManifoldCFException
+  {
+  }
+
+  /** Close any open reader */
+  @Override
+  protected void closeStream()
+    throws ManifoldCFException
+  {
+  }
+
+  /** Calculate the datum's length in characters */
+  @Override
+  protected void calculateLength()
+    throws ManifoldCFException
+  {
+  }
+
+  /** Calculate the datum's hash value */
+  @Override
+  protected void calculateHashValue()
+    throws ManifoldCFException
+  {
+  }
+
+}

Propchange: manifoldcf/trunk/framework/core/src/main/java/org/apache/manifoldcf/core/interfaces/NullCharacterInput.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: manifoldcf/trunk/framework/core/src/main/java/org/apache/manifoldcf/core/interfaces/NullCharacterInput.java
------------------------------------------------------------------------------
    svn:keywords = Id

Modified: manifoldcf/trunk/framework/core/src/main/java/org/apache/manifoldcf/core/interfaces/TempFileCharacterInput.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/framework/core/src/main/java/org/apache/manifoldcf/core/interfaces/TempFileCharacterInput.java?rev=1429250&r1=1429249&r2=1429250&view=diff
==============================================================================
--- manifoldcf/trunk/framework/core/src/main/java/org/apache/manifoldcf/core/interfaces/TempFileCharacterInput.java (original)
+++ manifoldcf/trunk/framework/core/src/main/java/org/apache/manifoldcf/core/interfaces/TempFileCharacterInput.java Sat Jan  5 10:46:02 2013
@@ -134,6 +134,7 @@ public class TempFileCharacterInput exte
   }
 
   /** Open a Utf8 stream directly from the backing file */
+  @Override
   public InputStream getUtf8Stream()
     throws ManifoldCFException
   {
@@ -151,6 +152,7 @@ public class TempFileCharacterInput exte
     return null;
   }
 
+  @Override
   protected void openStream()
     throws ManifoldCFException
   {
@@ -171,6 +173,7 @@ public class TempFileCharacterInput exte
   }
 
   /** Transfer to a new object; this causes the current object to become "already discarded" */
+  @Override
   public CharacterInput transfer()
   {
     // Create a new TempFileCharacterInput object, and fill it with our current stuff
@@ -186,6 +189,7 @@ public class TempFileCharacterInput exte
     return rval;
   }
 
+  @Override
   public void discard()
     throws ManifoldCFException
   {
@@ -199,6 +203,7 @@ public class TempFileCharacterInput exte
   }
 
   /** Calculate the datum's length in characters */
+  @Override
   protected void calculateLength()
     throws ManifoldCFException
   {
@@ -206,6 +211,7 @@ public class TempFileCharacterInput exte
   }
 
   /** Calculate the datum's hash value */
+  @Override
   protected void calculateHashValue()
     throws ManifoldCFException
   {



Mime
View raw message