manifoldcf-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From kwri...@apache.org
Subject svn commit: r1630188 [2/2] - in /manifoldcf/trunk: ./ connectors/alfresco-webscript/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/alfrescowebscript/ connectors/alfresco-webscript/connector/src/test/java/org/apache/manifoldcf/crawler/...
Date Wed, 08 Oct 2014 17:54:48 GMT
Modified: manifoldcf/trunk/connectors/livelink/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/livelink/LivelinkConnector.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/livelink/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/livelink/LivelinkConnector.java?rev=1630188&r1=1630187&r2=1630188&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/livelink/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/livelink/LivelinkConnector.java (original)
+++ manifoldcf/trunk/connectors/livelink/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/livelink/LivelinkConnector.java Wed Oct  8 17:54:47 2014
@@ -4185,466 +4185,485 @@ public class LivelinkConnector extends o
     try
     {
       // Check URL first
-      if (activities.checkURLIndexable(viewHttpAddress))
+      if (!activities.checkURLIndexable(viewHttpAddress))
       {
+        // Document not ingestable due to URL
+        resultDescription = "URL ("+viewHttpAddress+") was rejected by output connector";
+        if (Logging.connectors.isDebugEnabled())
+          Logging.connectors.debug("Livelink: Excluding document "+documentIdentifier+" because its URL ("+viewHttpAddress+") was rejected by output connector");
+        resultCode = "URLEXCLUSION";
+        activities.noDocument(documentIdentifier,version);
+        return;
+      }
+      
+      // Add general metadata
+      ObjectInformation objInfo = llc.getObjectInformation(vol,objID);
+      VersionInformation versInfo = llc.getVersionInformation(vol,objID,0);
+      if (!objInfo.exists())
+      {
+        resultCode = "OBJECTNOTFOUND";
+        Logging.connectors.debug("Livelink: No object "+contextMsg+": not ingesting");
+        return;
+      }
+      if (!versInfo.exists())
+      {
+        resultCode = "VERSIONNOTFOUND";
+        Logging.connectors.debug("Livelink: No version data "+contextMsg+": not ingesting");
+        return;
+      }
 
-        // Add general metadata
-        ObjectInformation objInfo = llc.getObjectInformation(vol,objID);
-        VersionInformation versInfo = llc.getVersionInformation(vol,objID,0);
-        if (!objInfo.exists())
-        {
-          resultCode = "OBJECTNOTFOUND";
-          Logging.connectors.debug("Livelink: No object "+contextMsg+": not ingesting");
-          return;
-        }
-        if (!versInfo.exists())
-        {
-          resultCode = "VERSIONNOTFOUND";
-          Logging.connectors.debug("Livelink: No version data "+contextMsg+": not ingesting");
-          return;
-        }
+      String mimeType = versInfo.getMimeType();
+      if (!activities.checkMimeTypeIndexable(mimeType))
+      {
+        // Document not indexable because of its mime type
+        resultDescription = "Mime type ("+mimeType+") was rejected by output connector";
+        if (Logging.connectors.isDebugEnabled())
+          Logging.connectors.debug("Livelink: Excluding document "+documentIdentifier+" because its mime type ("+mimeType+") was rejected by output connector");
+        resultCode = "MIMETYPEEXCLUSION";
+        activities.noDocument(documentIdentifier,version);
+        return;
+      }
+        
+      Long dataSize = versInfo.getDataSize();
+      if (dataSize == null)
+      {
+        // Document had no length
+        resultDescription = "Document had no length";
+        if (Logging.connectors.isDebugEnabled())
+          Logging.connectors.debug("Livelink: Excluding document "+documentIdentifier+" because it had no length");
+        resultCode = "DOCUMENTNOLENGTH";
+        activities.noDocument(documentIdentifier,version);
+        return;
+      }
+      
+      if (!activities.checkLengthIndexable(dataSize.longValue()))
+      {
+        // Document not indexable because of its length
+        resultDescription = "Document length ("+dataSize+") was rejected by output connector";
+        if (Logging.connectors.isDebugEnabled())
+          Logging.connectors.debug("Livelink: Excluding document "+documentIdentifier+" because its length ("+dataSize+") was rejected by output connector");
+        resultCode = "DOCUMENTTOOLONG";
+        activities.noDocument(documentIdentifier,version);
+        return;
+      }
 
-        String mimeType = versInfo.getMimeType();
-        if (activities.checkMimeTypeIndexable(mimeType))
-        {
-          Long dataSize = versInfo.getDataSize();
-          if (dataSize != null && activities.checkLengthIndexable(dataSize.longValue()))
-          {
-            String fileName = versInfo.getFileName();
-            Date creationDate = objInfo.getCreationDate();
-            Date modifyDate = versInfo.getModifyDate();
-            Integer parentID = objInfo.getParentId();
-            RepositoryDocument rd = new RepositoryDocument();
+      Date modifyDate = versInfo.getModifyDate();
+      if (!activities.checkDateIndexable(modifyDate))
+      {
+        // Document not indexable because of its date
+        resultDescription = "Document date ("+modifyDate+") was rejected by output connector";
+        if (Logging.connectors.isDebugEnabled())
+          Logging.connectors.debug("Livelink: Excluding document "+documentIdentifier+" because its date ("+modifyDate+") was rejected by output connector");
+        resultCode = "DOCUMENTBADDATE";
+        activities.noDocument(documentIdentifier,version);
+        return;
+      }
+      
+      String fileName = versInfo.getFileName();
+      Date creationDate = objInfo.getCreationDate();
+      Integer parentID = objInfo.getParentId();
+      
+      
+      RepositoryDocument rd = new RepositoryDocument();
 
+      // Add general data we need for the output connector
+      if (mimeType != null)
+        rd.setMimeType(mimeType);
+      if (fileName != null)
+        rd.setFileName(fileName);
+      if (creationDate != null)
+        rd.setCreatedDate(creationDate);
+      if (modifyDate != null)
+        rd.setModifiedDate(modifyDate);
             
-            // Add general data we need for the output connector
-            if (mimeType != null)
-              rd.setMimeType(mimeType);
-            if (fileName != null)
-              rd.setFileName(fileName);
-            if (creationDate != null)
-              rd.setCreatedDate(creationDate);
-            if (modifyDate != null)
-              rd.setModifiedDate(modifyDate);
-            
-            rd.addField(GENERAL_NAME_FIELD,objInfo.getName());
-            rd.addField(GENERAL_DESCRIPTION_FIELD,objInfo.getComments());
-            if (creationDate != null)
-              rd.addField(GENERAL_CREATIONDATE_FIELD,new Long(creationDate.getTime()).toString());
-            if (modifyDate != null)
-              rd.addField(GENERAL_MODIFYDATE_FIELD,new Long(modifyDate.getTime()).toString());
-            if (parentID != null)
-              rd.addField(GENERAL_PARENTID,parentID.toString());
-            UserInformation owner = llc.getUserInformation(objInfo.getOwnerId().intValue());
-            UserInformation creator = llc.getUserInformation(objInfo.getCreatorId().intValue());
-            UserInformation modifier = llc.getUserInformation(versInfo.getOwnerId().intValue());
-            if (owner != null)
-              rd.addField(GENERAL_OWNER,owner.getName());
-            if (creator != null)
-              rd.addField(GENERAL_CREATOR,creator.getName());
-            if (modifier != null)
-              rd.addField(GENERAL_MODIFIER,modifier.getName());
-
-            // Iterate over the metadata items.  These are organized by category
-            // for speed of lookup.
-
-            Iterator<MetadataItem> catIter = desc.getItems(categoryPaths);
-            while (catIter.hasNext())
+      rd.addField(GENERAL_NAME_FIELD,objInfo.getName());
+      rd.addField(GENERAL_DESCRIPTION_FIELD,objInfo.getComments());
+      if (creationDate != null)
+        rd.addField(GENERAL_CREATIONDATE_FIELD,new Long(creationDate.getTime()).toString());
+      if (modifyDate != null)
+        rd.addField(GENERAL_MODIFYDATE_FIELD,new Long(modifyDate.getTime()).toString());
+      if (parentID != null)
+        rd.addField(GENERAL_PARENTID,parentID.toString());
+      UserInformation owner = llc.getUserInformation(objInfo.getOwnerId().intValue());
+      UserInformation creator = llc.getUserInformation(objInfo.getCreatorId().intValue());
+      UserInformation modifier = llc.getUserInformation(versInfo.getOwnerId().intValue());
+      if (owner != null)
+        rd.addField(GENERAL_OWNER,owner.getName());
+      if (creator != null)
+        rd.addField(GENERAL_CREATOR,creator.getName());
+      if (modifier != null)
+        rd.addField(GENERAL_MODIFIER,modifier.getName());
+
+      // Iterate over the metadata items.  These are organized by category
+      // for speed of lookup.
+
+      Iterator<MetadataItem> catIter = desc.getItems(categoryPaths);
+      while (catIter.hasNext())
+      {
+        MetadataItem item = catIter.next();
+        MetadataPathItem pathItem = item.getPathItem();
+        if (pathItem != null)
+        {
+          int catID = pathItem.getCatID();
+          // grab the associated catversion
+          LLValue catVersion = getCatVersion(objID,catID);
+          if (catVersion != null)
+          {
+            // Go through attributes now
+            Iterator<String> attrIter = item.getAttributeNames();
+            while (attrIter.hasNext())
             {
-              MetadataItem item = catIter.next();
-              MetadataPathItem pathItem = item.getPathItem();
-              if (pathItem != null)
-              {
-                int catID = pathItem.getCatID();
-                // grab the associated catversion
-                LLValue catVersion = getCatVersion(objID,catID);
-                if (catVersion != null)
-                {
-                  // Go through attributes now
-                  Iterator<String> attrIter = item.getAttributeNames();
-                  while (attrIter.hasNext())
-                  {
-                    String attrName = attrIter.next();
-                    // Create a unique metadata name
-                    String metadataName = pathItem.getCatName()+":"+attrName;
-                    // Fetch the metadata and stuff it into the RepositoryData structure
-                    String[] metadataValue = getAttributeValue(catVersion,attrName);
-                    if (metadataValue != null)
-                      rd.addField(metadataName,metadataValue);
-                    else
-                      Logging.connectors.warn("Livelink: Metadata attribute '"+metadataName+"' does not seem to exist; please correct the job");
-                  }
-                }
-
-              }
+              String attrName = attrIter.next();
+              // Create a unique metadata name
+              String metadataName = pathItem.getCatName()+":"+attrName;
+              // Fetch the metadata and stuff it into the RepositoryData structure
+              String[] metadataValue = getAttributeValue(catVersion,attrName);
+              if (metadataValue != null)
+                rd.addField(metadataName,metadataValue);
+              else
+                Logging.connectors.warn("Livelink: Metadata attribute '"+metadataName+"' does not seem to exist; please correct the job");
             }
+          }
+          
+        }
+      }
 
-            if (actualAcls != null && denyAcls != null)
-              rd.setSecurity(RepositoryDocument.SECURITY_TYPE_DOCUMENT,actualAcls,denyAcls);
+      if (actualAcls != null && denyAcls != null)
+        rd.setSecurity(RepositoryDocument.SECURITY_TYPE_DOCUMENT,actualAcls,denyAcls);
 
-            // Add the path metadata item into the mix, if enabled
-            String pathAttributeName = sDesc.getPathAttributeName();
-            if (pathAttributeName != null && pathAttributeName.length() > 0)
-            {
-              String pathString = sDesc.getPathAttributeValue(documentIdentifier);
-              if (pathString != null)
-              {
-                if (Logging.connectors.isDebugEnabled())
-                  Logging.connectors.debug("Livelink: Path attribute name is '"+pathAttributeName+"'"+contextMsg+", value is '"+pathString+"'");
-                rd.addField(pathAttributeName,pathString);
-              }
-            }
+      // Add the path metadata item into the mix, if enabled
+      String pathAttributeName = sDesc.getPathAttributeName();
+      if (pathAttributeName != null && pathAttributeName.length() > 0)
+      {
+        String pathString = sDesc.getPathAttributeValue(documentIdentifier);
+        if (pathString != null)
+        {
+          if (Logging.connectors.isDebugEnabled())
+            Logging.connectors.debug("Livelink: Path attribute name is '"+pathAttributeName+"'"+contextMsg+", value is '"+pathString+"'");
+          rd.addField(pathAttributeName,pathString);
+        }
+      }
 
-            if (ingestProtocol != null)
-            {
-              // Use HTTP to fetch document!
-              String ingestHttpAddress = convertToIngestURI(documentIdentifier);
-              if (ingestHttpAddress != null)
-              {
+      if (ingestProtocol != null)
+      {
+        // Use HTTP to fetch document!
+        String ingestHttpAddress = convertToIngestURI(documentIdentifier);
+        if (ingestHttpAddress != null)
+        {
 
-                // Set up connection
-                HttpClient client = getInitializedClient(contextMsg);
+          // Set up connection
+          HttpClient client = getInitializedClient(contextMsg);
 
-                long currentTime;
+          long currentTime;
 
-                if (Logging.connectors.isInfoEnabled())
-                  Logging.connectors.info("Livelink: " + ingestHttpAddress);
+          if (Logging.connectors.isInfoEnabled())
+            Logging.connectors.info("Livelink: " + ingestHttpAddress);
 
 
-                HttpGet method = new HttpGet(getHost().toURI() + ingestHttpAddress);
-                method.setHeader(new BasicHeader("Accept","*/*"));
+          HttpGet method = new HttpGet(getHost().toURI() + ingestHttpAddress);
+          method.setHeader(new BasicHeader("Accept","*/*"));
 
-                ExecuteMethodThread methodThread = new ExecuteMethodThread(client,method);
-                methodThread.start();
+          ExecuteMethodThread methodThread = new ExecuteMethodThread(client,method);
+          methodThread.start();
+          try
+          {
+
+            int statusCode = methodThread.getResponseCode();
+            switch (statusCode)
+            {
+            case 500:
+            case 502:
+              Logging.connectors.warn("Livelink: Service interruption during fetch "+contextMsg+" with Livelink HTTP Server, retrying...");
+              throw new ServiceInterruption("Service interruption during fetch",new ManifoldCFException(Integer.toString(statusCode)+" error while fetching"),System.currentTimeMillis()+60000L,
+                System.currentTimeMillis()+600000L,-1,true);
+
+            case HttpStatus.SC_UNAUTHORIZED:
+              Logging.connectors.warn("Livelink: Document fetch unauthorized for "+ingestHttpAddress+" ("+contextMsg+")");
+              // Since we logged in, we should fail here if the ingestion user doesn't have access to the
+              // document, but if we do, don't fail hard.
+              resultCode = "UNAUTHORIZED";
+              activities.noDocument(documentIdentifier,version);
+              return;
+
+            case HttpStatus.SC_OK:
+              if (Logging.connectors.isDebugEnabled())
+                Logging.connectors.debug("Livelink: Created http document connection to Livelink "+contextMsg);
+              // A non-existent content length will cause a value of -1 to be returned.  This seems to indicate that the session login did not work right.
+              if (methodThread.getResponseContentLength() >= 0)
+              {
                 try
                 {
-
-                  int statusCode = methodThread.getResponseCode();
-                  switch (statusCode)
+                  InputStream is = methodThread.getSafeInputStream();
+                  try
                   {
-                  case 500:
-                  case 502:
-                    Logging.connectors.warn("Livelink: Service interruption during fetch "+contextMsg+" with Livelink HTTP Server, retrying...");
-                    throw new ServiceInterruption("Service interruption during fetch",new ManifoldCFException(Integer.toString(statusCode)+" error while fetching"),System.currentTimeMillis()+60000L,
-                      System.currentTimeMillis()+600000L,-1,true);
-
-                  case HttpStatus.SC_UNAUTHORIZED:
-                    Logging.connectors.warn("Livelink: Document fetch unauthorized for "+ingestHttpAddress+" ("+contextMsg+")");
-                    // Since we logged in, we should fail here if the ingestion user doesn't have access to the
-                    // the document, but if we do, don't fail hard.
-                    resultCode = "UNAUTHORIZED";
-                    activities.noDocument(documentIdentifier,version);
-                    return;
-
-                  case HttpStatus.SC_OK:
-                    if (Logging.connectors.isDebugEnabled())
-                      Logging.connectors.debug("Livelink: Created http document connection to Livelink "+contextMsg);
-                    // A non-existent content length will cause a value of -1 to be returned.  This seems to indicate that the session login did not work right.
-                    if (methodThread.getResponseContentLength() >= 0)
-                    {
-                      try
-                      {
-                        InputStream is = methodThread.getSafeInputStream();
-                        try
-                        {
-                          rd.setBinary(is,dataSize);
+                    rd.setBinary(is,dataSize);
                             
-                          activities.ingestDocumentWithException(documentIdentifier,version,viewHttpAddress,rd);
-
-                          if (Logging.connectors.isDebugEnabled())
-                            Logging.connectors.debug("Livelink: Ingesting done "+contextMsg);
+                    activities.ingestDocumentWithException(documentIdentifier,version,viewHttpAddress,rd);
+                    
+                    if (Logging.connectors.isDebugEnabled())
+                      Logging.connectors.debug("Livelink: Ingesting done "+contextMsg);
 
-                        }
-                        finally
-                        {
-                          // Close stream via thread, since otherwise this can hang
-                          is.close();
-                        }
-                      }
-                      catch (java.net.SocketTimeoutException e)
-                      {
-                        resultCode = "DATATIMEOUT";
-                        resultDescription = e.getMessage();
-                        currentTime = System.currentTimeMillis();
-                        Logging.connectors.warn("Livelink: Livelink socket timed out ingesting from the Livelink HTTP Server "+contextMsg+": "+e.getMessage(), e);
-                        throw new ServiceInterruption("Socket timed out: "+e.getMessage(),e,currentTime+300000L,currentTime+6*3600000L,-1,false);
-                      }
-                      catch (java.net.SocketException e)
-                      {
-                        resultCode = "DATASOCKETERROR";
-                        resultDescription = e.getMessage();
-                        currentTime = System.currentTimeMillis();
-                        Logging.connectors.warn("Livelink: Livelink socket error ingesting from the Livelink HTTP Server "+contextMsg+": "+e.getMessage(), e);
-                        throw new ServiceInterruption("Socket error: "+e.getMessage(),e,currentTime+300000L,currentTime+6*3600000L,-1,false);
-                      }
-                      catch (javax.net.ssl.SSLHandshakeException e)
-                      {
-                        resultCode = "DATASSLHANDSHAKEERROR";
-                        resultDescription = e.getMessage();
-                        currentTime = System.currentTimeMillis();
-                        Logging.connectors.warn("Livelink: SSL handshake failed authenticating "+contextMsg+": "+e.getMessage(),e);
-                        throw new ServiceInterruption("SSL handshake error: "+e.getMessage(),e,currentTime+60000L,currentTime+300000L,-1,true);
-                      }
-                      catch (ConnectTimeoutException e)
-                      {
-                        resultCode = "CONNECTTIMEOUT";
-                        resultDescription = e.getMessage();
-                        currentTime = System.currentTimeMillis();
-                        Logging.connectors.warn("Livelink: Livelink socket timed out connecting to the Livelink HTTP Server "+contextMsg+": "+e.getMessage(), e);
-                        throw new ServiceInterruption("Connect timed out: "+e.getMessage(),e,currentTime+300000L,currentTime+6*3600000L,-1,false);
-                      }
-                      catch (InterruptedException e)
-                      {
-                        wasInterrupted = true;
-                        throw new ManifoldCFException("Interrupted: "+e.getMessage(),e,ManifoldCFException.INTERRUPTED);
-                      }
-                      catch (InterruptedIOException e)
-                      {
-                        wasInterrupted = true;
-                        throw new ManifoldCFException("Interrupted: "+e.getMessage(),e,ManifoldCFException.INTERRUPTED);
-                      }
-                      catch (HttpException e)
-                      {
-                        resultCode = "HTTPEXCEPTION";
-                        resultDescription = e.getMessage();
-                        // Treat unknown error ingesting data as a transient condition
-                        currentTime = System.currentTimeMillis();
-                        Logging.connectors.warn("Livelink: HTTP exception ingesting "+contextMsg+": "+e.getMessage(),e);
-                        throw new ServiceInterruption("HTTP exception ingesting "+contextMsg+": "+e.getMessage(),e,currentTime+300000L,currentTime+6*3600000L,-1,false);
-                      }
-                      catch (IOException e)
-                      {
-                        resultCode = "DATAEXCEPTION";
-                        resultDescription = e.getMessage();
-                        // Treat unknown error ingesting data as a transient condition
-                        currentTime = System.currentTimeMillis();
-                        Logging.connectors.warn("Livelink: IO exception ingesting "+contextMsg+": "+e.getMessage(),e);
-                        throw new ServiceInterruption("IO exception ingesting "+contextMsg+": "+e.getMessage(),e,currentTime+300000L,currentTime+6*3600000L,-1,false);
-                      }
-                      readSize = dataSize;
-                    }
-                    else
-                    {
-                      resultCode = "SESSIONLOGINFAILED";
-                      activities.noDocument(documentIdentifier,version);
-                    }
-                    break;
-                  case HttpStatus.SC_BAD_REQUEST:
-                  case HttpStatus.SC_USE_PROXY:
-                  case HttpStatus.SC_GONE:
-                    resultCode = "ERROR "+Integer.toString(statusCode);
-                    throw new ManifoldCFException("Unrecoverable request failure; error = "+Integer.toString(statusCode));
-                  default:
-                    resultCode = "UNKNOWN";
-                    Logging.connectors.warn("Livelink: Attempt to retrieve document from '"+ingestHttpAddress+"' received a response of "+Integer.toString(statusCode)+"; retrying in one minute");
-                    currentTime = System.currentTimeMillis();
-                    throw new ServiceInterruption("Fetch failed; retrying in 1 minute",new ManifoldCFException("Fetch failed with unknown code "+Integer.toString(statusCode)),
-                      currentTime+60000L,currentTime+600000L,-1,true);
                   }
-                }
-                catch (InterruptedException e)
-                {
-                  // Drop the connection on the floor
-                  methodThread.interrupt();
-                  methodThread = null;
-                  throw new ManifoldCFException("Interrupted: "+e.getMessage(),e,ManifoldCFException.INTERRUPTED);
+                  finally
+                  {
+                    // Close stream via thread, since otherwise this can hang
+                    is.close();
+                  }
                 }
                 catch (java.net.SocketTimeoutException e)
                 {
-                  Logging.connectors.warn("Livelink: Socket timed out reading from the Livelink HTTP Server "+contextMsg+": "+e.getMessage(), e);
-                  resultCode = "TIMEOUT";
+                  resultCode = "DATATIMEOUT";
                   resultDescription = e.getMessage();
                   currentTime = System.currentTimeMillis();
-                  throw new ServiceInterruption("Socket timed out: "+e.getMessage(),e,currentTime+300000L,currentTime+6*3600000L,-1,true);
+                  Logging.connectors.warn("Livelink: Livelink socket timed out ingesting from the Livelink HTTP Server "+contextMsg+": "+e.getMessage(), e);
+                  throw new ServiceInterruption("Socket timed out: "+e.getMessage(),e,currentTime+300000L,currentTime+6*3600000L,-1,false);
                 }
                 catch (java.net.SocketException e)
                 {
-                  Logging.connectors.warn("Livelink: Socket error reading from Livelink HTTP Server "+contextMsg+": "+e.getMessage(), e);
-                  resultCode = "SOCKETERROR";
+                  resultCode = "DATASOCKETERROR";
                   resultDescription = e.getMessage();
                   currentTime = System.currentTimeMillis();
-                  throw new ServiceInterruption("Socket error: "+e.getMessage(),e,currentTime+300000L,currentTime+6*3600000L,-1,true);
+                  Logging.connectors.warn("Livelink: Livelink socket error ingesting from the Livelink HTTP Server "+contextMsg+": "+e.getMessage(), e);
+                  throw new ServiceInterruption("Socket error: "+e.getMessage(),e,currentTime+300000L,currentTime+6*3600000L,-1,false);
                 }
                 catch (javax.net.ssl.SSLHandshakeException e)
                 {
-                  currentTime = System.currentTimeMillis();
-                  Logging.connectors.warn("Livelink: SSL handshake failed "+contextMsg+": "+e.getMessage(),e);
-                  resultCode = "SSLHANDSHAKEERROR";
+                  resultCode = "DATASSLHANDSHAKEERROR";
                   resultDescription = e.getMessage();
+                  currentTime = System.currentTimeMillis();
+                  Logging.connectors.warn("Livelink: SSL handshake failed authenticating "+contextMsg+": "+e.getMessage(),e);
                   throw new ServiceInterruption("SSL handshake error: "+e.getMessage(),e,currentTime+60000L,currentTime+300000L,-1,true);
                 }
                 catch (ConnectTimeoutException e)
                 {
-                  Logging.connectors.warn("Livelink: Connect timed out reading from the Livelink HTTP Server "+contextMsg+": "+e.getMessage(), e);
                   resultCode = "CONNECTTIMEOUT";
                   resultDescription = e.getMessage();
                   currentTime = System.currentTimeMillis();
-                  throw new ServiceInterruption("Connect timed out: "+e.getMessage(),e,currentTime+300000L,currentTime+6*3600000L,-1,true);
+                  Logging.connectors.warn("Livelink: Livelink socket timed out connecting to the Livelink HTTP Server "+contextMsg+": "+e.getMessage(), e);
+                  throw new ServiceInterruption("Connect timed out: "+e.getMessage(),e,currentTime+300000L,currentTime+6*3600000L,-1,false);
+                }
+                catch (InterruptedException e)
+                {
+                  wasInterrupted = true;
+                  throw new ManifoldCFException("Interrupted: "+e.getMessage(),e,ManifoldCFException.INTERRUPTED);
                 }
                 catch (InterruptedIOException e)
                 {
-                  methodThread.interrupt();
+                  wasInterrupted = true;
                   throw new ManifoldCFException("Interrupted: "+e.getMessage(),e,ManifoldCFException.INTERRUPTED);
                 }
                 catch (HttpException e)
                 {
-                  resultCode = "EXCEPTION";
+                  resultCode = "HTTPEXCEPTION";
                   resultDescription = e.getMessage();
-                  throw new ManifoldCFException("Exception getting response "+contextMsg+": "+e.getMessage(), e);
+                  // Treat unknown error ingesting data as a transient condition
+                  currentTime = System.currentTimeMillis();
+                  Logging.connectors.warn("Livelink: HTTP exception ingesting "+contextMsg+": "+e.getMessage(),e);
+                  throw new ServiceInterruption("HTTP exception ingesting "+contextMsg+": "+e.getMessage(),e,currentTime+300000L,currentTime+6*3600000L,-1,false);
                 }
                 catch (IOException e)
                 {
-                  resultCode = "EXCEPTION";
+                  resultCode = "DATAEXCEPTION";
                   resultDescription = e.getMessage();
-                  throw new ManifoldCFException("Exception getting response "+contextMsg+": "+e.getMessage(), e);
-                }
-                finally
-                {
-                  if (methodThread != null)
-                  {
-                    methodThread.abort();
-                    if (!wasInterrupted)
-                    {
-                      try
-                      {
-                       methodThread.finishUp();
-                      }
-                      catch (InterruptedException e)
-                      {
-                        wasInterrupted = true;
-                        throw new ManifoldCFException(e.getMessage(),e,ManifoldCFException.INTERRUPTED);
-                      }
-                    }
-                  }
+                  // Treat unknown error ingesting data as a transient condition
+                  currentTime = System.currentTimeMillis();
+                  Logging.connectors.warn("Livelink: IO exception ingesting "+contextMsg+": "+e.getMessage(),e);
+                  throw new ServiceInterruption("IO exception ingesting "+contextMsg+": "+e.getMessage(),e,currentTime+300000L,currentTime+6*3600000L,-1,false);
                 }
+                readSize = dataSize;
               }
               else
               {
-                if (Logging.connectors.isDebugEnabled())
-                  Logging.connectors.debug("Livelink: No fetch URI "+contextMsg+" - not ingesting");
-                resultCode = "NOURI";
-                return;
+                resultCode = "SESSIONLOGINFAILED";
+                activities.noDocument(documentIdentifier,version);
               }
+              break;
+            case HttpStatus.SC_BAD_REQUEST:
+            case HttpStatus.SC_USE_PROXY:
+            case HttpStatus.SC_GONE:
+              resultCode = "ERROR "+Integer.toString(statusCode);
+              throw new ManifoldCFException("Unrecoverable request failure; error = "+Integer.toString(statusCode));
+            default:
+              resultCode = "UNKNOWN";
+              Logging.connectors.warn("Livelink: Attempt to retrieve document from '"+ingestHttpAddress+"' received a response of "+Integer.toString(statusCode)+"; retrying in one minute");
+              currentTime = System.currentTimeMillis();
+              throw new ServiceInterruption("Fetch failed; retrying in 1 minute",new ManifoldCFException("Fetch failed with unknown code "+Integer.toString(statusCode)),
+                currentTime+60000L,currentTime+600000L,-1,true);
             }
-            else
+          }
+          catch (InterruptedException e)
+          {
+            // Drop the connection on the floor
+            methodThread.interrupt();
+            methodThread = null;
+            throw new ManifoldCFException("Interrupted: "+e.getMessage(),e,ManifoldCFException.INTERRUPTED);
+          }
+          catch (java.net.SocketTimeoutException e)
+          {
+            Logging.connectors.warn("Livelink: Socket timed out reading from the Livelink HTTP Server "+contextMsg+": "+e.getMessage(), e);
+            resultCode = "TIMEOUT";
+            resultDescription = e.getMessage();
+            currentTime = System.currentTimeMillis();
+            throw new ServiceInterruption("Socket timed out: "+e.getMessage(),e,currentTime+300000L,currentTime+6*3600000L,-1,true);
+          }
+          catch (java.net.SocketException e)
+          {
+            Logging.connectors.warn("Livelink: Socket error reading from Livelink HTTP Server "+contextMsg+": "+e.getMessage(), e);
+            resultCode = "SOCKETERROR";
+            resultDescription = e.getMessage();
+            currentTime = System.currentTimeMillis();
+            throw new ServiceInterruption("Socket error: "+e.getMessage(),e,currentTime+300000L,currentTime+6*3600000L,-1,true);
+          }
+          catch (javax.net.ssl.SSLHandshakeException e)
+          {
+            currentTime = System.currentTimeMillis();
+            Logging.connectors.warn("Livelink: SSL handshake failed "+contextMsg+": "+e.getMessage(),e);
+            resultCode = "SSLHANDSHAKEERROR";
+            resultDescription = e.getMessage();
+            throw new ServiceInterruption("SSL handshake error: "+e.getMessage(),e,currentTime+60000L,currentTime+300000L,-1,true);
+          }
+          catch (ConnectTimeoutException e)
+          {
+            Logging.connectors.warn("Livelink: Connect timed out reading from the Livelink HTTP Server "+contextMsg+": "+e.getMessage(), e);
+            resultCode = "CONNECTTIMEOUT";
+            resultDescription = e.getMessage();
+            currentTime = System.currentTimeMillis();
+            throw new ServiceInterruption("Connect timed out: "+e.getMessage(),e,currentTime+300000L,currentTime+6*3600000L,-1,true);
+          }
+          catch (InterruptedIOException e)
+          {
+            methodThread.interrupt();
+            throw new ManifoldCFException("Interrupted: "+e.getMessage(),e,ManifoldCFException.INTERRUPTED);
+          }
+          catch (HttpException e)
+          {
+            resultCode = "EXCEPTION";
+            resultDescription = e.getMessage();
+            throw new ManifoldCFException("Exception getting response "+contextMsg+": "+e.getMessage(), e);
+          }
+          catch (IOException e)
+          {
+            resultCode = "EXCEPTION";
+            resultDescription = e.getMessage();
+            throw new ManifoldCFException("Exception getting response "+contextMsg+": "+e.getMessage(), e);
+          }
+          finally
+          {
+            if (methodThread != null)
             {
-              // Use FetchVersion instead
-              long currentTime;
-              
-              // Fire up the document reading thread
-              DocumentReadingThread t = new DocumentReadingThread(vol,objID,0);
-              try 
+              methodThread.abort();
+              if (!wasInterrupted)
               {
-                t.start();
                 try
                 {
-                  InputStream is = t.getSafeInputStream();
-                  try 
-                  {
-                    // Can only index while background thread is running!
-                    rd.setBinary(is, dataSize);
-                    activities.ingestDocumentWithException(documentIdentifier, version, viewHttpAddress, rd);
-                  }
-                  finally
-                  {
-                    is.close();
-                  }
-                }
-                catch (ManifoldCFException e)
-                {
-                  if (e.getErrorCode() == ManifoldCFException.INTERRUPTED)
-                    wasInterrupted = true;
-                  throw e;
+                  methodThread.finishUp();
                 }
-                catch (java.net.SocketTimeoutException e)
-                {
-                  throw e;
-                }
-                catch (InterruptedIOException e)
+                catch (InterruptedException e)
                 {
                   wasInterrupted = true;
-                  throw e;
-                }
-                finally
-                {
-                  if (!wasInterrupted)
-                    t.finishUp();
-                }
-
-                // No errors.  Record the fact that we made it.
-                resultCode = "OK";
-                readSize = dataSize;
-              }
-              catch (InterruptedException e) 
-              {
-                t.interrupt();
-                throw new ManifoldCFException("Interrupted: " + e.getMessage(), e,
-                  ManifoldCFException.INTERRUPTED);
-              }
-              catch (ConnectTimeoutException e)
-              {
-                Logging.connectors.warn("Livelink: Connect timed out "+contextMsg+": "+e.getMessage(), e);
-                resultCode = "CONNECTTIMEOUT";
-                resultDescription = e.getMessage();
-                currentTime = System.currentTimeMillis();
-                throw new ServiceInterruption("Connect timed out: "+e.getMessage(),e,currentTime+300000L,currentTime+6*3600000L,-1,true);
-              }
-              catch (InterruptedIOException e)
-              {
-                t.interrupt();
-                throw new ManifoldCFException("Interrupted: "+e.getMessage(),e,ManifoldCFException.INTERRUPTED);
-              }
-              catch (IOException e)
-              {
-                resultCode = "EXCEPTION";
-                resultDescription = e.getMessage();
-                throw new ManifoldCFException("Exception getting response "+contextMsg+": "+e.getMessage(), e);
-              }
-              catch (ManifoldCFException e)
-              {
-                if (e.getErrorCode() != ManifoldCFException.INTERRUPTED)
-                {
-                  resultCode = "EXCEPTION";
-                  resultDescription = e.getMessage();
+                  throw new ManifoldCFException(e.getMessage(),e,ManifoldCFException.INTERRUPTED);
                 }
-                throw e;
-              }
-              catch (RuntimeException e)
-              {
-                resultCode = "EXCEPTION";
-                resultDescription = e.getMessage();
-                handleLivelinkRuntimeException(e,0,true);
               }
             }
           }
-          else
-          {
-            // Document not indexable because of its length
-            resultDescription = "Document length ("+dataSize+") was rejected by output connector";
-            if (Logging.connectors.isDebugEnabled())
-              Logging.connectors.debug("Livelink: Excluding document "+documentIdentifier+" because its length ("+dataSize+") was rejected by output connector");
-            resultCode = "DOCUMENTTOOLONG";
-            activities.noDocument(documentIdentifier,version);
-          }
         }
         else
         {
-          // Document not indexable because of its mime type
-          resultDescription = "Mime type ("+mimeType+") was rejected by output connector";
           if (Logging.connectors.isDebugEnabled())
-            Logging.connectors.debug("Livelink: Excluding document "+documentIdentifier+" because its mime type ("+mimeType+") was rejected by output connector");
-          resultCode = "MIMETYPEEXCLUSION";
-          activities.noDocument(documentIdentifier,version);
+            Logging.connectors.debug("Livelink: No fetch URI "+contextMsg+" - not ingesting");
+          resultCode = "NOURI";
+          return;
         }
       }
       else
       {
-        // Document not ingestable due to URL
-        resultDescription = "URL ("+viewHttpAddress+") was rejected by output connector";
-        if (Logging.connectors.isDebugEnabled())
-          Logging.connectors.debug("Livelink: Excluding document "+documentIdentifier+" because its URL ("+viewHttpAddress+") was rejected by output connector");
-        resultCode = "URLEXCLUSION";
-        activities.noDocument(documentIdentifier,version);
+        // Use FetchVersion instead
+        long currentTime;
+              
+        // Fire up the document reading thread
+        DocumentReadingThread t = new DocumentReadingThread(vol,objID,0);
+        try 
+        {
+          t.start();
+          try
+          {
+            InputStream is = t.getSafeInputStream();
+            try 
+            {
+              // Can only index while background thread is running!
+              rd.setBinary(is, dataSize);
+              activities.ingestDocumentWithException(documentIdentifier, version, viewHttpAddress, rd);
+            }
+            finally
+            {
+              is.close();
+            }
+          }
+          catch (ManifoldCFException e)
+          {
+            if (e.getErrorCode() == ManifoldCFException.INTERRUPTED)
+              wasInterrupted = true;
+            throw e;
+          }
+          catch (java.net.SocketTimeoutException e)
+          {
+            throw e;
+          }
+          catch (InterruptedIOException e)
+          {
+            wasInterrupted = true;
+            throw e;
+          }
+          finally
+          {
+            if (!wasInterrupted)
+              t.finishUp();
+          }
+
+          // No errors.  Record the fact that we made it.
+          resultCode = "OK";
+          readSize = dataSize;
+        }
+        catch (InterruptedException e) 
+        {
+          t.interrupt();
+          throw new ManifoldCFException("Interrupted: " + e.getMessage(), e,
+            ManifoldCFException.INTERRUPTED);
+        }
+        catch (ConnectTimeoutException e)
+        {
+          Logging.connectors.warn("Livelink: Connect timed out "+contextMsg+": "+e.getMessage(), e);
+          resultCode = "CONNECTTIMEOUT";
+          resultDescription = e.getMessage();
+          currentTime = System.currentTimeMillis();
+          throw new ServiceInterruption("Connect timed out: "+e.getMessage(),e,currentTime+300000L,currentTime+6*3600000L,-1,true);
+        }
+        catch (InterruptedIOException e)
+        {
+          t.interrupt();
+          throw new ManifoldCFException("Interrupted: "+e.getMessage(),e,ManifoldCFException.INTERRUPTED);
+        }
+        catch (IOException e)
+        {
+          resultCode = "EXCEPTION";
+          resultDescription = e.getMessage();
+          throw new ManifoldCFException("Exception getting response "+contextMsg+": "+e.getMessage(), e);
+        }
+        catch (ManifoldCFException e)
+        {
+          if (e.getErrorCode() != ManifoldCFException.INTERRUPTED)
+          {
+            resultCode = "EXCEPTION";
+            resultDescription = e.getMessage();
+          }
+          throw e;
+        }
+        catch (RuntimeException e)
+        {
+          resultCode = "EXCEPTION";
+          resultDescription = e.getMessage();
+          handleLivelinkRuntimeException(e,0,true);
+        }
       }
     }
     finally

Propchange: manifoldcf/trunk/connectors/sharepoint/
------------------------------------------------------------------------------
  Merged /manifoldcf/branches/CONNECTORS-1067/connectors/sharepoint:r1630049-1630186

Modified: manifoldcf/trunk/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SharePointRepository.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SharePointRepository.java?rev=1630188&r1=1630187&r2=1630188&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SharePointRepository.java (original)
+++ manifoldcf/trunk/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SharePointRepository.java Wed Oct  8 17:54:47 2014
@@ -1634,226 +1634,229 @@ public class SharePointRepository extend
     throws ManifoldCFException, ServiceInterruption
   {
     // Before we fetch, confirm that the output connector will accept the document
-    if (activities.checkURLIndexable(fileUrl))
+    if (!activities.checkURLIndexable(fileUrl))
     {
-      // Also check mime type
-      String contentType = mapExtensionToMimeType(documentIdentifier);
-      if (activities.checkMimeTypeIndexable(contentType))
+      // URL failed
+      if (Logging.connectors.isDebugEnabled())
+        Logging.connectors.debug("SharePoint: Skipping document '"+documentIdentifier+"' because output connector says URL '"+fileUrl+"' is not indexable");
+      return false;
+    }
+    
+    // Also check mime type
+    String contentType = mapExtensionToMimeType(documentIdentifier);
+    if (!activities.checkMimeTypeIndexable(contentType))
+    {
+      // Mime type failed
+      if (Logging.connectors.isDebugEnabled())
+        Logging.connectors.debug("SharePoint: Skipping document '"+documentIdentifier+"' because output connector says mime type '"+((contentType==null)?"null":contentType)+"' is not indexable");
+      return false;
+    }
+    
+    // Now check date stamp
+    if (!activities.checkDateIndexable(modifiedDate))
+    {
+      // Date failed
+      if (Logging.connectors.isDebugEnabled())
+        Logging.connectors.debug("SharePoint: Skipping document '"+documentIdentifier+"' because output connector says date '"+((modifiedDate==null)?"null":modifiedDate)+"' is not indexable");
+      return false;
+    }
+    
+    // Set stuff up for fetch activity logging
+    long startFetchTime = System.currentTimeMillis();
+    try
+    {
+      // Read the document into a local temporary file, so I get a reliable length.
+      File tempFile = File.createTempFile("__shp__",".tmp");
+      try
       {
-        // Set stuff up for fetch activity logging
-        long startFetchTime = System.currentTimeMillis();
+        // Open the output stream
+        OutputStream os = new FileOutputStream(tempFile);
         try
         {
-          // Read the document into a local temporary file, so I get a reliable length.
-          File tempFile = File.createTempFile("__shp__",".tmp");
+          // Catch all exceptions having to do with reading the document
           try
           {
-            // Open the output stream
-            OutputStream os = new FileOutputStream(tempFile);
-            try
-            {
-              // Catch all exceptions having to do with reading the document
-              try
-              {
-                ExecuteMethodThread emt = new ExecuteMethodThread(httpClient, fetchUrl, os);
-                emt.start();
-                int returnCode = emt.finishUp();
+            ExecuteMethodThread emt = new ExecuteMethodThread(httpClient, fetchUrl, os);
+            emt.start();
+            int returnCode = emt.finishUp();
                   
-                if (returnCode == 404 || returnCode == 401 || returnCode == 400 || returnCode == 415)
-                {
-                  // Well, sharepoint thought the document was there, but it really isn't, so delete it.
-                  if (Logging.connectors.isDebugEnabled())
-                    Logging.connectors.debug("SharePoint: Document at '"+fileUrl+"' failed to fetch with code "+Integer.toString(returnCode)+", deleting");
-                  activities.recordActivity(new Long(startFetchTime),ACTIVITY_FETCH,
-                    null,documentIdentifier,"Not found",Integer.toString(returnCode),null);
-                  return false;
-                }
-                else if (returnCode != 200)
-                {
-                  activities.recordActivity(new Long(startFetchTime),ACTIVITY_FETCH,
-                    null,documentIdentifier,"Error","Http status "+Integer.toString(returnCode),null);
-                  throw new ManifoldCFException("Error fetching document '"+fileUrl+"': "+Integer.toString(returnCode));
-                }
-
-                // Log the normal fetch activity
-                activities.recordActivity(new Long(startFetchTime),ACTIVITY_FETCH,
-                  new Long(tempFile.length()),documentIdentifier,"Success",null,null);
-                
-              }
-              catch (InterruptedException e)
-              {
-                throw new ManifoldCFException("Interrupted: "+e.getMessage(),e,ManifoldCFException.INTERRUPTED);
-              }
-              catch (java.net.SocketTimeoutException e)
-              {
-                activities.recordActivity(new Long(startFetchTime),ACTIVITY_FETCH,
-                  new Long(tempFile.length()),documentIdentifier,"Error",e.getMessage(),null);
-                Logging.connectors.warn("SharePoint: SocketTimeoutException thrown: "+e.getMessage(),e);
-                long currentTime = System.currentTimeMillis();
-                throw new ServiceInterruption("SharePoint is down attempting to read '"+fileUrl+"', retrying: "+e.getMessage(),e,currentTime + 300000L,
-                  currentTime + 12 * 60 * 60000L,-1,true);
-              }
-              catch (org.apache.http.conn.ConnectTimeoutException e)
-              {
-                activities.recordActivity(new Long(startFetchTime),ACTIVITY_FETCH,
-                  new Long(tempFile.length()),documentIdentifier,"Error",e.getMessage(),null);
-                Logging.connectors.warn("SharePoint: ConnectTimeoutException thrown: "+e.getMessage(),e);
-                long currentTime = System.currentTimeMillis();
-                throw new ServiceInterruption("SharePoint is down attempting to read '"+fileUrl+"', retrying: "+e.getMessage(),e,currentTime + 300000L,
-                  currentTime + 12 * 60 * 60000L,-1,true);
-              }
-              catch (InterruptedIOException e)
-              {
-                throw new ManifoldCFException("Interrupted: "+e.getMessage(),e,ManifoldCFException.INTERRUPTED);
-              }
-              catch (IllegalArgumentException e)
-              {
-                Logging.connectors.error("SharePoint: Illegal argument", e);
-                activities.recordActivity(new Long(startFetchTime),ACTIVITY_FETCH,
-                  new Long(tempFile.length()),documentIdentifier,"Error",e.getMessage(),null);
-                throw new ManifoldCFException("SharePoint: Illegal argument: "+e.getMessage(),e);
-              }
-              catch (org.apache.http.HttpException e)
-              {
-                Logging.connectors.warn("SharePoint: HttpException thrown",e);
-                activities.recordActivity(new Long(startFetchTime),ACTIVITY_FETCH,
-                  new Long(tempFile.length()),documentIdentifier,"Error",e.getMessage(),null);
-                long currentTime = System.currentTimeMillis();
-                throw new ServiceInterruption("SharePoint is down attempting to read '"+fileUrl+"', retrying: "+e.getMessage(),e,currentTime + 300000L,
-                  currentTime + 12 * 60 * 60000L,-1,true);
-              }
-              catch (IOException e)
-              {
-                activities.recordActivity(new Long(startFetchTime),ACTIVITY_FETCH,
-                  new Long(tempFile.length()),documentIdentifier,"Error",e.getMessage(),null);
-                Logging.connectors.warn("SharePoint: IOException thrown: "+e.getMessage(),e);
-                long currentTime = System.currentTimeMillis();
-                throw new ServiceInterruption("SharePoint is down attempting to read '"+fileUrl+"', retrying: "+e.getMessage(),e,currentTime + 300000L,
-                  currentTime + 12 * 60 * 60000L,-1,true);
-              }
-            }
-            finally
+            if (returnCode == 404 || returnCode == 401 || returnCode == 400 || returnCode == 415)
             {
-              os.close();
+              // Well, sharepoint thought the document was there, but it really isn't, so delete it.
+              if (Logging.connectors.isDebugEnabled())
+                Logging.connectors.debug("SharePoint: Document at '"+fileUrl+"' failed to fetch with code "+Integer.toString(returnCode)+", deleting");
+              activities.recordActivity(new Long(startFetchTime),ACTIVITY_FETCH,
+                null,documentIdentifier,"Not found",Integer.toString(returnCode),null);
+              return false;
             }
-                      
-            // Ingest the document
-            long documentLength = tempFile.length();
-            if (activities.checkLengthIndexable(documentLength))
+            else if (returnCode != 200)
             {
-              InputStream is = new FileInputStream(tempFile);
-              try
-              {
-                RepositoryDocument data = new RepositoryDocument();
-                data.setBinary( is, documentLength );
-                
-                data.setFileName(mapToFileName(documentIdentifier));
-                          
-                if (contentType != null)
-                  data.setMimeType(contentType);
-                
-                setDataACLs(data,accessTokens,denyTokens);
-
-                setPathAttribute(data,sDesc,documentIdentifier);
-                          
-                if (modifiedDate != null)
-                  data.setModifiedDate(modifiedDate);
-                if (createdDate != null)
-                  data.setCreatedDate(createdDate);
+              activities.recordActivity(new Long(startFetchTime),ACTIVITY_FETCH,
+                null,documentIdentifier,"Error","Http status "+Integer.toString(returnCode),null);
+              throw new ManifoldCFException("Error fetching document '"+fileUrl+"': "+Integer.toString(returnCode));
+            }
 
-                if (metadataValues != null)
-                {
-                  Iterator<String> iter = metadataValues.keySet().iterator();
-                  while (iter.hasNext())
-                  {
-                    String fieldName = iter.next();
-                    String fieldData = metadataValues.get(fieldName);
-                    data.addField(fieldName,fieldData);
-                  }
-                }
-                data.addField("GUID",guid);
+            // Log the normal fetch activity
+            activities.recordActivity(new Long(startFetchTime),ACTIVITY_FETCH,
+              new Long(tempFile.length()),documentIdentifier,"Success",null,null);
                 
-                try
-                {
-                  activities.ingestDocumentWithException( documentIdentifier, version, fileUrl , data );
-                }
-                catch (IOException e)
-                {
-                  handleIOException(e,"reading document");
-                }
-                return true;
-              }
-              finally
-              {
-                try
-                {
-                  is.close();
-                }
-                catch (java.net.SocketTimeoutException e)
-                {
-                  // This is not fatal
-                  Logging.connectors.debug("SharePoint: Timeout before read could finish for '"+fileUrl+"': "+e.getMessage(),e);
-                }
-                catch (org.apache.http.conn.ConnectTimeoutException e)
-                {
-                  // This is not fatal
-                  Logging.connectors.debug("SharePoint: Connect timeout before read could finish for '"+fileUrl+"': "+e.getMessage(),e);
-                }
-                catch (InterruptedIOException e)
-                {
-                  throw new ManifoldCFException("Interrupted: "+e.getMessage(),e,ManifoldCFException.INTERRUPTED);
-                }
-                catch (IOException e)
-                {
-                  // This is not fatal
-                  Logging.connectors.debug("SharePoint: Server closed connection before read could finish for '"+fileUrl+"': "+e.getMessage(),e);
-                }
-              }
-            }
-            else
-            {
-              // Document too long
-              if (Logging.connectors.isDebugEnabled())
-                Logging.connectors.debug("SharePoint: Document '"+documentIdentifier+"' was too long, according to output connector");
-              return false;
-            }
           }
-          finally
+          catch (InterruptedException e)
+          {
+            throw new ManifoldCFException("Interrupted: "+e.getMessage(),e,ManifoldCFException.INTERRUPTED);
+          }
+          catch (java.net.SocketTimeoutException e)
+          {
+            activities.recordActivity(new Long(startFetchTime),ACTIVITY_FETCH,
+              new Long(tempFile.length()),documentIdentifier,"Error",e.getMessage(),null);
+            Logging.connectors.warn("SharePoint: SocketTimeoutException thrown: "+e.getMessage(),e);
+            long currentTime = System.currentTimeMillis();
+            throw new ServiceInterruption("SharePoint is down attempting to read '"+fileUrl+"', retrying: "+e.getMessage(),e,currentTime + 300000L,
+              currentTime + 12 * 60 * 60000L,-1,true);
+          }
+          catch (org.apache.http.conn.ConnectTimeoutException e)
+          {
+            activities.recordActivity(new Long(startFetchTime),ACTIVITY_FETCH,
+              new Long(tempFile.length()),documentIdentifier,"Error",e.getMessage(),null);
+            Logging.connectors.warn("SharePoint: ConnectTimeoutException thrown: "+e.getMessage(),e);
+            long currentTime = System.currentTimeMillis();
+            throw new ServiceInterruption("SharePoint is down attempting to read '"+fileUrl+"', retrying: "+e.getMessage(),e,currentTime + 300000L,
+              currentTime + 12 * 60 * 60000L,-1,true);
+          }
+          catch (InterruptedIOException e)
+          {
+            throw new ManifoldCFException("Interrupted: "+e.getMessage(),e,ManifoldCFException.INTERRUPTED);
+          }
+          catch (IllegalArgumentException e)
+          {
+            Logging.connectors.error("SharePoint: Illegal argument", e);
+            activities.recordActivity(new Long(startFetchTime),ACTIVITY_FETCH,
+              new Long(tempFile.length()),documentIdentifier,"Error",e.getMessage(),null);
+            throw new ManifoldCFException("SharePoint: Illegal argument: "+e.getMessage(),e);
+          }
+          catch (org.apache.http.HttpException e)
+          {
+            Logging.connectors.warn("SharePoint: HttpException thrown",e);
+            activities.recordActivity(new Long(startFetchTime),ACTIVITY_FETCH,
+              new Long(tempFile.length()),documentIdentifier,"Error",e.getMessage(),null);
+            long currentTime = System.currentTimeMillis();
+            throw new ServiceInterruption("SharePoint is down attempting to read '"+fileUrl+"', retrying: "+e.getMessage(),e,currentTime + 300000L,
+              currentTime + 12 * 60 * 60000L,-1,true);
+          }
+          catch (IOException e)
           {
-            tempFile.delete();
+            activities.recordActivity(new Long(startFetchTime),ACTIVITY_FETCH,
+              new Long(tempFile.length()),documentIdentifier,"Error",e.getMessage(),null);
+            Logging.connectors.warn("SharePoint: IOException thrown: "+e.getMessage(),e);
+            long currentTime = System.currentTimeMillis();
+            throw new ServiceInterruption("SharePoint is down attempting to read '"+fileUrl+"', retrying: "+e.getMessage(),e,currentTime + 300000L,
+              currentTime + 12 * 60 * 60000L,-1,true);
           }
         }
-        catch (java.net.SocketTimeoutException e)
+        finally
         {
-          throw new ManifoldCFException("Socket timeout error writing '"+fileUrl+"' to temporary file: "+e.getMessage(),e);
+          os.close();
         }
-        catch (org.apache.http.conn.ConnectTimeoutException e)
+                      
+        // Ingest the document
+        long documentLength = tempFile.length();
+        if (!activities.checkLengthIndexable(documentLength))
         {
-          throw new ManifoldCFException("Connect timeout error writing '"+fileUrl+"' to temporary file: "+e.getMessage(),e);
+          // Document too long
+          if (Logging.connectors.isDebugEnabled())
+            Logging.connectors.debug("SharePoint: Document '"+documentIdentifier+"' was too long, according to output connector");
+          return false;
         }
-        catch (InterruptedIOException e)
+        
+        InputStream is = new FileInputStream(tempFile);
+        try
         {
-          throw new ManifoldCFException("Interrupted: "+e.getMessage(),e,ManifoldCFException.INTERRUPTED);
+          RepositoryDocument data = new RepositoryDocument();
+          data.setBinary( is, documentLength );
+                
+          data.setFileName(mapToFileName(documentIdentifier));
+                          
+          if (contentType != null)
+            data.setMimeType(contentType);
+                
+          setDataACLs(data,accessTokens,denyTokens);
+
+          setPathAttribute(data,sDesc,documentIdentifier);
+          
+          if (modifiedDate != null)
+            data.setModifiedDate(modifiedDate);
+          if (createdDate != null)
+            data.setCreatedDate(createdDate);
+
+          if (metadataValues != null)
+          {
+            Iterator<String> iter = metadataValues.keySet().iterator();
+            while (iter.hasNext())
+            {
+              String fieldName = iter.next();
+              String fieldData = metadataValues.get(fieldName);
+              data.addField(fieldName,fieldData);
+            }
+          }
+          data.addField("GUID",guid);
+                
+          try
+          {
+            activities.ingestDocumentWithException( documentIdentifier, version, fileUrl , data );
+          }
+          catch (IOException e)
+          {
+            handleIOException(e,"reading document");
+          }
+          return true;
         }
-        catch (IOException e)
+        finally
         {
-          throw new ManifoldCFException("IO error writing '"+fileUrl+"' to temporary file: "+e.getMessage(),e);
+          try
+          {
+            is.close();
+          }
+          catch (java.net.SocketTimeoutException e)
+          {
+            // This is not fatal
+            Logging.connectors.debug("SharePoint: Timeout before read could finish for '"+fileUrl+"': "+e.getMessage(),e);
+          }
+          catch (org.apache.http.conn.ConnectTimeoutException e)
+          {
+            // This is not fatal
+            Logging.connectors.debug("SharePoint: Connect timeout before read could finish for '"+fileUrl+"': "+e.getMessage(),e);
+          }
+          catch (InterruptedIOException e)
+          {
+            throw new ManifoldCFException("Interrupted: "+e.getMessage(),e,ManifoldCFException.INTERRUPTED);
+          }
+          catch (IOException e)
+          {
+            // This is not fatal
+            Logging.connectors.debug("SharePoint: Server closed connection before read could finish for '"+fileUrl+"': "+e.getMessage(),e);
+          }
         }
       }
-      else
+      finally
       {
-        // Mime type failed
-        if (Logging.connectors.isDebugEnabled())
-          Logging.connectors.debug("SharePoint: Skipping document '"+documentIdentifier+"' because output connector says mime type '"+((contentType==null)?"null":contentType)+"' is not indexable");
-        return false;
+        tempFile.delete();
       }
     }
-    else
+    catch (java.net.SocketTimeoutException e)
     {
-      // URL failed
-      if (Logging.connectors.isDebugEnabled())
-        Logging.connectors.debug("SharePoint: Skipping document '"+documentIdentifier+"' because output connector says URL '"+fileUrl+"' is not indexable");
-      return false;
+      throw new ManifoldCFException("Socket timeout error writing '"+fileUrl+"' to temporary file: "+e.getMessage(),e);
+    }
+    catch (org.apache.http.conn.ConnectTimeoutException e)
+    {
+      throw new ManifoldCFException("Connect timeout error writing '"+fileUrl+"' to temporary file: "+e.getMessage(),e);
+    }
+    catch (InterruptedIOException e)
+    {
+      throw new ManifoldCFException("Interrupted: "+e.getMessage(),e,ManifoldCFException.INTERRUPTED);
+    }
+    catch (IOException e)
+    {
+      throw new ManifoldCFException("IO error writing '"+fileUrl+"' to temporary file: "+e.getMessage(),e);
     }
   }
 

Propchange: manifoldcf/trunk/connectors/wiki/
------------------------------------------------------------------------------
  Merged /manifoldcf/branches/CONNECTORS-1067/connectors/wiki:r1630049-1630186

Modified: manifoldcf/trunk/connectors/wiki/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/wiki/WikiConnector.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/wiki/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/wiki/WikiConnector.java?rev=1630188&r1=1630187&r2=1630188&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/wiki/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/wiki/WikiConnector.java (original)
+++ manifoldcf/trunk/connectors/wiki/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/wiki/WikiConnector.java Wed Oct  8 17:54:47 2014
@@ -3866,13 +3866,39 @@ public class WikiConnector extends org.a
               String comment = t.getComment();
               String title = t.getTitle();
               String lastModified = t.getLastModified();
+              Date modifiedDate = (lastModified==null)?null:DateParser.parseISO8601Date(lastModified);
+              String contentType = "text/plain";
+              dataSize = contentFile.length();
+
+              if (!activities.checkURLIndexable(fullURL))
+              {
+                activities.noDocument(documentIdentifier,documentVersion);
+                return;
+              }
+              
+              if (!activities.checkLengthIndexable(dataSize))
+              {
+                activities.noDocument(documentIdentifier,documentVersion);
+                return;
+              }
+              
+              if (!activities.checkMimeTypeIndexable(contentType))
+              {
+                activities.noDocument(documentIdentifier,documentVersion);
+                return;
+              }
+              
+              if (!activities.checkDateIndexable(modifiedDate))
+              {
+                activities.noDocument(documentIdentifier,documentVersion);
+                return;
+              }
               
               RepositoryDocument rd = new RepositoryDocument();
               
               // For wiki, type is always text/plain
-              rd.setMimeType("text/plain");
+              rd.setMimeType(contentType);
               
-              dataSize = contentFile.length();
               InputStream is = new FileInputStream(contentFile);
               try
               {
@@ -3886,7 +3912,7 @@ public class WikiConnector extends org.a
                 if (lastModified != null)
                 {
                   rd.addField("last-modified",lastModified);
-                  rd.setModifiedDate(DateParser.parseISO8601Date(lastModified));
+                  rd.setModifiedDate(modifiedDate);
                 }
 
                 if (allowACL != null && allowACL.length > 0) {

Modified: manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/incrementalingest/IncrementalIngester.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/incrementalingest/IncrementalIngester.java?rev=1630188&r1=1630187&r2=1630188&view=diff
==============================================================================
--- manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/incrementalingest/IncrementalIngester.java (original)
+++ manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/incrementalingest/IncrementalIngester.java Wed Oct  8 17:54:47 2014
@@ -227,6 +227,34 @@ public class IncrementalIngester extends
     return pipelineSpecificationBasic.getStageConnectionName(pipelineSpecificationBasic.getOutputStage(0));
   }
 
+  /** Check if a date is indexable.
+  *@param pipelineSpecification is the pipeline specification.
+  *@param date is the date to check.
+  *@param activity are the activities available to this method.
+  *@return true if the date is indexable.
+  */
+  @Override
+  public boolean checkDateIndexable(
+    IPipelineSpecification pipelineSpecification,
+    Date date,
+    IOutputCheckActivity activity)
+    throws ManifoldCFException, ServiceInterruption
+  {
+    PipelineObject pipeline = pipelineGrab(
+      new PipelineConnections(pipelineSpecification));
+    if (pipeline == null)
+      // A connector is not installed; treat this as a service interruption.
+      throw new ServiceInterruption("One or more connectors are not installed",0L);
+    try
+    {
+      return pipeline.checkDateIndexable(date,activity);
+    }
+    finally
+    {
+      pipeline.release();
+    }
+  }
+
   /** Check if a mime type is indexable.
   *@param pipelineSpecification is the pipeline specification.
   *@param mimeType is the mime type to check.
@@ -2485,6 +2513,18 @@ public class IncrementalIngester extends
       addActivities.noDocument();
     }
 
+    /** Detect if a date is acceptable downstream or not.  This method is used to determine whether it makes sense to fetch a document
+    * in the first place.
+    *@param date is the date of the document.
+    *@return true if the date can be accepted by the downstream connection.
+    */
+    @Override
+    public boolean checkDateIndexable(Date date)
+      throws ManifoldCFException, ServiceInterruption
+    {
+      return addActivities.checkDateIndexable(date);
+    }
+
     /** Detect if a mime type is acceptable downstream or not.  This method is used to determine whether it makes sense to fetch a document
     * in the first place.
     *@param mimeType is the mime type of the document.
@@ -2562,7 +2602,14 @@ public class IncrementalIngester extends
       this.transformationConnectors = transformationConnectors;
       this.outputConnectors = outputConnectors;
     }
-    
+
+    public boolean checkDateIndexable(Date date, IOutputCheckActivity finalActivity)
+      throws ManifoldCFException, ServiceInterruption
+    {
+      PipelineCheckFanout entryPoint = buildCheckPipeline(finalActivity);
+      return entryPoint.checkDateIndexable(date);
+    }
+
     public boolean checkMimeTypeIndexable(String mimeType, IOutputCheckActivity finalActivity)
       throws ManifoldCFException, ServiceInterruption
     {
@@ -2824,6 +2871,19 @@ public class IncrementalIngester extends
     }
     
     @Override
+    public boolean checkDateIndexable(Date date)
+      throws ManifoldCFException, ServiceInterruption
+    {
+      // OR all results
+      for (PipelineCheckEntryPoint p : entryPoints)
+      {
+        if (p.checkDateIndexable(date))
+          return true;
+      }
+      return false;
+    }
+
+    @Override
     public boolean checkMimeTypeIndexable(String mimeType)
       throws ManifoldCFException, ServiceInterruption
     {
@@ -2894,6 +2954,12 @@ public class IncrementalIngester extends
       this.checkActivity = checkActivity;
     }
     
+    public boolean checkDateIndexable(Date date)
+      throws ManifoldCFException, ServiceInterruption
+    {
+      return pipelineConnector.checkDateIndexable(pipelineDescriptionString,date,checkActivity);
+    }
+
     public boolean checkMimeTypeIndexable(String mimeType)
       throws ManifoldCFException, ServiceInterruption
     {
@@ -2948,6 +3014,19 @@ public class IncrementalIngester extends
     }
     
     @Override
+    public boolean checkDateIndexable(Date date)
+      throws ManifoldCFException, ServiceInterruption
+    {
+      // OR all results
+      for (PipelineAddEntryPoint p : entryPoints)
+      {
+        if (p.checkDateIndexable(date))
+          return true;
+      }
+      return false;
+    }
+
+    @Override
     public boolean checkMimeTypeIndexable(String mimeType)
       throws ManifoldCFException, ServiceInterruption
     {
@@ -3137,6 +3216,12 @@ public class IncrementalIngester extends
     {
       return isActive;
     }
+
+    public boolean checkDateIndexable(Date date)
+      throws ManifoldCFException, ServiceInterruption
+    {
+      return pipelineConnector.checkDateIndexable(pipelineDescriptionString,date,addActivity);
+    }
     
     public boolean checkMimeTypeIndexable(String mimeType)
       throws ManifoldCFException, ServiceInterruption
@@ -3782,6 +3867,18 @@ public class IncrementalIngester extends
       activities.recordActivity(startTime,activityType,dataSize,entityURI,resultCode,resultDescription);
     }
 
+    /** Detect if a date is acceptable downstream or not.  This method is used to determine whether it makes sense to fetch a document
+    * in the first place.
+    *@param date is the date of the document.
+    *@return true if the document described by the date can be accepted by the downstream connection.
+    */
+    @Override
+    public boolean checkDateIndexable(Date date)
+      throws ManifoldCFException, ServiceInterruption
+    {
+      return activities.checkDateIndexable(date);
+    }
+
     /** Detect if a mime type is acceptable downstream or not.  This method is used to determine whether it makes sense to fetch a document
     * in the first place.
     *@param mimeType is the mime type of the document.

Modified: manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IIncrementalIngester.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IIncrementalIngester.java?rev=1630188&r1=1630187&r2=1630188&view=diff
==============================================================================
--- manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IIncrementalIngester.java (original)
+++ manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IIncrementalIngester.java Wed Oct  8 17:54:47 2014
@@ -90,6 +90,18 @@ public interface IIncrementalIngester
   public VersionContext getTransformationDescription(String transformationConnectionName, Specification spec)
     throws ManifoldCFException, ServiceInterruption;
 
+  /** Check if a document date is indexable.
+  *@param pipelineSpecification is the pipeline specification.
+  *@param date is the date to check
+  *@param activity are the activities available to this method.
+  *@return true if the document with that date is indexable.
+  */
+  public boolean checkDateIndexable(
+    IPipelineSpecification pipelineSpecification,
+    Date date,
+    IOutputCheckActivity activity)
+    throws ManifoldCFException, ServiceInterruption;
+
   /** Check if a mime type is indexable.
   *@param pipelineSpecification is the pipeline specification.
   *@param mimeType is the mime type to check.

Modified: manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IOutputCheckActivity.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IOutputCheckActivity.java?rev=1630188&r1=1630187&r2=1630188&view=diff
==============================================================================
--- manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IOutputCheckActivity.java (original)
+++ manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IOutputCheckActivity.java Wed Oct  8 17:54:47 2014
@@ -22,6 +22,7 @@ import org.apache.manifoldcf.core.interf
 import org.apache.manifoldcf.agents.interfaces.*;
 
 import java.io.*;
+import java.util.*;
 
 /** This interface abstracts from the activities that a transformation connector can do
 when checking a document.
@@ -30,6 +31,14 @@ public interface IOutputCheckActivity
 {
   public static final String _rcsid = "@(#)$Id$";
 
+  /** Detect if a document date is acceptable downstream or not.  This method is used to determine whether it makes sense to fetch a document
+  * in the first place.
+  *@param date is the date of the document.
+  *@return true if the document with that date can be accepted by the downstream connection.
+  */
+  public boolean checkDateIndexable(Date date)
+    throws ManifoldCFException, ServiceInterruption;
+
   /** Detect if a mime type is acceptable downstream or not.  This method is used to determine whether it makes sense to fetch a document
   * in the first place.
   *@param mimeType is the mime type of the document.

Modified: manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IPipelineConnector.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IPipelineConnector.java?rev=1630188&r1=1630187&r2=1630188&view=diff
==============================================================================
--- manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IPipelineConnector.java (original)
+++ manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IPipelineConnector.java Wed Oct  8 17:54:47 2014
@@ -57,6 +57,16 @@ public interface IPipelineConnector exte
   public VersionContext getPipelineDescription(Specification spec)
     throws ManifoldCFException, ServiceInterruption;
 
+  /** Detect if a document date is acceptable or not.  This method is used to determine whether it makes sense to fetch a document
+  * in the first place.
+  *@param pipelineDescription is the document's pipeline version string, for this connection.
+  *@param date is the date of the document.
+  *@param checkActivity is an object including the activities that can be performed by this method.
+  *@return true if the document with that date can be accepted by this connector.
+  */
+  public boolean checkDateIndexable(VersionContext pipelineDescription, Date date, IOutputCheckActivity checkActivity)
+    throws ManifoldCFException, ServiceInterruption;
+
   /** Detect if a mime type is acceptable or not.  This method is used to determine whether it makes sense to fetch a document
   * in the first place.
   *@param pipelineDescription is the document's pipeline version string, for this connection.

Modified: manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/output/BaseOutputConnector.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/output/BaseOutputConnector.java?rev=1630188&r1=1630187&r2=1630188&view=diff
==============================================================================
--- manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/output/BaseOutputConnector.java (original)
+++ manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/output/BaseOutputConnector.java Wed Oct  8 17:54:47 2014
@@ -81,6 +81,20 @@ public abstract class BaseOutputConnecto
     // The base implementation does nothing here.
   }
 
+  /** Detect if a document date is acceptable or not.  This method is used to determine whether it makes sense to fetch a document
+  * in the first place.
+  *@param pipelineDescription is the document's pipeline version string, for this connection.
+  *@param date is the date of the document.
+  *@param checkActivity is an object including the activities that can be performed by this method.
+  *@return true if the document with that date can be accepted by this connector.
+  */
+  @Override
+  public boolean checkDateIndexable(VersionContext pipelineDescription, Date date, IOutputCheckActivity checkActivity)
+    throws ManifoldCFException, ServiceInterruption
+  {
+    return true;
+  }
+
   /** Detect if a mime type is acceptable or not.  This method is used to determine whether it makes sense to fetch a document
   * in the first place.
   *@param pipelineDescription is the document's pipeline version string, for this connection.

Modified: manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/transformation/BaseTransformationConnector.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/transformation/BaseTransformationConnector.java?rev=1630188&r1=1630187&r2=1630188&view=diff
==============================================================================
--- manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/transformation/BaseTransformationConnector.java (original)
+++ manifoldcf/trunk/framework/agents/src/main/java/org/apache/manifoldcf/agents/transformation/BaseTransformationConnector.java Wed Oct  8 17:54:47 2014
@@ -70,6 +70,20 @@ public abstract class BaseTransformation
     return false;
   }
 
+  /** Detect if a document date is acceptable or not.  This method is used to determine whether it makes sense to fetch a document
+  * in the first place.
+  *@param pipelineDescription is the document's pipeline version string, for this connection.
+  *@param date is the date of the document.
+  *@param checkActivity is an object including the activities that can be performed by this method.
+  *@return true if the document with that date can be accepted by this connector.
+  */
+  @Override
+  public boolean checkDateIndexable(VersionContext pipelineDescription, Date date, IOutputCheckActivity checkActivity)
+    throws ManifoldCFException, ServiceInterruption
+  {
+    return checkActivity.checkDateIndexable(date);
+  }
+
   /** Detect if a mime type is acceptable or not.  This method is used to determine whether it makes sense to fetch a document
   * in the first place.
   *@param pipelineDescription is the document's pipeline version string, for this connection.

Modified: manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/interfaces/IFingerprintActivity.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/interfaces/IFingerprintActivity.java?rev=1630188&r1=1630187&r2=1630188&view=diff
==============================================================================
--- manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/interfaces/IFingerprintActivity.java (original)
+++ manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/interfaces/IFingerprintActivity.java Wed Oct  8 17:54:47 2014
@@ -21,6 +21,7 @@ package org.apache.manifoldcf.crawler.in
 import org.apache.manifoldcf.core.interfaces.*;
 import org.apache.manifoldcf.agents.interfaces.*;
 import java.io.*;
+import java.util.*;
 
 /** This interface abstracts from the activities that handle document fingerprinting and mime type acceptance.
 */
@@ -28,6 +29,14 @@ public interface IFingerprintActivity
 {
   public static final String _rcsid = "@(#)$Id: IFingerprintActivity.java 988245 2010-08-23 18:39:35Z kwright $";
 
+  /** Detect if a date is indexable or not.  This method is used by participating repository connectors to pre-filter the number of
+  * unusable documents that will be passed to this output connector.
+  *@param date is the date of the document; may be null
+  *@return true if a document with that date is indexable by this connector.
+  */
+  public boolean checkDateIndexable(Date date)
+    throws ManifoldCFException, ServiceInterruption;
+
   /** Detect if a mime type is indexable or not.  This method is used by participating repository connectors to pre-filter the number of
   * unusable documents that will be passed to this output connector.
   *@param mimeType is the mime type of the document.

Modified: manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/WorkerThread.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/WorkerThread.java?rev=1630188&r1=1630187&r2=1630188&view=diff
==============================================================================
--- manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/WorkerThread.java (original)
+++ manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/WorkerThread.java Wed Oct  8 17:54:47 2014
@@ -1907,6 +1907,19 @@ public class WorkerThread extends Thread
       abortSet.add(localIdentifier);
     }
 
+    /** Detect if a date is indexable or not.  This method is used by participating repository connectors to pre-filter the number of
+    * unusable documents that will be passed to this output connector.
+    *@param date is the date of the document; may be null
+    *@return true if a document with that date is indexable by this connector.
+    */
+    public boolean checkDateIndexable(Date date)
+      throws ManifoldCFException, ServiceInterruption
+    {
+      return ingester.checkDateIndexable(
+        pipelineSpecification,date,
+        ingestLogger);
+    }
+
     /** Check whether a mime type is indexable by the currently specified output connector.
     *@param mimeType is the mime type to check, not including any character set specification.
     *@return true if the mime type is indexable.
@@ -2318,6 +2331,18 @@ public class WorkerThread extends Thread
     {
     }
 
+    /** Detect if a date is acceptable downstream or not.  This method is used to determine whether it makes sense to fetch a document
+    * in the first place.
+    *@param date is the document's date
+    *@return true if the document with that date can be accepted by the downstream connection.
+    */
+    @Override
+    public boolean checkDateIndexable(Date date)
+      throws ManifoldCFException, ServiceInterruption
+    {
+      return false;
+    }
+
     /** Detect if a mime type is acceptable downstream or not.  This method is used to determine whether it makes sense to fetch a document
     * in the first place.
     *@param mimeType is the mime type of the document.



Mime
View raw message