manifoldcf-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From kwri...@apache.org
Subject svn commit: r1630077 - in /manifoldcf/branches/CONNECTORS-1067/connectors: alfresco-webscript/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/alfrescowebscript/ cmis/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/cmis...
Date Wed, 08 Oct 2014 12:05:03 GMT
Author: kwright
Date: Wed Oct  8 12:05:03 2014
New Revision: 1630077

URL: http://svn.apache.org/r1630077
Log:
Hook up date check in alfresco-webscript, cmis, and sharepoint connectors

Modified:
    manifoldcf/branches/CONNECTORS-1067/connectors/alfresco-webscript/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/alfrescowebscript/AlfrescoConnector.java
    manifoldcf/branches/CONNECTORS-1067/connectors/cmis/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/cmis/CmisRepositoryConnector.java
    manifoldcf/branches/CONNECTORS-1067/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SharePointRepository.java

Modified: manifoldcf/branches/CONNECTORS-1067/connectors/alfresco-webscript/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/alfrescowebscript/AlfrescoConnector.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1067/connectors/alfresco-webscript/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/alfrescowebscript/AlfrescoConnector.java?rev=1630077&r1=1630076&r2=1630077&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-1067/connectors/alfresco-webscript/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/alfrescowebscript/AlfrescoConnector.java (original)
+++ manifoldcf/branches/CONNECTORS-1067/connectors/alfresco-webscript/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/alfrescowebscript/AlfrescoConnector.java Wed Oct  8 12:05:03 2014
@@ -271,11 +271,16 @@ public class AlfrescoConnector extends B
           continue;
         }
         
-        if (mimeType != null && !activities.checkMimeTypeIndexable(mimeType)) {
+        if (!activities.checkMimeTypeIndexable(mimeType)) {
           activities.noDocument(doc, documentVersion);
           continue;
         }
 
+        if (!activities.checkDateIndexable(modifiedDate)) {
+          activities.noDocument(doc, documentVersion);
+          continue;
+        }
+        
         RepositoryDocument rd = new RepositoryDocument();
         rd.addField(FIELD_NODEREF, nodeRef);
         rd.addField(FIELD_TYPE, type);

Modified: manifoldcf/branches/CONNECTORS-1067/connectors/cmis/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/cmis/CmisRepositoryConnector.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1067/connectors/cmis/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/cmis/CmisRepositoryConnector.java?rev=1630077&r1=1630076&r2=1630077&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-1067/connectors/cmis/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/cmis/CmisRepositoryConnector.java (original)
+++ manifoldcf/branches/CONNECTORS-1067/connectors/cmis/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/cmis/CmisRepositoryConnector.java Wed Oct  8 12:05:03 2014
@@ -1143,171 +1143,198 @@ public class CmisRepositoryConnector ext
             activities.addDocumentReference(child.getId(), documentIdentifier,
                 RELATIONSHIP_CHILD);
           }
-      } else if(baseTypeId.equals(CMIS_DOCUMENT_BASE_TYPE)){
-        // content ingestion
+        } else if(baseTypeId.equals(CMIS_DOCUMENT_BASE_TYPE)) {
+          // content ingestion
 
-        Document document = (Document) cmisObject;
-        long fileLength;
-        InputStream is;
-        try {
-          fileLength = document.getContentStreamLength();
-          if (fileLength > 0)
-            is = document.getContentStream().getStream();
-          else
-            is = null;
-        } catch (CmisObjectNotFoundException e) {
-          // Document gone
-          activities.deleteDocument(documentIdentifier);
-          continue;
-        }
+          Document document = (Document) cmisObject;
           
-        try {
-          RepositoryDocument rd = new RepositoryDocument();
           Date createdDate = document.getCreationDate().getTime();
           Date modifiedDate = document.getLastModificationDate().getTime();
-            
-          rd.setFileName(document.getContentStreamFileName());
-          rd.setMimeType(document.getContentStreamMimeType());
+          long fileLength = document.getContentStreamLength();
+          String fileName = document.getContentStreamFileName();
+          String mimeType = document.getContentStreamMimeType();
+          //documentURI
+          String documentURI = CmisRepositoryConnectorUtils.getDocumentURL(document, session);
+          
+          // Do any filtering (which will save us work)
+          if (!activities.checkURLIndexable(documentURI))
+          {
+            activities.noDocument(documentIdentifier,versionString);
+            continue;
+          }
+          
+          if (!activities.checkMimeTypeIndexable(mimeType))
+          {
+            activities.noDocument(documentIdentifier,versionString);
+            continue;
+          }
+
+          if (!activities.checkLengthIndexable(fileLength))
+          {
+            activities.noDocument(documentIdentifier,versionString);
+            continue;
+          }
+          
+          if (!activities.checkDateIndexable(modifiedDate))
+          {
+            activities.noDocument(documentIdentifier,versionString);
+            continue;
+          }
+          
+          RepositoryDocument rd = new RepositoryDocument();
+          rd.setFileName(fileName);
+          rd.setMimeType(mimeType);
           rd.setCreatedDate(createdDate);
           rd.setModifiedDate(modifiedDate);
-            
-          //binary
-          if(is != null) {
-            rd.setBinary(is, fileLength);
-          } else {
-            rd.setBinary(new NullInputStream(0),0);
+              
+          InputStream is;
+          try {
+            if (fileLength > 0)
+              is = document.getContentStream().getStream();
+            else
+              is = null;
+          } catch (CmisObjectNotFoundException e) {
+            // Document gone
+            activities.deleteDocument(documentIdentifier);
+            continue;
           }
+            
+          try {
+            //binary
+            if(is != null) {
+              rd.setBinary(is, fileLength);
+            } else {
+              rd.setBinary(new NullInputStream(0),0);
+            }
 
-          //properties
-          List<Property<?>> properties = document.getProperties();
-          String id = StringUtils.EMPTY;
-          for (Property<?> property : properties) {
-            String propertyId = property.getId();
-              
-            if(CmisRepositoryConnectorUtils.existsInSelectClause(cmisQuery, propertyId)){
+            //properties
+            List<Property<?>> properties = document.getProperties();
+            String id = StringUtils.EMPTY;
+            for (Property<?> property : properties) {
+              String propertyId = property.getId();
                 
-              if (propertyId.endsWith(Constants.PARAM_OBJECT_ID)) {
-                id = (String) property.getValue();
-    
-                if (property.getValue() !=null 
-                    || property.getValues() != null) {
-                  PropertyType propertyType = property.getType();
-      
-                  switch (propertyType) {
-      
-                  case STRING:
-                  case ID:
-                  case URI:
-                  case HTML:
-                    if(property.isMultiValued()){
-                      List<String> htmlPropertyValues = (List<String>) property.getValues();
-                      for (String htmlPropertyValue : htmlPropertyValues) {
-                        rd.addField(propertyId, htmlPropertyValue);
-                      }
-                    } else {
-                      String stringValue = (String) property.getValue();
-                      if(StringUtils.isNotEmpty(stringValue)){
-                        rd.addField(propertyId, stringValue);
-                      }
-                    }
-                    break;
-           
-                  case BOOLEAN:
-                    if(property.isMultiValued()){
-                      List<Boolean> booleanPropertyValues = (List<Boolean>) property.getValues();
-                      for (Boolean booleanPropertyValue : booleanPropertyValues) {
-                        rd.addField(propertyId, booleanPropertyValue.toString());
-                      }
-                    } else {
-                      Boolean booleanValue = (Boolean) property.getValue();
-                      if(booleanValue!=null){
-                        rd.addField(propertyId, booleanValue.toString());
-                      }
-                    }
-                    break;
-      
-                  case INTEGER:
-                    if(property.isMultiValued()){
-                      List<BigInteger> integerPropertyValues = (List<BigInteger>) property.getValues();
-                      for (BigInteger integerPropertyValue : integerPropertyValues) {
-                        rd.addField(propertyId, integerPropertyValue.toString());
-                      }
-                    } else {
-                      BigInteger integerValue = (BigInteger) property.getValue();
-                      if(integerValue!=null){
-                        rd.addField(propertyId, integerValue.toString());
-                      }
-                    }
-                    break;
-      
-                  case DECIMAL:
-                    if(property.isMultiValued()){
-                      List<BigDecimal> decimalPropertyValues = (List<BigDecimal>) property.getValues();
-                      for (BigDecimal decimalPropertyValue : decimalPropertyValues) {
-                        rd.addField(propertyId, decimalPropertyValue.toString());
-                      }
-                    } else {
-                      BigDecimal decimalValue = (BigDecimal) property.getValue();
-                      if(decimalValue!=null){
-                        rd.addField(propertyId, decimalValue.toString());
-                      }
-                    }
-                    break;
+              if(CmisRepositoryConnectorUtils.existsInSelectClause(cmisQuery, propertyId)){
+                  
+                if (propertyId.endsWith(Constants.PARAM_OBJECT_ID)) {
+                  id = (String) property.getValue();
       
-                  case DATETIME:
-                    if(property.isMultiValued()){
-                      List<GregorianCalendar> datePropertyValues = (List<GregorianCalendar>) property.getValues();
-                      for (GregorianCalendar datePropertyValue : datePropertyValues) {
-                        rd.addField(propertyId,
-                            ISO8601_DATE_FORMATTER.format(datePropertyValue.getTime()));
-                      }
-                    } else {
-                      GregorianCalendar dateValue = (GregorianCalendar) property.getValue();
-                      if(dateValue!=null){
-                        rd.addField(propertyId, ISO8601_DATE_FORMATTER.format(dateValue.getTime()));
-                      }
+                  if (property.getValue() !=null 
+                      || property.getValues() != null) {
+                    PropertyType propertyType = property.getType();
+        
+                    switch (propertyType) {
+        
+                    case STRING:
+                    case ID:
+                    case URI:
+                    case HTML:
+                      if(property.isMultiValued()){
+                        List<String> htmlPropertyValues = (List<String>) property.getValues();
+                        for (String htmlPropertyValue : htmlPropertyValues) {
+                          rd.addField(propertyId, htmlPropertyValue);
+                        }
+                      } else {
+                        String stringValue = (String) property.getValue();
+                        if(StringUtils.isNotEmpty(stringValue)){
+                          rd.addField(propertyId, stringValue);
+                        }
+                      }
+                      break;
+             
+                    case BOOLEAN:
+                      if(property.isMultiValued()){
+                        List<Boolean> booleanPropertyValues = (List<Boolean>) property.getValues();
+                        for (Boolean booleanPropertyValue : booleanPropertyValues) {
+                          rd.addField(propertyId, booleanPropertyValue.toString());
+                        }
+                      } else {
+                        Boolean booleanValue = (Boolean) property.getValue();
+                        if(booleanValue!=null){
+                          rd.addField(propertyId, booleanValue.toString());
+                        }
+                      }
+                      break;
+        
+                    case INTEGER:
+                      if(property.isMultiValued()){
+                        List<BigInteger> integerPropertyValues = (List<BigInteger>) property.getValues();
+                        for (BigInteger integerPropertyValue : integerPropertyValues) {
+                          rd.addField(propertyId, integerPropertyValue.toString());
+                        }
+                      } else {
+                        BigInteger integerValue = (BigInteger) property.getValue();
+                        if(integerValue!=null){
+                          rd.addField(propertyId, integerValue.toString());
+                        }
+                      }
+                      break;
+        
+                    case DECIMAL:
+                      if(property.isMultiValued()){
+                        List<BigDecimal> decimalPropertyValues = (List<BigDecimal>) property.getValues();
+                        for (BigDecimal decimalPropertyValue : decimalPropertyValues) {
+                          rd.addField(propertyId, decimalPropertyValue.toString());
+                        }
+                      } else {
+                        BigDecimal decimalValue = (BigDecimal) property.getValue();
+                        if(decimalValue!=null){
+                          rd.addField(propertyId, decimalValue.toString());
+                        }
+                      }
+                      break;
+        
+                    case DATETIME:
+                      if(property.isMultiValued()){
+                        List<GregorianCalendar> datePropertyValues = (List<GregorianCalendar>) property.getValues();
+                        for (GregorianCalendar datePropertyValue : datePropertyValues) {
+                          rd.addField(propertyId,
+                              ISO8601_DATE_FORMATTER.format(datePropertyValue.getTime()));
+                        }
+                      } else {
+                        GregorianCalendar dateValue = (GregorianCalendar) property.getValue();
+                        if(dateValue!=null){
+                          rd.addField(propertyId, ISO8601_DATE_FORMATTER.format(dateValue.getTime()));
+                        }
+                      }
+                      break;
+        
+                    default:
+                      break;
                     }
-                    break;
-      
-                  default:
-                    break;
                   }
+                    
                 }
-                  
+                
               }
-              
             }
-          }
-          
-          //ingestion
             
-          //documentURI
-          String documentURI = CmisRepositoryConnectorUtils.getDocumentURL(document, session);
-            
-          try {
-            activities.ingestDocumentWithException(documentIdentifier, versionString, documentURI, rd);
-          } catch (IOException e) {
-            errorCode = "IO ERROR";
-            errorDesc = e.getMessage();
-            handleIOException(e, "reading file input stream");
-          }
-        } finally {
-          try {
-            if(is!=null){
-              is.close();
+            //ingestion
+              
+              
+            try {
+              activities.ingestDocumentWithException(documentIdentifier, versionString, documentURI, rd);
+            } catch (IOException e) {
+              errorCode = "IO ERROR";
+              errorDesc = e.getMessage();
+              handleIOException(e, "reading file input stream");
             }
-          } catch (IOException e) {
-            errorCode = "IO ERROR";
-            errorDesc = e.getMessage();
-            handleIOException(e, "closing file input stream");
           } finally {
-            activities.recordActivity(new Long(startTime), ACTIVITY_READ,
-              fileLength, documentIdentifier, errorCode, errorDesc, null);
+            try {
+              if(is!=null){
+                is.close();
+              }
+            } catch (IOException e) {
+              errorCode = "IO ERROR";
+              errorDesc = e.getMessage();
+              handleIOException(e, "closing file input stream");
+            } finally {
+              activities.recordActivity(new Long(startTime), ACTIVITY_READ,
+                fileLength, documentIdentifier, errorCode, errorDesc, null);
+            }
           }
         }
-      }
-      else
-        activities.deleteDocument(documentIdentifier);
+        else
+          activities.noDocument(documentIdentifier,versionString);
       }
     }
     

Modified: manifoldcf/branches/CONNECTORS-1067/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SharePointRepository.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1067/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SharePointRepository.java?rev=1630077&r1=1630076&r2=1630077&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-1067/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SharePointRepository.java (original)
+++ manifoldcf/branches/CONNECTORS-1067/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SharePointRepository.java Wed Oct  8 12:05:03 2014
@@ -1634,226 +1634,229 @@ public class SharePointRepository extend
     throws ManifoldCFException, ServiceInterruption
   {
     // Before we fetch, confirm that the output connector will accept the document
-    if (activities.checkURLIndexable(fileUrl))
+    if (!activities.checkURLIndexable(fileUrl))
     {
-      // Also check mime type
-      String contentType = mapExtensionToMimeType(documentIdentifier);
-      if (activities.checkMimeTypeIndexable(contentType))
+      // URL failed
+      if (Logging.connectors.isDebugEnabled())
+        Logging.connectors.debug("SharePoint: Skipping document '"+documentIdentifier+"' because output connector says URL '"+fileUrl+"' is not indexable");
+      return false;
+    }
+    
+    // Also check mime type
+    String contentType = mapExtensionToMimeType(documentIdentifier);
+    if (!activities.checkMimeTypeIndexable(contentType))
+    {
+      // Mime type failed
+      if (Logging.connectors.isDebugEnabled())
+        Logging.connectors.debug("SharePoint: Skipping document '"+documentIdentifier+"' because output connector says mime type '"+((contentType==null)?"null":contentType)+"' is not indexable");
+      return false;
+    }
+    
+    // Now check date stamp
+    if (!activities.checkDateIndexable(modifiedDate))
+    {
+      // Date failed
+      if (Logging.connectors.isDebugEnabled())
+        Logging.connectors.debug("SharePoint: Skipping document '"+documentIdentifier+"' because output connector says date '"+((modifiedDate==null)?"null":modifiedDate)+"' is not indexable");
+      return false;
+    }
+    
+    // Set stuff up for fetch activity logging
+    long startFetchTime = System.currentTimeMillis();
+    try
+    {
+      // Read the document into a local temporary file, so I get a reliable length.
+      File tempFile = File.createTempFile("__shp__",".tmp");
+      try
       {
-        // Set stuff up for fetch activity logging
-        long startFetchTime = System.currentTimeMillis();
+        // Open the output stream
+        OutputStream os = new FileOutputStream(tempFile);
         try
         {
-          // Read the document into a local temporary file, so I get a reliable length.
-          File tempFile = File.createTempFile("__shp__",".tmp");
+          // Catch all exceptions having to do with reading the document
           try
           {
-            // Open the output stream
-            OutputStream os = new FileOutputStream(tempFile);
-            try
-            {
-              // Catch all exceptions having to do with reading the document
-              try
-              {
-                ExecuteMethodThread emt = new ExecuteMethodThread(httpClient, fetchUrl, os);
-                emt.start();
-                int returnCode = emt.finishUp();
+            ExecuteMethodThread emt = new ExecuteMethodThread(httpClient, fetchUrl, os);
+            emt.start();
+            int returnCode = emt.finishUp();
                   
-                if (returnCode == 404 || returnCode == 401 || returnCode == 400 || returnCode == 415)
-                {
-                  // Well, sharepoint thought the document was there, but it really isn't, so delete it.
-                  if (Logging.connectors.isDebugEnabled())
-                    Logging.connectors.debug("SharePoint: Document at '"+fileUrl+"' failed to fetch with code "+Integer.toString(returnCode)+", deleting");
-                  activities.recordActivity(new Long(startFetchTime),ACTIVITY_FETCH,
-                    null,documentIdentifier,"Not found",Integer.toString(returnCode),null);
-                  return false;
-                }
-                else if (returnCode != 200)
-                {
-                  activities.recordActivity(new Long(startFetchTime),ACTIVITY_FETCH,
-                    null,documentIdentifier,"Error","Http status "+Integer.toString(returnCode),null);
-                  throw new ManifoldCFException("Error fetching document '"+fileUrl+"': "+Integer.toString(returnCode));
-                }
-
-                // Log the normal fetch activity
-                activities.recordActivity(new Long(startFetchTime),ACTIVITY_FETCH,
-                  new Long(tempFile.length()),documentIdentifier,"Success",null,null);
-                
-              }
-              catch (InterruptedException e)
-              {
-                throw new ManifoldCFException("Interrupted: "+e.getMessage(),e,ManifoldCFException.INTERRUPTED);
-              }
-              catch (java.net.SocketTimeoutException e)
-              {
-                activities.recordActivity(new Long(startFetchTime),ACTIVITY_FETCH,
-                  new Long(tempFile.length()),documentIdentifier,"Error",e.getMessage(),null);
-                Logging.connectors.warn("SharePoint: SocketTimeoutException thrown: "+e.getMessage(),e);
-                long currentTime = System.currentTimeMillis();
-                throw new ServiceInterruption("SharePoint is down attempting to read '"+fileUrl+"', retrying: "+e.getMessage(),e,currentTime + 300000L,
-                  currentTime + 12 * 60 * 60000L,-1,true);
-              }
-              catch (org.apache.http.conn.ConnectTimeoutException e)
-              {
-                activities.recordActivity(new Long(startFetchTime),ACTIVITY_FETCH,
-                  new Long(tempFile.length()),documentIdentifier,"Error",e.getMessage(),null);
-                Logging.connectors.warn("SharePoint: ConnectTimeoutException thrown: "+e.getMessage(),e);
-                long currentTime = System.currentTimeMillis();
-                throw new ServiceInterruption("SharePoint is down attempting to read '"+fileUrl+"', retrying: "+e.getMessage(),e,currentTime + 300000L,
-                  currentTime + 12 * 60 * 60000L,-1,true);
-              }
-              catch (InterruptedIOException e)
-              {
-                throw new ManifoldCFException("Interrupted: "+e.getMessage(),e,ManifoldCFException.INTERRUPTED);
-              }
-              catch (IllegalArgumentException e)
-              {
-                Logging.connectors.error("SharePoint: Illegal argument", e);
-                activities.recordActivity(new Long(startFetchTime),ACTIVITY_FETCH,
-                  new Long(tempFile.length()),documentIdentifier,"Error",e.getMessage(),null);
-                throw new ManifoldCFException("SharePoint: Illegal argument: "+e.getMessage(),e);
-              }
-              catch (org.apache.http.HttpException e)
-              {
-                Logging.connectors.warn("SharePoint: HttpException thrown",e);
-                activities.recordActivity(new Long(startFetchTime),ACTIVITY_FETCH,
-                  new Long(tempFile.length()),documentIdentifier,"Error",e.getMessage(),null);
-                long currentTime = System.currentTimeMillis();
-                throw new ServiceInterruption("SharePoint is down attempting to read '"+fileUrl+"', retrying: "+e.getMessage(),e,currentTime + 300000L,
-                  currentTime + 12 * 60 * 60000L,-1,true);
-              }
-              catch (IOException e)
-              {
-                activities.recordActivity(new Long(startFetchTime),ACTIVITY_FETCH,
-                  new Long(tempFile.length()),documentIdentifier,"Error",e.getMessage(),null);
-                Logging.connectors.warn("SharePoint: IOException thrown: "+e.getMessage(),e);
-                long currentTime = System.currentTimeMillis();
-                throw new ServiceInterruption("SharePoint is down attempting to read '"+fileUrl+"', retrying: "+e.getMessage(),e,currentTime + 300000L,
-                  currentTime + 12 * 60 * 60000L,-1,true);
-              }
-            }
-            finally
+            if (returnCode == 404 || returnCode == 401 || returnCode == 400 || returnCode == 415)
             {
-              os.close();
+              // Well, sharepoint thought the document was there, but it really isn't, so delete it.
+              if (Logging.connectors.isDebugEnabled())
+                Logging.connectors.debug("SharePoint: Document at '"+fileUrl+"' failed to fetch with code "+Integer.toString(returnCode)+", deleting");
+              activities.recordActivity(new Long(startFetchTime),ACTIVITY_FETCH,
+                null,documentIdentifier,"Not found",Integer.toString(returnCode),null);
+              return false;
             }
-                      
-            // Ingest the document
-            long documentLength = tempFile.length();
-            if (activities.checkLengthIndexable(documentLength))
+            else if (returnCode != 200)
             {
-              InputStream is = new FileInputStream(tempFile);
-              try
-              {
-                RepositoryDocument data = new RepositoryDocument();
-                data.setBinary( is, documentLength );
-                
-                data.setFileName(mapToFileName(documentIdentifier));
-                          
-                if (contentType != null)
-                  data.setMimeType(contentType);
-                
-                setDataACLs(data,accessTokens,denyTokens);
-
-                setPathAttribute(data,sDesc,documentIdentifier);
-                          
-                if (modifiedDate != null)
-                  data.setModifiedDate(modifiedDate);
-                if (createdDate != null)
-                  data.setCreatedDate(createdDate);
+              activities.recordActivity(new Long(startFetchTime),ACTIVITY_FETCH,
+                null,documentIdentifier,"Error","Http status "+Integer.toString(returnCode),null);
+              throw new ManifoldCFException("Error fetching document '"+fileUrl+"': "+Integer.toString(returnCode));
+            }
 
-                if (metadataValues != null)
-                {
-                  Iterator<String> iter = metadataValues.keySet().iterator();
-                  while (iter.hasNext())
-                  {
-                    String fieldName = iter.next();
-                    String fieldData = metadataValues.get(fieldName);
-                    data.addField(fieldName,fieldData);
-                  }
-                }
-                data.addField("GUID",guid);
+            // Log the normal fetch activity
+            activities.recordActivity(new Long(startFetchTime),ACTIVITY_FETCH,
+              new Long(tempFile.length()),documentIdentifier,"Success",null,null);
                 
-                try
-                {
-                  activities.ingestDocumentWithException( documentIdentifier, version, fileUrl , data );
-                }
-                catch (IOException e)
-                {
-                  handleIOException(e,"reading document");
-                }
-                return true;
-              }
-              finally
-              {
-                try
-                {
-                  is.close();
-                }
-                catch (java.net.SocketTimeoutException e)
-                {
-                  // This is not fatal
-                  Logging.connectors.debug("SharePoint: Timeout before read could finish for '"+fileUrl+"': "+e.getMessage(),e);
-                }
-                catch (org.apache.http.conn.ConnectTimeoutException e)
-                {
-                  // This is not fatal
-                  Logging.connectors.debug("SharePoint: Connect timeout before read could finish for '"+fileUrl+"': "+e.getMessage(),e);
-                }
-                catch (InterruptedIOException e)
-                {
-                  throw new ManifoldCFException("Interrupted: "+e.getMessage(),e,ManifoldCFException.INTERRUPTED);
-                }
-                catch (IOException e)
-                {
-                  // This is not fatal
-                  Logging.connectors.debug("SharePoint: Server closed connection before read could finish for '"+fileUrl+"': "+e.getMessage(),e);
-                }
-              }
-            }
-            else
-            {
-              // Document too long
-              if (Logging.connectors.isDebugEnabled())
-                Logging.connectors.debug("SharePoint: Document '"+documentIdentifier+"' was too long, according to output connector");
-              return false;
-            }
           }
-          finally
+          catch (InterruptedException e)
+          {
+            throw new ManifoldCFException("Interrupted: "+e.getMessage(),e,ManifoldCFException.INTERRUPTED);
+          }
+          catch (java.net.SocketTimeoutException e)
+          {
+            activities.recordActivity(new Long(startFetchTime),ACTIVITY_FETCH,
+              new Long(tempFile.length()),documentIdentifier,"Error",e.getMessage(),null);
+            Logging.connectors.warn("SharePoint: SocketTimeoutException thrown: "+e.getMessage(),e);
+            long currentTime = System.currentTimeMillis();
+            throw new ServiceInterruption("SharePoint is down attempting to read '"+fileUrl+"', retrying: "+e.getMessage(),e,currentTime + 300000L,
+              currentTime + 12 * 60 * 60000L,-1,true);
+          }
+          catch (org.apache.http.conn.ConnectTimeoutException e)
+          {
+            activities.recordActivity(new Long(startFetchTime),ACTIVITY_FETCH,
+              new Long(tempFile.length()),documentIdentifier,"Error",e.getMessage(),null);
+            Logging.connectors.warn("SharePoint: ConnectTimeoutException thrown: "+e.getMessage(),e);
+            long currentTime = System.currentTimeMillis();
+            throw new ServiceInterruption("SharePoint is down attempting to read '"+fileUrl+"', retrying: "+e.getMessage(),e,currentTime + 300000L,
+              currentTime + 12 * 60 * 60000L,-1,true);
+          }
+          catch (InterruptedIOException e)
+          {
+            throw new ManifoldCFException("Interrupted: "+e.getMessage(),e,ManifoldCFException.INTERRUPTED);
+          }
+          catch (IllegalArgumentException e)
+          {
+            Logging.connectors.error("SharePoint: Illegal argument", e);
+            activities.recordActivity(new Long(startFetchTime),ACTIVITY_FETCH,
+              new Long(tempFile.length()),documentIdentifier,"Error",e.getMessage(),null);
+            throw new ManifoldCFException("SharePoint: Illegal argument: "+e.getMessage(),e);
+          }
+          catch (org.apache.http.HttpException e)
+          {
+            Logging.connectors.warn("SharePoint: HttpException thrown",e);
+            activities.recordActivity(new Long(startFetchTime),ACTIVITY_FETCH,
+              new Long(tempFile.length()),documentIdentifier,"Error",e.getMessage(),null);
+            long currentTime = System.currentTimeMillis();
+            throw new ServiceInterruption("SharePoint is down attempting to read '"+fileUrl+"', retrying: "+e.getMessage(),e,currentTime + 300000L,
+              currentTime + 12 * 60 * 60000L,-1,true);
+          }
+          catch (IOException e)
           {
-            tempFile.delete();
+            activities.recordActivity(new Long(startFetchTime),ACTIVITY_FETCH,
+              new Long(tempFile.length()),documentIdentifier,"Error",e.getMessage(),null);
+            Logging.connectors.warn("SharePoint: IOException thrown: "+e.getMessage(),e);
+            long currentTime = System.currentTimeMillis();
+            throw new ServiceInterruption("SharePoint is down attempting to read '"+fileUrl+"', retrying: "+e.getMessage(),e,currentTime + 300000L,
+              currentTime + 12 * 60 * 60000L,-1,true);
           }
         }
-        catch (java.net.SocketTimeoutException e)
+        finally
         {
-          throw new ManifoldCFException("Socket timeout error writing '"+fileUrl+"' to temporary file: "+e.getMessage(),e);
+          os.close();
         }
-        catch (org.apache.http.conn.ConnectTimeoutException e)
+                      
+        // Ingest the document
+        long documentLength = tempFile.length();
+        if (!activities.checkLengthIndexable(documentLength))
         {
-          throw new ManifoldCFException("Connect timeout error writing '"+fileUrl+"' to temporary file: "+e.getMessage(),e);
+          // Document too long
+          if (Logging.connectors.isDebugEnabled())
+            Logging.connectors.debug("SharePoint: Document '"+documentIdentifier+"' was too long, according to output connector");
+          return false;
         }
-        catch (InterruptedIOException e)
+        
+        InputStream is = new FileInputStream(tempFile);
+        try
         {
-          throw new ManifoldCFException("Interrupted: "+e.getMessage(),e,ManifoldCFException.INTERRUPTED);
+          RepositoryDocument data = new RepositoryDocument();
+          data.setBinary( is, documentLength );
+                
+          data.setFileName(mapToFileName(documentIdentifier));
+                          
+          if (contentType != null)
+            data.setMimeType(contentType);
+                
+          setDataACLs(data,accessTokens,denyTokens);
+
+          setPathAttribute(data,sDesc,documentIdentifier);
+          
+          if (modifiedDate != null)
+            data.setModifiedDate(modifiedDate);
+          if (createdDate != null)
+            data.setCreatedDate(createdDate);
+
+          if (metadataValues != null)
+          {
+            Iterator<String> iter = metadataValues.keySet().iterator();
+            while (iter.hasNext())
+            {
+              String fieldName = iter.next();
+              String fieldData = metadataValues.get(fieldName);
+              data.addField(fieldName,fieldData);
+            }
+          }
+          data.addField("GUID",guid);
+                
+          try
+          {
+            activities.ingestDocumentWithException( documentIdentifier, version, fileUrl , data );
+          }
+          catch (IOException e)
+          {
+            handleIOException(e,"reading document");
+          }
+          return true;
         }
-        catch (IOException e)
+        finally
         {
-          throw new ManifoldCFException("IO error writing '"+fileUrl+"' to temporary file: "+e.getMessage(),e);
+          try
+          {
+            is.close();
+          }
+          catch (java.net.SocketTimeoutException e)
+          {
+            // This is not fatal
+            Logging.connectors.debug("SharePoint: Timeout before read could finish for '"+fileUrl+"': "+e.getMessage(),e);
+          }
+          catch (org.apache.http.conn.ConnectTimeoutException e)
+          {
+            // This is not fatal
+            Logging.connectors.debug("SharePoint: Connect timeout before read could finish for '"+fileUrl+"': "+e.getMessage(),e);
+          }
+          catch (InterruptedIOException e)
+          {
+            throw new ManifoldCFException("Interrupted: "+e.getMessage(),e,ManifoldCFException.INTERRUPTED);
+          }
+          catch (IOException e)
+          {
+            // This is not fatal
+            Logging.connectors.debug("SharePoint: Server closed connection before read could finish for '"+fileUrl+"': "+e.getMessage(),e);
+          }
         }
       }
-      else
+      finally
       {
-        // Mime type failed
-        if (Logging.connectors.isDebugEnabled())
-          Logging.connectors.debug("SharePoint: Skipping document '"+documentIdentifier+"' because output connector says mime type '"+((contentType==null)?"null":contentType)+"' is not indexable");
-        return false;
+        tempFile.delete();
       }
     }
-    else
+    catch (java.net.SocketTimeoutException e)
     {
-      // URL failed
-      if (Logging.connectors.isDebugEnabled())
-        Logging.connectors.debug("SharePoint: Skipping document '"+documentIdentifier+"' because output connector says URL '"+fileUrl+"' is not indexable");
-      return false;
+      throw new ManifoldCFException("Socket timeout error writing '"+fileUrl+"' to temporary file: "+e.getMessage(),e);
+    }
+    catch (org.apache.http.conn.ConnectTimeoutException e)
+    {
+      throw new ManifoldCFException("Connect timeout error writing '"+fileUrl+"' to temporary file: "+e.getMessage(),e);
+    }
+    catch (InterruptedIOException e)
+    {
+      throw new ManifoldCFException("Interrupted: "+e.getMessage(),e,ManifoldCFException.INTERRUPTED);
+    }
+    catch (IOException e)
+    {
+      throw new ManifoldCFException("IO error writing '"+fileUrl+"' to temporary file: "+e.getMessage(),e);
     }
   }
 



Mime
View raw message