manifoldcf-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From kwri...@apache.org
Subject svn commit: r1791549 - in /manifoldcf/branches/release-2.7-branch: ./ connectors/email/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/email/
Date Sat, 15 Apr 2017 21:26:14 GMT
Author: kwright
Date: Sat Apr 15 21:26:14 2017
New Revision: 1791549

URL: http://svn.apache.org/viewvc?rev=1791549&view=rev
Log:
Pull up fix for CONNECTORS-1410 from trunk.

Modified:
    manifoldcf/branches/release-2.7-branch/   (props changed)
    manifoldcf/branches/release-2.7-branch/CHANGES.txt
    manifoldcf/branches/release-2.7-branch/connectors/email/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/email/EmailConfig.java
    manifoldcf/branches/release-2.7-branch/connectors/email/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/email/EmailConnector.java

Propchange: manifoldcf/branches/release-2.7-branch/
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Sat Apr 15 21:26:14 2017
@@ -142,4 +142,4 @@
 /manifoldcf/branches/CONNECTORS-981:1605049-1605773
 /manifoldcf/branches/CONNECTORS-989:1611600-1612101
 /manifoldcf/branches/CONNECTORS-990:1610284-1610707
-/manifoldcf/trunk:1791542
+/manifoldcf/trunk:1791542,1791548

Modified: manifoldcf/branches/release-2.7-branch/CHANGES.txt
URL: http://svn.apache.org/viewvc/manifoldcf/branches/release-2.7-branch/CHANGES.txt?rev=1791549&r1=1791548&r2=1791549&view=diff
==============================================================================
--- manifoldcf/branches/release-2.7-branch/CHANGES.txt (original)
+++ manifoldcf/branches/release-2.7-branch/CHANGES.txt Sat Apr 15 21:26:14 2017
@@ -3,9 +3,12 @@ $Id$
 
 ======================= Release 2.7 =====================
 
+CONNECTORS-1410: Body is used as content at emails.
+(Furkan KAMACI)
+
 CONNECTORS-1408: Insure that there's a non-null document name in
 the Solr connector, otherwise there will be no multipart post.
-(Cihad Gozel, Karl Wright)
+(Cihad Guzel, Karl Wright)
 
 CONNECTORS-1409: Fix re-processing email bug.
 (Furkan KAMACI)

Modified: manifoldcf/branches/release-2.7-branch/connectors/email/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/email/EmailConfig.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/release-2.7-branch/connectors/email/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/email/EmailConfig.java?rev=1791549&r1=1791548&r2=1791549&view=diff
==============================================================================
--- manifoldcf/branches/release-2.7-branch/connectors/email/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/email/EmailConfig.java (original)
+++ manifoldcf/branches/release-2.7-branch/connectors/email/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/email/EmailConfig.java Sat Apr 15 21:26:14 2017
@@ -76,7 +76,7 @@ public class EmailConfig {
   
   public static final String PROTOCOL_DEFAULT_VALUE = "IMAP";
   public static final String PORT_DEFAULT_VALUE = "";
-  public static final String[] BASIC_METADATA = {"To","From","Subject","Body","Date","Encoding of Attachment",
+  public static final String[] BASIC_METADATA = {"To","From","Subject","Date","Encoding of Attachment",
       "MIME Type of attachment", "File Name of Attachment"};
   public static final String BASIC_EXTRACT_EMAIL = "Use E-Mail Extractor";
   public static final String[] BASIC_SEARCHABLE_ATTRIBUTES = {"To","From","Subject","Body","Start Date", "End Date"};

Modified: manifoldcf/branches/release-2.7-branch/connectors/email/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/email/EmailConnector.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/release-2.7-branch/connectors/email/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/email/EmailConnector.java?rev=1791549&r1=1791548&r2=1791549&view=diff
==============================================================================
--- manifoldcf/branches/release-2.7-branch/connectors/email/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/email/EmailConnector.java (original)
+++ manifoldcf/branches/release-2.7-branch/connectors/email/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/email/EmailConnector.java Sat Apr 15 21:26:14 2017
@@ -33,9 +33,11 @@ import javax.mail.*;
 import javax.mail.internet.MimeBodyPart;
 import javax.mail.internet.MimeMessage;
 import javax.mail.search.*;
+import java.io.ByteArrayInputStream;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.InterruptedIOException;
+import java.nio.charset.StandardCharsets;
 import java.text.ParseException;
 import java.text.SimpleDateFormat;
 import java.util.*;
@@ -593,7 +595,7 @@ public class EmailConnector extends org.
               rd.setMimeType(mimeType);
               rd.setCreatedDate(sentDate);
               rd.setModifiedDate(sentDate);
-              
+
               for (String metadata : requiredMetadata) {
                 if (metadata.toLowerCase(Locale.ROOT).equals(EmailConfig.EMAIL_TO)) {
                   Address[] to = msg.getRecipients(Message.RecipientType.TO);
@@ -616,25 +618,6 @@ public class EmailConnector extends org.
                 } else if (metadata.toLowerCase(Locale.ROOT).equals(EmailConfig.EMAIL_SUBJECT)) {
                   String subject = msg.getSubject();
                   rd.addField(EmailConfig.EMAIL_SUBJECT, subject);
-                } else if (metadata.toLowerCase(Locale.ROOT).equals(EmailConfig.EMAIL_BODY)) {
-                  Object o = msg.getContent();
-                  if (o instanceof Multipart) {
-                    Multipart mp = (Multipart) msg.getContent();
-                    for (int k = 0, n = mp.getCount(); k < n; k++) {
-                      Part part = mp.getBodyPart(k);
-                      String disposition = part.getDisposition();
-                      if ((disposition == null)) {
-                        MimeBodyPart mbp = (MimeBodyPart) part;
-                        if (mbp.isMimeType(EmailConfig.MIMETYPE_TEXT_PLAIN)) {
-                          rd.addField(EmailConfig.EMAIL_BODY, mbp.getContent().toString());
-                        } else if (mbp.isMimeType(EmailConfig.MIMETYPE_HTML)) {
-                          rd.addField(EmailConfig.EMAIL_BODY, mbp.getContent().toString()); //handle html accordingly. Returns content with html tags
-                        }
-                      }
-                    }
-                  } else if (o instanceof String) {
-                    rd.addField(EmailConfig.EMAIL_BODY, (String)o);
-                  }
                 } else if (metadata.toLowerCase(Locale.ROOT).equals(EmailConfig.EMAIL_DATE)) {
                   rd.addField(EmailConfig.EMAIL_DATE, sentDate.toString());
                 } else if (metadata.toLowerCase(Locale.ROOT).equals(EmailConfig.EMAIL_ATTACHMENT_ENCODING)) {
@@ -696,8 +679,10 @@ public class EmailConnector extends org.
                   }
                 }
               }
-                  
-              InputStream is = msg.getInputStream();
+
+              //Content includes both body and attachments,
+              //Body will be set as content and attachments will be indexed as separate documents.
+              InputStream is = new ByteArrayInputStream(extractBodyContent(msg).getBytes(StandardCharsets.UTF_8));
               try {
                 rd.setBinary(is, fileLength);
                 activities.ingestDocumentWithException(documentIdentifier, version, msgURL, rd);
@@ -706,7 +691,7 @@ public class EmailConnector extends org.
               } finally {
                 is.close();
               }
-              
+
               // If we're supposed to deal with attachments, this is the time to queue them up
               if (attachmentUrlTemplate != null) {
                 if (msg.getContent() != null && msg.getContent() instanceof Multipart) {
@@ -932,16 +917,39 @@ public class EmailConnector extends org.
 
   }
 
-    /**
-     * Checks whether a Part is an attachment or not
-     * @param part Part to check
-     * @return is attachment or not
-     */
+  private String extractBodyContent(Message msg) throws MessagingException, IOException {
+    String bodyContent = null;
+    Object o = msg.getContent();
+    if (o instanceof Multipart) {
+      Multipart mp = (Multipart) msg.getContent();
+      for (int k = 0, n = mp.getCount(); k < n; k++) {
+        Part part = mp.getBodyPart(k);
+        String disposition = part.getDisposition();
+        if ((disposition == null)) {
+          MimeBodyPart mbp = (MimeBodyPart) part;
+          if (mbp.isMimeType(EmailConfig.MIMETYPE_TEXT_PLAIN)) {
+            bodyContent = mbp.getContent().toString();
+          } else if (mbp.isMimeType(EmailConfig.MIMETYPE_HTML)) {
+            bodyContent = mbp.getContent().toString(); //handle html accordingly. Returns content with html tags
+          }
+        }
+      }
+    } else if (o instanceof String) {
+      bodyContent = (String)o;
+    }
+    return bodyContent;
+  }
+
+  /**
+  * Checks whether a Part is an attachment or not
+  * @param part Part to check
+  * @return is attachment or not
+  */
   private boolean isAttachment(Part part) throws MessagingException {
-      String disposition = part.getDisposition();
-      return ((disposition != null)
-           && ((disposition.toLowerCase(Locale.ROOT).equals(Part.ATTACHMENT)
-           || (disposition.toLowerCase(Locale.ROOT).equals(Part.INLINE)))));
+    String disposition = part.getDisposition();
+    return ((disposition != null)
+        && ((disposition.toLowerCase(Locale.ROOT).equals(Part.ATTACHMENT)
+        || (disposition.toLowerCase(Locale.ROOT).equals(Part.INLINE)))));
   }
 
   /**



Mime
View raw message