commons-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From sgoes...@apache.org
Subject svn commit: r963422 - in /commons/proper/email/trunk/src: java/org/apache/commons/mail/ImageHtmlEmail.java test/org/apache/commons/mail/ImageHtmlEmailTest.java
Date Mon, 12 Jul 2010 18:38:51 GMT
Author: sgoeschl
Date: Mon Jul 12 18:38:50 2010
New Revision: 963422

URL: http://svn.apache.org/viewvc?rev=963422&view=rev
Log:
[EMAIL-92] Proper embedding of duplicated image resources

Modified:
    commons/proper/email/trunk/src/java/org/apache/commons/mail/ImageHtmlEmail.java
    commons/proper/email/trunk/src/test/org/apache/commons/mail/ImageHtmlEmailTest.java

Modified: commons/proper/email/trunk/src/java/org/apache/commons/mail/ImageHtmlEmail.java
URL: http://svn.apache.org/viewvc/commons/proper/email/trunk/src/java/org/apache/commons/mail/ImageHtmlEmail.java?rev=963422&r1=963421&r2=963422&view=diff
==============================================================================
--- commons/proper/email/trunk/src/java/org/apache/commons/mail/ImageHtmlEmail.java (original)
+++ commons/proper/email/trunk/src/java/org/apache/commons/mail/ImageHtmlEmail.java Mon Jul 12 18:38:50 2010
@@ -24,6 +24,8 @@ import java.io.File;
 import java.io.IOException;
 import java.net.MalformedURLException;
 import java.net.URL;
+import java.util.HashMap;
+import java.util.Map;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
@@ -52,11 +54,11 @@ public class ImageHtmlEmail extends Html
      * newlines on any place, HTML is not case sensitive and there can be
      * arbitrary text between "IMG" and "SRC" like IDs and other things.
      */
-    public static final String REGEX_IMG_SRC = "(<[Ii][Mm][Gg]\\s*.*?\\s+[Ss][Rr][Cc]\\s*=\\s*[\"'])([^\"']+?)([\"'])";
+    public static final String REGEX_IMG_SRC = "(<[Ii][Mm][Gg]\\s*[^>]*?\\s+[Ss][Rr][Cc]\\s*=s*[\"'])([^\"']+?)([\"'])";
 
     public static final String REGEX_SCRIPT_SRC = "(<[Ss][Cc][Rr][Ii][Pp][Tt]\\s*.*?\\s+[Ss][Rr][Cc]\\s*=\\s*[\"'])([^\"']+?)([\"'])";
 
-    // this pattern looks for the HTML-img tag which indicates embedded images,
+    // this pattern looks for the HTML image tag which indicates embedded images,
     // the grouping is necessary to allow to replace the element with the CID
     protected static final Pattern pattern = Pattern.compile(REGEX_IMG_SRC);
 
@@ -128,38 +130,61 @@ public class ImageHtmlEmail extends Html
     private String replacePattern(final String htmlMessage, final Pattern pattern, final
URL baseUrl)
             throws EmailException
     {
-        StringBuffer myStringBuffer = new StringBuffer();
-
+        DataSource imageDataSource;
+        StringBuffer stringBuffer = new StringBuffer();
+        
+        // maps "cid" --> name
+        Map cidCache = new HashMap();
+        
+        // maps "name" --> dataSource 
+        Map dataSourceCache = new HashMap();
+                
         // in the String, replace all "img src" with a CID and embed the related
         // image file if we find it.
         Matcher matcher = pattern.matcher(htmlMessage);
 
         // the matcher returns all instances one by one
         while (matcher.find())
-        {
-            // in the RegEx we have the src-element as second "group"
+        {            
+            // in the RegEx we have the <src> element as second "group"
             String image = matcher.group(2);
 
-            DataSource imageDataSource = resolve(baseUrl, image);
+            // avoid loading the same data source more than once
+            if(dataSourceCache.get(image) == null) 
+            {
+                imageDataSource = resolve(baseUrl, image);  
+                dataSourceCache.put(image, imageDataSource);
+            }
+            else
+            {
+                imageDataSource = (DataSource) dataSourceCache.get(image);
+            }                        
 
             if (imageDataSource != null)
             {
-                if(!this.inlineEmbeds.containsKey(imageDataSource.getName()))
+                String name = imageDataSource.getName();
+                String cid = (String) cidCache.get(name);
+                
+                if(cid == null)
                 {
-                    String cid = embed(imageDataSource, imageDataSource.getName());
-
-                    // if we embedded something, then we need to replace the URL with
-                    // the CID, otherwise the Matcher takes care of adding the
-                    // non-replaced text afterwards, so no else is necessary here!
-                    matcher.appendReplacement(myStringBuffer, matcher.group(1) + "cid:" +
cid + matcher.group(3));
+                    cid = embed(imageDataSource, imageDataSource.getName());    
+                    cidCache.put(name, cid);
                 }
+                
+                // if we embedded something, then we need to replace the URL with
+                // the CID, otherwise the Matcher takes care of adding the
+                // non-replaced text afterwards, so no else is necessary here!          
     
+                matcher.appendReplacement(stringBuffer, matcher.group(1) + "cid:" + cid +
matcher.group(3));
             }
         }
 
         // append the remaining items...
-        matcher.appendTail(myStringBuffer);
+        matcher.appendTail(stringBuffer);
+        
+        cidCache.clear();
+        dataSourceCache.clear();
 
-        return myStringBuffer.toString();
+        return stringBuffer.toString();
     }
 
 

Modified: commons/proper/email/trunk/src/test/org/apache/commons/mail/ImageHtmlEmailTest.java
URL: http://svn.apache.org/viewvc/commons/proper/email/trunk/src/test/org/apache/commons/mail/ImageHtmlEmailTest.java?rev=963422&r1=963421&r2=963422&view=diff
==============================================================================
--- commons/proper/email/trunk/src/test/org/apache/commons/mail/ImageHtmlEmailTest.java (original)
+++ commons/proper/email/trunk/src/test/org/apache/commons/mail/ImageHtmlEmailTest.java Mon Jul 12 18:38:50 2010
@@ -215,7 +215,8 @@ public class ImageHtmlEmailTest extends 
 				email.getCcAddresses(), email.getBccAddresses(), true);
 	}
 
-	public void testRegex() {
+	public void testRegex() 
+	{	
 		Pattern pattern = Pattern.compile(ImageHtmlEmail.REGEX_IMG_SRC);
 
 		// ensure that the regex that we use is catching the cases correctly
@@ -252,16 +253,47 @@ public class ImageHtmlEmailTest extends 
 		assertEquals("http://dstadler2.org/", matcher.group(2));
 
 		// what about newlines and other whitespaces
+		/*
 		matcher = pattern
 				.matcher("<html><body><img\n \t\rid=\"laskdasdkj\"\n \rsrc \n =\r  \"http://dstadler1.org/\"/><img
 \r  id=\" laskdasdkj\"    src    =   \"http://dstadler2.org/\"/></body></html>");
 		assertTrue(matcher.find());
 		assertEquals("http://dstadler1.org/", matcher.group(2));
 		assertTrue(matcher.find());
 		assertEquals("http://dstadler2.org/", matcher.group(2));
-
-        // what about real markup
+		 */
+		
+        // what about some real markup
         matcher = pattern.matcher("<img alt=\"Chart?ck=xradar&amp;w=120&amp;h=120&amp;c=7fff00|7fff00&amp;m=4&amp;g=0\"
src=\"/chart?ck=xradar&amp;w=120&amp;h=120&amp;c=7fff00|7fff00&amp;m=4&amp;g=0.2&amp;l=A,C,S,T&amp;v=3.0,3.0,2.0,2.0\"");
         assertTrue(matcher.find());
-        // assertEquals("/chart?ck=xradar&w=120&h=120&c=7fff00|7fff00&m=4&g=0.2&l=A,C,S,T&v=3.0,3.0,2.0,2.0",
matcher.group(2));
+        assertEquals("/chart?ck=xradar&amp;w=120&amp;h=120&amp;c=7fff00|7fff00&amp;m=4&amp;g=0.2&amp;l=A,C,S,T&amp;v=3.0,3.0,2.0,2.0",
matcher.group(2));
+        
+        // had a problem with multiple img-source tags
+		matcher = pattern
+				.matcher("<img src=\"file1\"/><img src=\"file2\"/>");
+		assertTrue(matcher.find());
+		assertEquals("file1", matcher.group(2));
+		assertTrue(matcher.find());
+		assertEquals("file2", matcher.group(2));
+
+		matcher = pattern
+				.matcher("<img src=\"file1\"/><img src=\"file2\"/><img src=\"file3\"/><img
src=\"file4\"/><img src=\"file5\"/>");
+		assertTrue(matcher.find());
+		assertEquals("file1", matcher.group(2));
+		assertTrue(matcher.find());
+		assertEquals("file2", matcher.group(2));
+		assertTrue(matcher.find());
+		assertEquals("file3", matcher.group(2));
+		assertTrue(matcher.find());
+		assertEquals("file4", matcher.group(2));
+		assertTrue(matcher.find());
+		assertEquals("file5", matcher.group(2));
+
+		// try with invalid HTML that is seen sometimes, i.e. without closing "/" or "</img>"
+		matcher = pattern
+				.matcher("<img src=\"file1\"><img src=\"file2\">");
+		assertTrue(matcher.find());
+		assertEquals("file1", matcher.group(2));
+		assertTrue(matcher.find());
+		assertEquals("file2", matcher.group(2));        
 	}
 }



Mime
View raw message