poi-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From n...@apache.org
Subject svn commit: r1175887 - in /poi/trunk/src/ooxml: java/org/apache/poi/xslf/extractor/ java/org/apache/poi/xslf/usermodel/ testcases/org/apache/poi/xslf/extractor/
Date Mon, 26 Sep 2011 14:37:50 GMT
Author: nick
Date: Mon Sep 26 14:37:50 2011
New Revision: 1175887

URL: http://svn.apache.org/viewvc?rev=1175887&view=rev
Log:
XSLF text extraction improvements relating to TIKA-712

Added:
    poi/trunk/src/ooxml/java/org/apache/poi/xslf/usermodel/DrawingTextPlaceholder.java
Modified:
    poi/trunk/src/ooxml/java/org/apache/poi/xslf/extractor/XSLFPowerPointExtractor.java
    poi/trunk/src/ooxml/java/org/apache/poi/xslf/usermodel/DrawingTextBody.java
    poi/trunk/src/ooxml/java/org/apache/poi/xslf/usermodel/XSLFCommonSlideData.java
    poi/trunk/src/ooxml/testcases/org/apache/poi/xslf/extractor/TestXSLFPowerPointExtractor.java

Modified: poi/trunk/src/ooxml/java/org/apache/poi/xslf/extractor/XSLFPowerPointExtractor.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/java/org/apache/poi/xslf/extractor/XSLFPowerPointExtractor.java?rev=1175887&r1=1175886&r2=1175887&view=diff
==============================================================================
--- poi/trunk/src/ooxml/java/org/apache/poi/xslf/extractor/XSLFPowerPointExtractor.java (original)
+++ poi/trunk/src/ooxml/java/org/apache/poi/xslf/extractor/XSLFPowerPointExtractor.java Mon Sep 26 14:37:50 2011
@@ -23,6 +23,8 @@ import org.apache.poi.openxml4j.exceptio
 import org.apache.poi.openxml4j.opc.OPCPackage;
 import org.apache.poi.xslf.XSLFSlideShow;
 import org.apache.poi.xslf.usermodel.DrawingParagraph;
+import org.apache.poi.xslf.usermodel.DrawingTextBody;
+import org.apache.poi.xslf.usermodel.DrawingTextPlaceholder;
 import org.apache.poi.xslf.usermodel.XMLSlideShow;
 import org.apache.poi.xslf.usermodel.XSLFCommentAuthors;
 import org.apache.poi.xslf.usermodel.XSLFComments;
@@ -30,6 +32,7 @@ import org.apache.poi.xslf.usermodel.XSL
 import org.apache.poi.xslf.usermodel.XSLFNotes;
 import org.apache.poi.xslf.usermodel.XSLFRelation;
 import org.apache.poi.xslf.usermodel.XSLFSlide;
+import org.apache.poi.xslf.usermodel.XSLFSlideLayout;
 import org.apache.poi.xslf.usermodel.XSLFSlideMaster;
 import org.apache.xmlbeans.XmlException;
 import org.openxmlformats.schemas.presentationml.x2006.main.CTComment;
@@ -124,6 +127,7 @@ public class XSLFPowerPointExtractor ext
          try {
             XSLFNotes notes = slide.getNotes();
             XSLFComments comments = slide.getComments();
+            XSLFSlideLayout layout = slide.getSlideLayout();
             XSLFSlideMaster master = slide.getMasterSheet();
 
             // TODO Do the slide's name
@@ -131,11 +135,16 @@ public class XSLFPowerPointExtractor ext
 
             // Do the slide's text if requested
             if (slideText) {
-               extractText(slide.getCommonSlideData(), text);
+               extractText(slide.getCommonSlideData(), false, text);
                
-               // If there's a master sheet and it's requested, grab text from there
-               if(masterText && master != null) {
-                  extractText(master.getCommonSlideData(), text);
+               // If requested, get text from the master and it's layout 
+               if(masterText) {
+                  if(layout != null) {
+                     extractText(layout.getCommonSlideData(), true, text);
+                  }
+                  if(master != null) {
+                     extractText(master.getCommonSlideData(), true, text);
+                  }
                }
 
                // If the slide has comments, do those too
@@ -158,7 +167,7 @@ public class XSLFPowerPointExtractor ext
 
             // Do the notes if requested
             if (notesText && notes != null) {
-               extractText(notes.getCommonSlideData(), text);
+               extractText(notes.getCommonSlideData(), false, text);
             }
          } catch (Exception e) {
             throw new RuntimeException(e);
@@ -168,10 +177,20 @@ public class XSLFPowerPointExtractor ext
       return text.toString();
    }
 	
-	private void extractText(XSLFCommonSlideData data, StringBuffer text) {
-        for (DrawingParagraph p : data.getText()) {
+	private void extractText(XSLFCommonSlideData data, boolean skipPlaceholders, StringBuffer text) {
+	   for(DrawingTextBody textBody : data.getDrawingText()) {
+	      if(skipPlaceholders && textBody instanceof DrawingTextPlaceholder) {
+	         DrawingTextPlaceholder ph = (DrawingTextPlaceholder)textBody;
+	         if(! ph.isPlaceholderCustom()) {
+	            // Skip non-customised placeholder text
+	            continue;
+	         }
+	      }
+	      
+	      for (DrawingParagraph p : textBody.getParagraphs()) {
             text.append(p.getText());
             text.append("\n");
-        }
-    }
+	      }
+	   }
+	}
 }

Modified: poi/trunk/src/ooxml/java/org/apache/poi/xslf/usermodel/DrawingTextBody.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/java/org/apache/poi/xslf/usermodel/DrawingTextBody.java?rev=1175887&r1=1175886&r2=1175887&view=diff
==============================================================================
--- poi/trunk/src/ooxml/java/org/apache/poi/xslf/usermodel/DrawingTextBody.java (original)
+++ poi/trunk/src/ooxml/java/org/apache/poi/xslf/usermodel/DrawingTextBody.java Mon Sep 26 14:37:50 2011
@@ -17,16 +17,16 @@
 
 package org.apache.poi.xslf.usermodel;
 
+import java.util.List;
+
 import org.openxmlformats.schemas.drawingml.x2006.main.CTTextBody;
 import org.openxmlformats.schemas.drawingml.x2006.main.CTTextParagraph;
 
-import java.util.List;
-
 public class DrawingTextBody {
     private final CTTextBody textBody;
 
     public DrawingTextBody(CTTextBody textBody) {
-        this.textBody = textBody;
+       this.textBody = textBody;
     }
 
     public DrawingParagraph[] getParagraphs() {

Added: poi/trunk/src/ooxml/java/org/apache/poi/xslf/usermodel/DrawingTextPlaceholder.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/java/org/apache/poi/xslf/usermodel/DrawingTextPlaceholder.java?rev=1175887&view=auto
==============================================================================
--- poi/trunk/src/ooxml/java/org/apache/poi/xslf/usermodel/DrawingTextPlaceholder.java (added)
+++ poi/trunk/src/ooxml/java/org/apache/poi/xslf/usermodel/DrawingTextPlaceholder.java Mon Sep 26 14:37:50 2011
@@ -0,0 +1,57 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+
+package org.apache.poi.xslf.usermodel;
+
+import org.openxmlformats.schemas.drawingml.x2006.main.CTTextBody;
+import org.openxmlformats.schemas.presentationml.x2006.main.CTPlaceholder;
+import org.openxmlformats.schemas.presentationml.x2006.main.STPlaceholderType;
+
+/**
+ * A {@link DrawingTextBody} which is a placeholder
+ * @author nick
+ *
+ */
+public class DrawingTextPlaceholder extends DrawingTextBody {
+    private final CTPlaceholder placeholder;
+
+    public DrawingTextPlaceholder(CTTextBody textBody, CTPlaceholder placeholder) {
+       super(textBody);
+       this.placeholder = placeholder;
+    }
+    
+    /**
+     * What kind of placeholder is this?
+     */
+    public String getPlaceholderType() {
+       return placeholder.getType().toString();
+    }
+
+    /**
+     * What kind of placeholder is this?
+     */
+    public STPlaceholderType.Enum getPlaceholderTypeEnum() {
+       return placeholder.getType();
+    }
+
+    /**
+     * Is the PlaceHolder text customised?
+     */
+    public boolean isPlaceholderCustom() {
+       return placeholder.getHasCustomPrompt();
+    }
+}

Modified: poi/trunk/src/ooxml/java/org/apache/poi/xslf/usermodel/XSLFCommonSlideData.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/java/org/apache/poi/xslf/usermodel/XSLFCommonSlideData.java?rev=1175887&r1=1175886&r2=1175887&view=diff
==============================================================================
--- poi/trunk/src/ooxml/java/org/apache/poi/xslf/usermodel/XSLFCommonSlideData.java (original)
+++ poi/trunk/src/ooxml/java/org/apache/poi/xslf/usermodel/XSLFCommonSlideData.java Mon Sep 26 14:37:50 2011
@@ -26,6 +26,7 @@ import org.apache.xmlbeans.impl.values.X
 import org.openxmlformats.schemas.drawingml.x2006.main.CTGraphicalObjectData;
 import org.openxmlformats.schemas.drawingml.x2006.main.CTTable;
 import org.openxmlformats.schemas.drawingml.x2006.main.CTTextBody;
+import org.openxmlformats.schemas.presentationml.x2006.main.CTApplicationNonVisualDrawingProps;
 import org.openxmlformats.schemas.presentationml.x2006.main.CTCommonSlideData;
 import org.openxmlformats.schemas.presentationml.x2006.main.CTGraphicalObjectFrame;
 import org.openxmlformats.schemas.presentationml.x2006.main.CTGroupShape;
@@ -42,11 +43,11 @@ public class XSLFCommonSlideData {
     public XSLFCommonSlideData(CTCommonSlideData data) {
         this.data = data;
     }
-
-    public List<DrawingParagraph> getText() {
+    
+    public List<DrawingTextBody> getDrawingText() {
         CTGroupShape gs = data.getSpTree();
 
-        List<DrawingParagraph> out = new ArrayList<DrawingParagraph>();
+        List<DrawingTextBody> out = new ArrayList<DrawingTextBody>();
 
         processShape(gs, out);
 
@@ -77,8 +78,7 @@ public class XSLFCommonSlideData {
                     for (DrawingTableRow row : table.getRows()) {
                         for (DrawingTableCell cell : row.getCells()) {
                             DrawingTextBody textBody = cell.getTextBody();
-
-                            out.addAll(Arrays.asList(textBody.getParagraphs()));
+                            out.add(textBody);
                         }
                     }
                 }
@@ -89,19 +89,31 @@ public class XSLFCommonSlideData {
 
         return out;
     }
+    public List<DrawingParagraph> getText() {
+       List<DrawingParagraph> paragraphs = new ArrayList<DrawingParagraph>();
+       for(DrawingTextBody textBody : getDrawingText()) {
+          paragraphs.addAll(Arrays.asList(textBody.getParagraphs()));
+       }
+       return paragraphs;
+    }
 
-    private void processShape(CTGroupShape gs, List<DrawingParagraph> out) {
+    private void processShape(CTGroupShape gs, List<DrawingTextBody> out) {
         List<CTShape> shapes = gs.getSpList();
-        for (int i = 0; i < shapes.size(); i++) {
-            CTTextBody ctTextBody = shapes.get(i).getTxBody();
+        for (CTShape shape : shapes) {
+            CTTextBody ctTextBody = shape.getTxBody();
             if (ctTextBody==null) {
                 continue;
             }
+            
+            DrawingTextBody textBody;
+            CTApplicationNonVisualDrawingProps nvpr = shape.getNvSpPr().getNvPr(); 
+            if(nvpr.isSetPh()) {
+               textBody = new DrawingTextPlaceholder(ctTextBody, nvpr.getPh());
+            } else {
+               textBody = new DrawingTextBody(ctTextBody);
+            }
 
-            DrawingTextBody textBody = new DrawingTextBody(ctTextBody);
-
-            out.addAll(Arrays.asList(textBody.getParagraphs()));
+            out.add(textBody);
         }
     }
-
 }

Modified: poi/trunk/src/ooxml/testcases/org/apache/poi/xslf/extractor/TestXSLFPowerPointExtractor.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/testcases/org/apache/poi/xslf/extractor/TestXSLFPowerPointExtractor.java?rev=1175887&r1=1175886&r2=1175887&view=diff
==============================================================================
--- poi/trunk/src/ooxml/testcases/org/apache/poi/xslf/extractor/TestXSLFPowerPointExtractor.java (original)
+++ poi/trunk/src/ooxml/testcases/org/apache/poi/xslf/extractor/TestXSLFPowerPointExtractor.java Mon Sep 26 14:37:50 2011
@@ -58,9 +58,13 @@ public class TestXSLFPowerPointExtractor
 		assertTrue(text.startsWith("Lorem ipsum dolor sit amet\n"));
 		assertTrue(text.contains("amet\n\n"));
 
-		// Our master text, for tests
+		// Our placeholder master text
+		// This shouldn't show up in the output
 		String masterText =
          "Click to edit Master title style\n" +
+         "Click to edit Master subtitle style\n" +
+         "\n\n\n\n\n\n" +
+         "Click to edit Master title style\n" +
          "Click to edit Master text styles\n" +
          "Second level\n" +
          "Third level\n" +
@@ -111,17 +115,13 @@ public class TestXSLFPowerPointExtractor
             "Lorem ipsum dolor sit amet\n" +
             "Nunc at risus vel erat tempus posuere. Aenean non ante.\n" +
             "\n" +
-            masterText +
-            "\n\n\n" +
             "Lorem ipsum dolor sit amet\n" +
             "Lorem\n" +
             "ipsum\n" +
             "dolor\n" +
             "sit\n" +
             "amet\n" +
-            "\n" +
-            masterText +
-            "\n\n\n"
+            "\n"
             , text
       );
 		
@@ -131,17 +131,14 @@ public class TestXSLFPowerPointExtractor
             "Lorem ipsum dolor sit amet\n" +
             "Nunc at risus vel erat tempus posuere. Aenean non ante.\n" +
             "\n" +
-            masterText +
-            "\n\n\n\n\n" +
+            "\n\n" +
             "Lorem ipsum dolor sit amet\n" +
             "Lorem\n" +
             "ipsum\n" +
             "dolor\n" +
             "sit\n" +
             "amet\n" +
-            "\n" +
-            masterText +
-            "\n\n\n\n\n"
+            "\n\n\n"
             , text
       );
 		
@@ -176,6 +173,9 @@ public class TestXSLFPowerPointExtractor
          new XSLFSlideShow(OPCPackage.open(slTests.openResourceAsStream("WithMaster.pptx")));
       XSLFPowerPointExtractor extractor = 
          new XSLFPowerPointExtractor(xml);
+      extractor.setSlidesByDefault(true);
+      extractor.setNotesByDefault(false);
+      extractor.setMasterByDefault(true);
       
       String text = extractor.getText();
       assertTrue(text.length() > 0);
@@ -183,17 +183,28 @@ public class TestXSLFPowerPointExtractor
       // Check master text is there
       assertTrue("Unable to find expected word in text\n" + text, 
             text.contains("Footer from the master slide"));
+
+      // Theme text shouldn't show up
+      String themeText = 
+         "Theme Master Title\n" +
+         "Theme Master first level\n" +
+         "And the 2nd level\n" +
+         "Our 3rd level goes here\n" +
+         "And onto the 4th, such fun….\n" +
+         "Finally is the Fifth level\n";
       
       // Check the whole text
       assertEquals(
             "First page title\n" +
             "First page subtitle\n" +
-//            "This text comes from the Master Slide\n" + // TODO
-//            "This is the Master Title\n" + // TODO
-            "\n" + // TODO Should be the above
+            "This is the Master Title\n" +
+            "This text comes from the Master Slide\n" +
+            "\n" +
+            // TODO Detect we didn't have a title, and include the master one
             "2nd page subtitle\n" +
-//          "This text comes from the Master Slide\n" + // TODO
-            "Footer from the master slide\n"
+            "Footer from the master slide\n" +
+            "This is the Master Title\n" +
+            "This text comes from the Master Slide\n"
             , text
       );
 	}



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@poi.apache.org
For additional commands, e-mail: commits-help@poi.apache.org


Mime
View raw message