poi-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From n...@apache.org
Subject svn commit: r684986 - in /poi/trunk/src: documentation/content/xdocs/ scratchpad/src/org/apache/poi/hwpf/ scratchpad/src/org/apache/poi/hwpf/model/ scratchpad/src/org/apache/poi/hwpf/usermodel/ scratchpad/testcases/org/apache/poi/hwpf/model/ scratchpad...
Date Mon, 11 Aug 2008 23:42:40 GMT
Author: nick
Date: Mon Aug 11 16:42:39 2008
New Revision: 684986

URL: http://svn.apache.org/viewvc?rev=684986&view=rev
Log:
Finally get all HWPF tests to pass again, by working around how evil PAPX/CHPX/SEPX byte references
are

Modified:
    poi/trunk/src/documentation/content/xdocs/changes.xml
    poi/trunk/src/documentation/content/xdocs/status.xml
    poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/HWPFDocument.java
    poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/model/BytePropertyNode.java
    poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/model/CHPBinTable.java
    poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/model/CHPFormattedDiskPage.java
    poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/model/PAPBinTable.java
    poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/model/PAPFormattedDiskPage.java
    poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/model/PropertyNode.java
    poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/model/SEPX.java
    poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/model/SectionTable.java
    poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/model/TextPieceTable.java
    poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Range.java
    poi/trunk/src/scratchpad/testcases/org/apache/poi/hwpf/model/TestSectionTable.java
    poi/trunk/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestProblems.java
    poi/trunk/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestRangeDelete.java
    poi/trunk/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestRangeInsertion.java
    poi/trunk/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestRangeProperties.java
    poi/trunk/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestRangeReplacement.java

Modified: poi/trunk/src/documentation/content/xdocs/changes.xml
URL: http://svn.apache.org/viewvc/poi/trunk/src/documentation/content/xdocs/changes.xml?rev=684986&r1=684985&r2=684986&view=diff
==============================================================================
--- poi/trunk/src/documentation/content/xdocs/changes.xml (original)
+++ poi/trunk/src/documentation/content/xdocs/changes.xml Mon Aug 11 16:42:39 2008
@@ -37,6 +37,7 @@
 
 		<!-- Don't forget to update status.xml too! -->
         <release version="3.1.1-alpha1" date="2008-??-??">
+           <action dev="POI-DEVELOPERS" type="fix">Big improvement in how HWPF handles
unicode text, and more sanity checking of text ranges within HWPF</action>
            <action dev="POI-DEVELOPERS" type="add">Include headers and footers in
the extracted text from HWPF's WordExtractor</action>
            <action dev="POI-DEVELOPERS" type="add">Added support to HWPF for headers
and footers</action>
            <action dev="POI-DEVELOPERS" type="fix">Improve how HWPF deals with unicode
internally. Should avoid some odd behaviour when manipulating unicode text</action>

Modified: poi/trunk/src/documentation/content/xdocs/status.xml
URL: http://svn.apache.org/viewvc/poi/trunk/src/documentation/content/xdocs/status.xml?rev=684986&r1=684985&r2=684986&view=diff
==============================================================================
--- poi/trunk/src/documentation/content/xdocs/status.xml (original)
+++ poi/trunk/src/documentation/content/xdocs/status.xml Mon Aug 11 16:42:39 2008
@@ -34,6 +34,7 @@
 	<!-- Don't forget to update changes.xml too! -->
     <changes>
         <release version="3.1.1-alpha1" date="2008-??-??">
+           <action dev="POI-DEVELOPERS" type="fix">Big improvement in how HWPF handles
unicode text, and more sanity checking of text ranges within HWPF</action>
            <action dev="POI-DEVELOPERS" type="add">Include headers and footers in
the extracted text from HWPF's WordExtractor</action>
            <action dev="POI-DEVELOPERS" type="add">Added support to HWPF for headers
and footers</action>
            <action dev="POI-DEVELOPERS" type="fix">Improve how HWPF deals with unicode
internally. Should avoid some odd behaviour when manipulating unicode text</action>

Modified: poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/HWPFDocument.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/HWPFDocument.java?rev=684986&r1=684985&r2=684986&view=diff
==============================================================================
--- poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/HWPFDocument.java (original)
+++ poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/HWPFDocument.java Mon Aug 11 16:42:39
2008
@@ -253,7 +253,7 @@
     // read in the pictures stream
     _pictures = new PicturesTable(this, _dataStream, _mainStream, _fspa, _dgg);
 
-    _st = new SectionTable(_mainStream, _tableStream, _fib.getFcPlcfsed(), _fib.getLcbPlcfsed(),
fcMin, getTextTable().getTextPieces());
+    _st = new SectionTable(_mainStream, _tableStream, _fib.getFcPlcfsed(), _fib.getLcbPlcfsed(),
fcMin, _tpt, _cpSplit);
     _ss = new StyleSheet(_tableStream, _fib.getFcStshf());
     _ft = new FontTable(_tableStream, _fib.getFcSttbfffn(), _fib.getLcbSttbfffn());
 

Modified: poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/model/BytePropertyNode.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/model/BytePropertyNode.java?rev=684986&r1=684985&r2=684986&view=diff
==============================================================================
--- poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/model/BytePropertyNode.java (original)
+++ poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/model/BytePropertyNode.java Mon Aug 11
16:42:39 2008
@@ -36,6 +36,7 @@
 				generateCp(fcEnd, isUnicode),
 				buf
 		);
+		this.isUnicode = isUnicode;
 	}
 	private static int generateCp(int val, boolean isUnicode) {
 		if(isUnicode)

Modified: poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/model/CHPBinTable.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/model/CHPBinTable.java?rev=684986&r1=684985&r2=684986&view=diff
==============================================================================
--- poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/model/CHPBinTable.java (original)
+++ poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/model/CHPBinTable.java Mon Aug 11 16:42:39
2008
@@ -119,7 +119,7 @@
 
   public void insert(int listIndex, int cpStart, SprmBuffer buf)
   {
-	boolean needsToBeUnicode = tpt.isUnicodeAt(cpStart);
+	boolean needsToBeUnicode = tpt.isUnicodeAtCharOffset(cpStart);
 	  
     CHPX insertChpx = new CHPX(0, 0, buf, needsToBeUnicode);
     

Modified: poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/model/CHPFormattedDiskPage.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/model/CHPFormattedDiskPage.java?rev=684986&r1=684985&r2=684986&view=diff
==============================================================================
--- poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/model/CHPFormattedDiskPage.java (original)
+++ poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/model/CHPFormattedDiskPage.java Mon Aug
11 16:42:39 2008
@@ -61,7 +61,7 @@
 
       for (int x = 0; x < _crun; x++)
       {
-    	boolean isUnicode = tpt.isUnicodeAt( getStart(x) );
+    	boolean isUnicode = tpt.isUnicodeAtByteOffset( getStart(x) );
         _chpxList.add(new CHPX(getStart(x) - fcMin, getEnd(x) - fcMin, getGrpprl(x), isUnicode));
       }
     }

Modified: poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/model/PAPBinTable.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/model/PAPBinTable.java?rev=684986&r1=684985&r2=684986&view=diff
==============================================================================
--- poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/model/PAPBinTable.java (original)
+++ poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/model/PAPBinTable.java Mon Aug 11 16:42:39
2008
@@ -76,7 +76,7 @@
 
   public void insert(int listIndex, int cpStart, SprmBuffer buf)
   {
-    boolean needsToBeUnicode = tpt.isUnicodeAt(cpStart);
+    boolean needsToBeUnicode = tpt.isUnicodeAtCharOffset(cpStart);
     
     PAPX forInsert = new PAPX(0, 0, buf, _dataStream, needsToBeUnicode);
     

Modified: poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/model/PAPFormattedDiskPage.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/model/PAPFormattedDiskPage.java?rev=684986&r1=684985&r2=684986&view=diff
==============================================================================
--- poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/model/PAPFormattedDiskPage.java (original)
+++ poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/model/PAPFormattedDiskPage.java Mon Aug
11 16:42:39 2008
@@ -67,7 +67,8 @@
       for (int x = 0; x < _crun; x++) {
          int startAt = getStart(x) - fcMin;
          int endAt = getEnd(x) - fcMin;
-    	 boolean isUnicode = tpt.isUnicodeAt(startAt);
+    	 boolean isUnicode = tpt.isUnicodeAtByteOffset(startAt);
+         //System.err.println(startAt + " -> " + endAt + " = " + isUnicode);
     	 
          _papxList.add(new PAPX(startAt, endAt, getGrpprl(x), getParagraphHeight(x), dataStream,
isUnicode));
       }

Modified: poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/model/PropertyNode.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/model/PropertyNode.java?rev=684986&r1=684985&r2=684986&view=diff
==============================================================================
--- poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/model/PropertyNode.java (original)
+++ poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/model/PropertyNode.java Mon Aug 11 16:42:39
2008
@@ -48,6 +48,11 @@
       _cpStart = fcStart;
       _cpEnd = fcEnd;
       _buf = buf;
+      
+      if(_cpStart < 0) {
+    	  System.err.println("A property claimed to start before zero, at " + _cpStart + "!
Resetting it to zero, and hoping for the best");
+    	  _cpStart = 0;
+      }
   }
 
   /**

Modified: poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/model/SEPX.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/model/SEPX.java?rev=684986&r1=684985&r2=684986&view=diff
==============================================================================
--- poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/model/SEPX.java (original)
+++ poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/model/SEPX.java Mon Aug 11 16:42:39 2008
@@ -25,17 +25,15 @@
 import org.apache.poi.hwpf.usermodel.SectionProperties;
 
 /**
- * TODO - figure out if this works in characters, like most
- *  things do, or in bytes as PAPX / CHPX does.
  */
-public class SEPX extends PropertyNode
+public class SEPX extends BytePropertyNode
 {
 
   SectionDescriptor _sed;
 
-  public SEPX(SectionDescriptor sed, int start, int end, byte[] grpprl)
+  public SEPX(SectionDescriptor sed, int start, int end, byte[] grpprl, boolean isUnicode)
   {
-    super(start, end, SectionSprmUncompressor.uncompressSEP(grpprl, 0));
+    super(start, end, SectionSprmUncompressor.uncompressSEP(grpprl, 0), isUnicode);
     _sed = sed;
   }
 

Modified: poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/model/SectionTable.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/model/SectionTable.java?rev=684986&r1=684985&r2=684986&view=diff
==============================================================================
--- poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/model/SectionTable.java (original)
+++ poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/model/SectionTable.java Mon Aug 11 16:42:39
2008
@@ -34,6 +34,9 @@
   protected ArrayList _sections = new ArrayList();
   protected List _text;
 
+  /** So we can know if things are unicode or not */
+  private TextPieceTable tpt;
+
   public SectionTable()
   {
   }
@@ -41,10 +44,11 @@
 
   public SectionTable(byte[] documentStream, byte[] tableStream, int offset,
                       int size, int fcMin,
-                      List tpt)
+                      TextPieceTable tpt, CPSplitCalculator cps)
   {
     PlexOfCps sedPlex = new PlexOfCps(tableStream, offset, size, SED_SIZE);
-    _text = tpt;
+    this.tpt = tpt;
+    this._text = tpt.getTextPieces();
 
     int length = sedPlex.length();
 
@@ -54,11 +58,16 @@
       SectionDescriptor sed = new SectionDescriptor(node.getBytes(), 0);
 
       int fileOffset = sed.getFc();
+      int startAt = CPtoFC(node.getStart());
+      int endAt = CPtoFC(node.getEnd());
+      
+      boolean isUnicodeAtStart = tpt.isUnicodeAtByteOffset( startAt );
+//      System.err.println(startAt + " -> " + endAt + " = " + isUnicodeAtStart);
 
       // check for the optimization
       if (fileOffset == 0xffffffff)
       {
-        _sections.add(new SEPX(sed, CPtoFC(node.getStart()), CPtoFC(node.getEnd()), new byte[0]));
+        _sections.add(new SEPX(sed, startAt, endAt, new byte[0], isUnicodeAtStart));
       }
       else
       {
@@ -67,9 +76,34 @@
         byte[] buf = new byte[sepxSize];
         fileOffset += LittleEndian.SHORT_SIZE;
         System.arraycopy(documentStream, fileOffset, buf, 0, buf.length);
-        _sections.add(new SEPX(sed, CPtoFC(node.getStart()), CPtoFC(node.getEnd()), buf));
+        _sections.add(new SEPX(sed, startAt, endAt, buf, isUnicodeAtStart));
       }
     }
+    
+    // Some files seem to lie about their unicode status, which
+    //  is very very pesky. Try to work around these, but this
+    //  is getting on for black magic...
+    int mainEndsAt = cps.getMainDocumentEnd();
+    boolean matchAt = false;
+    boolean matchHalf = false;
+    for(int i=0; i<_sections.size(); i++) {
+    	SEPX s = (SEPX)_sections.get(i);
+    	if(s.getEnd() == mainEndsAt) {
+    		matchAt = true;
+    	} else if(s.getEndBytes() == mainEndsAt || s.getEndBytes() == mainEndsAt-1) {
+    		matchHalf = true;
+    	}
+    }
+    if(! matchAt && matchHalf) {
+    	System.err.println("Your document seemed to be mostly unicode, but the section definition
was in bytes! Trying anyway, but things may well go wrong!");
+        for(int i=0; i<_sections.size(); i++) {
+        	SEPX s = (SEPX)_sections.get(i);
+            GenericPropertyNode node = sedPlex.getProperty(i);
+            
+        	s.setStart( CPtoFC(node.getStart()) );
+        	s.setEnd( CPtoFC(node.getEnd()) );
+        }
+    }
   }
 
   public void adjustForInsert(int listIndex, int length)
@@ -171,7 +205,7 @@
 
       // Line using Ryan's FCtoCP() conversion method -
       // unable to observe any effect on our testcases when using this code - piers
-      GenericPropertyNode property = new GenericPropertyNode(FCtoCP(sepx.getStart()), FCtoCP(sepx.getEnd()),
sed.toByteArray());
+      GenericPropertyNode property = new GenericPropertyNode(FCtoCP(sepx.getStartBytes()),
FCtoCP(sepx.getEndBytes()), sed.toByteArray());
 
 
       plex.addProperty(property);

Modified: poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/model/TextPieceTable.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/model/TextPieceTable.java?rev=684986&r1=684985&r2=684986&view=diff
==============================================================================
--- poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/model/TextPieceTable.java (original)
+++ poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/model/TextPieceTable.java Mon Aug 11
16:42:39 2008
@@ -25,6 +25,8 @@
 import java.io.IOException;
 import java.io.UnsupportedEncodingException;
 import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Hashtable;
 import java.util.Iterator;
 import java.util.List;
 
@@ -103,6 +105,15 @@
       // And now build the piece
       _textPieces.add(new TextPiece(nodeStartChars, nodeEndChars, buf, pieces[x], node.getStart()));
     }
+    
+    // In the interest of our sanity, now sort the text pieces
+    //  into order, if they're not already
+    TextPiece[] tp = (TextPiece[])
+    	_textPieces.toArray(new TextPiece[_textPieces.size()]);
+    Arrays.sort(tp);
+    for(int i=0; i<tp.length; i++) {
+    	_textPieces.set(i, tp[i]);
+    }
   }
 
   public int getCpMin()
@@ -123,9 +134,8 @@
    *  paragraph properties :(
    * @param cp The character offset to check about
    */
-  public boolean isUnicodeAt(int cp) {
+  public boolean isUnicodeAtCharOffset(int cp) {
 	  boolean lastWas = false;
-	  int lastAt = 0;
 	  
 	  Iterator it = _textPieces.iterator();
 	  while(it.hasNext()) {
@@ -135,9 +145,37 @@
 			  return tp.isUnicode();
 		  }
 		  // Otherwise keep track for the last one
-		  if(tp.getStart() > lastAt) {
-			  lastWas = tp.isUnicode();
+		  lastWas = tp.isUnicode();
+	  }
+	  
+	  // If they ask off the end, just go with the last one...
+	  return lastWas;
+  }
+  /**
+   * Is the text at the given byte offset
+   *  unicode, or plain old ascii?
+   * In a very evil fashion, you have to actually 
+   *  know this to make sense of character and
+   *  paragraph properties :(
+   * @param bytePos The byte offset to check about
+   */
+  public boolean isUnicodeAtByteOffset(int bytePos) {
+	  boolean lastWas = false;
+	  int curByte = 0;
+	  
+	  Iterator it = _textPieces.iterator();
+	  while(it.hasNext()) {
+		  TextPiece tp = (TextPiece)it.next();
+		  int nextByte = curByte + tp.bytesLength();
+		  
+		  // If the text piece covers the character, all good
+		  if(curByte <= bytePos && nextByte >= bytePos) {
+			  return tp.isUnicode();
 		  }
+		  // Otherwise keep track for the last one
+		  lastWas = tp.isUnicode();
+		  // Move along
+		  curByte = nextByte;
 	  }
 	  
 	  // If they ask off the end, just go with the last one...

Modified: poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Range.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Range.java?rev=684986&r1=684985&r2=684986&view=diff
==============================================================================
--- poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Range.java (original)
+++ poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Range.java Mon Aug 11 16:42:39
2008
@@ -155,6 +155,8 @@
     _characters = _doc.getCharacterTable().getTextRuns();
     _text = _doc.getTextTable().getTextPieces();
     _parent = new WeakReference(null);
+    
+    sanityCheckStartEnd();
   }
 
 
@@ -175,6 +177,8 @@
     _characters = parent._characters;
     _text = parent._text;
     _parent = new WeakReference(parent);
+    
+    sanityCheckStartEnd();
   }
 
   /**
@@ -226,6 +230,22 @@
         _textRangeFound = true;
         break;
     }
+    
+    sanityCheckStartEnd();
+  }
+  
+  /**
+   * Ensures that the start and end we were given
+   *  are actually valid, to avoid issues later on
+   *  if they're not
+   */
+  private void sanityCheckStartEnd() {
+	  if(_start < 0) {
+		  throw new IllegalArgumentException("Range start must not be negative. Given " + _start);
+	  }
+	  if(_end < _start) {
+		  throw new IllegalArgumentException("The end (" + _end + ") must not be before the start
("+_start+")");
+	  }
   }
 
   /**
@@ -537,13 +557,17 @@
     for (int x = _parStart; x < numParagraphs; x++)
     {
       PAPX papx = (PAPX)_paragraphs.get(x);
+      //System.err.println("Paragraph " + x + " was " + papx.getStart() + " -> " + papx.getEnd());
       papx.adjustForDelete(_start, _end - _start);
+      //System.err.println("Paragraph " + x + " is now " + papx.getStart() + " -> " +
papx.getEnd());
     }
 
     for (int x = _sectionStart; x < numSections; x++)
     {
       SEPX sepx = (SEPX)_sections.get(x);
+      //System.err.println("Section " + x + " was " + sepx.getStart() + " -> " + sepx.getEnd());
       sepx.adjustForDelete(_start, _end - _start);
+      //System.err.println("Section " + x + " is now " + sepx.getStart() + " -> " + sepx.getEnd());
     }
     
     for (int x = _textStart; x < numTextPieces; x++)
@@ -806,6 +830,10 @@
     {
       throw new ArrayIndexOutOfBoundsException("The table's bounds fall outside of this Range");
     }
+    if (tableEnd < 0)
+    {
+      throw new ArrayIndexOutOfBoundsException("The table's end is negative, which isn't
allowed!");
+    }
     return new Table(r._parStart, tableEnd, r._doc.getRange(), paragraph.getTableLevel());
   }
 

Modified: poi/trunk/src/scratchpad/testcases/org/apache/poi/hwpf/model/TestSectionTable.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/testcases/org/apache/poi/hwpf/model/TestSectionTable.java?rev=684986&r1=684985&r2=684986&view=diff
==============================================================================
--- poi/trunk/src/scratchpad/testcases/org/apache/poi/hwpf/model/TestSectionTable.java (original)
+++ poi/trunk/src/scratchpad/testcases/org/apache/poi/hwpf/model/TestSectionTable.java Mon
Aug 11 16:42:39 2008
@@ -45,13 +45,15 @@
     byte[] tableStream = _hWPFDocFixture._tableStream;
     int fcMin = fib.getFcMin();
 
+    CPSplitCalculator cps = new CPSplitCalculator(fib);
+    
     ComplexFileTable cft = new ComplexFileTable(mainStream, tableStream, fib.getFcClx(),
fcMin);
     TextPieceTable tpt = cft.getTextPieceTable();
 
     SectionTable sectionTable = new SectionTable(mainStream, tableStream,
                                                  fib.getFcPlcfsed(),
                                                  fib.getLcbPlcfsed(),
-                                                 fcMin, tpt.getTextPieces());
+                                                 fcMin, tpt, cps);
     HWPFFileSystem fileSys = new HWPFFileSystem();
 
     sectionTable.writeTo(fileSys, 0);
@@ -61,7 +63,9 @@
     byte[] newTableStream = tableOut.toByteArray();
     byte[] newMainStream = mainOut.toByteArray();
 
-    SectionTable newSectionTable = new SectionTable(newMainStream, newTableStream, 0, newTableStream.length,
0, tpt.getTextPieces());
+    SectionTable newSectionTable = new SectionTable(
+    		newMainStream, newTableStream, 0, 
+    		newTableStream.length, 0, tpt, cps);
 
     ArrayList oldSections = sectionTable.getSections();
     ArrayList newSections = newSectionTable.getSections();

Modified: poi/trunk/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestProblems.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestProblems.java?rev=684986&r1=684985&r2=684986&view=diff
==============================================================================
--- poi/trunk/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestProblems.java (original)
+++ poi/trunk/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestProblems.java Mon
Aug 11 16:42:39 2008
@@ -81,9 +81,16 @@
     	HWPFDocument doc = new HWPFDocument(new FileInputStream(
     			new File(dirname, "Bug44292.doc")));
 		Range r = doc.getRange();
+		assertEquals(6, r.numParagraphs());
+		assertEquals(0, r.getStartOffset());
+		assertEquals(87, r.getEndOffset());
 			
-		//get the table
+		// Paragraph with table
 		Paragraph p = r.getParagraph(0);
+		assertEquals(0, p.getStartOffset());
+		assertEquals(20, p.getEndOffset());
+		
+		// Get the table
 		Table t = r.getTable(p);
 		
 		//get the only row

Modified: poi/trunk/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestRangeDelete.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestRangeDelete.java?rev=684986&r1=684985&r2=684986&view=diff
==============================================================================
--- poi/trunk/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestRangeDelete.java
(original)
+++ poi/trunk/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestRangeDelete.java
Mon Aug 11 16:42:39 2008
@@ -23,6 +23,7 @@
 import junit.framework.TestCase;
 
 import org.apache.poi.hwpf.HWPFDocument;
+import org.apache.poi.hwpf.model.PAPX;
 
 /**
  *	Test to see if Range.delete() works even if the Range contains a
@@ -37,6 +38,8 @@
 		"${delete} This is an MS-Word 97 formatted document created using NeoOffice v. 2.2.4 Patch
0 (OpenOffice.org v. 2.2.1).\r";
 	private String originalText =
 		"It is used to confirm that text delete works even if Unicode characters (such as \u201c\u2014\u201d
(U+2014), \u201c\u2e8e\u201d (U+2E8E), or \u201c\u2714\u201d (U+2714)) are present.  Everybody
should be thankful to the ${organization} ${delete} and all the POI contributors for their
assistance in this matter.\r";
+	private String lastText =
+		"Thank you, ${organization} ${delete}!\r";
 	private String searchText = "${delete}";
 	private String expectedText1 = " This is an MS-Word 97 formatted document created using
NeoOffice v. 2.2.4 Patch 0 (OpenOffice.org v. 2.2.1).\r";
 	private String expectedText2 =
@@ -69,32 +72,60 @@
 		Range range;
 		Section section;
 		Paragraph para;
+		PAPX paraDef;
 
 		// First, check overall
 		range = daDoc.getOverallRange();
 		assertEquals(1, range.numSections());
-		assertEquals(4, range.numParagraphs());
+		assertEquals(5, range.numParagraphs());
 		
 		
 		// Now, onto just the doc bit
 		range = daDoc.getRange();
 
 		assertEquals(1, range.numSections());
+		assertEquals(1, daDoc.getSectionTable().getSections().size());
 		section = range.getSection(0);
-
-		assertEquals(4, section.numParagraphs());
+		
+		assertEquals(5, section.numParagraphs());
 		
 		para = section.getParagraph(0);
 		assertEquals(1, para.numCharacterRuns());
 		assertEquals(introText, para.text());
 		
 		para = section.getParagraph(1);
-		assertEquals(2, para.numCharacterRuns());
+		assertEquals(5, para.numCharacterRuns());
 		assertEquals(fillerText, para.text());
 		
+		
+		paraDef = (PAPX)daDoc.getParagraphTable().getParagraphs().get(2);
+		assertEquals(132, paraDef.getStart());
+		assertEquals(400, paraDef.getEnd());
+		
 		para = section.getParagraph(2);
-		assertEquals(6, para.numCharacterRuns());
+		assertEquals(5, para.numCharacterRuns());
 		assertEquals(originalText, para.text());
+		
+		
+		paraDef = (PAPX)daDoc.getParagraphTable().getParagraphs().get(3);
+		assertEquals(400, paraDef.getStart());
+		assertEquals(438, paraDef.getEnd());
+		
+		para = section.getParagraph(3);
+		assertEquals(1, para.numCharacterRuns());
+		assertEquals(lastText, para.text());
+		
+		
+		// Check things match on text length
+		assertEquals(439, range.text().length());
+		assertEquals(439, section.text().length());
+		assertEquals(439, 
+				section.getParagraph(0).text().length() +
+				section.getParagraph(1).text().length() +
+				section.getParagraph(2).text().length() +
+				section.getParagraph(3).text().length() +
+				section.getParagraph(4).text().length()
+		);
 	}
 
 	/**
@@ -108,7 +139,7 @@
 		assertEquals(1, range.numSections());
 
 		Section section = range.getSection(0);
-		assertEquals(4, section.numParagraphs());
+		assertEquals(5, section.numParagraphs());
 
 		Paragraph para = section.getParagraph(2);
 
@@ -131,7 +162,7 @@
 		assertEquals(1, range.numSections());
 		section = range.getSection(0);
 
-		assertEquals(4, section.numParagraphs());
+		assertEquals(5, section.numParagraphs());
 		para = section.getParagraph(2);
 
 		text = para.text();
@@ -154,7 +185,7 @@
 		assertEquals(1, range.numSections());
 
 		Section section = range.getSection(0);
-		assertEquals(4, section.numParagraphs());
+		assertEquals(5, section.numParagraphs());
 
 		Paragraph para = section.getParagraph(2);
 
@@ -188,7 +219,7 @@
 		assertEquals(1, range.numSections());
 		section = range.getSection(0);
 
-		assertEquals(4, section.numParagraphs());
+		assertEquals(5, section.numParagraphs());
 
 		para = section.getParagraph(0);
 		text = para.text();

Modified: poi/trunk/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestRangeInsertion.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestRangeInsertion.java?rev=684986&r1=684985&r2=684986&view=diff
==============================================================================
--- poi/trunk/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestRangeInsertion.java
(original)
+++ poi/trunk/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestRangeInsertion.java
Mon Aug 11 16:42:39 2008
@@ -71,14 +71,11 @@
 		Paragraph para = section.getParagraph(2);
 		assertEquals(originalText, para.text());
 
-		assertEquals(6, para.numCharacterRuns());
+		assertEquals(3, para.numCharacterRuns());
 		String text = 
 			para.getCharacterRun(0).text() + 
 			para.getCharacterRun(1).text() +
-			para.getCharacterRun(2).text() +
-			para.getCharacterRun(3).text() +
-			para.getCharacterRun(4).text() +
-			para.getCharacterRun(5).text()
+			para.getCharacterRun(2).text()
 		;
 
 		assertEquals(originalText, text);
@@ -116,14 +113,11 @@
 		Paragraph para = section.getParagraph(2);
 		assertEquals((textToInsert + originalText), para.text());
 
-		assertEquals(6, para.numCharacterRuns());
+		assertEquals(3, para.numCharacterRuns());
 		String text = 
 			para.getCharacterRun(0).text() + 
 			para.getCharacterRun(1).text() +
-			para.getCharacterRun(2).text() +
-			para.getCharacterRun(3).text() +
-			para.getCharacterRun(4).text() +
-			para.getCharacterRun(5).text()
+			para.getCharacterRun(2).text()
 		;
 
 		// System.out.println(text);

Modified: poi/trunk/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestRangeProperties.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestRangeProperties.java?rev=684986&r1=684985&r2=684986&view=diff
==============================================================================
--- poi/trunk/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestRangeProperties.java
(original)
+++ poi/trunk/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestRangeProperties.java
Mon Aug 11 16:42:39 2008
@@ -87,6 +87,16 @@
 				r.text()
 		);
 		
+		assertEquals(1, r.numSections());
+		assertEquals(1, a.getSectionTable().getSections().size());
+		Section s = r.getSection(0);
+		assertEquals(
+				a_page_1 +
+				page_break + "\r" + 
+				a_page_2,
+				s.text()
+		);
+		
 		assertEquals(
 				7,
 				r.numParagraphs()
@@ -161,6 +171,20 @@
 		assertEquals(
 				408, r.text().length()
 		);
+	
+		
+		assertEquals(1, r.numSections());
+		assertEquals(1, u.getSectionTable().getSections().size());
+		Section s = r.getSection(0);
+		assertEquals(
+				u_page_1 +
+				page_break + "\r" + 
+				u_page_2,
+				s.text()
+		);
+		assertEquals(0, s.getStartOffset());
+		assertEquals(408, s.getEndOffset());
+
 		
 		List pDefs = r._paragraphs;
 		assertEquals(35, pDefs.size());

Modified: poi/trunk/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestRangeReplacement.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestRangeReplacement.java?rev=684986&r1=684985&r2=684986&view=diff
==============================================================================
--- poi/trunk/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestRangeReplacement.java
(original)
+++ poi/trunk/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestRangeReplacement.java
Mon Aug 11 16:42:39 2008
@@ -66,21 +66,22 @@
 		HWPFDocument daDoc = new HWPFDocument(new FileInputStream(illustrativeDocFile));
 
 		Range range = daDoc.getRange();
+		assertEquals(414, range.text().length());
 
 		assertEquals(1, range.numSections());
 		Section section = range.getSection(0);
+		assertEquals(414, section.text().length());
 
-		assertEquals(4, section.numParagraphs());
+		assertEquals(5, section.numParagraphs());
 		Paragraph para = section.getParagraph(2);
 
-		assertEquals(6, para.numCharacterRuns());
+		assertEquals(5, para.numCharacterRuns());
 		String text = 
 			para.getCharacterRun(0).text() + 
 			para.getCharacterRun(1).text() +
 			para.getCharacterRun(2).text() +
 			para.getCharacterRun(3).text() +
-			para.getCharacterRun(4).text() +
-			para.getCharacterRun(5).text()
+			para.getCharacterRun(4).text()
 		;
 
 		assertEquals(originalText, text);
@@ -97,7 +98,7 @@
 		assertEquals(1, range.numSections());
 
 		Section section = range.getSection(0);
-		assertEquals(4, section.numParagraphs());
+		assertEquals(5, section.numParagraphs());
 
 		Paragraph para = section.getParagraph(2);
 
@@ -130,7 +131,7 @@
 		assertEquals(1, range.numSections());
 
 		Section section = range.getSection(0);
-		assertEquals(4, section.numParagraphs());
+		assertEquals(5, section.numParagraphs());
 
 		Paragraph para = section.getParagraph(2);
 
@@ -141,7 +142,7 @@
 
 		assertEquals(1, range.numSections());
 		section = range.getSection(0);
-		assertEquals(4, section.numParagraphs());
+		assertEquals(5, section.numParagraphs());
 
 		para = section.getParagraph(2);
 		text = para.text();



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@poi.apache.org
For additional commands, e-mail: commits-help@poi.apache.org


Mime
View raw message