poi-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Nikola Ivacic <nikola.iva...@rs-pi.com>
Subject Re: [PATCH] WordDocument.java
Date Tue, 20 May 2003 11:27:14 GMT
Sorry;

 I noticed that

Ryan Ackley wrote:

>You need to add patches as attachments through bugzilla. Look at the mailing
>list to see examples on the proper way to do this. I have no idea what your
>patch does.
>
>Ryan Ackley
>
>----- Original Message ----- 
>From: "Nikola Ivacic" <nikola.ivacic@rs-pi.com>
>To: "POI Developers List" <poi-dev@jakarta.apache.org>
>Sent: Tuesday, May 20, 2003 3:56 AM
>Subject: [PATCH] WordDocument.java
>
>
>  
>
>>--- g:\temp\WordDocument.java Wed May 14 15:06:22 2003
>>+++ g:\temp\WordDocument.java.orig Wed May 14 15:05:10 2003
>>@@ -376,8 +376,8 @@
>>                 filePos &= ~(0x40000000);//gives me FC in doc stream
>>                 filePos /= 2;
>>             }
>>-            int totLength = (unicode)?(LittleEndian.getInt(tableStream,
>>    
>>
>pos + (x + 1) * 4) - LittleEndian.getInt(tableStream, pos + (x * 4)))*2:
>  
>
>>-                                      LittleEndian.getInt(tableStream,
>>    
>>
>pos + (x + 1) * 4) - LittleEndian.getInt(tableStream, pos + (x * 4));
>  
>
>>+            int totLength = LittleEndian.getInt(tableStream, pos + (x +
>>    
>>
>1) * 4) -
>  
>
>>+                            LittleEndian.getInt(tableStream, pos + (x *
>>    
>>
>4));
>  
>
>>             TextPiece piece = new TextPiece(filePos, totLength, unicode);
>>             _text.add(piece);
>>
>>    
>>
>
>
>----------------------------------------------------------------------------
>----
>
>
>  
>
>>/* ====================================================================
>> * The Apache Software License, Version 1.1
>> *
>> * Copyright (c) 2003 The Apache Software Foundation.  All rights
>> * reserved.
>> *
>> * Redistribution and use in source and binary forms, with or without
>> * modification, are permitted provided that the following conditions
>> * are met:
>> *
>> * 1. Redistributions of source code must retain the above copyright
>> *    notice, this list of conditions and the following disclaimer.
>> *
>> * 2. Redistributions in binary form must reproduce the above copyright
>> *    notice, this list of conditions and the following disclaimer in
>> *    the documentation and/or other materials provided with the
>> *    distribution.
>> *
>> * 3. The end-user documentation included with the redistribution,
>> *    if any, must include the following acknowledgment:
>> *       "This product includes software developed by the
>> *        Apache Software Foundation (http://www.apache.org/)."
>> *    Alternately, this acknowledgment may appear in the software itself,
>> *    if and wherever such third-party acknowledgments normally appear.
>> *
>> * 4. The names "Apache" and "Apache Software Foundation" and
>> *    "Apache POI" must not be used to endorse or promote products
>> *    derived from this software without prior written permission. For
>> *    written permission, please contact apache@apache.org.
>> *
>> * 5. Products derived from this software may not be called "Apache",
>> *    "Apache POI", nor may "Apache" appear in their name, without
>> *    prior written permission of the Apache Software Foundation.
>> *
>> * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
>> * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
>> * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
>> * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
>> * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
>> * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
>> * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
>> * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
>> * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
>> * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
>> * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
>> * SUCH DAMAGE.
>> * ====================================================================
>> *
>> * This software consists of voluntary contributions made by many
>> * individuals on behalf of the Apache Software Foundation.  For more
>> * information on the Apache Software Foundation, please see
>> * <http://www.apache.org/>.
>> */
>>
>>import org.apache.poi.poifs.filesystem.*;
>>import org.apache.poi.util.LittleEndian;
>>
>>import java.io.*;
>>import java.util.Vector;
>>import java.security.MessageDigest;
>>import java.security.NoSuchAlgorithmException;
>>
>>/**
>> * Created by PMS Group RenderSpace Solutions
>> * User: nikolai
>> * Date: May 14, 2003
>> * Time: 10:35:59 AM
>> */
>>public class DocReader {
>>
>>    private POIFSFileSystem fs  = null;
>>    private Writer          out = null;
>>
>>    //jakarta apache HDF project TextPiece.java
>>    public class TextPiece {
>>        private boolean usesUnicode;
>>        private int length;
>>        private int start;
>>        private int end;
>>
>>        public TextPiece(int start, int length, boolean unicode){
>>            usesUnicode = unicode;
>>            this.length = length;
>>            this.start = start;
>>            this.end = start + length;
>>
>>        }
>>
>>        public boolean usesUnicode(){
>>            return usesUnicode;
>>        }
>>
>>        public int getStart(){
>>            return start;
>>        }
>>
>>        public int getEnd(){
>>            return end;
>>        }
>>    }
>>
>>    //jakarta apache HDF project Util.java
>>    public static short convertBytesToShort(byte[] array, int offset){
>>        int firstInt = 0xff & array[offset + 1];
>>        int secondInt = 0xff & array[offset];
>>        return (short)((firstInt << 8) | secondInt);
>>    }
>>
>>
>>    //jakarta apache HDF project WordDocument.java
>>    private Vector findText(byte[] tableStream, int complexOffset) throws
>>    
>>
>IOException{
>  
>
>>       //actual text
>>       Vector text = new Vector();
>>       int pos = complexOffset;
>>       //skips through the prms before we reach the piece table. These
>>    
>>
>contain data
>  
>
>>       //for actual fast saved files
>>       while(tableStream[pos] == 1){
>>           pos++;
>>           int skip = LittleEndian.getShort(tableStream, pos);
>>           pos += 2 + skip;
>>       }
>>       if(tableStream[pos] != 2){
>>           throw new IOException("corrupted Word file");
>>       }
>>       else{
>>           //parse out the text pieces
>>           int pieceTableSize = LittleEndian.getInt(tableStream, ++pos);
>>           pos += 4;
>>           int pieces = (pieceTableSize - 4) / 12;
>>           for (int x = 0; x < pieces; x++){
>>               int filePos = LittleEndian.getInt(tableStream, pos +
>>    
>>
>((pieces + 1) * 4) + (x * 8) + 2);
>  
>
>>               boolean unicode = false;
>>               if ((filePos & 0x40000000) == 0){
>>                   unicode = true;
>>               }
>>               else{
>>                   unicode = false;
>>                   filePos &= ~(0x40000000);//gives me FC in doc stream
>>                   filePos /= 2;
>>               }
>>               int totLength = (unicode)?(LittleEndian.getInt(tableStream,
>>    
>>
>pos + (x + 1) * 4) - LittleEndian.getInt(tableStream, pos + (x * 4)))*2:
>  
>
>>                                         LittleEndian.getInt(tableStream,
>>    
>>
>pos + (x + 1) * 4) - LittleEndian.getInt(tableStream, pos + (x * 4));
>  
>
>>               TextPiece piece = new TextPiece(filePos, totLength,
>>    
>>
>unicode);
>  
>
>>               text.add(piece);
>>           }
>>
>>       }
>>
>>       return text;
>>   }
>>
>>   //jakarta apache HDF project WordDocument.java
>>   private static void writeText(Vector textPieces, byte[]
>>    
>>
>documentContent, Writer out) throws IOException{
>  
>
>>       for(int x = 0; x < textPieces.size(); x++){
>>         TextPiece nextPiece = (TextPiece)textPieces.get(x);
>>         int start = nextPiece.getStart();
>>         int end = nextPiece.getEnd();
>>         boolean unicode = nextPiece.usesUnicode();
>>         int add = 1;
>>
>>         if(unicode){
>>           add = 2;
>>           char ch;
>>           for(int y = start; y < end; y += add){
>>              ch = (char)convertBytesToShort(documentContent, y);
>>              out.write(ch);
>>           }
>>         }
>>         else{
>>           String sText = new String(documentContent, start, end-start);
>>           out.write(sText);
>>         }
>>         out.flush();
>>       }//for 2
>>
>>   }
>>
>>
>>   private static byte[] getDocumentContent(POIFSFileSystem fs) throws
>>    
>>
>FileNotFoundException, IOException{
>  
>
>>      DirectoryEntry root = fs.getRoot();
>>      DocumentEntry document =
>>    
>>
>(DocumentEntry)root.getEntry("WordDocument");
>  
>
>>      DocumentInputStream stream = new DocumentInputStream(document);
>>      byte[] content = new byte[stream.available()];
>>      stream.read(content, 0, stream.available());
>>
>>      return content;
>>   }
>>
>>   //retrieves an internal table which is the mother of a .doc file
>>   private static byte[] getDescrTableContent(POIFSFileSystem fs, byte[]
>>    
>>
>documentContent) throws FileNotFoundException, IOException{
>  
>
>>      DirectoryEntry root = fs.getRoot();
>>      int info = LittleEndian.getShort(documentContent, 0xa);
>>      boolean useTable1 = (info & 0x200) != 0;
>>
>>      //process the text and formatting properties
>>      String tablename = "";
>>      if(useTable1){
>>         tablename="1Table";
>>      }else{
>>         tablename="0Table";
>>      }
>>      int complexOffset = LittleEndian.getInt(documentContent, 0x1a2);
>>
>>      DocumentEntry tableEntry = (DocumentEntry)root.getEntry(tablename);
>>      DocumentInputStream tableStream = new
>>    
>>
>DocumentInputStream(tableEntry);
>  
>
>>      byte[] tcontent = new byte[tableStream.available()];
>>      tableStream.read(tcontent, 0, tableStream.available());
>>
>>      return tcontent;
>>   }
>>
>>
>>   //retrieves an internal table which is the mother of a .doc file
>>   private static byte[] getDescrTableContent(POIFSFileSystem fs) throws
>>    
>>
>FileNotFoundException, IOException{
>  
>
>>      DirectoryEntry root = fs.getRoot();
>>      DocumentEntry document =
>>    
>>
>(DocumentEntry)root.getEntry("WordDocument");
>  
>
>>      DocumentInputStream stream = new DocumentInputStream(document);
>>      byte[] content = new byte[stream.available()];
>>      stream.read(content, 0, stream.available());
>>
>>      int info = LittleEndian.getShort(content, 0xa);
>>      boolean useTable1 = (info & 0x200) != 0;
>>
>>      //process the text and formatting properties
>>      String tablename = "";
>>      if(useTable1){
>>         tablename="1Table";
>>      }else{
>>         tablename="0Table";
>>      }
>>      int complexOffset = LittleEndian.getInt(content, 0x1a2);
>>
>>      DocumentEntry tableEntry = (DocumentEntry)root.getEntry(tablename);
>>      DocumentInputStream tableStream = new
>>    
>>
>DocumentInputStream(tableEntry);
>  
>
>>      byte[] tcontent = new byte[tableStream.available()];
>>      tableStream.read(tcontent, 0, tableStream.available());
>>
>>      return content;
>>   }
>>
>>   public DocReader(InputStream istream, Writer out)throws IOException{
>>      this.out = out;
>>      this.fs = new POIFSFileSystem(istream);
>>   }
>>
>>   public void Process()throws FileNotFoundException, IOException{
>>      byte[] documentContent = getDocumentContent(this.fs);
>>      byte[] descrTableContent = getDescrTableContent(this.fs,
>>    
>>
>documentContent);
>  
>
>>      int complexOffset = LittleEndian.getInt(documentContent, 0x1a2);
>>      Vector textPieces = findText(descrTableContent, complexOffset);
>>      writeText(textPieces, documentContent, this.out);
>>   }
>>
>>   public static void main(String[] args) throws FileNotFoundException,
>>    
>>
>IOException{
>  
>
>>      InputStream istream = new FileInputStream(args[0]);
>>      Writer out = new FileWriter(args[1]);
>>      DocReader reader = new DocReader(istream, out);
>>      reader.Process();
>>      out.close();
>>      istream.close();
>>
>>   }
>>}
>>
>>
>>    
>>
>
>
>----------------------------------------------------------------------------
>----
>
>
>  
>
>>---------------------------------------------------------------------
>>To unsubscribe, e-mail: poi-dev-unsubscribe@jakarta.apache.org
>>For additional commands, e-mail: poi-dev-help@jakarta.apache.org
>>    
>>
>
>
>---------------------------------------------------------------------
>To unsubscribe, e-mail: poi-dev-unsubscribe@jakarta.apache.org
>For additional commands, e-mail: poi-dev-help@jakarta.apache.org
>
>
>
>  
>


Mime
View raw message