xalan-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From minc...@apache.org
Subject cvs commit: xml-xalan/java/src/org/apache/xml/serializer WriterToUTF8Buffered.java ToStream.java WriterToUTF8.java
Date Wed, 09 Jul 2003 22:46:47 GMT
minchau     2003/07/09 15:46:47

  Modified:    java/src/org/apache/xml/serializer WriterToUTF8Buffered.java
                        ToStream.java
  Removed:     java/src/org/apache/xml/serializer WriterToUTF8.java
  Log:
  WriterToUTF8Buffered is now faster because it uses a character
  array rather than a series of string.charAt(i) calls.  Other changes 
  were also made to this class.  
  
  The performance gains apply when writing to an output stream that uses UTF-8 encoding.
  
  WriterToUTF8 (not buffered) is deleted.  On analysis, it was slow because it
  didn't buffer and so it suffered many calls to the underlying OutputStream.  The more work
  that was done to this class to speed it up, the more it looked like WriterToUTF8Buffered.
  PR: bugzilla 21452
  Submitted by:	Brian Minchau
  
  Revision  Changes    Path
  1.3       +110 -40   xml-xalan/java/src/org/apache/xml/serializer/WriterToUTF8Buffered.java
  
  Index: WriterToUTF8Buffered.java
  ===================================================================
  RCS file: /home/cvs/xml-xalan/java/src/org/apache/xml/serializer/WriterToUTF8Buffered.java,v
  retrieving revision 1.2
  retrieving revision 1.3
  diff -u -r1.2 -r1.3
  --- WriterToUTF8Buffered.java	9 Jun 2003 18:59:00 -0000	1.2
  +++ WriterToUTF8Buffered.java	9 Jul 2003 22:46:47 -0000	1.3
  @@ -72,7 +72,7 @@
   {
       
     /** number of characters that the buffer can hold.
  -   * This is a fixed constant is used rather than buf.lenght for performance.
  +   * This is a fixed constant is used rather than m_outputBytes.lenght for performance.
      */
     private static final int buf_length=16*1024;
     
  @@ -83,12 +83,14 @@
      * The internal buffer where data is stored.
      * (sc & sb remove final to compile in JDK 1.1.8)
      */
  -  private final byte buf[];
  +  private final byte m_outputBytes[];
  +  
  +  private final char m_inputChars[];
   
     /**
      * The number of valid bytes in the buffer. This value is always
  -   * in the range <tt>0</tt> through <tt>buf.length</tt>; elements
  -   * <tt>buf[0]</tt> through <tt>buf[count-1]</tt> contain valid
  +   * in the range <tt>0</tt> through <tt>m_outputBytes.length</tt>; elements
  +   * <tt>m_outputBytes[0]</tt> through <tt>m_outputBytes[count-1]</tt> contain valid
      * byte data.
      */
     private int count;
  @@ -107,7 +109,11 @@
         m_os = out;
         // get 3 extra bytes to make buffer overflow checking simpler and faster
         // we won't have to keep checking for a few extra characters
  -      buf = new byte[buf_length + 3];
  +      m_outputBytes = new byte[buf_length + 3];
  +      
  +      // Big enough to hold the input chars that will be transformed
  +      // into output bytes in m_ouputBytes.
  +      m_inputChars = new char[(buf_length/3) + 1];
         count = 0;
         
   //      the old body of this constructor, before the buffersize was changed to a constant
     
  @@ -134,7 +140,7 @@
   //        SerializerMessages.createMessage(SerializerErrorResources.ER_BUFFER_SIZE_LESSTHAN_ZERO, null)); //"Buffer size <= 0");
   //    }
   //
  -//    buf = new byte[size];
  +//    m_outputBytes = new byte[size];
   //    count = 0;
   //  }
   
  @@ -160,18 +166,18 @@
   
       if (c < 0x80)
       {
  -       buf[count++] = (byte) (c);
  +       m_outputBytes[count++] = (byte) (c);
       }
       else if (c < 0x800)
       {
  -      buf[count++] = (byte) (0xc0 + (c >> 6));
  -      buf[count++] = (byte) (0x80 + (c & 0x3f));
  +      m_outputBytes[count++] = (byte) (0xc0 + (c >> 6));
  +      m_outputBytes[count++] = (byte) (0x80 + (c & 0x3f));
       }
       else
       {
  -      buf[count++] = (byte) (0xe0 + (c >> 12));
  -      buf[count++] = (byte) (0x80 + ((c >> 6) & 0x3f));
  -      buf[count++] = (byte) (0x80 + (c & 0x3f));
  +      m_outputBytes[count++] = (byte) (0xe0 + (c >> 12));
  +      m_outputBytes[count++] = (byte) (0x80 + ((c >> 6) & 0x3f));
  +      m_outputBytes[count++] = (byte) (0x80 + (c & 0x3f));
       }
     }
   
  @@ -186,7 +192,7 @@
      *
      * @throws java.io.IOException
      */
  -  private final void writeDirect(
  +  private final void writeWithoutBuffering(
             final char chars[], final int start, final int length)
               throws java.io.IOException
     {
  @@ -221,7 +227,7 @@
      *
      * @exception  IOException  If an I/O error occurs
      */
  -  private final void writeDirect(final String s) throws IOException
  +  private final void writeWithoutBuffering(final String s) throws IOException
     {
   
       final int n = s.length();
  @@ -281,7 +287,7 @@
            * directly. The buffer is already flushed so this is a 
            * safe thing to do.
            */
  -        writeDirect(chars, start, length);
  +        writeWithoutBuffering(chars, start, length);
           return;
         }
       }
  @@ -289,7 +295,7 @@
   
   
       final int n = length+start;
  -    final byte[] buf_loc = buf; // local reference for faster access
  +    final byte[] buf_loc = m_outputBytes; // local reference for faster access
       int count_loc = count;      // local integer for faster access
       int i = start;
       {
  @@ -336,51 +342,68 @@
     public void write(final String s) throws IOException
     {
   
  -    final int length = s.length();
  -
       // We multiply the length by three since this is the maximum length
       // of the characters that we can put into the buffer.  It is possible
       // for each Unicode character to expand to three bytes.
  -
  +    final int length = s.length();
       int lengthx3 = (length << 1) + length;
   
  -    if (lengthx3 >= buf_length)
  +    if (lengthx3 >= buf_length - count)
       {
  -
  -      /* If the request length exceeds the size of the output buffer,
  -         flush the output buffer and then write the data directly.
  -         In this way buffered streams will cascade harmlessly. */
  +      // The requested length is greater than the unused part of the buffer
         flushBuffer();
  -      writeDirect(s);
   
  -      return;
  +      if (lengthx3 >= buf_length)
  +      {
  +        /*
  +         * The requested length exceeds the size of the buffer,
  +         * so don't bother to buffer this one, just write it out
  +         * directly. The buffer is already flushed so this is a 
  +         * safe thing to do.
  +         */
  +        writeWithoutBuffering(s);
  +        return;
  +      }
       }
   
  -    if (lengthx3 > buf_length - count)
  +
  +    s.getChars(0, length , m_inputChars, 0);
  +    final char[] chars = m_inputChars;
  +    final int n = length;
  +    final byte[] buf_loc = m_outputBytes; // local reference for faster access
  +    int count_loc = count;      // local integer for faster access
  +    int i = 0;
       {
  -      flushBuffer();
  +        /* This block could be omitted and the code would produce
  +         * the same result. But this block exists to give the JIT
  +         * a better chance of optimizing a tight and common loop which
  +         * occurs when writing out ASCII characters. 
  +         */ 
  +        char c;
  +        for(; i < n && (c = chars[i])< 0x80 ; i++ )
  +            buf_loc[count_loc++] = (byte)c;
       }
  -
  -    final OutputStream os = m_os;
  -
  -    for (int i = 0; i < length; i++)
  +    for (; i < n; i++)
       {
  -      final char c = s.charAt(i);
  +
  +      final char c = chars[i];
   
         if (c < 0x80)
  -        buf[count++] = (byte) (c);
  +        buf_loc[count_loc++] = (byte) (c);
         else if (c < 0x800)
         {
  -        buf[count++] = (byte) (0xc0 + (c >> 6));
  -        buf[count++] = (byte) (0x80 + (c & 0x3f));
  +        buf_loc[count_loc++] = (byte) (0xc0 + (c >> 6));
  +        buf_loc[count_loc++] = (byte) (0x80 + (c & 0x3f));
         }
         else
         {
  -        buf[count++] = (byte) (0xe0 + (c >> 12));
  -        buf[count++] = (byte) (0x80 + ((c >> 6) & 0x3f));
  -        buf[count++] = (byte) (0x80 + (c & 0x3f));
  +        buf_loc[count_loc++] = (byte) (0xe0 + (c >> 12));
  +        buf_loc[count_loc++] = (byte) (0x80 + ((c >> 6) & 0x3f));
  +        buf_loc[count_loc++] = (byte) (0x80 + (c & 0x3f));
         }
       }
  +    // Store the local integer back into the instance variable
  +    count = count_loc;
   
     }
   
  @@ -394,7 +417,7 @@
   
       if (count > 0)
       {
  -      m_os.write(buf, 0, count);
  +      m_os.write(m_outputBytes, 0, count);
   
         count = 0;
       }
  @@ -441,5 +464,52 @@
     public OutputStream getOutputStream()
     {
       return m_os;
  +  }
  +  
  +  /**
  +   * 
  +   * @param s A string with only ASCII characters
  +   * @throws IOException
  +   */
  +  public void directWrite(final String s) throws IOException
  +  {
  +
  +    // We multiply the length by three since this is the maximum length
  +    // of the characters that we can put into the buffer.  It is possible
  +    // for each Unicode character to expand to three bytes.
  +    final int length = s.length();
  +    int lengthx3 = (length << 1) + length;
  +
  +    if (lengthx3 >= buf_length - count)
  +    {
  +      // The requested length is greater than the unused part of the buffer
  +      flushBuffer();
  +
  +      if (lengthx3 >= buf_length)
  +      {
  +        /*
  +         * The requested length exceeds the size of the buffer,
  +         * so don't bother to buffer this one, just write it out
  +         * directly. The buffer is already flushed so this is a 
  +         * safe thing to do.
  +         */
  +        writeWithoutBuffering(s);
  +        return;
  +      }
  +    }
  +
  +
  +    s.getChars(0, length , m_inputChars, 0);
  +    final char[] chars = m_inputChars;
  +    final byte[] buf_loc = m_outputBytes; // local reference for faster access
  +    int count_loc = count;      // local integer for faster access
  +    int i = 0;
  +    while( i < length) 
  +        buf_loc[count_loc++] = (byte)chars[i++];
  +
  + 
  +    // Store the local integer back into the instance variable
  +    count = count_loc;
  +
     }
   }
  
  
  
  1.18      +4 -22     xml-xalan/java/src/org/apache/xml/serializer/ToStream.java
  
  Index: ToStream.java
  ===================================================================
  RCS file: /home/cvs/xml-xalan/java/src/org/apache/xml/serializer/ToStream.java,v
  retrieving revision 1.17
  retrieving revision 1.18
  diff -u -r1.17 -r1.18
  --- ToStream.java	9 Jul 2003 21:02:31 -0000	1.17
  +++ ToStream.java	9 Jul 2003 22:46:47 -0000	1.18
  @@ -295,12 +295,7 @@
                       else
                            ((WriterToUTF8Buffered) writer).flushBuffer();
                   }
  -                if (writer instanceof WriterToUTF8)
  -                {
  -                    if (m_shouldFlush)
  -                        writer.flush();
  -                }
  -                else if (writer instanceof WriterToASCI)
  +                if (writer instanceof WriterToASCI)
                   {
                       if (m_shouldFlush)
                           writer.flush();
  @@ -331,9 +326,7 @@
   
           if (m_writer instanceof WriterToUTF8Buffered)
               return ((WriterToUTF8Buffered) m_writer).getOutputStream();
  -        if (m_writer instanceof WriterToUTF8)
  -            return ((WriterToUTF8) m_writer).getOutputStream();
  -        else if (m_writer instanceof WriterToASCI)
  +        if (m_writer instanceof WriterToASCI)
               return ((WriterToASCI) m_writer).getOutputStream();
           else
               return null;
  @@ -623,24 +616,13 @@
              //                init(new WriterToUTF8(output), format, defaultProperties, true);
               //            }
            
  -            if (output instanceof java.io.BufferedOutputStream ||
  -            	output.getClass().getName().endsWith("BufferedServletOutputStream"))
  -            {
  -            	/* don't do buffering for 
  -            	 *   BufferedOutputStream
  -            	 *   BufferedServletOutputStream
  -            	 *   ... more ... ?
  -            	 */
  -                init(new WriterToUTF8(output), format, defaultProperties, true);
  -            }
  -            else
  -            {
  +
                   init(
                       new WriterToUTF8Buffered(output),
                       format,
                       defaultProperties,
                       true);
  -            }
  +
   
           }
           else if (
  
  
  

---------------------------------------------------------------------
To unsubscribe, e-mail: xalan-cvs-unsubscribe@xml.apache.org
For additional commands, e-mail: xalan-cvs-help@xml.apache.org


Mime
View raw message