manifoldcf-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From kwri...@apache.org
Subject svn commit: r1443009 - /manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/
Date Wed, 06 Feb 2013 15:17:50 GMT
Author: kwright
Date: Wed Feb  6 15:17:49 2013
New Revision: 1443009

URL: http://svn.apache.org/viewvc?rev=1443009&view=rev
Log:
Get the basic structure of the parser right.

Added:
    manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/DecodingByteReceiver.java
  (with props)
    manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/PretagEncodingDetector.java
  (with props)
    manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/PretagParseState.java
  (with props)
    manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/SingleByteReceiver.java
  (with props)
    manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/SingleCharacterReceiver.java
  (with props)
Modified:
    manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/BOMParseState.java
    manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/ByteReceiver.java
    manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/CharacterReceiver.java
    manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/EncodingDetector.java
    manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/TagParseState.java

Modified: manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/BOMParseState.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/BOMParseState.java?rev=1443009&r1=1443008&r2=1443009&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/BOMParseState.java
(original)
+++ manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/BOMParseState.java
Wed Feb  6 15:17:49 2013
@@ -27,19 +27,20 @@ import java.util.*;
 * Once a preliminary character encoding is determined, an EncodingAccepter is notified,
 * and further bytes are sent to a provided ByteReceiver.
 */
-public class BOMParseState extends EncodingDetector
+public class BOMParseState extends SingleByteReceiver implements EncodingDetector
 {
   protected String encoding = null;
-  protected final ByteReceiver byteReceiver;
+  protected final ByteReceiver overflowByteReceiver;
   
-  /** Constructor.  Pass in the receiver of all overflow bytes.
+  /** Constructor.
+  *@param overflowByteReceiver Pass in the receiver of all overflow bytes.
   * If no receiver is passed in, the detector will stop as soon as the
   * BOM is either seen, or not seen.
   */
-  public BOMParseState(ByteReceiver byteReceiver)
+  public BOMParseState(ByteReceiver overflowByteReceiver)
   {
     super(8);
-    this.byteReceiver = byteReceiver;
+    this.overflowByteReceiver = overflowByteReceiver;
   }
   
   /** Set initial encoding.
@@ -67,14 +68,5 @@ public class BOMParseState extends Encod
     // MHL
     return true;
   }
-  
-  /** Finish up all processing.
-  */
-  @Override
-  public void finishUp()
-    throws ManifoldCFException
-  {
-    // MHL
-  }
 
 }

Modified: manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/ByteReceiver.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/ByteReceiver.java?rev=1443009&r1=1443008&r2=1443009&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/ByteReceiver.java
(original)
+++ manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/ByteReceiver.java
Wed Feb  6 15:17:49 2013
@@ -21,45 +21,41 @@ package org.apache.manifoldcf.core.fuzzy
 import org.apache.manifoldcf.core.interfaces.*;
 import java.io.*;
 
-/** This interface represents a receiver for bytes.
-* Implementers of this interface will accept documents a byte at a time
+/** This class represents a receiver for bytes.
+* Extenders of this class will accept an input stream,
+* and will read from it as requested a chunk at a time.
 */
 public abstract class ByteReceiver
 {
-  protected final byte[] byteBuffer;
+  protected InputStream inputStream = null;
   
   /** Constructor */
-  public ByteReceiver(int chunkSize)
+  public ByteReceiver()
   {
-    byteBuffer = new byte[chunkSize];
   }
   
-  /** Receive a byte stream and process up to chunksize bytes,
-  *@return true if end reached.
+  /** Set the input stream.  The input stream must be
+  * at the point where the bytes being received would start.
+  * The stream is expected to be closed by the caller, when
+  * the operations are all done.
   */
-  public boolean dealWithBytes(InputStream is)
-    throws IOException, ManifoldCFException
+  public void setInputStream(InputStream is)
+    throws IOException
   {
-    int amt = is.read(byteBuffer);
-    if (amt == -1)
-      return true;
-    for (int i = 0; i < amt; i++)
-    {
-      if (dealWithByte(byteBuffer[i]))
-        return true;
-    }
-    return false;
+    this.inputStream = is;
   }
   
-  /** Receive a byte.
-  *@return true to stop further processing.
+  /** Read the byte stream and process a limited chunk of bytes,
+  *@return true if end reached.
   */
-  public abstract boolean dealWithByte(byte b)
-    throws ManifoldCFException;
+  public abstract boolean dealWithBytes()
+    throws IOException, ManifoldCFException;
   
   /** Finish up all processing.
   */
-  public abstract void finishUp()
-    throws ManifoldCFException;
+  public void finishUp()
+    throws ManifoldCFException
+  {
+  }
 
 }

Modified: manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/CharacterReceiver.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/CharacterReceiver.java?rev=1443009&r1=1443008&r2=1443009&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/CharacterReceiver.java
(original)
+++ manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/CharacterReceiver.java
Wed Feb  6 15:17:49 2013
@@ -22,51 +22,38 @@ import org.apache.manifoldcf.core.interf
 import java.io.*;
 
 /** This interface represents a receiver for characters.
-* Implementers of this interface will accept documents a character at a time.
 */
 public abstract class CharacterReceiver
 {
-  protected final char[] charBuffer;
-  
-  public CharacterReceiver()
-  {
-    this(4096);
-  }
+  protected Reader reader = null;
   
   /** Constructor.
   */
-  public CharacterReceiver(int chunkSize)
+  public CharacterReceiver()
   {
-    charBuffer = new char[chunkSize];
   }
   
-  /** Receive a set of characters; process one
-  * chunksize worth.
-  *@return true if done.
+  /** Set the reader we'll be getting characters from.
+  * It is the caller's responsibility to close this when
+  * the caller has no further use for this CharacterReceiver.
   */
-  public boolean dealWithCharacters(Reader r)
-    throws IOException, ManifoldCFException
+  public void setReader(Reader reader)
+    throws IOException
   {
-    int amt = r.read(charBuffer);
-    if (amt == -1)
-      return true;
-    for (int i = 0; i < amt; i++)
-    {
-      if (dealWithCharacter(charBuffer[i]))
-        return true;
-    }
-    return false;
+    this.reader = reader;
   }
   
-  /** Receive a byte.
-  * @return true if done.
+  /** Receive a set of characters; process one chunk worth.
+  *@return true if done.
   */
-  public abstract boolean dealWithCharacter(char c)
-    throws ManifoldCFException;
+  public abstract boolean dealWithCharacters()
+    throws IOException, ManifoldCFException;
   
   /** Finish up all processing.
   */
-  public abstract void finishUp()
-    throws ManifoldCFException;
+  public void finishUp()
+    throws ManifoldCFException
+  {
+  }
 
 }

Added: manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/DecodingByteReceiver.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/DecodingByteReceiver.java?rev=1443009&view=auto
==============================================================================
--- manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/DecodingByteReceiver.java
(added)
+++ manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/DecodingByteReceiver.java
Wed Feb  6 15:17:49 2013
@@ -0,0 +1,75 @@
+/* $Id$ */
+
+/**
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+package org.apache.manifoldcf.core.fuzzyml;
+
+import org.apache.manifoldcf.core.interfaces.*;
+import java.io.*;
+
+/** This class represents a ByteReceiver that passes
+* decoded characters on to a supplied CharacterReceiver.
+*/
+public class DecodingByteReceiver extends ByteReceiver
+{
+  protected final CharacterReceiver charReceiver;
+  protected final String charSet;
+  
+  public DecodingByteReceiver(int chunkSize, String charSet, CharacterReceiver charReceiver)
+    throws IOException
+  {
+    super();
+    this.charSet = charSet;
+    this.charReceiver = charReceiver;
+  }
+  
+  /** Set the input stream.  The input stream must be
+  * at the point where the bytes being received would start.
+  * The stream is expected to be closed by the caller, when
+  * the operations are all done.
+  */
+  @Override
+  public void setInputStream(InputStream is)
+    throws IOException
+  {
+    super.setInputStream(is);
+    // Create a reader based on the encoding and the input stream
+    Reader reader = new InputStreamReader(is,charSet);
+    charReceiver.setReader(reader);
+  }
+
+  /** Have the character receiver process one chunk of characters decoded from the byte stream.
+  *@return true if end reached.
+  */
+  @Override
+  public boolean dealWithBytes()
+    throws IOException, ManifoldCFException
+  {
+    return charReceiver.dealWithCharacters();
+  }
+  
+  /** Finish up all processing.
+  */
+  @Override
+  public void finishUp()
+    throws ManifoldCFException
+  {
+    super.finishUp();
+    charReceiver.finishUp();
+  }
+
+}

Propchange: manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/DecodingByteReceiver.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/DecodingByteReceiver.java
------------------------------------------------------------------------------
    svn:keywords = Id

Modified: manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/EncodingDetector.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/EncodingDetector.java?rev=1443009&r1=1443008&r2=1443009&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/EncodingDetector.java
(original)
+++ manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/EncodingDetector.java
Wed Feb  6 15:17:49 2013
@@ -20,32 +20,20 @@ package org.apache.manifoldcf.core.fuzzy
 
 import org.apache.manifoldcf.core.interfaces.*;
 
-/** This interface represents a receiver for bytes.
-* Implementers of this interface will accept documents a byte at a time,
-* AFTER an encoding has been set.
+/** This interface represents an encoding detector.
+* Implementers of this interface receive a starting encoding before
+* any other activity takes place, and then allow an updated encoding
+* to be retrieved once the activity is complete.
 */
-public abstract class EncodingDetector extends ByteReceiver
+public interface EncodingDetector
 {
-  protected String currentEncoding = null;
-  
-  /** Constructor */
-  public EncodingDetector(int chunkSize)
-  {
-    super(chunkSize);
-  }
 
   /** Accept a starting encoding value.
   */
-  public void setEncoding(String encoding)
-  {
-    currentEncoding = encoding;
-  }
+  public void setEncoding(String encoding);
   
   /** Read out the detected encoding, when finished.
   */
-  public String getEncoding()
-  {
-    return currentEncoding;
-  }
+  public String getEncoding();
   
 }

Added: manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/PretagEncodingDetector.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/PretagEncodingDetector.java?rev=1443009&view=auto
==============================================================================
--- manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/PretagEncodingDetector.java
(added)
+++ manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/PretagEncodingDetector.java
Wed Feb  6 15:17:49 2013
@@ -0,0 +1,76 @@
+/* $Id$ */
+
+/**
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+package org.apache.manifoldcf.core.fuzzyml;
+
+import org.apache.manifoldcf.core.interfaces.*;
+import org.apache.manifoldcf.core.system.Logging;
+import java.util.*;
+
+/** This class represents the ability to pick an encoding out of a <?...?> preamble.
+*/
+public class PretagEncodingDetector extends PretagParseState implements EncodingDetector
+{
+  protected String currentEncoding = null;
+
+  /** Constructor.  Pass in the post-preamble character receiver.
+  * 
+  */
+  public PretagEncodingDetector(CharacterReceiver postPreambleReceiver)
+  {
+    super(postPreambleReceiver);
+  }
+
+  /** Accept a starting encoding value.
+  */
+  @Override
+  public void setEncoding(String encoding)
+  {
+    currentEncoding = encoding;
+  }
+  
+  /** Read out the detected encoding, when finished.
+  */
+  @Override
+  public String getEncoding()
+  {
+    return currentEncoding;
+  }
+
+  /** Receive a pretag.
+  */
+  @Override
+  protected void notePretag(String tagName, Map<String,String> attributes)
+    throws ManifoldCFException
+  {
+    if (tagName.equals("xml"))
+    {
+      String newEncoding = attributes.get("encoding");
+      if (newEncoding != null)
+      {
+        // Here we can do something fancy, like override the old encoding only if it
+        // has the same basic structure as the original encoding; e.g. ignore 8-bit
+        // encodings if the originally specified one is 16-bit etc.
+        // MHL
+        currentEncoding = newEncoding;
+      }
+    }
+  }
+
+
+}

Propchange: manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/PretagEncodingDetector.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/PretagEncodingDetector.java
------------------------------------------------------------------------------
    svn:keywords = Id

Added: manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/PretagParseState.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/PretagParseState.java?rev=1443009&view=auto
==============================================================================
--- manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/PretagParseState.java
(added)
+++ manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/PretagParseState.java
Wed Feb  6 15:17:49 2013
@@ -0,0 +1,59 @@
+/* $Id$ */
+
+/**
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+package org.apache.manifoldcf.core.fuzzyml;
+
+import org.apache.manifoldcf.core.interfaces.*;
+import org.apache.manifoldcf.core.system.Logging;
+import java.util.*;
+
+/** This class represents the ability to parse <?...?> preamble tags.
+*/
+public class PretagParseState extends SingleCharacterReceiver
+{
+  protected final CharacterReceiver postPreambleReceiver;
+
+  /** Constructor.  Pass in the post-preamble character receiver.
+  * 
+  */
+  public PretagParseState(CharacterReceiver postPreambleReceiver)
+  {
+    // Small buffer - preambles are short
+    super(1024);
+    this.postPreambleReceiver = postPreambleReceiver;
+  }
+
+  /** Receive a character.
+  * @return true if done.
+  */
+  @Override
+  public boolean dealWithCharacter(char c)
+    throws ManifoldCFException
+  {
+    // MHL
+    return true;
+  }
+  
+  protected void notePretag(String tagName, Map<String,String> attributes)
+    throws ManifoldCFException
+  {
+    Logging.misc.debug(" Saw pretag '"+tagName+"'");
+  }
+
+
+}

Propchange: manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/PretagParseState.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/PretagParseState.java
------------------------------------------------------------------------------
    svn:keywords = Id

Added: manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/SingleByteReceiver.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/SingleByteReceiver.java?rev=1443009&view=auto
==============================================================================
--- manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/SingleByteReceiver.java
(added)
+++ manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/SingleByteReceiver.java
Wed Feb  6 15:17:49 2013
@@ -0,0 +1,61 @@
+/* $Id$ */
+
+/**
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+package org.apache.manifoldcf.core.fuzzyml;
+
+import org.apache.manifoldcf.core.interfaces.*;
+import java.io.*;
+
+/** This class represents a receiver for a series of single bytes.
+*/
+public abstract class SingleByteReceiver extends ByteReceiver
+{
+  protected final byte[] byteBuffer;
+  
+  /** Constructor */
+  public SingleByteReceiver(int chunkSize)
+  {
+    byteBuffer = new byte[chunkSize];
+  }
+  
+  /** Read the byte stream and process up to chunksize bytes,
+  *@return true if end reached.
+  */
+  @Override
+  public boolean dealWithBytes()
+    throws IOException, ManifoldCFException
+  {
+    int amt = inputStream.read(byteBuffer);
+    if (amt == -1)
+      return true;
+    for (int i = 0; i < amt; i++)
+    {
+      if (dealWithByte(byteBuffer[i]))
+        return true;
+    }
+    return false;
+  }
+  
+  /** Receive a byte.
+  *@return true to stop further processing.
+  */
+  public abstract boolean dealWithByte(byte b)
+    throws ManifoldCFException;
+
+
+}

Propchange: manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/SingleByteReceiver.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/SingleByteReceiver.java
------------------------------------------------------------------------------
    svn:keywords = Id

Added: manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/SingleCharacterReceiver.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/SingleCharacterReceiver.java?rev=1443009&view=auto
==============================================================================
--- manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/SingleCharacterReceiver.java
(added)
+++ manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/SingleCharacterReceiver.java
Wed Feb  6 15:17:49 2013
@@ -0,0 +1,61 @@
+/* $Id$ */
+
+/**
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+package org.apache.manifoldcf.core.fuzzyml;
+
+import org.apache.manifoldcf.core.interfaces.*;
+import java.io.*;
+
+/** This class represents a receiver for a sequence of individual characters.
+*/
+public abstract class SingleCharacterReceiver extends CharacterReceiver
+{
+  protected final char[] charBuffer;
+  
+  /** Constructor.
+  */
+  public SingleCharacterReceiver(int chunkSize)
+  {
+    charBuffer = new char[chunkSize];
+  }
+  
+  /** Receive a set of characters; process one chunksize worth.
+  *@return true if done.
+  */
+  @Override
+  public boolean dealWithCharacters()
+    throws IOException, ManifoldCFException
+  {
+    int amt = reader.read(charBuffer);
+    if (amt == -1)
+      return true;
+    for (int i = 0; i < amt; i++)
+    {
+      if (dealWithCharacter(charBuffer[i]))
+        return true;
+    }
+    return false;
+  }
+  
+  /** Receive a character.
+  * @return true if done.
+  */
+  public abstract boolean dealWithCharacter(char c)
+    throws ManifoldCFException;
+  
+}

Propchange: manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/SingleCharacterReceiver.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/SingleCharacterReceiver.java
------------------------------------------------------------------------------
    svn:keywords = Id

Modified: manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/TagParseState.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/TagParseState.java?rev=1443009&r1=1443008&r2=1443009&view=diff
==============================================================================
--- manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/TagParseState.java
(original)
+++ manifoldcf/branches/CONNECTORS-633/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/TagParseState.java
Wed Feb  6 15:17:49 2013
@@ -23,7 +23,7 @@ import org.apache.manifoldcf.core.system
 import java.util.*;
 
 /** This class represents the basic, outermost tag parsing state. */
-public class TagParseState extends CharacterReceiver
+public class TagParseState extends SingleCharacterReceiver
 {
   protected static final int TAGPARSESTATE_NORMAL = 0;
   protected static final int TAGPARSESTATE_SAWLEFTBRACKET = 1;
@@ -65,6 +65,7 @@ public class TagParseState extends Chara
 
   public TagParseState()
   {
+    super(65536);
   }
 
   /** Deal with a character.  No exceptions are allowed, since those would represent
@@ -383,13 +384,6 @@ public class TagParseState extends Chara
   {
   }
   
-  @Override
-  public void finishUp()
-    throws ManifoldCFException
-  {
-    // Does nothing
-  }
-
   /** Decode body text */
   protected static String bodyDecode(String input)
   {



Mime
View raw message