hadoop-common-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From cutt...@apache.org
Subject svn commit: r399509 [4/5] - in /lucene/hadoop/trunk: ./ bin/ src/c++/ src/c++/librecordio/ src/c++/librecordio/test/ src/java/org/apache/hadoop/record/ src/java/org/apache/hadoop/record/compiler/ src/java/org/apache/hadoop/record/compiler/generated/ sr...
Date Thu, 04 May 2006 02:04:07 GMT
Added: lucene/hadoop/trunk/src/java/org/apache/hadoop/record/compiler/generated/RccTokenManager.java
URL: http://svn.apache.org/viewcvs/lucene/hadoop/trunk/src/java/org/apache/hadoop/record/compiler/generated/RccTokenManager.java?rev=399509&view=auto
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/record/compiler/generated/RccTokenManager.java (added)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/record/compiler/generated/RccTokenManager.java Wed May  3 19:04:01 2006
@@ -0,0 +1,841 @@
+/* Generated By:JavaCC: Do not edit this line. RccTokenManager.java */
+/**
+ * Copyright 2005 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.record.compiler.generated;
+import org.apache.hadoop.record.compiler.*;
+import java.util.ArrayList;
+import java.util.Hashtable;
+import java.util.Iterator;
+import java.io.File;
+import java.io.FileReader;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+
+public class RccTokenManager implements RccConstants
+{
+  public  java.io.PrintStream debugStream = System.out;
+  public  void setDebugStream(java.io.PrintStream ds) { debugStream = ds; }
+private final int jjMoveStringLiteralDfa0_1()
+{
+   return jjMoveNfa_1(1, 0);
+}
+private final void jjCheckNAdd(int state)
+{
+   if (jjrounds[state] != jjround)
+   {
+      jjstateSet[jjnewStateCnt++] = state;
+      jjrounds[state] = jjround;
+   }
+}
+private final void jjAddStates(int start, int end)
+{
+   do {
+      jjstateSet[jjnewStateCnt++] = jjnextStates[start];
+   } while (start++ != end);
+}
+private final void jjCheckNAddTwoStates(int state1, int state2)
+{
+   jjCheckNAdd(state1);
+   jjCheckNAdd(state2);
+}
+private final void jjCheckNAddStates(int start, int end)
+{
+   do {
+      jjCheckNAdd(jjnextStates[start]);
+   } while (start++ != end);
+}
+private final void jjCheckNAddStates(int start)
+{
+   jjCheckNAdd(jjnextStates[start]);
+   jjCheckNAdd(jjnextStates[start + 1]);
+}
+private final int jjMoveNfa_1(int startState, int curPos)
+{
+   int[] nextStates;
+   int startsAt = 0;
+   jjnewStateCnt = 5;
+   int i = 1;
+   jjstateSet[0] = startState;
+   int j, kind = 0x7fffffff;
+   for (;;)
+   {
+      if (++jjround == 0x7fffffff)
+         ReInitRounds();
+      if (curChar < 64)
+      {
+         long l = 1L << curChar;
+         MatchLoop: do
+         {
+            switch(jjstateSet[--i])
+            {
+               case 1:
+                  if ((0x2400L & l) != 0L)
+                  {
+                     if (kind > 6)
+                        kind = 6;
+                  }
+                  else if (curChar == 47)
+                     jjstateSet[jjnewStateCnt++] = 0;
+                  if (curChar == 13)
+                     jjstateSet[jjnewStateCnt++] = 3;
+                  break;
+               case 0:
+                  if (curChar == 47 && kind > 6)
+                     kind = 6;
+                  break;
+               case 2:
+                  if ((0x2400L & l) != 0L && kind > 6)
+                     kind = 6;
+                  break;
+               case 3:
+                  if (curChar == 10 && kind > 6)
+                     kind = 6;
+                  break;
+               case 4:
+                  if (curChar == 13)
+                     jjstateSet[jjnewStateCnt++] = 3;
+                  break;
+               default : break;
+            }
+         } while(i != startsAt);
+      }
+      else if (curChar < 128)
+      {
+         long l = 1L << (curChar & 077);
+         MatchLoop: do
+         {
+            switch(jjstateSet[--i])
+            {
+               default : break;
+            }
+         } while(i != startsAt);
+      }
+      else
+      {
+         int i2 = (curChar & 0xff) >> 6;
+         long l2 = 1L << (curChar & 077);
+         MatchLoop: do
+         {
+            switch(jjstateSet[--i])
+            {
+               default : break;
+            }
+         } while(i != startsAt);
+      }
+      if (kind != 0x7fffffff)
+      {
+         jjmatchedKind = kind;
+         jjmatchedPos = curPos;
+         kind = 0x7fffffff;
+      }
+      ++curPos;
+      if ((i = jjnewStateCnt) == (startsAt = 5 - (jjnewStateCnt = startsAt)))
+         return curPos;
+      try { curChar = input_stream.readChar(); }
+      catch(java.io.IOException e) { return curPos; }
+   }
+}
+private final int jjStopStringLiteralDfa_0(int pos, long active0)
+{
+   switch (pos)
+   {
+      case 0:
+         if ((active0 & 0xfff800L) != 0L)
+         {
+            jjmatchedKind = 32;
+            return 4;
+         }
+         return -1;
+      case 1:
+         if ((active0 & 0xfff800L) != 0L)
+         {
+            jjmatchedKind = 32;
+            jjmatchedPos = 1;
+            return 4;
+         }
+         return -1;
+      case 2:
+         if ((active0 & 0x7ef800L) != 0L)
+         {
+            jjmatchedKind = 32;
+            jjmatchedPos = 2;
+            return 4;
+         }
+         if ((active0 & 0x810000L) != 0L)
+            return 4;
+         return -1;
+      case 3:
+         if ((active0 & 0x24000L) != 0L)
+            return 4;
+         if ((active0 & 0x7cb800L) != 0L)
+         {
+            jjmatchedKind = 32;
+            jjmatchedPos = 3;
+            return 4;
+         }
+         return -1;
+      case 4:
+         if ((active0 & 0x41000L) != 0L)
+            return 4;
+         if ((active0 & 0x78a800L) != 0L)
+         {
+            jjmatchedKind = 32;
+            jjmatchedPos = 4;
+            return 4;
+         }
+         return -1;
+      case 5:
+         if ((active0 & 0x680800L) != 0L)
+            return 4;
+         if ((active0 & 0x10a000L) != 0L)
+         {
+            jjmatchedKind = 32;
+            jjmatchedPos = 5;
+            return 4;
+         }
+         return -1;
+      default :
+         return -1;
+   }
+}
+private final int jjStartNfa_0(int pos, long active0)
+{
+   return jjMoveNfa_0(jjStopStringLiteralDfa_0(pos, active0), pos + 1);
+}
+private final int jjStopAtPos(int pos, int kind)
+{
+   jjmatchedKind = kind;
+   jjmatchedPos = pos;
+   return pos + 1;
+}
+private final int jjStartNfaWithStates_0(int pos, int kind, int state)
+{
+   jjmatchedKind = kind;
+   jjmatchedPos = pos;
+   try { curChar = input_stream.readChar(); }
+   catch(java.io.IOException e) { return pos + 1; }
+   return jjMoveNfa_0(state, pos + 1);
+}
+private final int jjMoveStringLiteralDfa0_0()
+{
+   switch(curChar)
+   {
+      case 44:
+         return jjStopAtPos(0, 29);
+      case 46:
+         return jjStopAtPos(0, 30);
+      case 47:
+         return jjMoveStringLiteralDfa1_0(0x120L);
+      case 59:
+         return jjStopAtPos(0, 28);
+      case 60:
+         return jjStopAtPos(0, 26);
+      case 62:
+         return jjStopAtPos(0, 27);
+      case 98:
+         return jjMoveStringLiteralDfa1_0(0x20c000L);
+      case 99:
+         return jjMoveStringLiteralDfa1_0(0x1000L);
+      case 100:
+         return jjMoveStringLiteralDfa1_0(0x80000L);
+      case 102:
+         return jjMoveStringLiteralDfa1_0(0x40000L);
+      case 105:
+         return jjMoveStringLiteralDfa1_0(0x12000L);
+      case 108:
+         return jjMoveStringLiteralDfa1_0(0x20000L);
+      case 109:
+         return jjMoveStringLiteralDfa1_0(0x800800L);
+      case 117:
+         return jjMoveStringLiteralDfa1_0(0x100000L);
+      case 118:
+         return jjMoveStringLiteralDfa1_0(0x400000L);
+      case 123:
+         return jjStopAtPos(0, 24);
+      case 125:
+         return jjStopAtPos(0, 25);
+      default :
+         return jjMoveNfa_0(0, 0);
+   }
+}
+private final int jjMoveStringLiteralDfa1_0(long active0)
+{
+   try { curChar = input_stream.readChar(); }
+   catch(java.io.IOException e) {
+      jjStopStringLiteralDfa_0(0, active0);
+      return 1;
+   }
+   switch(curChar)
+   {
+      case 42:
+         if ((active0 & 0x100L) != 0L)
+            return jjStopAtPos(1, 8);
+         break;
+      case 47:
+         if ((active0 & 0x20L) != 0L)
+            return jjStopAtPos(1, 5);
+         break;
+      case 97:
+         return jjMoveStringLiteralDfa2_0(active0, 0x800000L);
+      case 101:
+         return jjMoveStringLiteralDfa2_0(active0, 0x400000L);
+      case 108:
+         return jjMoveStringLiteralDfa2_0(active0, 0x41000L);
+      case 110:
+         return jjMoveStringLiteralDfa2_0(active0, 0x12000L);
+      case 111:
+         return jjMoveStringLiteralDfa2_0(active0, 0xa8800L);
+      case 115:
+         return jjMoveStringLiteralDfa2_0(active0, 0x100000L);
+      case 117:
+         return jjMoveStringLiteralDfa2_0(active0, 0x200000L);
+      case 121:
+         return jjMoveStringLiteralDfa2_0(active0, 0x4000L);
+      default :
+         break;
+   }
+   return jjStartNfa_0(0, active0);
+}
+private final int jjMoveStringLiteralDfa2_0(long old0, long active0)
+{
+   if (((active0 &= old0)) == 0L)
+      return jjStartNfa_0(0, old0); 
+   try { curChar = input_stream.readChar(); }
+   catch(java.io.IOException e) {
+      jjStopStringLiteralDfa_0(1, active0);
+      return 2;
+   }
+   switch(curChar)
+   {
+      case 97:
+         return jjMoveStringLiteralDfa3_0(active0, 0x1000L);
+      case 99:
+         return jjMoveStringLiteralDfa3_0(active0, 0x402000L);
+      case 100:
+         return jjMoveStringLiteralDfa3_0(active0, 0x800L);
+      case 102:
+         return jjMoveStringLiteralDfa3_0(active0, 0x200000L);
+      case 110:
+         return jjMoveStringLiteralDfa3_0(active0, 0x20000L);
+      case 111:
+         return jjMoveStringLiteralDfa3_0(active0, 0x48000L);
+      case 112:
+         if ((active0 & 0x800000L) != 0L)
+            return jjStartNfaWithStates_0(2, 23, 4);
+         break;
+      case 116:
+         if ((active0 & 0x10000L) != 0L)
+            return jjStartNfaWithStates_0(2, 16, 4);
+         return jjMoveStringLiteralDfa3_0(active0, 0x104000L);
+      case 117:
+         return jjMoveStringLiteralDfa3_0(active0, 0x80000L);
+      default :
+         break;
+   }
+   return jjStartNfa_0(1, active0);
+}
+private final int jjMoveStringLiteralDfa3_0(long old0, long active0)
+{
+   if (((active0 &= old0)) == 0L)
+      return jjStartNfa_0(1, old0); 
+   try { curChar = input_stream.readChar(); }
+   catch(java.io.IOException e) {
+      jjStopStringLiteralDfa_0(2, active0);
+      return 3;
+   }
+   switch(curChar)
+   {
+      case 97:
+         return jjMoveStringLiteralDfa4_0(active0, 0x40000L);
+      case 98:
+         return jjMoveStringLiteralDfa4_0(active0, 0x80000L);
+      case 101:
+         if ((active0 & 0x4000L) != 0L)
+            return jjStartNfaWithStates_0(3, 14, 4);
+         break;
+      case 102:
+         return jjMoveStringLiteralDfa4_0(active0, 0x200000L);
+      case 103:
+         if ((active0 & 0x20000L) != 0L)
+            return jjStartNfaWithStates_0(3, 17, 4);
+         break;
+      case 108:
+         return jjMoveStringLiteralDfa4_0(active0, 0xa000L);
+      case 114:
+         return jjMoveStringLiteralDfa4_0(active0, 0x100000L);
+      case 115:
+         return jjMoveStringLiteralDfa4_0(active0, 0x1000L);
+      case 116:
+         return jjMoveStringLiteralDfa4_0(active0, 0x400000L);
+      case 117:
+         return jjMoveStringLiteralDfa4_0(active0, 0x800L);
+      default :
+         break;
+   }
+   return jjStartNfa_0(2, active0);
+}
+private final int jjMoveStringLiteralDfa4_0(long old0, long active0)
+{
+   if (((active0 &= old0)) == 0L)
+      return jjStartNfa_0(2, old0); 
+   try { curChar = input_stream.readChar(); }
+   catch(java.io.IOException e) {
+      jjStopStringLiteralDfa_0(3, active0);
+      return 4;
+   }
+   switch(curChar)
+   {
+      case 101:
+         return jjMoveStringLiteralDfa5_0(active0, 0x208000L);
+      case 105:
+         return jjMoveStringLiteralDfa5_0(active0, 0x100000L);
+      case 108:
+         return jjMoveStringLiteralDfa5_0(active0, 0x80800L);
+      case 111:
+         return jjMoveStringLiteralDfa5_0(active0, 0x400000L);
+      case 115:
+         if ((active0 & 0x1000L) != 0L)
+            return jjStartNfaWithStates_0(4, 12, 4);
+         break;
+      case 116:
+         if ((active0 & 0x40000L) != 0L)
+            return jjStartNfaWithStates_0(4, 18, 4);
+         break;
+      case 117:
+         return jjMoveStringLiteralDfa5_0(active0, 0x2000L);
+      default :
+         break;
+   }
+   return jjStartNfa_0(3, active0);
+}
+private final int jjMoveStringLiteralDfa5_0(long old0, long active0)
+{
+   if (((active0 &= old0)) == 0L)
+      return jjStartNfa_0(3, old0); 
+   try { curChar = input_stream.readChar(); }
+   catch(java.io.IOException e) {
+      jjStopStringLiteralDfa_0(4, active0);
+      return 5;
+   }
+   switch(curChar)
+   {
+      case 97:
+         return jjMoveStringLiteralDfa6_0(active0, 0x8000L);
+      case 100:
+         return jjMoveStringLiteralDfa6_0(active0, 0x2000L);
+      case 101:
+         if ((active0 & 0x800L) != 0L)
+            return jjStartNfaWithStates_0(5, 11, 4);
+         else if ((active0 & 0x80000L) != 0L)
+            return jjStartNfaWithStates_0(5, 19, 4);
+         break;
+      case 110:
+         return jjMoveStringLiteralDfa6_0(active0, 0x100000L);
+      case 114:
+         if ((active0 & 0x200000L) != 0L)
+            return jjStartNfaWithStates_0(5, 21, 4);
+         else if ((active0 & 0x400000L) != 0L)
+            return jjStartNfaWithStates_0(5, 22, 4);
+         break;
+      default :
+         break;
+   }
+   return jjStartNfa_0(4, active0);
+}
+private final int jjMoveStringLiteralDfa6_0(long old0, long active0)
+{
+   if (((active0 &= old0)) == 0L)
+      return jjStartNfa_0(4, old0); 
+   try { curChar = input_stream.readChar(); }
+   catch(java.io.IOException e) {
+      jjStopStringLiteralDfa_0(5, active0);
+      return 6;
+   }
+   switch(curChar)
+   {
+      case 101:
+         if ((active0 & 0x2000L) != 0L)
+            return jjStartNfaWithStates_0(6, 13, 4);
+         break;
+      case 103:
+         if ((active0 & 0x100000L) != 0L)
+            return jjStartNfaWithStates_0(6, 20, 4);
+         break;
+      case 110:
+         if ((active0 & 0x8000L) != 0L)
+            return jjStartNfaWithStates_0(6, 15, 4);
+         break;
+      default :
+         break;
+   }
+   return jjStartNfa_0(5, active0);
+}
+static final long[] jjbitVec0 = {
+   0x0L, 0x0L, 0xffffffffffffffffL, 0xffffffffffffffffL
+};
+private final int jjMoveNfa_0(int startState, int curPos)
+{
+   int[] nextStates;
+   int startsAt = 0;
+   jjnewStateCnt = 5;
+   int i = 1;
+   jjstateSet[0] = startState;
+   int j, kind = 0x7fffffff;
+   for (;;)
+   {
+      if (++jjround == 0x7fffffff)
+         ReInitRounds();
+      if (curChar < 64)
+      {
+         long l = 1L << curChar;
+         MatchLoop: do
+         {
+            switch(jjstateSet[--i])
+            {
+               case 0:
+                  if (curChar == 34)
+                     jjCheckNAdd(1);
+                  break;
+               case 1:
+                  if ((0xfffffffbffffffffL & l) != 0L)
+                     jjCheckNAddTwoStates(1, 2);
+                  break;
+               case 2:
+                  if (curChar == 34 && kind > 31)
+                     kind = 31;
+                  break;
+               case 4:
+                  if ((0x3ff000000000000L & l) == 0L)
+                     break;
+                  if (kind > 32)
+                     kind = 32;
+                  jjstateSet[jjnewStateCnt++] = 4;
+                  break;
+               default : break;
+            }
+         } while(i != startsAt);
+      }
+      else if (curChar < 128)
+      {
+         long l = 1L << (curChar & 077);
+         MatchLoop: do
+         {
+            switch(jjstateSet[--i])
+            {
+               case 0:
+                  if ((0x7fffffe07fffffeL & l) == 0L)
+                     break;
+                  if (kind > 32)
+                     kind = 32;
+                  jjCheckNAdd(4);
+                  break;
+               case 1:
+                  jjAddStates(0, 1);
+                  break;
+               case 4:
+                  if ((0x7fffffe87fffffeL & l) == 0L)
+                     break;
+                  if (kind > 32)
+                     kind = 32;
+                  jjCheckNAdd(4);
+                  break;
+               default : break;
+            }
+         } while(i != startsAt);
+      }
+      else
+      {
+         int i2 = (curChar & 0xff) >> 6;
+         long l2 = 1L << (curChar & 077);
+         MatchLoop: do
+         {
+            switch(jjstateSet[--i])
+            {
+               case 1:
+                  if ((jjbitVec0[i2] & l2) != 0L)
+                     jjAddStates(0, 1);
+                  break;
+               default : break;
+            }
+         } while(i != startsAt);
+      }
+      if (kind != 0x7fffffff)
+      {
+         jjmatchedKind = kind;
+         jjmatchedPos = curPos;
+         kind = 0x7fffffff;
+      }
+      ++curPos;
+      if ((i = jjnewStateCnt) == (startsAt = 5 - (jjnewStateCnt = startsAt)))
+         return curPos;
+      try { curChar = input_stream.readChar(); }
+      catch(java.io.IOException e) { return curPos; }
+   }
+}
+private final int jjMoveStringLiteralDfa0_2()
+{
+   switch(curChar)
+   {
+      case 42:
+         return jjMoveStringLiteralDfa1_2(0x200L);
+      default :
+         return 1;
+   }
+}
+private final int jjMoveStringLiteralDfa1_2(long active0)
+{
+   try { curChar = input_stream.readChar(); }
+   catch(java.io.IOException e) {
+      return 1;
+   }
+   switch(curChar)
+   {
+      case 47:
+         if ((active0 & 0x200L) != 0L)
+            return jjStopAtPos(1, 9);
+         break;
+      default :
+         return 2;
+   }
+   return 2;
+}
+static final int[] jjnextStates = {
+   1, 2, 
+};
+public static final String[] jjstrLiteralImages = {
+"", null, null, null, null, null, null, null, null, null, null, 
+"\155\157\144\165\154\145", "\143\154\141\163\163", "\151\156\143\154\165\144\145", "\142\171\164\145", 
+"\142\157\157\154\145\141\156", "\151\156\164", "\154\157\156\147", "\146\154\157\141\164", 
+"\144\157\165\142\154\145", "\165\163\164\162\151\156\147", "\142\165\146\146\145\162", 
+"\166\145\143\164\157\162", "\155\141\160", "\173", "\175", "\74", "\76", "\73", "\54", "\56", null, null, };
+public static final String[] lexStateNames = {
+   "DEFAULT", 
+   "WithinOneLineComment", 
+   "WithinMultiLineComment", 
+};
+public static final int[] jjnewLexState = {
+   -1, -1, -1, -1, -1, 1, 0, -1, 2, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 
+   -1, -1, -1, -1, -1, -1, -1, -1, 
+};
+static final long[] jjtoToken = {
+   0x1fffff801L, 
+};
+static final long[] jjtoSkip = {
+   0x37eL, 
+};
+static final long[] jjtoSpecial = {
+   0x360L, 
+};
+static final long[] jjtoMore = {
+   0x480L, 
+};
+protected SimpleCharStream input_stream;
+private final int[] jjrounds = new int[5];
+private final int[] jjstateSet = new int[10];
+StringBuffer image;
+int jjimageLen;
+int lengthOfMatch;
+protected char curChar;
+public RccTokenManager(SimpleCharStream stream){
+   if (SimpleCharStream.staticFlag)
+      throw new Error("ERROR: Cannot use a static CharStream class with a non-static lexical analyzer.");
+   input_stream = stream;
+}
+public RccTokenManager(SimpleCharStream stream, int lexState){
+   this(stream);
+   SwitchTo(lexState);
+}
+public void ReInit(SimpleCharStream stream)
+{
+   jjmatchedPos = jjnewStateCnt = 0;
+   curLexState = defaultLexState;
+   input_stream = stream;
+   ReInitRounds();
+}
+private final void ReInitRounds()
+{
+   int i;
+   jjround = 0x80000001;
+   for (i = 5; i-- > 0;)
+      jjrounds[i] = 0x80000000;
+}
+public void ReInit(SimpleCharStream stream, int lexState)
+{
+   ReInit(stream);
+   SwitchTo(lexState);
+}
+public void SwitchTo(int lexState)
+{
+   if (lexState >= 3 || lexState < 0)
+      throw new TokenMgrError("Error: Ignoring invalid lexical state : " + lexState + ". State unchanged.", TokenMgrError.INVALID_LEXICAL_STATE);
+   else
+      curLexState = lexState;
+}
+
+protected Token jjFillToken()
+{
+   Token t = Token.newToken(jjmatchedKind);
+   t.kind = jjmatchedKind;
+   String im = jjstrLiteralImages[jjmatchedKind];
+   t.image = (im == null) ? input_stream.GetImage() : im;
+   t.beginLine = input_stream.getBeginLine();
+   t.beginColumn = input_stream.getBeginColumn();
+   t.endLine = input_stream.getEndLine();
+   t.endColumn = input_stream.getEndColumn();
+   return t;
+}
+
+int curLexState = 0;
+int defaultLexState = 0;
+int jjnewStateCnt;
+int jjround;
+int jjmatchedPos;
+int jjmatchedKind;
+
+public Token getNextToken() 
+{
+  int kind;
+  Token specialToken = null;
+  Token matchedToken;
+  int curPos = 0;
+
+  EOFLoop :
+  for (;;)
+  {   
+   try   
+   {     
+      curChar = input_stream.BeginToken();
+   }     
+   catch(java.io.IOException e)
+   {        
+      jjmatchedKind = 0;
+      matchedToken = jjFillToken();
+      matchedToken.specialToken = specialToken;
+      return matchedToken;
+   }
+   image = null;
+   jjimageLen = 0;
+
+   for (;;)
+   {
+     switch(curLexState)
+     {
+       case 0:
+         try { input_stream.backup(0);
+            while (curChar <= 32 && (0x100002600L & (1L << curChar)) != 0L)
+               curChar = input_stream.BeginToken();
+         }
+         catch (java.io.IOException e1) { continue EOFLoop; }
+         jjmatchedKind = 0x7fffffff;
+         jjmatchedPos = 0;
+         curPos = jjMoveStringLiteralDfa0_0();
+         break;
+       case 1:
+         jjmatchedKind = 0x7fffffff;
+         jjmatchedPos = 0;
+         curPos = jjMoveStringLiteralDfa0_1();
+         if (jjmatchedPos == 0 && jjmatchedKind > 7)
+         {
+            jjmatchedKind = 7;
+         }
+         break;
+       case 2:
+         jjmatchedKind = 0x7fffffff;
+         jjmatchedPos = 0;
+         curPos = jjMoveStringLiteralDfa0_2();
+         if (jjmatchedPos == 0 && jjmatchedKind > 10)
+         {
+            jjmatchedKind = 10;
+         }
+         break;
+     }
+     if (jjmatchedKind != 0x7fffffff)
+     {
+        if (jjmatchedPos + 1 < curPos)
+           input_stream.backup(curPos - jjmatchedPos - 1);
+        if ((jjtoToken[jjmatchedKind >> 6] & (1L << (jjmatchedKind & 077))) != 0L)
+        {
+           matchedToken = jjFillToken();
+           matchedToken.specialToken = specialToken;
+       if (jjnewLexState[jjmatchedKind] != -1)
+         curLexState = jjnewLexState[jjmatchedKind];
+           return matchedToken;
+        }
+        else if ((jjtoSkip[jjmatchedKind >> 6] & (1L << (jjmatchedKind & 077))) != 0L)
+        {
+           if ((jjtoSpecial[jjmatchedKind >> 6] & (1L << (jjmatchedKind & 077))) != 0L)
+           {
+              matchedToken = jjFillToken();
+              if (specialToken == null)
+                 specialToken = matchedToken;
+              else
+              {
+                 matchedToken.specialToken = specialToken;
+                 specialToken = (specialToken.next = matchedToken);
+              }
+              SkipLexicalActions(matchedToken);
+           }
+           else 
+              SkipLexicalActions(null);
+         if (jjnewLexState[jjmatchedKind] != -1)
+           curLexState = jjnewLexState[jjmatchedKind];
+           continue EOFLoop;
+        }
+        jjimageLen += jjmatchedPos + 1;
+      if (jjnewLexState[jjmatchedKind] != -1)
+        curLexState = jjnewLexState[jjmatchedKind];
+        curPos = 0;
+        jjmatchedKind = 0x7fffffff;
+        try {
+           curChar = input_stream.readChar();
+           continue;
+        }
+        catch (java.io.IOException e1) { }
+     }
+     int error_line = input_stream.getEndLine();
+     int error_column = input_stream.getEndColumn();
+     String error_after = null;
+     boolean EOFSeen = false;
+     try { input_stream.readChar(); input_stream.backup(1); }
+     catch (java.io.IOException e1) {
+        EOFSeen = true;
+        error_after = curPos <= 1 ? "" : input_stream.GetImage();
+        if (curChar == '\n' || curChar == '\r') {
+           error_line++;
+           error_column = 0;
+        }
+        else
+           error_column++;
+     }
+     if (!EOFSeen) {
+        input_stream.backup(1);
+        error_after = curPos <= 1 ? "" : input_stream.GetImage();
+     }
+     throw new TokenMgrError(EOFSeen, curLexState, error_line, error_column, error_after, curChar, TokenMgrError.LEXICAL_ERROR);
+   }
+  }
+}
+
+void SkipLexicalActions(Token matchedToken)
+{
+   switch(jjmatchedKind)
+   {
+      default :
+         break;
+   }
+}
+}

Added: lucene/hadoop/trunk/src/java/org/apache/hadoop/record/compiler/generated/SimpleCharStream.java
URL: http://svn.apache.org/viewcvs/lucene/hadoop/trunk/src/java/org/apache/hadoop/record/compiler/generated/SimpleCharStream.java?rev=399509&view=auto
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/record/compiler/generated/SimpleCharStream.java (added)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/record/compiler/generated/SimpleCharStream.java Wed May  3 19:04:01 2006
@@ -0,0 +1,455 @@
+/* Generated By:JavaCC: Do not edit this line. SimpleCharStream.java Version 4.0 */
+/**
+ * Copyright 2005 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.record.compiler.generated;
+
+/**
+ * An implementation of interface CharStream, where the stream is assumed to
+ * contain only ASCII characters (without unicode processing).
+ */
+
+public class SimpleCharStream
+{
+  public static final boolean staticFlag = false;
+  int bufsize;
+  int available;
+  int tokenBegin;
+  public int bufpos = -1;
+  protected int bufline[];
+  protected int bufcolumn[];
+
+  protected int column = 0;
+  protected int line = 1;
+
+  protected boolean prevCharIsCR = false;
+  protected boolean prevCharIsLF = false;
+
+  protected java.io.Reader inputStream;
+
+  protected char[] buffer;
+  protected int maxNextCharInd = 0;
+  protected int inBuf = 0;
+  protected int tabSize = 8;
+
+  protected void setTabSize(int i) { tabSize = i; }
+  protected int getTabSize(int i) { return tabSize; }
+
+
+  protected void ExpandBuff(boolean wrapAround)
+  {
+     char[] newbuffer = new char[bufsize + 2048];
+     int newbufline[] = new int[bufsize + 2048];
+     int newbufcolumn[] = new int[bufsize + 2048];
+
+     try
+     {
+        if (wrapAround)
+        {
+           System.arraycopy(buffer, tokenBegin, newbuffer, 0, bufsize - tokenBegin);
+           System.arraycopy(buffer, 0, newbuffer,
+                                             bufsize - tokenBegin, bufpos);
+           buffer = newbuffer;
+
+           System.arraycopy(bufline, tokenBegin, newbufline, 0, bufsize - tokenBegin);
+           System.arraycopy(bufline, 0, newbufline, bufsize - tokenBegin, bufpos);
+           bufline = newbufline;
+
+           System.arraycopy(bufcolumn, tokenBegin, newbufcolumn, 0, bufsize - tokenBegin);
+           System.arraycopy(bufcolumn, 0, newbufcolumn, bufsize - tokenBegin, bufpos);
+           bufcolumn = newbufcolumn;
+
+           maxNextCharInd = (bufpos += (bufsize - tokenBegin));
+        }
+        else
+        {
+           System.arraycopy(buffer, tokenBegin, newbuffer, 0, bufsize - tokenBegin);
+           buffer = newbuffer;
+
+           System.arraycopy(bufline, tokenBegin, newbufline, 0, bufsize - tokenBegin);
+           bufline = newbufline;
+
+           System.arraycopy(bufcolumn, tokenBegin, newbufcolumn, 0, bufsize - tokenBegin);
+           bufcolumn = newbufcolumn;
+
+           maxNextCharInd = (bufpos -= tokenBegin);
+        }
+     }
+     catch (Throwable t)
+     {
+        throw new Error(t.getMessage());
+     }
+
+
+     bufsize += 2048;
+     available = bufsize;
+     tokenBegin = 0;
+  }
+
+  protected void FillBuff() throws java.io.IOException
+  {
+     if (maxNextCharInd == available)
+     {
+        if (available == bufsize)
+        {
+           if (tokenBegin > 2048)
+           {
+              bufpos = maxNextCharInd = 0;
+              available = tokenBegin;
+           }
+           else if (tokenBegin < 0)
+              bufpos = maxNextCharInd = 0;
+           else
+              ExpandBuff(false);
+        }
+        else if (available > tokenBegin)
+           available = bufsize;
+        else if ((tokenBegin - available) < 2048)
+           ExpandBuff(true);
+        else
+           available = tokenBegin;
+     }
+
+     int i;
+     try {
+        if ((i = inputStream.read(buffer, maxNextCharInd,
+                                    available - maxNextCharInd)) == -1)
+        {
+           inputStream.close();
+           throw new java.io.IOException();
+        }
+        else
+           maxNextCharInd += i;
+        return;
+     }
+     catch(java.io.IOException e) {
+        --bufpos;
+        backup(0);
+        if (tokenBegin == -1)
+           tokenBegin = bufpos;
+        throw e;
+     }
+  }
+
+  public char BeginToken() throws java.io.IOException
+  {
+     tokenBegin = -1;
+     char c = readChar();
+     tokenBegin = bufpos;
+
+     return c;
+  }
+
+  protected void UpdateLineColumn(char c)
+  {
+     column++;
+
+     if (prevCharIsLF)
+     {
+        prevCharIsLF = false;
+        line += (column = 1);
+     }
+     else if (prevCharIsCR)
+     {
+        prevCharIsCR = false;
+        if (c == '\n')
+        {
+           prevCharIsLF = true;
+        }
+        else
+           line += (column = 1);
+     }
+
+     switch (c)
+     {
+        case '\r' :
+           prevCharIsCR = true;
+           break;
+        case '\n' :
+           prevCharIsLF = true;
+           break;
+        case '\t' :
+           column--;
+           column += (tabSize - (column % tabSize));
+           break;
+        default :
+           break;
+     }
+
+     bufline[bufpos] = line;
+     bufcolumn[bufpos] = column;
+  }
+
+  public char readChar() throws java.io.IOException
+  {
+     if (inBuf > 0)
+     {
+        --inBuf;
+
+        if (++bufpos == bufsize)
+           bufpos = 0;
+
+        return buffer[bufpos];
+     }
+
+     if (++bufpos >= maxNextCharInd)
+        FillBuff();
+
+     char c = buffer[bufpos];
+
+     UpdateLineColumn(c);
+     return (c);
+  }
+
+  /**
+   * @deprecated 
+   * @see #getEndColumn
+   */
+
+  public int getColumn() {
+     return bufcolumn[bufpos];
+  }
+
+  /**
+   * @deprecated 
+   * @see #getEndLine
+   */
+
+  public int getLine() {
+     return bufline[bufpos];
+  }
+
+  public int getEndColumn() {
+     return bufcolumn[bufpos];
+  }
+
+  public int getEndLine() {
+     return bufline[bufpos];
+  }
+
+  public int getBeginColumn() {
+     return bufcolumn[tokenBegin];
+  }
+
+  public int getBeginLine() {
+     return bufline[tokenBegin];
+  }
+
+  public void backup(int amount) {
+
+    inBuf += amount;
+    if ((bufpos -= amount) < 0)
+       bufpos += bufsize;
+  }
+
+  public SimpleCharStream(java.io.Reader dstream, int startline,
+  int startcolumn, int buffersize)
+  {
+    inputStream = dstream;
+    line = startline;
+    column = startcolumn - 1;
+
+    available = bufsize = buffersize;
+    buffer = new char[buffersize];
+    bufline = new int[buffersize];
+    bufcolumn = new int[buffersize];
+  }
+
+  public SimpleCharStream(java.io.Reader dstream, int startline,
+                          int startcolumn)
+  {
+     this(dstream, startline, startcolumn, 4096);
+  }
+
+  public SimpleCharStream(java.io.Reader dstream)
+  {
+     this(dstream, 1, 1, 4096);
+  }
+  public void ReInit(java.io.Reader dstream, int startline,
+  int startcolumn, int buffersize)
+  {
+    inputStream = dstream;
+    line = startline;
+    column = startcolumn - 1;
+
+    if (buffer == null || buffersize != buffer.length)
+    {
+      available = bufsize = buffersize;
+      buffer = new char[buffersize];
+      bufline = new int[buffersize];
+      bufcolumn = new int[buffersize];
+    }
+    prevCharIsLF = prevCharIsCR = false;
+    tokenBegin = inBuf = maxNextCharInd = 0;
+    bufpos = -1;
+  }
+
+  public void ReInit(java.io.Reader dstream, int startline,
+                     int startcolumn)
+  {
+     ReInit(dstream, startline, startcolumn, 4096);
+  }
+
+  public void ReInit(java.io.Reader dstream)
+  {
+     ReInit(dstream, 1, 1, 4096);
+  }
+  public SimpleCharStream(java.io.InputStream dstream, String encoding, int startline,
+  int startcolumn, int buffersize) throws java.io.UnsupportedEncodingException
+  {
+     this(encoding == null ? new java.io.InputStreamReader(dstream) : new java.io.InputStreamReader(dstream, encoding), startline, startcolumn, buffersize);
+  }
+
+  public SimpleCharStream(java.io.InputStream dstream, int startline,
+  int startcolumn, int buffersize)
+  {
+     this(new java.io.InputStreamReader(dstream), startline, startcolumn, buffersize);
+  }
+
+  public SimpleCharStream(java.io.InputStream dstream, String encoding, int startline,
+                          int startcolumn) throws java.io.UnsupportedEncodingException
+  {
+     this(dstream, encoding, startline, startcolumn, 4096);
+  }
+
+  public SimpleCharStream(java.io.InputStream dstream, int startline,
+                          int startcolumn)
+  {
+     this(dstream, startline, startcolumn, 4096);
+  }
+
+  public SimpleCharStream(java.io.InputStream dstream, String encoding) throws java.io.UnsupportedEncodingException
+  {
+     this(dstream, encoding, 1, 1, 4096);
+  }
+
+  public SimpleCharStream(java.io.InputStream dstream)
+  {
+     this(dstream, 1, 1, 4096);
+  }
+
+  public void ReInit(java.io.InputStream dstream, String encoding, int startline,
+                          int startcolumn, int buffersize) throws java.io.UnsupportedEncodingException
+  {
+     ReInit(encoding == null ? new java.io.InputStreamReader(dstream) : new java.io.InputStreamReader(dstream, encoding), startline, startcolumn, buffersize);
+  }
+
+  public void ReInit(java.io.InputStream dstream, int startline,
+                          int startcolumn, int buffersize)
+  {
+     ReInit(new java.io.InputStreamReader(dstream), startline, startcolumn, buffersize);
+  }
+
+  public void ReInit(java.io.InputStream dstream, String encoding) throws java.io.UnsupportedEncodingException
+  {
+     ReInit(dstream, encoding, 1, 1, 4096);
+  }
+
+  public void ReInit(java.io.InputStream dstream)
+  {
+     ReInit(dstream, 1, 1, 4096);
+  }
+  public void ReInit(java.io.InputStream dstream, String encoding, int startline,
+                     int startcolumn) throws java.io.UnsupportedEncodingException
+  {
+     ReInit(dstream, encoding, startline, startcolumn, 4096);
+  }
+  public void ReInit(java.io.InputStream dstream, int startline,
+                     int startcolumn)
+  {
+     ReInit(dstream, startline, startcolumn, 4096);
+  }
+  public String GetImage()
+  {
+     if (bufpos >= tokenBegin)
+        return new String(buffer, tokenBegin, bufpos - tokenBegin + 1);
+     else
+        return new String(buffer, tokenBegin, bufsize - tokenBegin) +
+                              new String(buffer, 0, bufpos + 1);
+  }
+
+  public char[] GetSuffix(int len)
+  {
+     char[] ret = new char[len];
+
+     if ((bufpos + 1) >= len)
+        System.arraycopy(buffer, bufpos - len + 1, ret, 0, len);
+     else
+     {
+        System.arraycopy(buffer, bufsize - (len - bufpos - 1), ret, 0,
+                                                          len - bufpos - 1);
+        System.arraycopy(buffer, 0, ret, len - bufpos - 1, bufpos + 1);
+     }
+
+     return ret;
+  }
+
+  public void Done()
+  {
+     buffer = null;
+     bufline = null;
+     bufcolumn = null;
+  }
+
+  /**
+   * Method to adjust line and column numbers for the start of a token.
+   */
+  public void adjustBeginLineColumn(int newLine, int newCol)
+  {
+     int start = tokenBegin;
+     int len;
+
+     if (bufpos >= tokenBegin)
+     {
+        len = bufpos - tokenBegin + inBuf + 1;
+     }
+     else
+     {
+        len = bufsize - tokenBegin + bufpos + 1 + inBuf;
+     }
+
+     int i = 0, j = 0, k = 0;
+     int nextColDiff = 0, columnDiff = 0;
+
+     while (i < len &&
+            bufline[j = start % bufsize] == bufline[k = ++start % bufsize])
+     {
+        bufline[j] = newLine;
+        nextColDiff = columnDiff + bufcolumn[k] - bufcolumn[j];
+        bufcolumn[j] = newCol + columnDiff;
+        columnDiff = nextColDiff;
+        i++;
+     } 
+
+     if (i < len)
+     {
+        bufline[j] = newLine++;
+        bufcolumn[j] = newCol + columnDiff;
+
+        while (i++ < len)
+        {
+           if (bufline[j = start % bufsize] != bufline[++start % bufsize])
+              bufline[j] = newLine++;
+           else
+              bufline[j] = newLine;
+        }
+     }
+
+     line = bufline[j];
+     column = bufcolumn[j];
+  }
+
+}

Added: lucene/hadoop/trunk/src/java/org/apache/hadoop/record/compiler/generated/Token.java
URL: http://svn.apache.org/viewcvs/lucene/hadoop/trunk/src/java/org/apache/hadoop/record/compiler/generated/Token.java?rev=399509&view=auto
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/record/compiler/generated/Token.java (added)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/record/compiler/generated/Token.java Wed May  3 19:04:01 2006
@@ -0,0 +1,97 @@
+/* Generated By:JavaCC: Do not edit this line. Token.java Version 3.0 */
+/**
+ * Copyright 2005 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.record.compiler.generated;
+
+/**
+ * Describes the input token stream.
+ */
+
+public class Token {
+
+  /**
+   * An integer that describes the kind of this token.  This numbering
+   * system is determined by JavaCCParser, and a table of these numbers is
+   * stored in the file ...Constants.java.
+   */
+  public int kind;
+
+  /**
+   * beginLine and beginColumn describe the position of the first character
+   * of this token; endLine and endColumn describe the position of the
+   * last character of this token.
+   */
+  public int beginLine, beginColumn, endLine, endColumn;
+
+  /**
+   * The string image of the token.
+   */
+  public String image;
+
+  /**
+   * A reference to the next regular (non-special) token from the input
+   * stream.  If this is the last token from the input stream, or if the
+   * token manager has not read tokens beyond this one, this field is
+   * set to null.  This is true only if this token is also a regular
+   * token.  Otherwise, see below for a description of the contents of
+   * this field.
+   */
+  public Token next;
+
+  /**
+   * This field is used to access special tokens that occur prior to this
+   * token, but after the immediately preceding regular (non-special) token.
+   * If there are no such special tokens, this field is set to null.
+   * When there are more than one such special token, this field refers
+   * to the last of these special tokens, which in turn refers to the next
+   * previous special token through its specialToken field, and so on
+   * until the first special token (whose specialToken field is null).
+   * The next fields of special tokens refer to other special tokens that
+   * immediately follow it (without an intervening regular token).  If there
+   * is no such token, this field is null.
+   */
+  public Token specialToken;
+
+  /**
+   * Returns the image.
+   */
+  public String toString()
+  {
+     return image;
+  }
+
+  /**
+   * Returns a new Token object, by default. However, if you want, you
+   * can create and return subclass objects based on the value of ofKind.
+   * Simply add the cases to the switch for all those special cases.
+   * For example, if you have a subclass of Token called IDToken that
+   * you want to create if ofKind is ID, simlpy add something like :
+   *
+   *    case MyParserConstants.ID : return new IDToken();
+   *
+   * to the following switch statement. Then you can cast matchedToken
+   * variable to the appropriate type and use it in your lexical actions.
+   */
+  public static final Token newToken(int ofKind)
+  {
+     switch(ofKind)
+     {
+       default : return new Token();
+     }
+  }
+
+}

Added: lucene/hadoop/trunk/src/java/org/apache/hadoop/record/compiler/generated/TokenMgrError.java
URL: http://svn.apache.org/viewcvs/lucene/hadoop/trunk/src/java/org/apache/hadoop/record/compiler/generated/TokenMgrError.java?rev=399509&view=auto
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/record/compiler/generated/TokenMgrError.java (added)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/record/compiler/generated/TokenMgrError.java Wed May  3 19:04:01 2006
@@ -0,0 +1,149 @@
+/* Generated By:JavaCC: Do not edit this line. TokenMgrError.java Version 3.0 */
+/**
+ * Copyright 2005 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.record.compiler.generated;
+
+public class TokenMgrError extends Error
+{
+   /*
+    * Ordinals for various reasons why an Error of this type can be thrown.
+    */
+
+   /**
+    * Lexical error occured.
+    */
+   static final int LEXICAL_ERROR = 0;
+
+   /**
+    * An attempt wass made to create a second instance of a static token manager.
+    */
+   static final int STATIC_LEXER_ERROR = 1;
+
+   /**
+    * Tried to change to an invalid lexical state.
+    */
+   static final int INVALID_LEXICAL_STATE = 2;
+
+   /**
+    * Detected (and bailed out of) an infinite loop in the token manager.
+    */
+   static final int LOOP_DETECTED = 3;
+
+   /**
+    * Indicates the reason why the exception is thrown. It will have
+    * one of the above 4 values.
+    */
+   int errorCode;
+
+   /**
+    * Replaces unprintable characters by their espaced (or unicode escaped)
+    * equivalents in the given string
+    */
+   protected static final String addEscapes(String str) {
+      StringBuffer retval = new StringBuffer();
+      char ch;
+      for (int i = 0; i < str.length(); i++) {
+        switch (str.charAt(i))
+        {
+           case 0 :
+              continue;
+           case '\b':
+              retval.append("\\b");
+              continue;
+           case '\t':
+              retval.append("\\t");
+              continue;
+           case '\n':
+              retval.append("\\n");
+              continue;
+           case '\f':
+              retval.append("\\f");
+              continue;
+           case '\r':
+              retval.append("\\r");
+              continue;
+           case '\"':
+              retval.append("\\\"");
+              continue;
+           case '\'':
+              retval.append("\\\'");
+              continue;
+           case '\\':
+              retval.append("\\\\");
+              continue;
+           default:
+              if ((ch = str.charAt(i)) < 0x20 || ch > 0x7e) {
+                 String s = "0000" + Integer.toString(ch, 16);
+                 retval.append("\\u" + s.substring(s.length() - 4, s.length()));
+              } else {
+                 retval.append(ch);
+              }
+              continue;
+        }
+      }
+      return retval.toString();
+   }
+
+   /**
+    * Returns a detailed message for the Error when it is thrown by the
+    * token manager to indicate a lexical error.
+    * Parameters : 
+    *    EOFSeen     : indicates if EOF caused the lexicl error
+    *    curLexState : lexical state in which this error occured
+    *    errorLine   : line number when the error occured
+    *    errorColumn : column number when the error occured
+    *    errorAfter  : prefix that was seen before this error occured
+    *    curchar     : the offending character
+    * Note: You can customize the lexical error message by modifying this method.
+    */
+   protected static String LexicalError(boolean EOFSeen, int lexState, int errorLine, int errorColumn, String errorAfter, char curChar) {
+      return("Lexical error at line " +
+           errorLine + ", column " +
+           errorColumn + ".  Encountered: " +
+           (EOFSeen ? "<EOF> " : ("\"" + addEscapes(String.valueOf(curChar)) + "\"") + " (" + (int)curChar + "), ") +
+           "after : \"" + addEscapes(errorAfter) + "\"");
+   }
+
+   /**
+    * You can also modify the body of this method to customize your error messages.
+    * For example, cases like LOOP_DETECTED and INVALID_LEXICAL_STATE are not
+    * of end-users concern, so you can return something like : 
+    *
+    *     "Internal Error : Please file a bug report .... "
+    *
+    * from this method for such cases in the release version of your parser.
+    */
+   public String getMessage() {
+      return super.getMessage();
+   }
+
+   /*
+    * Constructors of various flavors follow.
+    */
+
+   public TokenMgrError() {
+   }
+
+   public TokenMgrError(String message, int reason) {
+      super(message);
+      errorCode = reason;
+   }
+
+   public TokenMgrError(boolean EOFSeen, int lexState, int errorLine, int errorColumn, String errorAfter, char curChar, int reason) {
+      this(LexicalError(EOFSeen, lexState, errorLine, errorColumn, errorAfter, curChar), reason);
+   }
+}

Added: lucene/hadoop/trunk/src/java/org/apache/hadoop/record/compiler/generated/rcc.jj
URL: http://svn.apache.org/viewcvs/lucene/hadoop/trunk/src/java/org/apache/hadoop/record/compiler/generated/rcc.jj?rev=399509&view=auto
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/record/compiler/generated/rcc.jj (added)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/record/compiler/generated/rcc.jj Wed May  3 19:04:01 2006
@@ -0,0 +1,366 @@
+options {
+STATIC=false;
+}
+
+PARSER_BEGIN(Rcc)
+/**
+ * Copyright 2005 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.record.compiler.generated;
+
+import org.apache.hadoop.record.compiler.*;
+import java.util.ArrayList;
+import java.util.Hashtable;
+import java.util.Iterator;
+import java.io.File;
+import java.io.FileReader;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+
+public class Rcc {
+    private static String language = "java";
+    private static ArrayList recFiles = new ArrayList();
+    private static JFile curFile;
+    private static Hashtable recTab;
+    private static String curDir = System.getProperty("user.dir");
+    private static String curFileName;
+    private static String curModuleName;
+
+    public static void main(String args[]) {
+        for (int i=0; i<args.length; i++) {
+            if ("-l".equalsIgnoreCase(args[i]) ||
+                "--language".equalsIgnoreCase(args[i])) {
+                language = args[i+1].toLowerCase();
+                i++;
+            } else {
+                recFiles.add(args[i]);
+            }
+        }
+        if (!"c++".equals(language) && !"java".equals(language)) {
+            System.out.println("Cannot recognize language:" + language);
+            System.exit(1);
+        }
+        if (recFiles.size() == 0) {
+            System.out.println("No record files specified. Exiting.");
+            System.exit(1);
+        }
+        for (int i=0; i<recFiles.size(); i++) {
+            curFileName = (String) recFiles.get(i);
+            File file = new File(curDir, curFileName);
+            try {
+                FileReader reader = new FileReader(file);
+                Rcc parser = new Rcc(reader);
+                try {
+                    recTab = new Hashtable();
+                    curFile = parser.Input();
+                    System.out.println((String) recFiles.get(i) +
+                        " Parsed Successfully");
+                } catch (ParseException e) {
+                    System.out.println(e.toString());
+                    System.exit(1);
+                }
+                try {
+                    reader.close();
+                } catch (IOException e) {
+                }
+            } catch (FileNotFoundException e) {
+                System.out.println("File " + (String) recFiles.get(i) +
+                    " Not found.");
+                System.exit(1);
+            }
+            try {
+                curFile.genCode(language);
+            } catch (IOException e) {
+                System.out.println(e.toString());
+                System.exit(1);
+            }
+        }
+    }
+}
+
+PARSER_END(Rcc)
+
+SKIP :
+{
+  " "
+| "\t"
+| "\n"
+| "\r"
+}
+
+SPECIAL_TOKEN :
+{
+  "//" : WithinOneLineComment
+}
+
+<WithinOneLineComment> SPECIAL_TOKEN :
+{
+  <("//" | "\n" | "\r" | "\r\n" )> : DEFAULT
+}
+
+<WithinOneLineComment> MORE :
+{
+  <~[]>
+}
+
+SPECIAL_TOKEN :
+{
+  "/*" : WithinMultiLineComment
+}
+
+<WithinMultiLineComment> SPECIAL_TOKEN :
+{
+  "*/" : DEFAULT
+}
+
+<WithinMultiLineComment> MORE :
+{
+  <~[]>
+}
+
+TOKEN :
+{
+    <MODULE_TKN: "module">
+|   <RECORD_TKN: "class">
+|   <INCLUDE_TKN: "include">
+|   <BYTE_TKN: "byte">
+|   <BOOLEAN_TKN: "boolean">
+|   <INT_TKN: "int">
+|   <LONG_TKN: "long">
+|   <FLOAT_TKN: "float">
+|   <DOUBLE_TKN: "double">
+|   <USTRING_TKN: "ustring">
+|   <BUFFER_TKN: "buffer">
+|   <VECTOR_TKN: "vector">
+|   <MAP_TKN: "map">
+|   <LBRACE_TKN: "{">
+|   <RBRACE_TKN: "}">
+|   <LT_TKN: "<">
+|   <GT_TKN: ">">
+|   <SEMICOLON_TKN: ";">
+|   <COMMA_TKN: ",">
+|   <DOT_TKN: ".">
+|   <CSTRING_TKN: "\"" ( ~["\""] )+ "\"">
+|   <IDENT_TKN: ["A"-"Z","a"-"z"] (["a"-"z","A"-"Z","0"-"9","_"])*>
+}
+
+JFile Input() :
+{
+    ArrayList ilist = new ArrayList();
+    ArrayList rlist = new ArrayList();
+    JFile i;
+    ArrayList l;
+}
+{
+    (
+        i = Include()
+        { ilist.add(i); }
+    |   l = Module()
+        { rlist.addAll(l); }
+    )+
+    <EOF>
+    { return new JFile(curFileName, ilist, rlist); }
+}
+
+JFile Include() :
+{
+    String fname;
+    Token t;
+}
+{
+    <INCLUDE_TKN>
+    t = <CSTRING_TKN>
+    {
+        JFile ret = null;
+        fname = t.image.replaceAll("^\"", "").replaceAll("\"$","");
+        File file = new File(curDir, fname);
+        String tmpDir = curDir;
+        String tmpFile = curFileName;
+        curDir = file.getParent();
+        curFileName = file.getName();
+        try {
+            FileReader reader = new FileReader(file);
+            Rcc parser = new Rcc(reader);
+            try {
+                ret = parser.Input();
+                System.out.println(fname + " Parsed Successfully");
+            } catch (ParseException e) {
+                System.out.println(e.toString());
+                System.exit(1);
+            }
+            try {
+                reader.close();
+            } catch (IOException e) {
+            }
+        } catch (FileNotFoundException e) {
+            System.out.println("File " + fname +
+                " Not found.");
+            System.exit(1);
+        }
+        curDir = tmpDir;
+        curFileName = tmpFile;
+        return ret;
+    }
+}
+
+ArrayList Module() :
+{
+    String mName;
+    ArrayList rlist;
+}
+{
+    <MODULE_TKN>
+    mName = ModuleName()
+    { curModuleName = mName; }
+    <LBRACE_TKN>
+    rlist = RecordList()
+    <RBRACE_TKN>
+    { return rlist; }
+}
+
+String ModuleName() :
+{
+    String name = "";
+    Token t;
+}
+{
+    t = <IDENT_TKN>
+    { name += t.image; }
+    (
+        <DOT_TKN>
+        t = <IDENT_TKN>
+        { name += "." + t.image; }
+    )*
+    { return name; }
+}
+
+ArrayList RecordList() :
+{
+    ArrayList rlist = new ArrayList();
+    JRecord r;
+}
+{
+    (
+        r = Record()
+        { rlist.add(r); }
+    )+
+    { return rlist; }
+}
+
+JRecord Record() :
+{
+    String rname;
+    ArrayList flist = new ArrayList();
+    Token t;
+    JField f;
+}
+{
+    <RECORD_TKN>
+    t = <IDENT_TKN>
+    { rname = t.image; }
+    <LBRACE_TKN>
+    (
+        f = Field()
+        { flist.add(f); }
+        <SEMICOLON_TKN>
+    )+
+    <RBRACE_TKN>
+    {
+        String fqn = curModuleName + "." + rname;
+        JRecord r = new JRecord(fqn, flist);
+        recTab.put(fqn, r);
+        return r;
+    }
+}
+
+JField Field() :
+{
+    JType jt;
+    Token t;
+}
+{
+    jt = Type()
+    t = <IDENT_TKN>
+    { return new JField(jt, t.image); }
+}
+
+JType Type() :
+{
+    JType jt;
+    Token t;
+    String rname;
+}
+{
+    jt = Map()
+    { return jt; }
+|   jt = Vector()
+    { return jt; }
+|   <BYTE_TKN>
+    { return new JByte(); }
+|   <BOOLEAN_TKN>
+    { return new JBoolean(); }
+|   <INT_TKN>
+    { return new JInt(); }
+|   <LONG_TKN>
+    { return new JLong(); }
+|   <FLOAT_TKN>
+    { return new JFloat(); }
+|   <DOUBLE_TKN>
+    { return new JDouble(); }
+|   <USTRING_TKN>
+    { return new JString(); }
+|   <BUFFER_TKN>
+    { return new JBuffer(); }
+|   rname = ModuleName()
+    {
+        if (rname.indexOf('.', 0) < 0) {
+            rname = curModuleName + "." + rname;
+        }
+        JRecord r = (JRecord) recTab.get(rname);
+        if (r == null) {
+            System.out.println("Type " + rname + " not known. Exiting.");
+            System.exit(1);
+        }
+        return r;
+    }
+}
+
+JMap Map() :
+{
+    JType jt1;
+    JType jt2;
+}
+{
+    <MAP_TKN>
+    <LT_TKN>
+    jt1 = Type()
+    <COMMA_TKN>
+    jt2 = Type()
+    <GT_TKN>
+    { return new JMap(jt1, jt2); }
+}
+
+JVector Vector() :
+{
+    JType jt;
+}
+{
+    <VECTOR_TKN>
+    <LT_TKN>
+    jt = Type()
+    <GT_TKN>
+    { return new JVector(jt); }
+}

Added: lucene/hadoop/trunk/src/java/org/apache/hadoop/record/package.html
URL: http://svn.apache.org/viewcvs/lucene/hadoop/trunk/src/java/org/apache/hadoop/record/package.html?rev=399509&view=auto
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/record/package.html (added)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/record/package.html Wed May  3 19:04:01 2006
@@ -0,0 +1,785 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
+
+<html>
+  <head>
+    <title>Hadoop Record I/O</title>
+  </head>
+  <body>
+  Hadoop record I/O contains classes and a record description language
+  translator for simplifying serialization and deserialization of records in a
+  language-neutral manner.
+  
+  <h2>Introduction</h2>
+  
+  Software systems of any significant complexity require mechanisms for data 
+interchange with the outside world. These interchanges typically involve the
+marshaling and unmarshaling of logical units of data to and from data streams
+(files, network connections, memory buffers etc.). Applications usually have
+some code for serializing and deserializing the data types that they manipulate
+embedded in them. The work of serialization has several features that make
+automatic code generation for it worthwhile. Given a particular output encoding
+(binary, XML, etc.), serialization of primitive types and simple compositions
+of primitives (structs, vectors etc.) is a very mechanical task. Manually
+written serialization code can be susceptible to bugs especially when records
+have a large number of fields or a record definition changes between software
+versions. Lastly, it can be very useful for applications written in different
+programming languages to be able to share and interchange data. This can be 
+made a lot easier by describing the data records manipulated by these
+applications in a language agnostic manner and using the descriptions to derive
+implementations of serialization in multiple target languages. 
+
+This document describes Hadoop Record I/O, a mechanism that is aimed 
+at
+<ul> 
+<li> enabling the specification of simple serializable data types (records) 
+<li> enabling the generation of code in multiple target languages for
+marshaling and unmarshaling such types
+<li> providing target language specific support that will enable application 
+programmers to incorporate generated code into their applications
+</ul>
+
+The goals of Hadoop Record I/O are similar to those of mechanisms such as XDR,
+ASN.1, PADS and ICE. While these systems all include a DDL that enables
+the specification of most record types, they differ widely in what else they
+focus on. The focus in Hadoop Record I/O is on data marshaling and
+multi-lingual support.  We take a translator-based approach to serialization.
+Hadoop users have to describe their data in a simple data description
+language. The Hadoop DDL translator rcc generates code that users
+can invoke in order to read/write their data from/to simple stream 
+abstractions. Next we list explicitly some of the goals and non-goals of
+Hadoop Record I/O.
+
+
+<h3>Goals</h3>
+
+<ul>
+<li> Support for commonly used primitive types. Hadoop should include as
+primitives commonly used builtin types from programming languages we intend to
+support.
+
+<li> Support for common data compositions (including recursive compositions).
+Hadoop should support widely used composite types such as structs and
+vectors.
+
+<li> Code generation in multiple target languages. Hadoop should be capable of
+generating serialization code in multiple target languages and should be
+easily extensible to new target languages. The initial target languages are
+C++ and Java.
+
+<li> Support for generated target languages. Hadooop should include support
+in the form of headers, libraries, packages for supported target languages 
+that enable easy inclusion and use of generated code in applications.
+
+<li> Support for multiple output encodings. Candidates include
+packed binary, comma-separated text, XML etc.
+
+<li> Support for specifying record types in a backwards/forwards compatible
+manner. This will probably be in the form of support for optional fields in
+records. This version of the document does not include a description of the
+planned mechanism, we intend to include it in the next iteration.
+
+</ul>
+
+<h3>Non-Goals</h3>
+
+<ul>
+  <li> Serializing existing arbitrary C++ classes.
+  <li> Serializing complex data structures such as trees, linked lists etc.
+  <li> Built-in indexing schemes, compression, or check-sums.
+  <li> Dynamic construction of objects from an XML schema.
+</ul>
+
+The remainder of this document describes the features of Hadoop record I/O
+in more detail. Section 2 describes the data types supported by the system.
+Section 3 lays out the DDL syntax with some examples of simple records. 
+Section 4 describes the process of code generation with rcc. Section 5
+describes target language mappings and support for Hadoop types. We include a
+fairly complete description of C++ mappings with intent to include Java and
+others in upcoming iterations of this document. The last section talks about
+supported output encodings.
+
+
+<h2>Data Types and Streams</h2>
+
+This section describes the primitive and composite types supported by Hadoop.
+We aim to support a set of types that can be used to simply and efficiently
+express a wide range of record types in different programming languages.
+
+<h3>Primitive Types</h3>
+
+For the most part, the primitive types of Hadoop map directly to primitive
+types in high level programming languages. Special cases are the
+ustring (a Unicode string) and buffer types, which we believe
+find wide use and which are usually implemented in library code and not
+available as language built-ins. Hadoop also supplies these via library code
+when a target language built-in is not present and there is no widely
+adopted "standard" implementation. The complete list of primitive types is:
+
+<ul>
+  <li> byte: An 8-bit unsigned integer.
+  <li> boolean: A boolean value.
+  <li> int: A 32-bit signed integer.
+  <li> long: A 64-bit signed integer.
+  <li> float: A single precision floating point number as described by
+    IEEE-754.
+  <li> double: A double precision floating point number as described by
+    IEEE-754.
+  <li> ustring: A string consisting of Unicode characters.
+  <li> buffer: An arbitrary sequence of bytes. 
+</ul>
+
+
+<h3>Composite Types</h3>
+Hadoop supports a small set of composite types that enable the description
+of simple aggregate types and containers. A composite type is serialized
+by sequentially serializing it constituent elements. The supported
+composite types are:
+
+<ul>
+
+  <li> record: An aggregate type like a C-struct. This is a list of
+typed fields that are together considered a single unit of data. A record
+is serialized by sequentially serializing its constituent fields. In addition
+to serialization a record has comparison operations (equality and less-than)
+implemented for it, these are defined as memberwise comparisons.
+
+  <li>vector: A sequence of entries of the same data type, primitive
+or composite.
+
+  <li> map: An associative container mapping instances of a key type to
+instances of a value type. The key and value types may themselves be primitive
+or composite types. 
+
+</ul>
+
+<h3>Streams</h3>
+
+Hadoop generates code for serializing and deserializing record types to
+abstract streams. For each target language Hadoop defines very simple input
+and output stream interfaces. Application writers can usually develop
+concrete implementations of these by putting a one method wrapper around
+an existing stream implementation.
+
+
+<h2>DDL Syntax and Examples</h2>
+
+We now describe the syntax of the Hadoop data description language. This is
+followed by a few examples of DDL usage.
+ 
+<h3>Hadoop DDL Syntax</h3>
+
+<pre><code>
+recfile = *include module *record
+include = "include" path
+path = (relative-path / absolute-path)
+module = "module" module-name
+module-name = name *("." name)
+record := "class" name "{" 1*(field) "}"
+field := type name ";"
+name :=  ALPHA (ALPHA / DIGIT / "_" )*
+type := (ptype / ctype)
+ptype := ("byte" / "boolean" / "int" |
+          "long" / "float" / "double"
+          "ustring" / "buffer")
+ctype := (("vector" "<" type ">") /
+          ("map" "<" type "," type ">" ) ) / name)
+</code></pre>
+
+A DDL file describes one or more record types. It begins with zero or
+more include declarations, a single mandatory module declaration
+followed by zero or more class declarations. The semantics of each of
+these declarations are described below:
+
+<ul>
+
+<li>include: An include declaration specifies a DDL file to be
+referenced when generating code for types in the current DDL file. Record types
+in the current compilation unit may refer to types in all included files.
+File inclusion is recursive. An include does not trigger code
+generation for the referenced file.
+
+<li> module: Every Hadoop DDL file must have a single module
+declaration that follows the list of includes and precedes all record
+declarations. A module declaration identifies a scope within which
+the names of all types in the current file are visible. Module names are
+mapped to C++ namespaces, Java packages etc. in generated code.
+
+<li> class: Records types are specified through class
+declarations. A class declaration is like a Java class declaration.
+It specifies a named record type and a list of fields that constitute records
+of the type. Usage is illustrated in the following examples.
+
+</ul>
+
+<h3>Examples</h3>
+
+<ul>
+<li>A simple DDL file links.jr with just one record declaration. 
+<pre><code>
+module links {
+    class Link {
+        ustring URL;
+        boolean isRelative;
+        ustring anchorText;
+    };
+}
+</code></pre>
+
+<li> A DDL file outlinks.jr which includes another
+<pre><code>
+include "links.jr"
+
+module outlinks {
+    class OutLinks {
+        ustring baseURL;
+        vector<links.Link> outLinks;
+    };
+}
+</code></pre>
+</ul>
+
+<h2>Code Generation</h2>
+
+The Hadoop translator is written in Java. Invocation is done by executing a 
+wrapper shell script named named rcc. It takes a list of
+record description files as a mandatory argument and an
+optional language argument (the default is Java) --language or
+-l. Thus a typical invocation would look like:
+<pre><code>
+$ rcc -l C++ <filename> ...
+</code></pre>
+
+
+<h2>Target Language Mappings and Support</h2>
+
+For all target languages, the unit of code generation is a record type. 
+For each record type, Hadoop generates code for serialization and
+deserialization, record comparison and access to record members.
+
+<h3>C++</h3>
+
+Support for including Hadoop generated C++ code in applications comes in the
+form of a header file recordio.hh which needs to be included in source
+that uses Hadoop types and a library librecordio.a which applications need
+to be linked with. The header declares the Hadoop C++ namespace which defines
+appropriate types for the various primitives, the basic interfaces for
+records and streams and enumerates the supported serialization encodings.
+Declarations of these interfaces and a description of their semantics follow:
+
+<pre><code>
+namespace hadoop {
+
+  enum RecFormat { kBinary, kXML, kCSV };
+
+  class InStream {
+  public:
+    virtual ssize_t read(void *buf, size_t n) = 0;
+  };
+
+  class OutStream {
+  public:
+    virtual ssize_t write(const void *buf, size_t n) = 0;
+  };
+
+  class IOError : public runtime_error {
+  public:
+    explicit IOError(const std::string& msg);
+  };
+
+  class IArchive;
+  class OArchive;
+
+  class RecordReader {
+  public:
+    RecordReader(InStream& in, RecFormat fmt);
+    virtual ~RecordReader(void);
+
+    virtual void read(Record& rec);
+  };
+
+  class RecordWriter {
+  public:
+    RecordWriter(OutStream& out, RecFormat fmt);
+    virtual ~RecordWriter(void);
+
+    virtual void write(Record& rec);
+  };
+
+
+  class Record {
+  public:
+    virtual std::string type(void) const = 0;
+    virtual std::string signature(void) const = 0;
+  protected:
+    virtual bool validate(void) const = 0;
+
+    virtual void
+    serialize(OArchive& oa, const std::string& tag) const = 0;
+
+    virtual void
+    deserialize(IArchive& ia, const std::string& tag) = 0;
+  };
+}
+</code></pre>
+
+<ul>
+
+<li> RecFormat: An enumeration of the serialization encodings supported
+by this implementation of Hadoop.
+
+<li> InStream: A simple abstraction for an input stream. This has a 
+single public read method that reads n bytes from the stream into
+the buffer buf. Has the same semantics as a blocking read system
+call. Returns the number of bytes read or -1 if an error occurs.
+
+<li> OutStream: A simple abstraction for an output stream. This has a 
+single write method that writes n bytes to the stream from the
+buffer buf. Has the same semantics as a blocking write system
+call. Returns the number of bytes written or -1 if an error occurs.
+
+<li> RecordReader: A RecordReader reads records one at a time from
+an underlying stream in a specified record format. The reader is instantiated
+with a stream and a serialization format. It has a read method that
+takes an instance of a record and deserializes the record from the stream.
+
+<li> RecordWriter: A RecordWriter writes records one at a
+time to an underlying stream in a specified record format. The writer is
+instantiated with a stream and a serialization format. It has a
+write method that takes an instance of a record and serializes the
+record to the stream.
+
+<li> Record: The base class for all generated record types. This has two
+public methods type and signature that return the typename and the
+type signature of the record.
+
+</ul>
+
+Two files are generated for each record file (note: not for each record). If a
+record file is named "name.jr", the generated files are 
+"name.jr.cc" and "name.jr.hh" containing serialization 
+implementations and record type declarations respectively.
+
+For each record in the DDL file, the generated header file will contain a
+class definition corresponding to the record type, method definitions for the
+generated type will be present in the '.cc' file.  The generated class will
+inherit from the abstract class hadoop::Record. The DDL files
+module declaration determines the namespace the record belongs to.
+Each '.' delimited token in the module declaration results in the
+creation of a namespace. For instance, the declaration module docs.links
+results in the creation of a docs namespace and a nested 
+docs::links namespace. In the preceding examples, the Link class
+is placed in the links namespace. The header file corresponding to
+the links.jr file will contain:
+
+<pre><code>
+namespace links {
+  class Link : public hadoop::Record {
+    // ....
+  };
+};
+</code></pre>
+
+Each field within the record will cause the generation of a private member
+declaration of the appropriate type in the class declaration, and one or more
+acccessor methods. The generated class will implement the serialize and
+deserialize methods defined in hadoop::Record+. It will also 
+implement the inspection methods type and signature from
+hadoop::Record. A default constructor and virtual destructor will also
+be generated. Serialization code will read/write records into streams that
+implement the hadoop::InStream and the hadoop::OutStream interfaces.
+
+For each member of a record an accessor method is generated that returns 
+either the member or a reference to the member. For members that are returned 
+by value, a setter method is also generated. This is true for primitive 
+data members of the types byte, int, long, boolean, float and 
+double. For example, for a int field called MyField the folowing
+code is generated.
+
+<pre><code>
+...
+private:
+  int32_t mMyField;
+  ...
+public:
+  int32_t getMyField(void) const {
+    return mMyField;
+  };
+
+  void setMyField(int32_t m) {
+    mMyField = m;
+  };
+  ...
+</code></pre>
+
+For a ustring or buffer or composite field. The generated code
+only contains accessors that return a reference to the field. A const
+and a non-const accessor are generated. For example:
+
+<pre><code>
+...
+private:
+  std::string mMyBuf;
+  ...
+public:
+
+  std::string& getMyBuf() {
+    return mMyBuf;
+  };
+
+  const std::string& getMyBuf() const {
+    return mMyBuf;
+  };
+  ...
+</code></pre>
+
+<h4>Examples</h4>
+
+Suppose the inclrec.jr file contains:
+<pre><code>
+module inclrec {
+    class RI {
+        int      I32;
+        double   D;
+        ustring  S;
+    };
+}
+</code></pre>
+
+and the testrec.jr file contains:
+
+<pre><code>
+include "inclrec.jr"
+module testrec {
+    class R {
+        vector<float> VF;
+        RI            Rec;
+        buffer        Buf;
+    };
+}
+</code></pre>
+
+Then the invocation of rcc such as:
+<pre><code>
+$ rcc -l c++ inclrec.jr testrec.jr
+</code></pre>
+will result in generation of four files:
+inclrec.jr.{cc,hh} and testrec.jr.{cc,hh}.
+
+The inclrec.jr.hh will contain:
+
+<pre><code>
+#ifndef _INCLREC_JR_HH_
+#define _INCLREC_JR_HH_
+
+#include "recordio.hh"
+
+namespace inclrec {
+  
+  class RI : public hadoop::Record {
+
+  private:
+
+    int32_t      mI32;
+    double       mD;
+    std::string  mS;
+
+  public:
+
+    RI(void);
+    virtual ~RI(void);
+
+    virtual bool operator==(const RI& peer) const;
+    virtual bool operator<(const RI& peer) const;
+
+    virtual int32_t getI32(void) const { return mI32; }
+    virtual void setI32(int32_t v) { mI32 = v; }
+
+    virtual double getD(void) const { return mD; }
+    virtual void setD(double v) { mD = v; }
+
+    virtual std::string& getS(void) const { return mS; }
+    virtual const std::string& getS(void) const { return mS; }
+
+    virtual std::string type(void) const;
+    virtual std::string signature(void) const;
+
+  protected:
+
+    virtual void serialize(hadoop::OArchive& a) const;
+    virtual void deserialize(hadoop::IArchive& a);
+
+    virtual bool validate(void);
+  };
+} // end namespace inclrec
+
+#endif /* _INCLREC_JR_HH_ */
+
+</code></pre>
+
+The testrec.jr.hh file will contain:
+
+
+<pre><code>
+
+#ifndef _TESTREC_JR_HH_
+#define _TESTREC_JR_HH_
+
+#include "inclrec.jr.hh"
+
+namespace testrec {
+  class R : public hadoop::Record {
+
+  private:
+
+    std::vector<float> mVF;
+    inclrec::RI        mRec;
+    std::string        mBuf;
+
+  public:
+
+    R(void);
+    virtual ~R(void);
+
+    virtual bool operator==(const R& peer) const;
+    virtual bool operator<(const R& peer) const;
+
+    virtual std::vector<float>& getVF(void) const;
+    virtual const std::vector<float>& getVF(void) const;
+
+    virtual std::string& getBuf(void) const ;
+    virtual const std::string& getBuf(void) const;
+
+    virtual inclrec::RI& getRec(void) const;
+    virtual const inclrec::RI& getRec(void) const;
+    
+    virtual bool serialize(hadoop::OutArchive& a) const;
+    virtual bool deserialize(hadoop::InArchive& a);
+    
+    virtual std::string type(void) const;
+    virtual std::string signature(void) const;
+  };
+}; // end namespace testrec
+#endif /* _TESTREC_JR_HH_ */
+
+</code></pre>
+
+<h3>Java</h3>
+
+Code generation for Java is similar to that for C++. A Java class is generated
+for each record type with private members corresponding to the fields. Getters
+and setters for fields are also generated. Some differences arise in the
+way comparison is expressed and in the mapping of modules to packages and
+classes to files. For equality testing, an equals method is generated
+for each record type. As per Java requirements a hashCode method is also
+generated. For comparison a compareTo method is generated for each
+record type. This has the semantics as defined by the Java Comparable
+interface, that is, the method returns a negative integer, zero, or a positive
+integer as the invoked object is less than, equal to, or greater than the
+comparison parameter.
+
+A .java file is generated per record type as opposed to per DDL
+file as in C++. The module declaration translates to a Java
+package declaration. The module name maps to an identical Java package
+name. In addition to this mapping, the DDL compiler creates the appropriate
+directory hierarchy for the package and places the generated .java
+files in the correct directories.
+
+<h2>Mapping Summary</h2>
+
+<pre><code>
+DDL Type        C++ Type            Java Type 
+
+boolean         bool                boolean
+byte            int8_t              byte
+int             int32_t             int
+long            int64_t             long
+float           float               float
+double          double              double
+ustring         std::string         java.lang.String
+buffer          std::string         java.io.ByteArrayOutputStream
+class type      class type          class type
+vector<type>    std::vector<type>   java.util.ArrayList
+map<type,type>  std::map<type,type> java.util.TreeMap
+</code></pre>
+
+<h2>Data encodings</h2>
+
+This section describes the format of the data encodings supported by Hadoop.
+Currently, three data encodings are supported, namely binary, CSV and XML.
+
+<h3>Binary Serialization Format</h3>
+
+The binary data encoding format is fairly dense. Serialization of composite
+types is simply defined as a concatenation of serializations of the constituent
+elements (lengths are included in vectors and maps).
+
+Composite types are serialized as follows:
+<ul>
+<li> class: Sequence of serialized members.
+<li> vector: The number of elements serialized as an int. Followed by a
+sequence of serialized elements.
+<li> map: The number of key value pairs serialized as an int. Followed
+by a sequence of serialized (key,value) pairs.
+</ul>
+
+Serialization of primitives is more interesting, with a zero compression
+optimization for integral types and normalization to UTF-8 for strings. 
+Primitive types are serialized as follows:
+
+<ul>
+<li> byte: Represented by 1 byte, as is.
+<li> boolean: Represented by 1-byte (0 or 1)
+<li> int/long: Integers and longs are serialized zero compressed.
+Represented as 1-byte if -120 <= value < 128. Otherwise, serialized as a
+sequence of 2-5 bytes for ints, 2-9 bytes for longs. The first byte represents
+the number of trailing bytes, N, as the negative number (-120-N). For example,
+the number 1024 (0x400) is represented by the byte sequence 'x86 x04 x00'.
+This doesn't help much for 4-byte integers but does a reasonably good job with
+longs without bit twiddling.
+<li> float/double: Serialized in IEEE 754 single and double precision
+format in network byte order. This is the format used by Java.
+<li> ustring: Serialized as 4-byte zero compressed length followed by
+data encoded as UTF-8. Strings are normalized to UTF-8 regardless of native
+language representation.
+<li> buffer: Serialized as a 4-byte zero compressed length followed by the
+raw bytes in the buffer.
+</ul>
+
+
+<h3>CSV Serialization Format</h3>
+
+The CSV serialization format has a lot more structure than the "standard"
+Excel CSV format, but we believe the additional structure is useful because
+
+<ul>
+<li> it makes parsing a lot easier without detracting too much from legibility
+<li> the delimiters around composites make it obvious when one is reading a
+sequence of Hadoop records
+</ul>
+
+Serialization formats for the various types are detailed in the grammar that
+follows. The notable feature of the formats is the use of delimiters for 
+indicating the certain field types.
+
+<ul>
+<li> A string field begins with a single quote (').
+<li> A buffer field begins with a sharp (#).
+<li> A class, vector or map begins with 's{', 'v{' or 'm{' respectively and
+ends with '}'.
+</ul>
+
+The CSV format can be described by the following grammar:
+
+<pre><code>
+record = primitive / struct / vector / map
+primitive = boolean / int / long / float / double / ustring / buffer
+
+boolean = "T" / "F"
+int = ["-"] 1*DIGIT
+long = ";" ["-"] 1*DIGIT
+float = ["-"] 1*DIGIT "." 1*DIGIT ["E" / "e" ["-"] 1*DIGIT]
+double = ";" ["-"] 1*DIGIT "." 1*DIGIT ["E" / "e" ["-"] 1*DIGIT]
+
+ustring = "'" *(UTF8 char except NULL, LF, % and , / "%00" / "%0a" / "%25" / "%2c" )
+
+buffer = "#" *(BYTE except NULL, LF, % and , / "%00" / "%0a" / "%25" / "%2c" )
+
+struct = "s{" record *("," record) "}"
+vector = "v{" [record *("," record)] "}"
+map = "m{" [*(record "," record)] "}"
+</code></pre>
+
+<h3>XML Serialization Format</h3>
+
+The XML serialization format is the same used by Apache XML-RPC
+(http://ws.apache.org/xmlrpc/types.html). This is an extension of the original
+XML-RPC format and adds some additional data types. All record I/O types are
+not directly expressible in this format, and access to a DDL is required in
+order to convert these to valid types. All types primitive or composite are
+represented by &lt;value&gt; elements. The particular XML-RPC type is
+indicated by a nested element in the &lt;value&gt; element. The encoding for
+records is always UTF-8. Primitive types are serialized as follows:
+
+<ul>
+<li> byte: XML tag &lt;ex:i1&gt;. Values: 1-byte unsigned 
+integers represented in US-ASCII
+<li> boolean: XML tag &lt;boolean&gt;. Values: "0" or "1"
+<li> int: XML tags &lt;i4&gt; or &lt;int&gt;. Values: 4-byte
+signed integers represented in US-ASCII.
+<li> long: XML tag &lt;ex:i8&gt;. Values: 8-byte signed integers
+represented in US-ASCII.
+<li> float: XML tag &lt;ex:float&gt;. Values: Single precision
+floating point numbers represented in US-ASCII.
+<li> double: XML tag &lt;double&gt;. Values: Double precision
+floating point numbers represented in US-ASCII.
+<li> ustring: XML tag &lt;;string&gt;. Values: String values
+represented as UTF-8. XML does not permit all Unicode characters in literal
+data. In particular, NULLs and control chars are not allowed. Additionally,
+XML processors are required to replace carriage returns with line feeds and to
+replace CRLF sequences with line feeds. Programming languages that we work
+with do not impose these restrictions on string types. To work around these
+restrictions, disallowed characters and CRs are percent escaped in strings.
+The '%' character is also percent escaped.
+<li> buffer: XML tag &lt;string&&gt;. Values: Arbitrary binary
+data. Represented as hexBinary, each byte is replaced by its 2-byte
+hexadecimal representation.
+</ul>
+
+Composite types are serialized as follows:
+
+<ul>
+<li> class: XML tag &lt;struct&gt;. A struct is a sequence of
+&lt;member&gt; elements. Each &lt;member&gt; element has a &lt;name&gt;
+element and a &lt;value&gt; element. The &lt;name&gt; is a string that must
+match /[a-zA-Z][a-zA-Z0-9_]*/. The value of the member is represented
+by a &lt;value&gt; element.
+
+<li> vector: XML tag &lt;array&lt;. An &lt;array&gt; contains a
+single &lt;data&gt; element. The &lt;data&gt; element is a sequence of
+&lt;value&gt; elements each of which represents an element of the vector.
+
+<li> map: XML tag &lt;array&gt;. Same as vector.
+
+</ul>
+
+For example:
+
+<pre><code>
+class {
+  int           MY_INT;            // value 5
+  vector<float> MY_VEC;            // values 0.1, -0.89, 2.45e4
+  buffer        MY_BUF;            // value '\00\n\tabc%'
+}
+</code></pre>
+
+is serialized as
+
+<pre><code class="XML">
+&lt;value&gt;
+  &lt;struct&gt;
+    &lt;member&gt;
+      &lt;name&gt;MY_INT&lt;/name&gt;
+      &lt;value&gt;&lt;i4&gt;5&lt;/i4&gt;&lt;/value&gt;
+    &lt;/member&gt;
+    &lt;member&gt;
+      &lt;name&gt;MY_VEC&lt;/name&gt;
+      &lt;value&gt;
+        &lt;array&gt;
+          &lt;data&gt;
+            &lt;value&gt;&lt;ex:float&gt;0.1&lt;/ex:float&gt;&lt;/value&gt;
+            &lt;value&gt;&lt;ex:float&gt;-0.89&lt;/ex:float&gt;&lt;/value&gt;
+            &lt;value&gt;&lt;ex:float&gt;2.45e4&lt;/ex:float&gt;&lt;/value&gt;
+          &lt;/data&gt;
+        &lt;/array&gt;
+      &lt;/value&gt;
+    &lt;/member&gt;
+    &lt;member&gt;
+      &lt;name&gt;MY_BUF&lt;/name&gt;
+      &lt;value&gt;&lt;string&gt;%00\n\tabc%25&lt;/string&gt;&lt;/value&gt;
+    &lt;/member&gt;
+  &lt;/struct&gt;
+&lt;/value&gt; 
+</code></pre>
+
+  </body>
+</html>

Added: lucene/hadoop/trunk/src/test/ddl/buffer.jr
URL: http://svn.apache.org/viewcvs/lucene/hadoop/trunk/src/test/ddl/buffer.jr?rev=399509&view=auto
==============================================================================
--- lucene/hadoop/trunk/src/test/ddl/buffer.jr (added)
+++ lucene/hadoop/trunk/src/test/ddl/buffer.jr Wed May  3 19:04:01 2006
@@ -0,0 +1,6 @@
+module org.apache.hadoop.record.test {
+    class RecBuffer {
+        buffer Data;
+    }
+}
+

Added: lucene/hadoop/trunk/src/test/ddl/int.jr
URL: http://svn.apache.org/viewcvs/lucene/hadoop/trunk/src/test/ddl/int.jr?rev=399509&view=auto
==============================================================================
--- lucene/hadoop/trunk/src/test/ddl/int.jr (added)
+++ lucene/hadoop/trunk/src/test/ddl/int.jr Wed May  3 19:04:01 2006
@@ -0,0 +1,6 @@
+module org.apache.hadoop.record.test {
+    class RecInt {
+        int Data;
+    }
+}
+

Added: lucene/hadoop/trunk/src/test/ddl/links.jr
URL: http://svn.apache.org/viewcvs/lucene/hadoop/trunk/src/test/ddl/links.jr?rev=399509&view=auto
==============================================================================
--- lucene/hadoop/trunk/src/test/ddl/links.jr (added)
+++ lucene/hadoop/trunk/src/test/ddl/links.jr Wed May  3 19:04:01 2006
@@ -0,0 +1,18 @@
+include "location.jr"
+
+module org.apache.hadoop.record.Links {
+    class Link {
+        ustring URL;
+        vector<org.apache.hadoop.record.Location.LinkLocation> Locations;
+        boolean IsRelative;
+        double Weight;
+    }
+
+    class HTTP_Transaction {
+        ustring URL;
+        ustring Request;
+        map<ustring,ustring> ResponseHeader;
+        buffer Source;
+    }
+} // end module
+

Added: lucene/hadoop/trunk/src/test/ddl/location.jr
URL: http://svn.apache.org/viewcvs/lucene/hadoop/trunk/src/test/ddl/location.jr?rev=399509&view=auto
==============================================================================
--- lucene/hadoop/trunk/src/test/ddl/location.jr (added)
+++ lucene/hadoop/trunk/src/test/ddl/location.jr Wed May  3 19:04:01 2006
@@ -0,0 +1,8 @@
+module org.apache.hadoop.record.Location {
+    class LinkLocation {
+        int RowNum;
+        int ColNum;
+        ustring AnchorText;
+    }
+}
+

Added: lucene/hadoop/trunk/src/test/ddl/string.jr
URL: http://svn.apache.org/viewcvs/lucene/hadoop/trunk/src/test/ddl/string.jr?rev=399509&view=auto
==============================================================================
--- lucene/hadoop/trunk/src/test/ddl/string.jr (added)
+++ lucene/hadoop/trunk/src/test/ddl/string.jr Wed May  3 19:04:01 2006
@@ -0,0 +1,6 @@
+module org.apache.hadoop.record.test {
+    class RecString {
+        ustring Data;
+    }
+}
+

Added: lucene/hadoop/trunk/src/test/ddl/test.jr
URL: http://svn.apache.org/viewcvs/lucene/hadoop/trunk/src/test/ddl/test.jr?rev=399509&view=auto
==============================================================================
--- lucene/hadoop/trunk/src/test/ddl/test.jr (added)
+++ lucene/hadoop/trunk/src/test/ddl/test.jr Wed May  3 19:04:01 2006
@@ -0,0 +1,19 @@
+module org.apache.hadoop.record.test {
+    class RecRecord0 {
+        ustring     StringVal;
+    }
+
+    class RecRecord1 {
+        boolean         BoolVal;
+        byte            ByteVal;
+        int             IntVal;
+        long            LongVal;
+        float           FloatVal;
+        double          DoubleVal;
+        ustring         StringVal;
+        buffer          BufferVal;
+        vector<ustring> VectorVal;
+        map<ustring,ustring>   MapVal;
+    }
+}
+



Mime
View raw message