pig-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From pradeep...@apache.org
Subject svn commit: r897283 [3/5] - in /hadoop/pig/branches/load-store-redesign: ./ contrib/piggybank/java/ contrib/zebra/ contrib/zebra/src/java/org/apache/hadoop/zebra/pig/ contrib/zebra/src/java/org/apache/hadoop/zebra/types/ contrib/zebra/src/test/e2e/merg...
Date Fri, 08 Jan 2010 18:17:12 GMT
Added: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/join_jira.pig
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/join_jira.pig?rev=897283&view=auto
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/join_jira.pig (added)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/join_jira.pig Fri Jan  8 18:17:07 2010
@@ -0,0 +1,40 @@
+register $zebraJar;
+--fs -rmr $outputDir
+
+--a1 = LOAD '$inputDir/10k1' USING org.apache.hadoop.zebra.pig.TableLoader('count,seed,int1,str2,long1');
+--a2 = LOAD '$inputDir/10k2' USING org.apache.hadoop.zebra.pig.TableLoader('count,seed,int1,str2,long1');
+--a3 = LOAD '$inputDir/10k3' USING org.apache.hadoop.zebra.pig.TableLoader('count,seed,int1,str2,long1');
+--a4 = LOAD '$inputDir/10k4' USING org.apache.hadoop.zebra.pig.TableLoader('count,seed,int1,str2,long1');
+
+--sort1 = order a1 by long1 parallel 6;
+--sort2 = order a2 by long1 parallel 5;
+--sort3 = order a3 by long1 parallel 7;
+--sort4 = order a4 by long1 parallel 4;
+
+--store sort1 into '$outputDir/sortedlong110k1' using org.apache.hadoop.zebra.pig.TableStorer('[count,seed,int1,str2,long1]');
+--store sort2 into '$outputDir/sortedlong110k2' using org.apache.hadoop.zebra.pig.TableStorer('[count,seed,int1,str2,long1]');
+--store sort3 into '$outputDir/sortedlong110k3' using org.apache.hadoop.zebra.pig.TableStorer('[count,seed,int1,str2,long1]');
+--store sort4 into '$outputDir/sortedlong110k4' using org.apache.hadoop.zebra.pig.TableStorer('[count,seed,int1,str2,long1]');
+
+joinl = LOAD '$outputDir/sortedlong110k1,$outputDir/sortedlong110k2' USING org.apache.hadoop.zebra.pig.TableLoader('count,seed,int1,str2,long1', 'sorted');
+--joinll = order joinl by long1 parallel 7;
+--store joinll into '$outputDir/union10kl' using org.apache.hadoop.zebra.pig.TableStorer('[count,seed,int1,str2,long1]');
+
+joinr = LOAD '$outputDir/sortedlong110k3,$outputDir/sortedlong110k4' USING org.apache.hadoop.zebra.pig.TableLoader('count,seed,int1,str2,long1', 'sorted');
+--joinrr = order joinr by long1 parallel 4;
+--store joinrr into '$outputDir/union10kr' using org.apache.hadoop.zebra.pig.TableStorer('[count,seed,int1,str2,long1]');
+
+--rec1 = load '$outputDir/union10kl' using org.apache.hadoop.zebra.pig.TableLoader('count,seed,int1,str2,long1', 'sorted');
+--rec2 = load '$outputDir/union10kr' using org.apache.hadoop.zebra.pig.TableLoader('count,seed,int1,str2,long1', 'sorted');
+
+--joina = join rec1 by long1, rec2 by long1 using "merge" ; 
+
+joina = join joinl by long1, joinr by long1 using "merge" ; 
+
+E = foreach joina  generate $0 as count,  $1 as seed,  $2 as int1,  $3 as str2, $4 as long1;
+joinE = order E by long1 parallel 25;
+
+limitedVals = LIMIT joinE 10;
+dump limitedVals;
+
+store joinE into '$outputDir/join_jira' using org.apache.hadoop.zebra.pig.TableStorer('');     

Added: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/join_jira1.pig
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/join_jira1.pig?rev=897283&view=auto
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/join_jira1.pig (added)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/join_jira1.pig Fri Jan  8 18:17:07 2010
@@ -0,0 +1,27 @@
+register /grid/0/dev/hadoopqa/jars/zebra.jar;
+
+--a1 = load '1.txt' as (a:int, b:float,c:long,d:double,e:chararray,f:bytearray,r1(f1:chararray,f2:chararray),m1:map[]);
+--a2 = load '2.txt' as (a:int, b:float,c:long,d:double,e:chararray,f:bytearray,r1(f1:chararray,f2:chararray),m1:map[]);
+ 
+--sort1 = order a1 by a parallel 6;
+--sort2 = order a2 by a parallel 5;
+
+--store sort1 into 'asort1' using org.apache.hadoop.zebra.pig.TableStorer('[a,b,c,d]');
+--store sort2 into 'asort2' using org.apache.hadoop.zebra.pig.TableStorer('[a,b,c,d]');
+--store sort1 into 'asort3' using org.apache.hadoop.zebra.pig.TableStorer('[a,b,c,d]');
+--store sort2 into 'asort4' using org.apache.hadoop.zebra.pig.TableStorer('[a,b,c,d]');
+
+joinl = LOAD 'asort1,asort2' USING org.apache.hadoop.zebra.pig.TableLoader('a,b,c,d', 'sorted');
+
+joinr = LOAD 'asort3,asort4' USING org.apache.hadoop.zebra.pig.TableLoader('a,b,c,d', 'sorted');
+
+
+joina = join joinl by a, joinr by a using "merge" ;
+dump joina;
+--E = foreach joina  generate $0 as count,  $1 as seed,  $2 as int1,  $3 as str2, $4 as long1;
+--joinE = order E by long1 parallel 25;
+
+--limitedVals = LIMIT joinE 10;
+--dump limitedVals;
+
+--store joinE into 'join_jira' using org.apache.hadoop.zebra.pig.TableStorer('');                     

Added: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/readme
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/readme?rev=897283&view=auto
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/readme (added)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/readme Fri Jan  8 18:17:07 2010
@@ -0,0 +1,10 @@
+1. Run a Pig script, for example,
+java -cp /grid/0/dev/hadoopqa/jing1234/conf:/grid/0/dev/hadoopqa/jars/pig.jar:/grid/0/dev/hadoopqa/jars/tfile.jar:/grid/0/dev/hadoopqa/jars/zebra.jar org.apache.pig.Main -m config -M stress_union_02.pig
+
+2. Compile ToolTestComparator.java from contrib/zebra/src/test/org/apache/hadoop/zebra/mapred/ToolTestComparator.java
+and ship the ToolTestComparator.class to cluster 
+run the tool to verify, for example,
+java -DwhichCluster="realCluster" -DHADOOP_HOME=$HADOOP_HOME -DUSER=$USER org.apache.hadoop.zebra.mapred.ToolTestComparator -verifyOption merge-join -pathTable1 /data/zebraStress/output/join4 -sortCol 4 -numbCols 5 -sortString byte1,int1
+
+3. For details on how to run, please refer to 
+http://twiki.corp.yahoo.com/pub/Grid/Release2TestPlan/zebra_stress_test.html

Added: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/sortSimpleString.pig
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/sortSimpleString.pig?rev=897283&view=auto
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/sortSimpleString.pig (added)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/sortSimpleString.pig Fri Jan  8 18:17:07 2010
@@ -0,0 +1,9 @@
+register /grid/0/dev/hadoopqa/jars/zebra.jar;
+
+--test case : unsort table is /data/all1, sort table is /data/bcookie_sort, sort on bcookie
+
+a1 = LOAD '/data/all1' USING org.apache.hadoop.zebra.pig.TableLoader();
+                      
+a1order = order a1 by SF_bcookie;
+
+STORE a1order INTO '/data/bcookie_sort' USING org.apache.hadoop.zebra.pig.TableStorer('[SF_bcookie,SF_yuid,SF_ip];[SF_action,SF_afcookie,SF_browser,SF_bucket,SF_cbrn,SF_csc,SF_datestamp,SF_dst_spaceid,SF_dstid,SF_dstpvid,SF_error,SF_match_ts,SF_media,SF_ms,SF_os,SF_pcookie,SF_pg_load_time,SF_pg_size,SF_pg_spaceid,SF_query_term,SF_referrer,SF_server_code,SF_src_spaceid,SF_srcid,SF_srcpvid,SF_timestamp,SF_type,SF_ultspaceid,SF_ydod,MF_demog];[MF_page_params,MF_clickinfo,MLF_viewinfo]');

Added: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/stress_join_01.pig
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/stress_join_01.pig?rev=897283&view=auto
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/stress_join_01.pig (added)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/stress_join_01.pig Fri Jan  8 18:17:07 2010
@@ -0,0 +1,23 @@
+
+register $zebraJar;
+--fs -rmr $outputDir
+
+
+--a1 = LOAD '$inputDir/unsorted1' USING org.apache.hadoop.zebra.pig.TableLoader('count,seed,int1,str2');
+--a2 = LOAD '$inputDir/unsorted2' USING org.apache.hadoop.zebra.pig.TableLoader('count,seed,int1,str2');
+
+--sort1 = order a1 by str2;
+--sort2 = order a2 by str2;
+
+--store sort1 into '$outputDir/sorted11' using org.apache.hadoop.zebra.pig.TableStorer('[count,seed,int1,str2]');
+--store sort2 into '$outputDir/sorted21' using org.apache.hadoop.zebra.pig.TableStorer('[count,seed,int1,str2]');
+
+rec1 = load '$outputDir/sorted11' using org.apache.hadoop.zebra.pig.TableLoader();
+rec2 = load '$outputDir/sorted21' using org.apache.hadoop.zebra.pig.TableLoader();
+
+joina = join rec1 by str2, rec2 by str2 using "merge" ;
+
+E = foreach joina  generate $0 as count,  $1 as seed,  $2 as int1,  $3 as str2;
+
+
+store E into '$outputDir/join1' using org.apache.hadoop.zebra.pig.TableStorer('');

Added: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/stress_join_02.pig
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/stress_join_02.pig?rev=897283&view=auto
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/stress_join_02.pig (added)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/stress_join_02.pig Fri Jan  8 18:17:07 2010
@@ -0,0 +1,22 @@
+
+register $zebraJar;
+--fs -rmr $outputDir
+
+
+a1 = LOAD '$inputDir/unsorted1' USING org.apache.hadoop.zebra.pig.TableLoader('count,seed,int1,str2');
+a2 = LOAD '$inputDir/unsorted2' USING org.apache.hadoop.zebra.pig.TableLoader('count,seed,int1,str2');
+
+sort1 = order a1 by str2;
+sort2 = order a2 by str2;
+
+store sort1 into '$outputDir/100Msortedstr21' using org.apache.hadoop.zebra.pig.TableStorer('[count,seed,int1,str2]');
+store sort2 into '$outputDir/100Msortedstr22' using org.apache.hadoop.zebra.pig.TableStorer('[count,seed,int1,str2]');
+
+rec1 = load '$outputDir/100Msortedstr21' using org.apache.hadoop.zebra.pig.TableLoader('','sorted');
+rec2 = load '$outputDir/100Msortedstr22' using org.apache.hadoop.zebra.pig.TableLoader('','sorted');
+
+joina = join rec1 by str2, rec2 by str2 using "merge" ;
+
+E = foreach joina  generate $0 as count,  $1 as seed,  $2 as int1,  $3 as str2;
+
+store E into '$outputDir/join2' using org.apache.hadoop.zebra.pig.TableStorer('');

Added: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/stress_join_03.pig
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/stress_join_03.pig?rev=897283&view=auto
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/stress_join_03.pig (added)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/stress_join_03.pig Fri Jan  8 18:17:07 2010
@@ -0,0 +1,22 @@
+
+register $zebraJar;
+--fs -rmr $outputDir
+
+
+a1 = LOAD '$inputDir/unsorted1' USING org.apache.hadoop.zebra.pig.TableLoader('count,seed,int1,str2,byte2');
+a2 = LOAD '$inputDir/unsorted2' USING org.apache.hadoop.zebra.pig.TableLoader('count,seed,int1,str2,byte2');
+
+sort1 = order a1 by byte2;
+sort2 = order a2 by byte2;
+
+store sort1 into '$outputDir/100Msortedbyte21' using org.apache.hadoop.zebra.pig.TableStorer('[count,seed,int1,str2,byte2]');
+store sort2 into '$outputDir/100Msortedbyte22' using org.apache.hadoop.zebra.pig.TableStorer('[count,seed,int1,str2,byte2]');
+
+rec1 = load '$outputDir/100Msortedbyte21' using org.apache.hadoop.zebra.pig.TableLoader('','sorted');
+rec2 = load '$outputDir/100Msortedbyte22' using org.apache.hadoop.zebra.pig.TableLoader('','sorted');
+
+joina = join rec1 by byte2, rec2 by byte2 using "merge" ;
+
+E = foreach joina  generate $0 as count,  $1 as seed,  $2 as int1,  $3 as str2, $4 as byte2;
+
+store E into '$outputDir/join3' using org.apache.hadoop.zebra.pig.TableStorer('');

Added: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/stress_join_04.pig
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/stress_join_04.pig?rev=897283&view=auto
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/stress_join_04.pig (added)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/stress_join_04.pig Fri Jan  8 18:17:07 2010
@@ -0,0 +1,32 @@
+
+register $zebraJar;
+--fs -rmr $outputDir
+
+
+--a1 = LOAD '$inputDir/unsorted1' USING org.apache.hadoop.zebra.pig.TableLoader('count,seed,int1,str2,byte1');
+--a2 = LOAD '$inputDir/unsorted2' USING org.apache.hadoop.zebra.pig.TableLoader('count,seed,int1,str2,byte1');
+
+--sort1 = order a1 by byte1,int1;
+--sort2 = order a2 by byte1,int1;
+
+--store sort1 into '$outputDir/sortedbyteint1' using org.apache.hadoop.zebra.pig.TableStorer('[count,seed,int1,str2,byte1]');
+--store sort2 into '$outputDir/sortedbyteint2' using org.apache.hadoop.zebra.pig.TableStorer('[count,seed,int1,str2,byte1]');
+
+rec1 = load '$outputDir/sortedbyteint1' using org.apache.hadoop.zebra.pig.TableLoader();
+rec2 = load '$outputDir/sortedbyteint2' using org.apache.hadoop.zebra.pig.TableLoader();
+
+joina = join rec1 by (byte1,int1), rec2 by (byte1,int1) using "merge" ;
+
+E = foreach joina  generate $0 as count,  $1 as seed,  $2 as int1,  $3 as str2, $4 as byte1;
+
+
+--limitedVals = LIMIT E 5;
+--dump limitedVals;
+
+--store E into '$outputDir/join4' using org.apache.hadoop.zebra.pig.TableStorer('');
+
+
+join4 = load '$outputDir/join4' using org.apache.hadoop.zebra.pig.TableLoader();
+orderjoin = order join4 by byte1,int1;
+store orderjoin into '$outputDir/join4_order' using org.apache.hadoop.zebra.pig.TableStorer('');
+

Added: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/stress_join_05.pig
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/stress_join_05.pig?rev=897283&view=auto
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/stress_join_05.pig (added)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/stress_join_05.pig Fri Jan  8 18:17:07 2010
@@ -0,0 +1,21 @@
+
+register /grid/0/dev/hadoopqa/jars/zebra.jar;
+
+a1 = LOAD '/user/hadoopqa/zebra/data/unsorted1' USING org.apache.hadoop.zebra.pig.TableLoader('count,seed,int1,str2');
+a2 = LOAD '/user/hadoopqa/zebra/data/unsorted2' USING org.apache.hadoop.zebra.pig.TableLoader('count,seed,int1,str2');
+
+sort1 = order a1 by str2;
+sort2 = order a2 by str2;
+
+store sort1 into '/user/hadoopqa/zebra/temp/sorted1' using org.apache.hadoop.zebra.pig.TableStorer('[count,seed,int1,str2]');
+store sort2 into '/user/hadoopqa/zebra/temp/sorted2' using org.apache.hadoop.zebra.pig.TableStorer('[count,seed,int1,str2]');
+
+rec1 = load '/user/hadoopqa/zebra/temp/sorted1' using org.apache.hadoop.zebra.pig.TableLoader();
+rec2 = load '/user/hadoopqa/zebra/temp/sorted2' using org.apache.hadoop.zebra.pig.TableLoader();
+
+joina = join rec1 by str2, rec2 by str2 using "merge" ;
+
+limitedVals = LIMIT joina 5;
+dump limitedVals;
+
+--store joina into '/user/hadoopqa/zebra/temp/join1' using org.apache.hadoop.zebra.pig.TableStorer('[count,seed,int1,str2];[count,seed,int1,str2]');

Added: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/stress_load_store_00.pig
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/stress_load_store_00.pig?rev=897283&view=auto
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/stress_load_store_00.pig (added)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/stress_load_store_00.pig Fri Jan  8 18:17:07 2010
@@ -0,0 +1,17 @@
+register $zebraJar;
+fs -rmr $outputDir
+
+a1 = LOAD '$inputDir/$unsorted1' USING org.apache.hadoop.zebra.pig.TableLoader('m1');
+--limitedVals = LIMIT a1 10;
+--dump limitedVals;
+
+store a1 into '$outputDir/store1' using org.apache.hadoop.zebra.pig.TableStorer('[m1]');    
+
+a2 = LOAD '$outputDir/store1' USING org.apache.hadoop.zebra.pig.TableLoader('m1');
+--limitedVals = LIMIT a2 10;
+--dump limitedVals;
+
+
+store a2 into '$outputDir/store2' using org.apache.hadoop.zebra.pig.TableStorer('[m1]');    
+
+                

Added: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/stress_load_store_01.pig
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/stress_load_store_01.pig?rev=897283&view=auto
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/stress_load_store_01.pig (added)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/stress_load_store_01.pig Fri Jan  8 18:17:07 2010
@@ -0,0 +1,17 @@
+register $zebraJar;
+fs -rmr $outputDir
+
+a1 = LOAD '$inputDir/$unsorted1' USING org.apache.hadoop.zebra.pig.TableLoader();
+--limitedVals = LIMIT a1 10;
+--dump limitedVals;
+
+store a1 into '$outputDir/store1' using org.apache.hadoop.zebra.pig.TableStorer('');    
+
+a2 = LOAD '$outputDir/store1' USING org.apache.hadoop.zebra.pig.TableLoader();
+--limitedVals = LIMIT a2 10;
+--dump limitedVals;
+
+
+store a2 into '$outputDir/store2' using org.apache.hadoop.zebra.pig.TableStorer('');    
+
+                

Added: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/stress_load_store_02.pig
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/stress_load_store_02.pig?rev=897283&view=auto
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/stress_load_store_02.pig (added)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/stress_load_store_02.pig Fri Jan  8 18:17:07 2010
@@ -0,0 +1,27 @@
+register $zebraJar;
+--fs -rmr $outputDir
+
+
+
+--a1 = LOAD '$inputDir/unsorted1' USING org.apache.hadoop.zebra.pig.TableLoader('int1,int2,str1,str2,byte1,byte2');
+--limitedVals = LIMIT a1 10;
+--dump limitedVals;
+
+--store a1 into '$outputDir/mix1' using org.apache.hadoop.zebra.pig.TableStorer('[int1];[int2];[byte2];[str2,str1]');
+
+--a2 = LOAD '$outputDir/mix1' USING org.apache.hadoop.zebra.pig.TableLoader('byte2,int2,int1,str1,str2');
+--limitedVals = LIMIT a2 10;
+--dump limitedVals;
+
+
+--store a2 into '$outputDir/mix1_2' using org.apache.hadoop.zebra.pig.TableStorer('[int1];[int2];[byte2];[str2,str1]');      
+
+a3 = LOAD '$outputDir/mix1_2' USING org.apache.hadoop.zebra.pig.TableLoader('byte2,int2,int1,str1,str2');
+--limitedVals = LIMIT a2 10;
+--dump limitedVals;
+
+
+store a3 into '$outputDir/mix1_1' using org.apache.hadoop.zebra.pig.TableStorer('[int1];[int2];[byte2];[str2,str1]');   
+
+--if only store once, and compare mix1 with mix1_1, table one has column number 6, table two has 5 (default column for table one)
+--now we should compare mix1_1 and mix1_2 . they should be identical        

Added: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/stress_load_store_03.pig
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/stress_load_store_03.pig?rev=897283&view=auto
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/stress_load_store_03.pig (added)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/stress_load_store_03.pig Fri Jan  8 18:17:07 2010
@@ -0,0 +1,16 @@
+register $zebraJar;
+--fs -rmr $outputDir
+
+
+a1 = LOAD '$inputDir/unsorted1' USING org.apache.hadoop.zebra.pig.TableLoader('m1');
+--limitedVals = LIMIT a1 10;
+--dump limitedVals;
+
+store a1 into '$outputDir/store1' using org.apache.hadoop.zebra.pig.TableStorer('[m1]');
+
+a2 = LOAD '$outputDir/store1' USING org.apache.hadoop.zebra.pig.TableLoader('m1');
+--limitedVals = LIMIT a2 10;
+--dump limitedVals;
+
+
+store a2 into '$outputDir/store2' using org.apache.hadoop.zebra.pig.TableStorer('[m1]'); 

Added: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/stress_load_store_04.pig
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/stress_load_store_04.pig?rev=897283&view=auto
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/stress_load_store_04.pig (added)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/stress_load_store_04.pig Fri Jan  8 18:17:07 2010
@@ -0,0 +1,18 @@
+register $zebraJar;
+--fs -rmr $outputDir
+
+
+
+a1 = LOAD '$inputDir/unsorted1' USING org.apache.hadoop.zebra.pig.TableLoader('c1');
+--limitedVals = LIMIT a1 10;
+--dump limitedVals;
+
+store a1 into '$outputDir/c1' using org.apache.hadoop.zebra.pig.TableStorer('[c1]');    
+
+a2 = LOAD '$outputDir/c1' USING org.apache.hadoop.zebra.pig.TableLoader('c1');
+--limitedVals = LIMIT a2 10;
+--dump limitedVals;
+
+
+store a2 into '$outputDir/c1_2' using org.apache.hadoop.zebra.pig.TableStorer('[c1]');    
+  

Added: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/stress_load_store_05.pig
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/stress_load_store_05.pig?rev=897283&view=auto
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/stress_load_store_05.pig (added)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/stress_load_store_05.pig Fri Jan  8 18:17:07 2010
@@ -0,0 +1,27 @@
+register $zebraJar;
+--fs -rmr $outputDir
+
+
+
+a1 = LOAD '$inputDir/unsorted1' USING org.apache.hadoop.zebra.pig.TableLoader('int1,int2,str1,str2,byte1,byte2,r1');
+--limitedVals = LIMIT a1 10;
+--dump limitedVals;
+
+store a1 into '$outputDir/r1' using org.apache.hadoop.zebra.pig.TableStorer('[int1];[int2];[byte2];[str2,str1,r1]');
+
+a2 = LOAD '$outputDir/r1' USING org.apache.hadoop.zebra.pig.TableLoader('byte2,int2,int1,str1,str2,r1');
+--limitedVals = LIMIT a2 10;
+--dump limitedVals;
+
+
+store a2 into '$outputDir/r1_2' using org.apache.hadoop.zebra.pig.TableStorer('[int1];[int2];[byte2];[str2,str1,r1]');  
+
+a3 = LOAD '$outputDir/r1_2' USING org.apache.hadoop.zebra.pig.TableLoader('byte2,int2,int1,str1,str2,r1');
+--limitedVals = LIMIT a2 10;
+--dump limitedVals;
+
+
+store a3 into '$outputDir/r1_1' using org.apache.hadoop.zebra.pig.TableStorer('[int1];[int2];[byte2];[str2,str1,r1]');
+
+--if only store once, and compare r1 with r1_1, table one has column number 6, table two has 5 (default column for table one)
+--now we should compare r1_1 and r1_2 . they should be identical

Added: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/stress_sort_01.pig
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/stress_sort_01.pig?rev=897283&view=auto
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/stress_sort_01.pig (added)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/stress_sort_01.pig Fri Jan  8 18:17:07 2010
@@ -0,0 +1,11 @@
+register $zebraJar;
+--fs -rmr $outputDir
+
+
+a1 = LOAD '$inputDir/$unsorted1' USING org.apache.hadoop.zebra.pig.TableLoader('count,seed,int1,int2,str1,str2,byte1,byte2,float1,long1,double1,m1,r1,c1');
+
+store a1 into '$outputDir/unsorted1' using org.apache.hadoop.zebra.pig.TableStorer('[count,seed,int1,int2,str1,str2,byte1,byte2,float1,long1,double1];[m1#{a}];[r1,c1]');
+
+sort1 = ORDER a1 BY int2;
+
+store sort1 into '$outputDir/sorted111' using org.apache.hadoop.zebra.pig.TableStorer('[count,seed,int1,int2,str1,str2,byte1,byte2,float1,long1,double1];[m1#{a}];[r1,c1]'); 

Added: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/stress_sort_01_save.pig
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/stress_sort_01_save.pig?rev=897283&view=auto
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/stress_sort_01_save.pig (added)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/stress_sort_01_save.pig Fri Jan  8 18:17:07 2010
@@ -0,0 +1,11 @@
+
+
+register /grid/0/dev/hadoopqa/jars/zebra.jar;
+
+a1 = LOAD '/user/hadoopqa/zebra/data/unsorted1' USING org.apache.hadoop.zebra.pig.TableLoader('count,seed,int1,str2');
+
+store a1 into '/user/hadoopqa/zebra/temp/unsorted1' using org.apache.hadoop.zebra.pig.TableStorer('[count,seed,int1,str2]');
+
+sort1 = ORDER a1 BY str2;
+
+store sort1 into '/user/hadoopqa/zebra/temp/sorted1' using org.apache.hadoop.zebra.pig.TableStorer('[count,seed,int1,str2]'); 

Added: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/stress_sort_02.pig
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/stress_sort_02.pig?rev=897283&view=auto
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/stress_sort_02.pig (added)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/stress_sort_02.pig Fri Jan  8 18:17:07 2010
@@ -0,0 +1,11 @@
+register $zebraJar;
+--fs -rmr $outputDir
+
+
+a1 = LOAD '$inputDir/$unsorted1' USING org.apache.hadoop.zebra.pig.TableLoader('count,seed,int1,int2,str1,str2,byte1,byte2,float1,long1,double1,m1,r1,c1');
+
+--store a1 into '$outputDir/unsortedbyte2' using org.apache.hadoop.zebra.pig.TableStorer('[count,seed,int1,int2,str1,str2,byte1,byte2,float1,long1,double1];[m1#{a}];[r1,c1]');
+
+sort1 = ORDER a1 BY byte2;
+
+store sort1 into '$outputDir/sortedbyte2_1' using org.apache.hadoop.zebra.pig.TableStorer('[seed,int1,int2,str1,str2,byte1,byte2,float1,long1,double1];[m1#{a},r1,c1]'); 

Added: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/stress_sort_03.pig
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/stress_sort_03.pig?rev=897283&view=auto
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/stress_sort_03.pig (added)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/stress_sort_03.pig Fri Jan  8 18:17:07 2010
@@ -0,0 +1,11 @@
+register $zebraJar;
+--fs -rmr $outputDir
+
+
+a1 = LOAD '$inputDir/$unsorted1' USING org.apache.hadoop.zebra.pig.TableLoader('count,seed,int1,int2,str1,str2,byte1,byte2,float1,long1,double1,m1,r1,c1');
+
+--store a1 into '$outputDir/unsortedbyte2' using org.apache.hadoop.zebra.pig.TableStorer('[count,seed,int1,int2,str1,str2,byte1,byte2,float1,long1,double1];[m1#{a}];[r1,c1]');
+
+sort1 = ORDER a1 BY byte2;
+
+store sort1 into '$outputDir/sortedbyte2_1' using org.apache.hadoop.zebra.pig.TableStorer('[seed,int1,int2,str1,str2,byte1,byte2,float1,long1,double1];[m1#{a},r1,c1]'); 

Added: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/stress_union_01.pig
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/stress_union_01.pig?rev=897283&view=auto
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/stress_union_01.pig (added)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/stress_union_01.pig Fri Jan  8 18:17:07 2010
@@ -0,0 +1,20 @@
+register $zebraJar;
+--fs -rmr $outputDir
+
+
+--a1 = LOAD '$inputDir/25Munsorted1' USING org.apache.hadoop.zebra.pig.TableLoader('count,seed,int2,str2,byte2');
+--a2 = LOAD '$inputDir/25Munsorted2' USING org.apache.hadoop.zebra.pig.TableLoader('count,seed,int2,str2,byte2');
+
+--sort1 = order a1 by int2;
+--sort2 = order a2 by int2;
+
+--store sort1 into '$outputDir/sortedint21' using org.apache.hadoop.zebra.pig.TableStorer('[count,seed,int2,str2,byte2]');
+--store sort2 into '$outputDir/sortedint22' using org.apache.hadoop.zebra.pig.TableStorer('[count,seed,int2,str2,byte2]');
+
+rec1 = load '$outputDir/sortedint21' using org.apache.hadoop.zebra.pig.TableLoader();
+rec2 = load '$outputDir/sortedint22' using org.apache.hadoop.zebra.pig.TableLoader();
+
+joina = LOAD '$outputDir/sortedint21,$outputDir/sortedint22' USING org.apache.hadoop.zebra.pig.TableLoader('count,seed,int2,str2,byte2', 'sorted');
+
+joinaa = order joina by int2;
+store joinaa into '$outputDir/union1' using org.apache.hadoop.zebra.pig.TableStorer('[count,seed,int2,str2,byte2]');

Added: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/stress_union_02.pig
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/stress_union_02.pig?rev=897283&view=auto
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/stress_union_02.pig (added)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/stress_union_02.pig Fri Jan  8 18:17:07 2010
@@ -0,0 +1,20 @@
+register $zebraJar;
+--fs -rmr $outputDir
+
+
+a1 = LOAD '$inputDir/25Munsorted1' USING org.apache.hadoop.zebra.pig.TableLoader('count,seed,int1,str2,byte1');
+a2 = LOAD '$inputDir/25Munsorted2' USING org.apache.hadoop.zebra.pig.TableLoader('count,seed,int1,str2,byte1');
+
+sort1 = order a1 by byte1;
+sort2 = order a2 by byte1;
+
+--store sort1 into '$outputDir/sortedbyte1' using org.apache.hadoop.zebra.pig.TableStorer('[count,seed,int1,str2,byte1]');
+--store sort2 into '$outputDir/sortedbyte2' using org.apache.hadoop.zebra.pig.TableStorer('[count,seed,int1,str2,byte1]');
+
+rec1 = load '$outputDir/sortedbyte1' using org.apache.hadoop.zebra.pig.TableLoader();
+rec2 = load '$outputDir/sortedbyte2' using org.apache.hadoop.zebra.pig.TableLoader();
+
+joina = LOAD '$outputDir/sortedbyte1,$outputDir/sortedbyte2' USING org.apache.hadoop.zebra.pig.TableLoader('count,seed,int1,str2,byte1', 'sorted');
+    
+joinaa = order joina by byte1;
+store joinaa into '$outputDir/union2' using org.apache.hadoop.zebra.pig.TableStorer('[count,seed,int1,str2,byte1]');

Added: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/stress_union_02_2.pig
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/stress_union_02_2.pig?rev=897283&view=auto
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/stress_union_02_2.pig (added)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/stress_union_02_2.pig Fri Jan  8 18:17:07 2010
@@ -0,0 +1,20 @@
+register $zebraJar;
+--fs -rmr $outputDir
+
+
+a1 = LOAD '$inputDir/25Munsorted3' USING org.apache.hadoop.zebra.pig.TableLoader('count,seed,int1,str2,byte1');
+a2 = LOAD '$inputDir/25Munsorted4' USING org.apache.hadoop.zebra.pig.TableLoader('count,seed,int1,str2,byte1');
+
+sort1 = order a1 by byte1;
+sort2 = order a2 by byte1;
+
+store sort1 into '$outputDir/sortedbyte3' using org.apache.hadoop.zebra.pig.TableStorer('[count,seed,int1,str2,byte1]');
+store sort2 into '$outputDir/sortedbyte4' using org.apache.hadoop.zebra.pig.TableStorer('[count,seed,int1,str2,byte1]');
+
+rec1 = load '$outputDir/sortedbyte3' using org.apache.hadoop.zebra.pig.TableLoader();
+rec2 = load '$outputDir/sortedbyte4' using org.apache.hadoop.zebra.pig.TableLoader();
+
+joina = LOAD '$outputDir/sortedbyte3,$outputDir/sortedbyte4' USING org.apache.hadoop.zebra.pig.TableLoader('count,seed,int1,str2,byte1', 'sorted');
+    
+
+store joina into '$outputDir/union2_2' using org.apache.hadoop.zebra.pig.TableStorer('[count,seed,int1,str2,byte1]');

Added: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/stress_union_03.pig
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/stress_union_03.pig?rev=897283&view=auto
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/stress_union_03.pig (added)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/stress_union_03.pig Fri Jan  8 18:17:07 2010
@@ -0,0 +1,22 @@
+register $zebraJar;
+--fs -rmr $outputDir
+
+
+a1 = LOAD '$inputDir/25Munsorted3' USING org.apache.hadoop.zebra.pig.TableLoader('count,seed,int1,str2');
+a2 = LOAD '$inputDir/25Munsorted4' USING org.apache.hadoop.zebra.pig.TableLoader('count,seed,int1,str2');
+
+sort1 = order a1 by str2;
+sort2 = order a2 by str2;
+
+--store sort1 into '$outputDir/strsorted1' using org.apache.hadoop.zebra.pig.TableStorer('[count,seed,int1,str2]');
+--store sort2 into '$outputDir/strsorted2' using org.apache.hadoop.zebra.pig.TableStorer('[count,seed,int1,str2]');
+
+rec1 = load '$outputDir/strsorted1' using org.apache.hadoop.zebra.pig.TableLoader();
+rec2 = load '$outputDir/strsorted2' using org.apache.hadoop.zebra.pig.TableLoader();
+
+joina = LOAD '$outputDir/strsorted1,$outputDir/strsorted2' USING org.apache.hadoop.zebra.pig.TableLoader('count,seed,int1,str2', 'sorted');
+    
+joinaa = order joina by str2;
+
+store joinaa into '$outputDir/union3' using org.apache.hadoop.zebra.pig.TableStorer('[count,seed,int1,str2]');
+

Added: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/stress_union_04.pig
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/stress_union_04.pig?rev=897283&view=auto
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/stress_union_04.pig (added)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/stress_union_04.pig Fri Jan  8 18:17:07 2010
@@ -0,0 +1,20 @@
+register $zebraJar;
+--fs -rmr $outputDir
+
+
+--a1 = LOAD '$inputDir/25Munsorted1' USING org.apache.hadoop.zebra.pig.TableLoader('count,seed,int2,str2,byte2');
+--a2 = LOAD '$inputDir/25Munsorted2' USING org.apache.hadoop.zebra.pig.TableLoader('count,seed,int2,str2,byte2');
+
+--sort1 = order a1 by int2,byte2;
+--sort2 = order a2 by int2,byte2;
+
+--store sort1 into '$outputDir/sortedintbyte21' using org.apache.hadoop.zebra.pig.TableStorer('[count,seed,int2,str2,byte2]');
+--store sort2 into '$outputDir/sortedintbyte22' using org.apache.hadoop.zebra.pig.TableStorer('[count,seed,int2,str2,byte2]');
+
+rec1 = load '$outputDir/sortedintbyte21' using org.apache.hadoop.zebra.pig.TableLoader();
+rec2 = load '$outputDir/sortedintbyte22' using org.apache.hadoop.zebra.pig.TableLoader();
+
+joina = LOAD '$outputDir/sortedintbyte21,$outputDir/sortedintbyte22' USING org.apache.hadoop.zebra.pig.TableLoader('count,seed,int2,str2,byte2', 'sorted');
+
+joinaa = order joina by int2,byte2;
+store joinaa into '$outputDir/union4' using org.apache.hadoop.zebra.pig.TableStorer('[count,seed,int2,str2,byte2]');

Added: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/testjoing1.pig
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/testjoing1.pig?rev=897283&view=auto
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/testjoing1.pig (added)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/testjoing1.pig Fri Jan  8 18:17:07 2010
@@ -0,0 +1,23 @@
+
+register $zebraJar;
+--fs -rmr $outputDir
+
+
+--a1 = LOAD '$inputDir/unsorted1' USING org.apache.hadoop.zebra.pig.TableLoader('count,seed,int1,str2');
+--a2 = LOAD '$inputDir/unsorted2' USING org.apache.hadoop.zebra.pig.TableLoader('count,seed,int1,str2');
+
+--sort1 = order a1 by str2;
+--sort2 = order a2 by str2;
+
+--store sort1 into '$outputDir/sorted11' using org.apache.hadoop.zebra.pig.TableStorer('[count,seed,int1,str2]');
+--store sort2 into '$outputDir/sorted21' using org.apache.hadoop.zebra.pig.TableStorer('[count,seed,int1,str2]');
+
+rec1 = load '$outputDir/sorted11' using org.apache.hadoop.zebra.pig.TableLoader('','sorted');
+rec2 = load '$outputDir/sorted21' using org.apache.hadoop.zebra.pig.TableLoader('','sorted');
+
+joina = join rec1 by str2, rec2 by str2 using "merge" ;
+
+E = foreach joina  generate $0 as count,  $1 as seed,  $2 as int1,  $3 as str2;
+
+
+store E into '$outputDir/testjoin21' using org.apache.hadoop.zebra.pig.TableStorer('');

Added: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/union1.pig
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/union1.pig?rev=897283&view=auto
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/union1.pig (added)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/union1.pig Fri Jan  8 18:17:07 2010
@@ -0,0 +1,17 @@
+register $zebraJar;
+--fs -rmr $outputDir
+
+
+a1 = LOAD '$inputDir/25Munsorted1' USING org.apache.hadoop.zebra.pig.TableLoader('count,seed,int1,str2,long1');
+a2 = LOAD '$inputDir/25Munsorted2' USING org.apache.hadoop.zebra.pig.TableLoader('count,seed,int1,str2,long1');
+
+sort1 = order a1 by long1 parallel 6;
+sort2 = order a2 by long1 parallel 5;
+
+store sort1 into '$outputDir/25MS1' using org.apache.hadoop.zebra.pig.TableStorer('[count,seed,int1,str2,long1]');
+store sort2 into '$outputDir/25MS2' using org.apache.hadoop.zebra.pig.TableStorer('[count,seed,int1,str2,long1]');
+
+
+union1 = LOAD '$outputDir/25MS1,$outputDir/25MS2' USING org.apache.hadoop.zebra.pig.TableLoader('count,seed,int1,str2,long1', 'sorted');
+orderunion1 = order union1 by long1 parallel 7;
+store orderunion1 into '$outputDir/u1' using org.apache.hadoop.zebra.pig.TableStorer('[count,seed,int1,str2,long1]');    

Added: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/union2.pig
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/union2.pig?rev=897283&view=auto
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/union2.pig (added)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/union2.pig Fri Jan  8 18:17:07 2010
@@ -0,0 +1,17 @@
+register $zebraJar;
+--fs -rmr $outputDir
+
+
+a1 = LOAD '$inputDir/25Munsorted3' USING org.apache.hadoop.zebra.pig.TableLoader('count,seed,int1,str2,long1');
+a2 = LOAD '$inputDir/25Munsorted4' USING org.apache.hadoop.zebra.pig.TableLoader('count,seed,int1,str2,long1');
+
+sort1 = order a1 by long1 parallel 6;
+sort2 = order a2 by long1 parallel 5;
+
+store sort1 into '$outputDir/25MS3' using org.apache.hadoop.zebra.pig.TableStorer('[count,seed,int1,str2,long1]');
+store sort2 into '$outputDir/25MS4' using org.apache.hadoop.zebra.pig.TableStorer('[count,seed,int1,str2,long1]');
+
+
+union1 = LOAD '$outputDir/25MS3,$outputDir/25MS4' USING org.apache.hadoop.zebra.pig.TableLoader('count,seed,int1,str2,long1', 'sorted');
+orderunion1 = order union1 by long1 parallel 7;
+store orderunion1 into '$outputDir/u2' using org.apache.hadoop.zebra.pig.TableStorer('[count,seed,int1,str2,long1]');        

Added: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/union3.pig
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/union3.pig?rev=897283&view=auto
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/union3.pig (added)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/union3.pig Fri Jan  8 18:17:07 2010
@@ -0,0 +1,17 @@
+register $zebraJar;
+--fs -rmr $outputDir
+
+
+a1 = LOAD '$inputDir/50Munsorted1' USING org.apache.hadoop.zebra.pig.TableLoader('count,seed,int1,str2,long1');
+a2 = LOAD '$inputDir/50Munsorted2' USING org.apache.hadoop.zebra.pig.TableLoader('count,seed,int1,str2,long1');
+
+sort1 = order a1 by long1 parallel 6;
+sort2 = order a2 by long1 parallel 5;
+
+store sort1 into '$outputDir/50MS1' using org.apache.hadoop.zebra.pig.TableStorer('[count,seed,int1,str2,long1]');
+store sort2 into '$outputDir/50MS2' using org.apache.hadoop.zebra.pig.TableStorer('[count,seed,int1,str2,long1]');
+
+
+union1 = LOAD '$outputDir/50MS1,$outputDir/50MS2' USING org.apache.hadoop.zebra.pig.TableLoader('count,seed,int1,str2,long1', 'sorted');
+orderunion1 = order union1 by long1 parallel 7;
+store orderunion1 into '$outputDir/u3' using org.apache.hadoop.zebra.pig.TableStorer('[count,seed,int1,str2,long1]');    

Added: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/union4.pig
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/union4.pig?rev=897283&view=auto
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/union4.pig (added)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/union4.pig Fri Jan  8 18:17:07 2010
@@ -0,0 +1,17 @@
+register $zebraJar;
+--fs -rmr $outputDir
+
+
+a1 = LOAD '$inputDir/50Munsorted3' USING org.apache.hadoop.zebra.pig.TableLoader('count,seed,int1,str2,long1');
+a2 = LOAD '$inputDir/50Munsorted4' USING org.apache.hadoop.zebra.pig.TableLoader('count,seed,int1,str2,long1');
+
+sort1 = order a1 by long1 parallel 6;
+sort2 = order a2 by long1 parallel 5;
+
+store sort1 into '$outputDir/50MS3' using org.apache.hadoop.zebra.pig.TableStorer('[count,seed,int1,str2,long1]');
+store sort2 into '$outputDir/50MS4' using org.apache.hadoop.zebra.pig.TableStorer('[count,seed,int1,str2,long1]');
+
+
+union1 = LOAD '$outputDir/50MS3,$outputDir/50MS4' USING org.apache.hadoop.zebra.pig.TableLoader('count,seed,int1,str2,long1', 'sorted');
+orderunion1 = order union1 by long1 parallel 7;
+store orderunion1 into '$outputDir/u4' using org.apache.hadoop.zebra.pig.TableStorer('[count,seed,int1,str2,long1]');    

Modified: hadoop/pig/branches/load-store-redesign/src/docs/src/documentation/content/xdocs/piglatin_reference.xml
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/src/docs/src/documentation/content/xdocs/piglatin_reference.xml?rev=897283&r1=897282&r2=897283&view=diff
==============================================================================
--- hadoop/pig/branches/load-store-redesign/src/docs/src/documentation/content/xdocs/piglatin_reference.xml (original)
+++ hadoop/pig/branches/load-store-redesign/src/docs/src/documentation/content/xdocs/piglatin_reference.xml Fri Jan  8 18:17:07 2010
@@ -4919,58 +4919,7 @@
 
    </section></section>
    
-   <section>
-   <title>DUMP</title>
-   <para>Displays the contents of a relation.</para>
-   
-   <section>
-   <title>Syntax</title>
-   <informaltable frame="all">
-      <tgroup cols="1"><tbody><row>
-            <entry>
-               <para>DUMP alias;        </para>
-            </entry>
-         </row></tbody></tgroup>
-   </informaltable></section>
-   
-   <section>
-   <title>Terms</title>
-   <informaltable frame="all">
-      <tgroup cols="2"><tbody><row>
-            <entry>
-               <para>alias</para>
-            </entry>
-            <entry>
-               <para>The name of a relation.</para>
-            </entry>
-         </row></tbody></tgroup>
-   </informaltable></section>
-   
-   <section>
-   <title>Usage</title>
-   <para>Use the DUMP operator to run (execute) a Pig Latin statement and to display the contents of an alias. You can use DUMP as a debugging device to make sure the results you are expecting are being generated.</para></section>
-   
-   <section>
-   <title>Example</title>
-   <para>In this example a dump is performed after each statement.</para>
-<programlisting>
-A = LOAD 'student' AS (name:chararray, age:int, gpa:float);
-
-DUMP A;
-(John,18,4.0F)
-(Mary,19,3.7F)
-(Bill,20,3.9F)
-(Joe,22,3.8F)
-(Jill,20,4.0F)
-
-B = FILTER A BY name matches 'J.+';
-
-DUMP B;
-(John,18,4.0F)
-(Joe,22,3.8F)
-(Jill,20,4.0F)
-</programlisting>
-</section></section>
+  
    
    <section>
    <title>FILTER </title>
@@ -6521,7 +6470,7 @@
    
    <section>
    <title>STORE </title>
-   <para>Stores data to the file system.</para>
+   <para>Stores or saves results to the file system.</para>
    
    <section>
    <title>Syntax</title>
@@ -6591,7 +6540,10 @@
    
    <section>
    <title>Usage</title>
-   <para>Use the STORE operator to run (execute) Pig Latin statements and to store data on the file system. </para></section>
+   <para>Use the STORE operator to run (execute) Pig Latin statements and save (persist) results to the file system. Use STORE for production scripts and batch mode processing.</para>
+   
+   <para>Note: To debug scripts during development, you can use <ulink url="piglatin_reference.html#DUMP">DUMP</ulink> to check intermediate results.</para>
+</section>
    
    <section>
    <title>Examples</title>
@@ -6962,6 +6914,68 @@
    
    </section></section>
    
+   
+ <section>
+   <title>DUMP</title>
+   <para>Dumps or displays results to screen.</para>
+   
+   <section>
+   <title>Syntax</title>
+   <informaltable frame="all">
+      <tgroup cols="1"><tbody><row>
+            <entry>
+               <para>DUMP alias;        </para>
+            </entry>
+         </row></tbody></tgroup>
+   </informaltable></section>
+   
+   <section>
+   <title>Terms</title>
+   <informaltable frame="all">
+      <tgroup cols="2"><tbody><row>
+            <entry>
+               <para>alias</para>
+            </entry>
+            <entry>
+               <para>The name of a relation.</para>
+            </entry>
+         </row></tbody></tgroup>
+   </informaltable></section>
+   
+   <section>
+   <title>Usage</title>
+   <para>Use the DUMP operator to run (execute) Pig Latin statements and display the results to your screen. DUMP is meant for interactive mode; statements are executed immediately and the results are not saved (persisted). You can use DUMP as a debugging device to make sure that the results you are expecting are actually generated. </para>
+   
+   <para>
+   Note that production scripts <emphasis>should not</emphasis> use DUMP as it will disable multi-query optimizations and is likely to slow down execution 
+   (see <ulink url="piglatin_users.html#Store+vs.+Dump">Store vs. Dump</ulink>).
+   </para>
+   </section>
+   
+   <section>
+   <title>Example</title>
+   <para>In this example a dump is performed after each statement.</para>
+<programlisting>
+A = LOAD 'student' AS (name:chararray, age:int, gpa:float);
+
+DUMP A;
+(John,18,4.0F)
+(Mary,19,3.7F)
+(Bill,20,3.9F)
+(Joe,22,3.8F)
+(Jill,20,4.0F)
+
+B = FILTER A BY name matches 'J.+';
+
+DUMP B;
+(John,18,4.0F)
+(Joe,22,3.8F)
+(Jill,20,4.0F)
+</programlisting>
+</section></section>   
+   
+   
+   
    <section>
    <title>EXPLAIN</title>
    <para>Displays execution plans.</para>

Modified: hadoop/pig/branches/load-store-redesign/src/docs/src/documentation/content/xdocs/piglatin_users.xml
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/src/docs/src/documentation/content/xdocs/piglatin_users.xml?rev=897283&r1=897282&r2=897283&view=diff
==============================================================================
--- hadoop/pig/branches/load-store-redesign/src/docs/src/documentation/content/xdocs/piglatin_users.xml (original)
+++ hadoop/pig/branches/load-store-redesign/src/docs/src/documentation/content/xdocs/piglatin_users.xml Fri Jan  8 18:17:07 2010
@@ -54,7 +54,7 @@
   
    <section>
    <title>Running Pig Latin </title>
-   <p>You can execute Pig Latin statements interactively or in batch mode using Pig scripts (see the EXEC and RUN operators).</p>
+   <p>You can execute Pig Latin statements interactively or in batch mode using Pig scripts (see the <a href="piglatin_reference.html#exec">exec</a> and <a href="piglatin_reference.html#run">run</a> commands).</p>
    
    <p>Grunt Shell, Interactive or Batch Mode</p>
    <source>
@@ -228,15 +228,12 @@
 <!-- MULTI-QUERY EXECUTION-->
 <section>
 <title>Multi-Query Execution</title>
-<p>With multi-query execution Pig processes an entire script or a batch of statements at once 
-(as opposed to processing statements when a DUMP or STORE is encountered). </p>
-
-
+<p>With multi-query execution Pig processes an entire script or a batch of statements at once.</p>
 
 <section>
 	<title>Turning Multi-Query Execution On or Off</title>	
 	<p>Multi-query execution is turned on by default. 
-	To turn it off and revert to Pi'gs "execute-on-dump/store" behavior, use the "-M" or "-no_multiquery" options. </p>
+	To turn it off and revert to Pig's "execute-on-dump/store" behavior, use the "-M" or "-no_multiquery" options. </p>
 	<p>To run script "myscript.pig" without the optimization, execute Pig as follows: </p>
 <source>
 $ pig -M myscript.pig
@@ -253,7 +250,8 @@
 <li>
 <p>For batch mode execution, the entire script is first parsed to determine if intermediate tasks 
 can be combined to reduce the overall amount of work that needs to be done; execution starts only after the parsing is completed 
-(see the EXPLAIN operator and the EXEC and RUN commands). </p>
+(see the <a href="piglatin_reference.html#EXPLAIN">EXPLAIN</a> operator and the <a href="piglatin_reference.html#exec">exec</a> and <a href="piglatin_reference.html#run">run</a> commands). </p>
+
 </li>
 <li>
 <p>Two run scenarios are optimized, as explained below: explicit and implicit splits, and storing intermediate results.</p>
@@ -316,7 +314,32 @@
 </section>
 </section>
 
+<section>
+	<title>Store vs. Dump</title>
+	<p>With multi-query execution, you want to use <a href="piglatin_reference.html#STORE">STORE</a> to save (persist) your results. 
+	You do not want to use <a href="piglatin_reference.html#DUMP">DUMP</a> as it will disable multi-query execution and is likely to slow down execution. (If you have included DUMP statements in your scripts for debugging purposes, you should remove them.) </p>
+	
+	<p>DUMP Example: In this script, because the DUMP command is interactive, the multi-query execution will be disabled and two separate jobs will be created to execute this script. The first job will execute A > B > DUMP while the second job will execute A > B > C > STORE.</p>
+	
+<source>
+A = LOAD 'input' AS (x, y, z);
+B = FILTER A BY x > 5;
+DUMP B;
+C = FOREACH B GENERATE y, z;
+STORE C INTO 'output';
+</source>
+	
+	<p>STORE Example: In this script, multi-query optimization will kick in allowing the entire script to be executed as a single job. Two outputs are produced: output1 and output2.</p>
+	
+<source>
+A = LOAD 'input' AS (x, y, z);
+B = FILTER A BY x > 5;
+STORE B INTO 'output1';
+C = FOREACH B GENERATE y, z;
+STORE C INTO 'output2';
+</source>
 
+</section>
 <section>
 	<title>Error Handling</title>
 	<p>With multi-query execution Pig processes an entire script or a batch of statements at once. 
@@ -352,10 +375,10 @@
 	<title>Backward Compatibility</title>
 	
 	<p>Most existing Pig scripts will produce the same result with or without the multi-query execution. 
-	There are cases though were this is not true. Path names and schemes are discussed here.</p>
+	There are cases though where this is not true. Path names and schemes are discussed here.</p>
 	
 	<p>Any script is parsed in its entirety before it is sent to execution. Since the current directory can change 
-	throughout the script any path used in load or store is translated to a fully qualified and absolute path.</p>
+	throughout the script any path used in LOAD or STORE statement is translated to a fully qualified and absolute path.</p>
 		
 	<p>In map-reduce mode, the following script will load from "hdfs://&lt;host&gt;:&lt;port&gt;/data1" and store into "hdfs://&lt;host&gt;:&lt;port&gt;/tmp/out1". </p>
 <source>
@@ -375,7 +398,7 @@
 		<li><p>Specify a custom scheme for the LoadFunc/Slicer </p></li>
 	</ol>	
 	
-	<p>Arguments used in a load statement that have a scheme other than "hdfs" or "file" will not be expanded and passed to the LoadFunc/Slicer unchanged.</p>
+	<p>Arguments used in a LOAD statement that have a scheme other than "hdfs" or "file" will not be expanded and passed to the LoadFunc/Slicer unchanged.</p>
 	<p>In the SQL case, the SQLLoader function is invoked with "sql://mytable". </p>
 
 <source>
@@ -416,7 +439,7 @@
 
 <section>
 	<title>Example</title>
-<p>In this script, the store/load operators have different file paths; however, the load operator depends on the store operator.</p>
+<p>In this script, the STORE/LOAD operators have different file paths; however, the LOAD operator depends on the STORE operator.</p>
 <source>
 A = LOAD '/user/xxx/firstinput' USING PigStorage();
 B = group ....

Modified: hadoop/pig/branches/load-store-redesign/src/docs/src/documentation/content/xdocs/zebra_mapreduce.xml
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/src/docs/src/documentation/content/xdocs/zebra_mapreduce.xml?rev=897283&r1=897282&r2=897283&view=diff
==============================================================================
--- hadoop/pig/branches/load-store-redesign/src/docs/src/documentation/content/xdocs/zebra_mapreduce.xml (original)
+++ hadoop/pig/branches/load-store-redesign/src/docs/src/documentation/content/xdocs/zebra_mapreduce.xml Fri Jan  8 18:17:07 2010
@@ -45,14 +45,215 @@
     </section>
 <!-- END HADOOP M/R API--> 
 
+ <!-- ZEBRA API-->
+   <section>
+   <title>Zebra MapReduce APIs</title>
+    <p>Zebra includes several classes for use in MapReduce programs. The main entry points into Zebra are the two classes for reading and writing tables, namely TableInputFormat and BasicTableOutputFormat. </p>
+
+    	<section>
+         <title>BasicTableOutputFormat  </title>    	
+    	<table>
+		<tr><th>Static</th><th>Method</th><th>Description</th></tr>
+		<tr>
+			<td>yes</td>
+			<td>void setOutputPath(JobConf, Path)  </td>
+			<td>Set the output path of the BasicTable in JobConf  </td>
+		</tr>
+		<tr>
+			<td>yes</td>
+			<td>Path[] getOutputPaths(JobConf) </td>
+			<td>Get the output paths of the BasicTable from JobConf </td>
+		</tr>
+		<tr>
+			<td>yes</td>
+			<td>void setStorageInfo(JobConf, ZebraSchema, ZebraStorageHint, ZebraSortInfo) </td>
+			<td>Set the table storage information (schema, storagehint, sortinfo) in JobConf</td>
+		</tr>
+			<tr>
+			<td>yes</td>
+			<td>Schema getSchema(JobConf)  </td>
+			<td>Get the table schema in JobConf  </td>
+		</tr>
+	    <tr>
+			<td>yes</td>
+			<td>BytesWritable generateSortKey(JobConf, Tuple)  </td>
+			<td>Generates a BytesWritable key for the input key </td>
+		</tr>
+		<tr>
+			<td>yes</td>
+			<td>String getStorageHint(JobConf)  </td>
+			<td>Get the table storage hint in JobConf  </td>
+		</tr>
+		<tr>
+			<td>yes</td>
+			<td>SortInfo getSortInfo(JobConf)  </td>
+			<td>Get the SortInfo object  </td>
+		</tr>
+		<tr>
+			<td>yes</td>
+			<td>void close(JobConf)  </td>
+			<td>Close the output BasicTable; no more rows can be added to the table  </td>
+		</tr>
+	   <tr>
+			<td>yes</td>
+			<td>void setMultipleOutputs(JobConf, String commaSeparatedLocs, Class &lt; extends ZebraOutputPartition&gt; theClass)  </td>
+			<td>Enables data to be written to multiple zebra tables based on the ZebraOutputPartition class. 
+			See <a href="zebra_mapreduce.html#Multiple+Table+Outputs">Multiple Table Outputs.</a></td>
+		</tr>
+    	</table> 
+        </section>
+        
+          	<section>
+         <title>TableInputFormat   </title>    
+    	<table>
+		<tr><th>Static</th><th>Method</th><th>Description</th></tr>
+		<tr>
+			<td>yes</td>
+			<td>void setInputPaths(JobConf, Path... paths)  </td>
+			<td>Set the paths to the input table </td>
+
+		</tr>
+		<tr>
+			<td>yes</td>
+			<td>Path[] getInputPaths(JobConf)  </td>
+			<td>Get the comma-separated paths to the input table or table union  </td>
+		</tr>
+		<tr>
+			<td>yes</td>
+			<td>Schema getSchema(JobConf)  </td>
+			<td>Get the schema of a table expr  </td>
+		</tr>
+		<tr>
+			<td>yes</td>
+			<td>void setProjection(JobConf, ZebraProjection)  </td>
+			<td>Set the input projection in the JobConf object  </td>
+		</tr>
+		<tr>
+			<td>yes</td>
+			<td>String getProjection(JobConf) </td>
+			<td>Get the projection from the JobConf </td>
+		</tr>
+		<tr>
+			<td>yes</td>
+			<td>SortInfo getSortInfo(JobConf)  </td>
+			<td>Get the SortInfo object regarding a Zebra table  </td>
+		</tr>
+		<tr>
+			<td>yes</td>
+			<td>void requireSortedTable(JobConf, String sortcolumns, BytesComparator comparator) </td>
+			<td>Requires sorted table or table union </td>
+		</tr>
+	    <tr>
+			<td> yes </td>
+			<td>TableRecordReader getTableRecordReader(JobConf, ZebraProjection) </td>
+			<td>Get a TableRecordReader on a single split </td>
+		</tr>
+		<tr>
+			<td>yes</td>
+			<td>void setMinSplitSize(JobConf, long minSize) </td>
+			<td>Set the minimum split size, default of 1M bytes </td>
+		</tr>
+    	</table>
+    	</section>
+
+    <section>
+    <title>TableRecordReader </title>
+    	<table>
+		<tr><th>Static</th><th>Method</th><th>Description</th></tr>
+		<tr>
+			<td>no</td>
+			<td>boolean seekTo(BytesWritable key) </td>
+			<td>Seek to the position at the first row which has the key (returning true) or just after the key (returning false); only applicable for a sorted Zebra table.  </td>
+		</tr>
+    	</table>
+     </section>
+     
+     
+     
+     <section>
+         <title>ZebraOutputPartition </title>
+    	<table>
+		<tr><th>Static</th><th>Method</th><th>Description</th></tr>
+		<tr>
+			<td> no </td>
+			<td>public abstract int getOutputPartition(BytesWritable key, Tuple value) </td>
+			<td>Abstract method from ZebraOutputPartition abstract class. App implements this to stream data to different table  </td>
+		</tr>
+		<tr>
+			<td> no </td>
+			<td>void setConf(Configuration jobConf)  </td>
+			<td>Initialization routine giving JobConf to application. Zebra implements it  </td>
+		</tr>
+		<tr>
+			<td> no </td>
+			<td>Configuration getConf()  </td>
+			<td> returns JobConf. Zebra implements it</td>
+		</tr>
+		<tr>
+			<td>yes </td>
+			<td>Class&lt; extends ZebraOutputPartition&gt; getZebraOutputPartitionClass(JobConf conf) </td>
+			<td>return user implemented ZebraOutputPartition class  </td>
+		</tr>
+    	</table>
+   	   </section>
+   	   
+   	   
+    <section>
+    <title>ZebraProjection </title>
+    	<table>
+		<tr><th>Static</th><th>Method</th><th>Description</th></tr>
+		<tr>
+			<td>yes</td>
+			<td>ZebraProjection createZebraProjection(String) </td>
+			<td>Create a ZebraProjection object from a string representing projection information. </td>
+		</tr>
+    	</table>
+     </section>
+     
+    <section>
+    <title>ZebraSchema</title>
+    	<table>
+		<tr><th>Static</th><th>Method</th><th>Description</th></tr>
+		<tr>
+			<td>yes</td>
+			<td>ZebraSchema createZebraSchema(String)  </td>
+			<td>Create a ZebraSchema object from a string representing schema information.</td>
+		</tr>
+    	</table>
+     </section>     
+     
+    <section>
+    <title>ZebraStorageHint </title>
+    	<table>
+		<tr><th>Static</th><th>Method</th><th>Description</th></tr>
+		<tr>
+			<td>yes</td>
+			<td>ZebraStorageHint createZebraStorageHint(String) </td>
+			<td>Create a ZebraStorageHint object from a string representing storage hint information. </td>
+		</tr>
+    	</table>
+     </section>   
+     
+    <section>
+    <title>ZebraSortInfo </title>
+    	<table>
+		<tr><th>Static</th><th>Method</th><th>Description</th></tr>
+		<tr>
+			<td>yes</td>
+			<td>ZebraSortInfo createZebraSortInfo(String sortColumns, Class&lt; extends RawComparator &lt; Object &gt;&gt; comparatorClass) </td>
+			<td>Create a ZebraSortInfo object from a sort columns string and a comparator class. </td>
+		</tr>
+    	</table>
+     </section>   
+         </section>
+ <!-- END ZEBRA API--> 
+
+
  
  <!-- ZEBRA M/R EXAMPLES-->
 <section>
 <title>Zebra MapReduce Examples</title> 
  
- 
-
- 
  <!-- ZEBRA OUTPUT EXAMPLE-->
 <section>
 <title>Table Output Format</title>

Modified: hadoop/pig/branches/load-store-redesign/src/docs/src/documentation/content/xdocs/zebra_pig.xml
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/src/docs/src/documentation/content/xdocs/zebra_pig.xml?rev=897283&r1=897282&r2=897283&view=diff
==============================================================================
--- hadoop/pig/branches/load-store-redesign/src/docs/src/documentation/content/xdocs/zebra_pig.xml (original)
+++ hadoop/pig/branches/load-store-redesign/src/docs/src/documentation/content/xdocs/zebra_pig.xml Fri Jan  8 18:17:07 2010
@@ -142,7 +142,7 @@
 -- Load an existing table (one column is projected)
 
 C = LOAD '$PATH/tbl4' USING org.apache.hadoop.zebra.pig.TableLoader('c2');   
-    </source>
+</source>
    </section>
    <!--end example: simple types-->
     
@@ -177,11 +177,32 @@
 -- Load an existing table (two columns are projected)
 
 B = LOAD '$PATH/tbl3' USING org.apache.hadoop.zebra.pig.TableLoader('s1, r1');    
-    </source>
+</source>
    </section>    
    <!--end example: complex types--> 
+
+
+       <!--example: HDFS Globs-->
+   <section>
+    <title>HDFS File Globs</title>
+        <p>Pig supports HDFS file globs 
+    (for more information about globs, see <a href="http://hadoop.apache.org/common/docs/current/api/org/apache/hadoop/fs/FileSystem.html">FileSystem</a> and GlobStatus).</p>
+    <p>In this example, all Zebra tables in the directory of /path/to/PIG/tables will be loaded as a union (table union). </p>
+ <source>
+ A = LOAD '/path/to/PIG/tables/*' USING org.apache.hadoop.zebra.pig.TableLoader('');
+</source>
+    
+<p>In this example, three Zebra tables of t1, t2 and t3 in /path/to/PIG/tables will be loaded as a union (table union). Note that the ordering of the three tables in the union may not necessarily be t1 followed by t2 followed by t3 as you would expect if you specified '/path/to/PIG/tables/t1, /path/to/PIG/tables/t2, /path/to/PIG/tables/t3'. Instead, the ordering is determined by the ordering the HDFS glob expansion generates, namely, the <em>string ordering</em> of the expanded paths.
+</p>
+
+ <source>
+ A = LOAD '/path/to/PIG/tables/{t1, t2, t3}' USING org.apache.hadoop.zebra.pig.TableLoader('');
+</source>
+   <p></p>
+    </section>    
+   <!--end example: HDFS Globs-->
     </section>
-  <!-- END ZEBRA EXAMPLES-->    
+<!-- END ZEBRA EXAMPLES-->    
   
  </body>
  </document>

Modified: hadoop/pig/branches/load-store-redesign/src/docs/src/documentation/content/xdocs/zebra_reference.xml
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/src/docs/src/documentation/content/xdocs/zebra_reference.xml?rev=897283&r1=897282&r2=897283&view=diff
==============================================================================
--- hadoop/pig/branches/load-store-redesign/src/docs/src/documentation/content/xdocs/zebra_reference.xml (original)
+++ hadoop/pig/branches/load-store-redesign/src/docs/src/documentation/content/xdocs/zebra_reference.xml Fri Jan  8 18:17:07 2010
@@ -144,8 +144,8 @@
  
    <!-- STORE SCHEMA SPECIFICATION -->
    <section>
-   <title>Store Schema Specification</title>
-<p>The Zebra store schema is used to write or store Zebra columns and to specify column types. 
+   <title>Store Schema</title>
+<p>Use the Zebra store schema to write or store Zebra columns and to specify column types. 
 The schema  supports data type compatibility and conversion between Zebra/Pig, Zebra/MapReduce, and Zebra/Streaming.
 (<em>In a future release, the schema will also support type compatibility between Zebra/Pig-SQL and will guide the underlying serialization formats provided by Avro for projection, filtering, and so on. </em>)
 </p>   
@@ -361,24 +361,23 @@
    <section>
    <title>Storage Specification</title>
    
- <p> The Zebra storage specification is used to specify column groups and the columns in those groups. 
-</p>
- 
- <p>
- The storage specification describes the physical structure of a Zebra table where $PATH/tbl indicates the table_directory and column_group_name
- indicates the subdirectories within the table_directory. The STORE statement below indicates the following: 
+ <p> Use the Zebra storage specification to define Zebra column groups. The storage specification, when combined with a STORE statement, describes the physical structure of a Zebra table. Suppose we have the following statement:
  </p>
- <ul>
-		<li>$PATH/mytable - directory</li>
-		<li>$PATH/mytable/A - subdirectory </li>
-		<li>$PATH/mytable/A/part00001 </li>
-		<li><em>etc ...</em></li>
- </ul>
-
+ 
 <source>
-STORE A INTO '$PATH/mytable' USING org.apache.hadoop.zebra.pig.TableStorer('[a1, a2] AS A; [a3, a4, a5] AS B');
+STORE A INTO '$PATH/mytable' USING org.apache.hadoop.zebra.pig.TableStorer('[a1, a2] AS cg1; [a3, a4, a5] AS cg2');
 </source>
 
+<p>The statement describes a table that has two column groups; the first column group has two columns, the second column group has three columns. The statement can be interpreted as follows:</p>
+
+ <ul>
+		<li>$PATH/mytable - the table, a file path to a directory named mytable</li>
+		<li>$PATH/mytable/cg1 - the first column group, a subdirectory named cg1 under directory mytable </li>
+		<li>$PATH/mytable/cg1/part00001 - a file consisting, conceptually, of columns a1 and a2</li>
+	    <li>$PATH/mytable/cg2 - the second column group, a subdirectory named cg2 under directory mytable </li>
+		<li>$PATH/mytable/cg2/part00001 - a file consisting, conceptually, of columns a3, a4, and a5</li>
+ </ul>
+
   <section>
    <title>Specification</title>
     <p>  The basic format for the Zebra storage specification is shown here. 
@@ -448,9 +447,9 @@
    
    <!-- LOAD SCHEMA SPECIFICATION -->
    <section>
-   <title>Load Schema Specification</title>
+   <title>Load Schema</title>
    
-   <p>The Zebra load schema is load or read table columns. </p>
+   <p>Use the Zebra load schema to load or read table columns.</p>
      <section>
    <title>Schema</title>
    <p>The basic format for the Zebra load (read) schema is shown here. The column name can be any valid Zebra type.  
@@ -490,209 +489,6 @@
    </section>
    </section>
    <!-- END LOAD SCHEMA SPECIFICATION -->   
-
-   <!-- ZEBRA API-->
-   <section>
-   <title>Zebra MapReduce Interfaces</title>
-    <p>Zebra includes several classes for use in MapReduce programs. The main entry point into Zebra are the two classes for reading and writing tables, namely TableInputFormat and BasicTableOutputFormat. </p>
-
-    	<section>
-         <title>BasicTableOutputFormat  </title>    	
-    	<table>
-		<tr><th>Static</th><th>Method</th><th>Description</th></tr>
-		<tr>
-			<td>yes</td>
-			<td>void setOutputPath(JobConf, Path)  </td>
-			<td>Set the output path of the BasicTable in JobConf  </td>
-		</tr>
-		<tr>
-			<td>yes</td>
-			<td>Path[] getOutputPaths(JobConf) </td>
-			<td>Get the output paths of the BasicTable from JobConf </td>
-		</tr>
-		<tr>
-			<td>yes</td>
-			<td>void setStorageInfo(JobConf, ZebraSchema, ZebraStorageHint, ZebraSortInfo) </td>
-			<td>Set the table storage information (schema, storagehint, sortinfo) in JobConf</td>
-		</tr>
-			<tr>
-			<td>yes</td>
-			<td>Schema getSchema(JobConf)  </td>
-			<td>Get the table schema in JobConf  </td>
-		</tr>
-	    <tr>
-			<td>yes</td>
-			<td>BytesWritable generateSortKey(JobConf, Tuple)  </td>
-			<td>Generates a BytesWritable key for the input key </td>
-		</tr>
-		<tr>
-			<td>yes</td>
-			<td>String getStorageHint(JobConf)  </td>
-			<td>Get the table storage hint in JobConf  </td>
-		</tr>
-		<tr>
-			<td>yes</td>
-			<td>SortInfo getSortInfo(JobConf)  </td>
-			<td>Get the SortInfo object  </td>
-		</tr>
-		<tr>
-			<td>yes</td>
-			<td>void close(JobConf)  </td>
-			<td>Close the output BasicTable, No more rows can be added into the table  </td>
-		</tr>
-	   <tr>
-			<td>yes</td>
-			<td>void setMultipleOutputs(JobConf, String commaSeparatedLocs, Class &lt; extends ZebraOutputPartition&gt; theClass)  </td>
-			<td>Enables data to be written to multiple zebra tables based on the ZebraOutputPartition class. 
-			See <a href="zebra_mapreduce.html#Multiple+Table+Outputs">Multiple Table Outputs.</a></td>
-		</tr>
-    	</table> 
-        </section>
-        
-          	<section>
-         <title>TableInputFormat   </title>    
-    	<table>
-		<tr><th>Static</th><th>Method</th><th>Description</th></tr>
-		<tr>
-			<td>yes</td>
-			<td>void setInputPaths(JobConf, Path... paths)  </td>
-			<td>Set the paths to the input table </td>
-
-		</tr>
-		<tr>
-			<td>yes</td>
-			<td>Path[] getInputPaths(JobConf)  </td>
-			<td>Get the comma-separated paths to the input table or table union  </td>
-		</tr>
-		<tr>
-			<td>yes</td>
-			<td>Schema getSchema(JobConf)  </td>
-			<td>Get the schema of a table expr  </td>
-		</tr>
-		<tr>
-			<td>yes</td>
-			<td>void setProjection(JobConf, ZebraProjection)  </td>
-			<td>Set the input projection in the JobConf object  </td>
-		</tr>
-		<tr>
-			<td>yes</td>
-			<td>String getProjection(JobConf) </td>
-			<td>Get the projection from the JobConf </td>
-		</tr>
-		<tr>
-			<td>yes</td>
-			<td>SortInfo getSortInfo(JobConf)  </td>
-			<td>Get the SortInfo object regarding a Zebra table  </td>
-		</tr>
-		<tr>
-			<td>yes</td>
-			<td>void requireSortedTable(JobConf, String sortcolumns, BytesComparator comparator) </td>
-			<td>Requires sorted table or table union </td>
-		</tr>
-	    <tr>
-			<td> yes </td>
-			<td>TableRecordReader getTableRecordReader(JobConf, ZebraProjection) </td>
-			<td>Get a TableRecordReader on a single split </td>
-		</tr>
-		<tr>
-			<td>yes</td>
-			<td>void setMinSplitSize(JobConf, long minSize) </td>
-			<td>Set the minimum split size, default of 1M bytes </td>
-		</tr>
-    	</table>
-    	</section>
-
-    <section>
-    <title>TableRecordReader </title>
-    	<table>
-		<tr><th>Static</th><th>Method</th><th>Description</th></tr>
-		<tr>
-			<td>no</td>
-			<td>boolean seekTo(BytesWritable key) </td>
-			<td>Seek to the position at the first row which has the key (returning true) or just after the key(returning false); only applicable for sorted Zebra table.  </td>
-		</tr>
-    	</table>
-     </section>
-     
-     
-     
-     <section>
-         <title>ZebraOutputPartition </title>
-    	<table>
-		<tr><th>Static</th><th>Method</th><th>Description</th></tr>
-		<tr>
-			<td> no </td>
-			<td>public abstract int getOutputPartition(BytesWritable key, Tuple value) </td>
-			<td>Abstract method from ZebraOutputPartition abstract class. App implements this to stream data to different table  </td>
-		</tr>
-		<tr>
-			<td> no </td>
-			<td>void setConf(Configuration jobConf)  </td>
-			<td>Initialization routine giving JobConf to application. Zebra implements it  </td>
-		</tr>
-		<tr>
-			<td> no </td>
-			<td>Configuration getConf()  </td>
-			<td> returns JobConf. Zebra implements it</td>
-		</tr>
-		<tr>
-			<td>yes </td>
-			<td>Class&lt; extends ZebraOutputPartition&gt; getZebraOutputPartitionClass(JobConf conf) </td>
-			<td>return user implemented ZebraOutputPartition class  </td>
-		</tr>
-    	</table>
-   	   </section>
-   	   
-   	   
-    <section>
-    <title>ZebraProjection </title>
-    	<table>
-		<tr><th>Static</th><th>Method</th><th>Description</th></tr>
-		<tr>
-			<td>yes</td>
-			<td>ZebraProjection createZebraProjection(String) </td>
-			<td>Create a ZebraProjection object from a string representing projection information. </td>
-		</tr>
-    	</table>
-     </section>
-     
-    <section>
-    <title>ZebraSchema</title>
-    	<table>
-		<tr><th>Static</th><th>Method</th><th>Description</th></tr>
-		<tr>
-			<td>yes</td>
-			<td>ZebraSchema createZebraSchema(String)  </td>
-			<td>Create a ZebraStorageHint object from a string representing storage hint information.</td>
-		</tr>
-    	</table>
-     </section>     
-     
-    <section>
-    <title>ZebraStorageHint </title>
-    	<table>
-		<tr><th>Static</th><th>Method</th><th>Description</th></tr>
-		<tr>
-			<td>yes</td>
-			<td>ZebraStorageHint createZebraStorageHint(String) </td>
-			<td>Create a ZebraStorageHint object from a string representing storage hint information. </td>
-		</tr>
-    	</table>
-     </section>   
-     
-    <section>
-    <title>ZebraSortInfo </title>
-    	<table>
-		<tr><th>Static</th><th>Method</th><th>Description</th></tr>
-		<tr>
-			<td>yes</td>
-			<td>ZebraSortInfo createZebraSortInfo(String sortColumns, Class&lt; extends RawComparator &lt; Object &gt;&gt; comparatorClass) </td>
-			<td>Create a ZebraSortInfo object from a sort columns string and a comparator class. </td>
-		</tr>
-    	</table>
-     </section>   
-         </section>
- <!-- END ZEBRA API--> 
     
  </body>
  </document>

Added: hadoop/pig/branches/load-store-redesign/src/org/apache/pig/PigCounters.java
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/src/org/apache/pig/PigCounters.java?rev=897283&view=auto
==============================================================================
--- hadoop/pig/branches/load-store-redesign/src/org/apache/pig/PigCounters.java (added)
+++ hadoop/pig/branches/load-store-redesign/src/org/apache/pig/PigCounters.java Fri Jan  8 18:17:07 2010
@@ -0,0 +1,27 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pig;
+
+/**
+ * Enumeration of the general-purpose counters that Pig maintains.
+ */
+public enum PigCounters {
+    SPILLABLE_MEMORY_MANAGER_SPILL_COUNT,
+    PROACTIVE_SPILL_COUNT;
+}
\ No newline at end of file

Modified: hadoop/pig/branches/load-store-redesign/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/JobControlCompiler.java
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/JobControlCompiler.java?rev=897283&r1=897282&r2=897283&view=diff
==============================================================================
--- hadoop/pig/branches/load-store-redesign/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/JobControlCompiler.java (original)
+++ hadoop/pig/branches/load-store-redesign/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/JobControlCompiler.java Fri Jan  8 18:17:07 2010
@@ -585,17 +585,27 @@
                     conf.set("pig.quantilesFile", mro.getQuantFile());
                     nwJob.setPartitionerClass(WeightedRangePartitioner.class);
                 }
-                if(mro.UDFs.size()==1){
-                    String compFuncSpec = mro.UDFs.get(0);
-                    Class comparator = PigContext.resolveClassName(compFuncSpec);
-                    if(ComparisonFunc.class.isAssignableFrom(comparator)) {
-                        nwJob.setMapperClass(PigMapReduce.MapWithComparator.class);
-                        nwJob.setReducerClass(PigMapReduce.ReduceWithComparator.class);
-                        conf.set("pig.reduce.package", ObjectSerializer.serialize(pack));
-                        conf.set("pig.usercomparator", "true");
-                        nwJob.setOutputKeyClass(NullableTuple.class);                          
-                        nwJob.setSortComparatorClass(comparator);
+                
+                if (mro.isUDFComparatorUsed) {  
+                    boolean usercomparator = false;
+                    for (String compFuncSpec : mro.UDFs) {
+                        Class comparator = PigContext.resolveClassName(compFuncSpec);
+                        if(ComparisonFunc.class.isAssignableFrom(comparator)) {
+                            nwJob.setMapperClass(PigMapReduce.MapWithComparator.class);
+                            nwJob.setReducerClass(PigMapReduce.ReduceWithComparator.class);
+                            conf.set("pig.reduce.package", ObjectSerializer.serialize(pack));
+                            conf.set("pig.usercomparator", "true");
+                            nwJob.setOutputKeyClass(NullableTuple.class);
+                            nwJob.setSortComparatorClass(comparator);
+                            usercomparator = true;
+                            break;
+                        }
                     }
+                    if (!usercomparator) {
+                        String msg = "Internal error. Can't find the UDF comparator";
+                        throw new IOException (msg);
+                    }
+                    
                 } else {
                     conf.set("pig.sortOrder",
                         ObjectSerializer.serialize(mro.getSortOrder()));

Modified: hadoop/pig/branches/load-store-redesign/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MRCompiler.java
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MRCompiler.java?rev=897283&r1=897282&r2=897283&view=diff
==============================================================================
--- hadoop/pig/branches/load-store-redesign/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MRCompiler.java (original)
+++ hadoop/pig/branches/load-store-redesign/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MRCompiler.java Fri Jan  8 18:17:07 2010
@@ -1576,6 +1576,7 @@
             
             if(op.isUDFComparatorUsed){
                 curMROp.UDFs.add(op.getMSortFunc().getFuncSpec().toString());
+                curMROp.isUDFComparatorUsed = true;
             }
             phyToMROpMap.put(op, curMROp);
         }catch(Exception e){
@@ -1903,6 +1904,7 @@
        
         if(sort.isUDFComparatorUsed) {
             mro.UDFs.add(sort.getMSortFunc().getFuncSpec().toString());
+            curMROp.isUDFComparatorUsed = true;
         }        
     
         List<Boolean> flat1 = new ArrayList<Boolean>();         
@@ -2426,6 +2428,7 @@
                     throw new MRCompilerException(msg, errCode, PigException.BUG);
                 }
                 FileSpec oldSpec = ((POStore)mpLeaf).getSFile();
+                boolean oldIsTmpStore = ((POStore)mpLeaf).isTmpStore();
                 
                 FileSpec fSpec = getTempFileSpec();
                 ((POStore)mpLeaf).setSFile(fSpec);
@@ -2447,9 +2450,10 @@
                 limitAdjustMROp.reducePlan.addAsLeaf(pLimit2);
                 POStore st = getStore();
                 st.setSFile(oldSpec);
-                st.setIsTmpStore(false);
+                st.setIsTmpStore(oldIsTmpStore);
                 limitAdjustMROp.reducePlan.addAsLeaf(st);
                 limitAdjustMROp.requestedParallelism = 1;
+                limitAdjustMROp.setLimitOnly(true);
                 // If the operator we're following has global sort set, we
                 // need to indicate that this is a limit after a sort.
                 // This will assure that we get the right sort comparator

Modified: hadoop/pig/branches/load-store-redesign/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MapReduceOper.java
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MapReduceOper.java?rev=897283&r1=897282&r2=897283&view=diff
==============================================================================
--- hadoop/pig/branches/load-store-redesign/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MapReduceOper.java (original)
+++ hadoop/pig/branches/load-store-redesign/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MapReduceOper.java Fri Jan  8 18:17:07 2010
@@ -83,6 +83,10 @@
 
     // Indicates if this is a limit after a sort
     boolean limitAfterSort = false;
+    
+    // Indicates whether the sole purpose of this map-reduce job is to apply a limit; it changes
+    // nothing else. This helps POPackageAnnotator find the right POPackage to annotate
+    boolean limitOnly = false;
 
     // If true, putting an identity combine in this
     // mapreduce job will speed things up.
@@ -103,6 +107,9 @@
 
     public List<String> UDFs;
     
+    // Indicates if a UDF comparator is used
+    boolean isUDFComparatorUsed = false;
+    
     transient NodeIdGenerator nig;
 
     private String scope;
@@ -129,7 +136,7 @@
     // Name of the partition file generated by sampling process,
     // Used by Skewed Join
 	private String skewedJoinPartitionFile;
-
+	
     public MapReduceOper(OperatorKey k) {
         super(k);
         mapPlan = new PhysicalPlan();
@@ -284,6 +291,14 @@
     public void setLimitAfterSort(boolean las) {
         limitAfterSort = las;
     }
+    
+    public boolean isLimitOnly() {
+        return limitOnly;
+    }
+    
+    public void setLimitOnly(boolean limitOnly) {
+        this.limitOnly = limitOnly;
+    }
 
     public boolean needsDistinctCombiner() { 
         return needsDistinctCombiner;

Modified: hadoop/pig/branches/load-store-redesign/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/PhyPlanSetter.java
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/PhyPlanSetter.java?rev=897283&r1=897282&r2=897283&view=diff
==============================================================================
--- hadoop/pig/branches/load-store-redesign/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/PhyPlanSetter.java (original)
+++ hadoop/pig/branches/load-store-redesign/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/PhyPlanSetter.java Fri Jan  8 18:17:07 2010
@@ -21,7 +21,7 @@
 import org.apache.pig.backend.hadoop.executionengine.physicalLayer.expressionOperators.*;
 import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.*;
 import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POSplit;
-import org.apache.pig.backend.local.executionengine.physicalLayer.relationalOperators.*;
+import org.apache.pig.pen.physicalOperators.*;
 import org.apache.pig.backend.hadoop.executionengine.physicalLayer.plans.PhyPlanVisitor;
 import org.apache.pig.backend.hadoop.executionengine.physicalLayer.plans.PhysicalPlan;
 import org.apache.pig.impl.plan.DependencyOrderWalker;

Modified: hadoop/pig/branches/load-store-redesign/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/SampleOptimizer.java
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/SampleOptimizer.java?rev=897283&r1=897282&r2=897283&view=diff
==============================================================================
--- hadoop/pig/branches/load-store-redesign/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/SampleOptimizer.java (original)
+++ hadoop/pig/branches/load-store-redesign/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/SampleOptimizer.java Fri Jan  8 18:17:07 2010
@@ -174,7 +174,7 @@
         rslargs[1] = load.getLFile().getFuncSpec().getCtorArgs()[1];
         FileSpec fs = new FileSpec(predFs.getFileName(),new FuncSpec(loadFunc, rslargs));
         POLoad newLoad = new POLoad(load.getOperatorKey(),load.getRequestedParallelism(), fs);
-        newLoad.setSignature(load.getSignature());
+        newLoad.setSignature(predLoad.getSignature());
         try {
             mr.mapPlan.replace(load, newLoad);
             
@@ -190,7 +190,7 @@
         // Second, replace the loader in our successor with whatever the originally used loader was.
         fs = new FileSpec(predFs.getFileName(), predFs.getFuncSpec());
         newLoad = new POLoad(succLoad.getOperatorKey(), succLoad.getRequestedParallelism(), fs);
-        newLoad.setSignature(succLoad.getSignature());
+        newLoad.setSignature(predLoad.getSignature());
         try {
             succ.mapPlan.replace(succLoad, newLoad);
         } catch (PlanException e) {

Modified: hadoop/pig/branches/load-store-redesign/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/plans/POPackageAnnotator.java
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/plans/POPackageAnnotator.java?rev=897283&r1=897282&r2=897283&view=diff
==============================================================================
--- hadoop/pig/branches/load-store-redesign/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/plans/POPackageAnnotator.java (original)
+++ hadoop/pig/branches/load-store-redesign/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/plans/POPackageAnnotator.java Fri Jan  8 18:17:07 2010
@@ -105,6 +105,8 @@
             List<MapReduceOper> preds = this.mPlan.getPredecessors(mr);
             for (Iterator<MapReduceOper> it = preds.iterator(); it.hasNext();) {
                 MapReduceOper mrOper = it.next();
+                if (mrOper.isLimitOnly())
+                    mrOper = this.mPlan.getPredecessors(mrOper).get(0);
                 lrFound += patchPackage(mrOper.reducePlan, pkg);
                 if(lrFound == pkg.getNumInps()) {
                     break;

Modified: hadoop/pig/branches/load-store-redesign/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/plans/PhyPlanVisitor.java
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/plans/PhyPlanVisitor.java?rev=897283&r1=897282&r2=897283&view=diff
==============================================================================
--- hadoop/pig/branches/load-store-redesign/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/plans/PhyPlanVisitor.java (original)
+++ hadoop/pig/branches/load-store-redesign/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/plans/PhyPlanVisitor.java Fri Jan  8 18:17:07 2010
@@ -23,7 +23,7 @@
 import org.apache.pig.backend.hadoop.executionengine.physicalLayer.expressionOperators.*;
 import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.*;
 import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POSplit;
-import org.apache.pig.backend.local.executionengine.physicalLayer.relationalOperators.*;
+import org.apache.pig.pen.physicalOperators.*;
 import org.apache.pig.impl.plan.PlanVisitor;
 import org.apache.pig.impl.plan.PlanWalker;
 import org.apache.pig.impl.plan.VisitorException;
@@ -270,7 +270,7 @@
 	
     }
 
-    public void visitSplit(org.apache.pig.backend.local.executionengine.physicalLayer.relationalOperators.POSplit split) {
+    public void visitSplit(org.apache.pig.pen.physicalOperators.POSplit split) {
 	// TODO Auto-generated method stub
 	
     }



Mime
View raw message