incubator-accumulo-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From bil...@apache.org
Subject svn commit: r1237873 - in /incubator/accumulo/branches/1.4: lib/ext/ src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/aggregator/ src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/i...
Date Mon, 30 Jan 2012 18:09:05 GMT
Author: billie
Date: Mon Jan 30 18:09:04 2012
New Revision: 1237873

URL: http://svn.apache.org/viewvc?rev=1237873&view=rev
Log:
ACCUMULO-354 replaced example aggregators with combiners

Added:
    incubator/accumulo/branches/1.4/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/GlobalIndexUidCombiner.java
  (with props)
    incubator/accumulo/branches/1.4/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/TextIndexCombiner.java
  (with props)
    incubator/accumulo/branches/1.4/src/examples/wikisearch/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/iterator/
    incubator/accumulo/branches/1.4/src/examples/wikisearch/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/iterator/GlobalIndexUidTest.java
      - copied, changed from r1230153, incubator/accumulo/branches/1.4/src/examples/wikisearch/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/aggregator/GlobalIndexUidAggregatorTest.java
    incubator/accumulo/branches/1.4/src/examples/wikisearch/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/iterator/TextIndexTest.java
      - copied, changed from r1230153, incubator/accumulo/branches/1.4/src/examples/wikisearch/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/aggregator/TextIndexAggregatorTest.java
Removed:
    incubator/accumulo/branches/1.4/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/aggregator/
    incubator/accumulo/branches/1.4/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/TotalAggregatingIterator.java
Modified:
    incubator/accumulo/branches/1.4/lib/ext/   (props changed)
    incubator/accumulo/branches/1.4/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaIngester.java
    incubator/accumulo/branches/1.4/src/examples/wikisearch/query-war/src/main/webapp/ui.jsp
    incubator/accumulo/branches/1.4/src/examples/wikisearch/query/src/main/java/org/apache/accumulo/examples/wikisearch/query/Query.java
    incubator/accumulo/branches/1.4/src/examples/wikisearch/query/src/main/resources/META-INF/
  (props changed)

Propchange: incubator/accumulo/branches/1.4/lib/ext/
------------------------------------------------------------------------------
--- svn:ignore (added)
+++ svn:ignore Mon Jan 30 18:09:04 2012
@@ -0,0 +1 @@
+*.jar

Modified: incubator/accumulo/branches/1.4/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaIngester.java
URL: http://svn.apache.org/viewvc/incubator/accumulo/branches/1.4/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaIngester.java?rev=1237873&r1=1237872&r2=1237873&view=diff
==============================================================================
--- incubator/accumulo/branches/1.4/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaIngester.java (original)
+++ incubator/accumulo/branches/1.4/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaIngester.java Mon Jan 30 18:09:04 2012
@@ -19,6 +19,7 @@ package org.apache.accumulo.examples.wik
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collections;
+import java.util.EnumSet;
 import java.util.List;
 import java.util.Set;
 import java.util.SortedSet;
@@ -29,15 +30,17 @@ import java.util.regex.Pattern;
 import org.apache.accumulo.core.client.AccumuloException;
 import org.apache.accumulo.core.client.AccumuloSecurityException;
 import org.apache.accumulo.core.client.Connector;
+import org.apache.accumulo.core.client.IteratorSetting;
+import org.apache.accumulo.core.client.IteratorSetting.Column;
 import org.apache.accumulo.core.client.TableExistsException;
 import org.apache.accumulo.core.client.TableNotFoundException;
 import org.apache.accumulo.core.client.admin.TableOperations;
 import org.apache.accumulo.core.client.mapreduce.AccumuloOutputFormat;
-import org.apache.accumulo.core.conf.Property;
 import org.apache.accumulo.core.data.Mutation;
 import org.apache.accumulo.core.iterators.IteratorUtil.IteratorScope;
-import org.apache.accumulo.core.iterators.aggregation.NumSummation;
-import org.apache.accumulo.core.iterators.aggregation.conf.AggregatorConfiguration;
+import org.apache.accumulo.core.iterators.user.SummingCombiner;
+import org.apache.accumulo.examples.wikisearch.iterator.GlobalIndexUidCombiner;
+import org.apache.accumulo.examples.wikisearch.iterator.TextIndexCombiner;
 import org.apache.accumulo.examples.wikisearch.reader.AggregatingRecordReader;
 import org.apache.commons.lang.StringUtils;
 import org.apache.hadoop.conf.Configuration;
@@ -52,8 +55,6 @@ import org.apache.hadoop.mapreduce.lib.i
 import org.apache.hadoop.util.Tool;
 import org.apache.hadoop.util.ToolRunner;
 
-
-@SuppressWarnings("deprecation")
 public class WikipediaIngester extends Configured implements Tool {
   
   public final static String INGEST_LANGUAGE = "wikipedia.ingest_language";
@@ -74,23 +75,21 @@ public class WikipediaIngester extends C
     
     // create the shard table
     if (!tops.exists(tableName)) {
-      // Set a text index aggregator on the given field names. No aggregator is set if the option is not supplied
+      // Set a text index combiner on the given field names. No combiner is set if the option is not supplied
       String textIndexFamilies = WikipediaMapper.TOKENS_FIELD_NAME;
       
+      tops.create(tableName);
       if (textIndexFamilies.length() > 0) {
-        System.out.println("Adding content aggregator on the fields: " + textIndexFamilies);
-        
-        // Create and set the aggregators in one shot
-        List<AggregatorConfiguration> aggregators = new ArrayList<AggregatorConfiguration>();
+        System.out.println("Adding content combiner on the fields: " + textIndexFamilies);
         
+        IteratorSetting setting = new IteratorSetting(10, TextIndexCombiner.class);
+        List<Column> columns = new ArrayList<Column>();
         for (String family : StringUtils.split(textIndexFamilies, ',')) {
-          aggregators.add(new AggregatorConfiguration(new Text("fi\0" + family), org.apache.accumulo.examples.wikisearch.aggregator.TextIndexAggregator.class.getName()));
+          columns.add(new Column("fi\0" + family));
         }
+        TextIndexCombiner.setColumns(setting, columns);
         
-        tops.create(tableName);
-        tops.addAggregators(tableName, aggregators);
-      } else {
-        tops.create(tableName);
+        tops.attachIterator(tableName, setting, EnumSet.allOf(IteratorScope.class));
       }
       
       // Set the locality group for the full content column family
@@ -100,34 +99,27 @@ public class WikipediaIngester extends C
     
     if (!tops.exists(indexTableName)) {
       tops.create(indexTableName);
-      // Add the UID aggregator
-      for (IteratorScope scope : IteratorScope.values()) {
-        String stem = String.format("%s%s.%s", Property.TABLE_ITERATOR_PREFIX, scope.name(), "UIDAggregator");
-        tops.setProperty(indexTableName, stem, "19,org.apache.accumulo.examples.wikisearch.iterator.TotalAggregatingIterator");
-        stem += ".opt.";
-        tops.setProperty(indexTableName, stem + "*", "org.apache.accumulo.examples.wikisearch.aggregator.GlobalIndexUidAggregator");
-        
-      }
+      // Add the UID combiner
+      IteratorSetting setting = new IteratorSetting(19, "UIDAggregator", GlobalIndexUidCombiner.class);
+      GlobalIndexUidCombiner.setCombineAllColumns(setting, true);
+      tops.attachIterator(indexTableName, setting, EnumSet.allOf(IteratorScope.class));
     }
     
     if (!tops.exists(reverseIndexTableName)) {
       tops.create(reverseIndexTableName);
-      // Add the UID aggregator
-      for (IteratorScope scope : IteratorScope.values()) {
-        String stem = String.format("%s%s.%s", Property.TABLE_ITERATOR_PREFIX, scope.name(), "UIDAggregator");
-        tops.setProperty(reverseIndexTableName, stem, "19,org.apache.accumulo.examples.wikisearch.iterator.TotalAggregatingIterator");
-        stem += ".opt.";
-        tops.setProperty(reverseIndexTableName, stem + "*", "org.apache.accumulo.examples.wikisearch.aggregator.GlobalIndexUidAggregator");
-        
-      }
+      // Add the UID combiner
+      IteratorSetting setting = new IteratorSetting(19, "UIDAggregator", GlobalIndexUidCombiner.class);
+      GlobalIndexUidCombiner.setCombineAllColumns(setting, true);
+      tops.attachIterator(reverseIndexTableName, setting, EnumSet.allOf(IteratorScope.class));
     }
     
     if (!tops.exists(metadataTableName)) {
-      // Add the NumSummation aggregator for the frequency column
-      List<AggregatorConfiguration> aggregators = new ArrayList<AggregatorConfiguration>();
-      aggregators.add(new AggregatorConfiguration(new Text("f"), NumSummation.class.getName()));
+      // Add the SummingCombiner with VARLEN encoding for the frequency column
       tops.create(metadataTableName);
-      tops.addAggregators(metadataTableName, aggregators);
+      IteratorSetting setting = new IteratorSetting(10, SummingCombiner.class);
+      SummingCombiner.setColumns(setting, Collections.singletonList(new Column("f")));
+      SummingCombiner.setEncodingType(setting, SummingCombiner.Type.VARLEN);
+      tops.attachIterator(metadataTableName, setting, EnumSet.allOf(IteratorScope.class));
     }
   }
   
@@ -136,7 +128,7 @@ public class WikipediaIngester extends C
     Job job = new Job(getConf(), "Ingest Wikipedia");
     Configuration conf = job.getConfiguration();
     conf.set("mapred.map.tasks.speculative.execution", "false");
-
+    
     String tablename = WikipediaConfiguration.getTableName(conf);
     
     String zookeepers = WikipediaConfiguration.getZookeepers(conf);
@@ -171,8 +163,8 @@ public class WikipediaIngester extends C
     job.setMapOutputKeyClass(Text.class);
     job.setMapOutputValueClass(Mutation.class);
     job.setOutputFormatClass(AccumuloOutputFormat.class);
-    AccumuloOutputFormat.setOutputInfo(job, user, password, true, tablename);
-    AccumuloOutputFormat.setZooKeeperInstance(job, instanceName, zookeepers);
+    AccumuloOutputFormat.setOutputInfo(job.getConfiguration(), user, password, true, tablename);
+    AccumuloOutputFormat.setZooKeeperInstance(job.getConfiguration(), instanceName, zookeepers);
     
     return job.waitForCompletion(true) ? 0 : 1;
   }

Added: incubator/accumulo/branches/1.4/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/GlobalIndexUidCombiner.java
URL: http://svn.apache.org/viewvc/incubator/accumulo/branches/1.4/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/GlobalIndexUidCombiner.java?rev=1237873&view=auto
==============================================================================
--- incubator/accumulo/branches/1.4/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/GlobalIndexUidCombiner.java (added)
+++ incubator/accumulo/branches/1.4/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/GlobalIndexUidCombiner.java Mon Jan 30 18:09:04 2012
@@ -0,0 +1,94 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.accumulo.examples.wikisearch.iterator;
+
+import java.io.IOException;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.Map;
+
+import org.apache.accumulo.core.data.Key;
+import org.apache.accumulo.core.data.Value;
+import org.apache.accumulo.core.iterators.IteratorEnvironment;
+import org.apache.accumulo.core.iterators.SortedKeyValueIterator;
+import org.apache.accumulo.core.iterators.TypedValueCombiner;
+import org.apache.accumulo.core.iterators.ValueFormatException;
+import org.apache.accumulo.examples.wikisearch.protobuf.Uid;
+
+import com.google.protobuf.InvalidProtocolBufferException;
+
+/**
+ * 
+ */
+public class GlobalIndexUidCombiner extends TypedValueCombiner<Uid.List> {
+  public static final Encoder<Uid.List> UID_LIST_ENCODER = new UidListEncoder();
+  public static final int MAX = 20;
+  
+  @Override
+  public void init(SortedKeyValueIterator<Key,Value> source, Map<String,String> options, IteratorEnvironment env) throws IOException {
+    super.init(source, options, env);
+    setEncoder(UID_LIST_ENCODER);
+  }
+  
+  @Override
+  public Uid.List typedReduce(Key key, Iterator<Uid.List> iter) {
+    Uid.List.Builder builder = Uid.List.newBuilder();
+    HashSet<String> uids = new HashSet<String>();
+    boolean seenIgnore = false;
+    long count = 0;
+    while (iter.hasNext()) {
+      Uid.List v = iter.next();
+      if (null == v)
+        continue;
+      count = count + v.getCOUNT();
+      if (v.getIGNORE()) {
+        seenIgnore = true;
+      }
+      uids.addAll(v.getUIDList());
+    }
+    // Special case logic
+    // If we have aggregated more than MAX UIDs, then null out the UID list and set IGNORE to true
+    // However, always maintain the count
+    builder.setCOUNT(count);
+    if (uids.size() > MAX || seenIgnore) {
+      builder.setIGNORE(true);
+      builder.clearUID();
+    } else {
+      builder.setIGNORE(false);
+      builder.addAllUID(uids);
+    }
+    return builder.build();
+  }
+  
+  public static class UidListEncoder implements Encoder<Uid.List> {
+    @Override
+    public byte[] encode(Uid.List v) {
+      return v.toByteArray();
+    }
+    
+    @Override
+    public Uid.List decode(byte[] b) {
+      if (b.length == 0)
+        return null;
+      try {
+        return Uid.List.parseFrom(b);
+      } catch (InvalidProtocolBufferException e) {
+        throw new ValueFormatException("Value passed to aggregator was not of type Uid.List");
+      }
+    }
+  }
+}

Propchange: incubator/accumulo/branches/1.4/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/GlobalIndexUidCombiner.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: incubator/accumulo/branches/1.4/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/TextIndexCombiner.java
URL: http://svn.apache.org/viewvc/incubator/accumulo/branches/1.4/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/TextIndexCombiner.java?rev=1237873&view=auto
==============================================================================
--- incubator/accumulo/branches/1.4/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/TextIndexCombiner.java (added)
+++ incubator/accumulo/branches/1.4/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/TextIndexCombiner.java Mon Jan 30 18:09:04 2012
@@ -0,0 +1,102 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.accumulo.examples.wikisearch.iterator;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.accumulo.core.data.Key;
+import org.apache.accumulo.core.data.Value;
+import org.apache.accumulo.core.iterators.IteratorEnvironment;
+import org.apache.accumulo.core.iterators.SortedKeyValueIterator;
+import org.apache.accumulo.core.iterators.TypedValueCombiner;
+import org.apache.accumulo.core.iterators.ValueFormatException;
+import org.apache.accumulo.examples.wikisearch.protobuf.TermWeight;
+
+import com.google.protobuf.InvalidProtocolBufferException;
+
+/**
+ * 
+ */
+public class TextIndexCombiner extends TypedValueCombiner<TermWeight.Info> {
+  public static final Encoder<TermWeight.Info> TERMWEIGHT_INFO_ENCODER = new TermWeightInfoEncoder();
+  
+  @Override
+  public TermWeight.Info typedReduce(Key key, Iterator<TermWeight.Info> iter) {
+    TermWeight.Info.Builder builder = TermWeight.Info.newBuilder();
+    List<Integer> offsets = new ArrayList<Integer>();
+    float normalizedTermFrequency = 0f;
+    
+    while (iter.hasNext()) {
+      TermWeight.Info info = iter.next();
+      if (null == info)
+        continue;
+      
+      // Add each offset into the list maintaining sorted order
+      for (int offset : info.getWordOffsetList()) {
+        int pos = Collections.binarySearch(offsets, offset);
+        
+        if (pos < 0) {
+          // Undo the transform on the insertion point
+          offsets.add((-1 * pos) - 1, offset);
+        } else {
+          offsets.add(pos, offset);
+        }
+      }
+      
+      if (info.getNormalizedTermFrequency() > 0) {
+        normalizedTermFrequency += info.getNormalizedTermFrequency();
+      }
+    }
+    
+    // Keep the sorted order we tried to maintain
+    for (int i = 0; i < offsets.size(); ++i) {
+      builder.addWordOffset(offsets.get(i));
+    }
+    
+    builder.setNormalizedTermFrequency(normalizedTermFrequency);
+    return builder.build();
+  }
+  
+  @Override
+  public void init(SortedKeyValueIterator<Key,Value> source, Map<String,String> options, IteratorEnvironment env) throws IOException {
+    super.init(source, options, env);
+    setEncoder(TERMWEIGHT_INFO_ENCODER);
+  }
+  
+  public static class TermWeightInfoEncoder implements Encoder<TermWeight.Info> {
+    @Override
+    public byte[] encode(TermWeight.Info v) {
+      return v.toByteArray();
+    }
+    
+    @Override
+    public TermWeight.Info decode(byte[] b) {
+      if (b.length == 0)
+        return null;
+      try {
+        return TermWeight.Info.parseFrom(b);
+      } catch (InvalidProtocolBufferException e) {
+        throw new ValueFormatException("Value passed to aggregator was not of type TermWeight.Info");
+      }
+    }
+  }
+}

Propchange: incubator/accumulo/branches/1.4/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/TextIndexCombiner.java
------------------------------------------------------------------------------
    svn:eol-style = native

Copied: incubator/accumulo/branches/1.4/src/examples/wikisearch/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/iterator/GlobalIndexUidTest.java
(from r1230153, incubator/accumulo/branches/1.4/src/examples/wikisearch/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/aggregator/GlobalIndexUidAggregatorTest.java)
URL: http://svn.apache.org/viewvc/incubator/accumulo/branches/1.4/src/examples/wikisearch/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/iterator/GlobalIndexUidTest.java?p2=incubator/accumulo/branches/1.4/src/examples/wikisearch/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/iterator/GlobalIndexUidTest.java&p1=incubator/accumulo/branches/1.4/src/examples/wikisearch/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/aggregator/GlobalIndexUidAggregatorTest.java&r1=1230153&r2=1237873&rev=1237873&view=diff
==============================================================================
--- incubator/accumulo/branches/1.4/src/examples/wikisearch/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/aggregator/GlobalIndexUidAggregatorTest.java (original)
+++ incubator/accumulo/branches/1.4/src/examples/wikisearch/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/iterator/GlobalIndexUidTest.java Mon Jan 30 18:09:04 2012
@@ -14,49 +14,58 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package org.apache.accumulo.examples.wikisearch.aggregator;
+package org.apache.accumulo.examples.wikisearch.iterator;
+
+import static org.junit.Assert.assertTrue;
 
 import java.util.ArrayList;
+import java.util.Collections;
 import java.util.List;
 import java.util.UUID;
 
-import junit.framework.TestCase;
-
+import org.apache.accumulo.core.client.IteratorSetting;
+import org.apache.accumulo.core.data.Key;
 import org.apache.accumulo.core.data.Value;
-import org.apache.accumulo.core.iterators.aggregation.Aggregator;
-import org.apache.accumulo.examples.wikisearch.aggregator.GlobalIndexUidAggregator;
+import org.apache.accumulo.core.iterators.Combiner;
 import org.apache.accumulo.examples.wikisearch.protobuf.Uid;
 import org.apache.accumulo.examples.wikisearch.protobuf.Uid.List.Builder;
 import org.apache.log4j.Level;
 import org.apache.log4j.Logger;
+import org.junit.Before;
+import org.junit.Test;
 
-
-@SuppressWarnings("deprecation")
-public class GlobalIndexUidAggregatorTest extends TestCase {
-  
-  Aggregator agg = new GlobalIndexUidAggregator();
+public class GlobalIndexUidTest {
+  private GlobalIndexUidCombiner combiner;
+  private List<Value> values;
+  
+  @Before
+  public void setup() throws Exception {
+    combiner = new GlobalIndexUidCombiner();
+    combiner.init(null, Collections.singletonMap("all", "true"), null);
+    values = new ArrayList<Value>();
+  }
   
   private Uid.List.Builder createNewUidList() {
     return Uid.List.newBuilder();
   }
   
+  @Test
   public void testSingleUid() {
-    agg.reset();
     Builder b = createNewUidList();
     b.setCOUNT(1);
     b.setIGNORE(false);
     b.addUID(UUID.randomUUID().toString());
     Uid.List uidList = b.build();
     Value val = new Value(uidList.toByteArray());
-    agg.collect(val);
-    Value result = agg.aggregate();
+    values.add(val);
+    Value result = combiner.reduce(new Key(), values.iterator());
     assertTrue(val.compareTo(result.get()) == 0);
   }
   
+  @Test
   public void testLessThanMax() throws Exception {
-    agg.reset();
     List<String> savedUUIDs = new ArrayList<String>();
-    for (int i = 0; i < GlobalIndexUidAggregator.MAX - 1; i++) {
+    for (int i = 0; i < GlobalIndexUidCombiner.MAX - 1; i++) {
       Builder b = createNewUidList();
       b.setIGNORE(false);
       String uuid = UUID.randomUUID().toString();
@@ -65,21 +74,21 @@ public class GlobalIndexUidAggregatorTes
       b.addUID(uuid);
       Uid.List uidList = b.build();
       Value val = new Value(uidList.toByteArray());
-      agg.collect(val);
+      values.add(val);
     }
-    Value result = agg.aggregate();
+    Value result = combiner.reduce(new Key(), values.iterator());
     Uid.List resultList = Uid.List.parseFrom(result.get());
     assertTrue(resultList.getIGNORE() == false);
-    assertTrue(resultList.getUIDCount() == (GlobalIndexUidAggregator.MAX - 1));
+    assertTrue(resultList.getUIDCount() == (GlobalIndexUidCombiner.MAX - 1));
     List<String> resultListUUIDs = resultList.getUIDList();
     for (String s : savedUUIDs)
       assertTrue(resultListUUIDs.contains(s));
   }
   
+  @Test
   public void testEqualsMax() throws Exception {
-    agg.reset();
     List<String> savedUUIDs = new ArrayList<String>();
-    for (int i = 0; i < GlobalIndexUidAggregator.MAX; i++) {
+    for (int i = 0; i < GlobalIndexUidCombiner.MAX; i++) {
       Builder b = createNewUidList();
       b.setIGNORE(false);
       String uuid = UUID.randomUUID().toString();
@@ -88,21 +97,21 @@ public class GlobalIndexUidAggregatorTes
       b.addUID(uuid);
       Uid.List uidList = b.build();
       Value val = new Value(uidList.toByteArray());
-      agg.collect(val);
+      values.add(val);
     }
-    Value result = agg.aggregate();
+    Value result = combiner.reduce(new Key(), values.iterator());
     Uid.List resultList = Uid.List.parseFrom(result.get());
     assertTrue(resultList.getIGNORE() == false);
-    assertTrue(resultList.getUIDCount() == (GlobalIndexUidAggregator.MAX));
+    assertTrue(resultList.getUIDCount() == (GlobalIndexUidCombiner.MAX));
     List<String> resultListUUIDs = resultList.getUIDList();
     for (String s : savedUUIDs)
       assertTrue(resultListUUIDs.contains(s));
   }
   
+  @Test
   public void testMoreThanMax() throws Exception {
-    agg.reset();
     List<String> savedUUIDs = new ArrayList<String>();
-    for (int i = 0; i < GlobalIndexUidAggregator.MAX + 10; i++) {
+    for (int i = 0; i < GlobalIndexUidCombiner.MAX + 10; i++) {
       Builder b = createNewUidList();
       b.setIGNORE(false);
       String uuid = UUID.randomUUID().toString();
@@ -111,51 +120,56 @@ public class GlobalIndexUidAggregatorTes
       b.addUID(uuid);
       Uid.List uidList = b.build();
       Value val = new Value(uidList.toByteArray());
-      agg.collect(val);
+      values.add(val);
     }
-    Value result = agg.aggregate();
+    Value result = combiner.reduce(new Key(), values.iterator());
     Uid.List resultList = Uid.List.parseFrom(result.get());
     assertTrue(resultList.getIGNORE() == true);
     assertTrue(resultList.getUIDCount() == 0);
-    assertTrue(resultList.getCOUNT() == (GlobalIndexUidAggregator.MAX + 10));
+    assertTrue(resultList.getCOUNT() == (GlobalIndexUidCombiner.MAX + 10));
   }
   
+  @Test
   public void testSeenIgnore() throws Exception {
-    agg.reset();
     Builder b = createNewUidList();
     b.setIGNORE(true);
     b.setCOUNT(0);
     Uid.List uidList = b.build();
     Value val = new Value(uidList.toByteArray());
-    agg.collect(val);
+    values.add(val);
     b = createNewUidList();
     b.setIGNORE(false);
     b.setCOUNT(1);
     b.addUID(UUID.randomUUID().toString());
     uidList = b.build();
     val = new Value(uidList.toByteArray());
-    agg.collect(val);
-    Value result = agg.aggregate();
+    values.add(val);
+    Value result = combiner.reduce(new Key(), values.iterator());
     Uid.List resultList = Uid.List.parseFrom(result.get());
     assertTrue(resultList.getIGNORE() == true);
     assertTrue(resultList.getUIDCount() == 0);
     assertTrue(resultList.getCOUNT() == 1);
   }
   
+  @Test
   public void testInvalidValueType() throws Exception {
-    Logger.getLogger(GlobalIndexUidAggregator.class).setLevel(Level.OFF);
-    agg.reset();
+    Combiner comb = new GlobalIndexUidCombiner();
+    IteratorSetting setting = new IteratorSetting(1, GlobalIndexUidCombiner.class);
+    GlobalIndexUidCombiner.setCombineAllColumns(setting, true);
+    GlobalIndexUidCombiner.setLossyness(setting, true);
+    comb.init(null, setting.getProperties(), null);
+    Logger.getLogger(GlobalIndexUidCombiner.class).setLevel(Level.OFF);
     Value val = new Value(UUID.randomUUID().toString().getBytes());
-    agg.collect(val);
-    Value result = agg.aggregate();
+    values.add(val);
+    Value result = comb.reduce(new Key(), values.iterator());
     Uid.List resultList = Uid.List.parseFrom(result.get());
     assertTrue(resultList.getIGNORE() == false);
     assertTrue(resultList.getUIDCount() == 0);
     assertTrue(resultList.getCOUNT() == 0);
   }
   
+  @Test
   public void testCount() throws Exception {
-    agg.reset();
     UUID uuid = UUID.randomUUID();
     // Collect the same UUID five times.
     for (int i = 0; i < 5; i++) {
@@ -165,9 +179,9 @@ public class GlobalIndexUidAggregatorTes
       b.addUID(uuid.toString());
       Uid.List uidList = b.build();
       Value val = new Value(uidList.toByteArray());
-      agg.collect(val);
+      values.add(val);
     }
-    Value result = agg.aggregate();
+    Value result = combiner.reduce(new Key(), values.iterator());
     Uid.List resultList = Uid.List.parseFrom(result.get());
     assertTrue(resultList.getIGNORE() == false);
     assertTrue(resultList.getUIDCount() == 1);

Copied: incubator/accumulo/branches/1.4/src/examples/wikisearch/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/iterator/TextIndexTest.java
(from r1230153, incubator/accumulo/branches/1.4/src/examples/wikisearch/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/aggregator/TextIndexAggregatorTest.java)
URL: http://svn.apache.org/viewvc/incubator/accumulo/branches/1.4/src/examples/wikisearch/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/iterator/TextIndexTest.java?p2=incubator/accumulo/branches/1.4/src/examples/wikisearch/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/iterator/TextIndexTest.java&p1=incubator/accumulo/branches/1.4/src/examples/wikisearch/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/aggregator/TextIndexAggregatorTest.java&r1=1230153&r2=1237873&rev=1237873&view=diff
==============================================================================
--- incubator/accumulo/branches/1.4/src/examples/wikisearch/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/aggregator/TextIndexAggregatorTest.java (original)
+++ incubator/accumulo/branches/1.4/src/examples/wikisearch/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/iterator/TextIndexTest.java Mon Jan 30 18:09:04 2012
@@ -14,29 +14,33 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package org.apache.accumulo.examples.wikisearch.aggregator;
+package org.apache.accumulo.examples.wikisearch.iterator;
 
+import java.util.ArrayList;
+import java.util.Collections;
 import java.util.List;
 
 import junit.framework.Assert;
 
-import org.junit.After;
-import org.junit.Before;
-import org.junit.Test;
-
+import org.apache.accumulo.core.data.Key;
 import org.apache.accumulo.core.data.Value;
-import org.apache.accumulo.examples.wikisearch.aggregator.TextIndexAggregator;
 import org.apache.accumulo.examples.wikisearch.protobuf.TermWeight;
 import org.apache.accumulo.examples.wikisearch.protobuf.TermWeight.Info.Builder;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
 
 import com.google.protobuf.InvalidProtocolBufferException;
 
-public class TextIndexAggregatorTest {
-  private TextIndexAggregator aggregator;
+public class TextIndexTest {
+  private TextIndexCombiner combiner;
+  private List<Value> values;
   
   @Before
   public void setup() throws Exception {
-    aggregator = new TextIndexAggregator();
+    combiner = new TextIndexCombiner();
+    combiner.init(null, Collections.singletonMap("all", "true"), null);
+    values = new ArrayList<Value>();
   }
   
   @After
@@ -50,15 +54,14 @@ public class TextIndexAggregatorTest {
   
   @Test
   public void testSingleValue() throws InvalidProtocolBufferException {
-    aggregator = new TextIndexAggregator();
     Builder builder = createBuilder();
     builder.addWordOffset(1);
     builder.addWordOffset(5);
     builder.setNormalizedTermFrequency(0.1f);
     
-    aggregator.collect(new Value(builder.build().toByteArray()));
+    values.add(new Value(builder.build().toByteArray()));
     
-    Value result = aggregator.aggregate();
+    Value result = combiner.reduce(new Key(), values.iterator());
     
     TermWeight.Info info = TermWeight.Info.parseFrom(result.get());
     
@@ -72,21 +75,20 @@ public class TextIndexAggregatorTest {
   
   @Test
   public void testAggregateTwoValues() throws InvalidProtocolBufferException {
-    aggregator = new TextIndexAggregator();
     Builder builder = createBuilder();
     builder.addWordOffset(1);
     builder.addWordOffset(5);
     builder.setNormalizedTermFrequency(0.1f);
     
-    aggregator.collect(new Value(builder.build().toByteArray()));
+    values.add(new Value(builder.build().toByteArray()));
     
     builder = createBuilder();
     builder.addWordOffset(3);
     builder.setNormalizedTermFrequency(0.05f);
     
-    aggregator.collect(new Value(builder.build().toByteArray()));
+    values.add(new Value(builder.build().toByteArray()));
     
-    Value result = aggregator.aggregate();
+    Value result = combiner.reduce(new Key(), values.iterator());
     
     TermWeight.Info info = TermWeight.Info.parseFrom(result.get());
     
@@ -101,30 +103,28 @@ public class TextIndexAggregatorTest {
   
   @Test
   public void testAggregateManyValues() throws InvalidProtocolBufferException {
-    aggregator = new TextIndexAggregator();
-    
     Builder builder = createBuilder();
     builder.addWordOffset(13);
     builder.addWordOffset(15);
     builder.addWordOffset(19);
     builder.setNormalizedTermFrequency(0.12f);
     
-    aggregator.collect(new Value(builder.build().toByteArray()));
+    values.add(new Value(builder.build().toByteArray()));
     
     builder = createBuilder();
     builder.addWordOffset(1);
     builder.addWordOffset(5);
     builder.setNormalizedTermFrequency(0.1f);
     
-    aggregator.collect(new Value(builder.build().toByteArray()));
+    values.add(new Value(builder.build().toByteArray()));
     
     builder = createBuilder();
     builder.addWordOffset(3);
     builder.setNormalizedTermFrequency(0.05f);
     
-    aggregator.collect(new Value(builder.build().toByteArray()));
+    values.add(new Value(builder.build().toByteArray()));
     
-    Value result = aggregator.aggregate();
+    Value result = combiner.reduce(new Key(), values.iterator());
     
     TermWeight.Info info = TermWeight.Info.parseFrom(result.get());
     

Modified: incubator/accumulo/branches/1.4/src/examples/wikisearch/query-war/src/main/webapp/ui.jsp
URL: http://svn.apache.org/viewvc/incubator/accumulo/branches/1.4/src/examples/wikisearch/query-war/src/main/webapp/ui.jsp?rev=1237873&r1=1237872&r2=1237873&view=diff
==============================================================================
--- incubator/accumulo/branches/1.4/src/examples/wikisearch/query-war/src/main/webapp/ui.jsp (original)
+++ incubator/accumulo/branches/1.4/src/examples/wikisearch/query-war/src/main/webapp/ui.jsp Mon Jan 30 18:09:04 2012
@@ -62,6 +62,8 @@
     		<p>The search syntax is boolean logic, for example: TEXT == 'boy' and TITLE =~ 'Autism'. The supported operators are:
     		==, !=, &lt;, &gt;, &le;, &ge;, =~, and !~. Likewise grouping can be performed using parentheses and predicates can be
     		joined using and, or, and not.
+    		<p>To highlight the cell-level access control of Apache Accumulo, the "authorization" required for a particular cell is the language 
+    		of the associated wikipedia article.
     	</div>
     	<div id="d">
 	    	<form id="FORM" name="queryForm" method="get" target="results" onsubmit="return setAction()">
@@ -73,6 +75,8 @@
 	    		</div>
 	    		<br />
 	    		<div class="center_input">
+	    		<label>Authorizations: </label>
+	    		<br />
 	    		<label>All</label><input type="checkbox" name="auths" value="all" />
 				</div>
 	    		<div class="center_input">

Modified: incubator/accumulo/branches/1.4/src/examples/wikisearch/query/src/main/java/org/apache/accumulo/examples/wikisearch/query/Query.java
URL: http://svn.apache.org/viewvc/incubator/accumulo/branches/1.4/src/examples/wikisearch/query/src/main/java/org/apache/accumulo/examples/wikisearch/query/Query.java?rev=1237873&r1=1237872&r2=1237873&view=diff
==============================================================================
--- incubator/accumulo/branches/1.4/src/examples/wikisearch/query/src/main/java/org/apache/accumulo/examples/wikisearch/query/Query.java (original)
+++ incubator/accumulo/branches/1.4/src/examples/wikisearch/query/src/main/java/org/apache/accumulo/examples/wikisearch/query/Query.java Mon Jan 30 18:09:04 2012
@@ -40,7 +40,6 @@ import javax.xml.transform.TransformerFa
 import javax.xml.transform.stream.StreamResult;
 import javax.xml.transform.stream.StreamSource;
 
-
 import org.apache.accumulo.core.client.Connector;
 import org.apache.accumulo.core.client.Instance;
 import org.apache.accumulo.core.client.ZooKeeperInstance;
@@ -49,7 +48,6 @@ import org.apache.accumulo.examples.wiki
 import org.apache.accumulo.examples.wikisearch.sample.Results;
 import org.apache.log4j.Logger;
 
-
 @Stateless
 @Local(IQuery.class)
 public class Query implements IQuery {
@@ -192,12 +190,9 @@ public class Query implements IQuery {
     
     // Create list of auths
     List<String> authorizations = new ArrayList<String>();
-    if (auths == null || "".equals(auths)) {
-      authorizations.add("all");
-    } else {
+    if (auths != null && auths.length() > 0)
       for (String a : auths.split(","))
         authorizations.add(a);
-    }
     ContentLogic table = new ContentLogic();
     table.setTableName(tableName);
     return table.runQuery(connector, query, authorizations);
@@ -229,8 +224,9 @@ public class Query implements IQuery {
     
     // Create list of auths
     List<String> authorizations = new ArrayList<String>();
-    for (String a : auths.split(","))
-      authorizations.add(a);
+    if (auths != null && auths.length() > 0)
+      for (String a : auths.split(","))
+        authorizations.add(a);
     
     QueryLogic table = new QueryLogic();
     table.setTableName(tableName);

Propchange: incubator/accumulo/branches/1.4/src/examples/wikisearch/query/src/main/resources/META-INF/
------------------------------------------------------------------------------
--- svn:ignore (added)
+++ svn:ignore Mon Jan 30 18:09:04 2012
@@ -0,0 +1 @@
+ejb-jar.xml



Mime
View raw message