asterixdb-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From mb...@apache.org
Subject [4/8] asterixdb git commit: RAT Cleanup, Resulting LICENSE fixes
Date Thu, 02 Feb 2017 16:53:58 GMT
http://git-wip-us.apache.org/repos/asf/asterixdb/blob/5e17af20/hyracks-fullstack/hyracks/hyracks-test-support/pom.xml
----------------------------------------------------------------------
diff --git a/hyracks-fullstack/hyracks/hyracks-test-support/pom.xml b/hyracks-fullstack/hyracks/hyracks-test-support/pom.xml
index 0abbdf4..9e0535b 100644
--- a/hyracks-fullstack/hyracks/hyracks-test-support/pom.xml
+++ b/hyracks-fullstack/hyracks/hyracks-test-support/pom.xml
@@ -100,5 +100,14 @@
       <artifactId>hyracks-dataflow-std</artifactId>
       <version>${project.version}</version>
     </dependency>
+    <dependency>
+      <groupId>org.apache.hyracks</groupId>
+      <artifactId>hyracks-util</artifactId>
+      <version>${project.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.commons</groupId>
+      <artifactId>commons-lang3</artifactId>
+    </dependency>
   </dependencies>
 </project>

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/5e17af20/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/common/datagen/DataGenThread.java
----------------------------------------------------------------------
diff --git a/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/common/datagen/DataGenThread.java
b/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/common/datagen/DataGenThread.java
new file mode 100644
index 0000000..e031a51
--- /dev/null
+++ b/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/common/datagen/DataGenThread.java
@@ -0,0 +1,103 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hyracks.storage.am.common.datagen;
+
+import java.io.IOException;
+import java.util.Random;
+import java.util.concurrent.BlockingQueue;
+import java.util.concurrent.LinkedBlockingQueue;
+
+import org.apache.hyracks.api.dataflow.value.ISerializerDeserializer;
+
+/**
+ * Quick & dirty data generator for multi-thread testing.
+ * <p>
+ * The thread owns a fixed ring of {@code maxOutstandingBatches} pre-created {@link TupleBatch}
+ * instances. It repeatedly claims the batch at the current ring position, fills it with generated
+ * tuples and hands it to consumers through {@link #tupleBatchQueue}. Consumers obtain batches with
+ * {@link #getBatch()} and must give them back with {@link #releaseBatch(TupleBatch)} so the slot
+ * can be reused.
+ */
+@SuppressWarnings("rawtypes")
+public class DataGenThread extends Thread {
+    public final BlockingQueue<TupleBatch> tupleBatchQueue;
+    private final int maxNumBatches;
+    private final int maxOutstandingBatches;
+    private int numBatches = 0;
+    private final Random rnd;
+
+    // maxOutstandingBatches pre-created tuple-batches for populating the queue.
+    private TupleBatch[] tupleBatches;
+    private int ringPos;
+
+    /**
+     * Creates a generator whose field value generators are derived from the given serdes.
+     *
+     * @param numConsumers          currently unused by this class
+     * @param maxNumBatches         total number of batches to produce before the thread ends
+     * @param batchSize             passed through to every {@link TupleBatch}
+     * @param fieldSerdes           serdes of the tuple fields; also used to pick the field generators
+     * @param payloadSize           passed through to every {@link TupleBatch}
+     * @param rndSeed               seed of the random number generator handed to the field generators
+     * @param maxOutstandingBatches size of the batch ring and capacity of the queue
+     * @param sorted                whether sorted field generators should be selected
+     */
+    public DataGenThread(int numConsumers, int maxNumBatches, int batchSize, ISerializerDeserializer[] fieldSerdes,
+            int payloadSize, int rndSeed, int maxOutstandingBatches, boolean sorted) {
+        this.maxNumBatches = maxNumBatches;
+        this.maxOutstandingBatches = maxOutstandingBatches;
+        rnd = new Random(rndSeed);
+        tupleBatches = new TupleBatch[maxOutstandingBatches];
+        IFieldValueGenerator[] fieldGens = DataGenUtils.getFieldGensFromSerdes(fieldSerdes, rnd, sorted);
+        for (int i = 0; i < maxOutstandingBatches; i++) {
+            tupleBatches[i] = new TupleBatch(batchSize, fieldGens, fieldSerdes, payloadSize);
+        }
+        tupleBatchQueue = new LinkedBlockingQueue<>(maxOutstandingBatches);
+        ringPos = 0;
+    }
+
+    /**
+     * Creates a generator that uses caller-supplied field value generators. Batches are created
+     * with a payload size of 0.
+     *
+     * @param numConsumers          currently unused by this class
+     * @param maxNumBatches         total number of batches to produce before the thread ends
+     * @param batchSize             passed through to every {@link TupleBatch}
+     * @param fieldSerdes           serdes of the tuple fields
+     * @param fieldGens             generators producing the field values
+     * @param rndSeed               seed of this thread's random number generator
+     * @param maxOutstandingBatches size of the batch ring and capacity of the queue
+     */
+    public DataGenThread(int numConsumers, int maxNumBatches, int batchSize, ISerializerDeserializer[] fieldSerdes,
+            IFieldValueGenerator[] fieldGens, int rndSeed, int maxOutstandingBatches) {
+        this.maxNumBatches = maxNumBatches;
+        this.maxOutstandingBatches = maxOutstandingBatches;
+        rnd = new Random(rndSeed);
+        tupleBatches = new TupleBatch[maxOutstandingBatches];
+        for (int i = 0; i < maxOutstandingBatches; i++) {
+            tupleBatches[i] = new TupleBatch(batchSize, fieldGens, fieldSerdes, 0);
+        }
+        tupleBatchQueue = new LinkedBlockingQueue<>(maxOutstandingBatches);
+        ringPos = 0;
+    }
+
+    /**
+     * Produces {@code maxNumBatches} batches, cycling through the ring of pre-created batches.
+     * <p>
+     * If the batch at the current ring position is still marked in use, the loop retries until a
+     * consumer releases it. When generation fails or the thread is interrupted, the claimed slot is
+     * released again before the thread stops; otherwise the slot would stay marked in use and this
+     * loop could never make progress.
+     *
+     * @throws IllegalStateException if generating a batch fails with an {@link IOException}
+     */
+    @Override
+    public void run() {
+        while (numBatches < maxNumBatches) {
+            TupleBatch batch = tupleBatches[ringPos];
+            if (!batch.inUse.compareAndSet(false, true)) {
+                // Slot not yet released by its consumer; try again.
+                continue;
+            }
+            try {
+                batch.generate();
+                tupleBatchQueue.put(batch);
+            } catch (IOException e) {
+                // Give the slot back and fail loudly instead of spinning forever on a claimed slot.
+                batch.inUse.set(false);
+                throw new IllegalStateException("Failed to generate tuple batch " + numBatches, e);
+            } catch (InterruptedException e) {
+                // Give the slot back, preserve the interrupt status and stop producing.
+                batch.inUse.set(false);
+                Thread.currentThread().interrupt();
+                return;
+            }
+            numBatches++;
+            ringPos++;
+            if (ringPos >= maxOutstandingBatches) {
+                ringPos = 0;
+            }
+        }
+    }
+
+    /**
+     * Takes the next generated batch from the queue, blocking until one is available.
+     *
+     * @return the next generated batch
+     * @throws InterruptedException if interrupted while waiting
+     */
+    public TupleBatch getBatch() throws InterruptedException {
+        return tupleBatchQueue.take();
+    }
+
+    /**
+     * Marks the given batch as no longer in use so the generator may refill it.
+     *
+     * @param batch a batch previously obtained from {@link #getBatch()}
+     */
+    public void releaseBatch(TupleBatch batch) {
+        batch.inUse.set(false);
+    }
+}

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/5e17af20/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/common/datagen/DataGenUtils.java
----------------------------------------------------------------------
diff --git a/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/common/datagen/DataGenUtils.java
b/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/common/datagen/DataGenUtils.java
new file mode 100644
index 0000000..130f7e2
--- /dev/null
+++ b/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/common/datagen/DataGenUtils.java
@@ -0,0 +1,65 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hyracks.storage.am.common.datagen;
+
+import java.util.Random;
+
+import org.apache.hyracks.api.dataflow.value.ISerializerDeserializer;
+import org.apache.hyracks.dataflow.common.data.marshalling.DoubleSerializerDeserializer;
+import org.apache.hyracks.dataflow.common.data.marshalling.FloatSerializerDeserializer;
+import org.apache.hyracks.dataflow.common.data.marshalling.IntegerSerializerDeserializer;
+import org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer;
+
+/**
+ * Helpers that map serializer/deserializer instances to matching field value generators.
+ */
+@SuppressWarnings("rawtypes")
+public class DataGenUtils {
+    /**
+     * Picks a field value generator for a single serde.
+     *
+     * @param serde  the serde whose value type determines the generator
+     * @param rnd    random source handed to the non-sorted generators and to the string generator
+     * @param sorted if true, integer, float and double serdes get their sorted generator variant;
+     *               string serdes always get a {@link StringFieldValueGenerator}
+     * @return the generator for the serde, or {@code null} if the serde type is not one of
+     *         integer, float, double or UTF-8 string
+     */
+    public static IFieldValueGenerator getFieldGenFromSerde(ISerializerDeserializer serde, Random rnd,
+            boolean sorted) {
+        if (serde instanceof IntegerSerializerDeserializer) {
+            return sorted ? new SortedIntegerFieldValueGenerator() : new IntegerFieldValueGenerator(rnd);
+        }
+        if (serde instanceof FloatSerializerDeserializer) {
+            return sorted ? new SortedFloatFieldValueGenerator() : new FloatFieldValueGenerator(rnd);
+        }
+        if (serde instanceof DoubleSerializerDeserializer) {
+            return sorted ? new SortedDoubleFieldValueGenerator() : new DoubleFieldValueGenerator(rnd);
+        }
+        if (serde instanceof UTF8StringSerializerDeserializer) {
+            return new StringFieldValueGenerator(20, rnd);
+        }
+        // Unsupported serde type.
+        return null;
+    }
+
+    /**
+     * Picks one field value generator per serde, in the same order as the input array.
+     *
+     * @param serdes the serdes to map
+     * @param rnd    random source shared by all generators that need one
+     * @param sorted forwarded to {@link #getFieldGenFromSerde(ISerializerDeserializer, Random, boolean)}
+     * @return an array of the same length as {@code serdes}
+     */
+    public static IFieldValueGenerator[] getFieldGensFromSerdes(ISerializerDeserializer[] serdes, Random rnd,
+            boolean sorted) {
+        final int numFields = serdes.length;
+        IFieldValueGenerator[] generators = new IFieldValueGenerator[numFields];
+        for (int field = 0; field < numFields; field++) {
+            generators[field] = getFieldGenFromSerde(serdes[field], rnd, sorted);
+        }
+        return generators;
+    }
+}

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/5e17af20/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/common/datagen/DocumentStringFieldValueGenerator.java
----------------------------------------------------------------------
diff --git a/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/common/datagen/DocumentStringFieldValueGenerator.java
b/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/common/datagen/DocumentStringFieldValueGenerator.java
new file mode 100644
index 0000000..6163b48
--- /dev/null
+++ b/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/common/datagen/DocumentStringFieldValueGenerator.java
@@ -0,0 +1,103 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hyracks.storage.am.common.datagen;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Random;
+
+import org.apache.hyracks.util.MathUtil;
+
+/**
+ * Generates "documents": space-separated sequences of words drawn from a dictionary of first and
+ * last names. Words are picked through a Zipf distribution (skew 1) over the dictionary, and each
+ * document has between {@code docMinWords} and {@code docMaxWords} words.
+ */
+public class DocumentStringFieldValueGenerator implements IFieldValueGenerator<String> {
+    private static final String FIRST_NAMES_FILE = "dist.all.first.cleaned";
+    private static final String LAST_NAMES_FILE = "dist.all.last.cleaned";
+
+    private final int docMinWords;
+    private final int docMaxWords;
+    private final int maxDictionarySize;
+    private final Random rnd;
+    private int[] cumulIntRanges;
+
+    private List<String> tokenDict = new ArrayList<>();
+
+    /**
+     * @param docMinWords       minimum number of words per generated document
+     * @param docMaxWords       maximum number of words per generated document
+     * @param maxDictionarySize upper bound on the number of dictionary entries that are loaded
+     * @param rnd               random source for document length and word selection
+     * @throws IOException if a dictionary resource is missing from the classpath or cannot be read
+     */
+    public DocumentStringFieldValueGenerator(int docMinWords, int docMaxWords, int maxDictionarySize, Random rnd)
+            throws IOException {
+        this.docMinWords = docMinWords;
+        this.docMaxWords = docMaxWords;
+        this.maxDictionarySize = maxDictionarySize;
+        this.rnd = rnd;
+        initDictionary();
+        double[] zipfProbDist = ProbabilityHelper.getZipfProbDist(tokenDict.size(), 1);
+        cumulIntRanges = ProbabilityHelper.getCumulIntRanges(zipfProbDist);
+    }
+
+    /**
+     * Fills the dictionary with first names, then last names, stopping once
+     * {@code maxDictionarySize} entries have been collected.
+     */
+    private void initDictionary() throws IOException {
+        loadTokens(FIRST_NAMES_FILE);
+        loadTokens(LAST_NAMES_FILE);
+    }
+
+    /**
+     * Appends the trimmed lines of a classpath resource to the dictionary until the resource is
+     * exhausted or the dictionary holds {@code maxDictionarySize} entries. Lines starting with
+     * {@code ';'} are skipped.
+     *
+     * @throws IOException if the resource does not exist on the classpath or reading it fails
+     */
+    private void loadTokens(String resourceName) throws IOException {
+        InputStream in = this.getClass().getClassLoader().getResourceAsStream(resourceName);
+        if (in == null) {
+            // getResourceAsStream signals a missing resource with null; report it explicitly
+            // rather than letting the reader constructor fail with a NullPointerException.
+            throw new IOException("Dictionary resource not found on classpath: " + resourceName);
+        }
+        try (BufferedReader reader = new BufferedReader(new InputStreamReader(in))) {
+            String line;
+            while (tokenDict.size() < maxDictionarySize && (line = reader.readLine()) != null) {
+                if (!line.startsWith(";")) {
+                    tokenDict.add(line.trim());
+                }
+            }
+        }
+    }
+
+    /**
+     * @return a new document whose words are separated by single spaces
+     */
+    @Override
+    public String next() {
+        StringBuilder strBuilder = new StringBuilder();
+        // NOTE(review): MathUtil.stripSignBit presumably yields a non-negative value, so numWords
+        // falls in [docMinWords, docMaxWords] - confirm against MathUtil.
+        int numWords = MathUtil.stripSignBit(rnd.nextInt()) % (docMaxWords - docMinWords + 1) + docMinWords;
+        for (int i = 0; i < numWords; i++) {
+            int ix = ProbabilityHelper.choose(cumulIntRanges, rnd.nextInt());
+            strBuilder.append(tokenDict.get(ix));
+            if (i != numWords - 1) {
+                strBuilder.append(" ");
+            }
+        }
+        return strBuilder.toString();
+    }
+
+    /**
+     * @return the dictionary the documents are built from (the internal list, not a copy)
+     */
+    public List<String> getTokenDictionary() {
+        return tokenDict;
+    }
+
+    @Override
+    public void reset() {
+        // No resettable state.
+    }
+}

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/5e17af20/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/common/datagen/DoubleFieldValueGenerator.java
----------------------------------------------------------------------
diff --git a/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/common/datagen/DoubleFieldValueGenerator.java
b/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/common/datagen/DoubleFieldValueGenerator.java
new file mode 100644
index 0000000..8e36335
--- /dev/null
+++ b/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/common/datagen/DoubleFieldValueGenerator.java
@@ -0,0 +1,39 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hyracks.storage.am.common.datagen;
+
+import java.util.Random;
+
+/**
+ * Field value generator that returns random doubles taken from the supplied {@link Random}.
+ */
+public class DoubleFieldValueGenerator implements IFieldValueGenerator<Double> {
+    protected final Random rnd;
+
+    /**
+     * @param rnd random source used for every generated value
+     */
+    public DoubleFieldValueGenerator(Random rnd) {
+        this.rnd = rnd;
+    }
+
+    /**
+     * @return the next value of {@link Random#nextDouble()}
+     */
+    @Override
+    public Double next() {
+        final double value = rnd.nextDouble();
+        return value;
+    }
+
+    @Override
+    public void reset() {
+        // This generator keeps no state of its own, so there is nothing to reset.
+    }
+}

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/5e17af20/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/common/datagen/FloatFieldValueGenerator.java
----------------------------------------------------------------------
diff --git a/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/common/datagen/FloatFieldValueGenerator.java
b/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/common/datagen/FloatFieldValueGenerator.java
new file mode 100644
index 0000000..851a0f4
--- /dev/null
+++ b/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/common/datagen/FloatFieldValueGenerator.java
@@ -0,0 +1,39 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hyracks.storage.am.common.datagen;
+
+import java.util.Random;
+
+/**
+ * Field value generator that returns random floats taken from the supplied {@link Random}.
+ */
+public class FloatFieldValueGenerator implements IFieldValueGenerator<Float> {
+    protected final Random rnd;
+
+    /**
+     * @param rnd random source used for every generated value
+     */
+    public FloatFieldValueGenerator(Random rnd) {
+        this.rnd = rnd;
+    }
+
+    /**
+     * @return the next value of {@link Random#nextFloat()}
+     */
+    @Override
+    public Float next() {
+        final float value = rnd.nextFloat();
+        return value;
+    }
+
+    @Override
+    public void reset() {
+        // This generator keeps no state of its own, so there is nothing to reset.
+    }
+}

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/5e17af20/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/common/datagen/IFieldValueGenerator.java
----------------------------------------------------------------------
diff --git a/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/common/datagen/IFieldValueGenerator.java
b/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/common/datagen/IFieldValueGenerator.java
new file mode 100644
index 0000000..dbd4bfc
--- /dev/null
+++ b/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/common/datagen/IFieldValueGenerator.java
@@ -0,0 +1,25 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hyracks.storage.am.common.datagen;
+
+/**
+ * A source of generated values for a single tuple field.
+ *
+ * @param <T> the type of the generated values
+ */
+public interface IFieldValueGenerator<T> {
+    /**
+     * @return the next generated value
+     */
+    T next();
+
+    /**
+     * Returns the generator to its starting state. Implementations without any state of their
+     * own may do nothing.
+     */
+    void reset();
+}

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/5e17af20/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/common/datagen/IntegerFieldValueGenerator.java
----------------------------------------------------------------------
diff --git a/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/common/datagen/IntegerFieldValueGenerator.java
b/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/common/datagen/IntegerFieldValueGenerator.java
new file mode 100644
index 0000000..6c5f649
--- /dev/null
+++ b/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/common/datagen/IntegerFieldValueGenerator.java
@@ -0,0 +1,39 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hyracks.storage.am.common.datagen;
+
+import java.util.Random;
+
+/**
+ * Field value generator that returns random integers taken from the supplied {@link Random}.
+ */
+public class IntegerFieldValueGenerator implements IFieldValueGenerator<Integer> {
+    protected final Random rnd;
+
+    /**
+     * @param rnd random source used for every generated value
+     */
+    public IntegerFieldValueGenerator(Random rnd) {
+        this.rnd = rnd;
+    }
+
+    /**
+     * @return the next value of {@link Random#nextInt()}
+     */
+    @Override
+    public Integer next() {
+        final int value = rnd.nextInt();
+        return value;
+    }
+
+    @Override
+    public void reset() {
+        // This generator keeps no state of its own, so there is nothing to reset.
+    }
+}

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/5e17af20/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/common/datagen/PersonNameFieldValueGenerator.java
----------------------------------------------------------------------
diff --git a/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/common/datagen/PersonNameFieldValueGenerator.java
b/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/common/datagen/PersonNameFieldValueGenerator.java
new file mode 100644
index 0000000..256eaf5
--- /dev/null
+++ b/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/common/datagen/PersonNameFieldValueGenerator.java
@@ -0,0 +1,101 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hyracks.storage.am.common.datagen;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Random;
+
+import org.apache.hyracks.util.MathUtil;
+
+/**
+ * Generates person names of the form {@code "First Last"} or {@code "First M. Last"}, where the
+ * first and last names are drawn from name lists loaded from the classpath and the middle initial
+ * is added with a configurable probability.
+ */
+public class PersonNameFieldValueGenerator implements IFieldValueGenerator<String> {
+    private static final String FIRST_NAMES_FILE = "dist.all.first.cleaned";
+    private static final String LAST_NAMES_FILE = "dist.all.last.cleaned";
+    private static final String LETTERS = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
+
+    private final Random rnd;
+    private final double middleInitialProb;
+
+    private List<String> firstNames = new ArrayList<>();
+    private List<String> lastNames = new ArrayList<>();
+
+    /**
+     * @param rnd               random source for all choices
+     * @param middleInitialProb probability, compared against {@link Random#nextDouble()}, that a
+     *                          generated name contains a middle initial
+     * @throws IOException if a name resource is missing from the classpath or cannot be read
+     */
+    public PersonNameFieldValueGenerator(Random rnd, double middleInitialProb)
+            throws IOException {
+        this.rnd = rnd;
+        this.middleInitialProb = middleInitialProb;
+        initNames();
+    }
+
+    /** Loads the first-name and last-name lists from their classpath resources. */
+    private void initNames() throws IOException {
+        readNames(FIRST_NAMES_FILE, firstNames);
+        readNames(LAST_NAMES_FILE, lastNames);
+    }
+
+    /**
+     * Appends every trimmed line of a classpath resource to {@code target}, skipping lines that
+     * start with {@code ';'}.
+     *
+     * @throws IOException if the resource does not exist on the classpath or reading it fails
+     */
+    private void readNames(String resourceName, List<String> target) throws IOException {
+        InputStream in = this.getClass().getClassLoader().getResourceAsStream(resourceName);
+        if (in == null) {
+            // getResourceAsStream signals a missing resource with null; report it explicitly
+            // rather than letting the reader constructor fail with a NullPointerException.
+            throw new IOException("Name resource not found on classpath: " + resourceName);
+        }
+        try (BufferedReader reader = new BufferedReader(new InputStreamReader(in))) {
+            String line;
+            while ((line = reader.readLine()) != null) {
+                if (!line.startsWith(";")) {
+                    target.add(line.trim());
+                }
+            }
+        }
+    }
+
+    /**
+     * @return a new name: first name, optional middle initial followed by a period, last name,
+     *         separated by single spaces
+     */
+    @Override
+    public String next() {
+        StringBuilder strBuilder = new StringBuilder();
+
+        // First name.
+        int fix = MathUtil.stripSignBit(rnd.nextInt()) % firstNames.size();
+        strBuilder.append(firstNames.get(fix));
+        strBuilder.append(" ");
+
+        // Optional middle initial. Random.nextDouble() is never negative, so no abs() is needed.
+        double d = rnd.nextDouble();
+        if (d <= middleInitialProb) {
+            int mix = MathUtil.stripSignBit(rnd.nextInt()) % LETTERS.length();
+            strBuilder.append(LETTERS.charAt(mix));
+            strBuilder.append(". ");
+        }
+
+        // Last name.
+        int lix = MathUtil.stripSignBit(rnd.nextInt()) % lastNames.size();
+        strBuilder.append(lastNames.get(lix));
+
+        return strBuilder.toString();
+    }
+
+    @Override
+    public void reset() {
+        // No resettable state.
+    }
+}

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/5e17af20/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/common/datagen/ProbabilityHelper.java
----------------------------------------------------------------------
diff --git a/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/common/datagen/ProbabilityHelper.java
b/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/common/datagen/ProbabilityHelper.java
new file mode 100644
index 0000000..c7fbd3f
--- /dev/null
+++ b/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/common/datagen/ProbabilityHelper.java
@@ -0,0 +1,68 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hyracks.storage.am.common.datagen;
+
+import java.util.Arrays;
+
+/**
+ * Static helpers for building discrete probability distributions and for sampling from them with
+ * a random integer.
+ */
+public class ProbabilityHelper {
+    /**
+     * @param numChoices number of choices
+     * @return an array of length {@code numChoices} in which every entry is {@code 1 / numChoices}
+     */
+    public static double[] getUniformProbDist(int numChoices) {
+        double[] probDist = new double[numChoices];
+        Arrays.fill(probDist, 1.0 / numChoices);
+        return probDist;
+    }
+
+    /**
+     * Builds a Zipf distribution: choice {@code i} (1-based) gets a probability proportional to
+     * {@code 1 / i^zipfSkew}, normalized so that all probabilities sum to 1.
+     *
+     * @param numChoices number of choices
+     * @param zipfSkew   exponent of the distribution
+     * @return an array of length {@code numChoices} holding the probabilities in rank order
+     */
+    public static double[] getZipfProbDist(int numChoices, int zipfSkew) {
+        double[] probDist = new double[numChoices];
+        double divisor = 0;
+        // First pass: unnormalized weights and their sum.
+        for (int i = 1; i <= numChoices; i++) {
+            double weight = 1.0 / Math.pow(i, zipfSkew);
+            probDist[i - 1] = weight;
+            divisor += weight;
+        }
+        // Second pass: normalize.
+        for (int i = 0; i < numChoices; i++) {
+            probDist[i] = probDist[i] / divisor;
+        }
+        return probDist;
+    }
+
+    /**
+     * Converts a probability distribution into cumulative upper bounds on the range
+     * {@code [0, Integer.MAX_VALUE]}. Entry {@code i} is the sum of
+     * {@code floor(Integer.MAX_VALUE * probDist[j])} for {@code j <= i}; the last entry is always
+     * {@code Integer.MAX_VALUE} so the whole range is covered regardless of rounding.
+     *
+     * @param probDist the probabilities of the choices
+     * @return the cumulative bounds, one per choice; an empty array for an empty distribution
+     */
+    public static int[] getCumulIntRanges(double[] probDist) {
+        int[] opRanges = new int[probDist.length];
+        if (opRanges.length == 0) {
+            // Nothing to choose from; avoid indexing into an empty array.
+            return opRanges;
+        }
+        int last = opRanges.length - 1;
+        int cumul = 0;
+        for (int i = 0; i < last; i++) {
+            cumul += (int) Math.floor(Integer.MAX_VALUE * probDist[i]);
+            opRanges[i] = cumul;
+        }
+        opRanges[last] = Integer.MAX_VALUE;
+        return opRanges;
+    }
+
+    /**
+     * Maps a random integer to the index of a choice by locating its absolute value within the
+     * cumulative ranges.
+     * <p>
+     * {@code Math.abs(Integer.MIN_VALUE)} is still negative, so that single input sorts before
+     * every bound and selects index 0.
+     *
+     * @param cumulIntRanges bounds as produced by {@link #getCumulIntRanges(double[])}
+     * @param randomInt      any integer, typically {@code Random.nextInt()}
+     * @return the index of the first bound that is greater than or equal to the absolute value
+     */
+    public static int choose(int[] cumulIntRanges, int randomInt) {
+        int rndVal = Math.abs(randomInt);
+        int ix = Arrays.binarySearch(cumulIntRanges, rndVal);
+        // A negative result encodes the insertion point as -(insertionPoint) - 1.
+        return ix < 0 ? -ix - 1 : ix;
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/5e17af20/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/common/datagen/SortedDoubleFieldValueGenerator.java
----------------------------------------------------------------------
diff --git a/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/common/datagen/SortedDoubleFieldValueGenerator.java
b/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/common/datagen/SortedDoubleFieldValueGenerator.java
new file mode 100644
index 0000000..ddca6f3
--- /dev/null
+++ b/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/common/datagen/SortedDoubleFieldValueGenerator.java
@@ -0,0 +1,45 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hyracks.storage.am.common.datagen;
+
+/**
+ * Field value generator that returns an ascending sequence of doubles: the start value first,
+ * then each following value larger by one.
+ */
+public class SortedDoubleFieldValueGenerator implements IFieldValueGenerator<Double> {
+    private double val;
+    private final double startVal;
+
+    /** Creates a generator whose sequence starts at 0. */
+    public SortedDoubleFieldValueGenerator() {
+        this(0.0d);
+    }
+
+    /**
+     * @param startVal the first value of the sequence
+     */
+    public SortedDoubleFieldValueGenerator(double startVal) {
+        this.startVal = startVal;
+        reset();
+    }
+
+    /**
+     * @return the current value; the stored value is advanced by one afterwards
+     */
+    @Override
+    public Double next() {
+        final double current = val;
+        val = current + 1;
+        return current;
+    }
+
+    /** Restarts the sequence at the start value. */
+    @Override
+    public void reset() {
+        val = startVal;
+    }
+}

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/5e17af20/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/common/datagen/SortedFloatFieldValueGenerator.java
----------------------------------------------------------------------
diff --git a/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/common/datagen/SortedFloatFieldValueGenerator.java
b/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/common/datagen/SortedFloatFieldValueGenerator.java
new file mode 100644
index 0000000..1e5dd19
--- /dev/null
+++ b/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/common/datagen/SortedFloatFieldValueGenerator.java
@@ -0,0 +1,45 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hyracks.storage.am.common.datagen;
+
+public class SortedFloatFieldValueGenerator implements IFieldValueGenerator<Float> {
+    private float val = 0.0f;
+    private final float startVal;
+
+    public SortedFloatFieldValueGenerator() {
+        startVal = 0.0f;
+        reset();
+    }
+
+    public SortedFloatFieldValueGenerator(float startVal) {
+        this.startVal = startVal;
+        reset();
+    }
+
+    @Override
+    public Float next() {
+        return val++;
+    }
+
+    @Override
+    public void reset() {
+        val = startVal;
+    }
+}

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/5e17af20/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/common/datagen/SortedIntegerFieldValueGenerator.java
----------------------------------------------------------------------
diff --git a/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/common/datagen/SortedIntegerFieldValueGenerator.java b/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/common/datagen/SortedIntegerFieldValueGenerator.java
new file mode 100644
index 0000000..3ab17ca
--- /dev/null
+++ b/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/common/datagen/SortedIntegerFieldValueGenerator.java
@@ -0,0 +1,45 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hyracks.storage.am.common.datagen;
+
+public class SortedIntegerFieldValueGenerator implements IFieldValueGenerator<Integer> {
+    private int val = 0;
+    private final int startVal;
+
+    public SortedIntegerFieldValueGenerator() {
+        startVal = 0;
+        reset();
+    }
+
+    public SortedIntegerFieldValueGenerator(int startVal) {
+        this.startVal = startVal;
+        reset();
+    }
+
+    @Override
+    public Integer next() {
+        return val++;
+    }
+
+    @Override
+    public void reset() {
+        val = startVal;
+    }
+}

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/5e17af20/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/common/datagen/StringFieldValueGenerator.java
----------------------------------------------------------------------
diff --git a/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/common/datagen/StringFieldValueGenerator.java b/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/common/datagen/StringFieldValueGenerator.java
new file mode 100644
index 0000000..9418447
--- /dev/null
+++ b/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/common/datagen/StringFieldValueGenerator.java
@@ -0,0 +1,52 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hyracks.storage.am.common.datagen;
+
+import java.util.Random;
+
+import org.apache.hyracks.util.MathUtil;
+
+public class StringFieldValueGenerator implements IFieldValueGenerator<String> {
+    private int maxLen;
+    private final Random rnd;
+
+    public StringFieldValueGenerator(int maxLen, Random rnd) {
+        this.maxLen = maxLen;
+        this.rnd = rnd;
+    }
+
+    public void setMaxLength(int maxLen) {
+        this.maxLen = maxLen;
+    }
+
+    @Override
+    public String next() {
+        String s = Long.toHexString(Double.doubleToLongBits(rnd.nextDouble()));
+        StringBuilder strBuilder = new StringBuilder();
+        for (int i = 0; i < s.length() && i < maxLen; i++) {
+            strBuilder.append(s.charAt(MathUtil.stripSignBit(rnd.nextInt()) % s.length()));
+        }
+        return strBuilder.toString();
+    }
+
+    @Override
+    public void reset() {
+    }
+}

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/5e17af20/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/common/datagen/TupleBatch.java
----------------------------------------------------------------------
diff --git a/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/common/datagen/TupleBatch.java b/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/common/datagen/TupleBatch.java
new file mode 100644
index 0000000..c34c7bc
--- /dev/null
+++ b/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/common/datagen/TupleBatch.java
@@ -0,0 +1,55 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hyracks.storage.am.common.datagen;
+
+import java.io.IOException;
+import java.util.concurrent.atomic.AtomicBoolean;
+
+import org.apache.hyracks.api.dataflow.value.ISerializerDeserializer;
+import org.apache.hyracks.dataflow.common.data.accessors.ITupleReference;
+
+@SuppressWarnings("rawtypes")
+public class TupleBatch {
+    private final int size;
+    private final TupleGenerator[] tupleGens;
+    public final AtomicBoolean inUse = new AtomicBoolean(false);
+
+    public TupleBatch(int size, IFieldValueGenerator[] fieldGens, ISerializerDeserializer[] fieldSerdes, int payloadSize) {
+        this.size = size;
+        tupleGens = new TupleGenerator[size];
+        for (int i = 0; i < size; i++) {
+            tupleGens[i] = new TupleGenerator(fieldGens, fieldSerdes, payloadSize);
+        }
+    }
+
+    public void generate() throws IOException {
+        for(TupleGenerator tupleGen : tupleGens) {
+            tupleGen.next();
+        }
+    }
+
+    public int size() {
+        return size;
+    }
+
+    public ITupleReference get(int ix) {
+        return tupleGens[ix].get();
+    }
+}

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/5e17af20/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/common/datagen/TupleGenerator.java
----------------------------------------------------------------------
diff --git a/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/common/datagen/TupleGenerator.java b/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/common/datagen/TupleGenerator.java
new file mode 100644
index 0000000..4f26065
--- /dev/null
+++ b/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/common/datagen/TupleGenerator.java
@@ -0,0 +1,84 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hyracks.storage.am.common.datagen;
+
+import java.io.DataOutput;
+import java.io.IOException;
+
+import org.apache.hyracks.api.dataflow.value.ISerializerDeserializer;
+import org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
+import org.apache.hyracks.dataflow.common.comm.io.ArrayTupleReference;
+import org.apache.hyracks.dataflow.common.data.accessors.ITupleReference;
+
+@SuppressWarnings({"rawtypes", "unchecked" })
+public class TupleGenerator {
+    protected final ISerializerDeserializer[] fieldSerdes;
+    protected final IFieldValueGenerator[] fieldGens;
+    protected final ArrayTupleBuilder tb;
+    protected final ArrayTupleReference tuple;
+    protected final byte[] payload;
+    protected final DataOutput tbDos;
+
+    public TupleGenerator(IFieldValueGenerator[] fieldGens, ISerializerDeserializer[] fieldSerdes, int payloadSize) {
+        this.fieldSerdes = fieldSerdes;
+        this.fieldGens = fieldGens;
+        tuple = new ArrayTupleReference();
+        if (payloadSize > 0) {
+            tb = new ArrayTupleBuilder(fieldSerdes.length + 1);
+            payload = new byte[payloadSize];
+        } else {
+            tb = new ArrayTupleBuilder(fieldSerdes.length);
+            payload = null;
+        }
+        tbDos = tb.getDataOutput();
+    }
+
+    public ITupleReference next() throws IOException {
+        tb.reset();
+        for (int i = 0; i < fieldSerdes.length; i++) {
+            fieldSerdes[i].serialize(fieldGens[i].next(), tbDos);
+            tb.addFieldEndOffset();
+        }
+        if (payload != null) {
+            tbDos.write(payload);
+            tb.addFieldEndOffset();
+        }
+        tuple.reset(tb.getFieldEndOffsets(), tb.getByteArray());
+        return tuple;
+    }
+
+    public ITupleReference get() {
+        return tuple;
+    }
+
+    public void reset() {
+        for (IFieldValueGenerator fieldGen : fieldGens) {
+            fieldGen.reset();
+        }
+    }
+
+    public ISerializerDeserializer[] getFieldSerdes() {
+        return fieldSerdes;
+    }
+
+    public IFieldValueGenerator[] getFieldGens() {
+        return fieldGens;
+    }
+}

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/5e17af20/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/test/support/TestUtils.java
----------------------------------------------------------------------
diff --git a/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/test/support/TestUtils.java b/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/test/support/TestUtils.java
index ab87f93..039cf7d 100644
--- a/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/test/support/TestUtils.java
+++ b/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/test/support/TestUtils.java
@@ -23,6 +23,7 @@ import java.util.ArrayList;
 import java.util.List;
 import java.util.concurrent.Executors;
 
+import org.apache.commons.lang3.StringUtils;
 import org.apache.hyracks.api.application.INCApplicationContext;
 import org.apache.hyracks.api.context.IHyracksTaskContext;
 import org.apache.hyracks.api.dataflow.ActivityId;
@@ -53,4 +54,8 @@ public class TestUtils {
         devices.add(new IODeviceHandle(new File(System.getProperty("java.io.tmpdir")), "."));
         return new IOManager(devices, Executors.newCachedThreadPool());
     }
+
+    public static String joinPath(String... pathElements) {
+        return StringUtils.join(pathElements, File.separatorChar);
+    }
 }


Mime
View raw message