bigtop-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From rnowl...@apache.org
Subject [3/5] bigtop git commit: BIGTOP-1985. Extract name generator from BigPetStore data generator
Date Tue, 25 Aug 2015 14:34:22 GMT
http://git-wip-us.apache.org/repos/asf/bigtop/blob/502bd784/bigtop-data-generators/bigpetstore-data-generator/src/main/resources/input_data/namedb/namedb.info
----------------------------------------------------------------------
diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/main/resources/input_data/namedb/namedb.info b/bigtop-data-generators/bigpetstore-data-generator/src/main/resources/input_data/namedb/namedb.info
deleted file mode 100644
index 1f59f9e..0000000
--- a/bigtop-data-generators/bigpetstore-data-generator/src/main/resources/input_data/namedb/namedb.info
+++ /dev/null
@@ -1,13 +0,0 @@
-name = Name DB
-description = Defines a database for maintain a list of names.
-package = Fields
-version = VERSION
-core = 7.x
-dependencies[] = name
-
-; Information added by drupal.org packaging script on 2011-06-08
-version = "7.x-1.0-beta2"
-core = "7.x"
-project = "namedb"
-datestamp = "1307496118"
-

http://git-wip-us.apache.org/repos/asf/bigtop/blob/502bd784/bigtop-data-generators/bigpetstore-data-generator/src/test/java/org/apache/bigtop/datagenerators/bigpetstore/generators/customer/TestCustomerSampler.java
----------------------------------------------------------------------
diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/test/java/org/apache/bigtop/datagenerators/bigpetstore/generators/customer/TestCustomerSampler.java b/bigtop-data-generators/bigpetstore-data-generator/src/test/java/org/apache/bigtop/datagenerators/bigpetstore/generators/customer/TestCustomerSampler.java
index 8bb3c87..a176333 100644
--- a/bigtop-data-generators/bigpetstore-data-generator/src/test/java/org/apache/bigtop/datagenerators/bigpetstore/generators/customer/TestCustomerSampler.java
+++ b/bigtop-data-generators/bigpetstore-data-generator/src/test/java/org/apache/bigtop/datagenerators/bigpetstore/generators/customer/TestCustomerSampler.java
@@ -20,7 +20,6 @@ import static org.junit.Assert.assertTrue;
 
 import java.util.ArrayList;
 import java.util.Arrays;
-import java.util.Collection;
 import java.util.List;
 import java.util.Map;
 
@@ -28,8 +27,6 @@ import org.apache.bigtop.datagenerators.bigpetstore.Constants;
 import org.apache.bigtop.datagenerators.bigpetstore.datamodels.Customer;
 import org.apache.bigtop.datagenerators.bigpetstore.datamodels.Store;
 import org.apache.bigtop.datagenerators.bigpetstore.datamodels.inputs.ZipcodeRecord;
-import org.apache.bigtop.datagenerators.bigpetstore.generators.customer.CustomerLocationPDF;
-import org.apache.bigtop.datagenerators.bigpetstore.generators.customer.CustomerSampler;
 import org.apache.bigtop.datagenerators.samplers.SeedFactory;
 import org.apache.bigtop.datagenerators.samplers.pdfs.ProbabilityDensityFunction;
 import org.apache.bigtop.datagenerators.samplers.samplers.ConditionalSampler;
@@ -39,6 +36,7 @@ import org.apache.bigtop.datagenerators.samplers.samplers.SequenceSampler;
 import org.apache.commons.lang3.tuple.Pair;
 import org.junit.Test;
 
+import com.google.common.collect.Lists;
 import com.google.common.collect.Maps;
 
 public class TestCustomerSampler
@@ -69,7 +67,12 @@ public class TestCustomerSampler
 	{
 		SeedFactory factory = new SeedFactory(1234);
 
-		Collection<String> nameList = Arrays.asList(new String[] {"Fred", "Gary", "George", "Fiona"});
+		List<Pair<String, String>> nameList = Lists.newArrayList();
+		nameList.add(Pair.of("Fred", "Fred"));
+		nameList.add(Pair.of("Gary", "Gary"));
+		nameList.add(Pair.of("George", "George"));
+		nameList.add(Pair.of("Fiona", "Fiona"));
+
 		List<ZipcodeRecord> zipcodes = Arrays.asList(new ZipcodeRecord[] {
 				new ZipcodeRecord("11111", Pair.of(1.0, 1.0), "AZ", "Tempte", 30000.0, 100),
 				new ZipcodeRecord("22222", Pair.of(2.0, 2.0), "AZ", "Phoenix", 45000.0, 200),
@@ -85,21 +88,18 @@ public class TestCustomerSampler
 
 
 		Sampler<Integer> idSampler = new SequenceSampler();
-		Sampler<String> nameSampler = RouletteWheelSampler.createUniform(nameList, factory);
+		Sampler<Pair<String, String>> nameSampler = RouletteWheelSampler.createUniform(nameList, factory);
 		Sampler<Store> storeSampler = RouletteWheelSampler.createUniform(stores, factory);
 		ConditionalSampler<ZipcodeRecord, Store> zipcodeSampler = buildLocationSampler(stores, zipcodes, factory);
 
-		Sampler<Customer> sampler = new CustomerSampler(idSampler, nameSampler, nameSampler, storeSampler, zipcodeSampler);
+		Sampler<Customer> sampler = new CustomerSampler(idSampler, nameSampler, storeSampler, zipcodeSampler);
 
 		Customer customer = sampler.sample();
 
 		assertNotNull(customer);
 		assertTrue(customer.getId() >= 0);
 		assertNotNull(customer.getName());
-		assertNotNull(customer.getName().getLeft());
-		assertTrue(nameList.contains(customer.getName().getLeft()));
-		assertNotNull(customer.getName().getRight());
-		assertTrue(nameList.contains(customer.getName().getRight()));
+		assertTrue(nameList.contains(customer.getName()));
 		assertNotNull(customer.getLocation());
 		assertTrue(zipcodes.contains(customer.getLocation()));
 

http://git-wip-us.apache.org/repos/asf/bigtop/blob/502bd784/bigtop-data-generators/bigpetstore-data-generator/src/test/java/org/apache/bigtop/datagenerators/bigpetstore/generators/customer/TestCustomerSamplerBuilder.java
----------------------------------------------------------------------
diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/test/java/org/apache/bigtop/datagenerators/bigpetstore/generators/customer/TestCustomerSamplerBuilder.java b/bigtop-data-generators/bigpetstore-data-generator/src/test/java/org/apache/bigtop/datagenerators/bigpetstore/generators/customer/TestCustomerSamplerBuilder.java
index 786c7fc..6ddaa94 100644
--- a/bigtop-data-generators/bigpetstore-data-generator/src/test/java/org/apache/bigtop/datagenerators/bigpetstore/generators/customer/TestCustomerSamplerBuilder.java
+++ b/bigtop-data-generators/bigpetstore-data-generator/src/test/java/org/apache/bigtop/datagenerators/bigpetstore/generators/customer/TestCustomerSamplerBuilder.java
@@ -20,38 +20,30 @@ import static org.junit.Assert.assertTrue;
 
 import java.util.Arrays;
 import java.util.List;
-import java.util.Map;
 
 import org.apache.bigtop.datagenerators.bigpetstore.datamodels.Customer;
 import org.apache.bigtop.datagenerators.bigpetstore.datamodels.Store;
 import org.apache.bigtop.datagenerators.bigpetstore.datamodels.inputs.InputData;
-import org.apache.bigtop.datagenerators.bigpetstore.datamodels.inputs.Names;
 import org.apache.bigtop.datagenerators.bigpetstore.datamodels.inputs.ZipcodeRecord;
-import org.apache.bigtop.datagenerators.bigpetstore.generators.customer.CustomerSamplerBuilder;
 import org.apache.bigtop.datagenerators.samplers.SeedFactory;
 import org.apache.bigtop.datagenerators.samplers.samplers.Sampler;
 import org.apache.commons.lang3.tuple.Pair;
 import org.junit.Test;
 
-import com.google.common.collect.ImmutableMap;
-
 public class TestCustomerSamplerBuilder
 {
 
 	@Test
 	public void testSample() throws Exception
 	{
-		Map<String, Double> nameList = ImmutableMap.of("Fred", 1.0, "George", 1.0, "Gary", 1.0, "Fiona", 1.0);
 		List<ZipcodeRecord> zipcodes = Arrays.asList(new ZipcodeRecord[] {
 				new ZipcodeRecord("11111", Pair.of(1.0, 1.0), "AZ", "Tempte", 30000.0, 100),
 				new ZipcodeRecord("22222", Pair.of(2.0, 2.0), "AZ", "Phoenix", 45000.0, 200),
 				new ZipcodeRecord("33333", Pair.of(3.0, 3.0), "AZ", "Flagstaff", 60000.0, 300)
 				});
 
-		Names names = new Names(nameList, nameList);
-
 		// don't need product categories for building customers
-		InputData inputData = new InputData(zipcodes, names);
+		InputData inputData = new InputData(zipcodes);
 
 		List<Store> stores = Arrays.asList(new Store(0, "Store_0", zipcodes.get(0)),
 				new Store(1, "Store_1", zipcodes.get(1)),
@@ -68,8 +60,6 @@ public class TestCustomerSamplerBuilder
 		assertNotNull(customer);
 		assertTrue(customer.getId() >= 0);
 		assertNotNull(customer.getName());
-		assertNotNull(customer.getName().getLeft());
-		assertNotNull(customer.getName().getRight());
 		assertNotNull(customer.getLocation());
 
 	}

http://git-wip-us.apache.org/repos/asf/bigtop/blob/502bd784/bigtop-data-generators/bigtop-name-generator/README.md
----------------------------------------------------------------------
diff --git a/bigtop-data-generators/bigtop-name-generator/README.md b/bigtop-data-generators/bigtop-name-generator/README.md
new file mode 100644
index 0000000..aa204b2
--- /dev/null
+++ b/bigtop-data-generators/bigtop-name-generator/README.md
@@ -0,0 +1,51 @@
+<!--
+Licensed to the Apache Software Foundation (ASF) under one or more
+contributor license agreements.  See the NOTICE file distributed with
+this work for additional information regarding copyright ownership.
+The ASF licenses this file to You under the Apache License, Version 2.0
+(the "License"); you may not use this file except in compliance with
+the License.  You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+-->
+BigTop Name Generator
+=====================
+
+Library for generating first and last names by sampling from the [Drupal
+Name Database](https://www.drupal.org/project/namedb) which itself is based
+on U.S. Census data.  Names are generated in proportion to
+their frequencies according to the U.S. Census.
+
+Building and Testing
+--------------------
+We use the Gradle build system for the name generator so you'll need
+to install Gradle on your system.
+Once that's done, you can use gradle to run the included unit tests
+and build the name generator jar.
+
+To build:
+
+    $ gradle build
+
+This will create several directories and a jar located at:
+
+    build/libs/bigtop-name-generator-1.1.0-SNAPSHOT.jar
+
+Building automatically runs the included unit tests.  If you would prefer
+to just run the unit tests, you can do so by:
+
+    $ gradle test
+
+To clean up the build files, run:
+
+    $ gradle clean
+
+To install a jar into your local maven repository:
+
+    $ gradle install

http://git-wip-us.apache.org/repos/asf/bigtop/blob/502bd784/bigtop-data-generators/bigtop-name-generator/build.gradle
----------------------------------------------------------------------
diff --git a/bigtop-data-generators/bigtop-name-generator/build.gradle b/bigtop-data-generators/bigtop-name-generator/build.gradle
new file mode 100644
index 0000000..08d9d34
--- /dev/null
+++ b/bigtop-data-generators/bigtop-name-generator/build.gradle
@@ -0,0 +1,63 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+apply plugin: 'eclipse'
+apply plugin: 'groovy'
+apply plugin: 'java'
+apply plugin: 'maven'
+group = 'org.apache.bigtop'
+version = '1.1.0-SNAPSHOT'
+
+jar {
+
+    from {
+        configurations.runtime.collect {
+            it.isDirectory() ? it : zipTree(it)
+        }
+    }
+
+    manifest {
+	attributes 'Title': 'BigTop Name Generator', 'Version': version
+    }
+}
+
+repositories {
+	mavenLocal()
+	mavenCentral()
+}
+
+test {
+     // show standard out and error on console
+     testLogging.showStandardStreams = true
+
+     // listen to events in the test execution lifecycle
+     beforeTest { descriptor ->
+       logger.lifecycle("Running test: " + descriptor)
+     }
+
+     // listen to standard out and standard error of the test JVM(s)
+     onOutput { descriptor, event ->
+       logger.lifecycle("Test: " + descriptor + " produced standard out/err: " + event.message )
+     }
+
+}
+
+dependencies {
+    compile 'com.google.guava:guava:18.0'
+    compile 'org.apache.commons:commons-lang3:3.4'
+    compile 'org.apache.bigtop:bigtop-samplers:1.1.0-SNAPSHOT'
+
+    testCompile 'junit:junit:4.+'
+}

http://git-wip-us.apache.org/repos/asf/bigtop/blob/502bd784/bigtop-data-generators/bigtop-name-generator/settings.gradle
----------------------------------------------------------------------
diff --git a/bigtop-data-generators/bigtop-name-generator/settings.gradle b/bigtop-data-generators/bigtop-name-generator/settings.gradle
new file mode 100644
index 0000000..e738f38
--- /dev/null
+++ b/bigtop-data-generators/bigtop-name-generator/settings.gradle
@@ -0,0 +1,16 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+rootProject.name = "bigtop-name-generator"
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/bigtop/blob/502bd784/bigtop-data-generators/bigtop-name-generator/src/main/java/org/apache/bigtop/datagenerators/namegenerator/NameGenerator.java
----------------------------------------------------------------------
diff --git a/bigtop-data-generators/bigtop-name-generator/src/main/java/org/apache/bigtop/datagenerators/namegenerator/NameGenerator.java b/bigtop-data-generators/bigtop-name-generator/src/main/java/org/apache/bigtop/datagenerators/namegenerator/NameGenerator.java
new file mode 100644
index 0000000..4b5f620
--- /dev/null
+++ b/bigtop-data-generators/bigtop-name-generator/src/main/java/org/apache/bigtop/datagenerators/namegenerator/NameGenerator.java
@@ -0,0 +1,40 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bigtop.datagenerators.namegenerator;
+
+import org.apache.bigtop.datagenerators.samplers.SeedFactory;
+import org.apache.bigtop.datagenerators.samplers.samplers.RouletteWheelSampler;
+import org.apache.bigtop.datagenerators.samplers.samplers.Sampler;
+import org.apache.commons.lang3.tuple.Pair;
+
+public class NameGenerator implements Sampler<Pair<String, String>>
+{
+	private final Sampler<String> firstNameSampler;
+	private final Sampler<String> lastNameSampler;
+
+	public NameGenerator(SeedFactory seedFactory) throws Exception
+	{
+		Names names = new NameReader().readData();
+
+		firstNameSampler = RouletteWheelSampler.create(names.getFirstNames(), seedFactory);
+		lastNameSampler = RouletteWheelSampler.create(names.getLastNames(), seedFactory);
+	}
+
+	public Pair<String, String> sample() throws Exception
+	{
+		return Pair.of(firstNameSampler.sample(), lastNameSampler.sample());
+	}
+}

http://git-wip-us.apache.org/repos/asf/bigtop/blob/502bd784/bigtop-data-generators/bigtop-name-generator/src/main/java/org/apache/bigtop/datagenerators/namegenerator/NameReader.java
----------------------------------------------------------------------
diff --git a/bigtop-data-generators/bigtop-name-generator/src/main/java/org/apache/bigtop/datagenerators/namegenerator/NameReader.java b/bigtop-data-generators/bigtop-name-generator/src/main/java/org/apache/bigtop/datagenerators/namegenerator/NameReader.java
new file mode 100644
index 0000000..d53529f
--- /dev/null
+++ b/bigtop-data-generators/bigtop-name-generator/src/main/java/org/apache/bigtop/datagenerators/namegenerator/NameReader.java
@@ -0,0 +1,68 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bigtop.datagenerators.namegenerator;
+
+import java.io.BufferedInputStream;
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.InputStream;
+import java.util.Map;
+import java.util.Scanner;
+
+import com.google.common.collect.Maps;
+
+public class NameReader
+{
+	InputStream path;
+
+	public NameReader() throws Exception
+	{
+		this.path = getResource(new File("namedb/data/data.dat"));
+	}
+
+	private InputStream getResource(File filename) throws Exception
+	{
+		InputStream stream = getClass().getResourceAsStream("/input_data/" + filename);
+		return new BufferedInputStream(stream);
+	}
+
+	public Names readData() throws FileNotFoundException
+	{
+		Scanner scanner = new Scanner(path);
+
+		Map<String, Double> firstNames = Maps.newHashMap();
+		Map<String, Double> lastNames = Maps.newHashMap();
+
+		while(scanner.hasNextLine())
+		{
+			String line = scanner.nextLine();
+			String[] cols = line.trim().split(",");
+
+			String name = cols[0];
+			double weight = Double.parseDouble(cols[5]);
+
+			if(cols[4].equals("1"))
+				firstNames.put(name, weight);
+			if(cols[3].equals("1"))
+				lastNames.put(name, weight);
+		}
+
+		scanner.close();
+
+		return new Names(firstNames, lastNames);
+
+	}
+}

http://git-wip-us.apache.org/repos/asf/bigtop/blob/502bd784/bigtop-data-generators/bigtop-name-generator/src/main/java/org/apache/bigtop/datagenerators/namegenerator/Names.java
----------------------------------------------------------------------
diff --git a/bigtop-data-generators/bigtop-name-generator/src/main/java/org/apache/bigtop/datagenerators/namegenerator/Names.java b/bigtop-data-generators/bigtop-name-generator/src/main/java/org/apache/bigtop/datagenerators/namegenerator/Names.java
new file mode 100644
index 0000000..a4d9e03
--- /dev/null
+++ b/bigtop-data-generators/bigtop-name-generator/src/main/java/org/apache/bigtop/datagenerators/namegenerator/Names.java
@@ -0,0 +1,46 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.bigtop.datagenerators.namegenerator;
+
+import java.io.Serializable;
+import java.util.Map;
+
+import com.google.common.collect.ImmutableMap;
+
+public class Names implements Serializable
+{
+	private static final long serialVersionUID = 2731634747628534453L;
+
+	final ImmutableMap<String, Double> firstNames;
+	final ImmutableMap<String, Double> lastNames;
+
+	public Names(Map<String, Double> firstNames,
+			Map<String, Double> lastNames)
+	{
+		this.firstNames = ImmutableMap.copyOf(firstNames);
+		this.lastNames = ImmutableMap.copyOf(lastNames);
+	}
+
+	public ImmutableMap<String, Double> getFirstNames()
+	{
+		return firstNames;
+	}
+
+	public ImmutableMap<String, Double> getLastNames()
+	{
+		return lastNames;
+	}
+}


Mime
View raw message