bigtop-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From rnowl...@apache.org
Subject [5/5] bigtop git commit: BIGTOP-1985. Extract name generator from BigPetStore data generator
Date Tue, 25 Aug 2015 14:34:24 GMT
BIGTOP-1985. Extract name generator from BigPetStore data generator


Project: http://git-wip-us.apache.org/repos/asf/bigtop/repo
Commit: http://git-wip-us.apache.org/repos/asf/bigtop/commit/502bd784
Tree: http://git-wip-us.apache.org/repos/asf/bigtop/tree/502bd784
Diff: http://git-wip-us.apache.org/repos/asf/bigtop/diff/502bd784

Branch: refs/heads/master
Commit: 502bd784abeda6087215a98ca6719213457c6193
Parents: 15af83e
Author: RJ Nowling <rnowling@gmail.com>
Authored: Tue Aug 25 09:30:49 2015 -0500
Committer: RJ Nowling <rnowling@gmail.com>
Committed: Tue Aug 25 09:30:49 2015 -0500

----------------------------------------------------------------------
 .../bigpetstore-data-generator/build.gradle     |      1 +
 .../datagenerators/bigpetstore/Constants.java   |      2 -
 .../bigpetstore/CustomerGenerator.java          |      2 +-
 .../datagenerators/bigpetstore/DataLoader.java  |      9 +-
 .../datamodels/inputs/InputData.java            |     10 +-
 .../bigpetstore/datamodels/inputs/Names.java    |     46 -
 .../bigpetstore/datareaders/NameReader.java     |     62 -
 .../generators/customer/CustomerSampler.java    |     14 +-
 .../customer/CustomerSamplerBuilder.java        |      9 +-
 .../resources/input_data/namedb/data/data.dat   | 129036 ----------------
 .../resources/input_data/namedb/namedb.info     |     13 -
 .../customer/TestCustomerSampler.java           |     20 +-
 .../customer/TestCustomerSamplerBuilder.java    |     12 +-
 .../bigtop-name-generator/README.md             |     51 +
 .../bigtop-name-generator/build.gradle          |     63 +
 .../bigtop-name-generator/settings.gradle       |     16 +
 .../namegenerator/NameGenerator.java            |     40 +
 .../namegenerator/NameReader.java               |     68 +
 .../datagenerators/namegenerator/Names.java     |     46 +
 .../resources/input_data/namedb/data/data.dat   | 129036 ++++++++++++++++
 .../resources/input_data/namedb/namedb.info     |     12 +
 .../namegenerator/TestNameGenerator.java        |     39 +
 22 files changed, 129397 insertions(+), 129210 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/bigtop/blob/502bd784/bigtop-data-generators/bigpetstore-data-generator/build.gradle
----------------------------------------------------------------------
diff --git a/bigtop-data-generators/bigpetstore-data-generator/build.gradle b/bigtop-data-generators/bigpetstore-data-generator/build.gradle
index d18cac5..57f0692 100644
--- a/bigtop-data-generators/bigpetstore-data-generator/build.gradle
+++ b/bigtop-data-generators/bigpetstore-data-generator/build.gradle
@@ -60,6 +60,7 @@ dependencies {
     compile 'com.google.code.gson:gson:2.3'
     compile 'org.apache.commons:commons-lang3:3.4'
     compile 'org.apache.bigtop:bigtop-samplers:1.1.0-SNAPSHOT'
+    compile 'org.apache.bigtop:bigtop-name-generator:1.1.0-SNAPSHOT'
 
     testCompile 'junit:junit:4.+'
 }

http://git-wip-us.apache.org/repos/asf/bigtop/blob/502bd784/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/Constants.java
----------------------------------------------------------------------
diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/Constants.java b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/Constants.java
index 21827d5..1e8e758 100644
--- a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/Constants.java
+++ b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/Constants.java
@@ -42,8 +42,6 @@ public class Constants
 	public static final File INCOMES_FILE = new File("ACS_12_5YR_S1903/ACS_12_5YR_S1903_with_ann.csv");
 	public static final File POPULATION_FILE = new File("population_data.csv");
 
-	public static final File NAMEDB_FILE = new File("namedb/data/data.dat");
-
 	public static final ProductsCollectionSize PRODUCTS_COLLECTION = ProductsCollectionSize.MEDIUM;
 
 	public static final double INCOME_SCALING_FACTOR = 100.0;

http://git-wip-us.apache.org/repos/asf/bigtop/blob/502bd784/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/CustomerGenerator.java
----------------------------------------------------------------------
diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/CustomerGenerator.java b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/CustomerGenerator.java
index 7fc2cbe..4be976a 100644
--- a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/CustomerGenerator.java
+++ b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/CustomerGenerator.java
@@ -28,7 +28,7 @@ public class CustomerGenerator
 {
 	final Sampler<Customer> sampler;
 
-	public CustomerGenerator(InputData inputData, List<Store> stores, SeedFactory seedFactory)
+	public CustomerGenerator(InputData inputData, List<Store> stores, SeedFactory seedFactory) throws Exception
 	{
 		CustomerSamplerBuilder builder = new CustomerSamplerBuilder(stores, inputData, seedFactory);
 		sampler = builder.build();

http://git-wip-us.apache.org/repos/asf/bigtop/blob/502bd784/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/DataLoader.java
----------------------------------------------------------------------
diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/DataLoader.java b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/DataLoader.java
index d3393fe..ecbd6cf 100644
--- a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/DataLoader.java
+++ b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/DataLoader.java
@@ -21,9 +21,7 @@ import java.io.InputStream;
 import java.util.List;
 
 import org.apache.bigtop.datagenerators.bigpetstore.datamodels.inputs.InputData;
-import org.apache.bigtop.datagenerators.bigpetstore.datamodels.inputs.Names;
 import org.apache.bigtop.datagenerators.bigpetstore.datamodels.inputs.ZipcodeRecord;
-import org.apache.bigtop.datagenerators.bigpetstore.datareaders.NameReader;
 import org.apache.bigtop.datagenerators.bigpetstore.datareaders.ZipcodeReader;
 
 public class DataLoader
@@ -45,12 +43,7 @@ public class DataLoader
 		List<ZipcodeRecord> zipcodeTable = zipcodeReader.readData();
 		System.out.println("Read " + zipcodeTable.size() + " zipcode entries");
 
-		System.out.println("Reading name data");
-		NameReader nameReader = new NameReader(getResource(Constants.NAMEDB_FILE));
-		Names names = nameReader.readData();
-		System.out.println("Read " + names.getFirstNames().size() + " first names and " + names.getLastNames().size() + " last names");
-
-		InputData inputData = new InputData(zipcodeTable, names);
+		InputData inputData = new InputData(zipcodeTable);
 
 		return inputData;
 	}

http://git-wip-us.apache.org/repos/asf/bigtop/blob/502bd784/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/datamodels/inputs/InputData.java
----------------------------------------------------------------------
diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/datamodels/inputs/InputData.java b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/datamodels/inputs/InputData.java
index 7f5eddf..c180136 100644
--- a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/datamodels/inputs/InputData.java
+++ b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/datamodels/inputs/InputData.java
@@ -24,22 +24,14 @@ public class InputData implements Serializable
 	private static final long serialVersionUID = 9078989799806707788L;
 
 	List<ZipcodeRecord> zipcodeTable;
-	Names names;
 
-	public InputData(List<ZipcodeRecord> zipcodeTable,
-			Names names)
+	public InputData(List<ZipcodeRecord> zipcodeTable)
 	{
 		this.zipcodeTable = Collections.unmodifiableList(zipcodeTable);
-		this.names = names;
 	}
 
 	public List<ZipcodeRecord> getZipcodeTable()
 	{
 		return zipcodeTable;
 	}
-
-	public Names getNames()
-	{
-		return names;
-	}
 }

http://git-wip-us.apache.org/repos/asf/bigtop/blob/502bd784/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/datamodels/inputs/Names.java
----------------------------------------------------------------------
diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/datamodels/inputs/Names.java b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/datamodels/inputs/Names.java
deleted file mode 100644
index 2d6da89..0000000
--- a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/datamodels/inputs/Names.java
+++ /dev/null
@@ -1,46 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.bigtop.datagenerators.bigpetstore.datamodels.inputs;
-
-import java.io.Serializable;
-import java.util.Map;
-
-import com.google.common.collect.ImmutableMap;
-
-public class Names implements Serializable
-{
-	private static final long serialVersionUID = 2731634747628534453L;
-
-	final ImmutableMap<String, Double> firstNames;
-	final ImmutableMap<String, Double> lastNames;
-
-	public Names(Map<String, Double> firstNames,
-			Map<String, Double> lastNames)
-	{
-		this.firstNames = ImmutableMap.copyOf(firstNames);
-		this.lastNames = ImmutableMap.copyOf(lastNames);
-	}
-
-	public ImmutableMap<String, Double> getFirstNames()
-	{
-		return firstNames;
-	}
-
-	public ImmutableMap<String, Double> getLastNames()
-	{
-		return lastNames;
-	}
-}

http://git-wip-us.apache.org/repos/asf/bigtop/blob/502bd784/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/datareaders/NameReader.java
----------------------------------------------------------------------
diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/datareaders/NameReader.java b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/datareaders/NameReader.java
deleted file mode 100644
index ec5412a..0000000
--- a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/datareaders/NameReader.java
+++ /dev/null
@@ -1,62 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.bigtop.datagenerators.bigpetstore.datareaders;
-
-import java.io.FileNotFoundException;
-import java.io.InputStream;
-import java.util.Map;
-import java.util.Scanner;
-
-import org.apache.bigtop.datagenerators.bigpetstore.datamodels.inputs.Names;
-
-import com.google.common.collect.Maps;
-
-public class NameReader
-{
-	InputStream path;
-
-	public NameReader(InputStream path)
-	{
-		this.path = path;
-	}
-
-	public Names readData() throws FileNotFoundException
-	{
-		Scanner scanner = new Scanner(path);
-
-		Map<String, Double> firstNames = Maps.newHashMap();
-		Map<String, Double> lastNames = Maps.newHashMap();
-
-		while(scanner.hasNextLine())
-		{
-			String line = scanner.nextLine();
-			String[] cols = line.trim().split(",");
-
-			String name = cols[0];
-			double weight = Double.parseDouble(cols[5]);
-
-			if(cols[4].equals("1"))
-				firstNames.put(name, weight);
-			if(cols[3].equals("1"))
-				lastNames.put(name, weight);
-		}
-
-		scanner.close();
-
-		return new Names(firstNames, lastNames);
-
-	}
-}

http://git-wip-us.apache.org/repos/asf/bigtop/blob/502bd784/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/generators/customer/CustomerSampler.java
----------------------------------------------------------------------
diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/generators/customer/CustomerSampler.java b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/generators/customer/CustomerSampler.java
index 13b69a3..2bfb6e7 100644
--- a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/generators/customer/CustomerSampler.java
+++ b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/generators/customer/CustomerSampler.java
@@ -25,19 +25,18 @@ import org.apache.commons.lang3.tuple.Pair;
 public class CustomerSampler implements Sampler<Customer>
 {
 	private final Sampler<Integer> idSampler;
-	private final Sampler<String> firstNameSampler;
-	private final Sampler<String> lastNameSampler;
+	private final Sampler<Pair<String, String>> nameSampler;
 	private final Sampler<Store> storeSampler;
 	private final ConditionalSampler<ZipcodeRecord, Store> locationSampler;
 
 
-	public CustomerSampler(Sampler<Integer> idSampler, Sampler<String> firstNameSampler,
-			Sampler<String> lastNameSampler, Sampler<Store> storeSampler,
+	public CustomerSampler(Sampler<Integer> idSampler,
+			Sampler<Pair<String, String>> nameSampler,
+			Sampler<Store> storeSampler,
 			ConditionalSampler<ZipcodeRecord, Store> locationSampler)
 	{
 		this.idSampler = idSampler;
-		this.firstNameSampler = firstNameSampler;
-		this.lastNameSampler = lastNameSampler;
+		this.nameSampler = nameSampler;
 		this.storeSampler = storeSampler;
 		this.locationSampler = locationSampler;
 	}
@@ -45,8 +44,7 @@ public class CustomerSampler implements Sampler<Customer>
 	public Customer sample() throws Exception
 	{
 		Integer id = idSampler.sample();
-		Pair<String, String> name = Pair.of(firstNameSampler.sample(),
-				lastNameSampler.sample());
+		Pair<String, String> name = nameSampler.sample();
 		Store store = storeSampler.sample();
 		ZipcodeRecord location = locationSampler.sample(store);
 

http://git-wip-us.apache.org/repos/asf/bigtop/blob/502bd784/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/generators/customer/CustomerSamplerBuilder.java
----------------------------------------------------------------------
diff --git a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/generators/customer/CustomerSamplerBuilder.java b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/generators/customer/CustomerSamplerBuilder.java
index 56ab761..44ffa6a 100644
--- a/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/generators/customer/CustomerSamplerBuilder.java
+++ b/bigtop-data-generators/bigpetstore-data-generator/src/main/java/org/apache/bigtop/datagenerators/bigpetstore/generators/customer/CustomerSamplerBuilder.java
@@ -23,12 +23,14 @@ import org.apache.bigtop.datagenerators.bigpetstore.datamodels.Customer;
 import org.apache.bigtop.datagenerators.bigpetstore.datamodels.Store;
 import org.apache.bigtop.datagenerators.bigpetstore.datamodels.inputs.InputData;
 import org.apache.bigtop.datagenerators.bigpetstore.datamodels.inputs.ZipcodeRecord;
+import org.apache.bigtop.datagenerators.namegenerator.NameGenerator;
 import org.apache.bigtop.datagenerators.samplers.SeedFactory;
 import org.apache.bigtop.datagenerators.samplers.pdfs.ProbabilityDensityFunction;
 import org.apache.bigtop.datagenerators.samplers.samplers.ConditionalSampler;
 import org.apache.bigtop.datagenerators.samplers.samplers.RouletteWheelSampler;
 import org.apache.bigtop.datagenerators.samplers.samplers.Sampler;
 import org.apache.bigtop.datagenerators.samplers.samplers.SequenceSampler;
+import org.apache.commons.lang3.tuple.Pair;
 
 import com.google.common.collect.Maps;
 
@@ -65,16 +67,15 @@ public class CustomerSamplerBuilder
 				};
 	}
 
-	public Sampler<Customer> build()
+	public Sampler<Customer> build() throws Exception
 	{
 		ProbabilityDensityFunction<Store> storePDF = new CustomerStorePDF(stores);
 
 		Sampler<Integer> idSampler = new SequenceSampler();
-		Sampler<String> firstNameSampler = RouletteWheelSampler.create(inputData.getNames().getFirstNames(), seedFactory);
-		Sampler<String> lastNameSampler = RouletteWheelSampler.create(inputData.getNames().getLastNames(), seedFactory);
+		Sampler<Pair<String, String>> nameSampler = new NameGenerator(seedFactory);
 		Sampler<Store> storeSampler = RouletteWheelSampler.create(stores, storePDF, seedFactory);
 
-		return new CustomerSampler(idSampler, firstNameSampler, lastNameSampler, storeSampler, buildLocationSampler());
+		return new CustomerSampler(idSampler, nameSampler, storeSampler, buildLocationSampler());
 	}
 
 }


Mime
View raw message