Return-Path: X-Original-To: apmail-bigtop-commits-archive@www.apache.org Delivered-To: apmail-bigtop-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 016B0111FF for ; Wed, 20 Aug 2014 03:13:43 +0000 (UTC) Received: (qmail 42495 invoked by uid 500); 20 Aug 2014 03:13:42 -0000 Delivered-To: apmail-bigtop-commits-archive@bigtop.apache.org Received: (qmail 42442 invoked by uid 500); 20 Aug 2014 03:13:42 -0000 Mailing-List: contact commits-help@bigtop.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: bigtop-dev@bigtop.apache.org Delivered-To: mailing list commits@bigtop.apache.org Received: (qmail 42431 invoked by uid 99); 20 Aug 2014 03:13:42 -0000 Received: from tyr.zones.apache.org (HELO tyr.zones.apache.org) (140.211.11.114) by apache.org (qpsmtpd/0.29) with ESMTP; Wed, 20 Aug 2014 03:13:42 +0000 Received: by tyr.zones.apache.org (Postfix, from userid 65534) id 3A4C395325E; Wed, 20 Aug 2014 03:13:42 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: jay@apache.org To: commits@bigtop.apache.org Date: Wed, 20 Aug 2014 03:13:42 -0000 Message-Id: <02b94e09e73942ac9a58d81ac579cf7b@git.apache.org> X-Mailer: ASF-Git Admin Mailer Subject: [1/2] BIGTOP-1272: Productionize the mahout recommender Repository: bigtop Updated Branches: refs/heads/master e9771e613 -> 4fca4573b http://git-wip-us.apache.org/repos/asf/bigtop/blob/4fca4573/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/generator/TransactionIteratorFactory.java ---------------------------------------------------------------------- diff --git a/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/generator/TransactionIteratorFactory.java b/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/generator/TransactionIteratorFactory.java deleted file mode 100755 index 0ea81ee..0000000 
--- a/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/generator/TransactionIteratorFactory.java +++ /dev/null @@ -1,468 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.bigtop.bigpetstore.generator; - - -import java.util.Date; -import java.util.Iterator; -import java.util.Random; - -import org.apache.bigtop.bigpetstore.util.Pair; -import org.apache.bigtop.bigpetstore.util.StringUtils; - -/** - * This class generates our data. Over time we will use it to embed bias which - * can then be teased out, i.e. by clutstering/classifiers. For example: - * - * certain products <--> certain years or days - * - * - */ -public class TransactionIteratorFactory { - - /** - * Each "state" has a pet store , with a certain "proportion" of the - * transactions. In this case colorado represents the majority of the - * transactions. - */ - - public static enum STATE { - - // Each product is separated with an _ for its base price. - // That is just to make it easy to add new products. - // Each state is associated with a relative probability. 
- AZ(.1f, "dog-food_10", "cat-food_8", "leather-collar_25", - "snake-bite ointment_30", "turtle-food_11"), - AK(.1f, - "dog-food_10", "cat-food_8", "fuzzy-collar_19", - "antelope-caller_20", "salmon-bait_30"), - CT(.1f, "dog-food_10", - "cat-food_8", "fuzzy-collar_19", "turtle-pellets_5"), - OK(.1f, - "dog-food_10", "cat-food_8", "duck-caller_13", - "rodent-cage_40", "hay-bail_5", "cow-dung_2"), - CO(.1f, - "dog-food_10", "cat-food_8", "choke-collar_15", - "antelope snacks_30", "duck-caller_18"), - CA(.3f, "dog-food_10", - "cat-food_8", "fish-food_12", "organic-dog-food_16", - "turtle-pellets_5"), - NY(.2f, "dog-food_10", "cat-food_8", "steel-leash_20", - "fish-food_20", "seal-spray_25"); - - public static Random rand = new Random(); - public float probability; - public String[] products; - - private STATE(float probability, String... products) { - this.probability = probability; - this.products = products; - } - - public Pair randProduct() { - String product = products[rand.nextInt(products.length - 1)]; - String name = StringUtils.substringBefore(product, "_"); - Integer basePrice = Integer.parseInt(StringUtils.substringAfter( - product, "_")); - return new Pair(name, basePrice); - } - - } - - public static class KeyVal { - - public final K key; - public final V val; - - public KeyVal(K key, V val) { - this.key = key; - this.val = val; - } - } - - private Iterator> dataIterator; - - Random r; - - public TransactionIteratorFactory(final int records, final STATE state) { - - /** - * Random is seeded by STATE. This way similar names will be randomly - * selected for states . 
- */ - r = new Random(state.hashCode()); - - if (records == 0) { - throw new RuntimeException( - "Cant create a data iterator with no records (records==0) !"); - } - - this.dataIterator = new Iterator>() { - int trans_id = 1; - - @Override - public boolean hasNext() { - // TODO Auto-generated method stub - return trans_id <= records; - } - - int repeat = 0; - String fname = randFirstName(); - String lname = randLastName(); - - @Override - public KeyVal next() { - /** - * Some customers come back for more :) We repeat a name up to - * ten times. - */ - if (repeat > 0) - repeat--; - else { - fname = randFirstName(); - lname = randLastName(); - repeat = (int) (r.nextGaussian() * 10f); - } - String key, val; - key = join(",", "BigPetStore", "storeCode_" + state.name(), - trans_id++ + ""); - Pair product_price = state.randProduct(); - val = join( - ",", - fname, - lname, - getDate().toString(), - fudgePrice(product_price.getFirst(), - product_price.getSecond()) - + "", product_price.getFirst()); // products are - // biased by - // state - - return new KeyVal(key, val); - } - - @Override - public void remove() { - // TODO Auto-generated method stub - - } - - }; - } - - /** - * Add some decimals to the price; - * - * @param i - * @return - */ - public Float fudgePrice(String product, Integer i) { - float f = (float) i; - if (product.contains("dog")) { - return i + .50f; - } - if (product.contains("cat")) { - return i - .50f; - } - if (product.contains("fish")) { - return i - .25f; - } else - return i + .10f; - } - - static String join(String sep, String... strs) { - if (strs.length == 0) { - return ""; - } else if (strs.length == 1) { - return strs[0]; - } - String temp = strs[0]; // inefficient ... 
should probably use - // StringBuilder instead - for (int i = 1; i < strs.length; i++) { - temp += "," + strs[i]; - } - return temp; - } - - public Iterator> getData() { - return this.dataIterator; - } - - private String randFirstName() { - return FIRSTNAMES[this.r.nextInt(FIRSTNAMES.length - 1)].toLowerCase(); - } - - private String randLastName() { - return LASTNAMES[this.r.nextInt(LASTNAMES.length - 1)].toLowerCase(); - } - - private Date getDate() { - return new Date(this.r.nextInt()); - } - - private Integer getPrice() { - return this.r.nextInt(MAX_PRICE); - } - - public static final Integer MINUTES_IN_DAY = 60 * 24; - public static final Integer MAX_PRICE = 10000; - - private static String[] FIRSTNAMES = { "Aaron", "Abby", "Abigail", "Adam", - "Alan", "Albert", "Alex", "Alexandra", "Alexis", "Alice", "Alicia", - "Alisha", "Alissa", "Allen", "Allison", "Alyssa", "Amanda", - "Amber", "Amy", "Andrea", "Andrew", "Andy", "Angel", "Angela", - "Angie", "Anita", "Ann", "Anna", "Annette", "Anthony", "Antonio", - "April", "Arthur", "Ashley", "Audrey", "Austin", "Autumn", "Baby", - "Barb", "Barbara", "Becky", "Benjamin", "Beth", "Bethany", "Betty", - "Beverly", "Bill", "Billie", "Billy", "Blake", "Bob", "Bobbie", - "Bobby", "Bonnie", "Brad", "Bradley", "Brady", "Brandi", "Brandon", - "Brandy", "Breanna", "Brenda", "Brent", "Brett", "Brian", - "Brianna", "Brittany", "Brooke", "Brooklyn", "Bruce", "Bryan", - "Caleb", "Cameron", "Candy", "Carl", "Carla", "Carmen", "Carol", - "Carolyn", "Carrie", "Casey", "Cassandra", "Catherine", "Cathy", - "Chad", "Charlene", "Charles", "Charlie", "Charlotte", "Chase", - "Chasity", "Chastity", "Chelsea", "Cheryl", "Chester", "Cheyenne", - "Chris", "Christian", "Christina", "Christine", "Christoph", - "Christopher", "Christy", "Chuck", "Cindy", "Clara", "Clarence", - "Clayton", "Clifford", "Clint", "Cody", "Colton", "Connie", - "Corey", "Cory", "Courtney", "Craig", "Crystal", "Curtis", - "Cynthia", "Dakota", "Dale", "Dallas", "Dalton", 
"Dan", "Dana", - "Daniel", "Danielle", "Danny", "Darla", "Darlene", "Darrell", - "Darren", "Dave", "David", "Dawn", "Dean", "Deanna", "Debbie", - "Deborah", "Debra", "Denise", "Dennis", "Derek", "Derrick", - "Destiny", "Devin", "Diana", "Diane", "Dillon", "Dixie", "Dominic", - "Don", "Donald", "Donna", "Donnie", "Doris", "Dorothy", "Doug", - "Douglas", "Drew", "Duane", "Dustin", "Dusty", "Dylan", "Earl", - "Ed", "Eddie", "Edward", "Elaine", "Elizabeth", "Ellen", "Emily", - "Eric", "Erica", "Erika", "Erin", "Ernest", "Ethan", "Eugene", - "Eva", "Evelyn", "Everett", "Faith", "Father", "Felicia", "Floyd", - "Francis", "Frank", "Fred", "Gabriel", "Gage", "Gail", "Gary", - "Gene", "George", "Gerald", "Gina", "Ginger", "Glen", "Glenn", - "Gloria", "Grace", "Greg", "Gregory", "Haley", "Hannah", "Harley", - "Harold", "Harry", "Heath", "Heather", "Heidi", "Helen", "Herbert", - "Holly", "Hope", "Howard", "Hunter", "Ian", "Isaac", "Jack", - "Jackie", "Jacob", "Jade", "Jake", "James", "Jamie", "Jan", "Jane", - "Janet", "Janice", "Jared", "Jasmine", "Jason", "Jay", "Jean", - "Jeannie", "Jeff", "Jeffery", "Jeffrey", "Jenna", "Jennifer", - "Jenny", "Jeremiah", "Jeremy", "Jerry", "Jesse", "Jessica", - "Jessie", "Jill", "Jim", "Jimmy", "Joann", "Joanne", "Jodi", - "Jody", "Joe", "Joel", "Joey", "John", "Johnathan", "Johnny", - "Jon", "Jonathan", "Jonathon", "Jordan", "Joseph", "Josh", - "Joshua", "Joyce", "Juanita", "Judy", "Julia", "Julie", "Justin", - "Kaitlyn", "Karen", "Katelyn", "Katherine", "Kathleen", "Kathryn", - "Kathy", "Katie", "Katrina", "Kay", "Kayla", "Kaylee", "Keith", - "Kelly", "Kelsey", "Ken", "Kendra", "Kenneth", "Kenny", "Kevin", - "Kim", "Kimberly", "Kris", "Krista", "Kristen", "Kristin", - "Kristina", "Kristy", "Kyle", "Kylie", "Lacey", "Laken", "Lance", - "Larry", "Laura", "Lawrence", "Leah", "Lee", "Leonard", "Leroy", - "Leslie", "Levi", "Lewis", "Linda", "Lindsay", "Lindsey", "Lisa", - "Lloyd", "Logan", "Lois", "Loretta", "Lori", "Louis", "Lynn", - 
"Madison", "Mandy", "Marcus", "Margaret", "Maria", "Mariah", - "Marie", "Marilyn", "Marion", "Mark", "Marlene", "Marsha", - "Martha", "Martin", "Marty", "Marvin", "Mary", "Mary ann", "Mason", - "Matt", "Matthew", "Max", "Megan", "Melanie", "Melinda", "Melissa", - "Melody", "Michael", "Michelle", "Mickey", "Mike", "Mindy", - "Miranda", "Misty", "Mitchell", "Molly", "Monica", "Morgan", - "Mother", "Myron", "Nancy", "Natasha", "Nathan", "Nicholas", - "Nick", "Nicole", "Nina", "Noah", "Norma", "Norman", "Olivia", - "Paige", "Pam", "Pamela", "Pat", "Patricia", "Patrick", "Patty", - "Paul", "Paula", "Peggy", "Penny", "Pete", "Phillip", "Phyllis", - "Rachael", "Rachel", "Ralph", "Randall", "Randi", "Randy", "Ray", - "Raymond", "Rebecca", "Regina", "Renee", "Rex", "Rhonda", - "Richard", "Rick", "Ricky", "Rita", "Rob", "Robbie", "Robert", - "Roberta", "Robin", "Rochelle", "Rocky", "Rod", "Rodney", "Roger", - "Ron", "Ronald", "Ronda", "Ronnie", "Rose", "Roxanne", "Roy", - "Russ", "Russell", "Rusty", "Ruth", "Ryan", "Sabrina", "Sally", - "Sam", "Samantha", "Samuel", "Sandra", "Sandy", "Sara", "Sarah", - "Savannah", "Scott", "Sean", "Seth", "Shanda", "Shane", "Shanna", - "Shannon", "Sharon", "Shaun", "Shawn", "Shawna", "Sheila", - "Shelly", "Sher", "Sherri", "Sherry", "Shirley", "Sierra", - "Skyler", "Stacey", "Stacy", "Stanley", "Stephanie", "Stephen", - "Steve", "Steven", "Sue", "Summer", "Susan", "Sydney", "Tabatha", - "Tabitha", "Tamara", "Tammy", "Tara", "Tasha", "Tashia", "Taylor", - "Ted", "Teresa", "Terri", "Terry", "Tessa", "Thelma", "Theresa", - "Thomas", "Tia", "Tiffany", "Tim", "Timmy", "Timothy", "Tina", - "Todd", "Tom", "Tommy", "Toni", "Tony", "Tonya", "Tracey", - "Tracie", "Tracy", "Travis", "Trent", "Trevor", "Trey", "Trisha", - "Tristan", "Troy", "Tyler", "Tyrone", "Unborn", "Valerie", - "Vanessa", "Vernon", "Veronica", "Vicki", "Vickie", "Vicky", - "Victor", "Victoria", "Vincent", "Virginia", "Vivian", "Walter", - "Wanda", "Wayne", "Wendy", "Wesley", 
"Whitney", "William", - "Willie", "Wyatt", "Zachary" }; - - public static String[] LASTNAMES = { "Abbott", "Acevedo", "Acosta", - "Adams", "Adkins", "Aguilar", "Aguirre", "Albert", "Alexander", - "Alford", "Allen", "Allison", "Alston", "Alvarado", "Alvarez", - "Anderson", "Andrews", "Anthony", "Armstrong", "Arnold", "Ashley", - "Atkins", "Atkinson", "Austin", "Avery", "Avila", "Ayala", "Ayers", - "Bailey", "Baird", "Baker", "Baldwin", "Ball", "Ballard", "Banks", - "Barber", "Smith", "Johnson", "Williams", "Jones", "Brown", - "Davis", "Miller", "Wilson", "Moore", "Taylor", "Thomas", - "Jackson", "Barker", "Barlow", "Barnes", "Barnett", "Barr", - "Barrera", "Barrett", "Barron", "Barry", "Bartlett", "Barton", - "Bass", "Bates", "Battle", "Bauer", "Baxter", "Beach", "Bean", - "Beard", "Beasley", "Beck", "Becker", "Bell", "Bender", "Benjamin", - "Bennett", "Benson", "Bentley", "Benton", "Berg", "Berger", - "Bernard", "Berry", "Best", "Bird", "Bishop", "Black", "Blackburn", - "Blackwell", "Blair", "Blake", "Blanchard", "Blankenship", - "Blevins", "Bolton", "Bond", "Bonner", "Booker", "Boone", "Booth", - "Bowen", "Bowers", "Bowman", "Boyd", "Boyer", "Boyle", "Bradford", - "Bradley", "Bradshaw", "Brady", "Branch", "Bray", "Brennan", - "Brewer", "Bridges", "Briggs", "Bright", "Britt", "Brock", - "Brooks", "Browning", "Bruce", "Bryan", "Bryant", "Buchanan", - "Buck", "Buckley", "Buckner", "Bullock", "Burch", "Burgess", - "Burke", "Burks", "Burnett", "Burns", "Burris", "Burt", "Burton", - "Bush", "Butler", "Byers", "Byrd", "Cabrera", "Cain", "Calderon", - "Caldwell", "Calhoun", "Callahan", "Camacho", "Cameron", - "Campbell", "Campos", "Cannon", "Cantrell", "Cantu", "Cardenas", - "Carey", "Carlson", "Carney", "Carpenter", "Carr", "Carrillo", - "Carroll", "Carson", "Carter", "Carver", "Case", "Casey", "Cash", - "Castaneda", "Castillo", "Castro", "Cervantes", "Chambers", "Chan", - "Chandler", "Chaney", "Chang", "Chapman", "Charles", "Chase", - "Chavez", "Chen", "Cherry", 
"Christensen", "Christian", "Church", - "Clark", "Clarke", "Clay", "Clayton", "Clements", "Clemons", - "Cleveland", "Cline", "Cobb", "Cochran", "Coffey", "Cohen", "Cole", - "Coleman", "Collier", "Collins", "Colon", "Combs", "Compton", - "Conley", "Conner", "Conrad", "Contreras", "Conway", "Cook", - "Cooke", "Cooley", "Cooper", "Copeland", "Cortez", "Cote", - "Cotton", "Cox", "Craft", "Craig", "Crane", "Crawford", "Crosby", - "Cross", "Cruz", "Cummings", "Cunningham", "Curry", "Curtis", - "Dale", "Dalton", "Daniel", "Daniels", "Daugherty", "Davenport", - "David", "Davidson", "Dawson", "Day", "Dean", "Decker", "Dejesus", - "Delacruz", "Delaney", "Deleon", "Delgado", "Dennis", "Diaz", - "Dickerson", "Dickinson", "Dillard", "Dillon", "Dixon", "Dodson", - "Dominguez", "Donaldson", "Donovan", "Dorsey", "Dotson", "Douglas", - "Downs", "Doyle", "Drake", "Dudley", "Duffy", "Duke", "Duncan", - "Dunlap", "Dunn", "Duran", "Durham", "Dyer", "Eaton", "Edwards", - "Elliott", "Ellis", "Ellison", "Emerson", "England", "English", - "Erickson", "Espinoza", "Estes", "Estrada", "Evans", "Everett", - "Ewing", "Farley", "Farmer", "Farrell", "Faulkner", "Ferguson", - "Fernandez", "Ferrell", "Fields", "Figueroa", "Finch", "Finley", - "Fischer", "Fisher", "Fitzgerald", "Fitzpatrick", "Fleming", - "Fletcher", "Flores", "Flowers", "Floyd", "Flynn", "Foley", - "Forbes", "Ford", "Foreman", "Foster", "Fowler", "Fox", "Francis", - "Franco", "Frank", "Franklin", "Franks", "Frazier", "Frederick", - "Freeman", "French", "Frost", "Fry", "Frye", "Fuentes", "Fuller", - "Fulton", "Gaines", "Gallagher", "Gallegos", "Galloway", "Gamble", - "Garcia", "Gardner", "Garner", "Garrett", "Garrison", "Garza", - "Gates", "Gay", "Gentry", "George", "Gibbs", "Gibson", "Gilbert", - "Giles", "Gill", "Gillespie", "Gilliam", "Gilmore", "Glass", - "Glenn", "Glover", "Goff", "Golden", "Gomez", "Gonzales", - "Gonzalez", "Good", "Goodman", "Goodwin", "Gordon", "Gould", - "Graham", "Grant", "Graves", "Gray", "Green", 
"Greene", "Greer", - "Gregory", "Griffin", "Griffith", "Grimes", "Gross", "Guerra", - "Guerrero", "Guthrie", "Gutierrez", "Guy", "Guzman", "Hahn", - "Hale", "Haley", "Hall", "Hamilton", "Hammond", "Hampton", - "Hancock", "Haney", "Hansen", "Hanson", "Hardin", "Harding", - "Hardy", "Harmon", "Harper", "Harris", "Harrington", "Harrison", - "Hart", "Hartman", "Harvey", "Hatfield", "Hawkins", "Hayden", - "Hayes", "Haynes", "Hays", "Head", "Heath", "Hebert", "Henderson", - "Hendricks", "Hendrix", "Henry", "Hensley", "Henson", "Herman", - "Hernandez", "Herrera", "Herring", "Hess", "Hester", "Hewitt", - "Hickman", "Hicks", "Higgins", "Hill", "Hines", "Hinton", "Hobbs", - "Hodge", "Hodges", "Hoffman", "Hogan", "Holcomb", "Holden", - "Holder", "Holland", "Holloway", "Holman", "Holmes", "Holt", - "Hood", "Hooper", "Hoover", "Hopkins", "Hopper", "Horn", "Horne", - "Horton", "House", "Houston", "Howard", "Howe", "Howell", - "Hubbard", "Huber", "Hudson", "Huff", "Huffman", "Hughes", "Hull", - "Humphrey", "Hunt", "Hunter", "Hurley", "Hurst", "Hutchinson", - "Hyde", "Ingram", "Irwin", "Jacobs", "Jacobson", "James", "Jarvis", - "Jefferson", "Jenkins", "Jennings", "Jensen", "Jimenez", "Johns", - "Johnston", "Jordan", "Joseph", "Joyce", "Joyner", "Juarez", - "Justice", "Kane", "Kaufman", "Keith", "Keller", "Kelley", "Kelly", - "Kemp", "Kennedy", "Kent", "Kerr", "Key", "Kidd", "Kim", "King", - "Kinney", "Kirby", "Kirk", "Kirkland", "Klein", "Kline", "Knapp", - "Knight", "Knowles", "Knox", "Koch", "Kramer", "Lamb", "Lambert", - "Lancaster", "Landry", "Lane", "Lang", "Langley", "Lara", "Larsen", - "Larson", "Lawrence", "Lawson", "Le", "Leach", "Leblanc", "Lee", - "Leon", "Leonard", "Lester", "Levine", "Levy", "Lewis", "Lindsay", - "Lindsey", "Little", "Livingston", "Lloyd", "Logan", "Long", - "Lopez", "Lott", "Love", "Lowe", "Lowery", "Lucas", "Luna", - "Lynch", "Lynn", "Lyons", "Macdonald", "Macias", "Mack", "Madden", - "Maddox", "Maldonado", "Malone", "Mann", "Manning", "Marks", - 
"Marquez", "Marsh", "Marshall", "Martin", "Martinez", "Mason", - "Massey", "Mathews", "Mathis", "Matthews", "Maxwell", "May", - "Mayer", "Maynard", "Mayo", "Mays", "McBride", "McCall", - "McCarthy", "McCarty", "McClain", "McClure", "McConnell", - "McCormick", "McCoy", "McCray", "McCullough", "McDaniel", - "McDonald", "McDowell", "McFadden", "McFarland", "McGee", - "McGowan", "McGuire", "McIntosh", "McIntyre", "McKay", "McKee", - "McKenzie", "McKinney", "McKnight", "McLaughlin", "McLean", - "McLeod", "McMahon", "McMillan", "McNeil", "McPherson", "Meadows", - "Medina", "Mejia", "Melendez", "Melton", "Mendez", "Mendoza", - "Mercado", "Mercer", "Merrill", "Merritt", "Meyer", "Meyers", - "Michael", "Middleton", "Miles", "Mills", "Miranda", "Mitchell", - "Molina", "Monroe", "Montgomery", "Montoya", "Moody", "Moon", - "Mooney", "Morales", "Moran", "Moreno", "Morgan", "Morin", - "Morris", "Morrison", "Morrow", "Morse", "Morton", "Moses", - "Mosley", "Moss", "Mueller", "Mullen", "Mullins", "Munoz", - "Murphy", "Murray", "Myers", "Nash", "Navarro", "Neal", "Nelson", - "Newman", "Newton", "Nguyen", "Nichols", "Nicholson", "Nielsen", - "Nieves", "Nixon", "Noble", "Noel", "Nolan", "Norman", "Norris", - "Norton", "Nunez", "Obrien", "Ochoa", "Oconnor", "Odom", - "Odonnell", "Oliver", "Olsen", "Olson", "O'neal", "O'neil", - "O'neill", "Orr", "Ortega", "Ortiz", "Osborn", "Osborne", "Owen", - "Owens", "Pace", "Pacheco", "Padilla", "Page", "Palmer", "Park", - "Parker", "Parks", "Parrish", "Parsons", "Pate", "Patel", - "Patrick", "Patterson", "Patton", "Paul", "Payne", "Pearson", - "Peck", "Pena", "Pennington", "Perez", "Perkins", "Perry", - "Peters", "Petersen", "Peterson", "Petty", "Phelps", "Phillips", - "Pickett", "Pierce", "Pittman", "Pitts", "Pollard", "Poole", - "Pope", "Porter", "Potter", "Potts", "Powell", "Powers", "Pratt", - "Preston", "Price", "Prince", "Pruitt", "Puckett", "Pugh", "Quinn", - "Ramirez", "Ramos", "Ramsey", "Randall", "Randolph", "Rasmussen", - "Ratliff", 
"Ray", "Raymond", "Reed", "Reese", "Reeves", "Reid", - "Reilly", "Reyes", "Reynolds", "Rhodes", "Rice", "Rich", "Richard", - "Richards", "Richardson", "Richmond", "Riddle", "Riggs", "Riley", - "Rios", "Rivas", "Rivera", "Rivers", "Roach", "Robbins", - "Roberson", "Roberts", "Robertson", "Robinson", "Robles", "Rocha", - "Rodgers", "Rodriguez", "Rodriquez", "Rogers", "Rojas", "Rollins", - "Roman", "Romero", "Rosa", "Rosales", "Rosario", "Rose", "Ross", - "Roth", "Rowe", "Rowland", "Roy", "Ruiz", "Rush", "Russell", - "Russo", "Rutledge", "Ryan", "Salas", "Salazar", "Salinas", - "Sampson", "Sanchez", "Sanders", "Sandoval", "Sanford", "Santana", - "Santiago", "Santos", "Sargent", "Saunders", "Savage", "Sawyer", - "Schmidt", "Schneider", "Schroeder", "Schultz", "Schwartz", - "Scott", "Sears", "Sellers", "Serrano", "Sexton", "Shaffer", - "Shannon", "Sharp", "Sharpe", "Shaw", "Shelton", "Shepard", - "Shepherd", "Sheppard", "Sherman", "Shields", "Short", "Silva", - "Simmons", "Simon", "Simpson", "Sims", "Singleton", "Skinner", - "Slater", "Sloan", "Small", "Snider", "Snow", "Snyder", "Solis", - "Solomon", "Sosa", "Soto", "Sparks", "Spears", "Spence", "Spencer", - "Stafford", "Stanley", "Stanton", "Stark", "Steele", "Stein", - "Stephens", "Stephenson", "Stevens", "Stevenson", "Stewart", - "Stokes", "Stone", "Stout", "Strickland", "Strong", "Stuart", - "Suarez", "Sullivan", "Summers", "Sutton", "Swanson", "Sweeney", - "Sweet", "Sykes", "Talley", "Tanner", "Tate", "Terrell", "Terry", - "Thompson", "Thornton", "Tillman", "Todd", "Torres", "Townsend", - "Tran", "Travis", "Trevino", "Trujillo", "Tucker", "Turner", - "Tyler", "Tyson", "Underwood", "Valdez", "Valencia", "Valentine", - "Valenzuela", "Vance", "Vang", "Vargas", "Vasquez", "Vaughan", - "Vaughn", "Vazquez", "Vega", "Velasquez", "Velazquez", "Velez", - "Van halen", "Vincent", "Vinson", "Wade", "Wagner", "Walker", - "Wall", "Wallace", "Waller", "Walls", "Walsh", "Walter", "Walters", - "Walton", "Ward", "Ware", "Warner", 
"Warren", "Washington", - "Waters", "Watkins", "Watson", "Watts", "Weaver", "Webb", "Weber", - "Webster", "Weeks", "Weiss", "Welch", "Wells", "West", "Wheeler", - "Whitaker", "White", "Whitehead", "Whitfield", "Whitley", - "Whitney", "Wiggins", "Wilcox", "Wilder", "Wiley", "Wilkerson", - "Wilkins", "Wilkinson", "William", "Williamson", "Willis", - "Winters", "Wise", "Witt", "Wolf", "Wolfe", "Wong", "Wood", - "Woodard", "Woods", "Woodward", "Wooten", "Workman", "Wright", - "Wyatt", "Wynn", "Yang", "Yates", "York", "Young", "Zamora", - "Zimmerman" - }; -} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/bigtop/blob/4fca4573/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/generator/util/Product.java ---------------------------------------------------------------------- diff --git a/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/generator/util/Product.java b/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/generator/util/Product.java new file mode 100644 index 0000000..9ef3d67 --- /dev/null +++ b/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/generator/util/Product.java @@ -0,0 +1,63 @@ +package org.apache.bigtop.bigpetstore.generator.util; + +import java.math.BigDecimal; +import static org.apache.bigtop.bigpetstore.generator.util.ProductType.*; + +public enum Product { + DOG_FOOD(DOG, 10.50), + ORGANIC_DOG_FOOD(DOG, 16.99), + STEEL_LEASH(DOG, 19.99), + FUZZY_COLLAR(DOG, 24.90), + LEATHER_COLLAR(DOG, 18.90), + CHOKE_COLLAR(DOG, 15.50), + DOG_HOUSE(DOG, 109.99), + CHEWY_BONE(DOG, 20.10), + DOG_VEST(DOG, 19.99), + DOG_SOAP(DOG, 5.45), + + CAT_FOOD(CAT, 7.50), + FEEDER_BOWL(CAT, 10.99), + LITTER_BOX(CAT, 24.95), + CAT_COLLAR(CAT, 7.95), + CAT_BLANKET(CAT, 14.49), + + TURTLE_PELLETS(TURTLE, 4.95), + TURTLE_FOOD(TURTLE, 10.90), + TURTLE_TUB(TURTLE, 40.45), + + FISH_FOOD(FISH, 12.50), + SALMON_BAIT(FISH, 29.95), + FISH_BOWL(FISH, 20.99), + AIR_PUMP(FISH, 13.95), + FILTER(FISH, 34.95), + + 
DUCK_COLLAR(DUCK, 13.25), + DUCK_FOOD(DUCK, 20.25), + WADING_POOL(DUCK, 45.90); + + /* + ANTELOPE_COLLAR(OTHER, 19.90), + ANTELOPE_SNACKS(OTHER, 29.25), + RODENT_CAGE(OTHER, 39.95), + HAY_BALE(OTHER, 4.95), + COW_DUNG(OTHER, 1.95), + SEAL_SPRAY(OTHER, 24.50), + SNAKE_BITE_OINTMENT(OTHER, 29.90); + */ + private final BigDecimal price; + public final ProductType productType; + private Product(ProductType productType, double price) { + this.price = BigDecimal.valueOf(price); + this.productType = productType; + } + + public int id() { + return this.ordinal(); + } + + public BigDecimal price() { + return this.price; + } + + +} http://git-wip-us.apache.org/repos/asf/bigtop/blob/4fca4573/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/generator/util/ProductType.java ---------------------------------------------------------------------- diff --git a/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/generator/util/ProductType.java b/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/generator/util/ProductType.java new file mode 100644 index 0000000..f41b604 --- /dev/null +++ b/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/generator/util/ProductType.java @@ -0,0 +1,29 @@ +package org.apache.bigtop.bigpetstore.generator.util; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +public enum ProductType { + DOG, CAT, TURTLE, FISH, DUCK; + + private List products; + + public List getProducts() { + if(products == null) { + generateProductList(); + } + return products; + } + + private void generateProductList() { + List products = new ArrayList<>(); + for(Product p : Product.values()) { + if(p.productType == this) { + products.add(p); + } + } + this.products = Collections.unmodifiableList(products); + } + +} http://git-wip-us.apache.org/repos/asf/bigtop/blob/4fca4573/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/generator/util/State.java 
---------------------------------------------------------------------- diff --git a/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/generator/util/State.java b/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/generator/util/State.java new file mode 100644 index 0000000..f2b845a --- /dev/null +++ b/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/generator/util/State.java @@ -0,0 +1,26 @@ +package org.apache.bigtop.bigpetstore.generator.util; + +import java.util.Random; + + +/** + * Each "state" has a pet store , with a certain "proportion" of the + * transactions. + */ +public enum State { + // Each state is associated with a relative probability. + AZ(.1f), + AK(.1f), + CT(.1f), + OK(.1f), + CO(.1f), + CA(.3f), + NY(.2f); + + public static Random rand = new Random(); + public float probability; + + private State(float probability) { + this.probability = probability; + } +} http://git-wip-us.apache.org/repos/asf/bigtop/blob/4fca4573/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/recommend/ItemRecommender.scala ---------------------------------------------------------------------- diff --git a/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/recommend/ItemRecommender.scala b/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/recommend/ItemRecommender.scala new file mode 100644 index 0000000..0ec240e --- /dev/null +++ b/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/recommend/ItemRecommender.scala @@ -0,0 +1,103 @@ +package org.apache.bigtop.bigpetstore.recommend + +import org.apache.mahout.cf.taste.hadoop.als.RecommenderJob +import org.apache.mahout.cf.taste.hadoop.als.ParallelALSFactorizationJob +import java.io.File +import parquet.org.codehaus.jackson.map.DeserializerFactory.Config +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.conf.Configurable +import org.apache.hadoop.util.ToolRunner +import 
org.apache.mahout.cf.taste.hadoop.als.SharingMapper +import org.apache.hadoop.util.Tool +import org.apache.bigtop.bigpetstore.util.DeveloperTools + +// We don't need to wrap these two jobs in ToolRunner.run calls since the only +// thing that we are doing right now is calling the run() methods of RecommenderJob +// and ParallelALSFactorizationJob. Both of these classes have a main() method that +// internally calls ToolRunner.run with all the command line args passed. So, if +// we want to run this code from the command line, we can easily do so by running +// the main methods of the ParallelALSFactorizationJob, followed by running the +// main method of RecommenderJob. That would also take care of the multiple-jvm +// instance issue metioned in the comments below, so the call to +class ItemRecommender(private val inputDir: String, + private val factorizationOutputDir: String, + private val recommendationsOutputDir: String) { + private val recommenderJob = new RecommenderJob + private val factorizationJob = new ParallelALSFactorizationJob + + private def tempDir = "/tmp/mahout_" + System.currentTimeMillis + + private def performAlsFactorization() = { + ToolRunner.run(factorizationJob, Array( + "--input", inputDir, + "--output", factorizationOutputDir, + "--lambda", "0.1", + "--tempDir", tempDir, + "--implicitFeedback", "false", + "--alpha", "0.8", + "--numFeatures", "2", + "--numIterations", "5", + "--numThreadsPerSolver", "1")) + } + + private def generateRecommendations() = { + ToolRunner.run(recommenderJob, (Array( + "--input", factorizationOutputDir + "/userRatings/", + "--userFeatures", factorizationOutputDir + "/U/", + "--itemFeatures", factorizationOutputDir + "/M/", + "--numRecommendations", "1", + "--output", recommendationsOutputDir, + "--maxRating", "1"))) + } + + // At this point, the performAlsFactorization generateRecommendations + // and this method can not be run from the same VM instance. 
These two jobs + // share a common static variable which is not being handled correctly. + // This, unfortunately, results in a class-cast exception being thrown. That's + // why the resetFlagInSharedAlsMapper is required. See the comments on + // resetFlagInSharedAlsMapper() method. + def recommend = { + performAlsFactorization + resetFlagInSharedAlsMapper + generateRecommendations + } + + // necessary for local execution in the same JVM only. If the performAlsFactorization() + // and generateRecommendations() calls are performed in separate JVM instances, this + // would be taken care of automatically. However, if we want to run this two methods + // as one task, we need to clean up the static state set by these methods, and we don't + // have any legitimate way of doing this directly. This clean-up should have been + // performed by ParallelALSFactorizationJob class after the job is finished. + // TODO: remove this when a better way comes along, or ParallelALSFactorizationJob + // takes responsibility. 
+ private def resetFlagInSharedAlsMapper { + val m = classOf[SharingMapper[_, _, _, _, _]].getDeclaredMethod("reset"); + m setAccessible true + m.invoke(null) + } +} + +object ItemRecommender { + def main(args: Array[String]) { + val res = ToolRunner.run(new Configuration(), new Tool() { + var conf: Configuration = _; + + override def setConf(conf: Configuration) { + this.conf=conf; + } + + + override def getConf() = { + this.conf; + } + + + override def run(toolArgs: Array[String]) = { + val ir = new ItemRecommender(toolArgs(0), toolArgs(1), toolArgs(2)) + ir.recommend + 0; + } + }, args); + System.exit(res); + } +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/bigtop/blob/4fca4573/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/util/BigPetStoreConstants.java ---------------------------------------------------------------------- diff --git a/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/util/BigPetStoreConstants.java b/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/util/BigPetStoreConstants.java index 29f7c67..01a6b95 100755 --- a/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/util/BigPetStoreConstants.java +++ b/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/util/BigPetStoreConstants.java @@ -6,13 +6,13 @@ * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
- * + * * Static final constants * * is useful to have the basic sql here as the HIVE SQL can vary between hive @@ -24,13 +24,18 @@ package org.apache.bigtop.bigpetstore.util; public class BigPetStoreConstants { //Files should be stored in graphviz arch.dot - public enum OUTPUTS{ + public static enum OUTPUTS { generated,//generator cleaned,//pig + tsv, pig_ad_hoc_script, - MAHOUT_CF_IN,//hive view over data for mahout - MAHOUT_CF_OUT,//mahout cf results - CUSTOMER_PAGE//crunchhh + CUSTOMER_PAGE; //crunchhh + + public static enum MahoutPaths { + Mahout, + AlsFactorization, + AlsRecommendations + } }; } http://git-wip-us.apache.org/repos/asf/bigtop/blob/4fca4573/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/util/NumericalIdUtils.java ---------------------------------------------------------------------- diff --git a/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/util/NumericalIdUtils.java b/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/util/NumericalIdUtils.java index 9fa9455..c652beb 100644 --- a/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/util/NumericalIdUtils.java +++ b/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/util/NumericalIdUtils.java @@ -6,7 +6,7 @@ * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -16,16 +16,14 @@ package org.apache.bigtop.bigpetstore.util; -import java.math.BigInteger; - -import org.apache.bigtop.bigpetstore.generator.TransactionIteratorFactory.STATE; +import org.apache.bigtop.bigpetstore.generator.util.State; /** * User and Product IDs need numerical * identifiers for recommender algorithms * which attempt to interpolate new * products. - * + * * TODO: Delete this class. Its not necessarily required: We might just use HIVE HASH() as our * standard for this. */ @@ -34,7 +32,7 @@ public class NumericalIdUtils { /** * People: Leading with ordinal code for state. */ - public static long toId(STATE state, String name){ + public static long toId(State state, String name){ String fromRawData = state==null? name: http://git-wip-us.apache.org/repos/asf/bigtop/blob/4fca4573/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/util/Pair.java ---------------------------------------------------------------------- diff --git a/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/util/Pair.java b/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/util/Pair.java deleted file mode 100644 index a96fa44..0000000 --- a/bigtop-bigpetstore/src/main/java/org/apache/bigtop/bigpetstore/util/Pair.java +++ /dev/null @@ -1,125 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.bigtop.bigpetstore.util; - -import org.apache.bigtop.bigpetstore.generator.TransactionIteratorFactory; - -import java.util.Comparator; - -@Deprecated -public class Pair implements Comparable> { - - private final S first; - private final T second; - - public Pair(final S car, final T cdr) { - first = car; - second = cdr; - } - - public S getFirst() { return first; } - public T getSecond() { return second; } - - @Override - public boolean equals(Object o) { - if (null == o) { - return false; - } else if (o instanceof Pair) { - Pair p = (Pair) o; - if (first == null && second == null) { - return p.first == null && p.second == null; - } else if (first == null) { - return p.first == null && second.equals(p.second); - } else if (second == null) { - return p.second == null && first.equals(p.first); - } else { - return first.equals(p.first) && second.equals(p.second); - } - } else { - return false; - } - } - - @Override - public int hashCode() { - int code = 0; - - if (null != first) { - code += first.hashCode(); - } - - if (null != second) { - code += second.hashCode() << 1; - } - - return code; - } - - @Override - public int compareTo(Pair p) { - if (null == p) { - return 1; - } - - Comparable firstCompare = (Comparable) first; - - int firstResult = firstCompare.compareTo(p.first); - if (firstResult == 0) { - Comparable secondCompare = (Comparable) second; - return secondCompare.compareTo(p.second); - } else { - return firstResult; - } - } - - // TODO: Can this be made static? Same with SecondElemComparator? 
- public class FirstElemComparator implements Comparator> { - public FirstElemComparator() { - } - - public int compare(Pair p1, Pair p2) { - Comparable cS = (Comparable) p1.first; - return cS.compareTo(p2.first); - } - } - - public class SecondElemComparator implements Comparator> { - public SecondElemComparator() { - } - - public int compare(Pair p1, Pair p2) { - Comparable cT = (Comparable) p1.second; - return cT.compareTo(p2.second); - } - } - - @Override - public String toString() { - String firstString = "null"; - String secondString = "null"; - - if (null != first) { - firstString = first.toString(); - } - - if (null != second) { - secondString = second.toString(); - } - - return "(" + firstString + ", " + secondString + ")"; - } -} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/bigtop/blob/4fca4573/bigtop-bigpetstore/src/main/scala/org/apache/bigtop/bigpetstore/generator/DataForger.scala ---------------------------------------------------------------------- diff --git a/bigtop-bigpetstore/src/main/scala/org/apache/bigtop/bigpetstore/generator/DataForger.scala b/bigtop-bigpetstore/src/main/scala/org/apache/bigtop/bigpetstore/generator/DataForger.scala new file mode 100644 index 0000000..de9b29b --- /dev/null +++ b/bigtop-bigpetstore/src/main/scala/org/apache/bigtop/bigpetstore/generator/DataForger.scala @@ -0,0 +1,263 @@ +package org.apache.bigtop.bigpetstore.generator + +import java.util.Random +import org.jfairy.Fairy +import java.util.Date + + +/** + * Generic class for generating random data. This class was created so + * that we can provide a uniform API for getting random data. If we want, + * we can replace the underlying data-generation implementation using + * existing libraries. + */ +object DataForger { + private val random = new Random + private val fairy = Fairy.create() + + // TODO: Jay / Bhashit : refactor to use a random data generator? 
+ def firstName(random: Random) = firstNames(random.nextInt(firstNames.length)) + def firstName: String = firstName(random) + + // TODO: Jay / Bhashit : refactor to use a random data generator? + def lastName(random: Random) = lastNames(random.nextInt(lastNames.length)) + def lastName: String = lastName(random) + + def randomDateInPastYears(maxYearsEarlier: Int) = fairy.dateProducer().randomDateInThePast(maxYearsEarlier).toDate() + + private val firstNames = IndexedSeq("Aaron", "Abby", "Abigail", "Adam", + "Alan", "Albert", "Alex", "Alexandra", "Alexis", "Alice", "Alicia", + "Alisha", "Alissa", "Allen", "Allison", "Alyssa", "Amanda", "Amber", + "Amy", "Andrea", "Andrew", "Andy", "Angel", "Angela", "Angie", + "Anita", "Ann", "Anna", "Annette", "Anthony", "Antonio", "April", + "Arthur", "Ashley", "Audrey", "Austin", "Autumn", "Baby", "Barb", + "Barbara", "Becky", "Benjamin", "Beth", "Bethany", "Betty", + "Beverly", "Bill", "Billie", "Billy", "Blake", "Bob", "Bobbie", + "Bobby", "Bonnie", "Brad", "Bradley", "Brady", "Brandi", "Brandon", + "Brandy", "Breanna", "Brenda", "Brent", "Brett", "Brian", "Brianna", + "Brittany", "Brooke", "Brooklyn", "Bruce", "Bryan", "Caleb", + "Cameron", "Candy", "Carl", "Carla", "Carmen", "Carol", "Carolyn", + "Carrie", "Casey", "Cassandra", "Catherine", "Cathy", "Chad", + "Charlene", "Charles", "Charlie", "Charlotte", "Chase", "Chasity", + "Chastity", "Chelsea", "Cheryl", "Chester", "Cheyenne", "Chris", + "Christian", "Christina", "Christine", "Christoph", "Christopher", + "Christy", "Chuck", "Cindy", "Clara", "Clarence", "Clayton", + "Clifford", "Clint", "Cody", "Colton", "Connie", "Corey", "Cory", + "Courtney", "Craig", "Crystal", "Curtis", "Cynthia", "Dakota", + "Dale", "Dallas", "Dalton", "Dan", "Dana", "Daniel", "Danielle", + "Danny", "Darla", "Darlene", "Darrell", "Darren", "Dave", "David", + "Dawn", "Dean", "Deanna", "Debbie", "Deborah", "Debra", "Denise", + "Dennis", "Derek", "Derrick", "Destiny", "Devin", "Diana", "Diane", + 
"Dillon", "Dixie", "Dominic", "Don", "Donald", "Donna", "Donnie", + "Doris", "Dorothy", "Doug", "Douglas", "Drew", "Duane", "Dustin", + "Dusty", "Dylan", "Earl", "Ed", "Eddie", "Edward", "Elaine", + "Elizabeth", "Ellen", "Emily", "Eric", "Erica", "Erika", "Erin", + "Ernest", "Ethan", "Eugene", "Eva", "Evelyn", "Everett", "Faith", + "Father", "Felicia", "Floyd", "Francis", "Frank", "Fred", "Gabriel", + "Gage", "Gail", "Gary", "Gene", "George", "Gerald", "Gina", "Ginger", + "Glen", "Glenn", "Gloria", "Grace", "Greg", "Gregory", "Haley", + "Hannah", "Harley", "Harold", "Harry", "Heath", "Heather", "Heidi", + "Helen", "Herbert", "Holly", "Hope", "Howard", "Hunter", "Ian", + "Isaac", "Jack", "Jackie", "Jacob", "Jade", "Jake", "James", "Jamie", + "Jan", "Jane", "Janet", "Janice", "Jared", "Jasmine", "Jason", "Jay", + "Jean", "Jeannie", "Jeff", "Jeffery", "Jeffrey", "Jenna", "Jennifer", + "Jenny", "Jeremiah", "Jeremy", "Jerry", "Jesse", "Jessica", "Jessie", + "Jill", "Jim", "Jimmy", "Joann", "Joanne", "Jodi", "Jody", "Joe", + "Joel", "Joey", "John", "Johnathan", "Johnny", "Jon", "Jonathan", + "Jonathon", "Jordan", "Joseph", "Josh", "Joshua", "Joyce", "Juanita", + "Judy", "Julia", "Julie", "Justin", "Kaitlyn", "Karen", "Katelyn", + "Katherine", "Kathleen", "Kathryn", "Kathy", "Katie", "Katrina", + "Kay", "Kayla", "Kaylee", "Keith", "Kelly", "Kelsey", "Ken", + "Kendra", "Kenneth", "Kenny", "Kevin", "Kim", "Kimberly", "Kris", + "Krista", "Kristen", "Kristin", "Kristina", "Kristy", "Kyle", + "Kylie", "Lacey", "Laken", "Lance", "Larry", "Laura", "Lawrence", + "Leah", "Lee", "Leonard", "Leroy", "Leslie", "Levi", "Lewis", + "Linda", "Lindsay", "Lindsey", "Lisa", "Lloyd", "Logan", "Lois", + "Loretta", "Lori", "Louis", "Lynn", "Madison", "Mandy", "Marcus", + "Margaret", "Maria", "Mariah", "Marie", "Marilyn", "Marion", "Mark", + "Marlene", "Marsha", "Martha", "Martin", "Marty", "Marvin", "Mary", + "Mary ann", "Mason", "Matt", "Matthew", "Max", "Megan", "Melanie", + "Melinda", 
"Melissa", "Melody", "Michael", "Michelle", "Mickey", + "Mike", "Mindy", "Miranda", "Misty", "Mitchell", "Molly", "Monica", + "Morgan", "Mother", "Myron", "Nancy", "Natasha", "Nathan", + "Nicholas", "Nick", "Nicole", "Nina", "Noah", "Norma", "Norman", + "Olivia", "Paige", "Pam", "Pamela", "Pat", "Patricia", "Patrick", + "Patty", "Paul", "Paula", "Peggy", "Penny", "Pete", "Phillip", + "Phyllis", "Rachael", "Rachel", "Ralph", "Randall", "Randi", "Randy", + "Ray", "Raymond", "Rebecca", "Regina", "Renee", "Rex", "Rhonda", + "Richard", "Rick", "Ricky", "Rita", "Rob", "Robbie", "Robert", + "Roberta", "Robin", "Rochelle", "Rocky", "Rod", "Rodney", "Roger", + "Ron", "Ronald", "Ronda", "Ronnie", "Rose", "Roxanne", "Roy", "Russ", + "Russell", "Rusty", "Ruth", "Ryan", "Sabrina", "Sally", "Sam", + "Samantha", "Samuel", "Sandra", "Sandy", "Sara", "Sarah", "Savannah", + "Scott", "Sean", "Seth", "Shanda", "Shane", "Shanna", "Shannon", + "Sharon", "Shaun", "Shawn", "Shawna", "Sheila", "Shelly", "Sher", + "Sherri", "Sherry", "Shirley", "Sierra", "Skyler", "Stacey", "Stacy", + "Stanley", "Stephanie", "Stephen", "Steve", "Steven", "Sue", + "Summer", "Susan", "Sydney", "Tabatha", "Tabitha", "Tamara", "Tammy", + "Tara", "Tasha", "Tashia", "Taylor", "Ted", "Teresa", "Terri", + "Terry", "Tessa", "Thelma", "Theresa", "Thomas", "Tia", "Tiffany", + "Tim", "Timmy", "Timothy", "Tina", "Todd", "Tom", "Tommy", "Toni", + "Tony", "Tonya", "Tracey", "Tracie", "Tracy", "Travis", "Trent", + "Trevor", "Trey", "Trisha", "Tristan", "Troy", "Tyler", "Tyrone", + "Unborn", "Valerie", "Vanessa", "Vernon", "Veronica", "Vicki", + "Vickie", "Vicky", "Victor", "Victoria", "Vincent", "Virginia", + "Vivian", "Walter", "Wanda", "Wayne", "Wendy", "Wesley", "Whitney", + "William", "Willie", "Wyatt", "Zachary") + + private val lastNames = IndexedSeq("Abbott", "Acevedo", "Acosta", "Adams", + "Adkins", "Aguilar", "Aguirre", "Albert", "Alexander", "Alford", + "Allen", "Allison", "Alston", "Alvarado", "Alvarez", 
"Anderson", + "Andrews", "Anthony", "Armstrong", "Arnold", "Ashley", "Atkins", + "Atkinson", "Austin", "Avery", "Avila", "Ayala", "Ayers", "Bailey", + "Baird", "Baker", "Baldwin", "Ball", "Ballard", "Banks", "Barber", + "Smith", "Johnson", "Williams", "Jones", "Brown", "Davis", "Miller", + "Wilson", "Moore", "Taylor", "Thomas", "Jackson", "Barker", "Barlow", + "Barnes", "Barnett", "Barr", "Barrera", "Barrett", "Barron", "Barry", + "Bartlett", "Barton", "Bass", "Bates", "Battle", "Bauer", "Baxter", + "Beach", "Bean", "Beard", "Beasley", "Beck", "Becker", "Bell", + "Bender", "Benjamin", "Bennett", "Benson", "Bentley", "Benton", + "Berg", "Berger", "Bernard", "Berry", "Best", "Bird", "Bishop", + "Black", "Blackburn", "Blackwell", "Blair", "Blake", "Blanchard", + "Blankenship", "Blevins", "Bolton", "Bond", "Bonner", "Booker", + "Boone", "Booth", "Bowen", "Bowers", "Bowman", "Boyd", "Boyer", + "Boyle", "Bradford", "Bradley", "Bradshaw", "Brady", "Branch", + "Bray", "Brennan", "Brewer", "Bridges", "Briggs", "Bright", "Britt", + "Brock", "Brooks", "Browning", "Bruce", "Bryan", "Bryant", + "Buchanan", "Buck", "Buckley", "Buckner", "Bullock", "Burch", + "Burgess", "Burke", "Burks", "Burnett", "Burns", "Burris", "Burt", + "Burton", "Bush", "Butler", "Byers", "Byrd", "Cabrera", "Cain", + "Calderon", "Caldwell", "Calhoun", "Callahan", "Camacho", "Cameron", + "Campbell", "Campos", "Cannon", "Cantrell", "Cantu", "Cardenas", + "Carey", "Carlson", "Carney", "Carpenter", "Carr", "Carrillo", + "Carroll", "Carson", "Carter", "Carver", "Case", "Casey", "Cash", + "Castaneda", "Castillo", "Castro", "Cervantes", "Chambers", "Chan", + "Chandler", "Chaney", "Chang", "Chapman", "Charles", "Chase", + "Chavez", "Chen", "Cherry", "Christensen", "Christian", "Church", + "Clark", "Clarke", "Clay", "Clayton", "Clements", "Clemons", + "Cleveland", "Cline", "Cobb", "Cochran", "Coffey", "Cohen", "Cole", + "Coleman", "Collier", "Collins", "Colon", "Combs", "Compton", + "Conley", "Conner", "Conrad", 
"Contreras", "Conway", "Cook", "Cooke", + "Cooley", "Cooper", "Copeland", "Cortez", "Cote", "Cotton", "Cox", + "Craft", "Craig", "Crane", "Crawford", "Crosby", "Cross", "Cruz", + "Cummings", "Cunningham", "Curry", "Curtis", "Dale", "Dalton", + "Daniel", "Daniels", "Daugherty", "Davenport", "David", "Davidson", + "Dawson", "Day", "Dean", "Decker", "Dejesus", "Delacruz", "Delaney", + "Deleon", "Delgado", "Dennis", "Diaz", "Dickerson", "Dickinson", + "Dillard", "Dillon", "Dixon", "Dodson", "Dominguez", "Donaldson", + "Donovan", "Dorsey", "Dotson", "Douglas", "Downs", "Doyle", "Drake", + "Dudley", "Duffy", "Duke", "Duncan", "Dunlap", "Dunn", "Duran", + "Durham", "Dyer", "Eaton", "Edwards", "Elliott", "Ellis", "Ellison", + "Emerson", "England", "English", "Erickson", "Espinoza", "Estes", + "Estrada", "Evans", "Everett", "Ewing", "Farley", "Farmer", + "Farrell", "Faulkner", "Ferguson", "Fernandez", "Ferrell", "Fields", + "Figueroa", "Finch", "Finley", "Fischer", "Fisher", "Fitzgerald", + "Fitzpatrick", "Fleming", "Fletcher", "Flores", "Flowers", "Floyd", + "Flynn", "Foley", "Forbes", "Ford", "Foreman", "Foster", "Fowler", + "Fox", "Francis", "Franco", "Frank", "Franklin", "Franks", "Frazier", + "Frederick", "Freeman", "French", "Frost", "Fry", "Frye", "Fuentes", + "Fuller", "Fulton", "Gaines", "Gallagher", "Gallegos", "Galloway", + "Gamble", "Garcia", "Gardner", "Garner", "Garrett", "Garrison", + "Garza", "Gates", "Gay", "Gentry", "George", "Gibbs", "Gibson", + "Gilbert", "Giles", "Gill", "Gillespie", "Gilliam", "Gilmore", + "Glass", "Glenn", "Glover", "Goff", "Golden", "Gomez", "Gonzales", + "Gonzalez", "Good", "Goodman", "Goodwin", "Gordon", "Gould", + "Graham", "Grant", "Graves", "Gray", "Green", "Greene", "Greer", + "Gregory", "Griffin", "Griffith", "Grimes", "Gross", "Guerra", + "Guerrero", "Guthrie", "Gutierrez", "Guy", "Guzman", "Hahn", "Hale", + "Haley", "Hall", "Hamilton", "Hammond", "Hampton", "Hancock", + "Haney", "Hansen", "Hanson", "Hardin", "Harding", 
"Hardy", "Harmon", + "Harper", "Harris", "Harrington", "Harrison", "Hart", "Hartman", + "Harvey", "Hatfield", "Hawkins", "Hayden", "Hayes", "Haynes", "Hays", + "Head", "Heath", "Hebert", "Henderson", "Hendricks", "Hendrix", + "Henry", "Hensley", "Henson", "Herman", "Hernandez", "Herrera", + "Herring", "Hess", "Hester", "Hewitt", "Hickman", "Hicks", "Higgins", + "Hill", "Hines", "Hinton", "Hobbs", "Hodge", "Hodges", "Hoffman", + "Hogan", "Holcomb", "Holden", "Holder", "Holland", "Holloway", + "Holman", "Holmes", "Holt", "Hood", "Hooper", "Hoover", "Hopkins", + "Hopper", "Horn", "Horne", "Horton", "House", "Houston", "Howard", + "Howe", "Howell", "Hubbard", "Huber", "Hudson", "Huff", "Huffman", + "Hughes", "Hull", "Humphrey", "Hunt", "Hunter", "Hurley", "Hurst", + "Hutchinson", "Hyde", "Ingram", "Irwin", "Jacobs", "Jacobson", + "James", "Jarvis", "Jefferson", "Jenkins", "Jennings", "Jensen", + "Jimenez", "Johns", "Johnston", "Jordan", "Joseph", "Joyce", + "Joyner", "Juarez", "Justice", "Kane", "Kaufman", "Keith", "Keller", + "Kelley", "Kelly", "Kemp", "Kennedy", "Kent", "Kerr", "Key", "Kidd", + "Kim", "King", "Kinney", "Kirby", "Kirk", "Kirkland", "Klein", + "Kline", "Knapp", "Knight", "Knowles", "Knox", "Koch", "Kramer", + "Lamb", "Lambert", "Lancaster", "Landry", "Lane", "Lang", "Langley", + "Lara", "Larsen", "Larson", "Lawrence", "Lawson", "Le", "Leach", + "Leblanc", "Lee", "Leon", "Leonard", "Lester", "Levine", "Levy", + "Lewis", "Lindsay", "Lindsey", "Little", "Livingston", "Lloyd", + "Logan", "Long", "Lopez", "Lott", "Love", "Lowe", "Lowery", "Lucas", + "Luna", "Lynch", "Lynn", "Lyons", "Macdonald", "Macias", "Mack", + "Madden", "Maddox", "Maldonado", "Malone", "Mann", "Manning", + "Marks", "Marquez", "Marsh", "Marshall", "Martin", "Martinez", + "Mason", "Massey", "Mathews", "Mathis", "Matthews", "Maxwell", "May", + "Mayer", "Maynard", "Mayo", "Mays", "McBride", "McCall", "McCarthy", + "McCarty", "McClain", "McClure", "McConnell", "McCormick", "McCoy", + 
"McCray", "McCullough", "McDaniel", "McDonald", "McDowell", + "McFadden", "McFarland", "McGee", "McGowan", "McGuire", "McIntosh", + "McIntyre", "McKay", "McKee", "McKenzie", "McKinney", "McKnight", + "McLaughlin", "McLean", "McLeod", "McMahon", "McMillan", "McNeil", + "McPherson", "Meadows", "Medina", "Mejia", "Melendez", "Melton", + "Mendez", "Mendoza", "Mercado", "Mercer", "Merrill", "Merritt", + "Meyer", "Meyers", "Michael", "Middleton", "Miles", "Mills", + "Miranda", "Mitchell", "Molina", "Monroe", "Montgomery", "Montoya", + "Moody", "Moon", "Mooney", "Morales", "Moran", "Moreno", "Morgan", + "Morin", "Morris", "Morrison", "Morrow", "Morse", "Morton", "Moses", + "Mosley", "Moss", "Mueller", "Mullen", "Mullins", "Munoz", "Murphy", + "Murray", "Myers", "Nash", "Navarro", "Neal", "Nelson", "Newman", + "Newton", "Nguyen", "Nichols", "Nicholson", "Nielsen", "Nieves", + "Nixon", "Noble", "Noel", "Nolan", "Norman", "Norris", "Norton", + "Nunez", "Obrien", "Ochoa", "Oconnor", "Odom", "Odonnell", "Oliver", + "Olsen", "Olson", "O'neal", "O'neil", "O'neill", "Orr", "Ortega", + "Ortiz", "Osborn", "Osborne", "Owen", "Owens", "Pace", "Pacheco", + "Padilla", "Page", "Palmer", "Park", "Parker", "Parks", "Parrish", + "Parsons", "Pate", "Patel", "Patrick", "Patterson", "Patton", "Paul", + "Payne", "Pearson", "Peck", "Pena", "Pennington", "Perez", "Perkins", + "Perry", "Peters", "Petersen", "Peterson", "Petty", "Phelps", + "Phillips", "Pickett", "Pierce", "Pittman", "Pitts", "Pollard", + "Poole", "Pope", "Porter", "Potter", "Potts", "Powell", "Powers", + "Pratt", "Preston", "Price", "Prince", "Pruitt", "Puckett", "Pugh", + "Quinn", "Ramirez", "Ramos", "Ramsey", "Randall", "Randolph", + "Rasmussen", "Ratliff", "Ray", "Raymond", "Reed", "Reese", "Reeves", + "Reid", "Reilly", "Reyes", "Reynolds", "Rhodes", "Rice", "Rich", + "Richard", "Richards", "Richardson", "Richmond", "Riddle", "Riggs", + "Riley", "Rios", "Rivas", "Rivera", "Rivers", "Roach", "Robbins", + "Roberson", "Roberts", 
"Robertson", "Robinson", "Robles", "Rocha", + "Rodgers", "Rodriguez", "Rodriquez", "Rogers", "Rojas", "Rollins", + "Roman", "Romero", "Rosa", "Rosales", "Rosario", "Rose", "Ross", + "Roth", "Rowe", "Rowland", "Roy", "Ruiz", "Rush", "Russell", "Russo", + "Rutledge", "Ryan", "Salas", "Salazar", "Salinas", "Sampson", + "Sanchez", "Sanders", "Sandoval", "Sanford", "Santana", "Santiago", + "Santos", "Sargent", "Saunders", "Savage", "Sawyer", "Schmidt", + "Schneider", "Schroeder", "Schultz", "Schwartz", "Scott", "Sears", + "Sellers", "Serrano", "Sexton", "Shaffer", "Shannon", "Sharp", + "Sharpe", "Shaw", "Shelton", "Shepard", "Shepherd", "Sheppard", + "Sherman", "Shields", "Short", "Silva", "Simmons", "Simon", + "Simpson", "Sims", "Singleton", "Skinner", "Slater", "Sloan", + "Small", "Snider", "Snow", "Snyder", "Solis", "Solomon", "Sosa", + "Soto", "Sparks", "Spears", "Spence", "Spencer", "Stafford", + "Stanley", "Stanton", "Stark", "Steele", "Stein", "Stephens", + "Stephenson", "Stevens", "Stevenson", "Stewart", "Stokes", "Stone", + "Stout", "Strickland", "Strong", "Stuart", "Suarez", "Sullivan", + "Summers", "Sutton", "Swanson", "Sweeney", "Sweet", "Sykes", + "Talley", "Tanner", "Tate", "Terrell", "Terry", "Thompson", + "Thornton", "Tillman", "Todd", "Torres", "Townsend", "Tran", + "Travis", "Trevino", "Trujillo", "Tucker", "Turner", "Tyler", + "Tyson", "Underwood", "Valdez", "Valencia", "Valentine", + "Valenzuela", "Vance", "Vang", "Vargas", "Vasquez", "Vaughan", + "Vaughn", "Vazquez", "Vega", "Velasquez", "Velazquez", "Velez", + "Van halen", "Vincent", "Vinson", "Wade", "Wagner", "Walker", "Wall", + "Wallace", "Waller", "Walls", "Walsh", "Walter", "Walters", "Walton", + "Ward", "Ware", "Warner", "Warren", "Washington", "Waters", + "Watkins", "Watson", "Watts", "Weaver", "Webb", "Weber", "Webster", + "Weeks", "Weiss", "Welch", "Wells", "West", "Wheeler", "Whitaker", + "White", "Whitehead", "Whitfield", "Whitley", "Whitney", "Wiggins", + "Wilcox", "Wilder", "Wiley", 
"Wilkerson", "Wilkins", "Wilkinson", + "William", "Williamson", "Willis", "Winters", "Wise", "Witt", "Wolf", + "Wolfe", "Wong", "Wood", "Woodard", "Woods", "Woodward", "Wooten", + "Workman", "Wright", "Wyatt", "Wynn", "Yang", "Yates", "York", + "Young", "Zamora", "Zimmerman") +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/bigtop/blob/4fca4573/bigtop-bigpetstore/src/main/scala/org/apache/bigtop/bigpetstore/generator/TransactionIteratorFactory.scala ---------------------------------------------------------------------- diff --git a/bigtop-bigpetstore/src/main/scala/org/apache/bigtop/bigpetstore/generator/TransactionIteratorFactory.scala b/bigtop-bigpetstore/src/main/scala/org/apache/bigtop/bigpetstore/generator/TransactionIteratorFactory.scala new file mode 100644 index 0000000..9e70cca --- /dev/null +++ b/bigtop-bigpetstore/src/main/scala/org/apache/bigtop/bigpetstore/generator/TransactionIteratorFactory.scala @@ -0,0 +1,104 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.bigtop.bigpetstore.generator; + +import java.util.Date +import org.apache.bigtop.bigpetstore.generator.util.State +import org.apache.commons.lang3.StringUtils +import java.util.Arrays.asList +import java.util.Random +import scala.collection.Iterator +import com.sun.org.apache.xml.internal.serializer.ToStream +import java.util.{Iterator => JavaIterator} +import scala.collection.JavaConversions.asJavaIterator +import org.apache.bigtop.bigpetstore.generator.util.Product +import org.apache.commons.lang3.Range; +import org.apache.bigtop.bigpetstore.generator.util.ProductType + +/** + * This class generates our data. Over time we will use it to embed bias which + * can then be teased out, e.g. by clustering/classifiers. For example: + * certain products <--> certain years or days + * The data method returns a Java iterator of KeyVal(key, value) string pairs driven by a Random seeded with state.hashCode; construction asserts that records is greater than 0. + */ +class TransactionIteratorFactory(private val records: Int, + private val customerIdRange: Range[java.lang.Long], + private val state: State) { + assert(records > 0, "Number of records must be greater than 0 to generate a data iterator!") + private val random = new Random(state.hashCode) + + def data: JavaIterator[TransactionIteratorFactory.KeyVal[String, String]] = { + new TransactionIteratorFactory.DataIterator(records, customerIdRange, state, random) + } +} + +object TransactionIteratorFactory { + class KeyVal[K, V](val key: K, val value: V) + + private class DataIterator(records: Int, + customerIdRange: Range[java.lang.Long], + state: State, + r: Random) extends Iterator[KeyVal[String, String]] { + private var firstName: String = null + private var lastName: String = null + private var elementsProcducedCount = 0 + private var repeatCount = 0 + private var currentCustomerId = customerIdRange.getMinimum + private var currentProductType = selectRandomProductType; + + def hasNext = + elementsProcducedCount < records && currentCustomerId <= customerIdRange.getMaximum + + + def next(): TransactionIteratorFactory.KeyVal[String,String] = { + val date = 
DataForger.randomDateInPastYears(50); + setIteratorState(); + + val product = randomProductOfCurrentlySelectedType + val key = StringUtils.join(asList("BigPetStore", "storeCode_" + state.name(), + elementsProcducedCount.toString), ",") + val value = StringUtils.join(asList(currentCustomerId, firstName, lastName, product.id, + product.name.toLowerCase, product.price, date), ",") + + elementsProcducedCount += 1 + new TransactionIteratorFactory.KeyVal(key, value) + } + + private def setIteratorState() = { + /** Some customers come back for more :) While repeatCount is positive it is decremented and the current customer and product type are reused; otherwise a new first/last name, repeat count and product type are drawn and the customer id is advanced by one. NOTE(review): the id is advanced before the first record is built, so customerIdRange.getMinimum itself is never emitted, and hasNext still permits a step to getMaximum + 1 - confirm whether that is intended. */ + if (repeatCount > 0) { + repeatCount -= 1 + } else { + firstName = DataForger.firstName(r) + lastName = DataForger.lastName(r) + // The draw below is not capped at 10 and can be zero or negative, in which case the customer is simply not repeated. We don't really need Gaussian + // distribution since number of transactions per customer can be truly arbitrary. NOTE(review): the println that follows writes a debug line to stdout for every new customer. + repeatCount = (r.nextGaussian * 4f) toInt; + println("####Repeat: " + repeatCount) + currentCustomerId += 1 + currentProductType = selectRandomProductType; + } + } + + private def selectRandomProductType = { + ProductType.values.apply(r.nextInt(ProductType.values.length)) + } + + private def randomProductOfCurrentlySelectedType = { + currentProductType.getProducts.get(r.nextInt(currentProductType.getProducts.size)) + } + } +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/bigtop/blob/4fca4573/bigtop-bigpetstore/src/test/java/org/apache/bigtop/bigpetstore/generator/TestNumericalIdUtils.java ---------------------------------------------------------------------- diff --git a/bigtop-bigpetstore/src/test/java/org/apache/bigtop/bigpetstore/generator/TestNumericalIdUtils.java b/bigtop-bigpetstore/src/test/java/org/apache/bigtop/bigpetstore/generator/TestNumericalIdUtils.java index 52b8079..e2f1f25 100644 --- a/bigtop-bigpetstore/src/test/java/org/apache/bigtop/bigpetstore/generator/TestNumericalIdUtils.java +++ 
b/bigtop-bigpetstore/src/test/java/org/apache/bigtop/bigpetstore/generator/TestNumericalIdUtils.java @@ -6,7 +6,7 @@ * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -17,7 +17,7 @@ package org.apache.bigtop.bigpetstore.generator; import static org.junit.Assert.assertFalse; -import org.apache.bigtop.bigpetstore.generator.TransactionIteratorFactory.STATE; +import org.apache.bigtop.bigpetstore.generator.util.State; import org.apache.bigtop.bigpetstore.util.NumericalIdUtils; import org.junit.Test; @@ -25,9 +25,9 @@ public class TestNumericalIdUtils { @Test public void testName() { - String strId= STATE.OK.name()+"_"+ "jay vyas"; + String strId= State.OK.name()+"_"+ "jay vyas"; long id = NumericalIdUtils.toId(strId); - String strId2= STATE.CO.name()+"_"+ "jay vyas"; + String strId2= State.CO.name()+"_"+ "jay vyas"; long id2 = NumericalIdUtils.toId(strId2); System.out.println(id + " " + id2); assertFalse(id==id2); http://git-wip-us.apache.org/repos/asf/bigtop/blob/4fca4573/bigtop-bigpetstore/src/test/java/org/apache/bigtop/bigpetstore/generator/TestPetStoreTransactionGeneratorJob.java ---------------------------------------------------------------------- diff --git a/bigtop-bigpetstore/src/test/java/org/apache/bigtop/bigpetstore/generator/TestPetStoreTransactionGeneratorJob.java b/bigtop-bigpetstore/src/test/java/org/apache/bigtop/bigpetstore/generator/TestPetStoreTransactionGeneratorJob.java index d68e36c..76de3d0 100755 --- a/bigtop-bigpetstore/src/test/java/org/apache/bigtop/bigpetstore/generator/TestPetStoreTransactionGeneratorJob.java +++ 
b/bigtop-bigpetstore/src/test/java/org/apache/bigtop/bigpetstore/generator/TestPetStoreTransactionGeneratorJob.java @@ -6,7 +6,7 @@ * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -24,7 +24,7 @@ import java.io.InputStreamReader; import java.util.Date; import org.apache.bigtop.bigpetstore.generator.BPSGenerator.props; -import org.apache.bigtop.bigpetstore.generator.TransactionIteratorFactory.STATE; +import org.apache.bigtop.bigpetstore.generator.util.State; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; @@ -61,7 +61,7 @@ public class TestPetStoreTransactionGeneratorJob { * Run the job */ Path output = new Path("petstoredata/" + (new Date()).toString()); - Job createInput = BPSGenerator.createJob(output, c); + Job createInput = BPSGenerator.getCreateTransactionRecordsJob(output, c); createInput.submit(); System.out.println(createInput); createInput.waitForCompletion(true); @@ -83,10 +83,10 @@ public class TestPetStoreTransactionGeneratorJob { s = br.readLine(); System.out.println("===>" + s); recordsSeen++; - if (s.contains(STATE.CT.name())) { + if (s.contains(State.CT.name())) { CTseen = true; } - if (s.contains(STATE.AZ.name())) { + if (s.contains(State.AZ.name())) { AZseen = true; } }