db-derby-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From bpendle...@apache.org
Subject svn commit: r814237 - in /db/derby/code/trunk/java/testing/org/apache/derbyTesting: functionTests/tests/lang/wisconsin.java perf/clients/GroupByClient.java perf/clients/Runner.java perf/clients/WisconsinFiller.java
Date Sat, 12 Sep 2009 21:07:47 GMT
Author: bpendleton
Date: Sat Sep 12 21:07:46 2009
New Revision: 814237

URL: http://svn.apache.org/viewvc?rev=814237&view=rev
Log:
DERBY-4363: Add simple benchmark for measuring GROUP BY performance.

This change introduces a simple benchmark which can be used to measure
GROUP BY statement performance. As an example of how to run the benchmark:

   java org.apache.derbyTesting.perf.clients.Runner -load group_by -init
   -load_opts numGroupingCols=2,numGroups1=10,numGroups2=100
   
(all on one long line). 

Over time, I anticipate that we will add other capabilities to this test to
measure other aspects of GROUP BY performance, but the benchmark seems
sufficiently functional at this point to be worth checking in.


Added:
    db/derby/code/trunk/java/testing/org/apache/derbyTesting/perf/clients/GroupByClient.java
  (with props)
Modified:
    db/derby/code/trunk/java/testing/org/apache/derbyTesting/functionTests/tests/lang/wisconsin.java
    db/derby/code/trunk/java/testing/org/apache/derbyTesting/perf/clients/Runner.java
    db/derby/code/trunk/java/testing/org/apache/derbyTesting/perf/clients/WisconsinFiller.java

Modified: db/derby/code/trunk/java/testing/org/apache/derbyTesting/functionTests/tests/lang/wisconsin.java
URL: http://svn.apache.org/viewvc/db/derby/code/trunk/java/testing/org/apache/derbyTesting/functionTests/tests/lang/wisconsin.java?rev=814237&r1=814236&r2=814237&view=diff
==============================================================================
--- db/derby/code/trunk/java/testing/org/apache/derbyTesting/functionTests/tests/lang/wisconsin.java
(original)
+++ db/derby/code/trunk/java/testing/org/apache/derbyTesting/functionTests/tests/lang/wisconsin.java
Sat Sep 12 21:07:46 2009
@@ -64,6 +64,10 @@
 	
 	public static void createTables(Connection conn, boolean compress)
 			throws SQLException {
+                createTables(conn, compress, 10000);
+        }
+	public static void createTables(Connection conn, boolean compress, int numRows)
+			throws SQLException {
 
 		Statement stmt = conn.createStatement();
 		
@@ -83,9 +87,9 @@
 											 "stringu1 char(52) not null, " +
 											 "stringu2 char(52) not null, " +
 											 "string4 char(52) )");
-		//--insert 10000 rows into TENKTUP1
+		//--insert numRows rows into TENKTUP1
 		WISCInsert wi = new WISCInsert();
-		wi.doWISCInsert(10000, "TENKTUP1", conn);
+		wi.doWISCInsert(numRows, "TENKTUP1", conn);
 		
 		stmt.execute("create unique index TK1UNIQUE1 on TENKTUP1(unique1)");
 		stmt.execute("create unique index TK1UNIQUE2 on TENKTUP1(unique2)");
@@ -117,9 +121,9 @@
 											"stringu1 char(52), " +
 											"stringu2 char(52), " +
 											"string4 char(52) )");
-		//-- insert 10000 rows into TENKTUP2
+		//-- insert numRows rows into TENKTUP2
 		wi = new WISCInsert();
-		wi.doWISCInsert(10000, "TENKTUP2", conn);
+		wi.doWISCInsert(numRows, "TENKTUP2", conn);
 		
 		stmt.execute("create unique index TK2UNIQUE1 on TENKTUP2(unique1)");
 		stmt.execute("create unique index TK2UNIQUE2 on TENKTUP2(unique2)");

Added: db/derby/code/trunk/java/testing/org/apache/derbyTesting/perf/clients/GroupByClient.java
URL: http://svn.apache.org/viewvc/db/derby/code/trunk/java/testing/org/apache/derbyTesting/perf/clients/GroupByClient.java?rev=814237&view=auto
==============================================================================
--- db/derby/code/trunk/java/testing/org/apache/derbyTesting/perf/clients/GroupByClient.java
(added)
+++ db/derby/code/trunk/java/testing/org/apache/derbyTesting/perf/clients/GroupByClient.java
Sat Sep 12 21:07:46 2009
@@ -0,0 +1,217 @@
+/*
+
+Derby - Class org.apache.derbyTesting.perf.clients.GroupByClient
+
+Licensed to the Apache Software Foundation (ASF) under one or more
+contributor license agreements.  See the NOTICE file distributed with
+this work for additional information regarding copyright ownership.
+The ASF licenses this file to You under the Apache License, Version 2.0
+(the "License"); you may not use this file except in compliance with
+the License.  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+*/
+
+package org.apache.derbyTesting.perf.clients;
+
+import java.sql.Connection;
+import java.sql.PreparedStatement;
+import java.sql.Statement;
+import java.sql.SQLException;
+import java.sql.ResultSet;
+import java.sql.DatabaseMetaData;
+
+/**
+ * Test client which performs iterated GROUP BY statements on the
+ * {@code ONEKTUP} tables generated by {@code WisconsinFiller}.
+ *
+ * Based on the parameters specified when run, we perform a particular
+ * GROUP BY statement, and fetch and check the number of rows returned,
+ * as part of a performance run controlled by perf.clients.Runner.
+ *
+ * For example, you could cause this benchmark's GROUP BY to be:
+ * - one which returns 10 groups, with 1000 rows in each group, or
+ * - one which returns 100 groups, with 100 rows in each group, or
+ * - one which returns 1000 groups, with 10 rows in each group, etc.
+ * With correspondingly larger numbers of groups as the scale factor grows.
+ *
+ * You can use more rows by passing '-load_opts numRows=100000', e.g. Note that
+ * this only has an effect when you run -init. Note that changing the number
+ * of rows in the table also changes the expected size of each group; we issue
+ * a select count(*) query at the start to figure out the expected group size,
+ * but this hack only works with table sizes that are multiples of 1000.
+ *
+ * If you use a substantially larger number of rows (say, 100000 or more), you
+ * should specify '-rt 300' or higher so that a valid number of executions
+ * can occur, as the benchmark starts to slow down dramatically with large
+ * numbers of rows.
+ *
+ * To prepare the database for this little benchmark:
+ *  java org.apache.derbyTesting.perf.clients.Runner -init -load group_by
+ *       -load_opts numRows=NNNNNN (if you want more than 10,000 rows in DB)
+ * (this will also run the default GROUP BY, which is GROUP BY TEN)
+ *
+ * On subsequent runs you can skip the '-init', and should instead specify
+ * a particular GROUP BY to run, which you do by specifying:
+ * - the number of GROUP_BY columns (-load_opts numGroupingCols=N), and
+ * - the number of groups for each column (-load_opts numGroupsK=NNNN)
+ *   (NOTE: we count from 1, not from 0, with these parameters!)
+ *
+ * I've tried this benchmark up to 5 grouping columns, which seemed like
+ * plenty for the benchmarking I wanted to do. The code supports more, but
+ * I'm not sure if it works or not.
+ *
+ * For example, this runs a 2-column group by:
+ *
+ * -load group_by -load_opts numGroupingCols=2,numGroups1=10,numGroups2=100
+ *
+ * The resulting SQL will be:
+ *
+ *  SELECT TEN, ONEPERCENT, COUNT(*) FROM TENKTUP1 GROUP BY TEN,ONEPERCENT
+ *
+ * Note that due to the way that the data in the TEN and ONEPERCENT columns
+ * are loaded, they are not independent, so this actually produces 100 groups.
+ *
+ * If numGroupingCols == 1, and thus the code can predict the number of
+ * rows that ought to be in each group, and the total number of groups, then
+ * it checks those values in the result as well.
+ */
+public class GroupByClient implements Client
+{
+    private static final int  MAX_GROUPING_COLS = 25;
+
+    private Connection        conn;
+    private PreparedStatement ps;
+    private int               numGroupingCols;
+    //                        Note that we count from 1. Index 0 is unused.
+    private int               numGroups[] = new int[MAX_GROUPING_COLS+1];
+    private String            groupingExpr[] = new String[MAX_GROUPING_COLS+1];
+    private int               totalExpectedGroups;
+    private int               tableSize = 0;
+    private String            sql;
+
+    public GroupByClient()
+    {
+        numGroupingCols = Runner.getLoadOpt("numGroupingCols", -1);
+        if (numGroupingCols > 0)
+        {
+            totalExpectedGroups = 1;
+            for (int i = 1; i < numGroups.length; i++)
+            {
+                numGroups[i] = Runner.getLoadOpt("numGroups"+i, 1);
+                totalExpectedGroups *= numGroups[i];
+                getGroupingExpr(i);
+            }
+        }
+        else
+        {
+            // default statement is 1 grouping column with 10 groups:
+            numGroupingCols = 1;
+            numGroups[1] = 10;
+            totalExpectedGroups = 10;
+            getGroupingExpr(1);
+        }
+        sql = buildStatement();
+    }
+
+    public void init(Connection c)
+        throws SQLException
+    {
+        conn = c;
+        //dumpTables();
+        getTableSize();
+        System.out.println("We'll run '"+sql+"'");
+        if (numGroupingCols == 1)
+           System.out.println("... which should produce " +
+                totalExpectedGroups + " total groups, each containing " +
+                (tableSize / totalExpectedGroups) + " rows.");
+
+        ps = c.prepareStatement( sql );
+        c.setAutoCommit(false);
+    }
+    private void getGroupingExpr(int i)
+    {
+        if (numGroups[i] == 1)
+            groupingExpr[i] = null;
+        else if (numGroups[i] == 10)
+            groupingExpr[i] = "TEN";
+        else if (numGroups[i] == 100)
+            groupingExpr[i] = "ONEPERCENT";
+        else
+            groupingExpr[i] = "MOD(UNIQUE1,"+numGroups[i]+")";
+    }
+    private String buildStatement()
+    {
+        StringBuffer buf = new StringBuffer();
+        buf.append("SELECT ");
+        appendGroups(buf);
+        buf.append( ", COUNT(*) FROM TENKTUP1 GROUP BY ");
+        appendGroups(buf);
+        return buf.toString();
+    }
+    private void appendGroups(StringBuffer buf)
+    {
+        for (int i = 1; i < numGroups.length && numGroups[i] > 1; i++)
+        {
+            if (i > 1)
+                buf.append(",");
+            buf.append(groupingExpr[i]);
+        }
+    }
+    private void dumpTables()
+        throws SQLException
+    {
+        ResultSet rs = conn.getMetaData().getTables(null,null,"%",null);
+        while (rs.next()) {
+            System.out.println("Schem=" + rs.getString("TABLE_SCHEM") +
+                    " name=" + rs.getString("TABLE_NAME"));
+        }
+        rs.close();
+        conn.commit();
+    }
+    private void getTableSize()
+        throws SQLException
+    {
+        // Since the TENKTUP1 table can have more or less rows, depending
+        // on the value of the -load_opts numRows=N parameter, we run a
+        // count query here to figure out how many rows there actually are.
+
+        Statement s = conn.createStatement();
+        ResultSet rs = s.executeQuery("select count(*) from tenktup1");
+        if (!rs.next())
+            throw new RuntimeException("Unable to find size of tenktup1");
+        tableSize = rs.getInt(1);
+        rs.close();
+        s.close();
+        conn.commit();
+    }
+
+    public void doWork()
+        throws SQLException
+    {
+        ResultSet rs = ps.executeQuery();
+        int expectedCount = tableSize / totalExpectedGroups;
+        int numRows = 0;
+        while (rs.next()) {
+            numRows++;
+            int groupName = rs.getInt(1);
+            int theCount = rs.getInt(2);
+            if (numGroupingCols == 1 && theCount != expectedCount)
+                System.out.println("group="+groupName+", count="+theCount+
+                        ",expectedCount="+expectedCount);
+        }
+        if (numGroupingCols == 1 && numRows != totalExpectedGroups)
+            System.out.println("Total rows from GROUP BY was " + numRows +
+                    ", expected total rows to be " + totalExpectedGroups);
+        rs.close();
+        conn.commit();
+    }
+
+}

Propchange: db/derby/code/trunk/java/testing/org/apache/derbyTesting/perf/clients/GroupByClient.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: db/derby/code/trunk/java/testing/org/apache/derbyTesting/perf/clients/Runner.java
URL: http://svn.apache.org/viewvc/db/derby/code/trunk/java/testing/org/apache/derbyTesting/perf/clients/Runner.java?rev=814237&r1=814236&r2=814237&view=diff
==============================================================================
--- db/derby/code/trunk/java/testing/org/apache/derbyTesting/perf/clients/Runner.java (original)
+++ db/derby/code/trunk/java/testing/org/apache/derbyTesting/perf/clients/Runner.java Sat
Sep 12 21:07:46 2009
@@ -200,7 +200,7 @@
      * @return the value of the option
      * @throws NumberFormatException if the value is not an {@code int}
      */
-    private static int getLoadOpt(String option, int defaultValue) {
+    static int getLoadOpt(String option, int defaultValue) {
         String val = (String) loadOpts.get(option);
         return val == null ? defaultValue : Integer.parseInt(val);
     }
@@ -235,6 +235,7 @@
 "      * sr_update_multi - single-record update on a random table\n" +
 "                    (32 tables with a single row each)\n" +
 "      * index_join - join of two tables (using indexed columns)\n" +
+"      * group_by - GROUP BY queries against TENKTUP1\n" +
 "      * bank_tx - emulate simple bank transactions, similar to TPC-B. The\n" +
 "                  following load-specific options are accepted:\n" +
 "            - branches=NN: specifies the number of branches in the db\n" +
@@ -303,6 +304,8 @@
             return new SingleRecordFiller(1, 32);
         } else if (load.equals("index_join")) {
             return new WisconsinFiller();
+        } else if (load.equals("group_by")) {
+            return new WisconsinFiller(getLoadOpt("numRows", 10000));
         } else if (load.equals("bank_tx")) {
             return new BankAccountFiller(
                 getLoadOpt("branches", 1),
@@ -337,6 +340,8 @@
             return new SingleRecordUpdateClient(1, 32);
         } else if (load.equals("index_join")) {
             return new IndexJoinClient();
+        } else if (load.equals("group_by")) {
+            return new GroupByClient();
         } else if (load.equals("bank_tx")) {
             return new BankTransactionClient(
                 getLoadOpt("branches", 1),

Modified: db/derby/code/trunk/java/testing/org/apache/derbyTesting/perf/clients/WisconsinFiller.java
URL: http://svn.apache.org/viewvc/db/derby/code/trunk/java/testing/org/apache/derbyTesting/perf/clients/WisconsinFiller.java?rev=814237&r1=814236&r2=814237&view=diff
==============================================================================
--- db/derby/code/trunk/java/testing/org/apache/derbyTesting/perf/clients/WisconsinFiller.java
(original)
+++ db/derby/code/trunk/java/testing/org/apache/derbyTesting/perf/clients/WisconsinFiller.java
Sat Sep 12 21:07:46 2009
@@ -34,6 +34,16 @@
  */
 public class WisconsinFiller implements DBFiller {
 
+    int numRows;
+    public WisconsinFiller(int nRows)
+    {
+        super();
+        numRows = nRows;
+    }
+    public WisconsinFiller()
+    {
+        this(10000);
+    }
     public void fill(Connection c) throws SQLException {
         c.setAutoCommit(false);
 
@@ -42,7 +52,7 @@
         dropTable(c, "ONEKTUP");
         dropTable(c, "BPRIME");
 
-        wisconsin.createTables(c, false);
+        wisconsin.createTables(c, false, numRows);
 
         c.commit();
     }



Mime
View raw message