From dev-return-56324-apmail-phoenix-dev-archive=phoenix.apache.org@phoenix.apache.org Fri May 3 01:24:04 2019 Return-Path: X-Original-To: apmail-phoenix-dev-archive@minotaur.apache.org Delivered-To: apmail-phoenix-dev-archive@minotaur.apache.org Received: from mail.apache.org (hermes.apache.org [207.244.88.153]) by minotaur.apache.org (Postfix) with SMTP id 051EB19005 for ; Fri, 3 May 2019 01:24:03 +0000 (UTC) Received: (qmail 41291 invoked by uid 500); 3 May 2019 01:24:03 -0000 Delivered-To: apmail-phoenix-dev-archive@phoenix.apache.org Received: (qmail 41254 invoked by uid 500); 3 May 2019 01:24:02 -0000 Mailing-List: contact dev-help@phoenix.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@phoenix.apache.org Delivered-To: mailing list dev@phoenix.apache.org Received: (qmail 41242 invoked by uid 99); 3 May 2019 01:24:02 -0000 Received: from mailrelay1-us-west.apache.org (HELO mailrelay1-us-west.apache.org) (209.188.14.139) by apache.org (qpsmtpd/0.29) with ESMTP; Fri, 03 May 2019 01:24:02 +0000 Received: from jira-lw-us.apache.org (unknown [207.244.88.139]) by mailrelay1-us-west.apache.org (ASF Mail Server at mailrelay1-us-west.apache.org) with ESMTP id 0A81BE2B88 for ; Fri, 3 May 2019 01:24:02 +0000 (UTC) Received: from jira-lw-us.apache.org (localhost [127.0.0.1]) by jira-lw-us.apache.org (ASF Mail Server at jira-lw-us.apache.org) with ESMTP id E18E925815 for ; Fri, 3 May 2019 01:24:00 +0000 (UTC) Date: Fri, 3 May 2019 01:24:00 +0000 (UTC) From: "Thomas D'Silva (JIRA)" To: dev@phoenix.apache.org Message-ID: In-Reply-To: References: Subject: [jira] [Updated] (PHOENIX-5265) Phoenix Test should use gold files for result comparison instead of using hard-corded comparison. MIME-Version: 1.0 Content-Type: text/plain; charset=utf-8 Content-Transfer-Encoding: quoted-printable X-JIRA-FingerPrint: 30527f35849b9dde25b450d4833f0394 [ https://issues.apache.org/jira/browse/PHOENIX-5265?page=3Dcom.atlass= ian.jira.plugin.system.issuetabpanels:all-tabpanel ] Thomas D'Silva updated PHOENIX-5265: ------------------------------------ Labels: hardening (was: ) > Phoenix Test should use gold files for result comparison instead of using= hard-corded comparison. > -------------------------------------------------------------------------= ------------------------ > > Key: PHOENIX-5265 > URL: https://issues.apache.org/jira/browse/PHOENIX-5265 > Project: Phoenix > Issue Type: Improvement > Environment: {code:java} > // code placeholder > @Test > public void testWithMultiCF() throws Exception { > int nRows =3D 20; > Connection conn =3D getConnection(0); > PreparedStatement stmt; > conn.createStatement().execute( > "CREATE TABLE " + fullTableName > + "(k VARCHAR PRIMARY KEY, a.v INTEGER, b.v INTEGER, = c.v INTEGER NULL, d.v INTEGER NULL) " > + tableDDLOptions ); > stmt =3D conn.prepareStatement("UPSERT INTO " + fullTableName + " VAL= UES(?,?, ?, ?, ?)"); > byte[] val =3D new byte[250]; > for (int i =3D 0; i < nRows; i++) { > stmt.setString(1, Character.toString((char)('a' + i)) + Bytes.toS= tring(val)); > stmt.setInt(2, i); > stmt.setInt(3, i); > stmt.setInt(4, i); > stmt.setInt(5, i); > stmt.executeUpdate(); > } > conn.commit(); > stmt =3D conn.prepareStatement("UPSERT INTO " + fullTableName + "(k, = c.v, d.v) VALUES(?,?,?)"); > for (int i =3D 0; i < 5; i++) { > stmt.setString(1, Character.toString((char)('a' + 'z' + i)) + Byt= es.toString(val)); > stmt.setInt(2, i); > stmt.setInt(3, i); > stmt.executeUpdate(); > } > conn.commit(); > ResultSet rs; > String actualExplainPlan; > collectStatistics(conn, fullTableName); > List keyRanges =3D getAllSplits(conn, fullTableName); > assertEquals(26, keyRanges.size()); > rs =3D conn.createStatement().executeQuery("EXPLAIN SELECT * FROM " += fullTableName); > actualExplainPlan =3D QueryUtil.getExplainPlan(rs); > assertEquals( > "CLIENT 26-CHUNK 25 ROWS " + (columnEncoded ? ( mutable ? "12= 530" : "14190" ) : (TransactionFactory.Provider.OMID.name().equals(transact= ionProvider)) ? "25320" : "12420") + > " BYTES PARALLEL 1-WAY FULL SCAN OVER " + physicalTab= leName, > actualExplainPlan); > ConnectionQueryServices services =3D conn.unwrap(PhoenixConnection.cl= ass).getQueryServices(); > List regions =3D services.getAllTableRegions(Bytes.t= oBytes(physicalTableName)); > assertEquals(1, regions.size()); > collectStatistics(conn, fullTableName, Long.toString(1000)); > keyRanges =3D getAllSplits(conn, fullTableName); > boolean oneCellPerColFamilyStorageScheme =3D !mutable && columnEncode= d; > boolean hasShadowCells =3D TransactionFactory.Provider.OMID.name().eq= uals(transactionProvider); > assertEquals(oneCellPerColFamilyStorageScheme ? 14 : hasShadowCells ?= 24 : 13, keyRanges.size()); > rs =3D conn > .createStatement() > .executeQuery( > "SELECT COLUMN_FAMILY,SUM(GUIDE_POSTS_ROW_COUNT),SUM(= GUIDE_POSTS_WIDTH),COUNT(*) from \"SYSTEM\".STATS where PHYSICAL_NAME =3D '= " > + physicalTableName + "' GROUP BY COLUMN_FAMI= LY ORDER BY COLUMN_FAMILY"); > assertTrue(rs.next()); > assertEquals("A", rs.getString(1)); > assertEquals(25, rs.getInt(2)); > assertEquals(columnEncoded ? ( mutable ? 12530 : 14190 ) : hasShadowC= ells ? 25320 : 12420, rs.getInt(3)); > assertEquals(oneCellPerColFamilyStorageScheme ? 13 : hasShadowCells ?= 23 : 12, rs.getInt(4)); > assertTrue(rs.next()); > assertEquals("B", rs.getString(1)); > assertEquals(oneCellPerColFamilyStorageScheme ? 25 : 20, rs.getInt(2)= ); > assertEquals(columnEncoded ? ( mutable ? 5600 : 7260 ) : hasShadowCel= ls ? 11260 : 5540, rs.getInt(3)); > assertEquals(oneCellPerColFamilyStorageScheme ? 7 : hasShadowCells ? = 10 : 5, rs.getInt(4)); > assertTrue(rs.next()); > assertEquals("C", rs.getString(1)); > assertEquals(25, rs.getInt(2)); > assertEquals(columnEncoded ? ( mutable ? 7005 : 7280 ) : hasShadowCel= ls ? 14085 : 6930, rs.getInt(3)); > assertEquals(hasShadowCells ? 13 : 7, rs.getInt(4)); > assertTrue(rs.next()); > assertEquals("D", rs.getString(1)); > assertEquals(25, rs.getInt(2)); > assertEquals(columnEncoded ? ( mutable ? 7005 : 7280 ) : hasShadowCel= ls ? 14085 : 6930, rs.getInt(3)); > assertEquals(hasShadowCells ? 13 : 7, rs.getInt(4)); > assertFalse(rs.next()); > // Disable stats > conn.createStatement().execute("ALTER TABLE " + fullTableName +=20 > " SET " + PhoenixDatabaseMetaData.GUIDE_POSTS_WIDTH + "=3D0")= ; > collectStatistics(conn, fullTableName); > // Assert that there are no more guideposts > rs =3D conn.createStatement().executeQuery("SELECT count(1) FROM " + = PhoenixDatabaseMetaData.SYSTEM_STATS_NAME +=20 > " WHERE " + PhoenixDatabaseMetaData.PHYSICAL_NAME + "=3D'" + = physicalTableName + "' AND " + PhoenixDatabaseMetaData.COLUMN_FAMILY + " IS= NOT NULL"); > assertTrue(rs.next()); > assertEquals(0, rs.getLong(1)); > assertFalse(rs.next()); > rs =3D conn.createStatement().executeQuery("EXPLAIN SELECT * FROM " += fullTableName); > actualExplainPlan =3D QueryUtil.getExplainPlan(rs); > assertEquals("CLIENT 1-CHUNK PARALLEL 1-WAY FULL SCAN OVER " + physic= alTableName, actualExplainPlan); > } > {code} > Reporter: Bin Shi > Priority: Major > Labels: hardening > > Currently, in Phoenix Test, the comparison of the returned result and the= expected result is always hard-coded, which is especially widely used by E= 2E and Integration test for comparing the query result, including the resul= t of EXPLAIN query plan. This has significantly=C2=A0impaired the productiv= ity and efficiency of workin on Phoenix. > The right approach is, each test case should write the generated result t= o a file under a new directory, then compares this file with a gold file un= der "gold" directory. After we make some code changes and make sure the cha= nge is correct by manually verifying several specific test cases, we can sa= fely replace the whole "gold" directory with the directory which contains a= ll new generated test files during test running. In this way, we can easily= rebase all the tests. > For example, in BaseStatsCollectorIT.java, we verify the estimated row si= ze in the returned result of EXPLAIN query plan, the row size is decided by= many factors, like the column is encoded or not, it is mutable or not, it = uses transaction provider or not, it uses=C2=A0TEPHRA or OMID as transactio= n provider, etc. The code snippet "testWithMultiCF" shows a typical test ca= se. The comparisons of the result result and the expected result are hard-c= oded in those asserts. Now imagine, if we change the way collecting stats o= r we change column encoding scheme or we change the cell storage format for= TEPHRA or OMID, which is very likely to happen, then we need to manually c= hange all those hard-coded comparison everywhere, and it isn't trivial to r= e-calculate=C2=A0all expected row sizes with the different conditions . Tod= ay you might need one or two weeks to rebase all the tests, which is a huge= waste. We should use "gold" files here, so that we can rebase the test ver= y easily. > =C2=A0BTW, the new generated test result files and gold files should be i= n XML or JSON.=C2=A0The result of "EXPLAN" query should be in XML or JSON t= oo, because=C2=A0the file format matches the structure of a query plan. > {code:java} > // code placeholder > @Test > public void testWithMultiCF() throws Exception { > int nRows =3D 20; > Connection conn =3D getConnection(0); > PreparedStatement stmt; > conn.createStatement().execute( > "CREATE TABLE " + fullTableName > + "(k VARCHAR PRIMARY KEY, a.v INTEGER, b.v INTEGER, = c.v INTEGER NULL, d.v INTEGER NULL) " > + tableDDLOptions ); > stmt =3D conn.prepareStatement("UPSERT INTO " + fullTableName + " VAL= UES(?,?, ?, ?, ?)"); > byte[] val =3D new byte[250]; > for (int i =3D 0; i < nRows; i++) { > stmt.setString(1, Character.toString((char)('a' + i)) + Bytes.toS= tring(val)); > stmt.setInt(2, i); > stmt.setInt(3, i); > stmt.setInt(4, i); > stmt.setInt(5, i); > stmt.executeUpdate(); > } > conn.commit(); > stmt =3D conn.prepareStatement("UPSERT INTO " + fullTableName + "(k, = c.v, d.v) VALUES(?,?,?)"); > for (int i =3D 0; i < 5; i++) { > stmt.setString(1, Character.toString((char)('a' + 'z' + i)) + Byt= es.toString(val)); > stmt.setInt(2, i); > stmt.setInt(3, i); > stmt.executeUpdate(); > } > conn.commit(); > ResultSet rs; > String actualExplainPlan; > collectStatistics(conn, fullTableName); > List keyRanges =3D getAllSplits(conn, fullTableName); > assertEquals(26, keyRanges.size()); > rs =3D conn.createStatement().executeQuery("EXPLAIN SELECT * FROM " += fullTableName); > actualExplainPlan =3D QueryUtil.getExplainPlan(rs); > assertEquals( > "CLIENT 26-CHUNK 25 ROWS " + (columnEncoded ? ( mutable ? "12= 530" : "14190" ) : (TransactionFactory.Provider.OMID.name().equals(transact= ionProvider)) ? "25320" : "12420") + > " BYTES PARALLEL 1-WAY FULL SCAN OVER " + physicalTab= leName, > actualExplainPlan); > ConnectionQueryServices services =3D conn.unwrap(PhoenixConnection.cl= ass).getQueryServices(); > List regions =3D services.getAllTableRegions(Bytes.t= oBytes(physicalTableName)); > assertEquals(1, regions.size()); > collectStatistics(conn, fullTableName, Long.toString(1000)); > keyRanges =3D getAllSplits(conn, fullTableName); > boolean oneCellPerColFamilyStorageScheme =3D !mutable && columnEncode= d; > boolean hasShadowCells =3D TransactionFactory.Provider.OMID.name().eq= uals(transactionProvider); > assertEquals(oneCellPerColFamilyStorageScheme ? 14 : hasShadowCells ?= 24 : 13, keyRanges.size()); > rs =3D conn > .createStatement() > .executeQuery( > "SELECT COLUMN_FAMILY,SUM(GUIDE_POSTS_ROW_COUNT),SUM(= GUIDE_POSTS_WIDTH),COUNT(*) from \"SYSTEM\".STATS where PHYSICAL_NAME =3D '= " > + physicalTableName + "' GROUP BY COLUMN_FAMI= LY ORDER BY COLUMN_FAMILY"); > assertTrue(rs.next()); > assertEquals("A", rs.getString(1)); > assertEquals(25, rs.getInt(2)); > assertEquals(columnEncoded ? ( mutable ? 12530 : 14190 ) : hasShadowC= ells ? 25320 : 12420, rs.getInt(3)); > assertEquals(oneCellPerColFamilyStorageScheme ? 13 : hasShadowCells ?= 23 : 12, rs.getInt(4)); > assertTrue(rs.next()); > assertEquals("B", rs.getString(1)); > assertEquals(oneCellPerColFamilyStorageScheme ? 25 : 20, rs.getInt(2)= ); > assertEquals(columnEncoded ? ( mutable ? 5600 : 7260 ) : hasShadowCel= ls ? 11260 : 5540, rs.getInt(3)); > assertEquals(oneCellPerColFamilyStorageScheme ? 7 : hasShadowCells ? = 10 : 5, rs.getInt(4)); > assertTrue(rs.next()); > assertEquals("C", rs.getString(1)); > assertEquals(25, rs.getInt(2)); > assertEquals(columnEncoded ? ( mutable ? 7005 : 7280 ) : hasShadowCel= ls ? 14085 : 6930, rs.getInt(3)); > assertEquals(hasShadowCells ? 13 : 7, rs.getInt(4)); > assertTrue(rs.next()); > assertEquals("D", rs.getString(1)); > assertEquals(25, rs.getInt(2)); > assertEquals(columnEncoded ? ( mutable ? 7005 : 7280 ) : hasShadowCel= ls ? 14085 : 6930, rs.getInt(3)); > assertEquals(hasShadowCells ? 13 : 7, rs.getInt(4)); > assertFalse(rs.next()); > // Disable stats > conn.createStatement().execute("ALTER TABLE " + fullTableName +=20 > " SET " + PhoenixDatabaseMetaData.GUIDE_POSTS_WIDTH + "=3D0")= ; > collectStatistics(conn, fullTableName); > // Assert that there are no more guideposts > rs =3D conn.createStatement().executeQuery("SELECT count(1) FROM " + = PhoenixDatabaseMetaData.SYSTEM_STATS_NAME +=20 > " WHERE " + PhoenixDatabaseMetaData.PHYSICAL_NAME + "=3D'" + = physicalTableName + "' AND " + PhoenixDatabaseMetaData.COLUMN_FAMILY + " IS= NOT NULL"); > assertTrue(rs.next()); > assertEquals(0, rs.getLong(1)); > assertFalse(rs.next()); > rs =3D conn.createStatement().executeQuery("EXPLAIN SELECT * FROM " += fullTableName); > actualExplainPlan =3D QueryUtil.getExplainPlan(rs); > assertEquals("CLIENT 1-CHUNK PARALLEL 1-WAY FULL SCAN OVER " + physic= alTableName, actualExplainPlan); > } > {code} > =C2=A0 -- This message was sent by Atlassian JIRA (v7.6.3#76005)