From dev-return-59663-archive-asf-public=cust-asf.ponee.io@phoenix.apache.org Tue Jan 14 23:12:06 2020 Return-Path: X-Original-To: archive-asf-public@cust-asf.ponee.io Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [207.244.88.153]) by mx-eu-01.ponee.io (Postfix) with SMTP id DD0D418061A for ; Wed, 15 Jan 2020 00:12:05 +0100 (CET) Received: (qmail 43795 invoked by uid 500); 14 Jan 2020 23:12:05 -0000 Mailing-List: contact dev-help@phoenix.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@phoenix.apache.org Delivered-To: mailing list dev@phoenix.apache.org Received: (qmail 43778 invoked by uid 99); 14 Jan 2020 23:12:05 -0000 Received: from mailrelay1-us-west.apache.org (HELO mailrelay1-us-west.apache.org) (209.188.14.139) by apache.org (qpsmtpd/0.29) with ESMTP; Tue, 14 Jan 2020 23:12:05 +0000 Received: from jira-he-de.apache.org (static.172.67.40.188.clients.your-server.de [188.40.67.172]) by mailrelay1-us-west.apache.org (ASF Mail Server at mailrelay1-us-west.apache.org) with ESMTP id 7AF3FE317F for ; Tue, 14 Jan 2020 23:12:02 +0000 (UTC) Received: from jira-he-de.apache.org (localhost.localdomain [127.0.0.1]) by jira-he-de.apache.org (ASF Mail Server at jira-he-de.apache.org) with ESMTP id A44E1782351 for ; Tue, 14 Jan 2020 23:12:00 +0000 (UTC) Date: Tue, 14 Jan 2020 23:12:00 +0000 (UTC) From: "Viraj Jasani (Jira)" To: dev@phoenix.apache.org Message-ID: In-Reply-To: References: Subject: [jira] [Assigned] (PHOENIX-5265) Phoenix Test should use gold files for result comparison instead of using hard-corded comparison. MIME-Version: 1.0 Content-Type: text/plain; charset=utf-8 Content-Transfer-Encoding: quoted-printable X-JIRA-FingerPrint: 30527f35849b9dde25b450d4833f0394 [ https://issues.apache.org/jira/browse/PHOENIX-5265?page=3Dcom.atlass= ian.jira.plugin.system.issuetabpanels:all-tabpanel ] Viraj Jasani reassigned PHOENIX-5265: ------------------------------------- Assignee: Viraj Jasani > Phoenix Test should use gold files for result comparison instead of using= hard-corded comparison. > -------------------------------------------------------------------------= ------------------------ > > Key: PHOENIX-5265 > URL: https://issues.apache.org/jira/browse/PHOENIX-5265 > Project: Phoenix > Issue Type: Improvement > Environment: {code:java} > // code placeholder > @Test > public void testWithMultiCF() throws Exception { > int nRows =3D 20; > Connection conn =3D getConnection(0); > PreparedStatement stmt; > conn.createStatement().execute( > "CREATE TABLE " + fullTableName > + "(k VARCHAR PRIMARY KEY, a.v INTEGER, b.v INTEGER, = c.v INTEGER NULL, d.v INTEGER NULL) " > + tableDDLOptions ); > stmt =3D conn.prepareStatement("UPSERT INTO " + fullTableName + " VAL= UES(?,?, ?, ?, ?)"); > byte[] val =3D new byte[250]; > for (int i =3D 0; i < nRows; i++) { > stmt.setString(1, Character.toString((char)('a' + i)) + Bytes.toS= tring(val)); > stmt.setInt(2, i); > stmt.setInt(3, i); > stmt.setInt(4, i); > stmt.setInt(5, i); > stmt.executeUpdate(); > } > conn.commit(); > stmt =3D conn.prepareStatement("UPSERT INTO " + fullTableName + "(k, = c.v, d.v) VALUES(?,?,?)"); > for (int i =3D 0; i < 5; i++) { > stmt.setString(1, Character.toString((char)('a' + 'z' + i)) + Byt= es.toString(val)); > stmt.setInt(2, i); > stmt.setInt(3, i); > stmt.executeUpdate(); > } > conn.commit(); > ResultSet rs; > String actualExplainPlan; > collectStatistics(conn, fullTableName); > List keyRanges =3D getAllSplits(conn, fullTableName); > assertEquals(26, keyRanges.size()); > rs =3D conn.createStatement().executeQuery("EXPLAIN SELECT * FROM " += fullTableName); > actualExplainPlan =3D QueryUtil.getExplainPlan(rs); > assertEquals( > "CLIENT 26-CHUNK 25 ROWS " + (columnEncoded ? ( mutable ? "12= 530" : "14190" ) : (TransactionFactory.Provider.OMID.name().equals(transact= ionProvider)) ? "25320" : "12420") + > " BYTES PARALLEL 1-WAY FULL SCAN OVER " + physicalTab= leName, > actualExplainPlan); > ConnectionQueryServices services =3D conn.unwrap(PhoenixConnection.cl= ass).getQueryServices(); > List regions =3D services.getAllTableRegions(Bytes.t= oBytes(physicalTableName)); > assertEquals(1, regions.size()); > collectStatistics(conn, fullTableName, Long.toString(1000)); > keyRanges =3D getAllSplits(conn, fullTableName); > boolean oneCellPerColFamilyStorageScheme =3D !mutable && columnEncode= d; > boolean hasShadowCells =3D TransactionFactory.Provider.OMID.name().eq= uals(transactionProvider); > assertEquals(oneCellPerColFamilyStorageScheme ? 14 : hasShadowCells ?= 24 : 13, keyRanges.size()); > rs =3D conn > .createStatement() > .executeQuery( > "SELECT COLUMN_FAMILY,SUM(GUIDE_POSTS_ROW_COUNT),SUM(= GUIDE_POSTS_WIDTH),COUNT(*) from \"SYSTEM\".STATS where PHYSICAL_NAME =3D '= " > + physicalTableName + "' GROUP BY COLUMN_FAMI= LY ORDER BY COLUMN_FAMILY"); > assertTrue(rs.next()); > assertEquals("A", rs.getString(1)); > assertEquals(25, rs.getInt(2)); > assertEquals(columnEncoded ? ( mutable ? 12530 : 14190 ) : hasShadowC= ells ? 25320 : 12420, rs.getInt(3)); > assertEquals(oneCellPerColFamilyStorageScheme ? 13 : hasShadowCells ?= 23 : 12, rs.getInt(4)); > assertTrue(rs.next()); > assertEquals("B", rs.getString(1)); > assertEquals(oneCellPerColFamilyStorageScheme ? 25 : 20, rs.getInt(2)= ); > assertEquals(columnEncoded ? ( mutable ? 5600 : 7260 ) : hasShadowCel= ls ? 11260 : 5540, rs.getInt(3)); > assertEquals(oneCellPerColFamilyStorageScheme ? 7 : hasShadowCells ? = 10 : 5, rs.getInt(4)); > assertTrue(rs.next()); > assertEquals("C", rs.getString(1)); > assertEquals(25, rs.getInt(2)); > assertEquals(columnEncoded ? ( mutable ? 7005 : 7280 ) : hasShadowCel= ls ? 14085 : 6930, rs.getInt(3)); > assertEquals(hasShadowCells ? 13 : 7, rs.getInt(4)); > assertTrue(rs.next()); > assertEquals("D", rs.getString(1)); > assertEquals(25, rs.getInt(2)); > assertEquals(columnEncoded ? ( mutable ? 7005 : 7280 ) : hasShadowCel= ls ? 14085 : 6930, rs.getInt(3)); > assertEquals(hasShadowCells ? 13 : 7, rs.getInt(4)); > assertFalse(rs.next()); > // Disable stats > conn.createStatement().execute("ALTER TABLE " + fullTableName +=20 > " SET " + PhoenixDatabaseMetaData.GUIDE_POSTS_WIDTH + "=3D0")= ; > collectStatistics(conn, fullTableName); > // Assert that there are no more guideposts > rs =3D conn.createStatement().executeQuery("SELECT count(1) FROM " + = PhoenixDatabaseMetaData.SYSTEM_STATS_NAME +=20 > " WHERE " + PhoenixDatabaseMetaData.PHYSICAL_NAME + "=3D'" + = physicalTableName + "' AND " + PhoenixDatabaseMetaData.COLUMN_FAMILY + " IS= NOT NULL"); > assertTrue(rs.next()); > assertEquals(0, rs.getLong(1)); > assertFalse(rs.next()); > rs =3D conn.createStatement().executeQuery("EXPLAIN SELECT * FROM " += fullTableName); > actualExplainPlan =3D QueryUtil.getExplainPlan(rs); > assertEquals("CLIENT 1-CHUNK PARALLEL 1-WAY FULL SCAN OVER " + physic= alTableName, actualExplainPlan); > } > {code} > Reporter: Bin Shi > Assignee: Viraj Jasani > Priority: Major > Labels: hardening, phoenix-hardening > > Currently, in Phoenix Test, the comparison of the returned result and the= expected result is always hard-coded, which is especially widely used by E= 2E and Integration test for comparing the query result, including the resul= t of EXPLAIN query plan. This has significantly=C2=A0impaired the productiv= ity and efficiency of workin on Phoenix. > The right approach is, each test case should write the generated result t= o a file under a new directory, then compares this file with a gold file un= der "gold" directory. After we make some code changes and make sure the cha= nge is correct by manually verifying several specific test cases, we can sa= fely replace the whole "gold" directory with the directory which contains a= ll new generated test files during test running. In this way, we can easily= rebase all the tests. > For example, in BaseStatsCollectorIT.java, we verify the estimated row si= ze in the returned result of EXPLAIN query plan, the row size is decided by= many factors, like the column is encoded or not, it is mutable or not, it = uses transaction provider or not, it uses=C2=A0TEPHRA or OMID as transactio= n provider, etc. The code snippet "testWithMultiCF" shows a typical test ca= se. The comparisons of the result result and the expected result are hard-c= oded in those asserts. Now imagine, if we change the way collecting stats o= r we change column encoding scheme or we change the cell storage format for= TEPHRA or OMID, which is very likely to happen, then we need to manually c= hange all those hard-coded comparison everywhere, and it isn't trivial to r= e-calculate=C2=A0all expected row sizes with the different conditions . Tod= ay you might need one or two weeks to rebase all the tests, which is a huge= waste. We should use "gold" files here, so that we can rebase the test ver= y easily. > =C2=A0BTW, the new generated test result files and gold files should be i= n XML or JSON.=C2=A0The result of "EXPLAN" query should be in XML or JSON t= oo, because=C2=A0the file format matches the structure of a query plan. > {code:java} > // code placeholder > @Test > public void testWithMultiCF() throws Exception { > int nRows =3D 20; > Connection conn =3D getConnection(0); > PreparedStatement stmt; > conn.createStatement().execute( > "CREATE TABLE " + fullTableName > + "(k VARCHAR PRIMARY KEY, a.v INTEGER, b.v INTEGER, = c.v INTEGER NULL, d.v INTEGER NULL) " > + tableDDLOptions ); > stmt =3D conn.prepareStatement("UPSERT INTO " + fullTableName + " VAL= UES(?,?, ?, ?, ?)"); > byte[] val =3D new byte[250]; > for (int i =3D 0; i < nRows; i++) { > stmt.setString(1, Character.toString((char)('a' + i)) + Bytes.toS= tring(val)); > stmt.setInt(2, i); > stmt.setInt(3, i); > stmt.setInt(4, i); > stmt.setInt(5, i); > stmt.executeUpdate(); > } > conn.commit(); > stmt =3D conn.prepareStatement("UPSERT INTO " + fullTableName + "(k, = c.v, d.v) VALUES(?,?,?)"); > for (int i =3D 0; i < 5; i++) { > stmt.setString(1, Character.toString((char)('a' + 'z' + i)) + Byt= es.toString(val)); > stmt.setInt(2, i); > stmt.setInt(3, i); > stmt.executeUpdate(); > } > conn.commit(); > ResultSet rs; > String actualExplainPlan; > collectStatistics(conn, fullTableName); > List keyRanges =3D getAllSplits(conn, fullTableName); > assertEquals(26, keyRanges.size()); > rs =3D conn.createStatement().executeQuery("EXPLAIN SELECT * FROM " += fullTableName); > actualExplainPlan =3D QueryUtil.getExplainPlan(rs); > assertEquals( > "CLIENT 26-CHUNK 25 ROWS " + (columnEncoded ? ( mutable ? "12= 530" : "14190" ) : (TransactionFactory.Provider.OMID.name().equals(transact= ionProvider)) ? "25320" : "12420") + > " BYTES PARALLEL 1-WAY FULL SCAN OVER " + physicalTab= leName, > actualExplainPlan); > ConnectionQueryServices services =3D conn.unwrap(PhoenixConnection.cl= ass).getQueryServices(); > List regions =3D services.getAllTableRegions(Bytes.t= oBytes(physicalTableName)); > assertEquals(1, regions.size()); > collectStatistics(conn, fullTableName, Long.toString(1000)); > keyRanges =3D getAllSplits(conn, fullTableName); > boolean oneCellPerColFamilyStorageScheme =3D !mutable && columnEncode= d; > boolean hasShadowCells =3D TransactionFactory.Provider.OMID.name().eq= uals(transactionProvider); > assertEquals(oneCellPerColFamilyStorageScheme ? 14 : hasShadowCells ?= 24 : 13, keyRanges.size()); > rs =3D conn > .createStatement() > .executeQuery( > "SELECT COLUMN_FAMILY,SUM(GUIDE_POSTS_ROW_COUNT),SUM(= GUIDE_POSTS_WIDTH),COUNT(*) from \"SYSTEM\".STATS where PHYSICAL_NAME =3D '= " > + physicalTableName + "' GROUP BY COLUMN_FAMI= LY ORDER BY COLUMN_FAMILY"); > assertTrue(rs.next()); > assertEquals("A", rs.getString(1)); > assertEquals(25, rs.getInt(2)); > assertEquals(columnEncoded ? ( mutable ? 12530 : 14190 ) : hasShadowC= ells ? 25320 : 12420, rs.getInt(3)); > assertEquals(oneCellPerColFamilyStorageScheme ? 13 : hasShadowCells ?= 23 : 12, rs.getInt(4)); > assertTrue(rs.next()); > assertEquals("B", rs.getString(1)); > assertEquals(oneCellPerColFamilyStorageScheme ? 25 : 20, rs.getInt(2)= ); > assertEquals(columnEncoded ? ( mutable ? 5600 : 7260 ) : hasShadowCel= ls ? 11260 : 5540, rs.getInt(3)); > assertEquals(oneCellPerColFamilyStorageScheme ? 7 : hasShadowCells ? = 10 : 5, rs.getInt(4)); > assertTrue(rs.next()); > assertEquals("C", rs.getString(1)); > assertEquals(25, rs.getInt(2)); > assertEquals(columnEncoded ? ( mutable ? 7005 : 7280 ) : hasShadowCel= ls ? 14085 : 6930, rs.getInt(3)); > assertEquals(hasShadowCells ? 13 : 7, rs.getInt(4)); > assertTrue(rs.next()); > assertEquals("D", rs.getString(1)); > assertEquals(25, rs.getInt(2)); > assertEquals(columnEncoded ? ( mutable ? 7005 : 7280 ) : hasShadowCel= ls ? 14085 : 6930, rs.getInt(3)); > assertEquals(hasShadowCells ? 13 : 7, rs.getInt(4)); > assertFalse(rs.next()); > // Disable stats > conn.createStatement().execute("ALTER TABLE " + fullTableName +=20 > " SET " + PhoenixDatabaseMetaData.GUIDE_POSTS_WIDTH + "=3D0")= ; > collectStatistics(conn, fullTableName); > // Assert that there are no more guideposts > rs =3D conn.createStatement().executeQuery("SELECT count(1) FROM " + = PhoenixDatabaseMetaData.SYSTEM_STATS_NAME +=20 > " WHERE " + PhoenixDatabaseMetaData.PHYSICAL_NAME + "=3D'" + = physicalTableName + "' AND " + PhoenixDatabaseMetaData.COLUMN_FAMILY + " IS= NOT NULL"); > assertTrue(rs.next()); > assertEquals(0, rs.getLong(1)); > assertFalse(rs.next()); > rs =3D conn.createStatement().executeQuery("EXPLAIN SELECT * FROM " += fullTableName); > actualExplainPlan =3D QueryUtil.getExplainPlan(rs); > assertEquals("CLIENT 1-CHUNK PARALLEL 1-WAY FULL SCAN OVER " + physic= alTableName, actualExplainPlan); > } > {code} > =C2=A0 -- This message was sent by Atlassian Jira (v8.3.4#803005)