impala-commits mailing list archives

From k...@apache.org
Subject [1/7] incubator-impala git commit: IMPALA-4735: Upgrade pytest in python env to version 2.9.2.
Date Fri, 03 Feb 2017 23:57:38 GMT
Repository: incubator-impala
Updated Branches:
  refs/heads/master 6251d8b4d -> 1d933919e


IMPALA-4735: Upgrade pytest in python env to version 2.9.2.

The current version of pytest in the Impala python environment is
quite old (2.7.2) and there have been bug fixes in later versions
that we could benefit from.

Also, since the passing of params to pytest.main() as a string will
be deprecated in upcoming versions of pytest, edit run-tests.py to
instead pass params as a list. (This also means we don't need to
worry about esoteric bash limitations re: single quotes in strings.)
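
For illustration, a minimal sketch of the two calling styles (the
file path below is hypothetical, not taken from run-tests.py):

  import pytest

  # Old style: a single shell-like string, which newer pytest releases
  # deprecate and which forces awkward quoting in bash wrappers.
  pytest.main('-m "execute_serially" --junitxml=/tmp/TEST-serial.xml')

  # New style: a list of argument strings; no shell quoting needed.
  pytest.main(['-m', 'execute_serially',
               '--junitxml', '/tmp/TEST-serial.xml'])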

While working on this file, the filtering of command-line args when
running the verifier tests was also made a little more robust.

Tested by doing a standard (non-exhaustive) test run on CentOS 6.4
and Ubuntu 14.04, plus an exhaustive test run on RHEL 7.

Change-Id: I40d129e0e63ca5bee126bac6ac923abb3c7e0a67
Reviewed-on: http://gerrit.cloudera.org:8080/5640
Tested-by: Impala Public Jenkins
Reviewed-by: Jim Apple <jbapple-impala@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/e5c098b0
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/e5c098b0
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/e5c098b0

Branch: refs/heads/master
Commit: e5c098b0765bc17bfc4c2517abcbd040cb1dc307
Parents: 6251d8b
Author: David Knupp <dknupp@cloudera.com>
Authored: Fri Jan 6 11:00:59 2017 -0800
Committer: Jim Apple <jbapple-impala@apache.org>
Committed: Thu Feb 2 21:27:39 2017 +0000

----------------------------------------------------------------------
 infra/python/deps/requirements.txt |   6 +-
 tests/run-tests.py                 | 135 ++++++++++++++++++--------------
 2 files changed, 78 insertions(+), 63 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/e5c098b0/infra/python/deps/requirements.txt
----------------------------------------------------------------------
diff --git a/infra/python/deps/requirements.txt b/infra/python/deps/requirements.txt
index 9831d21..1c3a329 100644
--- a/infra/python/deps/requirements.txt
+++ b/infra/python/deps/requirements.txt
@@ -63,10 +63,10 @@ prettytable == 0.7.2
 psutil == 0.7.1
 pyelftools == 0.23
 pyparsing == 2.0.3
-pytest == 2.7.2
-  py == 1.4.30
+pytest == 2.9.2
+  py == 1.4.32
 pytest-random == 0.02
-pytest-xdist == 1.12
+pytest-xdist == 1.15.0
 python-magic == 0.4.11
 pywebhdfs == 0.3.2
   pbr == 1.8.1

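As a quick sanity check, the new pins can be confirmed from inside the
Impala python environment once the dependencies are re-bootstrapped (a
sketch; it assumes the env's interpreter is active):

  import py
  import pytest

  # These should match the pins in requirements.txt above.
  assert pytest.__version__ == '2.9.2'
  assert py.__version__ == '1.4.32'
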
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/e5c098b0/tests/run-tests.py
----------------------------------------------------------------------
diff --git a/tests/run-tests.py b/tests/run-tests.py
index aed1234..9902fc9 100755
--- a/tests/run-tests.py
+++ b/tests/run-tests.py
@@ -29,18 +29,20 @@ import os
 import pytest
 import sys
 
+from _pytest.config import FILE_OR_DIR
+
 # We whitelist valid test directories. If a new test directory is added, update this.
 VALID_TEST_DIRS = ['failure', 'query_test', 'stress', 'unittests', 'aux_query_tests',
                    'shell', 'hs2', 'catalog_service', 'metadata', 'data_errors',
                    'statestore']
 
 TEST_DIR = os.path.join(os.environ['IMPALA_HOME'], 'tests')
-TEST_RESULT_DIR = os.path.join(os.environ['IMPALA_EE_TEST_LOGS_DIR'], 'results')
+RESULT_DIR = os.path.join(os.environ['IMPALA_EE_TEST_LOGS_DIR'], 'results')
 
 # Arguments that control output logging. If additional default arguments are needed they
 # should go in the pytest.ini file.
-LOGGING_ARGS = '--junitxml=%(result_dir)s/TEST-impala-%(log_name)s.xml '\
-               '--resultlog=%(result_dir)s/TEST-impala-%(log_name)s.log'
+LOGGING_ARGS = {'--junitxml': 'TEST-impala-{0}.xml',
+                '--resultlog': 'TEST-impala-{0}.log'}
 
 # The number of concurrent tests defaults to the number of CPU cores in the system.
 # This can be overridden by setting the NUM_CONCURRENT_TESTS environment variable.
@@ -68,24 +70,24 @@ class TestExecutor:
     try:
       exit_code = pytest.main(args)
     except:
-      sys.stderr.write("Unexpected exception with pytest {}".format(args))
+      sys.stderr.write("Unexpected exception with pytest {0}".format(args))
       raise
     if exit_code != 0 and self._exit_on_error:
       sys.exit(exit_code)
     self.tests_failed = exit_code != 0 or self.tests_failed
 
 
-def build_test_args(log_base_name, valid_dirs):
+def build_test_args(base_name, valid_dirs=VALID_TEST_DIRS):
   """
-  Modify and return the command line arguments that will be passed to py.test.
+  Prepare the list of arguments that will be passed to pytest.main().
 
   Args:
-    log_base_name: the base name for the log file to write
+    base_name: the base name for the log file to write
     valid_dirs: a whitelist of sub-directories with desired tests (i.e., those
       that will not get flagged with --ignore before py.test is called.)
 
   Return:
-    a modified command line for py.test
+    a list of command line arguments
 
   For most test stages (e.g., serial, parallel), we augment the given command
   line arguments with a list of directories to ignore. However, when running the
@@ -97,59 +99,71 @@ def build_test_args(log_base_name, valid_dirs):
  then we instead need to filter out args that specify other tests (otherwise,
   they will be run again), but still retain the basic config args.
   """
-  logging_args = LOGGING_ARGS % {'result_dir': TEST_RESULT_DIR,
-                                 'log_name': log_base_name}
-
-  # The raw command line arguments need to be modified because of the way our
-  # repo is organized. We have several non-test directories and files in our
-  # tests/ path, which causes auto-discovery problems for pytest -- i.e., pytest
-  # will futiley try to execute them as tests, resulting in misleading failures.
-  # (There is a JIRA filed to restructure this: IMPALA-4417.)
+
+  # When building the list of command line args, in order to correctly filter
+  # them as needed (see issue IMPALA-4510) we should account for the fact that
+  # '--foo bar' and '--foo=bar' might be supplied by the user. We also need to
+  # be able to identify any other arbitrary options. E.g., if the user specified
+  # the following on the command line:
+  #
+  #   'run-tests.py --arg1 value1 --random_opt --arg2=value2'
+  #
+  # we want an iterable that, if unpacked as a list, would look like:
+  #
+  #   [arg1, value1, random_opt, arg2, value2]
   #
-  # e.g. --ignore="comparison" --ignore="util" --ignore=etc...
+  commandline_args = itertools.chain(*[arg.split('=') for arg in sys.argv[1:]])
+
   ignored_dirs = build_ignore_dir_arg_list(valid_dirs=valid_dirs)
 
+  logging_args = []
+  for arg, log in LOGGING_ARGS.iteritems():
+    logging_args.extend([arg, os.path.join(RESULT_DIR, log.format(base_name))])
+
   if valid_dirs != ['verifiers']:
-    # This isn't the metrics verification stage yet, so after determining the
-    # logging params and which sub-directories within tests/ to ignore, just tack
-    # on any other args from sys.argv -- excluding sys.argv[0], which of course
-    # is the script name
-    test_args = '%s %s %s' % (ignored_dirs, logging_args, ' '.join(sys.argv[1:]))
+    # This isn't the metrics verification stage yet, so we don't need to filter.
+    test_args = ignored_dirs + logging_args + list(commandline_args)
   else:
-    # When filtering, we need to account for the fact that '--foo bar' and
-    # '--foo=bar' might be supplied by the user, as well as random options. E.g.,
-    # if the user specified the following on the command line:
+    # For metrics verification, we only want to run the verifier tests, so we need
+    # to filter out any command line args that specify other test modules, classes,
+    # and functions. The list of these can be found by calling
+    #
+    #    pytest.config.getoption(FILE_OR_DIR)
+    #
+    # For example, with the following command line invocation:
     #
-    #   'run-tests.py --arg1 value1 --random_opt --arg2=value2'
+    # $ ./run-tests.py query_test/test_limit.py::TestLimit::test_limit \
+    #   query_test/test_queries.py::TestHdfsQueries --verbose -n 4 \
+    #   --table_formats=parquet/none --exploration_strategy core
     #
-    # we want an iterable that, if unpacked as a list, would look like:
+    # then pytest.config.getoption(FILE_OR_DIR) will return a list of two elements:
     #
-    #   [arg1, value1, random_opt, arg2, value2]
+    # ['query_test/test_limit.py::TestLimit::test_limit',
+    #  'query_test/test_queries.py::TestHdfsQueries']
     #
-    raw_args = itertools.chain(*[arg.split('=') for arg in sys.argv[1:]])
-    kept_args = []
-
-    for arg in raw_args:
-      try:
-        pytest.config.getvalue(arg.strip('-'))  # Raises ValueError if invalid arg
-        kept_args += [arg, str(raw_args.next())]
-      except ValueError:
-        # For any arg that's not a required pytest config arg, we can filter it out
-        continue
-    test_args = '%s %s %s' % (ignored_dirs, logging_args, ' '.join(kept_args))
+    explicit_tests = pytest.config.getoption(FILE_OR_DIR)
+    config_options = [arg for arg in commandline_args if arg not in explicit_tests]
+    test_args = ignored_dirs + logging_args + config_options
 
   return test_args
 
 
 def build_ignore_dir_arg_list(valid_dirs):
-  """ Builds a list of directories to ignore """
+  """ Builds a list of directories to ignore
+
+  Return:
+    a list ['--ignore', 'dir1', '--ignore', 'dir2', etc...]
+
+  Because we have several non-test directories and files in our tests/ path, pytest
+  can have auto-discovery problems -- i.e., pytest may try to execute some non-test
+  code as though it contained tests, resulting in misleading warnings or failures.
+  (There is a JIRA filed to restructure this: IMPALA-4417.)
+  """
   subdirs = [subdir for subdir in os.listdir(TEST_DIR) if os.path.isdir(subdir)]
-  # In bash, in single-quoted strings, single quotes cannot appear - not even escaped!
-  # Instead, one must close the string with a single-quote, insert a literal single-quote
-  # (escaped, so bash doesn't think you're starting a new string), then start your
-  # single-quoted string again. That works out to the four-character sequence '\''.
-  return ' '.join(["--ignore='%s'" % d.replace("'", "'\''")
-                   for d in set(subdirs) - set(valid_dirs)])
+  ignored_dir_list = []
+  for subdir in (set(subdirs) - set(valid_dirs)):
+    ignored_dir_list += ['--ignore', subdir]
+  return ignored_dir_list
 
 
 def print_metrics(substring):
@@ -167,6 +181,7 @@ def print_metrics(substring):
         print json.dumps(metric, indent=1)
     print "<" * 80
 
+
 if __name__ == "__main__":
   exit_on_error = '-x' in sys.argv or '--exitfirst' in sys.argv
   test_executor = TestExecutor(exit_on_error=exit_on_error)
@@ -179,28 +194,28 @@ if __name__ == "__main__":
   os.chdir(TEST_DIR)
 
   # Create the test result directory if it doesn't already exist.
-  if not os.path.exists(TEST_RESULT_DIR):
-    os.makedirs(TEST_RESULT_DIR)
+  if not os.path.exists(RESULT_DIR):
+    os.makedirs(RESULT_DIR)
 
   # First run query tests that need to be executed serially
-  args = '-m "execute_serially" %s' % build_test_args('serial', VALID_TEST_DIRS)
-  test_executor.run_tests(args)
+  base_args = ['-m', 'execute_serially']
+  test_executor.run_tests(base_args + build_test_args('serial'))
 
   # Run the stress tests
-  print_metrics('connections')
-  args = '-m "stress" -n %d %s' %\
-      (NUM_STRESS_CLIENTS, build_test_args('stress', VALID_TEST_DIRS))
-  test_executor.run_tests(args)
-  print_metrics('connections')
+  if '--collect-only' not in sys.argv:
+    print_metrics('connections')
+  base_args = ['-m', 'stress', '-n', NUM_STRESS_CLIENTS]
+  test_executor.run_tests(base_args + build_test_args('stress'))
+  if '--collect-only' not in sys.argv:
+    print_metrics('connections')
 
   # Run the remaining query tests in parallel
-  args = '-m "not execute_serially and not stress"  -n %d %s' %\
-      (NUM_CONCURRENT_TESTS, build_test_args('parallel', VALID_TEST_DIRS))
-  test_executor.run_tests(args)
+  base_args = ['-m', 'not execute_serially and not stress', '-n', NUM_CONCURRENT_TESTS]
+  test_executor.run_tests(base_args + build_test_args('parallel'))
 
   # Finally, validate impalad/statestored metrics.
-  args = build_test_args(log_base_name='verify-metrics', valid_dirs=['verifiers'])
-  args += ' verifiers/test_verify_metrics.py'
+  args = build_test_args(base_name='verify-metrics', valid_dirs=['verifiers'])
+  args.append('verifiers/test_verify_metrics.py')
   test_executor.run_tests(args)
 
   if test_executor.tests_failed:

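To make the new build_test_args() flow concrete, here is a small
standalone sketch of its three building blocks: flattening argv so that
'--foo=bar' filters the same as '--foo bar', expanding the LOGGING_ARGS
dict, and building the '--ignore' pairs. The directory names and argv
values below are hypothetical:

  import itertools
  import os

  RESULT_DIR = '/tmp/results'  # stands in for $IMPALA_EE_TEST_LOGS_DIR/results
  LOGGING_ARGS = {'--junitxml': 'TEST-impala-{0}.xml',
                  '--resultlog': 'TEST-impala-{0}.log'}

  # 1. Flatten '--foo=bar' into ['--foo', 'bar'] so both spellings can be
  #    filtered uniformly (see the comment in build_test_args()).
  argv = ['--arg1', 'value1', '--random_opt', '--arg2=value2']
  flattened = list(itertools.chain(*[arg.split('=') for arg in argv]))
  assert flattened == ['--arg1', 'value1', '--random_opt', '--arg2', 'value2']

  # 2. Expand LOGGING_ARGS into flag/value pairs for a given base name.
  logging_args = []
  for flag, log in LOGGING_ARGS.items():
      logging_args.extend([flag, os.path.join(RESULT_DIR, log.format('serial'))])
  # e.g. ['--junitxml', '/tmp/results/TEST-impala-serial.xml',
  #       '--resultlog', '/tmp/results/TEST-impala-serial.log']
  # (pair order may vary, since dict iteration order is arbitrary here)

  # 3. Build the '--ignore' flag/value pairs for every directory outside
  #    the whitelist, mirroring build_ignore_dir_arg_list().
  subdirs = ['comparison', 'util', 'query_test']
  ignored = []
  for d in set(subdirs) - set(['query_test']):
      ignored += ['--ignore', d]

  # pytest.main(ignored + logging_args + flattened) then receives one flat
  # list of strings, which is exactly what run-tests.py now passes.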
