From commits-return-520-archive-asf-public=cust-asf.ponee.io@superset.incubator.apache.org Fri Jan 12 21:05:17 2018 Return-Path: X-Original-To: archive-asf-public@eu.ponee.io Delivered-To: archive-asf-public@eu.ponee.io Received: from cust-asf.ponee.io (cust-asf.ponee.io [163.172.22.183]) by mx-eu-01.ponee.io (Postfix) with ESMTP id 8215B180621 for ; Fri, 12 Jan 2018 21:05:17 +0100 (CET) Received: by cust-asf.ponee.io (Postfix) id 70505160C42; Fri, 12 Jan 2018 20:05:17 +0000 (UTC) Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by cust-asf.ponee.io (Postfix) with SMTP id 8FA3D160C20 for ; Fri, 12 Jan 2018 21:05:16 +0100 (CET) Received: (qmail 14658 invoked by uid 500); 12 Jan 2018 20:05:15 -0000 Mailing-List: contact commits-help@superset.incubator.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@superset.incubator.apache.org Delivered-To: mailing list commits@superset.incubator.apache.org Received: (qmail 14640 invoked by uid 99); 12 Jan 2018 20:05:15 -0000 Received: from ec2-52-202-80-70.compute-1.amazonaws.com (HELO gitbox.apache.org) (52.202.80.70) by apache.org (qpsmtpd/0.29) with ESMTP; Fri, 12 Jan 2018 20:05:15 +0000 Received: by gitbox.apache.org (ASF Mail Server at gitbox.apache.org, from userid 33) id 35E8F821A6; Fri, 12 Jan 2018 20:05:15 +0000 (UTC) Date: Fri, 12 Jan 2018 20:05:15 +0000 To: "commits@superset.apache.org" Subject: [incubator-superset] branch master updated: [cache] Using the query as the basis of the cache key (#4016) MIME-Version: 1.0 Content-Type: text/plain; charset=utf-8 Content-Transfer-Encoding: 8bit Message-ID: <151578751502.2561.9313770511420353745@gitbox.apache.org> From: graceguo@apache.org X-Git-Host: gitbox.apache.org X-Git-Repo: incubator-superset X-Git-Refname: refs/heads/master X-Git-Reftype: branch X-Git-Oldrev: 8069d6221d943694f544a2977deac1089176189a X-Git-Newrev: 
a7a6678d5ca535e29e6e021b7404c2e5c3599fdb X-Git-Rev: a7a6678d5ca535e29e6e021b7404c2e5c3599fdb X-Git-NotificationType: ref_changed_plus_diff X-Git-Multimail-Version: 1.5.dev Auto-Submitted: auto-generated This is an automated email from the ASF dual-hosted git repository. graceguo pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/incubator-superset.git The following commit(s) were added to refs/heads/master by this push: new a7a6678 [cache] Using the query as the basis of the cache key (#4016) a7a6678 is described below commit a7a6678d5ca535e29e6e021b7404c2e5c3599fdb Author: John Bodley <4567245+john-bodley@users.noreply.github.com> AuthorDate: Fri Jan 12 12:05:12 2018 -0800 [cache] Using the query as the basis of the cache key (#4016) --- superset/assets/npm-debug.log.3344327073 | 0 superset/viz.py | 115 ++++++++++++++++--------------- tests/core_tests.py | 1 - tests/viz_tests.py | 5 +- 4 files changed, 62 insertions(+), 59 deletions(-) diff --git a/superset/assets/npm-debug.log.3344327073 b/superset/assets/npm-debug.log.3344327073 new file mode 100644 index 0000000..e69de29 diff --git a/superset/viz.py b/superset/viz.py index d046241..62e02ac 100644 --- a/superset/viz.py +++ b/superset/viz.py @@ -214,8 +214,6 @@ class BaseViz(object): @property def cache_timeout(self): - if self.form_data.get('cache_timeout'): - return int(self.form_data.get('cache_timeout')) if self.datasource.cache_timeout: return self.datasource.cache_timeout if ( @@ -229,44 +227,50 @@ class BaseViz(object): self.get_payload(force), default=utils.json_int_dttm_ser, ignore_nan=True) - @property - def cache_key(self): - form_data = self.form_data.copy() - merge_extra_filters(form_data) - s = str([(k, form_data[k]) for k in sorted(form_data.keys())]) - return hashlib.md5(s.encode('utf-8')).hexdigest() + def cache_key(self, query_obj): + """ + The cache key is the datasource/query string tuple associated with the + object which needs to be fully deterministic. 
+ """ + + return hashlib.md5( + json.dumps(( + self.datasource.id, + self.datasource.get_query_str(query_obj), + )).encode('utf-8'), + ).hexdigest() def get_payload(self, force=False): """Handles caching around the json payload retrieval""" - cache_key = self.cache_key - payload = None + query_obj = self.query_obj() + cache_key = self.cache_key(query_obj) + cached_dttm = None + data = None + stacktrace = None + rowcount = None if not force and cache: - payload = cache.get(cache_key) - - if payload: - stats_logger.incr('loaded_from_cache') - is_cached = True - try: - cached_data = zlib.decompress(payload) - if PY3: - cached_data = cached_data.decode('utf-8') - payload = json.loads(cached_data) - except Exception as e: - logging.error('Error reading cache: ' + - utils.error_msg_from_exception(e)) - payload = None - return [] - logging.info('Serving from cache') + cache_value = cache.get(cache_key) + if cache_value: + stats_logger.incr('loaded_from_cache') + is_cached = True + try: + cache_value = zlib.decompress(cache_value) + if PY3: + cache_value = cache_value.decode('utf-8') + cache_value = json.loads(cache_value) + data = cache_value['data'] + cached_dttm = cache_value['dttm'] + except Exception as e: + logging.error('Error reading cache: ' + + utils.error_msg_from_exception(e)) + data = None + logging.info('Serving from cache') - if not payload: + if not data: stats_logger.incr('loaded_from_source') - data = None is_cached = False - cache_timeout = self.cache_timeout - stacktrace = None - rowcount = None try: - df = self.get_df() + df = self.get_df(query_obj) if not self.error_message: data = self.get_data(df) rowcount = len(df.index) if df is not None else 0 @@ -277,37 +281,40 @@ class BaseViz(object): self.status = utils.QueryStatus.FAILED data = None stacktrace = traceback.format_exc() - payload = { - 'cache_key': cache_key, - 'cache_timeout': cache_timeout, - 'data': data, - 'error': self.error_message, - 'form_data': self.form_data, - 'query': self.query, - 
'status': self.status, - 'stacktrace': stacktrace, - 'rowcount': rowcount, - } - payload['cached_dttm'] = datetime.utcnow().isoformat().split('.')[0] - logging.info('Caching for the next {} seconds'.format( - cache_timeout)) - data = self.json_dumps(payload) - if PY3: - data = bytes(data, 'utf-8') - if cache and self.status != utils.QueryStatus.FAILED: + + if data and cache and self.status != utils.QueryStatus.FAILED: + cached_dttm = datetime.utcnow().isoformat().split('.')[0] try: + cache_value = json.dumps({ + 'data': data, + 'dttm': cached_dttm, + }) + if PY3: + cache_value = bytes(cache_value, 'utf-8') cache.set( cache_key, - zlib.compress(data), - timeout=cache_timeout) + zlib.compress(cache_value), + timeout=self.cache_timeout) except Exception as e: # cache.set call can fail if the backend is down or if # the key is too large or whatever other reasons logging.warning('Could not cache key {}'.format(cache_key)) logging.exception(e) cache.delete(cache_key) - payload['is_cached'] = is_cached - return payload + + return { + 'cache_key': cache_key, + 'cached_dttm': cached_dttm, + 'cache_timeout': self.cache_timeout, + 'data': data, + 'error': self.error_message, + 'form_data': self.form_data, + 'is_cached': is_cached, + 'query': self.query, + 'status': self.status, + 'stacktrace': stacktrace, + 'rowcount': rowcount, + } def json_dumps(self, obj): return json.dumps(obj, default=utils.json_int_dttm_ser, ignore_nan=True) diff --git a/tests/core_tests.py b/tests/core_tests.py index 8415465..a7edc4e 100644 --- a/tests/core_tests.py +++ b/tests/core_tests.py @@ -340,7 +340,6 @@ class CoreTests(SupersetTestCase): slc = self.get_slice('Girls', db.session) data = self.get_json_resp( '/superset/warm_up_cache?slice_id={}'.format(slc.id)) - assert data == [{'slice_id': slc.id, 'slice_name': slc.slice_name}] data = self.get_json_resp( diff --git a/tests/viz_tests.py b/tests/viz_tests.py index 67f4bf8..abf29ad 100644 --- a/tests/viz_tests.py +++ b/tests/viz_tests.py @@ -101,11 
+101,8 @@ class BaseVizTestCase(unittest.TestCase): def test_cache_timeout(self): datasource = Mock() - form_data = {'cache_timeout': '10'} - test_viz = viz.BaseViz(datasource, form_data) - self.assertEqual(10, test_viz.cache_timeout) - del form_data['cache_timeout'] datasource.cache_timeout = 156 + test_viz = viz.BaseViz(datasource, form_data={}) self.assertEqual(156, test_viz.cache_timeout) datasource.cache_timeout = None datasource.database = Mock() -- To stop receiving notification emails like this one, please contact commits@superset.apache.org.