climate-commits mailing list archives

From jo...@apache.org
Subject [1/3] climate git commit: CLIMATE-575 - Initial config based execution implementation
Date Wed, 11 Feb 2015 17:00:13 GMT
Repository: climate
Updated Branches:
  refs/heads/master 9ae53c9d5 -> c5c0e98d6


CLIMATE-575 - Initial config based execution implementation

- Implement initial config based execution of evaluations. There are a
  number of missing features, the most obvious of which is the lack of
  support for all the available plot types in OCW. These and other
  things will be updated in tickets that follow.
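
  For reference, the runner is driven entirely from the command line; a
  typical invocation against the bundled example config (assuming the
  NetCDF files it references have been downloaded to /tmp) looks like:

      python ocw-config-runner/ocw_evaluation_from_config.py \
          ocw-config-runner/example/simple_model_to_model_bias.yaml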


Project: http://git-wip-us.apache.org/repos/asf/climate/repo
Commit: http://git-wip-us.apache.org/repos/asf/climate/commit/c958da09
Tree: http://git-wip-us.apache.org/repos/asf/climate/tree/c958da09
Diff: http://git-wip-us.apache.org/repos/asf/climate/diff/c958da09

Branch: refs/heads/master
Commit: c958da0979c13539ad1bf43c900d075de88e1b6f
Parents: cf31d6b
Author: Michael Joyce <joyce@apache.org>
Authored: Mon Feb 9 14:58:27 2015 -0800
Committer: Michael Joyce <joyce@apache.org>
Committed: Mon Feb 9 14:58:27 2015 -0800

----------------------------------------------------------------------
 .../example/simple_model_to_model_bias.yaml     |  35 ++
 ocw-config-runner/ocw_evaluation_from_config.py | 443 ++++++++++++++
 ocw-config-runner/tests/test_config_runner.py   | 600 +++++++++++++++++++
 3 files changed, 1078 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/climate/blob/c958da09/ocw-config-runner/example/simple_model_to_model_bias.yaml
----------------------------------------------------------------------
diff --git a/ocw-config-runner/example/simple_model_to_model_bias.yaml b/ocw-config-runner/example/simple_model_to_model_bias.yaml
new file mode 100644
index 0000000..fe2cd36
--- /dev/null
+++ b/ocw-config-runner/example/simple_model_to_model_bias.yaml
@@ -0,0 +1,35 @@
+evaluation:
+    temporal_time_delta: 365
+    spatial_regrid_lats: !!python/tuple [-20, 20, 1]
+    spatial_regrid_lons: !!python/tuple [-20, 20, 1]
+
+datasets:
+    reference:
+        data_source: local
+        file_count: 1
+        path: /tmp/AFRICA_KNMI-RACMO2.2b_CTL_ERAINT_MM_50km_1989-2008_tasmax.nc
+        variable: tasmax
+
+    targets:
+        - data_source: local
+          file_count: 1
+          path: /tmp/AFRICA_UC-WRF311_CTL_ERAINT_MM_50km-rg_1989-2008_tasmax.nc
+          variable: tasmax
+metrics:
+    - Bias
+
+plots:
+    - type: contour
+      results_indeces:
+          - !!python/tuple [0, 0]
+      lats:
+          range_min: -20
+          range_max: 20
+          range_step: 1
+      lons:
+          range_min: -20
+          range_max: 20
+          range_step: 1
+      output_name: wrf_bias_compared_to_knmi
+      optional_args:
+          gridshape: !!python/tuple [6, 6]
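
A note on the !!python/tuple tags above: these are PyYAML's Python-specific
tags and are only resolved by the full (non-safe) loader, which is what
run_evaluation_from_config uses via yaml.load. A minimal sketch of the round
trip (the snippet and variable names here are illustrative, not part of the
commit):

    import yaml

    snippet = 'spatial_regrid_lats: !!python/tuple [-20, 20, 1]'
    conf = yaml.load(snippet)           # yaml.safe_load would reject the tag
    print(conf['spatial_regrid_lats'])  # (-20, 20, 1)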

http://git-wip-us.apache.org/repos/asf/climate/blob/c958da09/ocw-config-runner/ocw_evaluation_from_config.py
----------------------------------------------------------------------
diff --git a/ocw-config-runner/ocw_evaluation_from_config.py b/ocw-config-runner/ocw_evaluation_from_config.py
new file mode 100644
index 0000000..c41c79c
--- /dev/null
+++ b/ocw-config-runner/ocw_evaluation_from_config.py
@@ -0,0 +1,443 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import argparse
+import dateutil.parser
+from datetime import timedelta
+import logging
+import re
+import sys
+
+from ocw.dataset import Bounds
+from ocw.evaluation import Evaluation
+import ocw.metrics as metrics
+import ocw.plotter as plots
+import ocw.dataset_processor as dsp
+import ocw.data_source.local as local
+import ocw.data_source.rcmed as rcmed
+import ocw.data_source.esgf as esgf
+import ocw.data_source.dap as dap
+
+import numpy as np
+import yaml
+
+logging.basicConfig()
+logger = logging.getLogger(__name__)
+
+def run_evaluation_from_config(config_file_path):
+    """ Run an OCW evaluation specified by a config file.
+
+    :param config_file_path: The file path to an OCW-compliant YAML file
+        specifying how to run the evaluation. For additional information on
+        the valid options that you can set in the config, please check the
+        project wiki: https://cwiki.apache.org/confluence/display/climate/home#
+    :type config_file_path: :mod:`string`
+    """
+    config = yaml.load(open(config_file_path, 'r'))
+
+    if not is_config_valid(config):
+        logger.warning(
+            'Unable to validate configuration file. Exiting evaluation. '
+            'Please check documentation for config information.'
+        )
+        sys.exit(1)
+
+    evaluation = generate_evaluation_from_config(config)
+    evaluation.run()
+
+    plot_from_config(evaluation, config)
+
+def is_config_valid(config_data):
+    """ Validate supplied evaluation configuration data.
+
+    :param config_data: Dictionary of the data parsed from the supplied YAML
+        configuration file.
+    :type config_data: :func:`dict`
+
+    :returns: True if the configuration data is sufficient for an evaluation and
+        seems to be well formed, False otherwise.
+    """
+    if not _valid_minimal_config(config_data):
+        logger.error('Insufficient configuration file data for an evaluation')
+        return False
+
+    if not _config_is_well_formed(config_data):
+        logger.error('Configuration data is not well formed')
+        return False
+
+    return True
+
+def generate_evaluation_from_config(config_data):
+    """ Generate an Evaluation object from configuration data.
+
+    :param config_data: Dictionary of the data parsed from the supplied YAML
+        configuration file.
+    :type config_data: :func:`dict`
+
+    :returns: An Evaluation object containing the data specified in the
+        supplied configuration data.
+    """
+    # Load datasets
+    reference = None
+    targets = None
+    if 'reference' in config_data['datasets']:
+        reference = _load_dataset(config_data['datasets']['reference'])
+
+    if 'targets' in config_data['datasets']:
+        targets = [_load_dataset(t) for t in config_data['datasets']['targets']]
+
+    reference, targets = _prepare_datasets_for_evaluation(reference,
+                                                          targets,
+                                                          config_data)
+    # Load metrics
+    eval_metrics = [_load_metric(m)() for m in config_data['metrics']]
+
+    return Evaluation(reference, targets, eval_metrics)
+
+def plot_from_config(evaluation, config_data):
+    """ Generate plots for an evaluation from configuration data.
+
+    :param evaluation: The Evaluation for which to generate plots.
+    :type evaluation: :class:`ocw.evaluation.Evaluation`
+    :param config_data: Dictionary of the data parsed from the supplied YAML
+        configuration file.
+    :type config_data: :func:`dict`
+    """
+    for plot in config_data['plots']:
+        if plot['type'] == 'contour':
+            _draw_contour_plot(evaluation, plot)
+        elif plot['type'] == 'subregion':
+            logger.warn('Subregion plots are currently unsupported. Skipping ...')
+            continue
+        elif plot['type'] == 'taylor':
+            logger.warn('Taylor diagrams are currently unsupported. Skipping ...')
+            continue
+        elif plot['type'] == 'time_series':
+            logger.warn('Time series plots are currently unsupported. Skipping ...')
+            continue
+        elif plot['type'] == 'portrait':
+            logger.warn('Portrait diagrams are currently unsupported. Skipping ...')
+            continue
+        else:
+            logger.error('Unrecognized plot type requested: {}'.format(plot['type']))
+            continue
+
+def _valid_minimal_config(config_data):
+    """"""
+    if not 'datasets' in config_data.keys():
+        logger.error('No datasets specified in configuration data.')
+        return False
+
+    if 'metrics' not in config_data:
+        logger.error('No metrics specified in configuration data.')
+        return False
+
+    if _contains_unary_metrics(config_data['metrics']):
+        if ('reference' not in config_data['datasets'] and
+            'targets' not in config_data['datasets']):
+            err = (
+                'Unary metric in configuration data requires either a reference '
+                'or target dataset to be present for evaluation. Please ensure '
+                'that your config is well formed.'
+            )
+            logger.error(err)
+            return False
+
+    if _contains_binary_metrics(config_data['metrics']):
+        if ('reference' not in config_data['datasets'] or
+            'targets' not in config_data['datasets']):
+            logger.error(
+                'Binary metric in configuration requires both a reference '
+                'and target dataset to be present for evaluation. Please ensure '
+                'that your config is well formed.'
+            )
+            return False
+
+    return True
+
+def _config_is_well_formed(config_data):
+    """"""
+    is_well_formed = True
+
+    if 'reference' in config_data['datasets']:
+        if not _valid_dataset_config_data(config_data['datasets']['reference']):
+            is_well_formed = False
+
+    if 'targets' in config_data['datasets']:
+        targets = config_data['datasets']['targets']
+        if not isinstance(targets, list):
+            err = (
+                'Expected to find list of target datasets but instead found '
+                'object of type {}'
+            ).format(type(targets))
+            logger.error(err)
+            is_well_formed = False
+        else:
+            for t in targets:
+                if not _valid_dataset_config_data(t):
+                    is_well_formed = False
+
+    available_metrics = _fetch_built_in_metrics()
+    for metric in config_data['metrics']:
+        if metric not in available_metrics:
+            warning = (
+                'Unable to locate metric name {} in built-in metrics. If this '
+                'is not a user defined metric then please check for potential '
+                'misspellings.'
+            ).format(metric)
+            logger.warn(warning)
+            is_well_formed = False
+
+    if 'plots' in config_data:
+        for plot in config_data['plots']:
+            if not _valid_plot_config_data(plot):
+                is_well_formed = False
+
+    return is_well_formed
+
+def _contains_unary_metrics(config_metric_data):
+    """"""
+    unarys = [cls.__name__ for cls in metrics.UnaryMetric.__subclasses__()]
+    return any(metric in unarys for metric in config_metric_data)
+
+def _contains_binary_metrics(config_metric_data):
+    """"""
+    binarys = [cls.__name__ for cls in metrics.BinaryMetric.__subclasses__()]
+    return any(metric in binarys for metric in config_metric_data)
+
+def _fetch_built_in_metrics():
+    """"""
+    unarys = [cls.__name__ for cls in metrics.UnaryMetric.__subclasses__()]
+    binarys = [cls.__name__ for cls in metrics.BinaryMetric.__subclasses__()]
+    return unarys + binarys
+
+def _valid_dataset_config_data(dataset_config_data):
+    """"""
+    try:
+        data_source = dataset_config_data['data_source']
+    except KeyError:
+        logger.error('Dataset does not contain a data_source attribute.')
+        return False
+
+    if data_source == 'local':
+        required_keys = set(['data_source', 'file_count', 'path', 'variable'])
+    elif data_source == 'rcmed':
+        required_keys = set([
+            'dataset_id',
+            'parameter_id',
+            'min_lat',
+            'max_lat',
+            'min_lon',
+            'max_lon',
+            'start_time',
+            'end_time',
+        ])
+    elif data_source == 'esgf':
+        required_keys = set([
+            'data_source',
+            'dataset_id',
+            'variable',
+            'esgf_username',
+            'esgf_password'
+        ])
+    elif data_source == 'dap':
+        required_keys = set(['url', 'variable'])
+    else:
+        logger.error('Dataset does not contain a valid data_source location.')
+        return False
+
+    present_keys = set(dataset_config_data.keys())
+    missing_keys = required_keys - present_keys
+    contains_required = len(missing_keys) == 0
+
+    if contains_required:
+        if data_source == 'local' and dataset_config_data['file_count'] > 1:
+            # If the dataset is a multi-file dataset then we need to make sure
+            # that the file glob pattern is included.
+            if 'file_glob_pattern' not in dataset_config_data:
+                logger.error(
+                    'Multi-file local dataset is missing key: file_glob_pattern'
+                )
+                return False
+        return True
+    else:
+        missing = sorted(list(missing_keys))
+        logger.error(
+            'Dataset does not contain required keys. '
+            'The following keys are missing: {}'.format(', '.join(missing))
+        )
+        return False
+
+def _valid_plot_config_data(plot_config_data):
+    """"""
+    try:
+        plot_type = plot_config_data['type']
+    except KeyError:
+        logger.error('Plot config does not include a type attribute.')
+        return False
+
+    if plot_type == 'contour':
+        required_keys = set([
+                'results_indeces',
+                'lats',
+                'lons',
+                'output_name'
+        ])
+    elif plot_type == 'taylor':
+        logger.warn('Taylor diagrams are currently unsupported. Skipping validation')
+        return True
+    elif plot_type == 'subregion':
+        logger.warn('Subregion plots are currently unsupported. Skipping validation')
+        return True
+    elif plot_type == 'time_series':
+        logger.warn('Time series plots are currently unsupported. Skipping validation')
+        return True
+    elif plot_type == 'portrait':
+        logger.warn('Portrait diagrams are currently unsupported. Skipping validation')
+        return True
+    else:
+        logger.error('Invalid plot type specified.')
+        return False
+
+    present_keys = set(plot_config_data.keys())
+    missing_keys = required_keys - present_keys
+    contains_required = len(missing_keys) == 0
+
+    if not contains_required:
+        missing = sorted(list(missing_keys))
+        logger.error(
+            'Plot config does not contain required keys. '
+            'The following keys are missing: {}'.format(', '.join(missing))
+        )
+        return False
+
+    return True
+
+def _load_dataset(dataset_config_data):
+    """"""
+    if dataset_config_data['data_source'] == 'local':
+        if dataset_config_data['file_count'] > 1:
+            logger.error(
+                'Multi-file datasets are currently not supported. Cancelling load '
+                'of the following dataset: {}'.format(dataset_config_data)
+            )
+            return None
+
+        return local.load_file(dataset_config_data['path'],
+                               dataset_config_data['variable'],
+                               **dataset_config_data.get('optional_args', {}))
+    elif dataset_config_data['data_source'] == 'rcmed':
+        return rcmed.parameter_dataset(dataset_config_data['dataset_id'],
+                                       dataset_config_data['parameter_id'],
+                                       dataset_config_data['min_lat'],
+                                       dataset_config_data['max_lat'],
+                                       dataset_config_data['min_lon'],
+                                       dataset_config_data['max_lon'],
+                                       dataset_config_data['start_time'],
+                                       dataset_config_data['end_time'],
+                                       **dataset_config_data.get('optional_args', {}))
+    elif dataset_config_data['data_source'] == 'esgf':
+        return esgf.load_dataset(dataset_config_data['dataset_id'],
+                                 dataset_config_data['variable'],
+                                 dataset_config_data['esgf_username'],
+                                 dataset_config_data['esgf_password'],
+                                 **dataset_config_data.get('optional_args', {}))
+    elif dataset_config_data['data_source'] == 'dap':
+        return dap.load(dataset_config_data['url'],
+                        dataset_config_data['variable'],
+                        **dataset_config_data.get('optional_args', {}))
+
+def _prepare_datasets_for_evaluation(reference, targets, config_data):
+    """Subset, temporally rebin, and spatially regrid the datasets as requested."""
+    subset = config_data['evaluation'].get('subset', None)
+    temporal_time_delta = config_data['evaluation'].get('temporal_time_delta', None)
+    spatial_regrid_lats = config_data['evaluation'].get('spatial_regrid_lats', None)
+    spatial_regrid_lons = config_data['evaluation'].get('spatial_regrid_lons', None)
+
+    if subset:
+        start = dateutil.parser.parse(subset[4])
+        end = dateutil.parser.parse(subset[5])
+        bounds = Bounds(subset[0], subset[1], subset[2], subset[3], start, end)
+
+        if reference:
+            reference = dsp.safe_subset(bounds, reference)
+
+        if targets:
+            targets = [dsp.safe_subset(bounds, t) for t in targets]
+
+    if temporal_time_delta:
+        resolution = timedelta(temporal_time_delta)
+
+        if reference:
+            reference = dsp.temporal_rebin(reference, resolution)
+
+        if targets:
+            targets = [dsp.temporal_rebin(t, resolution) for t in targets]
+
+    if spatial_regrid_lats and spatial_regrid_lons:
+        lats = np.arange(spatial_regrid_lats[0], spatial_regrid_lats[1], spatial_regrid_lats[2])
+        lons = np.arange(spatial_regrid_lons[0], spatial_regrid_lons[1], spatial_regrid_lons[2])
+
+        if reference:
+            reference = dsp.spatial_regrid(reference, lats, lons)
+
+        if targets:
+            targets = [dsp.spatial_regrid(t, lats, lons) for t in targets]
+
+    return reference, targets
+
+
+def _load_metric(metric_config_data):
+    """"""
+    # If the dataset is user defined outside of ocw.metrics we won't currently
+    # handle loading it.
+    if '.' in metric_config_data:
+        logger.error(
+            'User-defined metrics outside of the ocw.metrics module '
+            'cannot currently be loaded. If you just wanted a metric '
+            'found in ocw.metrics then do not specify the full '
+            'package and module names. See the documentation for examples.'
+        )
+        return None
+
+    return getattr(metrics, metric_config_data)
+
+def _draw_contour_plot(evaluation, plot_config):
+    """"""
+    row, col = plot_config['results_indeces'][0]
+
+    lats = plot_config['lats']
+    if not isinstance(lats, list):
+        lats = range(lats['range_min'], lats['range_max'], lats['range_step'])
+
+    lons = plot_config['lons']
+    if not isinstance(lons, list):
+        lons = range(lons['range_min'], lons['range_max'], lons['range_step'])
+
+    plots.draw_contour_map(evaluation.results[row][col],
+                           np.array(lats),
+                           np.array(lons),
+                           plot_config['output_name'],
+                           **plot_config.get('optional_args', {}))
+
+if __name__ == '__main__':
+    description = 'OCW Config Based Evaluation'
+    epilog = 'Additional information at https://cwiki.apache.org/confluence/display/climate/home#'
+
+    parser = argparse.ArgumentParser(description=description, epilog=epilog)
+    parser.add_argument('config', help='Path to YAML config file for the evaluation')
+    args = parser.parse_args()
+
+    run_evaluation_from_config(args.config)
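
Since the script is importable, the same flow can also be exercised from
Python directly. A minimal sketch (the config below is hypothetical and its
dataset paths are placeholders; OCW must be installed and the files must
exist for this to actually run):

    import yaml
    import ocw_evaluation_from_config as runner

    # Hypothetical single-target, single-metric evaluation. The empty
    # 'evaluation' section is included because dataset preparation
    # reads that key unconditionally.
    config = yaml.safe_load("""
    evaluation: {}
    datasets:
        reference:
            data_source: local
            file_count: 1
            path: /tmp/reference.nc
            variable: tasmax
        targets:
            - data_source: local
              file_count: 1
              path: /tmp/target.nc
              variable: tasmax
    metrics:
        - Bias
    """)

    if runner.is_config_valid(config):
        evaluation = runner.generate_evaluation_from_config(config)
        evaluation.run()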

http://git-wip-us.apache.org/repos/asf/climate/blob/c958da09/ocw-config-runner/tests/test_config_runner.py
----------------------------------------------------------------------
diff --git a/ocw-config-runner/tests/test_config_runner.py b/ocw-config-runner/tests/test_config_runner.py
new file mode 100644
index 0000000..ae63589
--- /dev/null
+++ b/ocw-config-runner/tests/test_config_runner.py
@@ -0,0 +1,600 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from mock import patch
+import unittest
+
+import ocw_evaluation_from_config as config_runner
+import ocw.metrics
+
+import yaml
+
+class TestMetricLoad(unittest.TestCase):
+    def test_valid_metric_load(self):
+        config = yaml.safe_load("""
+            metrics:
+                - Bias
+        """)
+        loaded_metrics = [config_runner._load_metric(m)()
+                          for m in config['metrics']]
+        self.assertTrue(isinstance(loaded_metrics[0], ocw.metrics.Bias))
+
+    @patch('ocw_evaluation_from_config.logger')
+    def test_invalid_metric_load(self, mock_logger):
+        config = yaml.safe_load("""
+            metrics:
+                - ocw.metrics.Bias
+        """)
+        config_runner._load_metric(config['metrics'][0])
+        error = (
+            'User-defined metrics outside of the ocw.metrics module '
+            'cannot currently be loaded. If you just wanted a metric '
+            'found in ocw.metrics then do not specify the full '
+            'package and module names. See the documentation for examples.'
+        )
+        mock_logger.error.assert_called_with(error)
+
+
+class TestRCMEDDatasetConfig(unittest.TestCase):
+    @classmethod
+    def setUpClass(self):
+        self.required_rcmed_keys = set([
+            'dataset_id',
+            'parameter_id',
+            'min_lat',
+            'max_lat',
+            'min_lon',
+            'max_lon',
+            'start_time',
+            'end_time'
+        ])
+        example_config_yaml = """
+            - data_source: rcmed
+              dataset_id: 4
+              parameter_id: 4
+              min_lat: -40
+              max_lat: 40
+              min_lon: -50
+              max_lon: 50
+              start_time: YYYY-MM-DDThh:mm:ss
+              end_time: YYYY-MM-DDThh:mm:ss
+
+            - data_source: rcmed
+        """
+        conf = yaml.safe_load(example_config_yaml)
+        self.valid_rcmed = conf[0]
+        self.invalid_rcmed = conf[1]
+
+    def test_valid_rcmed_config(self):
+        ret = config_runner._valid_dataset_config_data(self.valid_rcmed)
+        self.assertTrue(ret)
+
+    @patch('ocw_evaluation_from_config.logger')
+    def test_invalid_rcmed_config(self, mock_logger):
+        config_runner._valid_dataset_config_data(self.invalid_rcmed)
+
+        present_keys = set(self.invalid_rcmed.keys())
+        missing_keys = self.required_rcmed_keys - present_keys
+        missing = sorted(list(missing_keys))
+
+        error = (
+            'Dataset does not contain required keys. '
+            'The following keys are missing: {}'.format(', '.join(missing))
+        )
+        mock_logger.error.assert_called_with(error)
+
+
+class TestLocalDatasetConfig(unittest.TestCase):
+    @classmethod
+    def setUpClass(self):
+        self.required_local_keys = set(['data_source', 'file_count', 'path', 'variable'])
+        example_config_yaml = """
+            - data_source: local
+              file_count: 1
+              path: /a/fake/path
+              variable: pr
+              optional_args:
+                  name: Target1
+
+            - data_source: local
+
+            - data_source: local
+              file_count: 5
+              file_glob_pattern: something for globbing files here
+              variable: pr
+              path: /a/fake/path
+              optional_args:
+                  name: Target1
+
+            - data_source: local
+              file_count: 5
+              variable: pr
+              path: /a/fake/path
+        """
+
+        conf = yaml.safe_load(example_config_yaml)
+        self.valid_local_single = conf[0]
+        self.invalid_local_single = conf[1]
+        self.valid_local_multi = conf[2]
+        self.invalid_local_multi = conf[1]
+        self.invalid_local_multi_file_glob = conf[3]
+
+    def test_valid_local_config_single_file(self):
+        ret = config_runner._valid_dataset_config_data(self.valid_local_single)
+        self.assertTrue(ret)
+
+    def test_valid_local_config_multi_file(self):
+        ret = config_runner._valid_dataset_config_data(self.valid_local_multi)
+        self.assertTrue(ret)
+
+    @patch('ocw_evaluation_from_config.logger')
+    def test_invalid_local_config(self, mock_logger):
+        config_runner._valid_dataset_config_data(self.invalid_local_single)
+
+        present_keys = set(self.invalid_local_single.keys())
+        missing_keys = self.required_local_keys - present_keys
+        missing = sorted(list(missing_keys))
+
+        error = (
+            'Dataset does not contain required keys. '
+            'The following keys are missing: {}'.format(', '.join(missing))
+        )
+        mock_logger.error.assert_called_with(error)
+
+    @patch('ocw_evaluation_from_config.logger')
+    def test_invalid_local_config_multi_file(self, mock_logger):
+        # Multi-file config is handled slightly differently. We should see the
+        # same missing keys in this situation as we would on the single file
+        # local config. We will test for a missing file_glob_pattern in a
+        # different test.
+        config_runner._valid_dataset_config_data(self.invalid_local_multi)
+
+        present_keys = set(self.invalid_local_multi.keys())
+        missing_keys = self.required_local_keys - present_keys
+        missing = sorted(list(missing_keys))
+
+        error = (
+            'Dataset does not contain required keys. '
+            'The following keys are missing: {}'.format(', '.join(missing))
+        )
+        mock_logger.error.assert_called_with(error)
+
+    @patch('ocw_evaluation_from_config.logger')
+    def test_invalid_local_config_multi_file_missing_file_glob(self, mock_logger):
+        # We can't check for the file_glob_pattern key until after the
+        # single-file local config requirements have been met.
+        config_runner._valid_dataset_config_data(self.invalid_local_multi_file_glob)
+
+        mock_logger.error.assert_called_with(
+            'Multi-file local dataset is missing key: file_glob_pattern'
+        )
+
+
+class TestESGFDatasetConfig(unittest.TestCase):
+    @classmethod
+    def setUpClass(self):
+        self.required_esgf_keys = set([
+            'data_source',
+            'dataset_id',
+            'variable',
+            'esgf_username',
+            'esgf_password'
+        ])
+        example_config_yaml = """
+           - data_source: esgf
+             dataset_id: fake dataset id
+             variable: pr
+             esgf_username: my esgf username
+             esgf_password: my esgf password
+
+           - data_source: esgf
+        """
+        conf = yaml.safe_load(example_config_yaml)
+        self.valid_esgf = conf[0]
+        self.invalid_esgf = conf[1]
+
+    def test_valid_esgf_conf(self):
+        ret = config_runner._valid_dataset_config_data(self.valid_esgf)
+        self.assertTrue(ret)
+
+    @patch('ocw_evaluation_from_config.logger')
+    def test_invalid_esgf_conf(self, mock_logger):
+        config_runner._valid_dataset_config_data(self.invalid_esgf)
+
+        present_keys = set(self.invalid_esgf.keys())
+        missing_keys = self.required_esgf_keys - present_keys
+        missing = sorted(list(missing_keys))
+
+        error = (
+            'Dataset does not contain required keys. '
+            'The following keys are missing: {}'.format(', '.join(missing))
+        )
+        mock_logger.error.assert_called_with(error)
+
+
+class TestDAPDatasetConfig(unittest.TestCase):
+    @classmethod
+    def setUpClass(self):
+        self.required_dap_keys = set(['url', 'variable'])
+        example_config_yaml = """
+           - data_source: dap
+             url: afakeurl.com
+             variable: pr
+
+           - data_source: dap
+        """
+        conf = yaml.safe_load(example_config_yaml)
+        self.valid_dap = conf[0]
+        self.invalid_dap = conf[1]
+
+    def test_valid_dap_config(self):
+        ret = config_runner._valid_dataset_config_data(self.valid_dap)
+        self.assertTrue(ret)
+
+    @patch('ocw_evaluation_from_config.logger')
+    def test_invalid_dap_config(self, mock_logger):
+        config_runner._valid_dataset_config_data(self.invalid_dap)
+
+        present_keys = set(self.invalid_dap.keys())
+        missing_keys = self.required_dap_keys - present_keys
+        missing = sorted(list(missing_keys))
+
+        error = (
+            'Dataset does not contain required keys. '
+            'The following keys are missing: {}'.format(', '.join(missing))
+        )
+        mock_logger.error.assert_called_with(error)
+
+
+class InvalidDatasetConfig(unittest.TestCase):
+    @classmethod
+    def setUpClass(self):
+        example_config_yaml = """
+            - file_count: 1
+              path: /a/fake/path
+              variable: pr
+
+            - data_source: invalid_location_identifier
+        """
+        conf = yaml.safe_load(example_config_yaml)
+        self.missing_data_source = conf[0]
+        self.invalid_data_source = conf[1]
+
+    @patch('ocw_evaluation_from_config.logger')
+    def test_missing_data_source_config(self, mock_logger):
+        config_runner._valid_dataset_config_data(self.missing_data_source)
+        mock_logger.error.assert_called_with(
+            'Dataset does not contain a data_source attribute.'
+        )
+
+    @patch('ocw_evaluation_from_config.logger')
+    def test_invalid_data_source(self, mock_logger):
+        config_runner._valid_dataset_config_data(self.invalid_data_source)
+        mock_logger.error.assert_called_with(
+            'Dataset does not contain a valid data_source location.'
+        )
+
+
+class MetricFetchTest(unittest.TestCase):
+    @classmethod
+    def setUpClass(self):
+        binary_config = """
+            metrics:
+                - Bias
+                - StdDevRatio
+        """
+        unary_config = """
+            metrics:
+                - TemporalStdDev
+        """
+        self.unary_conf = yaml.safe_load(unary_config)
+        self.binary_conf = yaml.safe_load(binary_config)
+
+    def test_contains_binary_metric(self):
+        ret = config_runner._contains_binary_metrics(self.binary_conf['metrics'])
+        self.assertTrue(ret)
+
+    def test_does_not_contain_binary_metric(self):
+        ret = config_runner._contains_binary_metrics(self.unary_conf['metrics'])
+        self.assertFalse(ret)
+
+    def test_contains_unary_metric(self):
+        ret = config_runner._contains_unary_metrics(self.unary_conf['metrics'])
+        self.assertTrue(ret)
+        
+    def test_does_not_contain_unary_metric(self):
+        ret = config_runner._contains_unary_metrics(self.binary_conf['metrics'])
+        self.assertFalse(ret)
+
+
+class TestValidMinimalConfig(unittest.TestCase):
+    @classmethod
+    def setUpClass(self):
+        no_datasets_config = """
+        metrics:
+            - Bias
+        """
+        self.no_datasets = yaml.safe_load(no_datasets_config)
+
+        no_metrics_config = """
+        datasets:
+            reference:
+                data_source: dap
+                url: afakeurl.com
+                variable: pr
+        """
+        self.no_metrics = yaml.safe_load(no_metrics_config)
+
+        unary_with_reference_config = """
+        datasets:
+            reference:
+                data_source: dap
+                url: afakeurl.com
+                variable: pr
+
+        metrics:
+            - TemporalStdDev
+        """
+        self.unary_with_reference = yaml.safe_load(unary_with_reference_config)
+
+        unary_with_target_config = """
+        datasets:
+            targets:
+                - data_source: dap
+                  url: afakeurl.com
+                  variable: pr
+
+        metrics:
+            - TemporalStdDev
+        """
+        self.unary_with_target = yaml.safe_load(unary_with_target_config)
+
+        unary_no_reference_or_target = """
+        datasets:
+            not_ref_or_target:
+                - data_source: dap
+                  url: afakeurl.com
+                  variable: pr
+
+        metrics:
+            - TemporalStdDev
+        """
+        self.unary_no_ref_or_target = yaml.safe_load(unary_no_reference_or_target)
+
+        binary_valid_config = """
+        datasets:
+            reference:
+                data_source: dap
+                url: afakeurl.com
+                variable: pr
+
+            targets:
+                - data_source: dap
+                  url: afakeurl.com
+                  variable: pr
+        metrics:
+            - Bias
+        """
+        self.binary_valid = yaml.safe_load(binary_valid_config)
+
+        binary_no_reference_config = """
+        datasets:
+            targets:
+                - data_source: dap
+                  url: afakeurl.com
+                  variable: pr
+        metrics:
+            - Bias
+        """
+        self.binary_no_reference = yaml.safe_load(binary_no_reference_config)
+
+        binary_no_target_config = """
+        datasets:
+            reference:
+                data_source: dap
+                url: afakeurl.com
+                variable: pr
+
+        metrics:
+            - Bias
+        """
+        self.binary_no_target = yaml.safe_load(binary_no_target_config)
+
+    @patch('ocw_evaluation_from_config.logger')
+    def test_no_datasets(self, mock_logger):
+        ret = config_runner._valid_minimal_config(self.no_datasets)
+        self.assertFalse(ret)
+
+        mock_logger.error.assert_called_with(
+            'No datasets specified in configuration data.'
+        )
+
+    @patch('ocw_evaluation_from_config.logger')
+    def test_no_metrics(self, mock_logger):
+        ret = config_runner._valid_minimal_config(self.no_metrics)
+        self.assertFalse(ret)
+
+        mock_logger.error.assert_called_with(
+            'No metrics specified in configuration data.'
+        )
+
+    def test_unary_with_reference(self):
+        ret = config_runner._valid_minimal_config(self.unary_with_reference)
+        self.assertTrue(ret)
+
+    def test_unary_with_target(self):
+        ret = config_runner._valid_minimal_config(self.unary_with_target)
+        self.assertTrue(ret)
+
+    @patch('ocw_evaluation_from_config.logger')
+    def test_unary_no_datasets(self, mock_logger):
+        ret = config_runner._valid_minimal_config(self.unary_no_ref_or_target)
+        self.assertFalse(ret)
+
+        mock_logger.error.assert_called_with(
+            'Unary metric in configuration data requires either a reference '
+            'or target dataset to be present for evaluation. Please ensure '
+            'that your config is well formed.'
+        )
+
+    def test_valid_binary(self):
+        ret = config_runner._valid_minimal_config(self.binary_valid)
+        self.assertTrue(ret)
+
+    @patch('ocw_evaluation_from_config.logger')
+    def test_binary_no_reference(self, mock_logger):
+        ret = config_runner._valid_minimal_config(self.binary_no_reference)
+        self.assertFalse(ret)
+
+        mock_logger.error.assert_called_with(
+            'Binary metric in configuration requires both a reference '
+            'and target dataset to be present for evaluation. Please ensure '
+            'that your config is well formed.'
+        )
+        
+    @patch('ocw_evaluation_from_config.logger')
+    def test_binary_no_target(self, mock_logger):
+        ret = config_runner._valid_minimal_config(self.binary_no_target)
+        self.assertFalse(ret)
+
+        mock_logger.error.assert_called_with(
+            'Binary metric in configuration requires both a reference '
+            'and target dataset to be present for evaluation. Please ensure '
+            'that your config is well formed.'
+        )
+
+
+class TestIsConfigValid(unittest.TestCase):
+    @classmethod
+    def setUpClass(self):
+        not_minimal_config = """
+            datasets:
+        """
+        self.not_minimal = yaml.safe_load(not_minimal_config)
+
+        not_well_formed_config = """
+        datasets:
+            reference:
+                data_source: local
+                file_count: 1
+                path: /a/fake/path/file.py
+                variable: pr
+
+            targets:
+                - data_source: local
+                  file_count: 5
+                  file_glob_pattern: something for globbing files here
+                  variable: pr
+                  optional_args:
+                      name: Target1
+
+                - data_source: esgf
+                  dataset_id: fake dataset id
+                  variable: pr
+                  esgf_username: my esgf username
+                  esgf_password: my esgf password
+
+        metrics:
+            - Bias
+            - TemporalStdDev
+        """
+        self.not_well_formed = yaml.safe_load(not_well_formed_config)
+
+    @patch('ocw_evaluation_from_config.logger')
+    def test_not_minimal_config(self, mock_logger):
+        ret = config_runner.is_config_valid(self.not_minimal)
+        self.assertFalse(ret)
+
+        mock_logger.error.assert_called_with(
+            'Insufficient configuration file data for an evaluation'
+        )
+
+    @patch('ocw_evaluation_from_config.logger')
+    def test_not_valid_config(self, mock_logger):
+        ret = config_runner.is_config_valid(self.not_well_formed)
+        self.assertFalse(ret)
+
+        mock_logger.error.assert_called_with(
+            'Configuration data is not well formed'
+        )
+
+
+class TestConfigIsWellFormed(unittest.TestCase):
+    @classmethod
+    def setUpClass(self):
+        malformed_reference_config = """
+            datasets:
+                reference:
+                    data_source: notavalidlocation
+
+            metrics:
+                - Bias
+        """
+        self.malformed_reference_conf = yaml.safe_load(malformed_reference_config)
+
+        malformed_target_list_config = """
+            datasets:
+                targets:
+                    notalist: 
+                        a_key: a_value
+
+                    alsonotalist:
+                        a_key: a_value
+
+            metrics:
+                - Bias
+        """
+        self.malformed_target_list = yaml.safe_load(malformed_target_list_config)
+
+        missing_metric_name_config = """
+            datasets:
+                reference:
+                    data_source: dap
+                    url: afakeurl.com
+                    variable: pr
+
+            metrics:
+                - NotABuiltInMetric
+        """
+        self.missing_metric_name = yaml.safe_load(missing_metric_name_config)
+
+    def test_malformed_reference_config(self):
+        ret = config_runner._config_is_well_formed(self.malformed_reference_conf)
+        self.assertFalse(ret)
+
+    @patch('ocw_evaluation_from_config.logger')
+    def test_malformed_target_dataset_list(self, mock_logger):
+        ret = config_runner._config_is_well_formed(self.malformed_target_list)
+        self.assertFalse(ret)
+
+        mock_logger.error.assert_called_with(
+            "Expected to find list of target datasets but instead found "
+            "object of type <type 'dict'>"
+        )
+
+    def test_not_builtin_metric(self):
+        ret = config_runner._config_is_well_formed(self.missing_metric_name)
+        self.assertFalse(ret)
+
+    @patch('ocw_evaluation_from_config.logger')
+    def test_warns_regarding_not_builtin_metric(self, mock_logger):
+        ret = config_runner._config_is_well_formed(self.missing_metric_name)
+        mock_logger.warn.assert_called_with(
+            'Unable to locate metric name NotABuiltInMetric in built-in '
+            'metrics. If this is not a user defined metric then please check '
+            'for potential misspellings.'
+        )

