singa-dev mailing list archives

From GitBox <...@apache.org>
Subject [GitHub] [singa] XJDKC commented on a change in pull request #697: New Model Layer Operator API
Date Tue, 02 Jun 2020 02:04:12 GMT

XJDKC commented on a change in pull request #697:
URL: https://github.com/apache/singa/pull/697#discussion_r433580199



##########
File path: python/singa/layer.py
##########
@@ -8,1443 +8,1279 @@
 #
 #   http://www.apache.org/licenses/LICENSE-2.0
 #
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
 # =============================================================================
-""" Python layers wrap the C++ layers to provide simpler construction APIs.
-
-Example usages::
-
-    from singa import layer
-    from singa import tensor
-    from singa import device
-
-    layer.engine = 'cudnn'  # to use cudnn layers
-    dev = device.create_cuda_gpu()
-
-    # create a convolution layer
-    conv = layer.Conv2D('conv', 32, 3, 1, pad=1, input_sample_shape=(3, 32, 32))
-
-    # init param values
-    w, b = conv.param_values()
-    w.gaussian(0, 0.01)
-    b.set_value(0)
-    conv.to_device(dev)  # move the layer data onto a CudaGPU device
-
-    x = tensor.Tensor((3, 32, 32), dev)
-    x.uniform(-1, 1)
-    y = conv.forward(True, x)
-
-    dy = tensor.Tensor()
-    dy.reset_like(y)
-    dy.set_value(0.1)
-    # dp is a list of tensors for parameter gradients
-    dx, dp = conv.backward(True, dy)
-"""
-from __future__ import division
-from __future__ import absolute_import
-
-from builtins import str
-from builtins import range
-from builtins import object
-from builtins import set
-
-from . import singa_wrap
-from .proto import model_pb2
-from . import tensor
-
-engine = 'cudnn'
-'''engine is the prefix of the layer identifier.
-
-The value could be one of [**'cudnn', 'singacpp', 'singacuda', 'singacl'**],
-for layers implemented using the cudnn library, C++, CUDA and OpenCL,
-respectively. For example, the CudnnConvolution layer is identified by
-'cudnn_convolution', while 'singacpp_convolution' stands for the Convolution
-layer. Some layers are implemented using only Tensor functions and are
-therefore transparent to the underlying devices. These layers have multiple
-identifiers, e.g., singacpp_dropout, singacuda_dropout and singacl_dropout
-all stand for the Dropout layer. In addition, there is an extra identifier
-'singa', i.e. 'singa_dropout' also stands for the Dropout layer.
-
-engine is case insensitive. Each Python layer creates the specific layer
-using the engine attribute.
-'''
-
-if singa_wrap.USE_CUDNN:
-    cudnn_version = singa_wrap.CUDNN_VERSION
-else:
-    cudnn_version = 0
-
-
-class Layer(object):
-    '''Base Python layer class.
-
-    Typically, the life cycle of a layer instance includes:
-        1. construct layer without input_sample_shapes, goto 2;
-           construct layer with input_sample_shapes, goto 3;
-        2. call setup to create the parameters and setup other meta fields
-        3. call forward or access layer members
-        4. call backward and get parameters for update
-
-    Args:
-        name (str): layer name
-    '''
-
-    def __init__(self, name, conf=None, **kwargs):
-        if conf is None:
-            self.layer = None  # layer converted by swig
-            self.name = name  # TODO(wangwei) duplicate with self.conf.name
-            self.conf = model_pb2.LayerConf()
-            self.conf.name = name
-            self.param_specs = []
-        else:
-            self.conf = conf
-            self.name = conf.name
-            self.caffe_layer()
-            self.param_specs = []
-
-            # convert caffe proto into singa proto format
-            #   case1: parameters of conv and dense layers
-            #   case2: type of activation layers
-            if (conf.type == 'Convolution' or conf.type == 4) or \
-                    (conf.type == 'InnerProduct' or conf.type == 14):
-                w, b = _construct_param_specs_from_caffe_proto(conf)
-                del conf.param[:]
-                conf.param.extend([w, b])
-                self.param_specs.append(w)
-                self.param_specs.append(b)
-                # print 'conf:\n', conf
-            if conf.type == 'Pooling':
-                conf.pooling_conf.ceil = True
-                # print 'conf:\n', conf
-            elif (conf.type == 'ReLU' or conf.type == 18 or
-                  conf.type == 'Sigmoid' or conf.type == 19 or
-                  conf.type == 'TanH' or conf.type == 23):
-                conf.type = (engine + '_' + conf.type).lower()
-            self.conf = conf
-
-        self.has_setup = False
-
-    def setup(self, in_shapes):
-        '''Call the C++ setup function to create params and set some meta data.
-
-        Args:
-            in_shapes: if the layer accepts a single input Tensor, in_shapes is
-                a single tuple specifying the input Tensor shape; if the layer
-                accepts multiple input Tensors (e.g., the concatenation layer),
-                in_shapes is a tuple of tuples, each for one input Tensor
-        '''
-        if self.has_setup:
-            return
-        if type(in_shapes[0]) is tuple:
-            self.layer.SetupWithMultInputs([list(s) for s in in_shapes],
-                                           self.conf.SerializeToString())
-        else:
-            self.layer.Setup(list(in_shapes), self.conf.SerializeToString())
-        self.has_setup = True
-
-    def caffe_layer(self):
-        '''
-        Create a singa layer based on caffe layer configuration.
-        '''
-        _check_engine(engine, ['cudnn', 'singacpp', 'singacuda', 'singacl'])
-        if self.conf.type == 'InnerProduct' or self.conf.type == 14:
-            self.layer = _create_layer(engine, 'Dense')
-        else:
-            self.layer = _create_layer(engine, self.conf.type)
-
-    def get_output_sample_shape(self):
-        '''Called after setup to get the shape of the output sample(s).
-
-        Returns:
-            a tuple for a single output Tensor or a list of tuples if this layer
-            has multiple outputs
-        '''
-        assert self.has_setup, \
-            'Must call setup() before get_output_sample_shape()'
-        return self.layer.GetOutputSampleShape()
-
-    def param_names(self):
-        '''
-        Returns:
-            a list of strings, one for the name of one parameter Tensor
-        '''
-        names = []
-        for x in self.param_specs:
-            names.append(x.name)
-        return names
-
-    def param_values(self):
-        '''Return param value tensors.
-
-        Parameter tensors are not stored as layer members because the C++
-        Tensor may be moved across devices when the layer's device changes,
-        which would leave cached references inconsistent.
-
-        Returns:
-            a list of tensors, one for each parameter
-        '''
-        if self.layer is None:
-            return []
-        else:
-            return tensor.from_raw_tensors(self.layer.param_values())
 
-    def forward(self, flag, x):
-        '''Forward propagate through this layer.
+import math
+import numpy as np
+from functools import wraps
 
-        Args:
-            flag: True (kTrain) for training; False (kEval) for evaluating;
-                other values for future use.
-            x (Tensor or list<Tensor>): an input tensor if the layer is
-                connected from a single layer; a list of tensors if the layer
-                is connected from multiple layers.
-
-        Return:
-            a tensor if the layer is connected to a single layer; a list of
-            tensors if the layer is connected to multiple layers;
-        '''
-        assert self.has_setup, 'Must call setup() before forward()'
-        if type(flag) is bool:
-            if flag:
-                flag = model_pb2.kTrain
-            else:
-                flag = model_pb2.kEval
-        if type(x) is list:
-            xs = [t.data for t in x]
-            y = self.layer.ForwardWithMultInputs(flag, xs)
-        else:
-            assert isinstance(x, tensor.Tensor), \
-                'input of %s (type:%s) must be a Tensor or Tensor list'\
-                % (self.name, type(x).__name__)
-            y = self.layer.Forward(flag, x.data)
-        if type(y) is tuple:
-            return tensor.from_raw_tensors(y)
-        else:
-            return tensor.from_raw_tensor(y)
+from singa import utils
+from .tensor import Tensor
+from . import singa_wrap as singa
 
-    def backward(self, flag, dy):
-        '''Backward propagate gradients through this layer.
 
-        Args:
-            flag (int): for future use.
-            dy (Tensor or list<Tensor>): the gradient tensor(s) y w.r.t the
-                objective loss
-        Return:
-            <dx, <dp1, dp2..>>, dx is a (set of) tensor(s) for the gradient
-            of x, dpi is the gradient of the i-th parameter
-        '''
-        if type(flag) is bool:
-            if flag:
-                flag = model_pb2.kTrain
-            else:
-                flag = model_pb2.kEval
+class LayerMeta(type):
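+    """Metaclass that wraps a subclass's ``initialize`` method so that
+    parameters are created on the device of the input tensors, with
+    graph construction temporarily disabled."""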
 
-        if type(dy) == list:
-            dys = [t.data for t in dy]
-            ret = self.layer.BackwardWithMultInputs(flag, dys)
-        else:
-            assert isinstance(dy, tensor.Tensor), \
-                'input of %s (type:%s) must be a Tensor or Tensor list'\
-                % (self.name, type(dy).__name__)
-            dys = dy.data
-            ret = self.layer.Backward(flag, dys)
-        if type(ret[0]) is tuple:
-            dxs = tensor.from_raw_tensors(ret[0])
-        else:
-            dxs = tensor.from_raw_tensor(ret[0])
-        return dxs, tensor.from_raw_tensors(ret[1])
+    def init_wrapper(func):
 
-    def to_device(self, device):
-        '''Move layer state tensors onto the given device.
+        @wraps(func)
+        def wrapper(self, *args, **kwargs):
+            if len(args) == 0:
+                return
 
-        Args:
-            device: swig converted device, created using singa.device
-        '''
-        if self.layer is not None:
-            self.layer.ToDevice(device)
+            if isinstance(args[0], list):
+                assert len(args[0]) > 0 and isinstance(args[0][0], Tensor), (
+                    'initialize function expects PlaceHolders or Tensors')
+                dev = args[0][0].device
+            else:
+                assert len(args) > 0 and isinstance(args[0], Tensor), (
+                    'initialize function expects PlaceHolders or Tensors')
+                dev = args[0].device
 
-    def as_type(self, dtype):
-        pass
+            self._get_unique_name()
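+            # disable graph buffering so the parameter-creation ops in
+            # initialize() execute eagerly, then restore the old state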
+            prev_state = dev.graph_enabled()
+            dev.EnableGraph(False)
+            func(self, *args, **kwargs)
+            self._initialized = True
+            dev.EnableGraph(prev_state)
 
-    def __copy__(self):
-        pass
+        return wrapper
 
-    def __deepcopy__(self, memo):
-        pass
+    def __new__(cls, name, bases, attr):
+        if 'initialize' in attr:
+            attr['initialize'] = LayerMeta.init_wrapper(attr['initialize'])
 
+        return super(LayerMeta, cls).__new__(cls, name, bases, attr)
 
-class Dummy(Layer):
-    '''A dummy layer that does nothing but just forwards/backwards the data
-    (the input/output is a single tensor).
-    '''
-
-    def __init__(self, name, input_sample_shape=None):
-        super(Dummy, self).__init__(name)
-        self.output_sample_shape = input_sample_shape
-
-    def get_output_sample_shape(self):
-        return self.output_sample_shape
-
-    def setup(self, input_sample_shape):
-        self.output_sample_shape = input_sample_shape
-        self.has_setup = True
-
-    def forward(self, flag, x):
-        '''Return the input x'''
-        return x
-
-    def backward(self, flag, dy):
-        '''Return dy, []'''
-        return dy, []
-
-
-class Conv2D(Layer):
-    """Construct a layer for 2D convolution.
-
-    Args:
-        nb_kernels (int): num of kernels, i.e., the channels of the output
-            Tensor
-        kernel: an integer or a pair of integers for kernel height and width
-        stride: an integer or a pair of integers for stride height and width
-        border_mode (string): padding mode, case-insensitive,
-            'valid' -> padding is 0 for height and width
-            'same' -> padding is half of the kernel (floor); the kernel size
-            must be an odd number.
-        cudnn_prefer (string): the preferred algorithm for cudnn convolution
-            which could be 'fastest', 'autotune', 'limited_workspace' and
-            'no_workspace'
-        workspace_byte_limit (int): max workspace size in MB (default is 1024)
-        data_format (string): either 'NCHW' or 'NHWC'
-        use_bias (bool): True or False
-        pad: an integer or a pair of integers for padding height and width
-        W_specs (dict): used to specify the weight matrix specs, fields
-            include,
-            'name' for parameter name
-            'lr_mult' for learning rate multiplier
-            'decay_mult' for weight decay multiplier
-            'init' for init method, which could be 'gaussian', 'uniform',
-            'xavier' and ''
-            'std', 'mean', 'high', 'low' for corresponding init methods
-            TODO(wangwei) 'clamp' for gradient constraint, value is scalar
-            'regularizer' for regularization, currently support 'l2'
-        b_specs (dict): hyper-parameters for bias vector, similar as W_specs
-        name (string): layer name.
-        input_sample_shape: 3d tuple for the shape of the input Tensor
-            without the batchsize, e.g., (channel, height, width) or
-            (height, width, channel)
-    """
 
-    def __init__(self,
-                 name,
-                 nb_kernels,
-                 kernel=3,
-                 stride=1,
-                 border_mode='same',
-                 cudnn_prefer='fastest',
-                 workspace_byte_limit=1024,
-                 data_format='NCHW',
-                 use_bias=True,
-                 W_specs=None,
-                 b_specs=None,
-                 pad=None,
-                 input_sample_shape=None):
-        super(Conv2D, self).__init__(name)
-        assert data_format == 'NCHW', 'Not supported data format: %s ' \
-            'only "NCHW" is enabled currently' % (data_format)
-        conf = self.conf.convolution_conf
-        conf.num_output = nb_kernels
-        conf.prefer = cudnn_prefer
-        conf.workspace_byte_limit = workspace_byte_limit
-        self.kernel = kernel
-        self.stride = stride
-        self.pad = pad
-        self.border_mode = border_mode
-        conf.bias_term = use_bias
-        # TODO(wangwei) enable data format for cpp code
-        # conf.data_format = data_format
-        if W_specs is None:
-            W_specs = {'init': 'xavier'}
-        if 'name' not in W_specs:
-            W_specs['name'] = name + '/weight'
-        wspecs = _construct_param_specs_from_dict(W_specs)
-        self.conf.param.extend([wspecs])
-        self.param_specs.append(wspecs)
-        if use_bias:
-            if b_specs is None:
-                b_specs = {'init': 'constant'}
-            if 'name' not in b_specs:
-                b_specs['name'] = name + '/bias'
-            bspecs = _construct_param_specs_from_dict(b_specs)
-            self.conf.param.extend([bspecs])
-            self.param_specs.append(bspecs)
-
-        _check_engine(engine, ['cudnn', 'singacpp', 'singacl'])
-        self.layer = _create_layer(engine, 'Convolution')
-        if input_sample_shape is not None:
-            self.setup(input_sample_shape)
-
-    def setup(self, in_shape):
-        '''Set up the kernel, stride and padding; then call the C++ setup
-        function to create params and set some meta data.
+class Layer(object, metaclass=LayerMeta):
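+    """Base class of the new layer API. Sublayers are tracked in
+    self._layers; initialize() is deferred until the first call (see
+    __call__)."""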
 
-        Args:
-            in_shape: a tuple of int for the input sample shape
-        '''
-        if self.has_setup:
-            return
-        _set_kernel_stride_pad(self.conf.convolution_conf, self.kernel,
-                               self.stride, self.border_mode, self.pad,
-                               in_shape)
-        self.layer.Setup(list(in_shape), self.conf.SerializeToString())
-        self.has_setup = True
-
-
-class Conv1D(Conv2D):
-    """Construct a layer for 1D convolution.
-
-    Most of the args are the same as those for Conv2D except that kernel,
-    stride and pad are scalars instead of tuples.
-    input_sample_shape is a tuple with a single value for the input feature
-    length
-    """
+    sep = '.'
 
-    def __init__(self,
-                 name,
-                 nb_kernels,
-                 kernel=3,
-                 stride=1,
-                 border_mode='same',
-                 cudnn_prefer='fastest',
-                 workspace_byte_limit=1024,
-                 use_bias=True,
-                 W_specs={'init': 'Xavier'},
-                 b_specs={
-                     'init': 'Constant',
-                     'value': 0
-                 },
-                 pad=None,
-                 input_sample_shape=None):
-        if pad is not None:
-            pad = (0, pad)
-        if input_sample_shape is not None:
-            input_sample_shape = (1, 1, input_sample_shape[0])
-        super(Conv1D, self).__init__(name,
-                                     nb_kernels, (1, kernel), (0, stride),
-                                     border_mode,
-                                     cudnn_prefer,
-                                     workspace_byte_limit,
-                                     use_bias=use_bias,
-                                     pad=pad,
-                                     W_specs=W_specs,
-                                     b_specs=b_specs,
-                                     input_sample_shape=input_sample_shape)
-
-    def get_output_sample_shape(self):
-        shape = self.layer.GetOutputSampleShape()
-        assert len(shape) == 3, 'The output sample shape should be 3D, '\
-            'but the length is %d' % len(shape)
-        return (shape[0], shape[2])
-
-
-class Pooling2D(Layer):
-    '''2D pooling layer providing max/avg pooling.
-
-    All args are the same as those for Conv2D, except the following one
-
-    Args:
-        mode: pooling type, model_pb2.PoolingConf.MAX or
-            model_pb2.PoolingConf.AVE
-
-    '''
+    def __init__(self):
+        self.name = self.__class__.__name__
+        self._initialized = False
+        self._parent = None
+        self._layers = dict()
 
-    def __init__(self,
-                 name,
-                 mode,
-                 kernel=3,
-                 stride=2,
-                 border_mode='same',
-                 pad=None,
-                 data_format='NCHW',
-                 input_sample_shape=None):
-        super(Pooling2D, self).__init__(name)
-        assert data_format == 'NCHW', 'Not supported data format: %s ' \
-            'only "NCHW" is enabled currently' % (data_format)
-        conf = self.conf.pooling_conf
-        conf.pool = mode
-        self.kernel = kernel
-        self.stride = stride
-        self.pad = pad
-        self.border_mode = border_mode
-        _check_engine(engine, ['cudnn', 'singacpp', 'singacl'])
-        self.layer = _create_layer(engine, 'Pooling')
-        if input_sample_shape is not None:
-            self.setup(input_sample_shape)
+    def initialize(self, *input):
+        pass
 
-    def setup(self, in_shape):
-        '''Set up the kernel, stride and padding; then call the C++ setup
-        function to create params and set some meta data.
+    def forward(self, *input):
+        pass
 
-        Args:
-            in_shape: a tuple of int for the input sample shape
-        '''
-        if self.has_setup:
-            return
-        _set_kernel_stride_pad(self.conf.pooling_conf, self.kernel, self.stride,
-                               self.border_mode, self.pad, in_shape)
-        self.layer.Setup(list(in_shape), self.conf.SerializeToString())
-        self.has_setup = True
+    def __call__(self, *args, **kwargs):
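+        # lazy initialization: build params from the first actual
+        # inputs, then run the forward pass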
+        if not self._initialized:
+            self.initialize(*args, **kwargs)
+            self._initialized = True
+
+        return self.forward(*args, **kwargs)
+
+    def get_params(self):
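+        # recursively collect the parameter tensors of all sublayers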
+        params = dict()
+        sublayers = self._layers
+        for name, sublayer in sublayers.items():
+            params.update(sublayer.get_params())
+        return params
+
+    def set_params(self, parameters):
+        # set parameters for Layer
+        # input should be either a PyTensor or numpy ndarray.
+        # examples: Layer.set_params(W=np.ones((in, out), dtype=np.float32)),
+        # Layer.set_params(**{'block1':{'linear1':{'W':np.ones((in, out),
+        # dtype=np.float32)}}})
+        sublayers = self._layers
+        for name, sublayer in sublayers.items():
+            sublayer.set_params(parameters)
+
+    def get_states(self):
+        states = dict()
+        sublayers = self._layers
+        for name, sublayer in sublayers.items():
+            states.update(sublayer.get_states())
+        states.update(self.get_params())
+        return states
+
+    def set_states(self, states):
+        sublayers = self._layers
+        for name, sublayer in sublayers.items():
+            sublayer.set_states(states)
+        self.set_params(states)
+
+    def device_check(self, *inputs):
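+        # move every input onto the device of the first input so that
+        # all following operations run on a single device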
+        x_device = inputs[0].device
+        x_dev_id = x_device.id()
+        for var in inputs:
+            if var.device.id() != x_dev_id:
+                var.to_device(x_device)
+
+    def set_attribute(self, attribute, attribute_value):
+        assert (attribute_value.shape == attribute.shape), "Shape mismatch."
+        if isinstance(attribute_value, Tensor):
+            attribute.reset_like(attribute_value)
+            attribute.copy_data(attribute_value)
+        elif isinstance(attribute_value, np.ndarray):
+            attribute.copy_from_numpy(attribute_value)
+        else:
+            raise ValueError("attributes should be Tensor or Numpy array.")
+
+    def _get_unique_name(self):
+        prefix = ''

Review comment:
       Not yet. It just takes the name of the parent layer and appends the separator plus
the local name to form the name of the current layer. Since names are updated inside the
initialize function and all layers are initialized from top to bottom, each name is
guaranteed to be unique.

   steps:
   * [set local name for sublayers](https://github.com/apache/singa/blob/8b1a49dab2b38bdd2cb5ba0bdeb94696e8e1dc6b/python/singa/layer.py#L156)
   * [get unique names when initializing sublayers](https://github.com/apache/singa/blob/8b1a49dab2b38bdd2cb5ba0bdeb94696e8e1dc6b/python/singa/layer.py#L46)
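
   A minimal sketch of that naming scheme (simplified for illustration; the real logic lives in `_get_unique_name` and the `initialize` wrapper in this PR, and the `block1`/`linear1` names below are just examples):

   ```python
   class Layer:
       sep = '.'

       def __init__(self):
           self.name = self.__class__.__name__  # local name by default
           self._parent = None
           self._layers = {}

       def add(self, local_name, sublayer):
           # register a sublayer under a local name (the real code sets
           # local names when sublayers are assigned as attributes)
           sublayer.name = local_name
           sublayer._parent = self
           self._layers[local_name] = sublayer

       def _get_unique_name(self):
           # prefix the local name with the parent's (already unique)
           # name; parents are named before children, so no collisions
           if self._parent is not None:
               self.name = self._parent.name + Layer.sep + self.name
           for sub in self._layers.values():
               sub._get_unique_name()
           return self.name


   root = Layer()
   root.name = 'block1'
   sub = Layer()
   root.add('linear1', sub)
   root._get_unique_name()
   print(sub.name)  # block1.linear1
   ```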





