spark-reviews mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jkbradley <...@git.apache.org>
Subject [GitHub] spark pull request: [SPARK-13068][PYSPARK][ML] Type conversion for...
Date Tue, 22 Mar 2016 16:58:29 GMT
Github user jkbradley commented on a diff in the pull request:

    https://github.com/apache/spark/pull/11663#discussion_r57024858
  
    --- Diff: python/pyspark/ml/param/__init__.py ---
    @@ -65,6 +75,146 @@ def __eq__(self, other):
                 return False
     
     
    +class TypeConverters(object):
    +    """
    +    .. note:: DeveloperApi
    +
    +    Factory methods for common type conversion functions for `Param.typeConverter`.
    +
    +    .. versionadded:: 2.0.0
    +    """
    +
    +    @staticmethod
    +    def _is_numeric(value):
    +        vtype = type(value)
    +        return vtype in [int, float, np.float64, np.int64] or vtype.__name__ == 'long'
    +
    +    @staticmethod
    +    def _is_integer(value):
    +        if TypeConverters._is_numeric(value):
    +            value = float(value)
    +            return value.is_integer()
    +        else:
    +            return False
    +
    +    @staticmethod
    +    def _can_convert_to_list(value):
    +        vtype = type(value)
    +        return vtype == list or vtype == np.ndarray or isinstance(value, Vector)
    +
    +    @staticmethod
    +    def _is_string(value):
    +        vtype = type(value)
    +        return vtype in [str, np.unicode_, np.string_, np.str_] or type(value).__name__ == 'unicode'
    +
    +    @staticmethod
    +    def identity(value):
    +        """
    +        Dummy converter that just returns value.
    +        """
    +        return value
    +
    +    @staticmethod
    +    def toList(value):
    +        """
    +        Convert a value to a list, if possible.
    +        """
    +        if type(value) == list:
    +            return value
    +        elif type(value) == np.ndarray:
    +            return list(value)
    +        elif isinstance(value, Vector):
    +            return list(value.toArray())
    +        else:
    +            raise TypeError("Could not convert %s to list" % value)
    +
    +    @staticmethod
    +    def toListFloat(value):
    +        """
    +        Convert a value to list of floats, if possible.
    +        """
    +        if TypeConverters._can_convert_to_list(value):
    +            value = TypeConverters.toList(value)
    +            if all(map(lambda v: TypeConverters._is_numeric(v), value)):
    +                return list(map(lambda v: float(v), value))
    +        raise TypeError("Could not convert %s to list of floats" % value)
    +
    +    @staticmethod
    +    def toListInt(value):
    +        """
    +        Convert a value to list of ints, if possible.
    +        """
    +        if TypeConverters._can_convert_to_list(value):
    +            value = TypeConverters.toList(value)
    +            if all(map(lambda v: TypeConverters._is_integer(v), value)):
    +                return list(map(lambda v: int(v), value))
    +        raise TypeError("Could not convert %s to list of ints" % value)
    +
    +    @staticmethod
    +    def toListString(value):
    +        """
    +        Convert a value to list of strings, if possible.
    +        """
    +        if TypeConverters._can_convert_to_list(value):
    +            value = TypeConverters.toList(value)
    +            if all(map(lambda v: TypeConverters._is_string(v), value)):
    +                return list(map(lambda v: str(v), value))
    +        raise TypeError("Could not convert %s to list of strings" % value)
    +
    +    @staticmethod
    +    def toVector(value):
    +        """
    +        Convert a value to a MLlib Vector, if possible.
    +        """
    +        if isinstance(value, Vector):
    +            return value
    +        elif TypeConverters._can_convert_to_list(value):
    +            value = TypeConverters.toList(value)
    +            if all(map(lambda v: TypeConverters._is_numeric(v), value)):
    +                return DenseVector(value)
    +        raise TypeError("Could not convert %s to vector" % value)
    +
    +    @staticmethod
    +    def toFloat(value):
    +        """
    +        Convert a value to a float, if possible.
    +        """
    +        if TypeConverters._is_numeric(value):
    +            return float(value)
    +        else:
    +            raise TypeError("Could not convert %s to float" % value)
    +
    +    @staticmethod
    +    def toInt(value):
    +        """
    +        Convert a value to an int, if possible.
    +        """
    +        if TypeConverters._is_integer(value):
    +            return int(value)
    +        else:
    +            raise TypeError("Could not convert %s to int" % value)
    +
    +    @staticmethod
    +    def toString(value):
    +        """
    +        Convert a value to a string, if possible.
    +        """
    +        if TypeConverters._is_string(value):
    +            return str(value)
    +        else:
    +            raise TypeError("Could not convert %s to string" % value)
    +
    +    @staticmethod
    +    def toBoolean(value):
    +        """
    +        Convert a value to a boolean, if possible.
    +        """
    +        if type(value) == bool:
    +            return value
    +        else:
    +            raise TypeError("Could not convert %s to bool" % value)
    --- End diff --
    
    Since this is a bit strict, maybe we should say in the error:
    "Boolean Param requires value of type bool.  Found type %s"


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes to have it, or if the feature is enabled but not working, please
contact infrastructure at infrastructure@apache.org or file a JIRA ticket
with INFRA.
---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org


Mime
View raw message