spark-reviews mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jkbradley <...@git.apache.org>
Subject [GitHub] spark pull request: [SPARK-11939] [ML] [PySpark] PySpark support m...
Date Tue, 12 Jan 2016 02:25:24 GMT
Github user jkbradley commented on a diff in the pull request:

    https://github.com/apache/spark/pull/10469#discussion_r49408931
  
    --- Diff: python/pyspark/ml/util.py ---
    @@ -52,3 +71,141 @@ def _randomUID(cls):
             concatenates the class name, "_", and 12 random hex chars.
             """
             return cls.__name__ + "_" + uuid.uuid4().hex[12:]
    +
    +
    +@inherit_doc
    +class MLWriter(object):
    +    """
    +    Abstract class for utility classes that can save ML instances.
    +
    +    .. versionadded:: 2.0.0
    +    """
    +
    +    def __init__(self, instance):
    +        self._jwrite = instance._java_obj.write()
    +
    +    @since("2.0.0")
    +    def save(self, path):
    +        """Saves the ML instances to the input path."""
    +        self._jwrite.save(path)
    +
    +    @since("2.0.0")
    +    def overwrite(self):
    +        """Overwrites if the output path already exists."""
    +        self._jwrite.overwrite()
    +        return self
    +
    +    @since("2.0.0")
    +    def context(self, sqlContext):
    +        """Sets the SQL context to use for saving."""
    +        self._jwrite.context(sqlContext._ssql_ctx)
    +        return self
    +
    +
    +@inherit_doc
    +class MLWritable(object):
    +    """
    +    Mixin for ML instances that provide MLWriter through their Scala
    +    implementation.
    +
    +    .. versionadded:: 2.0.0
    +    """
    +
    +    @since("2.0.0")
    +    def write(self):
    +        """Returns an MLWriter instance for this ML instance."""
    +        return MLWriter(self)
    +
    +    @since("2.0.0")
    +    def save(self, path):
    +        """Save this ML instance to the given path, a shortcut for `write().save(path)`."""
    +        if not isinstance(path, basestring):
    +            raise TypeError("path should be a basestring, got type %s" % type(path))
    +        self._java_obj.save(path)
    +
    +
    +@inherit_doc
    +class MLReader(object):
    +    """
    +    Abstract class for utility classes that can load ML instances.
    +
    +    .. versionadded:: 2.0.0
    +    """
    +
    +    def __init__(self, instance):
    +        self._instance = instance
    +        self._jread = instance._java_obj.read()
    +
    +    @since("2.0.0")
    +    def load(self, path):
    +        """Loads the ML component from the input path."""
    +        self._instance.load(path)
    +
    +    @since("2.0.0")
    +    def context(self, sqlContext):
    +        """Sets the SQL context to use for loading."""
    +        self._jread.context(sqlContext._ssql_ctx)
    +        return self
    +
    +
    +@inherit_doc
    +class MLReadable(object):
    +    """
    +    Mixin for objects that provide MLReader using their Scala implementation.
    +
    +    .. versionadded:: 2.0.0
    +    """
    +
    +    @classmethod
    +    def _java_loader_class(cls):
    +        """
    +        Returns the full class name of the Java loader. The default
    +        implementation replaces "pyspark" by "org.apache.spark" in
    +        the Python full class name.
    +        """
    +        java_package = cls.__module__.replace("pyspark", "org.apache.spark")
    +        return ".".join([java_package, cls.__name__])
    +
    +    @classmethod
    +    def _load_java(cls, path):
    +        """
    +        Load a Java model from the given path.
    +        """
    +        java_class = cls._java_loader_class()
    +        java_obj = _jvm()
    +        for name in java_class.split("."):
    +            java_obj = getattr(java_obj, name)
    +        return java_obj.load(path)
    +
    +    @classmethod
    +    @since("2.0.0")
    +    def read(self):
    +        """Returns an MLReader instance for this class."""
    +        return MLReader(self)
    +
    +
    +@inherit_doc
    +class TransformerMLReadable(MLReadable):
    --- End diff --
    
    I'm hoping this and EstimatorMLReadable will not be needed if we can make a generic MLReadable
which works for any JavaWrapper type.


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastructure@apache.org or file a JIRA ticket
with INFRA.
---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org


Mime
View raw message