hivemall-issues mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From myui <...@git.apache.org>
Subject [GitHub] incubator-hivemall pull request #66: [WIP][HIVEMALL-91] Implement Online LDA
Date Tue, 04 Apr 2017 01:03:22 GMT
Github user myui commented on a diff in the pull request:

    https://github.com/apache/incubator-hivemall/pull/66#discussion_r109558520
  
    --- Diff: core/src/main/java/hivemall/lda/OnlineLDAUDTF.java ---
    @@ -0,0 +1,112 @@
    +package hivemall.lda;
    +
    +import org.apache.commons.cli.CommandLine;
    +import org.apache.commons.cli.Options;
    +import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
    +import org.apache.hadoop.hive.ql.metadata.HiveException;
    +import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
    +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
    +import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
    +import org.json.CDL;
    +
    +import hivemall.UDTFWithOptions;
    +import hivemall.utils.HivemallUtils;
    +import hivemall.utils.hadoop.HiveUtils;
    +import hivemall.utils.lang.Primitives;
    +
    +public class OnlineLDAUDTF extends UDTFWithOptions{
    +	
    +	private ListObjectInspector _XOI;
    +	
    +	// Learning Parameters
    +	private int _topics;	// The number of topics
    +	private double _alpha;	// The hyperparameter for theta
    +	private double _eta;	// The hyperparameter for beta
    +	private int _totalD;	// The total number of Document
    +	private double _tau0;	// The parameter to control learning speed of Lambda
    +	private double _kappa;	// The parameter to control learning speed of Lambda
    +	private int _batchSize;	// The mini-batch size
    +	
    +	private int _iterations;// The number of Iterations
    +	private double _delta;	// The number for Convergence check
    +	
    +
    +	@Override
    +	protected Options getOptions() {
    +		// TODO Auto-generated method stub
    +		Options opts = new Options();
    +		opts.addOption("k", "topics", true, "The number of Topics");
    +		opts.addOption("a", "alpha", false, "The hyperparameter for theta[Default: 1/K]");
    +		opts.addOption("e", "eta", false, 	"The hyperparameter for beta [Default: 1/K]");
    +		opts.addOption("td", "totalD", false, 	"The total number of Document [Default: 10000]");
    +		opts.addOption("tau", "tau0",  false, 	"The parameter to control learning speed of
Lambda [Default: 64]");
    +		opts.addOption("kappa", false, 	"The parameter to control learning speed of Lambda
[Default: 0.7]");
    +		opts.addOption("iters", "iterations", false, 	"The number of Iterations [default: 1]");
    +		opts.addOption("d", "delta", false, 	"The number for Convergence check [default: 1E-5]");
    +		return opts;
    +	}
    +
    +	@Override
    +	protected CommandLine processOptions(ObjectInspector[] argOIs) throws UDFArgumentException
{
    +		// TODO Auto-generated method stub
    +		int topics = 10;
    +		double alpha = 1./topics;
    +		double eta = 1. / topics;
    +		int totalD = 10000;
    +		double tau0 = 64;
    +		double kappa = 0.7;
    +		int iterations = 1;
    +		double delta = 1E-5;
    +		
    +		CommandLine cl = null;
    +		if(argOIs.length >= 2){
    +			String rawArgs = HiveUtils.getConstString(argOIs[1]);
    +			cl = parseOptions(rawArgs);
    +			topics = Primitives.parseInt(cl.getOptionValue("topics"), topics);
    +			alpha = Primitives.parseDouble(cl.getOptionValue("alpha"), alpha);
    +			eta = Primitives.parseDouble(cl.getOptionValue("eta"), eta);
    +			totalD = Primitives.parseInt(cl.getOptionValue("totalD"), totalD);
    +			tau0 = Primitives.parseDouble(cl.getOptionValue("tau0"), tau0);
    +			kappa = Primitives.parseDouble(cl.getOptionValue("kappa"), kappa);
    +			iterations = Primitives.parseInt(cl.getOptionValue("iterations"), iterations);
    +			delta = Primitives.parseDouble(cl.getOptionValue("delta"), delta);
    +		}
    +		
    +		this._topics = topics;
    +		this._alpha = alpha;
    +		this._eta = eta;
    +		this._totalD = totalD;
    +		this._kappa = kappa;
    +		this._iterations = iterations;
    +		this._delta = delta;
    +
    +		return cl;
    +	}
    +	
    +	@Override
    +	public StructObjectInspector initialize(ObjectInspector[] argOIs) throws UDFArgumentException
{
    +		// TODO Auto-generated method stub
    --- End diff --
    
    not implemented yet.


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastructure@apache.org or file a JIRA ticket
with INFRA.
---

Mime
View raw message