hivemall-issues mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From myui <...@git.apache.org>
Subject [GitHub] incubator-hivemall issue #98: [WIP][HIVEMALL-108-2] Support `-iter` option i...
Date Fri, 14 Jul 2017 10:24:10 GMT
Github user myui commented on the issue:

    https://github.com/apache/incubator-hivemall/pull/98
  
    Confirmed that the following system test passes successfully on EMR.
    
    https://github.com/apache/incubator-hivemall/pull/79
    
    ```sql
    -- Denominator of the accuracy ratio computed by the final query of this script.
    set hivevar:n_samples=16281;
    -- Substituted into the trainer's -total_steps option.
    set hivevar:total_steps=32562;
    
    -- Rebuild logress_model: train on train_x3 with logistic loss and plain SGD,
    -- then collapse the trainer output to one averaged weight per feature.
    drop table if exists logress_model;
    create table logress_model as
    select
      feature,
      avg(weight) as weight
    from
      (
        select
          -- The option string is kept on a single line; a line break inside the
          -- literal would become part of the option text.
          train_classifier(
            add_bias(features),
            label,
            '-loss logloss -opt SGD -reg no -eta simple -total_steps ${total_steps}'
          ) as (feature, weight)
          -- Alternative call kept for reference (presumably the function this test
          -- compares against; confirm before removing):
          -- logress(add_bias(features), label, '-total_steps ${total_steps}') as (feature, weight)
        from
          train_x3
      ) t
    group by
      feature;
    
    -- Accuracy of logress_model on the test table:
    -- fraction of ${n_samples} rows whose thresholded prediction equals the label.
    with test_exploded as (
      -- One row per (rowid, feature) pair, bias feature included.
      select
        rowid,
        label,
        extract_feature(feature) as feature,
        extract_weight(feature) as value
      from
        test
        lateral view explode(add_bias(features)) exploded as feature
    ),
    predict as (
      -- Per-row probability: sigmoid of the weighted feature sum.
      select
        te.rowid,
        sigmoid(sum(model.weight * te.value)) as prob
      from
        test_exploded as te
        left join logress_model as model
          on (te.feature = model.feature)
      group by
        te.rowid
    ),
    submit as (
      -- Pair each actual label with the prediction, thresholding prob at 0.5.
      select
        src.label as actual,
        cast(case when pd.prob >= 0.5 then 1.0 else 0.0 end as float) as predicted,
        pd.prob as probability
      from
        test as src
        inner join predict as pd
          on (src.rowid = pd.rowid)
    )
    select
      count(*) / ${n_samples}
    from
      submit
    where
      actual = predicted;
    ```
    
    ```sql
    -- Rebuild adagrad_model: squared-loss regression optimized with AdaGrad on
    -- train_x3, reduced to one averaged weight per feature.
    drop table if exists adagrad_model;
    create table adagrad_model as
    select
      feature,
      avg(weight) as weight
    from
      (
        select
          -- Alternative call kept for reference:
          -- train_adagrad_regr(features, label) as (feature, weight)
          train_regression(
            features,
            label,
            '-loss squaredloss -opt AdaGrad -reg no'
          ) as (feature, weight)
        from
          train_x3
      ) t
    group by
      feature;
    
    -- RMSE between adagrad_model's predicted probability and the test labels.
    with test_exploded as (
      -- One row per (rowid, feature) pair, bias feature included.
      select
        rowid,
        label,
        extract_feature(feature) as feature,
        extract_weight(feature) as value
      from
        test
        lateral view explode(add_bias(features)) exploded as feature
    ),
    predict as (
      -- Per-row score: sigmoid of the weighted feature sum.
      select
        te.rowid,
        sigmoid(sum(model.weight * te.value)) as prob
      from
        test_exploded as te
        left join adagrad_model as model
          on (te.feature = model.feature)
      group by
        te.rowid
    ),
    submit as (
      -- Pair each actual label with its predicted probability.
      select
        src.label as actual,
        pd.prob as probability
      from
        test as src
        inner join predict as pd
          on (src.rowid = pd.rowid)
    )
    select
      rmse(probability, actual)
    from
      submit;
    ```


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and would like it, or if the feature is enabled but not working, please
contact infrastructure at infrastructure@apache.org or file a JIRA ticket
with INFRA.
---

Mime
View raw message