hivemall-issues mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From myui <...@git.apache.org>
Subject [GitHub] incubator-hivemall issue #13: [WIP] Kernelized Passive-Aggressive Algorithm ...
Date Thu, 12 Jan 2017 17:24:47 GMT
Github user myui commented on the issue:

    https://github.com/apache/incubator-hivemall/pull/13
  
    Usage
    
    ```sql
    -- Make a9a the current database for every statement that follows.
    USE a9a;
    
    -- Training set, read in place from tab-delimited text files at the S3 location
    -- below. Within a row, the elements of `features` are separated by commas.
    -- IF NOT EXISTS keeps the script re-runnable: without it a second run aborts
    -- here because the table is already registered.
    CREATE EXTERNAL TABLE IF NOT EXISTS train (
      rowid int,
      label float,
      features ARRAY<STRING>
    )
    ROW FORMAT DELIMITED
      FIELDS TERMINATED BY '\t'
      COLLECTION ITEMS TERMINATED BY ","
    STORED AS TEXTFILE
    LOCATION 's3://myui-dev/Datasets/a9a/train/';
    
    -- Held-out test set, read in place from tab-delimited text files at the S3
    -- location below. Within a row, the elements of `features` are separated by commas.
    -- IF NOT EXISTS keeps the script re-runnable: without it a second run aborts
    -- here because the table is already registered.
    CREATE EXTERNAL TABLE IF NOT EXISTS test (
      rowid int,
      label float,
      features ARRAY<STRING>
    )
    ROW FORMAT DELIMITED
      FIELDS TERMINATED BY '\t'
      COLLECTION ITEMS TERMINATED BY ","
    STORED AS TEXTFILE
    LOCATION 's3://myui-dev/Datasets/a9a/test/';
    
    -- Training rows passed through Hivemall's amplify UDTF with a factor of 3,
    -- then distributed and sorted by a seeded random value (seed 31).
    -- NOTE(review): amplify presumably emits each input row 3 times — confirm
    -- against the Hivemall documentation.
    CREATE OR REPLACE VIEW train_x3
    AS
    SELECT
      amplified.rowid,
      amplified.label,
      amplified.features
    FROM (
      SELECT
        amplify(3, rowid, label, features) AS (rowid, label, features)
      FROM
        train
    ) amplified
    CLUSTER BY rand(31);
    
    -- Expands each test row through the feature_pairs UDTF (called with the
    -- "-kpa" option): every tuple (h, hk, Xh, Xk) it emits becomes one output
    -- row tagged with the originating rowid.
    CREATE OR REPLACE VIEW test_exploded AS
    SELECT
      src.rowid,
      fp.h,
      fp.hk,
      fp.Xh,
      fp.Xk
    FROM
      test src
      LATERAL VIEW feature_pairs(src.features, "-kpa") fp AS h, hk, Xh, Xk;
      
    -- Rebuild the model table from scratch. IF EXISTS lets the drop succeed on a
    -- first run regardless of the hive.exec.drop.ignorenonexistent setting.
    DROP TABLE IF EXISTS kpa_model;
    -- train_kpa emits (feature, w0, w1, w2, w3) rows; rows that share a feature
    -- are merged by averaging each weight column.
    -- NOTE(review): "-c 0.01" is passed through as train_kpa's option string;
    -- presumably the aggressiveness parameter — confirm against Hivemall docs.
    CREATE TABLE kpa_model AS
    SELECT
      feature,
      AVG(w0) AS w0,
      AVG(w1) AS w1,
      AVG(w2) AS w2,
      AVG(w3) AS w3
    FROM (
      SELECT
        train_kpa(features, label, "-c 0.01") AS (feature, w0, w1, w2, w3)
      FROM
        train
        -- train_x3  (alternative input: swap in for `train` to use that view)
    ) t
    GROUP BY
      feature;
    
    -- Scores each test row with the kpa_predict aggregate and thresholds the
    -- score into a 0/1 label. A NULL or non-positive score yields 0.
    CREATE OR REPLACE VIEW kpa_predict
    AS
    WITH scored AS (
      SELECT
        te.rowid,
        -- nullability notes below are carried over from the original author
        kpa_predict(
          te.Xh,   -- nonnull
          te.Xk,   -- nonnull
          mh.w0,   -- nullable
          mh.w1,   -- nonnull
          mh.w2,   -- nonnull
          mhk.w3   -- nullable
        ) AS score
      FROM
        test_exploded te
        -- The model is joined twice: once on h (supplies w0, w1, w2) and once
        -- on hk (supplies w3). Outer joins keep rows with no matching feature.
        LEFT OUTER JOIN kpa_model mh ON (mh.feature = te.h)
        LEFT OUTER JOIN kpa_model mhk ON (mhk.feature = te.hk)
      GROUP BY
        te.rowid
    )
    SELECT
      rowid,
      CASE
        WHEN score > 0.0 THEN 1
        ELSE 0
      END AS label
    FROM
      scored;
    
    -- Pairs each test row's stored label (actual) with the label produced by the
    -- kpa_predict view (predicted). Inner join: a test row without a matching
    -- prediction row does not appear in the output.
    CREATE OR REPLACE VIEW kpa_submit AS
    SELECT
      truth.label AS actual,
      pred.label AS predicted
    FROM
      test truth
      INNER JOIN kpa_predict pred
        ON (truth.rowid = pred.rowid);
    
    -- Accuracy: number of rows where the prediction matches the actual label,
    -- divided by a fixed denominator.
    -- NOTE(review): 16281 is presumably the total row count of the test table — confirm.
    SELECT
      COUNT(1) / 16281
    FROM
      kpa_submit
    WHERE
      actual = predicted;
    ```


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and would like it to be, or if the feature is enabled but not working,
please contact infrastructure at infrastructure@apache.org or file a JIRA ticket
with INFRA.
---

Mime
View raw message