spot-commits mailing list archives

From everl...@apache.org
Subject [3/5] incubator-spot git commit: Adding migration scripts from Spot 0.9 to 1.0
Date Wed, 21 Jun 2017 22:21:41 GMT
Adding migration scripts from Spot 0.9 to 1.0
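
Each data source (flow, dns, proxy) gets two files: an HQL script that creates external
staging tables over the old CSV/TSV output, and a shell script that loads each day's files
into those tables and then inserts the rows into the new partitioned 1.0 tables. A minimal
invocation sketch, reusing the example values from the scripts' own comments (the paths and
the Impala daemon host are illustrative, not defaults):

    ./migrate_old_flow_data.sh  '/home/spot/spot-csv-data' 'spot_migration' '/user/spot/spot_migration/' 'migrated' 'node01'
    ./migrate_old_dns_data.sh   '/home/spot/spot-csv-data' 'spot_migration' '/user/spot/spot_migration/' 'migrated' 'node01'
    ./migrate_old_proxy_data.sh '/home/spot/spot-csv-data' 'spot_migration' '/user/spot/spot_migration/' 'migrated' 'node01'

where the positional arguments are OLD_DATA_PATH, STAGING_DB, HDFS_STAGING_PATH, DEST_DB
and IMPALA_DEM.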


Project: http://git-wip-us.apache.org/repos/asf/incubator-spot/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spot/commit/ad512012
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spot/tree/ad512012
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spot/diff/ad512012

Branch: refs/heads/SPOT-35_graphql_api
Commit: ad512012d7447b13343467b279633f86fdbeec38
Parents: a55e09f
Author: Moy Valdovinos <moy8011@gmail.com>
Authored: Mon Jun 5 19:34:11 2017 -0500
Committer: Everardo Lopez Sandoval (Intel) <elopezsa@elopezsa-mac02.zpn.intel.com>
Committed: Wed Jun 21 17:20:53 2017 -0500

----------------------------------------------------------------------
 .../migration/create_dns_migration_tables.hql   |  96 ++++++++++
 .../migration/create_flow_migration_tables.hql  | 108 +++++++++++
 .../migration/create_proxy_migration_tables.hql |  99 ++++++++++
 spot-setup/migration/migrate_old_dns_data.sh    | 158 ++++++++++++++++
 spot-setup/migration/migrate_old_flow_data.sh   | 183 +++++++++++++++++++
 spot-setup/migration/migrate_old_proxy_data.sh  | 138 ++++++++++++++
 6 files changed, 782 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/ad512012/spot-setup/migration/create_dns_migration_tables.hql
----------------------------------------------------------------------
diff --git a/spot-setup/migration/create_dns_migration_tables.hql b/spot-setup/migration/create_dns_migration_tables.hql
new file mode 100644
index 0000000..c85fd0f
--- /dev/null
+++ b/spot-setup/migration/create_dns_migration_tables.hql
@@ -0,0 +1,96 @@
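+-- ${var:dbname} and ${var:hpath} are shell variables substituted by impala-shell,
+-- passed in by the migrate_old_*_data.sh scripts via --var=dbname=... and --var=hpath=...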
+CREATE DATABASE IF NOT EXISTS ${var:dbname};
+
+
+DROP TABLE IF EXISTS ${var:dbname}.dns_dendro_tmp;
+
+CREATE EXTERNAL TABLE ${var:dbname}.dns_dendro_tmp (
+dns_a STRING,
+dns_qry_name STRING,
+ip_dst STRING
+)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
+LOCATION '${var:hpath}/dns/dendro'
+TBLPROPERTIES ('skip.header.line.count'='1');
+
+
+DROP TABLE IF EXISTS ${var:dbname}.dns_edge_tmp;
+
+CREATE EXTERNAL TABLE ${var:dbname}.dns_edge_tmp ( 
+frame_time STRING,
+frame_len STRING,
+ip_dst STRING,
+ip_src STRING,
+dns_qry_name STRING,
+dns_qry_class STRING,
+dns_qry_type STRING,
+dns_qry_rcode STRING,
+dns_a STRING
+)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
+LOCATION '${var:hpath}/dns/edge'
+TBLPROPERTIES ('skip.header.line.count'='1');
+
+
+DROP TABLE IF EXISTS ${var:dbname}.dns_ingest_summary_tmp;
+
+CREATE EXTERNAL TABLE ${var:dbname}.dns_ingest_summary_tmp ( 
+tdate STRING,
+total BIGINT
+)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
+LOCATION '${var:hpath}/dns/summary'
+TBLPROPERTIES ('skip.header.line.count'='1');
+
+
+DROP TABLE IF EXISTS ${var:dbname}.dns_scores_tmp;
+
+CREATE EXTERNAL TABLE ${var:dbname}.dns_scores_tmp ( 
+frame_time STRING,
+frame_len BIGINT,
+ip_dst STRING, 
+dns_qry_name STRING, 
+dns_qry_class STRING,
+dns_qry_type INT,
+dns_qry_rcode INT, 
+ml_score FLOAT,
+tld STRING,
+query_rep STRING,
+hh INT,
+ip_sev INT,
+dns_sev INT,
+dns_qry_class_name STRING, 
+dns_qry_type_name STRING,
+dns_qry_rcode_name STRING, 
+network_context STRING,
+unix_tstamp BIGINT
+)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
+LOCATION '${var:hpath}/dns/scores'
+TBLPROPERTIES ('skip.header.line.count'='1');
+
+
+DROP TABLE IF EXISTS ${var:dbname}.dns_storyboard_tmp;
+
+CREATE EXTERNAL TABLE ${var:dbname}.dns_storyboard_tmp ( 
+ip_threat STRING,
+dns_threat STRING, 
+title STRING,
+text STRING
+)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
+LOCATION '${var:hpath}/dns/storyboard'
+TBLPROPERTIES ('skip.header.line.count'='1');
+
+
+DROP TABLE IF EXISTS ${var:dbname}.dns_threat_dendro_tmp;
+
+CREATE EXTERNAL TABLE ${var:dbname}.dns_threat_dendro_tmp (
+anchor STRING, 
+total BIGINT,
+dns_qry_name STRING, 
+ip_dst STRING
+)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
+LOCATION '${var:hpath}/dns/threat_dendro'
+TBLPROPERTIES ('skip.header.line.count'='1');
+

http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/ad512012/spot-setup/migration/create_flow_migration_tables.hql
----------------------------------------------------------------------
diff --git a/spot-setup/migration/create_flow_migration_tables.hql b/spot-setup/migration/create_flow_migration_tables.hql
new file mode 100644
index 0000000..0ba11cb
--- /dev/null
+++ b/spot-setup/migration/create_flow_migration_tables.hql
@@ -0,0 +1,108 @@
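+-- Note: the field delimiter differs per table (',' for scores/summary, '\t' for
+-- chords/edge/timeline, '|' for storyboard) to match the files each LOAD DATA pulls in.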
+CREATE DATABASE IF NOT EXISTS ${var:dbname};
+
+
+DROP TABLE IF EXISTS ${var:dbname}.flow_scores_tmp;
+
+CREATE EXTERNAL TABLE ${var:dbname}.flow_scores_tmp (
+sev int, 
+tstart STRING, 
+srcip STRING,
+dstip STRING,
+sport INT, 
+dport INT, 
+proto STRING,
+flag STRING,
+ipkt INT,
+ibyt INT,
+lda_score FLOAT,
+rank INT,
+srcIpInternal INT,
+destIpInternal INT,
+srcGeo STRING, 
+dstGeo STRING, 
+srcDomain STRING, 
+dstDomain STRING, 
+srcIP_rep STRING,
+dstIP_rep STRING 
+)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
+LOCATION '${var:hpath}/flow/scores/'
+TBLPROPERTIES ('skip.header.line.count'='1');
+
+
+DROP TABLE IF EXISTS ${var:dbname}.flow_chords_tmp;
+
+CREATE EXTERNAL TABLE ${var:dbname}.flow_chords_tmp (
+srcip STRING,
+dstip STRING,
+ibyt BIGINT, 
+ipkt BIGINT
+)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
+LOCATION '${var:hpath}/flow/chords'
+TBLPROPERTIES ('skip.header.line.count'='1');
+
+
+DROP TABLE IF EXISTS ${var:dbname}.flow_edge_tmp;
+
+CREATE EXTERNAL TABLE ${var:dbname}.flow_edge_tmp (
+tstart STRING, 
+srcip STRING,
+dstip STRING,
+sport INT, 
+dport INT, 
+proto STRING,
+flags STRING,
+tos INT, 
+ibyt BIGINT, 
+ipkt BIGINT, 
+input BIGINT,
+output BIGINT, 
+rip STRING,
+obyt BIGINT, 
+opkt BIGINT
+)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
+LOCATION '${var:hpath}/flow/edge'
+TBLPROPERTIES ('skip.header.line.count'='1');
+
+
+DROP TABLE IF EXISTS ${var:dbname}.flow_ingest_summary_tmp;
+
+CREATE EXTERNAL TABLE ${var:dbname}.flow_ingest_summary_tmp (
+tdate STRING,
+total BIGINT 
+)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
+LOCATION '${var:hpath}/flow/summary';
+
+
+DROP TABLE IF EXISTS ${var:dbname}.flow_storyboard_tmp;
+
+CREATE EXTERNAL TABLE ${var:dbname}.flow_storyboard_tmp (
+ip_threat STRING,
+title STRING,
+text STRING
+)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
+LOCATION '${var:hpath}/flow/storyboard'
+TBLPROPERTIES ('skip.header.line.count'='1');
+
+
+DROP TABLE IF EXISTS ${var:dbname}.flow_timeline_tmp;
+
+CREATE EXTERNAL TABLE ${var:dbname}.flow_timeline_tmp (
+tstart STRING, 
+tend STRING, 
+srcip STRING,
+dstip STRING,
+proto STRING,
+sport INT,
+dport INT, 
+ipkt BIGINT, 
+ibyt BIGINT
+)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
+LOCATION '${var:hpath}/flow/timeline'
+TBLPROPERTIES ('skip.header.line.count'='1');
+

http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/ad512012/spot-setup/migration/create_proxy_migration_tables.hql
----------------------------------------------------------------------
diff --git a/spot-setup/migration/create_proxy_migration_tables.hql b/spot-setup/migration/create_proxy_migration_tables.hql
new file mode 100644
index 0000000..56826f4
--- /dev/null
+++ b/spot-setup/migration/create_proxy_migration_tables.hql
@@ -0,0 +1,99 @@
+CREATE DATABASE IF NOT EXISTS ${var:dbname};
+
+
+DROP TABLE IF EXISTS ${var:dbname}.proxy_edge_tmp;
+
+CREATE EXTERNAL TABLE ${var:dbname}.proxy_edge_tmp ( 
+tdate STRING,
+time STRING, 
+clientip STRING, 
+host STRING, 
+webcat STRING, 
+respcode STRING, 
+reqmethod STRING,
+useragent STRING,
+resconttype STRING,
+referer STRING,
+uriport STRING,
+serverip STRING, 
+scbytes INT, 
+csbytes INT, 
+fulluri STRING
+)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
+LOCATION '${var:hpath}/proxy/edge'
+TBLPROPERTIES ('skip.header.line.count'='1');
+
+
+DROP TABLE IF EXISTS ${var:dbname}.proxy_ingest_summary_tmp;
+
+CREATE EXTERNAL TABLE ${var:dbname}.proxy_ingest_summary_tmp ( 
+tdate STRING,
+total BIGINT 
+)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
+LOCATION '${var:hpath}/proxy/summary'
+TBLPROPERTIES ('skip.header.line.count'='1');
+
+
+DROP TABLE IF EXISTS ${var:dbname}.proxy_scores_tmp;
+
+CREATE EXTERNAL TABLE ${var:dbname}.proxy_scores_tmp ( 
+tdate STRING,
+time STRING, 
+clientip STRING, 
+host STRING, 
+reqmethod STRING,
+useragent STRING,
+resconttype STRING,
+duration INT,
+username STRING, 
+webcat STRING, 
+referer STRING,
+respcode INT,
+uriport INT, 
+uripath STRING,
+uriquery STRING, 
+serverip STRING, 
+scbytes INT, 
+csbytes INT, 
+fulluri STRING,
+word STRING, 
+ml_score FLOAT,
+uri_rep STRING,
+uri_sev INT,
+respcode_name STRING,
+network_context STRING,
+score_hash STRING
+)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
+LOCATION '${var:hpath}/proxy/scores'
+TBLPROPERTIES ('skip.header.line.count'='1');
+
+
+DROP TABLE IF EXISTS ${var:dbname}.proxy_storyboard_tmp;
+
+CREATE EXTERNAL TABLE ${var:dbname}.proxy_storyboard_tmp ( 
+p_threat STRING, 
+title STRING,
+text STRING
+)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
+LOCATION '${var:hpath}/proxy/storyboard'
+TBLPROPERTIES ('skip.header.line.count'='1');
+
+
+DROP TABLE IF EXISTS ${var:dbname}.proxy_timeline_tmp;
+
+CREATE EXTERNAL TABLE ${var:dbname}.proxy_timeline_tmp ( 
+tstart STRING, 
+tend STRING, 
+duration BIGINT, 
+clientip STRING, 
+respcode STRING
+)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
+LOCATION '${var:hpath}/proxy/timeline'
+TBLPROPERTIES ('skip.header.line.count'='1');
+
+

http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/ad512012/spot-setup/migration/migrate_old_dns_data.sh
----------------------------------------------------------------------
diff --git a/spot-setup/migration/migrate_old_dns_data.sh b/spot-setup/migration/migrate_old_dns_data.sh
new file mode 100755
index 0000000..8fa7eb7
--- /dev/null
+++ b/spot-setup/migration/migrate_old_dns_data.sh
@@ -0,0 +1,158 @@
+#!/bin/bash
+
+OLD_DATA_PATH=$1
+STAGING_DB=$2
+HDFS_STAGING_PATH=$3
+DEST_DB=$4
+IMPALA_DEM=$5
+
+# Execution example:
+#./migrate_old_dns_data.sh '/home/spot/spot-csv-data' 'spot_migration' '/user/duxbury/spot_migration/' 'migrated' 'node01'
+
+# OLD_DATA_PATH='/home/spot/spot-csv-data'
+# STAGING_DB='spot_migration'
+# HDFS_STAGING_PATH='/user/spot/spot_migration/'
+# DEST_DB='migrated'
+# IMPALA_DEM='node01'
+
+
+hadoop fs -mkdir $HDFS_STAGING_PATH
+hadoop fs -mkdir $HDFS_STAGING_PATH/dns/
+hadoop fs -mkdir $HDFS_STAGING_PATH/dns/scores/
+hadoop fs -mkdir $HDFS_STAGING_PATH/dns/dendro/
+hadoop fs -mkdir $HDFS_STAGING_PATH/dns/edge/
+hadoop fs -mkdir $HDFS_STAGING_PATH/dns/summary/
+hadoop fs -mkdir $HDFS_STAGING_PATH/dns/storyboard/
+hadoop fs -mkdir $HDFS_STAGING_PATH/dns/threat_dendro/
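+# grant the impala user access to the staging tree so the external tables can read it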
+hdfs dfs -setfacl -R -m user:impala:rwx $HDFS_STAGING_PATH
+
+#Creating Staging tables in Impala
+impala-shell -i ${IMPALA_DEM} --var=hpath=${HDFS_STAGING_PATH} --var=dbname=${STAGING_DB} -c -f create_dns_migration_tables.hql
+
+DAYS=$OLD_DATA_PATH/dns/*
+
+for dir in $DAYS
+do
+  #break
+  #echo $dir
+  day="$(basename $dir)"
+  #echo $day
+  echo "Processing day $day ..."
+  y=${day:0:4}
+  m=$(expr ${day:4:2} + 0)
+  d=$(expr ${day:6:2} + 0)
+  echo $y $m $d
+  echo $dir
+
+
+  ## dns Scores and dns_threat_investigation
+  echo "Processing dns Scores"
+  if [ -f $dir/dns_scores.csv ]
+  then
+    command="LOAD DATA LOCAL INPATH '$dir/dns_scores.csv' OVERWRITE INTO TABLE $STAGING_DB.dns_scores_tmp;"
+    echo $command
+    hive -e "$command"
+
+    command="INSERT INTO $DEST_DB.dns_scores PARTITION (y=$y, m=$m, d=$d) 
+select frame_time, unix_tstamp, frame_len, ip_dst, dns_qry_name, dns_qry_class, dns_qry_type, dns_qry_rcode, ml_score, tld, query_rep, hh, dns_qry_class_name, dns_qry_type_name, dns_qry_rcode_name, network_context
+from $STAGING_DB.dns_scores_tmp;"
+    echo $command
+    hive -e "$command"
+
+    echo "Processing dns Threat Investigation"
+    command="INSERT INTO $DEST_DB.dns_threat_investigation PARTITION (y=$y, m=$m, d=$d) 
+select unix_tstamp, ip_dst, dns_qry_name, ip_sev, dns_sev
+from $STAGING_DB.dns_scores_tmp
+where ip_sev > 0 or dns_sev > 0;"
+    echo $command
+    hive -e "$command"
+
+  fi
+
+  ## dns Dendro
+  echo "Processing dns Dendro"
+  dendro_files=`ls $dir/dendro*.csv`
+  #echo $dendro_files
+  if [ ! -z "$dendro_files" ]
+  then
+
+    command="LOAD DATA LOCAL INPATH '$dir/dendro*.csv' OVERWRITE INTO TABLE $STAGING_DB.dns_dendro_tmp;"
+    echo $command
+    hive -e "$command"
+
+    command="INSERT INTO $DEST_DB.dns_dendro PARTITION (y=$y, m=$m, d=$d) 
+select unix_timestamp('$y${day:4:2}${day:6:2}', 'yyyyMMdd'), dns_a, dns_qry_name, ip_dst
+from $STAGING_DB.dns_dendro_tmp;"
+    echo $command 
+    hive -e "$command"
+
+  fi
+
+  ## dns Edge
+  echo "Processing dns Edge"
+  edge_files=`ls $dir/edge*.csv`
+  #echo $edge_files
+  if [ ! -z "$edge_files" ]
+  then
+
+    command="LOAD DATA LOCAL INPATH '$dir/edge*.csv' OVERWRITE INTO TABLE $STAGING_DB.dns_edge_tmp;"
+    echo $command
+    hive -e "$command"
+
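+    # the trailing literals (0, '', '', '', '') presumably pad columns added to dns_edge in the 1.0 schema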
+    command="INSERT INTO $DEST_DB.dns_edge PARTITION (y=$y, m=$m, d=$d) 
+select unix_timestamp(regexp_replace(frame_time, '\"', ''), 'MMMMM dd yyyy H:mm:ss.SSS z'), frame_len, ip_dst, ip_src, dns_qry_name, dns_qry_class, dns_qry_type, dns_qry_rcode, dns_a, 0, '', '', '', ''
+from $STAGING_DB.dns_edge_tmp;"
+    echo $command
+    hive -e "$command"
+
+  fi
+
+  ##dns_ingest_summary
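+  # (the old ingest summary is not migrated by this script)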
+
+  ##dns_storyboard
+  echo "Processing dns Storyboard"
+  if [ -f $dir/threats.csv ]
+  then
+
+    command="LOAD DATA LOCAL INPATH '$dir/threats.csv' OVERWRITE INTO TABLE $STAGING_DB.dns_storyboard_tmp;"
+    echo $command
+    hive -e "$command"
+
+    command="INSERT INTO $DEST_DB.dns_storyboard PARTITION (y=$y, m=$m, d=$d) 
+select ip_threat, dns_threat, title, text from $STAGING_DB.dns_storyboard_tmp;"
+    echo $command
+    hive -e "$command"
+
+  fi
+
+  ##dns_threat_dendro
+  echo "Processing dns Threat Dendro"
+  threat_dendro_files=`ls $dir/threat-dendro*.csv`
+  #echo $threat_dendro_files
+  if [ ! -z "$threat_dendro_files" ]
+  then
+    for file in $threat_dendro_files
+    do
+      #echo $file
+      filename="$(basename $file)"
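+      # threat-dendro files are named threat-dendro-<ip>.csv; strip the suffix and prefix to recover the anchor IP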
+      ip="${filename%.tsv}"
+      ip="${ip#threat-dendro-}"
+      echo $filename $ip
+
+      command="LOAD DATA LOCAL INPATH '$file' OVERWRITE INTO TABLE $STAGING_DB.dns_threat_dendro_tmp;"
+      echo $command
+      hive -e "$command"
+
+      command="INSERT INTO $DEST_DB.dns_threat_dendro PARTITION (y=$y, m=$m, d=$d) 
+select '$ip', total, dns_qry_name, ip_dst
+from $STAGING_DB.dns_threat_dendro_tmp;"
+      echo $command
+      hive -e "$command"
+
+    done
+  fi
+done
+
+impala-shell -i ${IMPALA_DEM} -q "INVALIDATE METADATA;"
+
+

http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/ad512012/spot-setup/migration/migrate_old_flow_data.sh
----------------------------------------------------------------------
diff --git a/spot-setup/migration/migrate_old_flow_data.sh b/spot-setup/migration/migrate_old_flow_data.sh
new file mode 100755
index 0000000..61556e0
--- /dev/null
+++ b/spot-setup/migration/migrate_old_flow_data.sh
@@ -0,0 +1,183 @@
+#!/bin/bash
+OLD_DATA_PATH=$1
+STAGING_DB=$2
+HDFS_STAGING_PATH=$3
+DEST_DB=$4
+IMPALA_DEM=$5
+
+# Execution example:
+#./migrate_old_flow_data.sh '/home/spot/spot-csv-data' 'spot_migration' '/user/duxbury/spot_migration/' 'migrated' 'node01'
+
+# OLD_DATA_PATH='/home/spot/spot-csv-data'
+# STAGING_DB='spot_migration'
+# HDFS_STAGING_PATH='/user/spot/spot_migration/'
+# DEST_DB='migrated'
+# IMPALA_DEM='node01'
+
+
+hadoop fs -mkdir $HDFS_STAGING_PATH
+hadoop fs -mkdir $HDFS_STAGING_PATH/flow/
+hadoop fs -mkdir $HDFS_STAGING_PATH/flow/scores/
+hadoop fs -mkdir $HDFS_STAGING_PATH/flow/chords/
+hadoop fs -mkdir $HDFS_STAGING_PATH/flow/edge/
+hadoop fs -mkdir $HDFS_STAGING_PATH/flow/summary/
+hadoop fs -mkdir $HDFS_STAGING_PATH/flow/storyboard/
+hadoop fs -mkdir $HDFS_STAGING_PATH/flow/threat_investigation/
+hadoop fs -mkdir $HDFS_STAGING_PATH/flow/timeline/
+hdfs dfs -setfacl -R -m user:impala:rwx $HDFS_STAGING_PATH
+
+#Creating Staging tables in Impala
+impala-shell -i ${IMPALA_DEM} --var=hpath=${HDFS_STAGING_PATH} --var=dbname=${STAGING_DB} -c -f create_flow_migration_tables.hql
+
+
+# ##flow_ingest_summary
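+# NOTE: left disabled; as written it references $y, $m and $d, which are only set inside the per-day loop below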
+# echo "Processing Flow Ingest Summary"
+
+# if [ -f $OLD_DATA_PATH/flow/ingest_summary/is-*.csv ]
+#   then
+#     command="LOAD DATA LOCAL INPATH '$OLD_DATA_PATH/flow/ingest_summary/is-*.csv' OVERWRITE
INTO TABLE $STAGING_DB.flow_ingest_summary_tmp;"
+#     echo $command
+#     hive -e "$command"
+
+#     command="INSERT INTO $DEST_DB.flow_ingest_summary PARTITION (y=$y, m=$m, d=$d) 
+# select tdate, total
+# from $STAGING_DB.flow_ingest_summary_tmp;"
+#     echo $command
+#     hive -e "$command"
+
+#   fi
+
+
+DAYS=$OLD_DATA_PATH/flow/*
+
+for dir in $DAYS
+do
+  #break
+  #echo $dir
+  day="$(basename $dir)"
+  #echo $day
+  echo "Processing day $day ..."
+  y=${day:0:4}
+  m=$(expr ${day:4:2} + 0)
+  d=$(expr ${day:6:2} + 0)
+  echo $y $m $d
+  echo $dir
+
+
+  ## Flow Scores and flow_threat_investigation
+  echo "Processing Flow Scores"
+  if [ -f $dir/flow_scores.csv ]
+  then
+    command="LOAD DATA LOCAL INPATH '$dir/flow_scores.csv' OVERWRITE INTO TABLE $STAGING_DB.flow_scores_tmp;"
+    echo $command
+    hive -e "$command"
+
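+    # the two literal 0 columns presumably stand in for fields the 0.9 flow_scores output did not carry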
+    command="INSERT INTO $DEST_DB.flow_scores PARTITION (y=$y, m=$m, d=$d) 
+select tstart,srcip,dstip,sport,dport,proto,ipkt,ibyt,0,0,lda_score,rank,srcIpInternal,destIpInternal,srcGeo,dstGeo,srcDomain,dstDomain,srcIP_rep,dstIP_rep
+from $STAGING_DB.flow_scores_tmp;"
+    echo $command
+    hive -e "$command"
+
+    echo "Processing Flow Threat Investigation"
+    command="INSERT INTO $DEST_DB.flow_threat_investigation PARTITION (y=$y, m=$m, d=$d)

+select tstart,srcip,dstip,sport,dport,sev
+from $STAGING_DB.flow_scores_tmp
+where sev > 0;"
+    echo $command
+    hive -e "$command"
+
+  fi
+
+  ## Flow Chords
+  echo "Processing Flow Chords"
+  chord_files=`ls $dir/chord*.tsv`
+  #echo $chord_files
+  if [ ! -z "$chord_files" ]
+  then
+    for file in $chord_files
+    do
+      #echo $file
+      filename="$(basename $file)"
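+      # chord files are named chord-<ip>.tsv with the IP's dots replaced by underscores; undo that here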
+      ip="${filename%.tsv}"
+      ip="${ip#chord-}"
+      ip="${ip//_/.}"
+      echo $filename $ip
+
+      command="LOAD DATA LOCAL INPATH '$file' OVERWRITE INTO TABLE $STAGING_DB.flow_chords_tmp;"
+      echo $command
+      hive -e "$command"
+
+      command="INSERT INTO $DEST_DB.flow_chords PARTITION (y=$y, m=$m, d=$d) 
+  select '$ip', srcip, dstip, ibyt, ipkt from $STAGING_DB.flow_chords_tmp;"
+      echo $command
+      hive -e "$command"
+
+    done
+  fi
+
+  ## Flow Edge
+  echo "Processing Flow Edge"
+  edge_files=`ls $dir/edge*.tsv`
+  #echo $edge_files
+  if [ ! -z "$edge_files" ]
+  then
+
+    command="LOAD DATA LOCAL INPATH '$dir/edge*.tsv' OVERWRITE INTO TABLE $STAGING_DB.flow_edge_tmp;"
+    echo $command
+    hive -e "$command"
+
+    command="INSERT INTO $DEST_DB.flow_edge PARTITION (y=$y, m=$m, d=$d) 
+select tstart, srcip, dstip, sport, dport, proto, flags, tos, ibyt, ipkt, input, output, rip, obyt, opkt, 0, 0
+from $STAGING_DB.flow_edge_tmp
+where srcip is not NULL;"
+    echo $command
+    hive -e "$command"
+
+  fi
+
+  ##flow_storyboard
+  echo "Processing Flow Storyboard"
+  if [ -f $dir/threats.csv ]
+  then
+
+    command="LOAD DATA LOCAL INPATH '$dir/threats.csv' OVERWRITE INTO TABLE $STAGING_DB.flow_storyboard_tmp;"
+    echo $command
+    hive -e "$command"
+
+    command="INSERT INTO $DEST_DB.flow_storyboard PARTITION (y=$y, m=$m, d=$d) 
+select ip_threat, title, text from $STAGING_DB.flow_storyboard_tmp;"
+    echo $command
+    hive -e "$command"
+
+  fi
+
+  ##flow_timeline
+  echo "Processing Flow Timeline"
+  timeline_files=`ls $dir/sbdet*.tsv`
+  #echo $timeline_files
+  if [ ! -z "$timeline_files" ]
+  then
+    for file in $timeline_files
+    do
+      #echo $file
+      filename="$(basename $file)"
+      ip="${filename%.tsv}"
+      ip="${ip#sbdet-}"
+      echo $filename $ip
+
+      command="LOAD DATA LOCAL INPATH '$file' OVERWRITE INTO TABLE $STAGING_DB.flow_timeline_tmp;"
+      echo $command
+      hive -e "$command"
+
+      command="INSERT INTO $DEST_DB.flow_timeline PARTITION (y=$y, m=$m, d=$d) 
+  select '$ip', tstart, tend, srcip, dstip, proto, sport, dport, ipkt, ibyt from $STAGING_DB.flow_timeline_tmp;"
+      echo $command
+      hive -e "$command"
+
+    done
+  fi
+done
+
+impala-shell -i ${IMPALA_DEM} -q "INVALIDATE METADATA;"
+
+

http://git-wip-us.apache.org/repos/asf/incubator-spot/blob/ad512012/spot-setup/migration/migrate_old_proxy_data.sh
----------------------------------------------------------------------
diff --git a/spot-setup/migration/migrate_old_proxy_data.sh b/spot-setup/migration/migrate_old_proxy_data.sh
new file mode 100755
index 0000000..3c3129c
--- /dev/null
+++ b/spot-setup/migration/migrate_old_proxy_data.sh
@@ -0,0 +1,138 @@
+#!/bin/bash
+OLD_DATA_PATH=$1
+STAGING_DB=$2
+HDFS_STAGING_PATH=$3
+DEST_DB=$4
+IMPALA_DEM=$5
+
+# Execution example:
+#./migrate_old_proxy_data.sh '/home/duxbury/moy/spot-csv-data' 'spot_migration' '/user/duxbury/spot_migration' 'migrated' 'gmsonidw01'
+
+# OLD_DATA_PATH='/home/duxbury/moy/spot-csv-data'
+# STAGING_DB='spot_migration'
+# HDFS_STAGING_PATH='/user/duxbury/spot_migration'
+# DEST_DB='migrated'
+# IMPALA_DEM='gmsonidw01'
+
+
+hadoop fs -mkdir $HDFS_STAGING_PATH
+hadoop fs -mkdir $HDFS_STAGING_PATH/proxy/
+hadoop fs -mkdir $HDFS_STAGING_PATH/proxy/scores/
+hadoop fs -mkdir $HDFS_STAGING_PATH/proxy/edge/
+hadoop fs -mkdir $HDFS_STAGING_PATH/proxy/summary/
+hadoop fs -mkdir $HDFS_STAGING_PATH/proxy/storyboard/
+hadoop fs -mkdir $HDFS_STAGING_PATH/proxy/timeline/
+hdfs dfs -setfacl -R -m user:impala:rwx $HDFS_STAGING_PATH
+
+#Creating Staging tables in Impala
+impala-shell -i ${IMPALA_DEM} --var=hpath=${HDFS_STAGING_PATH} --var=dbname=${STAGING_DB} -c -f create_proxy_migration_tables.hql
+
+DAYS=$OLD_DATA_PATH/proxy/*
+
+for dir in $DAYS
+do
+  #break
+  #echo $dir
+  day="$(basename $dir)"
+  #echo $day
+  echo "Processing day $day ..."
+  y=${day:0:4}
+  m=$(expr ${day:4:2} + 0)
+  d=$(expr ${day:6:2} + 0)
+  echo $y $m $d
+  echo $dir
+
+
+  ## proxy Scores and proxy_threat_investigation
+  echo "Processing proxy Scores"
+  if [ -f $dir/proxy_scores.tsv ]
+  then
+    command="LOAD DATA LOCAL INPATH '$dir/proxy_scores.tsv' OVERWRITE INTO TABLE $STAGING_DB.proxy_scores_tmp;"
+    echo $command
+    hive -e "$command"
+
+    command="INSERT INTO $DEST_DB.proxy_scores PARTITION (y=$y, m=$m, d=$d) 
+select tdate, time, clientip, host, reqmethod, useragent, resconttype, duration, username, webcat, referer, respcode, uriport, uripath, uriquery, serverip, scbytes, csbytes, fulluri, word, ml_score, uri_rep, respcode_name, network_context
+from $STAGING_DB.proxy_scores_tmp;"
+
+    echo $command
+    hive -e "$command"
+
+    echo "Processing proxy Threat Investigation"
+    command="INSERT INTO $DEST_DB.proxy_threat_investigation PARTITION (y=$y, m=$m, d=$d)

+select tdate, fulluri, uri_sev
+from $STAGING_DB.proxy_scores_tmp
+where uri_sev > 0;"
+    echo $command
+    hive -e "$command"
+
+  fi
+
+
+  ## proxy Edge
+  echo "Processing proxy Edge"
+  edge_files=`ls $dir/edge*.tsv`
+  #echo $edge_files
+  if [ ! -z "$edge_files" ]
+  then
+
+    command="LOAD DATA LOCAL INPATH '$dir/edge*.tsv' OVERWRITE INTO TABLE $STAGING_DB.proxy_edge_tmp;"
+    echo $command
+    hive -e "$command"
+
+    command="INSERT INTO $DEST_DB.proxy_edge PARTITION (y=$y, m=$m, d=$d) 
+select tdate, time, clientip, host, webcat, respcode, reqmethod, useragent, resconttype, referer, uriport, serverip, scbytes, csbytes, fulluri, 0, ''
+from $STAGING_DB.proxy_edge_tmp;"
+    echo $command
+    hive -e "$command"
+
+  fi
+
+  ##proxy_ingest_summary
+
+  ##proxy_storyboard
+  echo "Processing proxy Storyboard"
+  if [ -f $dir/threats.csv ]
+  then
+
+    command="LOAD DATA LOCAL INPATH '$dir/threats.csv' OVERWRITE INTO TABLE $STAGING_DB.proxy_storyboard_tmp;"
+    echo $command
+    hive -e "$command"
+
+    command="INSERT INTO $DEST_DB.proxy_storyboard PARTITION (y=$y, m=$m, d=$d) 
+select p_threat, title, text
+from $STAGING_DB.proxy_storyboard_tmp;"
+    echo $command
+    hive -e "$command"
+
+  fi
+
+  ##proxy_timeline
+  echo "Processing proxy Timeline"
+  timeline_files=`ls $dir/timeline*.tsv`
+  #echo $timeline_files
+  if [ ! -z "$timeline_files" ]
+  then
+    for file in $timeline_files
+    do
+      #echo $file
+      filename="$(basename $file)"
+      ip="${filename%.tsv}"
+      ip="${ip#timeline-}"
+      echo $filename $ip
+
+      command="LOAD DATA LOCAL INPATH '$file' OVERWRITE INTO TABLE $STAGING_DB.proxy_timeline_tmp;"
+      echo $command
+      hive -e "$command"
+
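+      # cast(tstart as timestamp) is null for malformed rows (e.g. stray header lines), so they are skipped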
+      command="INSERT INTO $DEST_DB.proxy_timeline PARTITION (y=$y, m=$m, d=$d) 
+select '$ip', tstart, tend, duration, clientip, respcode, ''
+from $STAGING_DB.proxy_timeline_tmp
+where cast(tstart as timestamp) is not null;"
+      echo $command
+      hive -e "$command"
+
+    done
+  fi
+done
+
+impala-shell -i ${IMPALA_DEM} -q "INVALIDATE METADATA;"
+
+

