vxquery-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From prest...@apache.org
Subject [01/16] git commit: Added the option to create an inventory csv file.
Date Wed, 02 Apr 2014 00:21:20 GMT
Repository: incubator-vxquery
Updated Branches:
  refs/heads/prestonc/hash_join 85af9d42d -> fcb024115


Added the option to create an inventory csv file.

The inventory holds the number of sensors and readings for each station.


Project: http://git-wip-us.apache.org/repos/asf/incubator-vxquery/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-vxquery/commit/bb1ec578
Tree: http://git-wip-us.apache.org/repos/asf/incubator-vxquery/tree/bb1ec578
Diff: http://git-wip-us.apache.org/repos/asf/incubator-vxquery/diff/bb1ec578

Branch: refs/heads/prestonc/hash_join
Commit: bb1ec5782d9631af2f66e5a07bf97d59ba2b7fc7
Parents: 85af9d4
Author: Preston Carman <prestonc@apache.org>
Authored: Tue Mar 25 16:38:28 2014 -0700
Committer: Preston Carman <prestonc@apache.org>
Committed: Tue Mar 25 16:38:28 2014 -0700

----------------------------------------------------------------------
 .../noaa-ghcn-daily/scripts/weather_cli.py      |  8 +++--
 .../scripts/weather_config_ghcnd.py             | 14 ++++----
 .../scripts/weather_convert_to_xml.py           | 35 ++++++++++++++++++--
 3 files changed, 46 insertions(+), 11 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-vxquery/blob/bb1ec578/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_cli.py
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_cli.py b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_cli.py
index 52945e5..5bfa698 100644
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_cli.py
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_cli.py
@@ -53,7 +53,7 @@ def main(argv):
             print '    -a        Append the results to the progress file.'
             print '    -f (str)  The file name of a specific station to process.'
             print '              * Helpful when testing a single stations XML file output.'
-            print '    -l (str)  Select the locality of the scripts execution (download,
progress_file, sensor_build, station_build, partition, partition_scheme, statistics).'
+            print '    -l (str)  Select the locality of the scripts execution (download,
progress_file, sensor_build, station_build, partition, partition_scheme, inventory, statistics).'
             print '    -m (int)  Limits the number of files created for each station.'
             print '              * Helpful when testing to make sure all elements are supported
for each station.'
             print '              Alternate form: --max_station_files=(int)'
@@ -73,7 +73,7 @@ def main(argv):
                 print 'Error: Argument must be a file name for --file (-f).'
                 sys.exit()
         elif opt in ('-l', "--locality"):
-            if arg in ("download", "progress_file", "sensor_build", "station_build", "partition",
"partition_scheme", "test_links", "queries", "statistics"):
+            if arg in ("download", "progress_file", "sensor_build", "station_build", "partition",
"partition_scheme", "test_links", "queries", "inventory", "statistics"):
                 section = arg
             else:
                 print 'Error: Argument must be a string for --locality (-l) and a valid locality.'
@@ -221,6 +221,10 @@ def main(argv):
             print 'Processing the queries section (' + dataset.get_name() + ').'
             benchmark.copy_query_files(reset)
     
+    if section in ("inventory"):
+        print 'Processing the inventory section.'
+        convert.process_inventory_file()
+                  
 #     if section in ("statistics"):
 #         print 'Processing the statistics section.'
 #         data.print_progress_file_stats(convert)

http://git-wip-us.apache.org/repos/asf/incubator-vxquery/blob/bb1ec578/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_config_ghcnd.py
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_config_ghcnd.py
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_config_ghcnd.py
index 6d3bd9c..04fff52 100644
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_config_ghcnd.py
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_config_ghcnd.py
@@ -86,10 +86,10 @@ STATES_FIELDS['CODE'] = ['CODE', 1, 2, 'Character']
 STATES_FIELDS['NAME'] = ['NAME', 4, 50, 'Character']
 
 # Details about the row.
-INVENTORY_FIELDS = []
-INVENTORY_FIELDS.append(['ID', 1, 11, 'Character'])
-INVENTORY_FIELDS.append(['LATITUDE', 13, 20, 'Real'])
-INVENTORY_FIELDS.append(['LONGITUDE', 22, 30, 'Real'])
-INVENTORY_FIELDS.append(['ELEMENT', 32, 35, 'Character'])
-INVENTORY_FIELDS.append(['FIRSTYEAR', 37, 40, 'Integer'])
-INVENTORY_FIELDS.append(['LASTYEAR', 42, 45, 'Integer'])
+INVENTORY_FIELDS = {}
+INVENTORY_FIELDS['ID'] = ['ID', 1, 11, 'Character']
+INVENTORY_FIELDS['LATITUDE'] = ['LATITUDE', 13, 20, 'Real']
+INVENTORY_FIELDS['LONGITUDE'] = ['LONGITUDE', 22, 30, 'Real']
+INVENTORY_FIELDS['ELEMENT'] = ['ELEMENT', 32, 35, 'Character']
+INVENTORY_FIELDS['FIRSTYEAR'] = ['FIRSTYEAR', 37, 40, 'Integer']
+INVENTORY_FIELDS['LASTYEAR'] = ['LASTYEAR', 42, 45, 'Integer']

http://git-wip-us.apache.org/repos/asf/incubator-vxquery/blob/bb1ec578/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_convert_to_xml.py
----------------------------------------------------------------------
diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_convert_to_xml.py
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_convert_to_xml.py
index c115efa..a4f33a1 100644
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_convert_to_xml.py
+++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_convert_to_xml.py
@@ -109,9 +109,10 @@ class WeatherConvertToXML:
 
         # Extra support files.
         self.ghcnd_countries = base_path + '/ghcnd-countries.txt'
+        self.ghcnd_inventory = base_path + '/ghcnd-inventory.txt'
         self.ghcnd_states = base_path + '/ghcnd-states.txt'
         self.ghcnd_stations = base_path + '/ghcnd-stations.txt'
-
+        
         # MSHR support files.
         self.mshr_stations = base_path + '/mshr_enhanced_201402.txt'
         
@@ -160,6 +161,36 @@ class WeatherConvertToXML:
     def get_base_folder(self, station_id, data_type="sensors"):
         return build_base_save_folder(self.save_path, station_id, data_type) 
     
+    def process_inventory_file(self):
+        print "Processing inventory file"
+        file_stream = open(self.ghcnd_inventory, 'r')
+        
+        csv_header = ['ID', 'SENSORS', 'SENSORS_COUNT',  'MAX_YEARS', 'TOTAL_YEARS_FOR_ALL_SENSORS']
+        row = file_stream.readline()
+        csv_inventory = {}
+        for row in file_stream:
+            id = self.get_field_from_definition(row, INVENTORY_FIELDS['ID'])
+            sensor_id = self.get_field_from_definition(row, INVENTORY_FIELDS['ELEMENT'])
+            start = int(self.get_field_from_definition(row, INVENTORY_FIELDS['FIRSTYEAR']))
+            end = int(self.get_field_from_definition(row, INVENTORY_FIELDS['LASTYEAR']))
+            if id in csv_inventory:
+                new_count = str(int(csv_inventory[id][2]) + 1)
+                new_max = str(max(int(csv_inventory[id][3]), (end - start)))
+                new_total = str(int(csv_inventory[id][3]) + end - start)
+                csv_inventory[id] = [id, (csv_inventory[id][1] + "," + sensor_id), new_count,
new_max, new_total]
+            else:
+                csv_inventory[id] = [id, sensor_id, str(1), str(end - start), str(end - start)]
+                
+        path = self.save_path + "/inventory.csv"
+        self.save_csv_file(path, csv_inventory, csv_header)
+    
+    def save_csv_file(self, path, csv_inventory, header):
+        csv_content = "|".join(header) + "\n"
+        for row_id in csv_inventory:
+            csv_content += "|".join(csv_inventory[row_id]) + "\n"
+        self.save_file(path, csv_content)
+        
+
     def process_station_file(self, file_name):
         print "Processing station file: " + file_name
         file_stream = open(file_name, 'r')
@@ -333,7 +364,7 @@ class WeatherConvertToXML:
         country_code = self.get_field_from_definition(station_mshr_row, MSHR_FIELDS['FIPS_COUNTRY_CODE']).strip()
         country_name = self.get_field_from_definition(station_mshr_row, MSHR_FIELDS['FIPS_COUNTRY_NAME']).strip()
         if country_code != "" and country_name != "":
-            additional_xml += self.default_xml_location_labels("CNTRY", "FIPS:"+country_code,
country_name)
+            additional_xml += self.default_xml_location_labels("CNTRY", "FIPS:" + country_code,
country_name)
         
         return additional_xml
 


Mime
View raw message