hadoop-common-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From dhr...@apache.org
Subject svn commit: r695679 [1/6] - in /hadoop/core/trunk: ./ src/contrib/hive/ src/contrib/hive/conf/ src/contrib/hive/metastore/if/ src/contrib/hive/metastore/src/gen-py/hive_metastore/ src/contrib/hive/metastore/src/java/org/apache/hadoop/hive/metastore/
Date Mon, 15 Sep 2008 22:55:43 GMT
Author: dhruba
Date: Mon Sep 15 15:55:43 2008
New Revision: 695679

URL: http://svn.apache.org/viewvc?rev=695679&view=rev
Log:
HADOOP-4087.  Hive metastore server to work for PHP & Python clients.
(Prasad Chakka via dhruba)


Added:
    hadoop/core/trunk/src/contrib/hive/metastore/src/gen-py/hive_metastore/ThriftHiveMetastore-remote
    hadoop/core/trunk/src/contrib/hive/metastore/src/gen-py/hive_metastore/ThriftHiveMetastore.py
    hadoop/core/trunk/src/contrib/hive/metastore/src/gen-py/hive_metastore/ThriftMetaStore-remote
    hadoop/core/trunk/src/contrib/hive/metastore/src/gen-py/hive_metastore/ThriftMetaStore.py
Modified:
    hadoop/core/trunk/CHANGES.txt
    hadoop/core/trunk/src/contrib/hive/build.xml
    hadoop/core/trunk/src/contrib/hive/conf/jpox.properties
    hadoop/core/trunk/src/contrib/hive/metastore/if/hive_metastore.thrift
    hadoop/core/trunk/src/contrib/hive/metastore/src/gen-py/hive_metastore/ThriftHiveMetaStore-remote
    hadoop/core/trunk/src/contrib/hive/metastore/src/gen-py/hive_metastore/ThriftHiveMetaStore.py
    hadoop/core/trunk/src/contrib/hive/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
    hadoop/core/trunk/src/contrib/hive/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java

Modified: hadoop/core/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/CHANGES.txt?rev=695679&r1=695678&r2=695679&view=diff
==============================================================================
--- hadoop/core/trunk/CHANGES.txt (original)
+++ hadoop/core/trunk/CHANGES.txt Mon Sep 15 15:55:43 2008
@@ -546,6 +546,9 @@
     HADOOP-4099. Fix null pointer when using HFTP from an 0.18 server.
     (dhruba via omalley)
 
+    HADOOP-4087.  Hive metastore server to work for PHP & Python clients.
+    (Prasad Chakka via dhruba)
+
 Release 0.18.1 - 2008-09-17
 
   IMPROVEMENTS

Modified: hadoop/core/trunk/src/contrib/hive/build.xml
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hive/build.xml?rev=695679&r1=695678&r2=695679&view=diff
==============================================================================
--- hadoop/core/trunk/src/contrib/hive/build.xml (original)
+++ hadoop/core/trunk/src/contrib/hive/build.xml Mon Sep 15 15:55:43 2008
@@ -130,6 +130,8 @@
     <mkdir dir="${target.example.dir}"/>
     <mkdir dir="${target.example.dir}/files"/>
     <mkdir dir="${target.example.dir}/queries"/>
+    <mkdir dir="${target.lib.dir}/py"/>
+    <mkdir dir="${target.lib.dir}/php"/>
     <exec executable="cp" failonerror="true">
       <arg line="-p '${hive.root}/bin/hive' '${target.bin.dir}'"/>
     </exec>
@@ -137,6 +139,12 @@
     <copy file="${basedir}/conf/hive-default.xml" todir="${target.conf.dir}"/>
     <copy file="${basedir}/conf/hive-log4j.properties" todir="${target.conf.dir}"/>
     <copy file="${basedir}/conf/jpox.properties" todir="${target.conf.dir}"/>
+    <copy todir="${target.lib.dir}/php">
+      <fileset dir="${hive.root}/metastore/src/gen-php"/>
+    </copy>
+    <copy todir="${target.lib.dir}/py">
+      <fileset dir="${hive.root}/metastore/src/gen-py"/>
+    </copy>
     <copy todir="${target.lib.dir}" preservelastmodified="true" flatten="true">
       <fileset dir="${hive.root}" includes="*/*.jar, */*/*.jar" excludes="**/antlr-2*,**/antlr-3*"/>
       <fileset file="${build.dir.hive}/cli/hive_cli.jar"/>

Modified: hadoop/core/trunk/src/contrib/hive/conf/jpox.properties
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hive/conf/jpox.properties?rev=695679&r1=695678&r2=695679&view=diff
==============================================================================
--- hadoop/core/trunk/src/contrib/hive/conf/jpox.properties (original)
+++ hadoop/core/trunk/src/contrib/hive/conf/jpox.properties Mon Sep 15 15:55:43 2008
@@ -13,3 +13,5 @@
 javax.jdo.option.ConnectionURL=jdbc:derby:;databaseName=test_metastore_db;create=true
 javax.jdo.option.ConnectionUserName=APP
 javax.jdo.option.ConnectionPassword=mine
+org.jpox.cache.level2=true
+org.jpox.cache.level2.type=SOFT

Modified: hadoop/core/trunk/src/contrib/hive/metastore/if/hive_metastore.thrift
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hive/metastore/if/hive_metastore.thrift?rev=695679&r1=695678&r2=695679&view=diff
==============================================================================
--- hadoop/core/trunk/src/contrib/hive/metastore/if/hive_metastore.thrift (original)
+++ hadoop/core/trunk/src/contrib/hive/metastore/if/hive_metastore.thrift Mon Sep 15 15:55:43 2008
@@ -88,8 +88,8 @@
 }
 
 struct FieldSchema {
-  string name,
-  string type,
+  string name, // name of the field
+  string type, // type of the field. primitive types defined above, specify list<TYPE_NAME>, map<TYPE_NAME, TYPE_NAME> for lists & maps 
   string comment
 }
 
@@ -124,20 +124,20 @@
 }
 
 struct StorageDescriptor {
-  list<FieldSchema> cols,
-  string location,
-  string inputFormat;
-  string outputFormat;
-  bool isCompressed;
+  list<FieldSchema> cols, // required (refer to types defined above)
+  string location, // defaults to <warehouse loc>/<db loc>/tablename
+  string inputFormat; // SequenceFileInputFormat (binary) or TextInputFormat`  or custom format
+  string outputFormat; // SequenceFileOutputFormat (binary) or IgnoreKeyTextOutputFormat or custom format
+  bool isCompressed; // compressed or not
   i32 numBuckets = 32, // this must be specified if there are any dimension columns
-  SerDeInfo serdeInfo;
-  list<string> bucketCols, //reducer grouping columns and clustering columns and bucketing columns`
-  list<Order> sortCols,
-  map<string, string> parameters
+  SerDeInfo serdeInfo; // serialization and deserialization information
+  list<string> bucketCols, // reducer grouping columns and clustering columns and bucketing columns`
+  list<Order> sortCols, // sort order of the data in each bucket
+  map<string, string> parameters // any user supplied key value hash
 }
 
 struct Table {
-  string tableName,
+  string tableName, 
   string database,
   string owner,
   i32 createTime,
@@ -214,7 +214,14 @@
  list<FieldSchema> get_fields(string db_name, string table_name) throws (MetaException ouch1, UnknownTableException ouch2, UnknownDBException ouch3),
 
   // Tables
-  // create the table with the given table object in the given database
+  // create a Hive table. Following fields must be set
+  // Table.tableName
+  // Table.database (only 'default' for now until Hive QL supports databases)
+  // Table.owner (not needed, but good to have for tracking purposes)
+  // Table.sd.cols (list of field schemas)
+  // Table.sd.inputFormat ( SequenceFileInputFormat (binary like falcon tables or u_full) or TextInputFormat)
+  // Table.sd.outputFormat ( SequenceFileInputFormat (binary) or TextInputFormat)
+  // Table.sd.serdeInfo.serializationLib (SerDe class name such as org.apache.hadoop.hive.serde.simple_meta.MetadataTypedColumnsetSerDe
   void create_table(1:Table tbl) throws(1:AlreadyExistsException ouch1, 2:InvalidObjectException ouch2, 3:MetaException ouch3, 4:NoSuchObjectException o4)
   // drops the table and all the partitions associated with it if the table has partitions
   // delete data (including partitions) if deleteData is set to true

Modified: hadoop/core/trunk/src/contrib/hive/metastore/src/gen-py/hive_metastore/ThriftHiveMetaStore-remote
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hive/metastore/src/gen-py/hive_metastore/ThriftHiveMetaStore-remote?rev=695679&r1=695678&r2=695679&view=diff
==============================================================================
--- hadoop/core/trunk/src/contrib/hive/metastore/src/gen-py/hive_metastore/ThriftHiveMetaStore-remote (original)
+++ hadoop/core/trunk/src/contrib/hive/metastore/src/gen-py/hive_metastore/ThriftHiveMetaStore-remote Mon Sep 15 15:55:43 2008
@@ -1,184 +0,0 @@
-#!/usr/bin/env python
-#
-# Autogenerated by Thrift
-#
-# DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING
-#
-
-import sys
-import pprint
-from urlparse import urlparse
-from thrift.transport import TTransport
-from thrift.transport import TSocket
-from thrift.transport import THttpClient
-from thrift.protocol import TBinaryProtocol
-
-import ThriftHiveMetaStore
-from ttypes import *
-
-if len(sys.argv) <= 1 or sys.argv[1] == '--help':
-  print ''
-  print 'Usage: ' + sys.argv[0] + ' [-h host:port] [-u url] [-f[ramed]] function [arg1 [arg2...]]'
-  print ''
-  print 'Functions:'
-  print '   get_tables(string pattern)'
-  print '   get_schema(string table_name)'
-  print '   get_fields(string table_name)'
-  print '   get_partition_cols(string table_name)'
-  print '  void save_table(string table_name,  schema, bool overwrite)'
-  print '   cat(string table_name, i32 high, string partition)'
-  print '  void drop(string table_name, bool delete_data)'
-  print '  void rename(string current_table_name, string new_table_name, bool move_data)'
-  print '  bool schema_exists(string table_name)'
-  print '  bool exists(string table_name)'
-  print '  void alter_table(string table_name,  columns)'
-  print '  void create_table(string table_name,  columns,  partition_cols)'
-  print '   get_partitions(string table_name)'
-  print '  void write_log(string table_name, string logName,  new_entry)'
-  print '   read_log(string table_name, string logName)'
-  print '   get_log_names(string table_name)'
-  print ''
-  sys.exit(0)
-
-pp = pprint.PrettyPrinter(indent = 2)
-host = 'localhost'
-port = 9090
-uri = ''
-framed = False
-http = False
-argi = 1
-
-if sys.argv[argi] == '-h':
-  parts = sys.argv[argi+1].split(':') 
-  host = parts[0]
-  port = int(parts[1])
-  argi += 2
-
-if sys.argv[argi] == '-u':
-  url = urlparse(sys.argv[argi+1])
-  parts = url[1].split(':') 
-  host = parts[0]
-  if len(parts) > 1:
-    port = int(parts[1])
-  else:
-    port = 80
-  uri = url[2]
-  http = True
-  argi += 2
-
-if sys.argv[argi] == '-f' or sys.argv[argi] == '-framed':
-  framed = True
-  argi += 1
-
-cmd = sys.argv[argi]
-args = sys.argv[argi+1:]
-
-if http:
-  transport = THttpClient.THttpClient(host, port, uri)
-else:
-  socket = TSocket.TSocket(host, port)
-  if framed:
-    transport = TTransport.TFramedTransport(socket)
-  else:
-    transport = TTransport.TBufferedTransport(socket)
-protocol = TBinaryProtocol.TBinaryProtocol(transport)
-client = ThriftHiveMetaStore.Client(protocol)
-transport.open()
-
-if cmd == 'get_tables':
-  if len(args) != 1:
-    print 'get_tables requires 1 args'
-    sys.exit(1)
-  pp.pprint(client.get_tables(args[0],))
-
-elif cmd == 'get_schema':
-  if len(args) != 1:
-    print 'get_schema requires 1 args'
-    sys.exit(1)
-  pp.pprint(client.get_schema(args[0],))
-
-elif cmd == 'get_fields':
-  if len(args) != 1:
-    print 'get_fields requires 1 args'
-    sys.exit(1)
-  pp.pprint(client.get_fields(args[0],))
-
-elif cmd == 'get_partition_cols':
-  if len(args) != 1:
-    print 'get_partition_cols requires 1 args'
-    sys.exit(1)
-  pp.pprint(client.get_partition_cols(args[0],))
-
-elif cmd == 'save_table':
-  if len(args) != 3:
-    print 'save_table requires 3 args'
-    sys.exit(1)
-  pp.pprint(client.save_table(args[0],eval(args[1]),eval(args[2]),))
-
-elif cmd == 'cat':
-  if len(args) != 3:
-    print 'cat requires 3 args'
-    sys.exit(1)
-  pp.pprint(client.cat(args[0],eval(args[1]),args[2],))
-
-elif cmd == 'drop':
-  if len(args) != 2:
-    print 'drop requires 2 args'
-    sys.exit(1)
-  pp.pprint(client.drop(args[0],eval(args[1]),))
-
-elif cmd == 'rename':
-  if len(args) != 3:
-    print 'rename requires 3 args'
-    sys.exit(1)
-  pp.pprint(client.rename(args[0],args[1],eval(args[2]),))
-
-elif cmd == 'schema_exists':
-  if len(args) != 1:
-    print 'schema_exists requires 1 args'
-    sys.exit(1)
-  pp.pprint(client.schema_exists(args[0],))
-
-elif cmd == 'exists':
-  if len(args) != 1:
-    print 'exists requires 1 args'
-    sys.exit(1)
-  pp.pprint(client.exists(args[0],))
-
-elif cmd == 'alter_table':
-  if len(args) != 2:
-    print 'alter_table requires 2 args'
-    sys.exit(1)
-  pp.pprint(client.alter_table(args[0],eval(args[1]),))
-
-elif cmd == 'create_table':
-  if len(args) != 3:
-    print 'create_table requires 3 args'
-    sys.exit(1)
-  pp.pprint(client.create_table(args[0],eval(args[1]),eval(args[2]),))
-
-elif cmd == 'get_partitions':
-  if len(args) != 1:
-    print 'get_partitions requires 1 args'
-    sys.exit(1)
-  pp.pprint(client.get_partitions(args[0],))
-
-elif cmd == 'write_log':
-  if len(args) != 3:
-    print 'write_log requires 3 args'
-    sys.exit(1)
-  pp.pprint(client.write_log(args[0],args[1],eval(args[2]),))
-
-elif cmd == 'read_log':
-  if len(args) != 2:
-    print 'read_log requires 2 args'
-    sys.exit(1)
-  pp.pprint(client.read_log(args[0],args[1],))
-
-elif cmd == 'get_log_names':
-  if len(args) != 1:
-    print 'get_log_names requires 1 args'
-    sys.exit(1)
-  pp.pprint(client.get_log_names(args[0],))
-
-transport.close()



Mime
View raw message