hive-issues mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From "Luis Gonzalez (JIRA)" <j...@apache.org>
Subject [jira] [Updated] (HIVE-12955) avro.schema.literal don't support more than 50 fields.
Date Thu, 28 Jan 2016 15:53:40 GMT

     [ https://issues.apache.org/jira/browse/HIVE-12955?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]

Luis Gonzalez updated HIVE-12955:
---------------------------------
    Description: 
Hi!

We think we have hit a bug. We have tested this in many ways for a couple of hours now,
and there is apparently a problem with avro.schema.literal when you specify more than 50 fields.
In our tests we have found that, regardless of the Avro file (table) we want to load, it fails
with 60 fields. Some tables we use have more than 400 fields and have the same problem.

For instance, if we launch the following command, it works:

{code:none}
hive> drop table tableName;
OK
Time taken: 0.162 seconds
hive> 
    > CREATE EXTERNAL TABLE tableName
    > ROW FORMAT
    > SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
    > WITH SERDEPROPERTIES ('avro.schema.literal'='
    > { "namespace": "cdr.avro", "type": "record", "name": "Account", "fields": [ 
    > {"name": "Id", "type":["null", "string"],"default":null}, 
    > {"name": "IsDeleted", "type":["null", "boolean"],"default":null}, 
    > {"name": "MasterRecordId", "type":["null", "string"],"default":null}, 
    > {"name": "Name", "type":["null", "string"],"default":null}, 
    > {"name": "Type", "type":["null", "string"],"default":null}, 
    > {"name": "RecordTypeId", "type":["null", "string"],"default":null}, 
    > {"name": "ParentId", "type":["null", "string"],"default":null}, 
    > {"name": "Phone", "type":["null", "string"],"default":null}, 
    > {"name": "Fax", "type":["null", "string"],"default":null}, 
    > {"name": "AccountNumber", "type":["null", "string"],"default":null}, 
    > {"name": "Website", "type":["null", "string"],"default":null}, 
    > {"name": "Industry", "type":["null", "string"],"default":null}, 
    > {"name": "AnnualRevenue", "type":["null", "double"],"default":null}, 
    > {"name": "NumberOfEmployees", "type":["null", "int"],"default":null}, 
    > {"name": "Description", "type":["null", "string"],"default":null}, 
    > {"name": "OwnerId", "type":["null", "string"],"default":null}, 
    > {"name": "CreatedDate", "type":["null", "string"],"default":null}, 
    > {"name": "CreatedById", "type":["null", "string"],"default":null}, 
    > {"name": "LastModifiedDate", "type":["null", "string"],"default":null}, 
    > {"name": "LastModifiedById", "type":["null", "string"],"default":null}, 
    > {"name": "SystemModstamp", "type":["null", "string"],"default":null}, 
    > {"name": "LastActivityDate", "type":["null", "string"],"default":null}, 
    > {"name": "IsPartner", "type":["null", "boolean"],"default":null}, 
    > {"name": "IsCustomerPortal", "type":["null", "boolean"],"default":null}, 
    > {"name": "JigsawCompanyId", "type":["null", "string"],"default":null}, 
    > {"name": "Invoice_Level__c", "type":["null", "string"],"default":null}, 
    > {"name": "IT_Developer_Fee__c", "type":["null", "boolean"],"default":null}, 
    > {"name": "Customer_Type__c", "type":["null", "string"],"default":null}, 
    > {"name": "Shortname__c", "type":["null", "string"],"default":null}, 
    > {"name": "Excluir_compensacion_por_desvio__c", "type":["null", "boolean"],"default":null},

    > {"name": "Commercial_Area__c", "type":["null", "string"],"default":null}, 
    > {"name": "Account_Status__c", "type":["null", "string"],"default":null}, 
    > {"name": "Active_Fiscal_Details__c", "type":["null", "boolean"],"default":null},

    > {"name": "Office_Code__c", "type":["null", "string"],"default":null}, 
    > {"name": "Commercial_Brand__c", "type":["null", "string"],"default":null}, 
    > {"name": "Agreed_payment_method__c", "type":["null", "string"],"default":null}, 
    > {"name": "Division__c", "type":["null", "string"],"default":null}, 
    > {"name": "Inactive_Date__c", "type":["null", "string"],"default":null}, 
    > {"name": "SAP_Code__c", "type":["null", "string"],"default":null}, 
    > {"name": "Country_fiscal__c", "type":["null", "string"],"default":null}, 
    > {"name": "Fiscal_Number_1__c", "type":["null", "string"],"default":null}, 
    > {"name": "Fiscal_Number_2__c", "type":["null", "string"],"default":null}, 
    > {"name": "Street_Fiscal__c", "type":["null", "string"],"default":null}, 
    > {"name": "City_fiscal__c", "type":["null", "string"],"default":null}, 
    > {"name": "Post_Code_fiscal__c", "type":["null", "string"],"default":null}, 
    > {"name": "Web_Prepayment__c", "type":["null", "boolean"],"default":null}, 
    > {"name": "Customer_Subtype__c", "type":["null", "string"],"default":null}, 
    > {"name": "Relationship__c", "type":["null", "string"],"default":null}, 
    > {"name": "Market_Country__c", "type":["null", "string"],"default":null}
    > ] }
    > ')
    > STORED AS
    > INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
    > OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'
    > LOCATION 's3://bucket.../path/to/avro';
OK
Time taken: 0.412 seconds
hive> 
    > select * from tableName limit 10;
OK
{code}

But when using the same Avro file with more fields, it fails:
{code:none}
hive> drop table tableName;
OK
Time taken: 0.146 seconds
hive> 
    > CREATE EXTERNAL TABLE tableName
    > ROW FORMAT
    > SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
    > WITH SERDEPROPERTIES ('avro.schema.literal'='
    > { "namespace": "cdr.avro", "type": "record", "name": "Account", "fields": [ 
    > {"name": "Id", "type":["null", "string"],"default":null}, 
    > {"name": "IsDeleted", "type":["null", "boolean"],"default":null}, 
    > {"name": "MasterRecordId", "type":["null", "string"],"default":null}, 
    > {"name": "Name", "type":["null", "string"],"default":null}, 
    > {"name": "Type", "type":["null", "string"],"default":null}, 
    > {"name": "RecordTypeId", "type":["null", "string"],"default":null}, 
    > {"name": "ParentId", "type":["null", "string"],"default":null}, 
    > {"name": "Phone", "type":["null", "string"],"default":null}, 
    > {"name": "Fax", "type":["null", "string"],"default":null}, 
    > {"name": "AccountNumber", "type":["null", "string"],"default":null}, 
    > {"name": "Website", "type":["null", "string"],"default":null}, 
    > {"name": "Industry", "type":["null", "string"],"default":null}, 
    > {"name": "AnnualRevenue", "type":["null", "double"],"default":null}, 
    > {"name": "NumberOfEmployees", "type":["null", "int"],"default":null}, 
    > {"name": "Description", "type":["null", "string"],"default":null}, 
    > {"name": "OwnerId", "type":["null", "string"],"default":null}, 
    > {"name": "CreatedDate", "type":["null", "string"],"default":null}, 
    > {"name": "CreatedById", "type":["null", "string"],"default":null}, 
    > {"name": "LastModifiedDate", "type":["null", "string"],"default":null}, 
    > {"name": "LastModifiedById", "type":["null", "string"],"default":null}, 
    > {"name": "SystemModstamp", "type":["null", "string"],"default":null}, 
    > {"name": "LastActivityDate", "type":["null", "string"],"default":null}, 
    > {"name": "IsPartner", "type":["null", "boolean"],"default":null}, 
    > {"name": "IsCustomerPortal", "type":["null", "boolean"],"default":null}, 
    > {"name": "JigsawCompanyId", "type":["null", "string"],"default":null}, 
    > {"name": "Invoice_Level__c", "type":["null", "string"],"default":null}, 
    > {"name": "IT_Developer_Fee__c", "type":["null", "boolean"],"default":null}, 
    > {"name": "Customer_Type__c", "type":["null", "string"],"default":null}, 
    > {"name": "Shortname__c", "type":["null", "string"],"default":null}, 
    > {"name": "Excluir_compensacion_por_desvio__c", "type":["null", "boolean"],"default":null},

    > {"name": "Commercial_Area__c", "type":["null", "string"],"default":null}, 
    > {"name": "Account_Status__c", "type":["null", "string"],"default":null}, 
    > {"name": "Active_Fiscal_Details__c", "type":["null", "boolean"],"default":null},

    > {"name": "Office_Code__c", "type":["null", "string"],"default":null}, 
    > {"name": "Commercial_Brand__c", "type":["null", "string"],"default":null}, 
    > {"name": "Agreed_payment_method__c", "type":["null", "string"],"default":null}, 
    > {"name": "Division__c", "type":["null", "string"],"default":null}, 
    > {"name": "Inactive_Date__c", "type":["null", "string"],"default":null}, 
    > {"name": "SAP_Code__c", "type":["null", "string"],"default":null}, 
    > {"name": "Country_fiscal__c", "type":["null", "string"],"default":null}, 
    > {"name": "Fiscal_Number_1__c", "type":["null", "string"],"default":null}, 
    > {"name": "Fiscal_Number_2__c", "type":["null", "string"],"default":null}, 
    > {"name": "Street_Fiscal__c", "type":["null", "string"],"default":null}, 
    > {"name": "City_fiscal__c", "type":["null", "string"],"default":null}, 
    > {"name": "Post_Code_fiscal__c", "type":["null", "string"],"default":null}, 
    > {"name": "Web_Prepayment__c", "type":["null", "boolean"],"default":null}, 
    > {"name": "Customer_Subtype__c", "type":["null", "string"],"default":null}, 
    > {"name": "Relationship__c", "type":["null", "string"],"default":null}, 
    > {"name": "Market_Country__c", "type":["null", "string"],"default":null}, 
    > {"name": "Customer_Service_Centre__c", "type":["null", "string"],"default":null},

    > {"name": "Acquisition_Channel_Type__c", "type":["null", "string"],"default":null},

    > {"name": "Acquisition_Channel_Description__c", "type":["null", "string"],"default":null},

    > {"name": "Comments__c", "type":["null", "string"],"default":null}, 
    > {"name": "Street_Commercial__c", "type":["null", "string"],"default":null}, 
    > {"name": "Country_Commercial__c", "type":["null", "string"],"default":null}, 
    > {"name": "City_Commercial__c", "type":["null", "string"],"default":null}, 
    > {"name": "Post_Code_Commercial__c", "type":["null", "string"],"default":null}, 
    > {"name": "Atlas_Branch_Number__c", "type":["null", "double"],"default":null}, 
    > {"name": "Timezone__c", "type":["null", "string"],"default":null}, 
    > {"name": "Billing_Language__c", "type":["null", "string"],"default":null}, 
    > {"name": "Fiscal_Name__c", "type":["null", "string"],"default":null}
    > ] }
    > ')
    > STORED AS
    > INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
    > OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'
    > LOCATION 's3://bucket/path/to/avro';
OK
Time taken: 0.48 seconds
hive> 
    > select * from tableName limit 10;
OK
Failed with exception java.io.IOException:org.apache.avro.AvroTypeException: Found cdr.avro.Account,
expecting org.apache.hadoop.hive.CannotDetermineSchemaSentinel
Time taken: 0.028 seconds
{code}

This problem does not occur when we store the Avro schema with 400 fields in a file in S3
and use the avro.schema.url property instead.


  was:
Hi!

we think we have hitted a bug. We have tested this in many ways for a couple of hours now
and there is apparently a problem with avro.schema.literal when you specify more than 50 fields.
In our tests we have found that regarless the avro file (table) we want to load, it fails
with 60 fields.. some tables we uses have more than 400 fields and have the same problem.

For instance if we launch the command 

{code:shell}
hive> drop table tableName;
OK
Time taken: 0.162 seconds
hive> 
    > CREATE EXTERNAL TABLE tableName
    > ROW FORMAT
    > SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
    > WITH SERDEPROPERTIES ('avro.schema.literal'='
    > { "namespace": "cdr.avro", "type": "record", "name": "Account", "fields": [ 
    > {"name": "Id", "type":["null", "string"],"default":null}, 
    > {"name": "IsDeleted", "type":["null", "boolean"],"default":null}, 
    > {"name": "MasterRecordId", "type":["null", "string"],"default":null}, 
    > {"name": "Name", "type":["null", "string"],"default":null}, 
    > {"name": "Type", "type":["null", "string"],"default":null}, 
    > {"name": "RecordTypeId", "type":["null", "string"],"default":null}, 
    > {"name": "ParentId", "type":["null", "string"],"default":null}, 
    > {"name": "Phone", "type":["null", "string"],"default":null}, 
    > {"name": "Fax", "type":["null", "string"],"default":null}, 
    > {"name": "AccountNumber", "type":["null", "string"],"default":null}, 
    > {"name": "Website", "type":["null", "string"],"default":null}, 
    > {"name": "Industry", "type":["null", "string"],"default":null}, 
    > {"name": "AnnualRevenue", "type":["null", "double"],"default":null}, 
    > {"name": "NumberOfEmployees", "type":["null", "int"],"default":null}, 
    > {"name": "Description", "type":["null", "string"],"default":null}, 
    > {"name": "OwnerId", "type":["null", "string"],"default":null}, 
    > {"name": "CreatedDate", "type":["null", "string"],"default":null}, 
    > {"name": "CreatedById", "type":["null", "string"],"default":null}, 
    > {"name": "LastModifiedDate", "type":["null", "string"],"default":null}, 
    > {"name": "LastModifiedById", "type":["null", "string"],"default":null}, 
    > {"name": "SystemModstamp", "type":["null", "string"],"default":null}, 
    > {"name": "LastActivityDate", "type":["null", "string"],"default":null}, 
    > {"name": "IsPartner", "type":["null", "boolean"],"default":null}, 
    > {"name": "IsCustomerPortal", "type":["null", "boolean"],"default":null}, 
    > {"name": "JigsawCompanyId", "type":["null", "string"],"default":null}, 
    > {"name": "Invoice_Level__c", "type":["null", "string"],"default":null}, 
    > {"name": "IT_Developer_Fee__c", "type":["null", "boolean"],"default":null}, 
    > {"name": "Customer_Type__c", "type":["null", "string"],"default":null}, 
    > {"name": "Shortname__c", "type":["null", "string"],"default":null}, 
    > {"name": "Excluir_compensacion_por_desvio__c", "type":["null", "boolean"],"default":null},

    > {"name": "Commercial_Area__c", "type":["null", "string"],"default":null}, 
    > {"name": "Account_Status__c", "type":["null", "string"],"default":null}, 
    > {"name": "Active_Fiscal_Details__c", "type":["null", "boolean"],"default":null},

    > {"name": "Office_Code__c", "type":["null", "string"],"default":null}, 
    > {"name": "Commercial_Brand__c", "type":["null", "string"],"default":null}, 
    > {"name": "Agreed_payment_method__c", "type":["null", "string"],"default":null}, 
    > {"name": "Division__c", "type":["null", "string"],"default":null}, 
    > {"name": "Inactive_Date__c", "type":["null", "string"],"default":null}, 
    > {"name": "SAP_Code__c", "type":["null", "string"],"default":null}, 
    > {"name": "Country_fiscal__c", "type":["null", "string"],"default":null}, 
    > {"name": "Fiscal_Number_1__c", "type":["null", "string"],"default":null}, 
    > {"name": "Fiscal_Number_2__c", "type":["null", "string"],"default":null}, 
    > {"name": "Street_Fiscal__c", "type":["null", "string"],"default":null}, 
    > {"name": "City_fiscal__c", "type":["null", "string"],"default":null}, 
    > {"name": "Post_Code_fiscal__c", "type":["null", "string"],"default":null}, 
    > {"name": "Web_Prepayment__c", "type":["null", "boolean"],"default":null}, 
    > {"name": "Customer_Subtype__c", "type":["null", "string"],"default":null}, 
    > {"name": "Relationship__c", "type":["null", "string"],"default":null}, 
    > {"name": "Market_Country__c", "type":["null", "string"],"default":null}
    > ] }
    > ')
    > STORED AS
    > INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
    > OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'
    > LOCATION 's3://bucket.../path/to/avro';
OK
Time taken: 0.412 seconds
hive> 
    > select * from tableName limit 10;
OK
{code}

but when using the same AVRO file and more fields it fails
{code:shell}
hive> drop table tableName;
OK
Time taken: 0.146 seconds
hive> 
    > CREATE EXTERNAL TABLE tableName
    > ROW FORMAT
    > SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
    > WITH SERDEPROPERTIES ('avro.schema.literal'='
    > { "namespace": "cdr.avro", "type": "record", "name": "Account", "fields": [ 
    > {"name": "Id", "type":["null", "string"],"default":null}, 
    > {"name": "IsDeleted", "type":["null", "boolean"],"default":null}, 
    > {"name": "MasterRecordId", "type":["null", "string"],"default":null}, 
    > {"name": "Name", "type":["null", "string"],"default":null}, 
    > {"name": "Type", "type":["null", "string"],"default":null}, 
    > {"name": "RecordTypeId", "type":["null", "string"],"default":null}, 
    > {"name": "ParentId", "type":["null", "string"],"default":null}, 
    > {"name": "Phone", "type":["null", "string"],"default":null}, 
    > {"name": "Fax", "type":["null", "string"],"default":null}, 
    > {"name": "AccountNumber", "type":["null", "string"],"default":null}, 
    > {"name": "Website", "type":["null", "string"],"default":null}, 
    > {"name": "Industry", "type":["null", "string"],"default":null}, 
    > {"name": "AnnualRevenue", "type":["null", "double"],"default":null}, 
    > {"name": "NumberOfEmployees", "type":["null", "int"],"default":null}, 
    > {"name": "Description", "type":["null", "string"],"default":null}, 
    > {"name": "OwnerId", "type":["null", "string"],"default":null}, 
    > {"name": "CreatedDate", "type":["null", "string"],"default":null}, 
    > {"name": "CreatedById", "type":["null", "string"],"default":null}, 
    > {"name": "LastModifiedDate", "type":["null", "string"],"default":null}, 
    > {"name": "LastModifiedById", "type":["null", "string"],"default":null}, 
    > {"name": "SystemModstamp", "type":["null", "string"],"default":null}, 
    > {"name": "LastActivityDate", "type":["null", "string"],"default":null}, 
    > {"name": "IsPartner", "type":["null", "boolean"],"default":null}, 
    > {"name": "IsCustomerPortal", "type":["null", "boolean"],"default":null}, 
    > {"name": "JigsawCompanyId", "type":["null", "string"],"default":null}, 
    > {"name": "Invoice_Level__c", "type":["null", "string"],"default":null}, 
    > {"name": "IT_Developer_Fee__c", "type":["null", "boolean"],"default":null}, 
    > {"name": "Customer_Type__c", "type":["null", "string"],"default":null}, 
    > {"name": "Shortname__c", "type":["null", "string"],"default":null}, 
    > {"name": "Excluir_compensacion_por_desvio__c", "type":["null", "boolean"],"default":null},

    > {"name": "Commercial_Area__c", "type":["null", "string"],"default":null}, 
    > {"name": "Account_Status__c", "type":["null", "string"],"default":null}, 
    > {"name": "Active_Fiscal_Details__c", "type":["null", "boolean"],"default":null},

    > {"name": "Office_Code__c", "type":["null", "string"],"default":null}, 
    > {"name": "Commercial_Brand__c", "type":["null", "string"],"default":null}, 
    > {"name": "Agreed_payment_method__c", "type":["null", "string"],"default":null}, 
    > {"name": "Division__c", "type":["null", "string"],"default":null}, 
    > {"name": "Inactive_Date__c", "type":["null", "string"],"default":null}, 
    > {"name": "SAP_Code__c", "type":["null", "string"],"default":null}, 
    > {"name": "Country_fiscal__c", "type":["null", "string"],"default":null}, 
    > {"name": "Fiscal_Number_1__c", "type":["null", "string"],"default":null}, 
    > {"name": "Fiscal_Number_2__c", "type":["null", "string"],"default":null}, 
    > {"name": "Street_Fiscal__c", "type":["null", "string"],"default":null}, 
    > {"name": "City_fiscal__c", "type":["null", "string"],"default":null}, 
    > {"name": "Post_Code_fiscal__c", "type":["null", "string"],"default":null}, 
    > {"name": "Web_Prepayment__c", "type":["null", "boolean"],"default":null}, 
    > {"name": "Customer_Subtype__c", "type":["null", "string"],"default":null}, 
    > {"name": "Relationship__c", "type":["null", "string"],"default":null}, 
    > {"name": "Market_Country__c", "type":["null", "string"],"default":null}, 
    > {"name": "Customer_Service_Centre__c", "type":["null", "string"],"default":null},

    > {"name": "Acquisition_Channel_Type__c", "type":["null", "string"],"default":null},

    > {"name": "Acquisition_Channel_Description__c", "type":["null", "string"],"default":null},

    > {"name": "Comments__c", "type":["null", "string"],"default":null}, 
    > {"name": "Street_Commercial__c", "type":["null", "string"],"default":null}, 
    > {"name": "Country_Commercial__c", "type":["null", "string"],"default":null}, 
    > {"name": "City_Commercial__c", "type":["null", "string"],"default":null}, 
    > {"name": "Post_Code_Commercial__c", "type":["null", "string"],"default":null}, 
    > {"name": "Atlas_Branch_Number__c", "type":["null", "double"],"default":null}, 
    > {"name": "Timezone__c", "type":["null", "string"],"default":null}, 
    > {"name": "Billing_Language__c", "type":["null", "string"],"default":null}, 
    > {"name": "Fiscal_Name__c", "type":["null", "string"],"default":null}
    > ] }
    > ')
    > STORED AS
    > INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
    > OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'
    > LOCATION 's3://bucket/path/to/avro';
OK
Time taken: 0.48 seconds
hive> 
    > select * from tableName limit 10;
OK
Failed with exception java.io.IOException:org.apache.avro.AvroTypeException: Found cdr.avro.Account,
expecting org.apache.hadoop.hive.CannotDetermineSchemaSentinel
Time taken: 0.028 seconds
{code}

This problem doesn't affect when we store the avro schema with 400 fields in a file in s3
and use the avro.schema.url field.



>  avro.schema.literal don't support more than 50 fields.
> -------------------------------------------------------
>
>                 Key: HIVE-12955
>                 URL: https://issues.apache.org/jira/browse/HIVE-12955
>             Project: Hive
>          Issue Type: Bug
>          Components: Hive
>    Affects Versions: 1.0.0
>            Reporter: Luis Gonzalez
>            Priority: Minor
>
> Hi!
> We think we have hit a bug. We have tested this in many ways for a couple of hours
now, and there is apparently a problem with avro.schema.literal when you specify more than
50 fields. In our tests we have found that, regardless of the Avro file (table) we want to load,
it fails with 60 fields. Some tables we use have more than 400 fields and have the same
problem.
> For instance if we launch the command 
> {code:none}
> hive> drop table tableName;
> OK
> Time taken: 0.162 seconds
> hive> 
>     > CREATE EXTERNAL TABLE tableName
>     > ROW FORMAT
>     > SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
>     > WITH SERDEPROPERTIES ('avro.schema.literal'='
>     > { "namespace": "cdr.avro", "type": "record", "name": "Account", "fields": [

>     > {"name": "Id", "type":["null", "string"],"default":null}, 
>     > {"name": "IsDeleted", "type":["null", "boolean"],"default":null}, 
>     > {"name": "MasterRecordId", "type":["null", "string"],"default":null}, 
>     > {"name": "Name", "type":["null", "string"],"default":null}, 
>     > {"name": "Type", "type":["null", "string"],"default":null}, 
>     > {"name": "RecordTypeId", "type":["null", "string"],"default":null}, 
>     > {"name": "ParentId", "type":["null", "string"],"default":null}, 
>     > {"name": "Phone", "type":["null", "string"],"default":null}, 
>     > {"name": "Fax", "type":["null", "string"],"default":null}, 
>     > {"name": "AccountNumber", "type":["null", "string"],"default":null}, 
>     > {"name": "Website", "type":["null", "string"],"default":null}, 
>     > {"name": "Industry", "type":["null", "string"],"default":null}, 
>     > {"name": "AnnualRevenue", "type":["null", "double"],"default":null}, 
>     > {"name": "NumberOfEmployees", "type":["null", "int"],"default":null}, 
>     > {"name": "Description", "type":["null", "string"],"default":null}, 
>     > {"name": "OwnerId", "type":["null", "string"],"default":null}, 
>     > {"name": "CreatedDate", "type":["null", "string"],"default":null}, 
>     > {"name": "CreatedById", "type":["null", "string"],"default":null}, 
>     > {"name": "LastModifiedDate", "type":["null", "string"],"default":null}, 
>     > {"name": "LastModifiedById", "type":["null", "string"],"default":null}, 
>     > {"name": "SystemModstamp", "type":["null", "string"],"default":null}, 
>     > {"name": "LastActivityDate", "type":["null", "string"],"default":null}, 
>     > {"name": "IsPartner", "type":["null", "boolean"],"default":null}, 
>     > {"name": "IsCustomerPortal", "type":["null", "boolean"],"default":null}, 
>     > {"name": "JigsawCompanyId", "type":["null", "string"],"default":null}, 
>     > {"name": "Invoice_Level__c", "type":["null", "string"],"default":null}, 
>     > {"name": "IT_Developer_Fee__c", "type":["null", "boolean"],"default":null},

>     > {"name": "Customer_Type__c", "type":["null", "string"],"default":null}, 
>     > {"name": "Shortname__c", "type":["null", "string"],"default":null}, 
>     > {"name": "Excluir_compensacion_por_desvio__c", "type":["null", "boolean"],"default":null},

>     > {"name": "Commercial_Area__c", "type":["null", "string"],"default":null}, 
>     > {"name": "Account_Status__c", "type":["null", "string"],"default":null}, 
>     > {"name": "Active_Fiscal_Details__c", "type":["null", "boolean"],"default":null},

>     > {"name": "Office_Code__c", "type":["null", "string"],"default":null}, 
>     > {"name": "Commercial_Brand__c", "type":["null", "string"],"default":null}, 
>     > {"name": "Agreed_payment_method__c", "type":["null", "string"],"default":null},

>     > {"name": "Division__c", "type":["null", "string"],"default":null}, 
>     > {"name": "Inactive_Date__c", "type":["null", "string"],"default":null}, 
>     > {"name": "SAP_Code__c", "type":["null", "string"],"default":null}, 
>     > {"name": "Country_fiscal__c", "type":["null", "string"],"default":null}, 
>     > {"name": "Fiscal_Number_1__c", "type":["null", "string"],"default":null}, 
>     > {"name": "Fiscal_Number_2__c", "type":["null", "string"],"default":null}, 
>     > {"name": "Street_Fiscal__c", "type":["null", "string"],"default":null}, 
>     > {"name": "City_fiscal__c", "type":["null", "string"],"default":null}, 
>     > {"name": "Post_Code_fiscal__c", "type":["null", "string"],"default":null}, 
>     > {"name": "Web_Prepayment__c", "type":["null", "boolean"],"default":null}, 
>     > {"name": "Customer_Subtype__c", "type":["null", "string"],"default":null}, 
>     > {"name": "Relationship__c", "type":["null", "string"],"default":null}, 
>     > {"name": "Market_Country__c", "type":["null", "string"],"default":null}
>     > ] }
>     > ')
>     > STORED AS
>     > INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
>     > OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'
>     > LOCATION 's3://bucket.../path/to/avro';
> OK
> Time taken: 0.412 seconds
> hive> 
>     > select * from tableName limit 10;
> OK
> {code}
> but when using the same AVRO file and more fields it fails
> {code:none}
> hive> drop table tableName;
> OK
> Time taken: 0.146 seconds
> hive> 
>     > CREATE EXTERNAL TABLE tableName
>     > ROW FORMAT
>     > SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
>     > WITH SERDEPROPERTIES ('avro.schema.literal'='
>     > { "namespace": "cdr.avro", "type": "record", "name": "Account", "fields": [

>     > {"name": "Id", "type":["null", "string"],"default":null}, 
>     > {"name": "IsDeleted", "type":["null", "boolean"],"default":null}, 
>     > {"name": "MasterRecordId", "type":["null", "string"],"default":null}, 
>     > {"name": "Name", "type":["null", "string"],"default":null}, 
>     > {"name": "Type", "type":["null", "string"],"default":null}, 
>     > {"name": "RecordTypeId", "type":["null", "string"],"default":null}, 
>     > {"name": "ParentId", "type":["null", "string"],"default":null}, 
>     > {"name": "Phone", "type":["null", "string"],"default":null}, 
>     > {"name": "Fax", "type":["null", "string"],"default":null}, 
>     > {"name": "AccountNumber", "type":["null", "string"],"default":null}, 
>     > {"name": "Website", "type":["null", "string"],"default":null}, 
>     > {"name": "Industry", "type":["null", "string"],"default":null}, 
>     > {"name": "AnnualRevenue", "type":["null", "double"],"default":null}, 
>     > {"name": "NumberOfEmployees", "type":["null", "int"],"default":null}, 
>     > {"name": "Description", "type":["null", "string"],"default":null}, 
>     > {"name": "OwnerId", "type":["null", "string"],"default":null}, 
>     > {"name": "CreatedDate", "type":["null", "string"],"default":null}, 
>     > {"name": "CreatedById", "type":["null", "string"],"default":null}, 
>     > {"name": "LastModifiedDate", "type":["null", "string"],"default":null}, 
>     > {"name": "LastModifiedById", "type":["null", "string"],"default":null}, 
>     > {"name": "SystemModstamp", "type":["null", "string"],"default":null}, 
>     > {"name": "LastActivityDate", "type":["null", "string"],"default":null}, 
>     > {"name": "IsPartner", "type":["null", "boolean"],"default":null}, 
>     > {"name": "IsCustomerPortal", "type":["null", "boolean"],"default":null}, 
>     > {"name": "JigsawCompanyId", "type":["null", "string"],"default":null}, 
>     > {"name": "Invoice_Level__c", "type":["null", "string"],"default":null}, 
>     > {"name": "IT_Developer_Fee__c", "type":["null", "boolean"],"default":null},

>     > {"name": "Customer_Type__c", "type":["null", "string"],"default":null}, 
>     > {"name": "Shortname__c", "type":["null", "string"],"default":null}, 
>     > {"name": "Excluir_compensacion_por_desvio__c", "type":["null", "boolean"],"default":null},

>     > {"name": "Commercial_Area__c", "type":["null", "string"],"default":null}, 
>     > {"name": "Account_Status__c", "type":["null", "string"],"default":null}, 
>     > {"name": "Active_Fiscal_Details__c", "type":["null", "boolean"],"default":null},

>     > {"name": "Office_Code__c", "type":["null", "string"],"default":null}, 
>     > {"name": "Commercial_Brand__c", "type":["null", "string"],"default":null}, 
>     > {"name": "Agreed_payment_method__c", "type":["null", "string"],"default":null},

>     > {"name": "Division__c", "type":["null", "string"],"default":null}, 
>     > {"name": "Inactive_Date__c", "type":["null", "string"],"default":null}, 
>     > {"name": "SAP_Code__c", "type":["null", "string"],"default":null}, 
>     > {"name": "Country_fiscal__c", "type":["null", "string"],"default":null}, 
>     > {"name": "Fiscal_Number_1__c", "type":["null", "string"],"default":null}, 
>     > {"name": "Fiscal_Number_2__c", "type":["null", "string"],"default":null}, 
>     > {"name": "Street_Fiscal__c", "type":["null", "string"],"default":null}, 
>     > {"name": "City_fiscal__c", "type":["null", "string"],"default":null}, 
>     > {"name": "Post_Code_fiscal__c", "type":["null", "string"],"default":null}, 
>     > {"name": "Web_Prepayment__c", "type":["null", "boolean"],"default":null}, 
>     > {"name": "Customer_Subtype__c", "type":["null", "string"],"default":null}, 
>     > {"name": "Relationship__c", "type":["null", "string"],"default":null}, 
>     > {"name": "Market_Country__c", "type":["null", "string"],"default":null}, 
>     > {"name": "Customer_Service_Centre__c", "type":["null", "string"],"default":null},

>     > {"name": "Acquisition_Channel_Type__c", "type":["null", "string"],"default":null},

>     > {"name": "Acquisition_Channel_Description__c", "type":["null", "string"],"default":null},

>     > {"name": "Comments__c", "type":["null", "string"],"default":null}, 
>     > {"name": "Street_Commercial__c", "type":["null", "string"],"default":null},

>     > {"name": "Country_Commercial__c", "type":["null", "string"],"default":null},

>     > {"name": "City_Commercial__c", "type":["null", "string"],"default":null}, 
>     > {"name": "Post_Code_Commercial__c", "type":["null", "string"],"default":null},

>     > {"name": "Atlas_Branch_Number__c", "type":["null", "double"],"default":null},

>     > {"name": "Timezone__c", "type":["null", "string"],"default":null}, 
>     > {"name": "Billing_Language__c", "type":["null", "string"],"default":null}, 
>     > {"name": "Fiscal_Name__c", "type":["null", "string"],"default":null}
>     > ] }
>     > ')
>     > STORED AS
>     > INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
>     > OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'
>     > LOCATION 's3://bucket/path/to/avro';
> OK
> Time taken: 0.48 seconds
> hive> 
>     > select * from tableName limit 10;
> OK
> Failed with exception java.io.IOException:org.apache.avro.AvroTypeException: Found cdr.avro.Account,
expecting org.apache.hadoop.hive.CannotDetermineSchemaSentinel
> Time taken: 0.028 seconds
> {code}
> This problem does not occur when we store the Avro schema with 400 fields in a file in
S3 and use the avro.schema.url property instead.



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

Mime
View raw message