iceberg-issues mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From GitBox <...@apache.org>
Subject [GitHub] [iceberg] rdblue commented on a change in pull request #1140: ORC: skip non-iceberg columns when converting schema to Iceberg
Date Tue, 30 Jun 2020 22:56:50 GMT

rdblue commented on a change in pull request #1140:
URL: https://github.com/apache/iceberg/pull/1140#discussion_r448024156



##########
File path: orc/src/main/java/org/apache/iceberg/orc/ORCSchemaUtil.java
##########
@@ -403,114 +405,150 @@ private static boolean isRequired(TypeDescription orcType) {
         Types.NestedField.optional(icebergID, name, type);
   }
 
-  private static Types.NestedField convertOrcToIceberg(TypeDescription orcType, String name,
-                                                       TypeUtil.NextID nextID) {
+  private static class OrcToIcebergVisitor extends OrcSchemaVisitor<Optional<Types.NestedField>>
{
 
-    final int icebergID = icebergID(orcType).orElseGet(nextID::get);
-    final boolean isRequired = isRequired(orcType);
+    private final Map<Integer, OrcField> icebergToOrcMapping;
 
-    switch (orcType.getCategory()) {
-      case BOOLEAN:
-        return getIcebergType(icebergID, name, Types.BooleanType.get(), isRequired);
-      case BYTE:
-      case SHORT:
-      case INT:
-        return getIcebergType(icebergID, name, Types.IntegerType.get(), isRequired);
-      case LONG:
-        String longAttributeValue = orcType.getAttributeValue(ICEBERG_LONG_TYPE_ATTRIBUTE);
-        LongType longType = longAttributeValue == null ? LongType.LONG : LongType.valueOf(longAttributeValue);
-        switch (longType) {
-          case TIME:
-            return getIcebergType(icebergID, name, Types.TimeType.get(), isRequired);
-          case LONG:
-            return getIcebergType(icebergID, name, Types.LongType.get(), isRequired);
-          default:
-            throw new IllegalStateException("Invalid Long type found in ORC type attribute");
-        }
-      case FLOAT:
-        return getIcebergType(icebergID, name, Types.FloatType.get(), isRequired);
-      case DOUBLE:
-        return getIcebergType(icebergID, name, Types.DoubleType.get(), isRequired);
-      case STRING:
-      case CHAR:
-      case VARCHAR:
-        return getIcebergType(icebergID, name, Types.StringType.get(), isRequired);
-      case BINARY:
-        String binaryAttributeValue = orcType.getAttributeValue(ICEBERG_BINARY_TYPE_ATTRIBUTE);
-        BinaryType binaryType = binaryAttributeValue == null ? BinaryType.BINARY :
-            BinaryType.valueOf(binaryAttributeValue);
-        switch (binaryType) {
-          case UUID:
-            return getIcebergType(icebergID, name, Types.UUIDType.get(), isRequired);
-          case FIXED:
-            int fixedLength = Integer.parseInt(orcType.getAttributeValue(ICEBERG_FIELD_LENGTH));
-            return getIcebergType(icebergID, name, Types.FixedType.ofLength(fixedLength),
isRequired);
-          case BINARY:
-            return getIcebergType(icebergID, name, Types.BinaryType.get(), isRequired);
-          default:
-            throw new IllegalStateException("Invalid Binary type found in ORC type attribute");
-        }
-      case DATE:
-        return getIcebergType(icebergID, name, Types.DateType.get(), isRequired);
-      case TIMESTAMP:
-        return getIcebergType(icebergID, name, Types.TimestampType.withoutZone(), isRequired);
-      case TIMESTAMP_INSTANT:
-        return getIcebergType(icebergID, name, Types.TimestampType.withZone(), isRequired);
-      case DECIMAL:
-        return getIcebergType(icebergID, name,
-            Types.DecimalType.of(orcType.getPrecision(), orcType.getScale()),
-            isRequired);
-      case STRUCT: {
-        List<String> fieldNames = orcType.getFieldNames();
-        List<TypeDescription> fieldTypes = orcType.getChildren();
-        List<Types.NestedField> fields = new ArrayList<>(fieldNames.size());
-        for (int c = 0; c < fieldNames.size(); ++c) {
-          String childName = fieldNames.get(c);
-          TypeDescription type = fieldTypes.get(c);
-          Types.NestedField field = convertOrcToIceberg(type, childName, nextID);
-          fields.add(field);
-        }
+    OrcToIcebergVisitor(Map<Integer, OrcField> icebergToOrcMapping) {
+      this.icebergToOrcMapping = icebergToOrcMapping;
+    }
 
-        return getIcebergType(icebergID, name, Types.StructType.of(fields), isRequired);
-      }
-      case LIST: {
-        TypeDescription elementType = orcType.getChildren().get(0);
-        Types.NestedField element = convertOrcToIceberg(elementType, "element", nextID);
-
-        Types.ListType listTypeWithElem = isRequired(elementType) ?
-            Types.ListType.ofRequired(element.fieldId(), element.type()) :
-            Types.ListType.ofOptional(element.fieldId(), element.type());
-        return isRequired ?
-            Types.NestedField.required(icebergID, name, listTypeWithElem) :
-            Types.NestedField.optional(icebergID, name, listTypeWithElem);
+    @Override
+    public Optional<Types.NestedField> record(TypeDescription record, List<String>
names,
+                                              List<Optional<Types.NestedField>>
fields) {
+      boolean isRequired = isRequired(record);
+      Optional<Integer> icebergIdOpt = icebergID(record);
+      if (!icebergIdOpt.isPresent() || fields.size() == 0) {

Review comment:
       Should this also check that any of the fields are defined?
   
   ```
   if (!icebergIdOpt.isPresent() || !fields.stream().anyMatch(Optional::isPresent)) {
     return Optional.empty();
   }
   ```




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@iceberg.apache.org
For additional commands, e-mail: issues-help@iceberg.apache.org


Mime
View raw message