gobblin-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From a...@apache.org
Subject [1/3] incubator-gobblin git commit: [GOBBLIN-226] Nested schema support in JsonStringToJsonIntermediateConverter and JsonIntermediateToAvroConverter
Date Wed, 18 Oct 2017 05:34:04 GMT
Repository: incubator-gobblin
Updated Branches:
  refs/heads/master f0582115b -> 6dd36a506


http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/6dd36a50/gobblin-core/src/test/resources/converter/schema.json
----------------------------------------------------------------------
diff --git a/gobblin-core/src/test/resources/converter/schema.json b/gobblin-core/src/test/resources/converter/schema.json
index 6526cf9..9a8bed8 100644
--- a/gobblin-core/src/test/resources/converter/schema.json
+++ b/gobblin-core/src/test/resources/converter/schema.json
@@ -1,376 +1,400 @@
-[
-   {
-      "columnName":"Id",
-      "dataType":{
-         "type":"string"
+{
+  "schema": [
+    {
+      "columnName": "Id",
+      "dataType": {
+        "type": "string"
       },
-      "waterMark":false,
-      "primaryKey":1,
-      "length":18,
-      "precision":0,
-      "scale":0,
-      "isNullable":false,
-      "comment":"Account ID",
-      "unique":false
-   },
-   {
-      "columnName":"IsDeleted",
-      "dataType":{
-         "type":"boolean"
+      "waterMark": false,
+      "primaryKey": 1,
+      "length": 18,
+      "precision": 0,
+      "scale": 0,
+      "isNullable": false,
+      "comment": "Account ID",
+      "unique": false
+    },
+    {
+      "columnName": "IsDeleted",
+      "dataType": {
+        "type": "boolean"
       },
-      "waterMark":false,
-      "primaryKey":0,
-      "length":0,
-      "precision":0,
-      "scale":0,
-      "isNullable":false,
-      "comment":"Deleted",
-      "unique":false
-   },
-   {
-      "columnName":"Salutation",
-      "dataType":{
-         "items":"string",
-         "type":"array"
+      "waterMark": false,
+      "primaryKey": 0,
+      "length": 0,
+      "precision": 0,
+      "scale": 0,
+      "isNullable": false,
+      "comment": "Deleted",
+      "unique": false
+    },
+    {
+      "columnName": "Salutation",
+      "dataType": {
+        "items": "string",
+        "type": "array"
       },
-      "waterMark":false,
-      "primaryKey":0,
-      "length":40,
-      "precision":0,
-      "scale":0,
-      "isNullable":true,
-      "comment":"Salutation",
-      "unique":false
-   },
-   {
-      "columnName":"MapAccount",
-      "dataType":{
-         "values":"string",
-         "type":"map"
+      "waterMark": false,
+      "primaryKey": 0,
+      "length": 40,
+      "precision": 0,
+      "scale": 0,
+      "isNullable": true,
+      "comment": "Salutation",
+      "unique": false
+    },
+    {
+      "columnName": "MapAccount",
+      "dataType": {
+        "values": "string",
+        "type": "map"
       },
-      "waterMark":false,
-      "primaryKey":0,
-      "length":40,
-      "precision":0,
-      "scale":0,
-      "isNullable":true,
-      "comment":"Salutation",
-      "unique":false
-   },
-   {
-      "columnName":"Industry",
-      "dataType":{
-         "symbols":[
-            "IT",
-            "SALES"
-         ],
-         "name":"Industry",
-         "type":"enum"
+      "waterMark": false,
+      "primaryKey": 0,
+      "length": 40,
+      "precision": 0,
+      "scale": 0,
+      "isNullable": true,
+      "comment": "Salutation",
+      "unique": false
+    },
+    {
+      "columnName": "Industry",
+      "dataType": {
+        "symbols": [
+          "IT",
+          "SALES"
+        ],
+        "name": "Industry",
+        "type": "enum"
       },
-      "waterMark":false,
-      "primaryKey":0,
-      "length":40,
-      "precision":0,
-      "scale":0,
-      "isNullable":true,
-      "comment":"Salutation",
-      "unique":false
-   },
-   {
-      "columnName":"LastModifiedDate",
-      "dataType":{
-         "type":"timestamp"
+      "waterMark": false,
+      "primaryKey": 0,
+      "length": 40,
+      "precision": 0,
+      "scale": 0,
+      "isNullable": true,
+      "comment": "Salutation",
+      "unique": false
+    },
+    {
+      "columnName": "LastModifiedDate",
+      "dataType": {
+        "type": "timestamp"
       },
-      "waterMark":true,
-      "primaryKey":0,
-      "length":0,
-      "precision":0,
-      "scale":0,
-      "isNullable":false,
-      "comment":"Last Modified Date",
-      "unique":false
-   },
-   {
-      "columnName":"null_this",
-      "dataType":{
-         "type":"String"
+      "waterMark": true,
+      "primaryKey": 0,
+      "length": 0,
+      "precision": 0,
+      "scale": 0,
+      "isNullable": false,
+      "comment": "Last Modified Date",
+      "unique": false
+    },
+    {
+      "columnName": "null_this",
+      "dataType": {
+        "type": "String"
       },
-      "waterMark":false,
-      "primaryKey":0,
-      "length":0,
-      "precision":0,
-      "scale":0,
-      "isNullable":true,
-      "comment":"test null",
-      "unique":false
-   },
-   {
-      "columnName":"date_type",
-      "dataType":{
-         "type":"date"
+      "waterMark": false,
+      "primaryKey": 0,
+      "length": 0,
+      "precision": 0,
+      "scale": 0,
+      "isNullable": true,
+      "comment": "test null",
+      "unique": false
+    },
+    {
+      "columnName": "date_type",
+      "dataType": {
+        "type": "date"
       },
-      "waterMark":false,
-      "primaryKey":0,
-      "length":0,
-      "precision":0,
-      "scale":0,
-      "isNullable":false,
-      "comment":"testing datatypes",
-      "unique":false
-   },
-   {
-      "columnName":"time_type",
-      "dataType":{
-         "type":"time"
+      "waterMark": false,
+      "primaryKey": 0,
+      "length": 0,
+      "precision": 0,
+      "scale": 0,
+      "isNullable": false,
+      "comment": "testing datatypes",
+      "unique": false
+    },
+    {
+      "columnName": "time_type",
+      "dataType": {
+        "type": "time"
       },
-      "waterMark":false,
-      "primaryKey":0,
-      "length":0,
-      "precision":0,
-      "scale":0,
-      "isNullable":false,
-      "comment":"testing datatypes",
-      "unique":false
-   },
-   {
-      "columnName":"bytes_type",
-      "dataType":{
-         "type":"bytes"
+      "waterMark": false,
+      "primaryKey": 0,
+      "length": 0,
+      "precision": 0,
+      "scale": 0,
+      "isNullable": false,
+      "comment": "testing datatypes",
+      "unique": false
+    },
+    {
+      "columnName": "bytes_type",
+      "dataType": {
+        "type": "bytes"
       },
-      "waterMark":false,
-      "primaryKey":0,
-      "length":0,
-      "precision":0,
-      "scale":0,
-      "isNullable":false,
-      "comment":"testing datatypes",
-      "unique":false
-   },
-   {
-      "columnName":"int_type",
-      "dataType":{
-         "type":"int"
+      "waterMark": false,
+      "primaryKey": 0,
+      "length": 0,
+      "precision": 0,
+      "scale": 0,
+      "isNullable": false,
+      "comment": "testing datatypes",
+      "unique": false
+    },
+    {
+      "columnName": "int_type",
+      "dataType": {
+        "type": "int"
       },
-      "waterMark":false,
-      "primaryKey":0,
-      "length":0,
-      "precision":0,
-      "scale":0,
-      "isNullable":false,
-      "comment":"testing datatypes",
-      "unique":false
-   },
-   {
-      "columnName":"long_type",
-      "dataType":{
-         "type":"long"
+      "waterMark": false,
+      "primaryKey": 0,
+      "length": 0,
+      "precision": 0,
+      "scale": 0,
+      "isNullable": false,
+      "comment": "testing datatypes",
+      "unique": false
+    },
+    {
+      "columnName": "long_type",
+      "dataType": {
+        "type": "long"
       },
-      "waterMark":false,
-      "primaryKey":0,
-      "length":0,
-      "precision":0,
-      "scale":0,
-      "isNullable":false,
-      "comment":"testing datatypes",
-      "unique":false
-   },
-   {
-      "columnName":"float_type",
-      "dataType":{
-         "type":"float"
+      "waterMark": false,
+      "primaryKey": 0,
+      "length": 0,
+      "precision": 0,
+      "scale": 0,
+      "isNullable": false,
+      "comment": "testing datatypes",
+      "unique": false
+    },
+    {
+      "columnName": "float_type",
+      "dataType": {
+        "type": "float"
       },
-      "waterMark":false,
-      "primaryKey":0,
-      "length":0,
-      "precision":0,
-      "scale":0,
-      "isNullable":false,
-      "comment":"testing datatypes",
-      "unique":false
-   },
-   {
-      "columnName":"double_type",
-      "dataType":{
-         "type":"double"
+      "waterMark": false,
+      "primaryKey": 0,
+      "length": 0,
+      "precision": 0,
+      "scale": 0,
+      "isNullable": false,
+      "comment": "testing datatypes",
+      "unique": false
+    },
+    {
+      "columnName": "double_type",
+      "dataType": {
+        "type": "double"
       },
-      "waterMark":false,
-      "primaryKey":0,
-      "length":0,
-      "precision":0,
-      "scale":0,
-      "isNullable":false,
-      "comment":"testing datatypes",
-      "unique":false
-   },
-   {
-      "columnName":"string_type_null",
-      "dataType":{
-         "type":"string"
+      "waterMark": false,
+      "primaryKey": 0,
+      "length": 0,
+      "precision": 0,
+      "scale": 0,
+      "isNullable": false,
+      "comment": "testing datatypes",
+      "unique": false
+    },
+    {
+      "columnName": "string_type_null",
+      "dataType": {
+        "type": "string"
       },
-      "waterMark":false,
-      "primaryKey":1,
-      "length":18,
-      "precision":0,
-      "scale":0,
-      "isNullable":true,
-      "comment":"Account ID",
-      "unique":false
-   },
-   {
-      "columnName":"boolean_type_null",
-      "dataType":{
-         "type":"boolean"
+      "waterMark": false,
+      "primaryKey": 1,
+      "length": 18,
+      "precision": 0,
+      "scale": 0,
+      "isNullable": true,
+      "comment": "Account ID",
+      "unique": false
+    },
+    {
+      "columnName": "boolean_type_null",
+      "dataType": {
+        "type": "boolean"
       },
-      "waterMark":false,
-      "primaryKey":0,
-      "length":0,
-      "precision":0,
-      "scale":0,
-      "isNullable":true,
-      "comment":"Deleted",
-      "unique":false
-   },
-   {
-      "columnName":"list_type_null",
-      "dataType":{
-         "items":"string",
-         "type":"array"
+      "waterMark": false,
+      "primaryKey": 0,
+      "length": 0,
+      "precision": 0,
+      "scale": 0,
+      "isNullable": true,
+      "comment": "Deleted",
+      "unique": false
+    },
+    {
+      "columnName": "list_type_null",
+      "dataType": {
+        "items": "string",
+        "type": "array"
       },
-      "waterMark":false,
-      "primaryKey":0,
-      "length":40,
-      "precision":0,
-      "scale":0,
-      "isNullable":true,
-      "comment":"Salutation",
-      "unique":false
-   },
-   {
-      "columnName":"map_type_null",
-      "dataType":{
-         "values":"string",
-         "type":"map"
+      "waterMark": false,
+      "primaryKey": 0,
+      "length": 40,
+      "precision": 0,
+      "scale": 0,
+      "isNullable": true,
+      "comment": "Salutation",
+      "unique": false
+    },
+    {
+      "columnName": "map_type_null",
+      "dataType": {
+        "values": "string",
+        "type": "map"
       },
-      "waterMark":false,
-      "primaryKey":0,
-      "length":40,
-      "precision":0,
-      "scale":0,
-      "isNullable":true,
-      "comment":"Salutation",
-      "unique":false
-   },
-
-   {
-      "columnName":"timestamp_type_null",
-      "dataType":{
-         "type":"timestamp"
+      "waterMark": false,
+      "primaryKey": 0,
+      "length": 40,
+      "precision": 0,
+      "scale": 0,
+      "isNullable": true,
+      "comment": "Salutation",
+      "unique": false
+    },
+    {
+      "columnName": "timestamp_type_null",
+      "dataType": {
+        "type": "timestamp"
       },
-      "waterMark":true,
-      "primaryKey":0,
-      "length":0,
-      "precision":0,
-      "scale":0,
-      "isNullable":true,
-      "comment":"Last Modified Date",
-      "unique":false
-   },
-   {
-      "columnName":"date_type_null",
-      "dataType":{
-         "type":"date"
+      "waterMark": true,
+      "primaryKey": 0,
+      "length": 0,
+      "precision": 0,
+      "scale": 0,
+      "isNullable": true,
+      "comment": "Last Modified Date",
+      "unique": false
+    },
+    {
+      "columnName": "date_type_null",
+      "dataType": {
+        "type": "date"
       },
-      "waterMark":false,
-      "primaryKey":0,
-      "length":0,
-      "precision":0,
-      "scale":0,
-      "isNullable":true,
-      "comment":"testing datatypes",
-      "unique":false
-   },
-   {
-      "columnName":"time_type_null",
-      "dataType":{
-         "type":"time"
+      "waterMark": false,
+      "primaryKey": 0,
+      "length": 0,
+      "precision": 0,
+      "scale": 0,
+      "isNullable": true,
+      "comment": "testing datatypes",
+      "unique": false
+    },
+    {
+      "columnName": "time_type_null",
+      "dataType": {
+        "type": "time"
       },
-      "waterMark":false,
-      "primaryKey":0,
-      "length":0,
-      "precision":0,
-      "scale":0,
-      "isNullable":true,
-      "comment":"testing datatypes",
-      "unique":false
-   },
-   {
-      "columnName":"bytes_type_null",
-      "dataType":{
-         "type":"bytes"
+      "waterMark": false,
+      "primaryKey": 0,
+      "length": 0,
+      "precision": 0,
+      "scale": 0,
+      "isNullable": true,
+      "comment": "testing datatypes",
+      "unique": false
+    },
+    {
+      "columnName": "bytes_type_null",
+      "dataType": {
+        "type": "bytes"
       },
-      "waterMark":false,
-      "primaryKey":0,
-      "length":0,
-      "precision":0,
-      "scale":0,
-      "isNullable":true,
-      "comment":"testing datatypes",
-      "unique":false
-   },
-   {
-      "columnName":"int_type_null",
-      "dataType":{
-         "type":"int"
+      "waterMark": false,
+      "primaryKey": 0,
+      "length": 0,
+      "precision": 0,
+      "scale": 0,
+      "isNullable": true,
+      "comment": "testing datatypes",
+      "unique": false
+    },
+    {
+      "columnName": "int_type_null",
+      "dataType": {
+        "type": "int"
       },
-      "waterMark":false,
-      "primaryKey":0,
-      "length":0,
-      "precision":0,
-      "scale":0,
-      "isNullable":true,
-      "comment":"testing datatypes",
-      "unique":false
-   },
-   {
-      "columnName":"long_type_null",
-      "dataType":{
-         "type":"long"
+      "waterMark": false,
+      "primaryKey": 0,
+      "length": 0,
+      "precision": 0,
+      "scale": 0,
+      "isNullable": true,
+      "comment": "testing datatypes",
+      "unique": false
+    },
+    {
+      "columnName": "long_type_null",
+      "dataType": {
+        "type": "long"
       },
-      "waterMark":false,
-      "primaryKey":0,
-      "length":0,
-      "precision":0,
-      "scale":0,
-      "isNullable":true,
-      "comment":"testing datatypes",
-      "unique":false
-   },
-   {
-      "columnName":"float_type_null",
-      "dataType":{
-         "type":"float"
+      "waterMark": false,
+      "primaryKey": 0,
+      "length": 0,
+      "precision": 0,
+      "scale": 0,
+      "isNullable": true,
+      "comment": "testing datatypes",
+      "unique": false
+    },
+    {
+      "columnName": "float_type_null",
+      "dataType": {
+        "type": "float"
       },
-      "waterMark":false,
-      "primaryKey":0,
-      "length":0,
-      "precision":0,
-      "scale":0,
-      "isNullable":true,
-      "comment":"testing datatypes",
-      "unique":false
-   },
-   {
-      "columnName":"double_type_null",
-      "dataType":{
-         "type":"double"
+      "waterMark": false,
+      "primaryKey": 0,
+      "length": 0,
+      "precision": 0,
+      "scale": 0,
+      "isNullable": true,
+      "comment": "testing datatypes",
+      "unique": false
+    },
+    {
+      "columnName": "double_type_null",
+      "dataType": {
+        "type": "double"
       },
-      "waterMark":false,
-      "primaryKey":0,
-      "length":0,
-      "precision":0,
-      "scale":0,
-      "isNullable":true,
-      "comment":"testing datatypes",
-      "unique":false
-   }
-]
\ No newline at end of file
+      "waterMark": false,
+      "primaryKey": 0,
+      "length": 0,
+      "precision": 0,
+      "scale": 0,
+      "isNullable": true,
+      "comment": "testing datatypes",
+      "unique": false
+    }
+  ],
+  "record": {
+    "Id": "232498",
+    "IsDeleted": false,
+    "Salutation": [
+      "test",
+      "test1",
+      "test2"
+    ],
+    "MapAccount": {
+      "test": "test",
+      "test1": "test",
+      "test2": "test"
+    },
+    "Industry": "IT",
+    "LastModifiedDate": "2014-01-02 12:23:37",
+    "date_type": "2014-01-02 12:23:37",
+    "time_type": "12:23:37",
+    "bytes_type": "dfkljafnio3ebpoahdjhjh",
+    "int_type": 3,
+    "long_type": 7899083748927,
+    "float_type": 4.3,
+    "double_type": 9.763649736
+  }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/6dd36a50/gobblin-docs/user-guide/Configuration-Properties-Glossary.md
----------------------------------------------------------------------
diff --git a/gobblin-docs/user-guide/Configuration-Properties-Glossary.md b/gobblin-docs/user-guide/Configuration-Properties-Glossary.md
index 363d873..988991b 100644
--- a/gobblin-docs/user-guide/Configuration-Properties-Glossary.md
+++ b/gobblin-docs/user-guide/Configuration-Properties-Glossary.md
@@ -28,7 +28,8 @@ Gobblin also allows you to specify a global configuration file that contains com
     * [SftpExtractor Properties](#SftpExtractor-Properties)  
 * [Converter Properties](#Converter-Properties)
   * [CsvToJsonConverter Properties](#CsvToJsonConverter-Properties)    
-  * [JsonIntermediateToAvroConverter Properties](#JsonIntermediateToAvroConverter-Properties)
 
+  * [JsonIntermediateToAvroConverter Properties](#JsonIntermediateToAvroConverter-Properties)
+  * [JsonStringToJsonIntermediateConverter Properties](#JsonStringToJsonIntermediateConverter-Properties)
   * [AvroFilterConverter Properties](#AvroFilterConverter-Properties)  
   * [AvroFieldRetrieverConverter Properties](#AvroFieldRetrieverConverter-Properties)  
   * [AvroFieldsPickConverter Properties](#AvroFieldsPickConverter-Properties)  
@@ -821,28 +822,28 @@ None
 Yes
 ## JsonIntermediateToAvroConverter Properties <a name="JsonIntermediateToAvroConverter-Properties"></a>
 This converter takes in JSON data in a specific schema, and converts it to Avro data.
-#### converter.avro.date.format 
+#### converter.avro.date.format
 ###### Description
 Source format of the date columns for Avro-related converters.
 ###### Default Value
 None
 ###### Required
 No
-#### converter.avro.timestamp.format 
+#### converter.avro.timestamp.format
 ###### Description
 Source format of the timestamp columns for Avro-related converters.
 ###### Default Value
 None
 ###### Required
 No
-#### converter.avro.time.format 
+#### converter.avro.time.format
 ###### Description
 Source format of the time columns for Avro-related converters.
 ###### Default Value
 None
 ###### Required
 No
-#### converter.avro.binary.charset 
+#### converter.avro.binary.charset
 ###### Description
 Source format of the time columns for Avro-related converters.
 ###### Default Value
@@ -863,6 +864,28 @@ This converter is will fail for this many number of records before throwing an e
 0
 ###### Required
 No
+#### converter.avro.nullify.fields.enabled
+###### Description
+Generate new avro schema by nullifying fields that previously existed but not in the current schema.
+###### Default Value
+false
+###### Required
+No
+#### converter.avro.nullify.fields.original.schema.path
+###### Description
+Path of the original avro schema which will be used for merging and nullify fields.
+###### Default Value
+None
+###### Required
+No
+## JsonStringToJsonIntermediateConverter Properties <a name="JsonStringToJsonIntermediateConverter-Properties"></a>
+#### gobblin.converter.jsonStringToJsonIntermediate.unpackComplexSchemas
+###### Description
+Parse nested JSON record using source.schema.
+###### Default Value
+True
+###### Required
+No
 ## AvroFilterConverter Properties <a name="AvroFilterConverter-Properties"></a>
 This converter takes in an Avro record, and filters out records by performing an equality operation on the value of the field specified by converter.filter.field and the value specified in converter.filter.value. It returns the record unmodified if the equality operation evaluates to true, false otherwise.
 #### converter.filter.field


Mime
View raw message