gobblin-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From a...@apache.org
Subject [2/3] incubator-gobblin git commit: [GOBBLIN-226] Nested schema support in JsonStringToJsonIntermediateConverter and JsonIntermediateToAvroConverter
Date Wed, 18 Oct 2017 05:34:05 GMT
http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/6dd36a50/gobblin-core/src/test/resources/converter/JsonElementConversionFactoryTest.json
----------------------------------------------------------------------
diff --git a/gobblin-core/src/test/resources/converter/JsonElementConversionFactoryTest.json b/gobblin-core/src/test/resources/converter/JsonElementConversionFactoryTest.json
new file mode 100644
index 0000000..e1fc2cd
--- /dev/null
+++ b/gobblin-core/src/test/resources/converter/JsonElementConversionFactoryTest.json
@@ -0,0 +1,856 @@
+{
+  "schemaWithArrayOfMaps": [
+    {
+      "columnName": "b",
+      "dataType": {
+        "type": "array",
+        "items": {
+          "dataType": {
+            "type": "map",
+            "values": "string"
+          }
+        }
+      }
+    },
+    {
+      "type": "array",
+      "items": {
+        "type": "map",
+        "values": {
+          "type": "string",
+          "source.type": "string"
+        },
+        "source.type": "map"
+      },
+      "source.type": "array"
+    }
+  ],
+  "schemaWithArrayOfRecords": [
+    {
+      "columnName": "b",
+      "dataType": {
+        "type": "array",
+        "items": {
+          "dataType": {
+            "type": "record",
+            "namespace": "org.foo",
+            "values": [
+              {
+                "columnName": "name",
+                "dataType": {
+                  "type": "string"
+                }
+              },
+              {
+                "columnName": "c",
+                "dataType": {
+                  "type": "long"
+                }
+              },
+              {
+                "columnName": "cc",
+                "dataType": {
+                  "type": "array",
+                  "items": "int"
+                }
+              }
+            ]
+          }
+        }
+      }
+    },
+    {
+      "type": "array",
+      "items": {
+        "type": "record",
+        "doc": "",
+        "fields": [
+          {
+            "name": "name",
+            "type": {
+              "type": "string",
+              "source.type": "string"
+            },
+            "doc": "",
+            "source.type": "string"
+          },
+          {
+            "name": "c",
+            "type": {
+              "type": "long",
+              "source.type": "long"
+            },
+            "doc": "",
+            "source.type": "long"
+          },
+          {
+            "name": "cc",
+            "type": {
+              "type": "array",
+              "items": {
+                "type": "int",
+                "source.type": "int"
+              },
+              "source.type": "array"
+            },
+            "doc": "",
+            "source.type": "array"
+          }
+        ],
+        "source.type": "record"
+      },
+      "source.type": "array"
+    }
+  ],
+  "schemaWithRecord": [
+    {
+      "columnName": "b",
+      "dataType": {
+        "type": "record",
+        "values": [
+          {
+            "columnName": "c",
+            "dataType": {
+              "type": "string"
+            }
+          },
+          {
+            "columnName": "d",
+            "dataType": {
+              "type": "int"
+            }
+          }
+        ]
+      }
+    },
+    {
+      "type": "record",
+      "doc": "",
+      "fields": [
+        {
+          "name": "c",
+          "type": {
+            "type": "string",
+            "source.type": "string"
+          },
+          "doc": "",
+          "source.type": "string"
+        },
+        {
+          "name": "d",
+          "type": {
+            "type": "int",
+            "source.type": "int"
+          },
+          "doc": "",
+          "source.type": "int"
+        }
+      ],
+      "source.type": "record"
+    }
+  ],
+  "schemaWithArrayOfInts": [
+    {
+      "columnName": "b",
+      "dataType": {
+        "type": "array",
+        "items": "int"
+      }
+    },
+    {
+      "type": "array",
+      "items": {
+        "type": "int",
+        "source.type": "int"
+      },
+      "source.type": "array"
+    }
+  ],
+  "schemaWithArrayOfEnums": [
+    {
+      "columnName": "b",
+      "dataType": {
+        "type": "array",
+        "items": {
+          "dataType": {
+            "type": "enum",
+            "namespace": "org.foo",
+            "name": "choice",
+            "symbols": [
+              "YES",
+              "NO"
+            ]
+          }
+        }
+      }
+    },
+    {
+      "type": "array",
+      "items": {
+        "type": "enum",
+        "name": "choice",
+        "doc": "",
+        "symbols": [
+          "YES",
+          "NO"
+        ],
+        "source.type": "enum"
+      },
+      "source.type": "array"
+    }
+  ],
+  "schemaWithMap": [
+    {
+      "columnName": "b",
+      "dataType": {
+        "type": "map",
+        "values": "string"
+      }
+    },
+    {
+      "type": "map",
+      "values": {
+        "type": "string",
+        "source.type": "string"
+      },
+      "source.type": "map"
+    }
+  ],
+  "schemaWithMapOfRecords": [
+    {
+      "columnName": "persons",
+      "dataType": {
+        "type": "map",
+        "values": {
+          "dataType": {
+            "type": "record",
+            "values": [
+              {
+                "columnName": "name",
+                "dataType": {
+                  "type": "string"
+                }
+              },
+              {
+                "columnName": "age",
+                "dataType": {
+                  "type": "int"
+                }
+              }
+            ]
+          }
+        }
+      }
+    },
+    {
+      "type": "map",
+      "values": {
+        "type": "record",
+        "doc": "",
+        "fields": [
+          {
+            "name": "name",
+            "type": {
+              "type": "string",
+              "source.type": "string"
+            },
+            "doc": "",
+            "source.type": "string"
+          },
+          {
+            "name": "age",
+            "type": {
+              "type": "int",
+              "source.type": "int"
+            },
+            "doc": "",
+            "source.type": "int"
+          }
+        ],
+        "source.type": "record"
+      },
+      "source.type": "map"
+    }
+  ],
+  "schemaWithMapOfArrays": [
+    {
+      "columnName": "persons",
+      "dataType": {
+        "type": "map",
+        "values": {
+          "dataType": {
+            "type": "array",
+            "items": "int"
+          }
+        }
+      }
+    },
+    {
+      "type": "map",
+      "values": {
+        "type": "array",
+        "items": {
+          "type": "int",
+          "source.type": "int"
+        },
+        "source.type": "array"
+      },
+      "source.type": "map"
+    }
+  ],
+  "schemaWithMapOfEnum": [
+    {
+      "columnName": "persons",
+      "dataType": {
+        "type": "map",
+        "values": {
+          "dataType": {
+            "type": "enum",
+            "name": "choice",
+            "symbols": [
+              "YES",
+              "NO"
+            ]
+          }
+        }
+      }
+    },
+    {
+      "type": "map",
+      "values": {
+        "type": "enum",
+        "name": "choice",
+        "doc": "",
+        "symbols": [
+          "YES",
+          "NO"
+        ],
+        "source.type": "enum"
+      },
+      "source.type": "map"
+    }
+  ],
+  "schemaWithRecordOfMap": [
+    {
+      "columnName": "persons",
+      "dataType": {
+        "type": "record",
+        "values": [
+          {
+            "columnName": "someperson",
+            "dataType": {
+              "type": "map",
+              "values": "string"
+            }
+          }
+        ]
+      }
+    },
+    {
+      "type": "record",
+      "doc": "",
+      "fields": [
+        {
+          "name": "someperson",
+          "type": {
+            "type": "map",
+            "values": {
+              "type": "string",
+              "source.type": "string"
+            },
+            "source.type": "map"
+          },
+          "doc": "",
+          "source.type": "map"
+        }
+      ],
+      "source.type": "record"
+    }
+  ],
+  "schemaWithRecordOfArray": [
+    {
+      "columnName": "persons",
+      "dataType": {
+        "type": "record",
+        "values": [
+          {
+            "columnName": "someperson",
+            "dataType": {
+              "type": "array",
+              "items": "int"
+            }
+          }
+        ]
+      }
+    },
+    {
+      "type": "record",
+      "doc": "",
+      "fields": [
+        {
+          "name": "someperson",
+          "type": {
+            "type": "array",
+            "items": {
+              "type": "int",
+              "source.type": "int"
+            },
+            "source.type": "array"
+          },
+          "doc": "",
+          "source.type": "array"
+        }
+      ],
+      "source.type": "record"
+    }
+  ],
+  "schemaWithRecordOfEnum": [
+    {
+      "columnName": "persons",
+      "dataType": {
+        "type": "record",
+        "values": [
+          {
+            "columnName": "someperson",
+            "dataType": {
+              "name": "choice",
+              "type": "enum",
+              "symbols": [
+                "YES",
+                "NO"
+              ]
+            }
+          }
+        ]
+      }
+    },
+    {
+      "type": "record",
+      "doc": "",
+      "fields": [
+        {
+          "name": "someperson",
+          "type": {
+            "type": "enum",
+            "name": "choice",
+            "doc": "",
+            "symbols": [
+              "YES",
+              "NO"
+            ],
+            "source.type": "enum"
+          },
+          "doc": "",
+          "source.type": "enum"
+        }
+      ],
+      "source.type": "record"
+    }
+  ],
+  "schemaWithMapValuesAsJsonArray": [
+    {
+      "columnName": "persons",
+      "dataType": {
+        "type": "record",
+        "values": [
+          {
+            "columnName": "someperson",
+            "dataType": {
+              "type": "map",
+              "values": [
+                "string"
+              ]
+            }
+          }
+        ]
+      }
+    }
+  ],
+  "schemaWithMapValuesAsJsonNull": [
+    {
+      "columnName": "persons",
+      "dataType": {
+        "type": "record",
+        "values": [
+          {
+            "columnName": "someperson",
+            "dataType": {
+              "type": "map",
+              "values": null
+            }
+          }
+        ]
+      }
+    }
+  ],
+  "schemaWithRecordOfRecord": [
+    {
+      "columnName": "persons",
+      "dataType": {
+        "type": "record",
+        "values": [
+          {
+            "columnName": "someperson",
+            "dataType": {
+              "name": "choice",
+              "type": "record",
+              "values": [
+                {
+                  "columnName": "s",
+                  "dataType": {
+                    "type": "int"
+                  }
+                }
+              ]
+            }
+          }
+        ]
+      }
+    },
+    {
+      "type": "record",
+      "doc": "",
+      "fields": [
+        {
+          "name": "someperson",
+          "type": {
+            "type": "record",
+            "name": "choice",
+            "doc": "",
+            "fields": [
+              {
+                "name": "s",
+                "type": {
+                  "type": "int",
+                  "source.type": "int"
+                },
+                "doc": "",
+                "source.type": "int"
+              }
+            ],
+            "source.type": "record"
+          },
+          "doc": "",
+          "source.type": "record"
+        }
+      ],
+      "source.type": "record"
+    }
+  ],
+  "schemaWithRecordOfRecordCheckNamespace": [
+    {
+      "columnName": "persons",
+      "dataType": {
+        "type": "record",
+        "name": "myrecord",
+        "values": [
+          {
+            "columnName": "someperson",
+            "dataType": {
+              "name": "choice",
+              "type": "record",
+              "values": [
+                {
+                  "columnName": "s",
+                  "dataType": {
+                    "type": "int"
+                  }
+                }
+              ]
+            }
+          }
+        ]
+      }
+    },
+    {
+      "type": "record",
+      "name": "myrecord",
+      "namespace": "namespace.person",
+      "doc": "",
+      "fields": [
+        {
+          "name": "someperson",
+          "type": {
+            "type": "record",
+            "name": "choice",
+            "namespace": "namespace.person.myrecord",
+            "doc": "",
+            "fields": [
+              {
+                "name": "s",
+                "type": {
+                  "type": "int",
+                  "source.type": "int"
+                },
+                "doc": "",
+                "source.type": "int"
+              }
+            ],
+            "source.type": "record"
+          },
+          "doc": "",
+          "source.type": "record"
+        }
+      ],
+      "source.type": "record"
+    }
+  ],
+  "schemaWithRecordOfEnumCheckNamespace": [
+    {
+      "columnName": "persons",
+      "dataType": {
+        "type": "record",
+        "name": "myrecord",
+        "values": [
+          {
+            "columnName": "someperson",
+            "dataType": {
+              "name": "choice",
+              "type": "enum",
+              "symbols": [
+                "YES",
+                "NO"
+              ]
+            }
+          }
+        ]
+      }
+    },
+    {
+      "type": "record",
+      "name": "myrecord",
+      "namespace": "namespace.something",
+      "doc": "",
+      "fields": [
+        {
+          "name": "someperson",
+          "type": {
+            "type": "enum",
+            "name": "choice",
+            "namespace": "namespace.something.myrecord",
+            "doc": "",
+            "symbols": [
+              "YES",
+              "NO"
+            ],
+            "source.type": "enum"
+          },
+          "doc": "",
+          "source.type": "enum"
+        }
+      ],
+      "source.type": "record"
+    }
+  ],
+  "schemaWithUnion": [
+    {
+      "columnName": "b",
+      "dataType": {
+        "type": [
+          "null",
+          "string"
+        ]
+      }
+    },
+    [
+      {
+        "type": "null",
+        "source.type": "null"
+      },
+      {
+        "type": "string",
+        "source.type": "string"
+      }
+    ]
+  ],
+  "schemaWithComplexUnion": [
+    {
+      "columnName": "b",
+      "dataType": {
+        "type": [
+          "null",
+          {
+            "dataType": {
+              "type": "enum",
+              "name": "someenum",
+              "symbols": [
+                "HELL",
+                "BELLS"
+              ]
+            }
+          }
+        ]
+      }
+    },
+    [
+      {
+        "type": "null",
+        "source.type": "null"
+      },
+      {
+        "type": "enum",
+        "name": "someenum",
+        "doc": "",
+        "symbols": [
+          "HELL",
+          "BELLS"
+        ],
+        "source.type": "enum"
+      }
+    ]
+  ],
+  "schemaWithIsNullable": [
+    {
+      "columnName": "b",
+      "isNullable": true,
+      "dataType": {
+        "type": "string"
+      }
+    },
+    [
+      "null",
+      {
+        "type": "string",
+        "source.type": "string"
+      }
+    ]
+  ],
+  "schemaWithRecordIsNullable": [
+    {
+      "columnName": "b",
+      "isNullable": true,
+      "dataType": {
+        "name": "first_record",
+        "type": "record",
+        "values": [
+          {
+            "columnName": "c",
+            "isNullable": true,
+            "dataType": {
+              "name": "second_record",
+              "type": "record",
+              "values": [
+                {
+                  "columnName": "d",
+                  "dataType": {
+                    "type": "string",
+                    "isNullable": true
+                  }
+                }
+              ]
+            }
+          }
+        ]
+      }
+    },
+    [
+      "null",
+      {
+        "type": "record",
+        "name": "first_record",
+        "namespace": "namespace.something",
+        "doc": "",
+        "fields": [
+          {
+            "name": "c",
+            "type": [
+              "null",
+              {
+                "type": "record",
+                "name": "second_record",
+                "namespace": "namespace.something.first_record",
+                "doc": "",
+                "fields": [
+                  {
+                    "name": "d",
+                    "type": {
+                      "type": "string",
+                      "source.type": "string"
+                    },
+                    "doc": "",
+                    "source.type": "string"
+                  }
+                ],
+                "source.type": "record"
+              }
+            ],
+            "doc": "",
+            "default": null,
+            "source.type": "record"
+          }
+        ],
+        "source.type": "record"
+      }
+    ]
+  ],
+  "schemaWithMapIsNullable": [
+    {
+      "columnName": "b",
+      "isNullable": true,
+      "dataType": {
+        "type": "map",
+        "values": "int"
+      }
+    },
+    [
+      "null",
+      {
+        "type": "map",
+        "values": {
+          "type": "int",
+          "source.type": "int"
+        },
+        "source.type": "map"
+      }
+    ]
+  ],
+  "schemaWithEnumIsNullable": [
+    {
+      "columnName": "b",
+      "isNullable": true,
+      "dataType": {
+        "type": "enum",
+        "name": "choice",
+        "symbols": [
+          "YES",
+          "NO"
+        ]
+      }
+    },
+    [
+      "null",
+      {
+        "type": "enum",
+        "name": "choice",
+        "namespace": "something",
+        "doc": "",
+        "symbols": [
+          "YES",
+          "NO"
+        ],
+        "source.type": "enum"
+      }
+    ]
+  ],
+  "schemaWithArrayIsNullable": [
+    {
+      "columnName": "b",
+      "isNullable": true,
+      "dataType": {
+        "type": "array",
+        "items": "int"
+      }
+    },
+    [
+      "null",
+      {
+        "type": "array",
+        "items": {
+          "type": "int",
+          "source.type": "int"
+        },
+        "source.type": "array"
+      }
+    ]
+  ]
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/6dd36a50/gobblin-core/src/test/resources/converter/JsonStringToJsonIntermediateConverter.json
----------------------------------------------------------------------
diff --git a/gobblin-core/src/test/resources/converter/JsonStringToJsonIntermediateConverter.json b/gobblin-core/src/test/resources/converter/JsonStringToJsonIntermediateConverter.json
new file mode 100644
index 0000000..3b2957d
--- /dev/null
+++ b/gobblin-core/src/test/resources/converter/JsonStringToJsonIntermediateConverter.json
@@ -0,0 +1,919 @@
+{
+  "emptyJson": [
+    {},
+    [],
+    {}
+  ],
+  "jsonWithNullValue": [
+    {
+      "a": null
+    },
+    [
+      {
+        "columnName": "a",
+        "dataType": {
+          "type": "null"
+        }
+      }
+    ],
+    {
+      "a": null
+    }
+  ],
+  "jsonWithFloat": [
+    {
+      "a": 0.8
+    },
+    [
+      {
+        "columnName": "a",
+        "dataType": {
+          "type": "float"
+        }
+      }
+    ],
+    {
+      "a": 0.8
+    }
+  ],
+  "jsonWithBytes": [
+    {
+      "a": "\\u00FF"
+    },
+    [
+      {
+        "columnName": "a",
+        "dataType": {
+          "type": "bytes"
+        }
+      }
+    ],
+    {
+      "a": "\\u00FF"
+    }
+  ],
+  "jsonWithExtraFieldsThanSchema": [
+    {
+      "a": 1,
+      "b": 6
+    },
+    [
+      {
+        "columnName": "a",
+        "dataType": {
+          "type": "int"
+        }
+      }
+    ],
+    {
+      "a": 1
+    }
+  ],
+  "jsonWithCompleteFieldsInSchema": [
+    {
+      "a": "somename",
+      "b": 6
+    },
+    [
+      {
+        "columnName": "a",
+        "dataType": {
+          "type": "string"
+        }
+      }
+    ],
+    {
+      "a": "somename"
+    }
+  ],
+  "jsonWithBooleanValue": [
+    {
+      "a": "somename",
+      "b": true
+    },
+    [
+      {
+        "columnName": "a",
+        "dataType": {
+          "type": "string"
+        }
+      },
+      {
+        "columnName": "b",
+        "dataType": {
+          "type": "boolean"
+        }
+      }
+    ],
+    {
+      "a": "somename",
+      "b": true
+    }
+  ],
+  "jsonWithArrayOfInts": [
+    {
+      "a": "somename",
+      "b": [
+        1,
+        2,
+        3
+      ]
+    },
+    [
+      {
+        "columnName": "a",
+        "dataType": {
+          "type": "string"
+        }
+      },
+      {
+        "columnName": "b",
+        "dataType": {
+          "type": "array",
+          "items": "int"
+        }
+      }
+    ],
+    {
+      "a": "somename",
+      "b": [
+        1,
+        2,
+        3
+      ]
+    }
+  ],
+  "jsonWithSingleKVMapAsValue": [
+    {
+      "a": {
+        "b": "somename"
+      }
+    },
+    [
+      {
+        "columnName": "a",
+        "dataType": {
+          "type": "map",
+          "values": "string"
+        }
+      }
+    ],
+    {
+      "a": {
+        "b": "somename"
+      }
+    }
+  ],
+  "jsonWithTwoKVMaps": [
+    {
+      "a": {
+        "b": "somename",
+        "count": 6
+      }
+    },
+    [
+      {
+        "columnName": "a",
+        "dataType": {
+          "type": "record",
+          "values": [
+            {
+              "columnName": "b",
+              "dataType": {
+                "type": "string"
+              }
+            },
+            {
+              "columnName": "count",
+              "dataType": {
+                "type": "int"
+              }
+            }
+          ]
+        }
+      }
+    ],
+    {
+      "a": {
+        "b": "somename",
+        "count": 6
+      }
+    }
+  ],
+  "jsonWithArrayOfSingleKVMap": [
+    {
+      "a": "somename",
+      "b": [
+        {
+          "c": "1"
+        },
+        {
+          "d": "1"
+        }
+      ]
+    },
+    [
+      {
+        "columnName": "a",
+        "dataType": {
+          "type": "string"
+        }
+      },
+      {
+        "columnName": "b",
+        "dataType": {
+          "type": "array",
+          "items": {
+            "dataType": {
+              "type": "map",
+              "values": "string"
+            }
+          }
+        }
+      }
+    ],
+    {
+      "a": "somename",
+      "b": [
+        {
+          "c": "1"
+        },
+        {
+          "d": "1"
+        }
+      ]
+    }
+  ],
+  "jsonWithArrayOfTwoRecords": [
+    {
+      "a": "somename",
+      "b": [
+        {
+          "name": "me",
+          "c": 1
+        },
+        {
+          "name": "me",
+          "c": 1
+        }
+      ]
+    },
+    [
+      {
+        "columnName": "a",
+        "dataType": {
+          "type": "string"
+        }
+      },
+      {
+        "columnName": "b",
+        "dataType": {
+          "type": "array",
+          "items": {
+            "dataType": {
+              "type": "record",
+              "values": [
+                {
+                  "columnName": "name",
+                  "dataType": {
+                    "type": "string"
+                  }
+                },
+                {
+                  "columnName": "c",
+                  "dataType": {
+                    "type": "long"
+                  }
+                }
+              ]
+            }
+          }
+        }
+      }
+    ],
+    {
+      "a": "somename",
+      "b": [
+        {
+          "name": "me",
+          "c": 1
+        },
+        {
+          "name": "me",
+          "c": 1
+        }
+      ]
+    }
+  ],
+  "jsonWithArrayOfTwoMap": [
+    {
+      "a": "somename",
+      "b": [
+        {
+          "d": "1"
+        },
+        {
+          "d": "1"
+        }
+      ]
+    },
+    [
+      {
+        "columnName": "a",
+        "dataType": {
+          "type": "string"
+        }
+      },
+      {
+        "columnName": "b",
+        "dataType": {
+          "type": "array",
+          "items": {
+            "dataType": {
+              "type": "map",
+              "values": "string"
+            }
+          }
+        }
+      }
+    ],
+    {
+      "a": "somename",
+      "b": [
+        {
+          "d": "1"
+        },
+        {
+          "d": "1"
+        }
+      ]
+    }
+  ],
+  "jsonWithRecord": [
+    {
+      "a": "somename",
+      "b": {
+        "c": "1",
+        "d": 1
+      }
+    },
+    [
+      {
+        "columnName": "a",
+        "dataType": {
+          "type": "string"
+        }
+      },
+      {
+        "columnName": "b",
+        "dataType": {
+          "type": "record",
+          "values": [
+            {
+              "columnName": "c",
+              "dataType": {
+                "type": "string"
+              }
+            },
+            {
+              "columnName": "d",
+              "dataType": {
+                "type": "int"
+              }
+            }
+          ]
+        }
+      }
+    ],
+    {
+      "a": "somename",
+      "b": {
+        "c": "1",
+        "d": 1
+      }
+    }
+  ],
+  "jsonWithRecordInSchemaButNotInData": [
+    {
+      "a": "somename",
+      "b": {}
+    },
+    [
+      {
+        "columnName": "a",
+        "dataType": {
+          "type": "string"
+        }
+      },
+      {
+        "columnName": "b",
+        "dataType": {
+          "type": "record",
+          "values": [
+            {
+              "columnName": "c",
+              "dataType": {
+                "type": "string"
+              }
+            },
+            {
+              "columnName": "d",
+              "dataType": {
+                "type": "int"
+              }
+            }
+          ]
+        }
+      }
+    ],
+    {
+      "a": "somename",
+      "b": {
+        "c": null,
+        "d": null
+      }
+    }
+  ],
+  "jsonWithFixedType": [
+    {
+      "a": 1,
+      "b": "hello"
+    },
+    [
+      {
+        "columnName": "a",
+        "dataType": {
+          "type": "int"
+        }
+      },
+      {
+        "columnName": "b",
+        "dataType": {
+          "type": "fixed",
+          "size": 5,
+          "name": "otp"
+        }
+      }
+    ],
+    {
+      "a": 1,
+      "b": "hello"
+    }
+  ],
+  "jsonWithEnums": [
+    {
+      "a": "somename",
+      "b": "HELL"
+    },
+    [
+      {
+        "columnName": "a",
+        "dataType": {
+          "type": "string"
+        }
+      },
+      {
+        "columnName": "b",
+        "dataType": {
+          "type": "enum",
+          "symbols": [
+            "HELL",
+            "BELLS"
+          ]
+        }
+      }
+    ],
+    {
+      "a": "somename",
+      "b": "HELL"
+    }
+  ],
+  "jsonWithMapOfRecords": [
+    {
+      "persons": {
+        "someperson": {
+          "name": "someone",
+          "age": 11
+        },
+        "otherperson": {
+          "name": "someoneelse",
+          "age": 12
+        }
+      }
+    },
+    [
+      {
+        "columnName": "persons",
+        "dataType": {
+          "type": "map",
+          "values": {
+            "dataType": {
+              "type": "record",
+              "values": [
+                {
+                  "columnName": "name",
+                  "dataType": {
+                    "type": "string"
+                  }
+                },
+                {
+                  "columnName": "age",
+                  "dataType": {
+                    "type": "int"
+                  }
+                }
+              ]
+            }
+          }
+        }
+      }
+    ],
+    {
+      "persons": {
+        "someperson": {
+          "name": "someone",
+          "age": 11
+        },
+        "otherperson": {
+          "name": "someoneelse",
+          "age": 12
+        }
+      }
+    }
+  ],
+  "jsonWithMapOfArray": [
+    {
+      "persons": {
+        "someperson": [
+          10,
+          20
+        ],
+        "otherperson": [
+          20,
+          50
+        ]
+      }
+    },
+    [
+      {
+        "columnName": "persons",
+        "dataType": {
+          "type": "map",
+          "values": {
+            "dataType": {
+              "type": "array",
+              "items": "int"
+            }
+          }
+        }
+      }
+    ],
+    {
+      "persons": {
+        "someperson": [
+          10,
+          20
+        ],
+        "otherperson": [
+          20,
+          50
+        ]
+      }
+    }
+  ],
+  "jsonWithMapOfEnum": [
+    {
+      "persons": {
+        "someperson": "YES",
+        "otherperson": "NO"
+      }
+    },
+    [
+      {
+        "columnName": "persons",
+        "dataType": {
+          "type": "map",
+          "values": {
+            "dataType": {
+              "name": "choice",
+              "type": "enum",
+              "symbols": [
+                "YES",
+                "NO"
+              ]
+            }
+          }
+        }
+      }
+    ],
+    {
+      "persons": {
+        "someperson": "YES",
+        "otherperson": "NO"
+      }
+    }
+  ],
+  "jsonWithRecordContainingArray": [
+    {
+      "persons": {
+        "someperson": [
+          10,
+          20
+        ]
+      }
+    },
+    [
+      {
+        "columnName": "persons",
+        "dataType": {
+          "type": "record",
+          "values": [
+            {
+              "columnName": "someperson",
+              "dataType": {
+                "type": "array",
+                "items": "int"
+              }
+            }
+          ]
+        }
+      }
+    ],
+    {
+      "persons": {
+        "someperson": [
+          10,
+          20
+        ]
+      }
+    }
+  ],
+  "jsonWithRecordContainingEnums": [
+    {
+      "persons": {
+        "someperson": "YES"
+      }
+    },
+    [
+      {
+        "columnName": "persons",
+        "dataType": {
+          "type": "record",
+          "values": [
+            {
+              "columnName": "someperson",
+              "dataType": {
+                "name": "choice",
+                "type": "enum",
+                "symbols": [
+                  "YES",
+                  "NO"
+                ]
+              }
+            }
+          ]
+        }
+      }
+    ],
+    {
+      "persons": {
+        "someperson": "YES"
+      }
+    }
+  ],
+  "jsonWithRecordContainingMap": [
+    {
+      "persons": {
+        "someperson": {
+          "1": "2"
+        }
+      }
+    },
+    [
+      {
+        "columnName": "persons",
+        "dataType": {
+          "type": "record",
+          "values": [
+            {
+              "columnName": "someperson",
+              "dataType": {
+                "type": "map",
+                "values": "string"
+              }
+            }
+          ]
+        }
+      }
+    ],
+    {
+      "persons": {
+        "someperson": {
+          "1": "2"
+        }
+      }
+    }
+  ],
+  "jsonWithArrayOfMapContainingRecord": [
+    {
+      "a": "somename",
+      "b": [
+        {
+          "d": {
+            "age": "10"
+          }
+        },
+        {
+          "d": {
+            "age": "1"
+          }
+        }
+      ]
+    },
+    [
+      {
+        "columnName": "a",
+        "dataType": {
+          "type": "string"
+        }
+      },
+      {
+        "columnName": "b",
+        "dataType": {
+          "type": "array",
+          "items": {
+            "dataType": {
+              "type": "map",
+              "values": {
+                "dataType": {
+                  "type": "record",
+                  "values": [
+                    {
+                      "columnName": "age",
+                      "dataType": {
+                        "type": "int"
+                      }
+                    }
+                  ]
+                }
+              }
+            }
+          }
+        }
+      }
+    ],
+    {
+      "a": "somename",
+      "b": [
+        {
+          "d": {
+            "age": "10"
+          }
+        },
+        {
+          "d": {
+            "age": "1"
+          }
+        }
+      ]
+    }
+  ],
+  "jsonWithMapOfMap": [
+    {
+      "persons": {
+        "someperson": "5"
+      }
+    },
+    [
+      {
+        "columnName": "persons",
+        "dataType": {
+          "type": "map",
+          "values": {
+            "dataType": {
+              "type": "map",
+              "values": "string"
+            }
+          }
+        }
+      }
+    ],
+    {
+      "persons": {
+        "someperson": "5"
+      }
+    }
+  ],
+  "jsonWithArrayOfNulls": [
+    {
+      "b": [
+        null,
+        null
+      ]
+    },
+    [
+      {
+        "columnName": "b",
+        "dataType": {
+          "type": "array",
+          "items": "null"
+        }
+      }
+    ],
+    {
+      "b": [
+        null,
+        null
+      ]
+    }
+  ],
+  "jsonWithUnionType": [
+    {
+      "b": "hello"
+    },
+    [
+      {
+        "columnName": "b",
+        "dataType": {
+          "type": [
+            "null",
+            "string"
+          ]
+        }
+      }
+    ],
+    {
+      "b": "hello"
+    }
+  ],
+  "jsonWithUnionNullAndEnums": [
+    {
+      "a": "somename",
+      "b": "HELL"
+    },
+    [
+      {
+        "columnName": "a",
+        "dataType": {
+          "type": "string"
+        }
+      },
+      {
+        "columnName": "b",
+        "dataType": {
+          "type": [
+            "null",
+            {
+              "dataType": {
+                "type": "enum",
+                "symbols": [
+                  "HELL",
+                  "BELLS"
+                ]
+              }
+            }
+          ]
+        }
+      }
+    ],
+    {
+      "a": "somename",
+      "b": "HELL"
+    }
+  ],
+  "jsonWithUnionNullAndEnums1": [
+    {
+      "a": "somename",
+      "b": null
+    },
+    [
+      {
+        "columnName": "a",
+        "dataType": {
+          "type": "string"
+        }
+      },
+      {
+        "columnName": "b",
+        "dataType": {
+          "type": [
+            "null",
+            {
+              "dataType": {
+                "type": "enum",
+                "symbols": [
+                  "HELL",
+                  "BELLS"
+                ]
+              }
+            }
+          ]
+        }
+      }
+    ],
+    {
+      "a": "somename",
+      "b": null
+    }
+  ]
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/6dd36a50/gobblin-core/src/test/resources/converter/complex1.json
----------------------------------------------------------------------
diff --git a/gobblin-core/src/test/resources/converter/complex1.json b/gobblin-core/src/test/resources/converter/complex1.json
new file mode 100644
index 0000000..40335fb
--- /dev/null
+++ b/gobblin-core/src/test/resources/converter/complex1.json
@@ -0,0 +1,527 @@
+{
+  "schema": [
+    {
+      "columnName": "User",
+      "namespace": "com.example.avro",
+      "comment": "This is a user record in a fictitious to-do-list management app. It supports arbitrary grouping and nesting of items, and allows you to add items by email or by tweeting.\n\nNote this app doesn't actually exist. The schema is just a demo for [Avrodoc](https://github.com/ept/avrodoc)!",
+      "dataType": {
+        "type": "record",
+        "values": [
+          {
+            "columnName": "id",
+            "comment": "System-assigned numeric user ID. Cannot be changed by the user.",
+            "dataType": {
+              "type": "int"
+            }
+          },
+          {
+            "columnName": "username",
+            "comment": "The username chosen by the user. Can be changed by the user.",
+            "dataType": {
+              "type": "string"
+            }
+          },
+          {
+            "columnName": "passwordHash",
+            "comment": "The user's password, hashed using [scrypt](http://www.tarsnap.com/scrypt.html).",
+            "dataType": {
+              "type": "string"
+            }
+          },
+          {
+            "columnName": "signupDate",
+            "comment": "Timestamp (milliseconds since epoch) when the user signed up",
+            "dataType": {
+              "type": "long"
+            }
+          },
+          {
+            "columnName": "emailAddresses",
+            "comment": "All email addresses on the user's account",
+            "dataType": {
+              "type": "array",
+              "items": {
+                "dataType": {
+                  "type": "record",
+                  "name": "EmailAddress",
+                  "comment": "Stores details about an email address that a user has associated with their account.",
+                  "values": [
+                    {
+                      "columnName": "address",
+                      "comment": "The email address, e.g. `foo@example.com`",
+                      "dataType": {
+                        "type": "string"
+                      }
+                    },
+                    {
+                      "columnName": "verified",
+                      "comment": "true if the user has clicked the link in a confirmation email to this address.",
+                      "dataType": {
+                        "type": "boolean"
+                      },
+                      "default": false
+                    },
+                    {
+                      "columnName": "dateAdded",
+                      "comment": "Timestamp (milliseconds since epoch) when the email address was added to the account.",
+                      "dataType": {
+                        "type": "long"
+                      }
+                    },
+                    {
+                      "columnName": "dateBounced",
+                      "comment": "Timestamp (milliseconds since epoch) when an email sent to this address last bounced. Reset to null when the address no longer bounces.",
+                      "dataType": {
+                        "type": "long"
+                      }
+                    }
+                  ]
+                }
+              }
+            }
+          },
+          {
+            "columnName": "twitterAccounts",
+            "comment": "All Twitter accounts that the user has OAuthed",
+            "dataType": {
+              "type": "array",
+              "items": {
+                "dataType": {
+                  "type": "record",
+                  "columnName": "TwitterAccount",
+                  "comment": "Stores access credentials for one Twitter account, as granted to us by the user by OAuth.",
+                  "values": [
+                    {
+                      "columnName": "status",
+                      "comment": "Indicator of whether this authorization is currently active, or has been revoked",
+                      "dataType": {
+                        "type": "enum",
+                        "name": "OAuthStatus",
+                        "comment": "* `PENDING`: the user has started authorizing, but not yet finished\n* `ACTIVE`: the token should work\n* `DENIED`: the user declined the authorization\n* `EXPIRED`: the token used to work, but now it doesn't\n* `REVOKED`: the user has explicitly revoked the token",
+                        "symbols": [
+                          "PENDING",
+                          "ACTIVE",
+                          "DENIED",
+                          "EXPIRED",
+                          "REVOKED"
+                        ]
+                      }
+                    },
+                    {
+                      "columnName": "userId",
+                      "comment": "Twitter's numeric ID for this user",
+                      "dataType": {
+                        "type": "long"
+                      }
+                    },
+                    {
+                      "columnName": "screenName",
+                      "comment": "The twitter username for this account (can be changed by the user)",
+                      "dataType": {
+                        "type": "string"
+                      }
+                    },
+                    {
+                      "columnName": "oauthToken",
+                      "comment": "The OAuth token for this Twitter account",
+                      "dataType": {
+                        "type": "string"
+                      }
+                    },
+                    {
+                      "columnName": "oauthTokenSecret",
+                      "comment": "The OAuth secret, used for signing requests on behalf of this Twitter account. `null` whilst the OAuth flow is not yet complete.",
+                      "dataType": {
+                        "type": "string"
+                      }
+                    },
+                    {
+                      "columnName": "dateAuthorized",
+                      "comment": "Timestamp (milliseconds since epoch) when the user last authorized this Twitter account",
+                      "dataType": {
+                        "type": "long"
+                      }
+                    }
+                  ]
+                }
+              }
+            }
+          },
+          {
+            "columnName": "toDoItems",
+            "comment": "The top-level items in the user's to-do list",
+            "dataType": {
+              "type": "array",
+              "items": {
+                "dataType": {
+                  "type": "record",
+                  "name": "ToDoItem",
+                  "comment": "A record is one node in a To-Do item tree (every record can contain nested sub-records).",
+                  "values": [
+                    {
+                      "columnName": "status",
+                      "comment": "User-selected state for this item (e.g. whether or not it is marked as done)",
+                      "dataType": {
+                        "type": "enum",
+                        "name": "ToDoStatus",
+                        "comment": "* `HIDDEN`: not currently visible, e.g. because it becomes actionable in future\n* `ACTIONABLE`: appears in the current to-do list\n* `DONE`: marked as done, but still appears in the list\n* `ARCHIVED`: marked as done and no longer visible\n* `DELETED`: not done and removed from list (preserved for undo purposes)",
+                        "symbols": [
+                          "HIDDEN",
+                          "ACTIONABLE",
+                          "DONE",
+                          "ARCHIVED",
+                          "DELETED"
+                        ]
+                      }
+                    },
+                    {
+                      "columnName": "title",
+                      "comment": "One-line summary of the item",
+                      "dataType": {
+                        "type": "string"
+                      }
+                    },
+                    {
+                      "columnName": "description",
+                      "comment": "Detailed description (may contain HTML markup)",
+                      "dataType": {
+                        "type": "string"
+                      }
+                    },
+                    {
+                      "columnName": "snoozeDate",
+                      "comment": "Timestamp (milliseconds since epoch) at which the item should go from `HIDDEN` to `ACTIONABLE` status",
+                      "dataType": {
+                        "type": "long"
+                      }
+                    }
+                  ]
+                }
+              }
+            }
+          }
+        ]
+      }
+    }
+  ],
+  "record": {
+    "User": {
+      "id": 1231,
+      "username": "hewhomustnotbenamed",
+      "passwordHash": "avadakedavara",
+      "signupDate": 1503651112419,
+      "emailAddresses": [
+        {
+          "address": "vold@example.com",
+          "verified": true,
+          "dateAdded": 1503651112419,
+          "dateBounced": 1503651112419
+        }
+      ],
+      "twitterAccounts": [
+        {
+          "status": "ACTIVE",
+          "userId": 555,
+          "screenName": "hewhomustnotbenamed",
+          "oauthToken": "sdfsds",
+          "oauthTokenSecret": "dfsdsds",
+          "dateAuthorized": 1503651112419
+        }
+      ],
+      "toDoItems": [
+        {
+          "status": "HIDDEN",
+          "title": "Kill the boy",
+          "description": "AvadaKedavara",
+          "snoozeDate": 1503651112419
+        }
+      ]
+    }
+  },
+  "expectedSchema": {
+    "type": "record",
+    "name": "dummy_table",
+    "namespace": "namespace",
+    "doc": "",
+    "fields": [
+      {
+        "name": "User",
+        "type": {
+          "type": "record",
+          "namespace": "",
+          "doc": "",
+          "fields": [
+            {
+              "name": "id",
+              "type": {
+                "type": "int",
+                "source.type": "int"
+              },
+              "doc": "System-assigned numeric user ID. Cannot be changed by the user.",
+              "source.type": "int"
+            },
+            {
+              "name": "username",
+              "type": {
+                "type": "string",
+                "source.type": "string"
+              },
+              "doc": "The username chosen by the user. Can be changed by the user.",
+              "source.type": "string"
+            },
+            {
+              "name": "passwordHash",
+              "type": {
+                "type": "string",
+                "source.type": "string"
+              },
+              "doc": "The user's password, hashed using [scrypt](http://www.tarsnap.com/scrypt.html).",
+              "source.type": "string"
+            },
+            {
+              "name": "signupDate",
+              "type": {
+                "type": "long",
+                "source.type": "long"
+              },
+              "doc": "Timestamp (milliseconds since epoch) when the user signed up",
+              "source.type": "long"
+            },
+            {
+              "name": "emailAddresses",
+              "type": {
+                "type": "array",
+                "items": {
+                  "type": "record",
+                  "name": "EmailAddress",
+                  "doc": "",
+                  "fields": [
+                    {
+                      "name": "address",
+                      "type": {
+                        "type": "string",
+                        "source.type": "string"
+                      },
+                      "doc": "The email address, e.g. `foo@example.com`",
+                      "source.type": "string"
+                    },
+                    {
+                      "name": "verified",
+                      "type": {
+                        "type": "boolean",
+                        "source.type": "boolean"
+                      },
+                      "doc": "true if the user has clicked the link in a confirmation email to this address.",
+                      "source.type": "boolean"
+                    },
+                    {
+                      "name": "dateAdded",
+                      "type": {
+                        "type": "long",
+                        "source.type": "long"
+                      },
+                      "doc": "Timestamp (milliseconds since epoch) when the email address was added to the account.",
+                      "source.type": "long"
+                    },
+                    {
+                      "name": "dateBounced",
+                      "type": {
+                        "type": "long",
+                        "source.type": "long"
+                      },
+                      "doc": "Timestamp (milliseconds since epoch) when an email sent to this address last bounced. Reset to null when the address no longer bounces.",
+                      "source.type": "long"
+                    }
+                  ],
+                  "source.type": "record"
+                },
+                "source.type": "array"
+              },
+              "doc": "All email addresses on the user's account",
+              "source.type": "array"
+            },
+            {
+              "name": "twitterAccounts",
+              "type": {
+                "type": "array",
+                "items": {
+                  "type": "record",
+                  "doc": "",
+                  "fields": [
+                    {
+                      "name": "status",
+                      "type": {
+                        "type": "enum",
+                        "name": "OAuthStatus",
+                        "doc": "",
+                        "symbols": [
+                          "PENDING",
+                          "ACTIVE",
+                          "DENIED",
+                          "EXPIRED",
+                          "REVOKED"
+                        ],
+                        "source.type": "enum"
+                      },
+                      "doc": "Indicator of whether this authorization is currently active, or has been revoked",
+                      "source.type": "enum"
+                    },
+                    {
+                      "name": "userId",
+                      "type": {
+                        "type": "long",
+                        "source.type": "long"
+                      },
+                      "doc": "Twitter's numeric ID for this user",
+                      "source.type": "long"
+                    },
+                    {
+                      "name": "screenName",
+                      "type": {
+                        "type": "string",
+                        "source.type": "string"
+                      },
+                      "doc": "The twitter username for this account (can be changed by the user)",
+                      "source.type": "string"
+                    },
+                    {
+                      "name": "oauthToken",
+                      "type": {
+                        "type": "string",
+                        "source.type": "string"
+                      },
+                      "doc": "The OAuth token for this Twitter account",
+                      "source.type": "string"
+                    },
+                    {
+                      "name": "oauthTokenSecret",
+                      "type": {
+                        "type": "string",
+                        "source.type": "string"
+                      },
+                      "doc": "The OAuth secret, used for signing requests on behalf of this Twitter account. `null` whilst the OAuth flow is not yet complete.",
+                      "source.type": "string"
+                    },
+                    {
+                      "name": "dateAuthorized",
+                      "type": {
+                        "type": "long",
+                        "source.type": "long"
+                      },
+                      "doc": "Timestamp (milliseconds since epoch) when the user last authorized this Twitter account",
+                      "source.type": "long"
+                    }
+                  ],
+                  "source.type": "record"
+                },
+                "source.type": "array"
+              },
+              "doc": "All Twitter accounts that the user has OAuthed",
+              "source.type": "array"
+            },
+            {
+              "name": "toDoItems",
+              "type": {
+                "type": "array",
+                "items": {
+                  "type": "record",
+                  "name": "ToDoItem",
+                  "doc": "",
+                  "fields": [
+                    {
+                      "name": "status",
+                      "type": {
+                        "type": "enum",
+                        "name": "ToDoStatus",
+                        "doc": "",
+                        "symbols": [
+                          "HIDDEN",
+                          "ACTIONABLE",
+                          "DONE",
+                          "ARCHIVED",
+                          "DELETED"
+                        ],
+                        "source.type": "enum"
+                      },
+                      "doc": "User-selected state for this item (e.g. whether or not it is marked as done)",
+                      "source.type": "enum"
+                    },
+                    {
+                      "name": "title",
+                      "type": {
+                        "type": "string",
+                        "source.type": "string"
+                      },
+                      "doc": "One-line summary of the item",
+                      "source.type": "string"
+                    },
+                    {
+                      "name": "description",
+                      "type": {
+                        "type": "string",
+                        "source.type": "string"
+                      },
+                      "doc": "Detailed description (may contain HTML markup)",
+                      "source.type": "string"
+                    },
+                    {
+                      "name": "snoozeDate",
+                      "type": {
+                        "type": "long",
+                        "source.type": "long"
+                      },
+                      "doc": "Timestamp (milliseconds since epoch) at which the item should go from `HIDDEN` to `ACTIONABLE` status",
+                      "source.type": "long"
+                    }
+                  ],
+                  "source.type": "record"
+                },
+                "source.type": "array"
+              },
+              "doc": "The top-level items in the user's to-do list",
+              "source.type": "array"
+            }
+          ],
+          "source.type": "record"
+        },
+        "doc": "This is a user record in a fictitious to-do-list management app. It supports arbitrary grouping and nesting of items, and allows you to add items by email or by tweeting.\n\nNote this app doesn't actually exist. The schema is just a demo for [Avrodoc](https://github.com/ept/avrodoc)!",
+        "source.type": "record"
+      }
+    ],
+    "source.type": "record"
+  },
+  "expectedRecord": {
+    "User": {
+      "id": 1231,
+      "username": "hewhomustnotbenamed",
+      "passwordHash": "avadakedavara",
+      "signupDate": 1503651112419,
+      "emailAddresses": [
+        {
+          "address": "vold@example.com",
+          "verified": true,
+          "dateAdded": 1503651112419,
+          "dateBounced": 1503651112419
+        }
+      ],
+      "twitterAccounts": [
+        {
+          "status": "ACTIVE",
+          "userId": 555,
+          "screenName": "hewhomustnotbenamed",
+          "oauthToken": "sdfsds",
+          "oauthTokenSecret": "dfsdsds",
+          "dateAuthorized": 1503651112419
+        }
+      ],
+      "toDoItems": [
+        {
+          "status": "HIDDEN",
+          "title": "Kill the boy",
+          "description": "AvadaKedavara",
+          "snoozeDate": 1503651112419
+        }
+      ]
+    }
+  }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/6dd36a50/gobblin-core/src/test/resources/converter/complex2.json
----------------------------------------------------------------------
diff --git a/gobblin-core/src/test/resources/converter/complex2.json b/gobblin-core/src/test/resources/converter/complex2.json
new file mode 100644
index 0000000..cc7be9c
--- /dev/null
+++ b/gobblin-core/src/test/resources/converter/complex2.json
@@ -0,0 +1,186 @@
+{
+  "schema": [
+    {
+      "columnName": "protocolv1",
+      "comment": "This is a protocol description for a fictitious network service.",
+      "dataType": {
+        "type": "record",
+        "values": [
+          {
+            "columnName": "DependencyType",
+            "dataType": {
+              "type": "enum",
+              "name": "depType",
+              "symbols": [
+                "REQUIRED",
+                "OPTIONAL"
+              ]
+            }
+          },
+          {
+            "columnName": "DependencyDetail",
+            "comment": "Details about status of a dependency.",
+            "dataType": {
+              "type": "record",
+              "values": [
+                {
+                  "columnName": "name",
+                  "dataType": {
+                    "type": "string"
+                  },
+                  "comment": "A short name of the service who's status is being reported"
+                },
+                {
+                  "columnName": "descendingField",
+                  "dataType": {
+                    "type": "string"
+                  },
+                  "comment": "A descending sort order field"
+                },
+                {
+                  "columnName": "ignoredField",
+                  "dataType": {
+                    "type": "string"
+                  },
+                  "comment": "An ignored order sort field"
+                },
+                {
+                  "columnName": "status",
+                  "dataType": {
+                    "type": "string"
+                  },
+                  "comment": "A string representing the operational status for this service"
+                },
+                {
+                  "columnName": "timestamp",
+                  "dataType": {
+                    "type": "string"
+                  },
+                  "comment": "A timestamp showing the time at which this particular dependency was compiled."
+                }
+              ]
+            }
+          }
+        ]
+      }
+    }
+  ],
+  "record": {
+    "protocolv1": {
+      "DependencyType": "REQUIRED",
+      "DependencyDetail": {
+        "name": "something",
+        "descendingField": "someorder",
+        "ignoredField": "somefield",
+        "status": "somestatus",
+        "timestamp": "2342323423"
+      }
+    }
+  },
+  "expectedSchema": {
+    "type": "record",
+    "name": "dummy_table",
+    "namespace": "namespace",
+    "doc": "",
+    "fields": [
+      {
+        "name": "protocolv1",
+        "type": {
+          "type": "record",
+          "namespace": "",
+          "doc": "",
+          "fields": [
+            {
+              "name": "DependencyType",
+              "type": {
+                "type": "enum",
+                "name": "depType",
+                "doc": "",
+                "symbols": [
+                  "REQUIRED",
+                  "OPTIONAL"
+                ],
+                "source.type": "enum"
+              },
+              "doc": "",
+              "source.type": "enum"
+            },
+            {
+              "name": "DependencyDetail",
+              "type": {
+                "type": "record",
+                "doc": "",
+                "fields": [
+                  {
+                    "name": "name",
+                    "type": {
+                      "type": "string",
+                      "source.type": "string"
+                    },
+                    "doc": "A short name of the service who's status is being reported",
+                    "source.type": "string"
+                  },
+                  {
+                    "name": "descendingField",
+                    "type": {
+                      "type": "string",
+                      "source.type": "string"
+                    },
+                    "doc": "A descending sort order field",
+                    "source.type": "string"
+                  },
+                  {
+                    "name": "ignoredField",
+                    "type": {
+                      "type": "string",
+                      "source.type": "string"
+                    },
+                    "doc": "An ignored order sort field",
+                    "source.type": "string"
+                  },
+                  {
+                    "name": "status",
+                    "type": {
+                      "type": "string",
+                      "source.type": "string"
+                    },
+                    "doc": "A string representing the operational status for this service",
+                    "source.type": "string"
+                  },
+                  {
+                    "name": "timestamp",
+                    "type": {
+                      "type": "string",
+                      "source.type": "string"
+                    },
+                    "doc": "A timestamp showing the time at which this particular dependency was compiled.",
+                    "source.type": "string"
+                  }
+                ],
+                "source.type": "record"
+              },
+              "doc": "Details about status of a dependency.",
+              "source.type": "record"
+            }
+          ],
+          "source.type": "record"
+        },
+        "doc": "This is a protocol description for a fictitious network service.",
+        "source.type": "record"
+      }
+    ],
+    "source.type": "record"
+  },
+  "expectedRecord": {
+    "protocolv1": {
+      "DependencyType": "REQUIRED",
+      "DependencyDetail": {
+        "name": "something",
+        "descendingField": "someorder",
+        "ignoredField": "somefield",
+        "status": "somestatus",
+        "timestamp": "2342323423"
+      }
+    }
+  }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/6dd36a50/gobblin-core/src/test/resources/converter/complex3.json
----------------------------------------------------------------------
diff --git a/gobblin-core/src/test/resources/converter/complex3.json b/gobblin-core/src/test/resources/converter/complex3.json
new file mode 100644
index 0000000..6f57998
--- /dev/null
+++ b/gobblin-core/src/test/resources/converter/complex3.json
@@ -0,0 +1,548 @@
+{
+  "schema": [
+    {
+      "columnName": "User",
+      "namespace": "com.example.avro",
+      "comment": "This is a user record in a fictitious to-do-list management app. It supports arbitrary grouping and nesting of items, and allows you to add items by email or by tweeting.\n\nNote this app doesn't actually exist. The schema is just a demo for [Avrodoc](https://github.com/ept/avrodoc)!",
+      "dataType": {
+        "type": "record",
+        "values": [
+          {
+            "columnName": "id",
+            "comment": "System-assigned numeric user ID. Cannot be changed by the user.",
+            "isNullable": true,
+            "dataType": {
+              "type": [
+                "int",
+                "null"
+              ]
+            }
+          },
+          {
+            "columnName": "username",
+            "comment": "The username chosen by the user. Can be changed by the user.",
+            "dataType": {
+              "type": "string"
+            }
+          },
+          {
+            "columnName": "passwordHash",
+            "comment": "The user's password, hashed using [scrypt](http://www.tarsnap.com/scrypt.html).",
+            "isNullable": true,
+            "dataType": {
+              "type": [
+                "null",
+                "string"
+              ]
+            }
+          },
+          {
+            "columnName": "signupDate",
+            "comment": "Timestamp (milliseconds since epoch) when the user signed up",
+            "dataType": {
+              "type": "long"
+            }
+          },
+          {
+            "columnName": "emailAddresses",
+            "comment": "All email addresses on the user's account",
+            "dataType": {
+              "type": [
+                "string",
+                {
+                  "dataType": {
+                    "type": "array",
+                    "items": {
+                      "dataType": {
+                        "type": "record",
+                        "name": "EmailAddress",
+                        "comment": "Stores details about an email address that a user has associated with their account.",
+                        "values": [
+                          {
+                            "columnName": "address",
+                            "comment": "The email address, e.g. `foo@example.com`",
+                            "dataType": {
+                              "type": "string"
+                            }
+                          },
+                          {
+                            "columnName": "verified",
+                            "comment": "true if the user has clicked the link in a confirmation email to this address.",
+                            "dataType": {
+                              "type": "boolean"
+                            },
+                            "default": false
+                          },
+                          {
+                            "columnName": "dateAdded",
+                            "comment": "Timestamp (milliseconds since epoch) when the email address was added to the account.",
+                            "dataType": {
+                              "type": "long"
+                            }
+                          },
+                          {
+                            "columnName": "dateBounced",
+                            "comment": "Timestamp (milliseconds since epoch) when an email sent to this address last bounced. Reset to null when the address no longer bounces.",
+                            "dataType": {
+                              "type": "long"
+                            }
+                          }
+                        ]
+                      }
+                    }
+                  }
+                }
+              ]
+            }
+          },
+          {
+            "columnName": "twitterAccounts",
+            "comment": "All Twitter accounts that the user has OAuthed",
+            "dataType": {
+              "type": "array",
+              "items": {
+                "dataType": {
+                  "type": "record",
+                  "columnName": "TwitterAccount",
+                  "comment": "Stores access credentials for one Twitter account, as granted to us by the user by OAuth.",
+                  "values": [
+                    {
+                      "columnName": "status",
+                      "comment": "Indicator of whether this authorization is currently active, or has been revoked",
+                      "dataType": {
+                        "type": "enum",
+                        "name": "OAuthStatus",
+                        "comment": "* `PENDING`: the user has started authorizing, but not yet finished\n* `ACTIVE`: the token should work\n* `DENIED`: the user declined the authorization\n* `EXPIRED`: the token used to work, but now it doesn't\n* `REVOKED`: the user has explicitly revoked the token",
+                        "symbols": [
+                          "PENDING",
+                          "ACTIVE",
+                          "DENIED",
+                          "EXPIRED",
+                          "REVOKED"
+                        ]
+                      }
+                    },
+                    {
+                      "columnName": "userId",
+                      "comment": "Twitter's numeric ID for this user",
+                      "dataType": {
+                        "type": "long"
+                      }
+                    },
+                    {
+                      "columnName": "screenName",
+                      "comment": "The twitter username for this account (can be changed by the user)",
+                      "dataType": {
+                        "type": "string"
+                      }
+                    },
+                    {
+                      "columnName": "oauthToken",
+                      "comment": "The OAuth token for this Twitter account",
+                      "dataType": {
+                        "type": "string"
+                      }
+                    },
+                    {
+                      "columnName": "oauthTokenSecret",
+                      "comment": "The OAuth secret, used for signing requests on behalf of this Twitter account. `null` whilst the OAuth flow is not yet complete.",
+                      "dataType": {
+                        "type": "string"
+                      }
+                    },
+                    {
+                      "columnName": "dateAuthorized",
+                      "comment": "Timestamp (milliseconds since epoch) when the user last authorized this Twitter account",
+                      "dataType": {
+                        "type": "long"
+                      }
+                    }
+                  ]
+                }
+              }
+            }
+          },
+          {
+            "columnName": "toDoItems",
+            "comment": "The top-level items in the user's to-do list",
+            "dataType": {
+              "type": "array",
+              "items": {
+                "dataType": {
+                  "type": "record",
+                  "name": "ToDoItem",
+                  "comment": "A record is one node in a To-Do item tree (every record can contain nested sub-records).",
+                  "values": [
+                    {
+                      "columnName": "status",
+                      "comment": "User-selected state for this item (e.g. whether or not it is marked as done)",
+                      "dataType": {
+                        "type": "enum",
+                        "name": "ToDoStatus",
+                        "comment": "* `HIDDEN`: not currently visible, e.g. because it becomes actionable in future\n* `ACTIONABLE`: appears in the current to-do list\n* `DONE`: marked as done, but still appears in the list\n* `ARCHIVED`: marked as done and no longer visible\n* `DELETED`: not done and removed from list (preserved for undo purposes)",
+                        "symbols": [
+                          "HIDDEN",
+                          "ACTIONABLE",
+                          "DONE",
+                          "ARCHIVED",
+                          "DELETED"
+                        ]
+                      }
+                    },
+                    {
+                      "columnName": "title",
+                      "comment": "One-line summary of the item",
+                      "dataType": {
+                        "type": "string"
+                      }
+                    },
+                    {
+                      "columnName": "description",
+                      "comment": "Detailed description (may contain HTML markup)",
+                      "dataType": {
+                        "type": "string"
+                      }
+                    },
+                    {
+                      "columnName": "snoozeDate",
+                      "comment": "Timestamp (milliseconds since epoch) at which the item should go from `HIDDEN` to `ACTIONABLE` status",
+                      "dataType": {
+                        "type": "long"
+                      }
+                    }
+                  ]
+                }
+              }
+            }
+          }
+        ]
+      }
+    }
+  ],
+  "record": {
+    "User": {
+      "id": null,
+      "username": "hewhomustnotbenamed",
+      "passwordHash": "avadakedavara",
+      "signupDate": 1503651112419,
+      "emailAddresses": "vold@gmail.com",
+      "twitterAccounts": [
+        {
+          "status": "ACTIVE",
+          "userId": 555,
+          "screenName": "hewhomustnotbenamed",
+          "oauthToken": "sdfsds",
+          "oauthTokenSecret": "dfsdsds",
+          "dateAuthorized": 1503651112419
+        }
+      ],
+      "toDoItems": [
+        {
+          "status": "HIDDEN",
+          "title": "Kill the boy",
+          "description": "AvadaKedavara",
+          "snoozeDate": 1503651112419
+        }
+      ]
+    }
+  },
+  "expectedSchema": {
+    "type": "record",
+    "name": "dummy_table",
+    "namespace": "namespace",
+    "doc": "",
+    "fields": [
+      {
+        "name": "User",
+        "type": {
+          "type": "record",
+          "namespace": "",
+          "doc": "",
+          "fields": [
+            {
+              "name": "id",
+              "type": [
+                {
+                  "type": "int",
+                  "source.type": "int"
+                },
+                {
+                  "type": "null",
+                  "source.type": "null"
+                }
+              ],
+              "doc": "System-assigned numeric user ID. Cannot be changed by the user.",
+              "default": null,
+              "source.type": "union"
+            },
+            {
+              "name": "username",
+              "type": {
+                "type": "string",
+                "source.type": "string"
+              },
+              "doc": "The username chosen by the user. Can be changed by the user.",
+              "source.type": "string"
+            },
+            {
+              "name": "passwordHash",
+              "type": [
+                {
+                  "type": "null",
+                  "source.type": "null"
+                },
+                {
+                  "type": "string",
+                  "source.type": "string"
+                }
+              ],
+              "doc": "The user's password, hashed using [scrypt](http://www.tarsnap.com/scrypt.html).",
+              "default": null,
+              "source.type": "union"
+            },
+            {
+              "name": "signupDate",
+              "type": {
+                "type": "long",
+                "source.type": "long"
+              },
+              "doc": "Timestamp (milliseconds since epoch) when the user signed up",
+              "source.type": "long"
+            },
+            {
+              "name": "emailAddresses",
+              "type": [
+                {
+                  "type": "string",
+                  "source.type": "string"
+                },
+                {
+                  "type": "array",
+                  "items": {
+                    "type": "record",
+                    "name": "EmailAddress",
+                    "doc": "",
+                    "fields": [
+                      {
+                        "name": "address",
+                        "type": {
+                          "type": "string",
+                          "source.type": "string"
+                        },
+                        "doc": "The email address, e.g. `foo@example.com`",
+                        "source.type": "string"
+                      },
+                      {
+                        "name": "verified",
+                        "type": {
+                          "type": "boolean",
+                          "source.type": "boolean"
+                        },
+                        "doc": "true if the user has clicked the link in a confirmation email to this address.",
+                        "source.type": "boolean"
+                      },
+                      {
+                        "name": "dateAdded",
+                        "type": {
+                          "type": "long",
+                          "source.type": "long"
+                        },
+                        "doc": "Timestamp (milliseconds since epoch) when the email address was added to the account.",
+                        "source.type": "long"
+                      },
+                      {
+                        "name": "dateBounced",
+                        "type": {
+                          "type": "long",
+                          "source.type": "long"
+                        },
+                        "doc": "Timestamp (milliseconds since epoch) when an email sent to this address last bounced. Reset to null when the address no longer bounces.",
+                        "source.type": "long"
+                      }
+                    ],
+                    "source.type": "record"
+                  },
+                  "source.type": "array"
+                }
+              ],
+              "doc": "All email addresses on the user's account",
+              "source.type": "union"
+            },
+            {
+              "name": "twitterAccounts",
+              "type": {
+                "type": "array",
+                "items": {
+                  "type": "record",
+                  "doc": "",
+                  "fields": [
+                    {
+                      "name": "status",
+                      "type": {
+                        "type": "enum",
+                        "name": "OAuthStatus",
+                        "doc": "",
+                        "symbols": [
+                          "PENDING",
+                          "ACTIVE",
+                          "DENIED",
+                          "EXPIRED",
+                          "REVOKED"
+                        ],
+                        "source.type": "enum"
+                      },
+                      "doc": "Indicator of whether this authorization is currently active, or has been revoked",
+                      "source.type": "enum"
+                    },
+                    {
+                      "name": "userId",
+                      "type": {
+                        "type": "long",
+                        "source.type": "long"
+                      },
+                      "doc": "Twitter's numeric ID for this user",
+                      "source.type": "long"
+                    },
+                    {
+                      "name": "screenName",
+                      "type": {
+                        "type": "string",
+                        "source.type": "string"
+                      },
+                      "doc": "The twitter username for this account (can be changed by the user)",
+                      "source.type": "string"
+                    },
+                    {
+                      "name": "oauthToken",
+                      "type": {
+                        "type": "string",
+                        "source.type": "string"
+                      },
+                      "doc": "The OAuth token for this Twitter account",
+                      "source.type": "string"
+                    },
+                    {
+                      "name": "oauthTokenSecret",
+                      "type": {
+                        "type": "string",
+                        "source.type": "string"
+                      },
+                      "doc": "The OAuth secret, used for signing requests on behalf of this Twitter account. `null` whilst the OAuth flow is not yet complete.",
+                      "source.type": "string"
+                    },
+                    {
+                      "name": "dateAuthorized",
+                      "type": {
+                        "type": "long",
+                        "source.type": "long"
+                      },
+                      "doc": "Timestamp (milliseconds since epoch) when the user last authorized this Twitter account",
+                      "source.type": "long"
+                    }
+                  ],
+                  "source.type": "record"
+                },
+                "source.type": "array"
+              },
+              "doc": "All Twitter accounts that the user has OAuthed",
+              "source.type": "array"
+            },
+            {
+              "name": "toDoItems",
+              "type": {
+                "type": "array",
+                "items": {
+                  "type": "record",
+                  "name": "ToDoItem",
+                  "doc": "",
+                  "fields": [
+                    {
+                      "name": "status",
+                      "type": {
+                        "type": "enum",
+                        "name": "ToDoStatus",
+                        "doc": "",
+                        "symbols": [
+                          "HIDDEN",
+                          "ACTIONABLE",
+                          "DONE",
+                          "ARCHIVED",
+                          "DELETED"
+                        ],
+                        "source.type": "enum"
+                      },
+                      "doc": "User-selected state for this item (e.g. whether or not it is marked as done)",
+                      "source.type": "enum"
+                    },
+                    {
+                      "name": "title",
+                      "type": {
+                        "type": "string",
+                        "source.type": "string"
+                      },
+                      "doc": "One-line summary of the item",
+                      "source.type": "string"
+                    },
+                    {
+                      "name": "description",
+                      "type": {
+                        "type": "string",
+                        "source.type": "string"
+                      },
+                      "doc": "Detailed description (may contain HTML markup)",
+                      "source.type": "string"
+                    },
+                    {
+                      "name": "snoozeDate",
+                      "type": {
+                        "type": "long",
+                        "source.type": "long"
+                      },
+                      "doc": "Timestamp (milliseconds since epoch) at which the item should go from `HIDDEN` to `ACTIONABLE` status",
+                      "source.type": "long"
+                    }
+                  ],
+                  "source.type": "record"
+                },
+                "source.type": "array"
+              },
+              "doc": "The top-level items in the user's to-do list",
+              "source.type": "array"
+            }
+          ],
+          "source.type": "record"
+        },
+        "doc": "This is a user record in a fictitious to-do-list management app. It supports arbitrary grouping and nesting of items, and allows you to add items by email or by tweeting.\n\nNote this app doesn't actually exist. The schema is just a demo for [Avrodoc](https://github.com/ept/avrodoc)!",
+        "source.type": "record"
+      }
+    ],
+    "source.type": "record"
+  },
+  "expectedRecord": {
+    "User": {
+      "id": null,
+      "username": "hewhomustnotbenamed",
+      "passwordHash": "avadakedavara",
+      "signupDate": 1503651112419,
+      "emailAddresses": "vold@gmail.com",
+      "twitterAccounts": [
+        {
+          "status": "ACTIVE",
+          "userId": 555,
+          "screenName": "hewhomustnotbenamed",
+          "oauthToken": "sdfsds",
+          "oauthTokenSecret": "dfsdsds",
+          "dateAuthorized": 1503651112419
+        }
+      ],
+      "toDoItems": [
+        {
+          "status": "HIDDEN",
+          "title": "Kill the boy",
+          "description": "AvadaKedavara",
+          "snoozeDate": 1503651112419
+        }
+      ]
+    }
+  }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/6dd36a50/gobblin-core/src/test/resources/converter/record.json
----------------------------------------------------------------------
diff --git a/gobblin-core/src/test/resources/converter/record.json b/gobblin-core/src/test/resources/converter/record.json
deleted file mode 100644
index 4a204ff..0000000
--- a/gobblin-core/src/test/resources/converter/record.json
+++ /dev/null
@@ -1,23 +0,0 @@
-{
-   "Id":"232498",
-   "IsDeleted":false,
-   "Salutation":[
-      "test",
-      "test1",
-      "test2"
-   ],
-   "MapAccount":{
-      "test":"test",
-      "test1":"test",
-      "test2":"test"
-   },
-   "Industry":"IT",
-   "LastModifiedDate":"2014-01-02 12:23:37",
-   "date_type":"2014-01-02 12:23:37",
-   "time_type":"12:23:37",
-   "bytes_type":"dfkljafnio3ebpoahdjhjh",
-   "int_type":3,
-   "long_type":7899083748927,
-   "float_type":4.3,
-   "double_type":9.763649736
-}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-gobblin/blob/6dd36a50/gobblin-core/src/test/resources/converter/record3.json
----------------------------------------------------------------------
diff --git a/gobblin-core/src/test/resources/converter/record3.json b/gobblin-core/src/test/resources/converter/record3.json
new file mode 100644
index 0000000..772a16b
--- /dev/null
+++ b/gobblin-core/src/test/resources/converter/record3.json
@@ -0,0 +1,27 @@
+{
+  "User": {
+    "id": null,
+    "username": "hewhomustnotbenamed",
+    "passwordHash": "avadakedavara",
+    "signupDate": 1503651112419,
+    "emailAddresses": "vold@gmail.com",
+    "twitterAccounts": [
+      {
+        "status": "ACTIVE",
+        "userId": 555,
+        "screenName": "hewhomustnotbenamed",
+        "oauthToken": "sdfsds",
+        "oauthTokenSecret": "dfsdsds",
+        "dateAuthorized": 1503651112419
+      }
+    ],
+    "toDoItems": [
+      {
+        "status": "HIDDEN",
+        "title": "Kill the boy",
+        "description": "AvadaKedavara",
+        "snoozeDate": 1503651112419
+      }
+    ]
+  }
+}
\ No newline at end of file


Mime
View raw message