sqoop-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jar...@apache.org
Subject sqoop git commit: SQOOP-2561: Special Character removal from Column name as avro data results in duplicate column and fails the import
Date Tue, 29 Mar 2016 16:27:41 GMT
Repository: sqoop
Updated Branches:
  refs/heads/trunk 0c8b10548 -> 1dd50cfb2


SQOOP-2561: Special Character removal from Column name as avro data results in duplicate column
and fails the import

(VISHNU S NAIR via Jarek Jarcec Cecho)


Project: http://git-wip-us.apache.org/repos/asf/sqoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/sqoop/commit/1dd50cfb
Tree: http://git-wip-us.apache.org/repos/asf/sqoop/tree/1dd50cfb
Diff: http://git-wip-us.apache.org/repos/asf/sqoop/diff/1dd50cfb

Branch: refs/heads/trunk
Commit: 1dd50cfb2ae327b0df8393dd96d1adb86bb2f65f
Parents: 0c8b105
Author: Jarek Jarcec Cecho <jarcec@apache.org>
Authored: Tue Mar 29 09:26:47 2016 -0700
Committer: Jarek Jarcec Cecho <jarcec@apache.org>
Committed: Tue Mar 29 09:26:47 2016 -0700

----------------------------------------------------------------------
 src/java/org/apache/sqoop/avro/AvroUtil.java    |  2 +-
 src/test/com/cloudera/sqoop/TestAvroImport.java | 36 ++++++++++++++++++--
 2 files changed, 35 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/sqoop/blob/1dd50cfb/src/java/org/apache/sqoop/avro/AvroUtil.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/sqoop/avro/AvroUtil.java b/src/java/org/apache/sqoop/avro/AvroUtil.java
index 90cc9d0..319be0f 100644
--- a/src/java/org/apache/sqoop/avro/AvroUtil.java
+++ b/src/java/org/apache/sqoop/avro/AvroUtil.java
@@ -114,7 +114,7 @@ public final class AvroUtil {
    * Format candidate to avro specifics
    */
   public static String toAvroIdentifier(String candidate) {
-    String formattedCandidate = candidate.replaceAll("\\W+", "");
+    String formattedCandidate = candidate.replaceAll("\\W+", "_");
     if (formattedCandidate.substring(0,1).matches("[a-zA-Z_]")) {
       return formattedCandidate;
     } else {

http://git-wip-us.apache.org/repos/asf/sqoop/blob/1dd50cfb/src/test/com/cloudera/sqoop/TestAvroImport.java
----------------------------------------------------------------------
diff --git a/src/test/com/cloudera/sqoop/TestAvroImport.java b/src/test/com/cloudera/sqoop/TestAvroImport.java
index 00d7a95..b611627 100644
--- a/src/test/com/cloudera/sqoop/TestAvroImport.java
+++ b/src/test/com/cloudera/sqoop/TestAvroImport.java
@@ -85,20 +85,24 @@ public class TestAvroImport extends ImportJobTestCase {
   }
 
   public void testAvroImport() throws IOException {
+    this.setCurTableName("Avro_Import_Test");
     avroImportTestHelper(null, null);
   }
 
   public void testDeflateCompressedAvroImport() throws IOException {
+    this.setCurTableName("Deflate_Compressed_Avro_Import_Test_1");
     avroImportTestHelper(new String[] {"--compression-codec",
       "org.apache.hadoop.io.compress.DefaultCodec", }, "deflate");
   }
 
   public void testDefaultCompressedAvroImport() throws IOException {
+    this.setCurTableName("Deflate_Compressed_Avro_Import_Test_2");
     avroImportTestHelper(new String[] {"--compress", }, "deflate");
   }
 
   public void testUnsupportedCodec() throws IOException {
     try {
+      this.setCurTableName("Deflate_Compressed_Avro_Import_Test_3");
       avroImportTestHelper(new String[] {"--compression-codec", "foobar", },
         null);
       fail("Expected IOException");
@@ -212,6 +216,7 @@ public class TestAvroImport extends ImportJobTestCase {
     String [] names = { "avro\uC3A11" };
     String [] types = { "INT" };
     String [] vals = { "1987" };
+    this.setCurTableName("Non_Std_Character_Test");
     createTableWithColTypesAndNames(names, types, vals);
 
     runImport(getOutputArgv(true, null));
@@ -223,10 +228,10 @@ public class TestAvroImport extends ImportJobTestCase {
     List<Field> fields = schema.getFields();
     assertEquals(types.length, fields.size());
 
-    checkField(fields.get(0), "AVRO1", Type.INT);
+    checkField(fields.get(0), "AVRO_1", Type.INT);
 
     GenericRecord record1 = reader.next();
-    assertEquals("AVRO1", 1987, record1.get("AVRO1"));
+    assertEquals("AVRO_1", 1987, record1.get("AVRO_1"));
   }
 
   public void testNonIdentCharactersInColumnName() throws IOException {
@@ -250,6 +255,33 @@ public class TestAvroImport extends ImportJobTestCase {
     assertEquals("TEST_A_V_R_O", 2015, record1.get("TEST_A_V_R_O"));
   }
 
+  /*
+   * Test case for multiple columns whose names contain non-standard characters
+   */
+  public void testNonstandardCharactersInMultipleColumns() throws IOException {
+    String[] names = { "id$1", "id1$" };
+    String[] types = { "INT", "INT" };
+    String[] vals = { "1987", "1988" };
+    this.setCurTableName("Non_Std_Character_Test_For_Multiple_Columns");
+    createTableWithColTypesAndNames(names, types, vals);
+
+    runImport(getOutputArgv(true, null));
+
+    Path outputFile = new Path(getTablePath(), "part-m-00000.avro");
+    DataFileReader<GenericRecord> reader = read(outputFile);
+    Schema schema = reader.getSchema();
+    assertEquals(Schema.Type.RECORD, schema.getType());
+    List<Field> fields = schema.getFields();
+    assertEquals(types.length, fields.size());
+
+    checkField(fields.get(0), "ID_1", Type.INT);
+
+    GenericRecord record1 = reader.next();
+    assertEquals("ID_1", 1987, record1.get("ID_1"));
+    checkField(fields.get(1), "ID1_", Type.INT);
+    assertEquals("ID1_", 1988, record1.get("ID1_"));
+  }
+
   protected void checkField(Field field, String name, Type type) {
     assertEquals(name, field.name());
     assertEquals(Schema.Type.UNION, field.schema().getType());


Mime
View raw message