sqoop-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From mau...@apache.org
Subject sqoop git commit: SQOOP-3074: Fix Avro import not to fail with Javac errors in case of non UTF-8 locale
Date Thu, 08 Dec 2016 22:08:09 GMT
Repository: sqoop
Updated Branches:
  refs/heads/trunk 44e05df21 -> 5771a2da5


SQOOP-3074: Fix Avro import not to fail with Javac
errors in case of non UTF-8 locale

(Attila Szabo)


Project: http://git-wip-us.apache.org/repos/asf/sqoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/sqoop/commit/5771a2da
Tree: http://git-wip-us.apache.org/repos/asf/sqoop/tree/5771a2da
Diff: http://git-wip-us.apache.org/repos/asf/sqoop/diff/5771a2da

Branch: refs/heads/trunk
Commit: 5771a2da5fc071ca8f80f222e8468a29419e845e
Parents: 44e05df
Author: Attila Szabo <maugli@apache.org>
Authored: Thu Dec 8 23:07:31 2016 +0100
Committer: Attila Szabo <maugli@apache.org>
Committed: Thu Dec 8 23:07:31 2016 +0100

----------------------------------------------------------------------
 src/java/org/apache/sqoop/avro/AvroUtil.java           |  6 +++++-
 src/java/org/apache/sqoop/orm/AvroSchemaGenerator.java |  5 ++++-
 src/java/org/apache/sqoop/orm/ClassWriter.java         | 11 ++++++++++-
 3 files changed, 19 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/sqoop/blob/5771a2da/src/java/org/apache/sqoop/avro/AvroUtil.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/sqoop/avro/AvroUtil.java b/src/java/org/apache/sqoop/avro/AvroUtil.java
index ee29f14..8d90130 100644
--- a/src/java/org/apache/sqoop/avro/AvroUtil.java
+++ b/src/java/org/apache/sqoop/avro/AvroUtil.java
@@ -28,6 +28,7 @@ import org.apache.avro.generic.GenericFixed;
 import org.apache.avro.generic.GenericRecord;
 import org.apache.avro.io.DatumReader;
 import org.apache.avro.mapred.FsInput;
+import org.apache.commons.lang.StringEscapeUtils;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
@@ -106,7 +107,10 @@ public final class AvroUtil {
    * Convert Column name into Avro column name.
    */
   public static String toAvroColumn(String column) {
-    String candidate = ClassWriter.toJavaIdentifier(column);
+    // We're unescaping identifiers to get the real Unicode characters
+    // back, and not the escaped versions.
+    String candidate = StringEscapeUtils.unescapeJava(
+        ClassWriter.toJavaIdentifier(column));
     return toAvroIdentifier(candidate);
   }
 

http://git-wip-us.apache.org/repos/asf/sqoop/blob/5771a2da/src/java/org/apache/sqoop/orm/AvroSchemaGenerator.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/sqoop/orm/AvroSchemaGenerator.java b/src/java/org/apache/sqoop/orm/AvroSchemaGenerator.java
index 3c31c43..5b1c745 100644
--- a/src/java/org/apache/sqoop/orm/AvroSchemaGenerator.java
+++ b/src/java/org/apache/sqoop/orm/AvroSchemaGenerator.java
@@ -29,6 +29,7 @@ import org.apache.avro.LogicalType;
 import org.apache.avro.Schema;
 import org.apache.avro.Schema.Field;
 import org.apache.avro.Schema.Type;
+import org.apache.commons.lang.StringEscapeUtils;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 
@@ -88,7 +89,9 @@ public class AvroSchemaGenerator {
 
     List<Field> fields = new ArrayList<Field>();
     for (String columnName : columnNames) {
-      String cleanedCol = AvroUtil.toAvroIdentifier(ClassWriter.toJavaIdentifier(columnName));
+      // We're unescaping identifiers to get the real Unicode characters
+      // back, and not the escaped versions.
+      String cleanedCol = AvroUtil.toAvroIdentifier(StringEscapeUtils.unescapeJava(ClassWriter.toJavaIdentifier(columnName)));
       List<Integer> columnInfoList = columnInfo.get(columnName);
       int sqlType = columnInfoList.get(0);
       Integer precision = columnInfoList.get(1);

http://git-wip-us.apache.org/repos/asf/sqoop/blob/5771a2da/src/java/org/apache/sqoop/orm/ClassWriter.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/sqoop/orm/ClassWriter.java b/src/java/org/apache/sqoop/orm/ClassWriter.java
index 6f6e66b..0c8d86d 100644
--- a/src/java/org/apache/sqoop/orm/ClassWriter.java
+++ b/src/java/org/apache/sqoop/orm/ClassWriter.java
@@ -284,7 +284,16 @@ public class ClassWriter {
       return "_" + output;
     }
 
-    return output;
+    // Calling StringEscapeUtils#escapeJava is required because we'd like to
+    // support Unicode characters in identifiers even if the locale of the host
+    // system does not support UTF-8, or the locale differs from that for any
+    // other reason. Good example: if a column name contained a \uC3A1 char
+    // and the locale did not support Unicode characters, then the generated
+    // java file would contain characters the compiler cannot recognize, and
+    // javac would fail with a compile error. If the column name were "Alm" +
+    // that raw character + "a", it would be the ASCII-only text Alm\uC3A1a
+    // after the escaping, and this holds in every place where it's used.
+    return StringEscapeUtils.escapeJava(output);
   }
 
   private String toJavaType(String columnName, int sqlType) {


Mime
View raw message