sqoop-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From cheol...@apache.org
Subject git commit: SQOOP-654: PostgreSQL direct connector is ignoring --null(-input)string and --null(-input)-non-string arguments
Date Wed, 06 Feb 2013 22:19:01 GMT
Updated Branches:
  refs/heads/trunk 666d5499a -> a220ae469


SQOOP-654: PostgreSQL direct connector is ignoring --null(-input)string and
--null(-input)-non-string arguments

(Jarek Jarcec Cecho via Cheolsoo Park)


Project: http://git-wip-us.apache.org/repos/asf/sqoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/sqoop/commit/a220ae46
Tree: http://git-wip-us.apache.org/repos/asf/sqoop/tree/a220ae46
Diff: http://git-wip-us.apache.org/repos/asf/sqoop/diff/a220ae46

Branch: refs/heads/trunk
Commit: a220ae46948bd388da728b3816d817b3cae31ce4
Parents: 666d549
Author: Cheolsoo Park <cheolsoo@apache.org>
Authored: Wed Feb 6 14:18:23 2013 -0800
Committer: Cheolsoo Park <cheolsoo@apache.org>
Committed: Wed Feb 6 14:18:23 2013 -0800

----------------------------------------------------------------------
 .../sqoop/manager/DirectPostgresqlManager.java     |   20 ++++-
 .../org/apache/sqoop/util/SubstitutionUtils.java   |   74 +++++++++++++++
 .../sqoop/manager/PostgresqlImportTest.java        |   31 +++++-
 .../cloudera/sqoop/util/TestSubstitutionUtils.java |   38 ++++++++
 4 files changed, 158 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/sqoop/blob/a220ae46/src/java/org/apache/sqoop/manager/DirectPostgresqlManager.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/sqoop/manager/DirectPostgresqlManager.java b/src/java/org/apache/sqoop/manager/DirectPostgresqlManager.java
index dba732c..a05bf60 100644
--- a/src/java/org/apache/sqoop/manager/DirectPostgresqlManager.java
+++ b/src/java/org/apache/sqoop/manager/DirectPostgresqlManager.java
@@ -30,6 +30,7 @@ import java.util.ArrayList;
 import java.util.List;
 import java.util.Map;
 
+import org.apache.commons.lang.StringUtils;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.sqoop.util.PostgreSQLUtils;
@@ -45,6 +46,7 @@ import com.cloudera.sqoop.util.ImportException;
 import com.cloudera.sqoop.util.JdbcUrl;
 import com.cloudera.sqoop.util.LoggingAsyncSink;
 import com.cloudera.sqoop.util.PerfCounters;
+import org.apache.sqoop.util.SubstitutionUtils;
 
 /**
  * Manages direct dumps from Postgresql databases via psql COPY TO STDOUT
@@ -257,6 +259,14 @@ public class DirectPostgresqlManager
     sb.append(" TO STDOUT WITH DELIMITER E'\\");
     sb.append(Integer.toString((int) this.options.getOutputFieldDelim(), 8));
     sb.append("' CSV ");
+
+    if (this.options.getNullStringValue() != null) {
+      sb.append("NULL AS E'");
+      sb.append(SubstitutionUtils.removeEscapeCharacters(
+        this.options.getNullStringValue()));
+      sb.append("' ");
+    }
+
     if (this.options.getOutputEnclosedBy() != '\0') {
       sb.append("QUOTE E'\\");
       sb.append(Integer.toString((int) this.options.getOutputEnclosedBy(), 8));
@@ -277,7 +287,7 @@ public class DirectPostgresqlManager
     sb.append(";");
 
     String copyCmd = sb.toString();
-    LOG.debug("Copy command is " + copyCmd);
+    LOG.info("Copy command is " + copyCmd);
     return copyCmd;
   }
 
@@ -317,6 +327,14 @@ public class DirectPostgresqlManager
       LOG.warn("Postgresql direct import; import will proceed as text files.");
     }
 
+    if (!StringUtils.equals(options.getNullStringValue(),
+      options.getNullNonStringValue())) {
+      throw new ImportException(
+        "Detected different values of --input-string and --input-non-string " +
+        "parameters. PostgreSQL direct manager do not support that. Please " +
+        "either use the same values or omit the --direct parameter.");
+    }
+
     String commandFilename = null;
     String passwordFilename = null;
     Process p = null;

http://git-wip-us.apache.org/repos/asf/sqoop/blob/a220ae46/src/java/org/apache/sqoop/util/SubstitutionUtils.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/sqoop/util/SubstitutionUtils.java b/src/java/org/apache/sqoop/util/SubstitutionUtils.java
new file mode 100644
index 0000000..c1a468d
--- /dev/null
+++ b/src/java/org/apache/sqoop/util/SubstitutionUtils.java
@@ -0,0 +1,74 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.sqoop.util;
+
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * Utility class for null substitution character handling.
+ *
+ */
+public final class SubstitutionUtils {
+
+  // List of items that needs to be de-escaped in order to be consistent with
+  // Sqoop interpretation of the NULL string parameters.
+  private static final Map<String, String> REMOVE_ESCAPE_CHARS;
+
+  static {
+    // Build static map of escape characters that needs to be de-escaped.
+    // http://docs.oracle.com/javase/specs/jls/se7/html/jls-3.html#jls-3.10.6
+    REMOVE_ESCAPE_CHARS = new HashMap<String, String>();
+    REMOVE_ESCAPE_CHARS.put("\\\\b", "\b");
+    REMOVE_ESCAPE_CHARS.put("\\\\t", "\t");
+    REMOVE_ESCAPE_CHARS.put("\\\\n", "\n");
+    REMOVE_ESCAPE_CHARS.put("\\\\f", "\f");
+    REMOVE_ESCAPE_CHARS.put("\\\\'", "'");
+    REMOVE_ESCAPE_CHARS.put("\\\\\"", "\"");
+    REMOVE_ESCAPE_CHARS.put("\\\\\\\\", "\\\\");
+    // TODO(jarcec, optional): Deal with octal escape sequences?
+  }
+
+  /**
+   * De-escape all escape sequences presented in the string.
+   *
+   * Sqoop is historically using --(input)-null-(non-)string parameters directly
+   * in generated code and thus they need to be manually escaped on command
+   * line. However some connectors might need their final form, so that they
+   * can be also used outside generated code.
+   *
+   * @param string String to de-escape
+   * @return String without escape sequences
+   */
+  public static String removeEscapeCharacters(String string) {
+
+    // Our de-escaping is not suporting octal escape sequences
+    if (string.matches("\\\\[0-9]+")) {
+      throw new RuntimeException("Octal escape sequence is not supported");
+    }
+
+    for (Map.Entry<String, String> entry : REMOVE_ESCAPE_CHARS.entrySet()) {
+      string = string.replaceAll(entry.getKey(), entry.getValue());
+    }
+    return string;
+  }
+
+  private SubstitutionUtils() {
+    // This class can't be instantiated
+  }
+}

http://git-wip-us.apache.org/repos/asf/sqoop/blob/a220ae46/src/test/com/cloudera/sqoop/manager/PostgresqlImportTest.java
----------------------------------------------------------------------
diff --git a/src/test/com/cloudera/sqoop/manager/PostgresqlImportTest.java b/src/test/com/cloudera/sqoop/manager/PostgresqlImportTest.java
index 512b1d5..ee00c41 100644
--- a/src/test/com/cloudera/sqoop/manager/PostgresqlImportTest.java
+++ b/src/test/com/cloudera/sqoop/manager/PostgresqlImportTest.java
@@ -92,6 +92,7 @@ public class PostgresqlImportTest extends ImportJobTestCase {
   static final String DATABASE_USER = "sqooptest";
   static final String DATABASE_NAME = "sqooptest";
   static final String TABLE_NAME = "EMPLOYEES_PG";
+  static final String NULL_TABLE_NAME = "NULL_EMPLOYEES_PG";
   static final String SPECIAL_TABLE_NAME = "EMPLOYEES_PG's";
   static final String DIFFERENT_TABLE_NAME = "DIFFERENT_TABLE";
   static final String SCHEMA_PUBLIC = "public";
@@ -109,14 +110,15 @@ public class PostgresqlImportTest extends ImportJobTestCase {
 
     LOG.debug("Setting up another postgresql test: " + CONNECT_STRING);
 
-    setUpData(TABLE_NAME, SCHEMA_PUBLIC);
-    setUpData(SPECIAL_TABLE_NAME, SCHEMA_PUBLIC);
-    setUpData(DIFFERENT_TABLE_NAME, SCHEMA_SPECIAL);
+    setUpData(TABLE_NAME, SCHEMA_PUBLIC, false);
+    setUpData(NULL_TABLE_NAME, SCHEMA_PUBLIC, true);
+    setUpData(SPECIAL_TABLE_NAME, SCHEMA_PUBLIC, false);
+    setUpData(DIFFERENT_TABLE_NAME, SCHEMA_SPECIAL, false);
 
     LOG.debug("setUp complete.");
   }
 
-  public void setUpData(String tableName, String schema) {
+  public void setUpData(String tableName, String schema, boolean nullEntry) {
     SqoopOptions options = new SqoopOptions(CONNECT_STRING, tableName);
     options.setUsername(DATABASE_USER);
 
@@ -169,6 +171,11 @@ public class PostgresqlImportTest extends ImportJobTestCase {
           + " VALUES(2,'Bob','2009-04-20',400.00,'sales')");
       st.executeUpdate("INSERT INTO " + fullTableName
           + " VALUES(3,'Fred','2009-01-23',15.00,'marketing')");
+      if (nullEntry) {
+        st.executeUpdate("INSERT INTO " + fullTableName
+          + " VALUES(4,'Mike',NULL,NULL,NULL)");
+
+      }
       connection.commit();
     } catch (SQLException sqlE) {
       LOG.error("Encountered SQL Exception: " + sqlE);
@@ -347,4 +354,20 @@ public class PostgresqlImportTest extends ImportJobTestCase {
 
     doImportAndVerify(true, expectedResults, DIFFERENT_TABLE_NAME, extraArgs);
   }
+
+  @Test
+  public void testNullEscapeCharacters() throws Exception {
+     String [] expectedResults = {
+      "2,Bob,2009-04-20,400,sales",
+      "3,Fred,2009-01-23,15,marketing",
+      "4,Mike,\\N,\\N,\\N",
+    };
+
+    String [] extraArgs = {
+      "--null-string", "\\\\\\\\N",
+      "--null-non-string", "\\\\\\\\N",
+    };
+
+    doImportAndVerify(true, expectedResults, NULL_TABLE_NAME, extraArgs);
+  }
 }

http://git-wip-us.apache.org/repos/asf/sqoop/blob/a220ae46/src/test/com/cloudera/sqoop/util/TestSubstitutionUtils.java
----------------------------------------------------------------------
diff --git a/src/test/com/cloudera/sqoop/util/TestSubstitutionUtils.java b/src/test/com/cloudera/sqoop/util/TestSubstitutionUtils.java
new file mode 100644
index 0000000..cd13adc
--- /dev/null
+++ b/src/test/com/cloudera/sqoop/util/TestSubstitutionUtils.java
@@ -0,0 +1,38 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.cloudera.sqoop.util;
+
+import junit.framework.TestCase;
+import org.apache.sqoop.util.SubstitutionUtils;
+
+/**
+ *
+ */
+public class TestSubstitutionUtils extends TestCase {
+
+  public void testRemoveEscapeCharacters() {
+    assertEquals("\\N", SubstitutionUtils.removeEscapeCharacters("\\\\N"));
+    assertEquals("\n", SubstitutionUtils.removeEscapeCharacters("\\n"));
+    assertEquals("\b", SubstitutionUtils.removeEscapeCharacters("\\b"));
+    assertEquals("\t", SubstitutionUtils.removeEscapeCharacters("\\t"));
+    assertEquals("\f", SubstitutionUtils.removeEscapeCharacters("\\f"));
+    assertEquals("\'", SubstitutionUtils.removeEscapeCharacters("\\'"));
+    assertEquals("\"", SubstitutionUtils.removeEscapeCharacters("\\\""));
+    assertEquals("sqoop", SubstitutionUtils.removeEscapeCharacters("sqoop"));
+  }
+}


Mime
View raw message