sqoop-commits mailing list archives

From a..@apache.org
Subject sqoop git commit: SQOOP-2382: Sqoop2: Arithmetic exception in partitioner when allow null is true
Date Tue, 23 Jun 2015 20:48:18 GMT
Repository: sqoop
Updated Branches:
  refs/heads/sqoop2 87855a376 -> 4a5bd295d


SQOOP-2382: Sqoop2: Arithmetic exception in partitioner when allow null is true

(Banmeet Singh via Abraham Elmahrek)


Project: http://git-wip-us.apache.org/repos/asf/sqoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/sqoop/commit/4a5bd295
Tree: http://git-wip-us.apache.org/repos/asf/sqoop/tree/4a5bd295
Diff: http://git-wip-us.apache.org/repos/asf/sqoop/diff/4a5bd295

Branch: refs/heads/sqoop2
Commit: 4a5bd295d828eaa37b3401872dff83addf36193f
Parents: 87855a3
Author: Abraham Elmahrek <abe@apache.org>
Authored: Tue Jun 23 13:47:03 2015 -0700
Committer: Abraham Elmahrek <abe@apache.org>
Committed: Tue Jun 23 13:47:33 2015 -0700

----------------------------------------------------------------------
 .../sqoop/job/etl/PartitionerContext.java       | 28 +++++++++++
 .../connector/jdbc/GenericJdbcPartitioner.java  |  7 ++-
 docs/src/site/sphinx/Sqoop5MinutesDemo.rst      |  4 +-
 .../apache/sqoop/job/mr/SqoopInputFormat.java   |  4 +-
 .../jdbc/generic/FromRDBMSToHDFSTest.java       | 50 ++++++++++++++++++++
 5 files changed, 90 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/sqoop/blob/4a5bd295/common/src/main/java/org/apache/sqoop/job/etl/PartitionerContext.java
----------------------------------------------------------------------
diff --git a/common/src/main/java/org/apache/sqoop/job/etl/PartitionerContext.java b/common/src/main/java/org/apache/sqoop/job/etl/PartitionerContext.java
index 67fffd6..bb52bb2 100644
--- a/common/src/main/java/org/apache/sqoop/job/etl/PartitionerContext.java
+++ b/common/src/main/java/org/apache/sqoop/job/etl/PartitionerContext.java
@@ -35,6 +35,8 @@ public class PartitionerContext extends TransferableContext {
 
   private Schema schema;
 
+  private boolean skipMaxPartitionCheck = false;
+
   public PartitionerContext(ImmutableContext context, long maxPartitions, Schema schema) {
     super(context);
     this.maxPartitions = maxPartitions;
@@ -54,6 +56,32 @@ public class PartitionerContext extends TransferableContext {
   }
 
   /**
+   * Set the flag indicating whether to skip the check that the number of
+   * splits does not exceed the maximum number of extractors specified by
+   * the user.
+   *
+   * Needed when the user specifies 1 extractor and also allows null values
+   * in the partitioning column.
+   *
+   * @param skipMaxPartitionCheck true to bypass the check
+   */
+  public void setSkipMaxPartitionCheck(boolean skipMaxPartitionCheck) {
+    this.skipMaxPartitionCheck = skipMaxPartitionCheck;
+  }
+
+  /**
+   * Return the flag indicating whether to skip the check that the number of
+   * splits does not exceed the maximum number of extractors specified by
+   * the user.
+
+   * Needed when the user specifies 1 extractor and also allows null values
+   * in the partitioning column.
+   *
+   * @return true if the check should be skipped
+   */
+  public boolean getSkipMaxPartitionCheck() {
+    return this.skipMaxPartitionCheck;
+  }
+
+  /**
    * Return schema associated with this step.
    *
    * @return

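Taken together with the partitioner and input-format hunks below, the intended flow of the new flag can be sketched as follows. This is a minimal, self-contained sketch with a simplified stand-in for PartitionerContext; the real plumbing between the connector and the MR execution engine is assumed, not shown:

    public class SkipCheckFlowSketch {

      // Stand-in for PartitionerContext carrying the new flag.
      static class Context {
        private boolean skipMaxPartitionCheck = false;
        void setSkipMaxPartitionCheck(boolean skip) { this.skipMaxPartitionCheck = skip; }
        boolean getSkipMaxPartitionCheck() { return skipMaxPartitionCheck; }
      }

      public static void main(String[] args) {
        Context context = new Context();
        int numberPartitions = 1;   // user requested a single extractor
        boolean allowNulls = true;  // NULLs allowed in the partition column

        if (allowNulls) {
          // A dedicated "IS NULL" partition is always emitted first, so either
          // shrink the remaining budget or, when it is already 1, raise the
          // skip flag instead of decrementing to 0.
          if (numberPartitions > 1) {
            numberPartitions -= 1;
          } else {
            context.setSkipMaxPartitionCheck(true);
          }
        }
        // The input format later consults the flag before enforcing maxPartitions.
        System.out.println("skip check = " + context.getSkipMaxPartitionCheck()); // true
      }
    }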
http://git-wip-us.apache.org/repos/asf/sqoop/blob/4a5bd295/connector/connector-generic-jdbc/src/main/java/org/apache/sqoop/connector/jdbc/GenericJdbcPartitioner.java
----------------------------------------------------------------------
diff --git a/connector/connector-generic-jdbc/src/main/java/org/apache/sqoop/connector/jdbc/GenericJdbcPartitioner.java b/connector/connector-generic-jdbc/src/main/java/org/apache/sqoop/connector/jdbc/GenericJdbcPartitioner.java
index 23b57c0..a99b3a9 100644
--- a/connector/connector-generic-jdbc/src/main/java/org/apache/sqoop/connector/jdbc/GenericJdbcPartitioner.java
+++ b/connector/connector-generic-jdbc/src/main/java/org/apache/sqoop/connector/jdbc/GenericJdbcPartitioner.java
@@ -74,7 +74,12 @@ public class GenericJdbcPartitioner extends Partitioner<LinkConfiguration, FromJ
       GenericJdbcPartition partition = new GenericJdbcPartition();
       partition.setConditions(partitionColumnName + " IS NULL");
       partitions.add(partition);
-      numberPartitions -= 1;
+      if (numberPartitions > 1) {
+        numberPartitions -= 1;
+      }
+      else {
+        context.setSkipMaxPartitionCheck(true);
+      }
     }
 
     switch (partitionColumnType) {

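Why the guard matters: the pre-fix code decremented numberPartitions unconditionally, so with a single extractor it reached 0 before the range arithmetic ran. A minimal repro sketch of that failure, assuming (as in the generic JDBC partitioner's numeric case) that the column range is divided by numberPartitions; only the shape of the computation is kept here:

    public class DivisionByZeroRepro {
      public static void main(String[] args) {
        long minValue = 1, maxValue = 4; // range of the partition column
        int numberPartitions = 1;        // throttlingConfig.numExtractors = 1
        boolean allowNulls = true;       // fromJobConfig.allowNullValueInPartitionColumn

        if (allowNulls) {
          numberPartitions -= 1;         // pre-fix: unconditional decrement to 0
        }
        // Pre-fix outcome: java.lang.ArithmeticException: / by zero
        long interval = (maxValue - minValue) / numberPartitions;
        System.out.println("interval = " + interval);
      }
    }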
http://git-wip-us.apache.org/repos/asf/sqoop/blob/4a5bd295/docs/src/site/sphinx/Sqoop5MinutesDemo.rst
----------------------------------------------------------------------
diff --git a/docs/src/site/sphinx/Sqoop5MinutesDemo.rst b/docs/src/site/sphinx/Sqoop5MinutesDemo.rst
index fc9a43b..19115a2 100644
--- a/docs/src/site/sphinx/Sqoop5MinutesDemo.rst
+++ b/docs/src/site/sphinx/Sqoop5MinutesDemo.rst
@@ -184,7 +184,9 @@ Next, we can use the two link Ids to associate the ``From`` and ``To`` for the j
     Loaders:(Optional) 2
     New job was successfully created with validation status OK  and persistent id 1
 
-Our new job object was created with assigned id 1.
+Our new job object was created with assigned id 1. Note that if null values are allowed in the
+partition column, at least 2 extractors are needed for Sqoop to carry out the data transfer. If
+only 1 extractor is specified in this scenario, Sqoop will ignore that setting and continue with 2 extractors.
 
 Start Job ( a.k.a Data transfer )
 =================================

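For reference, the two client-side settings that trigger this scenario, condensed from the integration test added at the end of this commit (imports from org.apache.sqoop.model assumed; link and job creation happen as in the demo):

    import org.apache.sqoop.model.MConfigList;
    import org.apache.sqoop.model.MDriverConfig;
    import org.apache.sqoop.model.MJob;

    public class NullPartitionJobConfig {
      // Allow NULLs in the partition column and request a single extractor;
      // together these exercise the SQOOP-2382 code path.
      static void configure(MJob job) {
        MConfigList configs = job.getFromJobConfig();
        configs.getBooleanInput("fromJobConfig.allowNullValueInPartitionColumn").setValue(true);

        MDriverConfig driverConfig = job.getDriverConfig();
        driverConfig.getIntegerInput("throttlingConfig.numExtractors").setValue(1);
      }
    }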
http://git-wip-us.apache.org/repos/asf/sqoop/blob/4a5bd295/execution/mapreduce/src/main/java/org/apache/sqoop/job/mr/SqoopInputFormat.java
----------------------------------------------------------------------
diff --git a/execution/mapreduce/src/main/java/org/apache/sqoop/job/mr/SqoopInputFormat.java b/execution/mapreduce/src/main/java/org/apache/sqoop/job/mr/SqoopInputFormat.java
index 732ee0a..5aef878 100644
--- a/execution/mapreduce/src/main/java/org/apache/sqoop/job/mr/SqoopInputFormat.java
+++ b/execution/mapreduce/src/main/java/org/apache/sqoop/job/mr/SqoopInputFormat.java
@@ -80,7 +80,9 @@ public class SqoopInputFormat extends InputFormat<SqoopSplit, NullWritable> {
       splits.add(split);
     }
 
-    if(splits.size() > maxPartitions) {
+    // SQOOP-2382: Need to skip this check when extractors is set to 1
+    // and null values are allowed in the partitioning column
+    if(splits.size() > maxPartitions && !partitionerContext.getSkipMaxPartitionCheck()) {
       throw new SqoopException(MRExecutionError.MAPRED_EXEC_0025,
         String.format("Got %d, max was %d", splits.size(), maxPartitions));
     }

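The effect of the guarded check can be illustrated with a small stand-alone sketch (simplified names; IllegalStateException stands in for SqoopException):

    public class MaxPartitionGuardSketch {
      // Simplified stand-in for the guarded check in SqoopInputFormat.getSplits().
      static void enforceMax(int splitCount, long maxPartitions, boolean skipCheck) {
        if (splitCount > maxPartitions && !skipCheck) {
          throw new IllegalStateException(
              String.format("Got %d, max was %d", splitCount, maxPartitions));
        }
      }

      public static void main(String[] args) {
        // SQOOP-2382 case: 2 splits (IS NULL + full range) with numExtractors = 1.
        enforceMax(2, 1, true);          // passes: the flag bypasses the check
        try {
          enforceMax(2, 1, false);       // pre-fix behavior: rejected
        } catch (IllegalStateException e) {
          System.out.println("Rejected: " + e.getMessage());
        }
      }
    }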
http://git-wip-us.apache.org/repos/asf/sqoop/blob/4a5bd295/test/src/test/java/org/apache/sqoop/integration/connector/jdbc/generic/FromRDBMSToHDFSTest.java
----------------------------------------------------------------------
diff --git a/test/src/test/java/org/apache/sqoop/integration/connector/jdbc/generic/FromRDBMSToHDFSTest.java b/test/src/test/java/org/apache/sqoop/integration/connector/jdbc/generic/FromRDBMSToHDFSTest.java
index 52d62a6..1272ed9 100644
--- a/test/src/test/java/org/apache/sqoop/integration/connector/jdbc/generic/FromRDBMSToHDFSTest.java
+++ b/test/src/test/java/org/apache/sqoop/integration/connector/jdbc/generic/FromRDBMSToHDFSTest.java
@@ -270,4 +270,54 @@ public class FromRDBMSToHDFSTest extends ConnectorTestCase {
     // Clean up testing table
     dropTable();
   }
+
+  @Test
+  public void testAllowNullsWithOneExtractor() throws Exception {
+    //Integration test case for SQOOP-2382
+    //Server must not throw an exception when null values are allowed in the
+    //partitioning column and number of extractors is set to only 1
+
+    createAndLoadTableCities();
+
+    // RDBMS link
+    MLink rdbmsConnection = getClient().createLink("generic-jdbc-connector");
+    fillRdbmsLinkConfig(rdbmsConnection);
+    saveLink(rdbmsConnection);
+
+    // HDFS link
+    MLink hdfsConnection = getClient().createLink("hdfs-connector");
+    fillHdfsLink(hdfsConnection);
+    saveLink(hdfsConnection);
+
+    // Job creation
+    MJob job = getClient().createJob(rdbmsConnection.getPersistenceId(), hdfsConnection.getPersistenceId());
+
+    // Set rdbms "FROM" config
+    fillRdbmsFromConfig(job, "id");
+
+    MConfigList configs = job.getFromJobConfig();
+    configs.getBooleanInput("fromJobConfig.allowNullValueInPartitionColumn").setValue(true);
+
+    // fill the hdfs "TO" config
+    fillHdfsToConfig(job, ToFormat.TEXT_FILE);
+    // driver config
+    MDriverConfig driverConfig = job.getDriverConfig();
+    driverConfig.getIntegerInput("throttlingConfig.numExtractors").setValue(1);
+
+    saveJob(job);
+
+    executeJob(job);
+
+    // Assert correct output
+    assertTo(
+            "1,'USA','2004-10-23','San Francisco'",
+            "2,'USA','2004-10-24','Sunnyvale'",
+            "3,'Czech Republic','2004-10-25','Brno'",
+            "4,'USA','2004-10-26','Palo Alto'"
+    );
+
+    // Clean up testing table
+    dropTable();
+  }
+
 }

