sqoop-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jar...@apache.org
Subject git commit: SQOOP-830: HBase import formatting BigDecimal inconsistently
Date Thu, 31 Jan 2013 17:27:23 GMT
Updated Branches:
  refs/heads/trunk 0488503a3 -> 7c5b46fb2


SQOOP-830: HBase import formatting BigDecimal inconsistently

(David Robson via Jarek Jarcec Cecho)


Project: http://git-wip-us.apache.org/repos/asf/sqoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/sqoop/commit/7c5b46fb
Tree: http://git-wip-us.apache.org/repos/asf/sqoop/tree/7c5b46fb
Diff: http://git-wip-us.apache.org/repos/asf/sqoop/diff/7c5b46fb

Branch: refs/heads/trunk
Commit: 7c5b46fb2860e7401e84542a01a61ef415cbe519
Parents: 0488503
Author: Jarek Jarcec Cecho <jarcec@apache.org>
Authored: Thu Jan 31 09:26:42 2013 -0800
Committer: Jarek Jarcec Cecho <jarcec@apache.org>
Committed: Thu Jan 31 09:26:42 2013 -0800

----------------------------------------------------------------------
 src/docs/user/import.txt                           |   34 +++++
 .../org/apache/sqoop/hbase/HBasePutProcessor.java  |    7 +
 .../apache/sqoop/hbase/ToStringPutTransformer.java |   16 ++-
 .../apache/sqoop/mapreduce/AvroImportMapper.java   |   16 ++-
 .../org/apache/sqoop/mapreduce/ImportJobBase.java  |    8 +
 src/java/org/apache/sqoop/orm/ClassWriter.java     |   11 ++
 .../cloudera/sqoop/testutil/BaseSqoopTestCase.java |   54 ++++---
 .../org/apache/sqoop/TestBigDecimalExport.java     |  112 +++++++++++++++
 .../org/apache/sqoop/TestBigDecimalImport.java     |   86 +++++++++++
 9 files changed, 315 insertions(+), 29 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/sqoop/blob/7c5b46fb/src/docs/user/import.txt
----------------------------------------------------------------------
diff --git a/src/docs/user/import.txt b/src/docs/user/import.txt
index 82e74dd..9bc4fc9 100644
--- a/src/docs/user/import.txt
+++ b/src/docs/user/import.txt
@@ -575,6 +575,40 @@ $ sqoop import --table SomeTable --jar-file mydatatypes.jar \
 
 This command will load the +SomeTableType+ class out of +mydatatypes.jar+.
 
+Additional Import Configuration Properties
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+There are some additional properties which can be configured by modifying
++conf/sqoop-site.xml+. Properties can be specified the same as in Hadoop
+configuration files, for example:
+
+----
+  <property>
+    <name>property.name</name>
+    <value>property.value</value>
+  </property>
+----
+
+They can also be specified on the command line in the generic arguments, for
+example:
+
+----
+sqoop import -D property.name=property.value ...
+----
+
+.Additional import configuration properties:
+[grid="all"]
+`-------------------------------------`----------------------------------------
+Argument                               Description
+-------------------------------------------------------------------------------
++sqoop.bigdecimal.format.string+       Controls how BigDecimal columns will be \
+                                       formatted when stored as a String. A   \
+                                       value of +true+ (default) will use     \
+                                       toPlainString to store them without an \
+                                       exponent component (0.0000001); while  \
+                                       a value of +false+ will use toString   \
+                                       which may include an exponent (1E-7)
+-------------------------------------------------------------------------------
+
 
 Example Invocations
 ~~~~~~~~~~~~~~~~~~~

http://git-wip-us.apache.org/repos/asf/sqoop/blob/7c5b46fb/src/java/org/apache/sqoop/hbase/HBasePutProcessor.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/sqoop/hbase/HBasePutProcessor.java b/src/java/org/apache/sqoop/hbase/HBasePutProcessor.java
index 64a1d18..cca641f 100644
--- a/src/java/org/apache/sqoop/hbase/HBasePutProcessor.java
+++ b/src/java/org/apache/sqoop/hbase/HBasePutProcessor.java
@@ -28,6 +28,7 @@ import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hbase.client.HTable;
 import org.apache.hadoop.hbase.client.Put;
 import org.apache.hadoop.util.ReflectionUtils;
+import org.apache.sqoop.mapreduce.ImportJobBase;
 
 import com.cloudera.sqoop.lib.FieldMappable;
 import com.cloudera.sqoop.lib.FieldMapProcessor;
@@ -90,6 +91,12 @@ public class HBasePutProcessor implements Closeable, Configurable,
     this.putTransformer.setColumnFamily(conf.get(COL_FAMILY_KEY, null));
     this.putTransformer.setRowKeyColumn(conf.get(ROW_KEY_COLUMN_KEY, null));
 
+    if (this.putTransformer instanceof ToStringPutTransformer) {
+      ((ToStringPutTransformer) this.putTransformer).bigDecimalFormatString =
+          conf.getBoolean(ImportJobBase.PROPERTY_BIGDECIMAL_FORMAT,
+              ImportJobBase.PROPERTY_BIGDECIMAL_FORMAT_DEFAULT);
+    }
+
     this.tableName = conf.get(TABLE_NAME_KEY, null);
     try {
       this.table = new HTable(conf, this.tableName);

http://git-wip-us.apache.org/repos/asf/sqoop/blob/7c5b46fb/src/java/org/apache/sqoop/hbase/ToStringPutTransformer.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/sqoop/hbase/ToStringPutTransformer.java b/src/java/org/apache/sqoop/hbase/ToStringPutTransformer.java
index 1f52ba9..131fd43 100644
--- a/src/java/org/apache/sqoop/hbase/ToStringPutTransformer.java
+++ b/src/java/org/apache/sqoop/hbase/ToStringPutTransformer.java
@@ -19,6 +19,7 @@
 package org.apache.sqoop.hbase;
 
 import java.io.IOException;
+import java.math.BigDecimal;
 import java.util.Collections;
 import java.util.List;
 import java.util.Map;
@@ -42,6 +43,7 @@ public class ToStringPutTransformer extends PutTransformer {
   // A mapping from field name -> bytes for that field name.
   // Used to cache serialization work done for fields names.
   private Map<String, byte[]> serializedFieldNames;
+  protected boolean bigDecimalFormatString;
 
   public ToStringPutTransformer() {
     serializedFieldNames = new TreeMap<String, byte[]>();
@@ -81,7 +83,7 @@ public class ToStringPutTransformer extends PutTransformer {
       return null;
     }
 
-    Put put = new Put(Bytes.toBytes(rowKey.toString()));
+    Put put = new Put(Bytes.toBytes(toHBaseString(rowKey)));
 
     for (Map.Entry<String, Object> fieldEntry : fields.entrySet()) {
       String colName = fieldEntry.getKey();
@@ -91,7 +93,7 @@ public class ToStringPutTransformer extends PutTransformer {
         Object val = fieldEntry.getValue();
         if (null != val) {
           put.add(colFamilyBytes, getFieldNameBytes(colName),
-              Bytes.toBytes(val.toString()));
+              Bytes.toBytes(toHBaseString(val)));
         }
       }
     }
@@ -99,4 +101,14 @@ public class ToStringPutTransformer extends PutTransformer {
     return Collections.singletonList(put);
   }
 
+  private String toHBaseString(Object val) {
+    String valString;
+    if (val instanceof BigDecimal && bigDecimalFormatString) {
+      valString = ((BigDecimal) val).toPlainString();
+    } else {
+      valString = val.toString();
+    }
+    return valString;
+  }
+
 }

http://git-wip-us.apache.org/repos/asf/sqoop/blob/7c5b46fb/src/java/org/apache/sqoop/mapreduce/AvroImportMapper.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/sqoop/mapreduce/AvroImportMapper.java b/src/java/org/apache/sqoop/mapreduce/AvroImportMapper.java
index 30db288..289eb28 100644
--- a/src/java/org/apache/sqoop/mapreduce/AvroImportMapper.java
+++ b/src/java/org/apache/sqoop/mapreduce/AvroImportMapper.java
@@ -30,6 +30,7 @@ import org.apache.avro.Schema;
 import org.apache.avro.generic.GenericData;
 import org.apache.avro.generic.GenericRecord;
 import org.apache.avro.mapred.AvroWrapper;
+import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.io.BytesWritable;
 import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.io.NullWritable;
@@ -51,13 +52,18 @@ public class AvroImportMapper
     new AvroWrapper<GenericRecord>();
   private Schema schema;
   private LargeObjectLoader lobLoader;
+  private boolean bigDecimalFormatString;
 
   @Override
   protected void setup(Context context)
       throws IOException, InterruptedException {
-    schema = AvroJob.getMapOutputSchema(context.getConfiguration());
-    lobLoader = new LargeObjectLoader(context.getConfiguration(),
+    Configuration conf = context.getConfiguration();
+    schema = AvroJob.getMapOutputSchema(conf);
+    lobLoader = new LargeObjectLoader(conf,
         FileOutputFormat.getWorkOutputPath(context));
+    bigDecimalFormatString = conf.getBoolean(
+        ImportJobBase.PROPERTY_BIGDECIMAL_FORMAT,
+        ImportJobBase.PROPERTY_BIGDECIMAL_FORMAT_DEFAULT);
   }
 
   @Override
@@ -99,7 +105,11 @@ public class AvroImportMapper
    */
   private Object toAvro(Object o) {
     if (o instanceof BigDecimal) {
-      return o.toString();
+      if (bigDecimalFormatString) {
+        return ((BigDecimal)o).toPlainString();
+      } else {
+        return o.toString();
+      }
     } else if (o instanceof Date) {
       return ((Date) o).getTime();
     } else if (o instanceof Time) {

http://git-wip-us.apache.org/repos/asf/sqoop/blob/7c5b46fb/src/java/org/apache/sqoop/mapreduce/ImportJobBase.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/sqoop/mapreduce/ImportJobBase.java b/src/java/org/apache/sqoop/mapreduce/ImportJobBase.java
index f6e2e72..f766532 100644
--- a/src/java/org/apache/sqoop/mapreduce/ImportJobBase.java
+++ b/src/java/org/apache/sqoop/mapreduce/ImportJobBase.java
@@ -58,6 +58,14 @@ public class ImportJobBase extends JobBase {
   public static final Log LOG = LogFactory.getLog(
       ImportJobBase.class.getName());
 
+  /** Controls how java.math.BigDecimal values are converted to Strings.
+   *  If set to true (default), the toPlainString() method is called.
+   *  If set to false, the toString() method is called.
+   */
+  public static final String PROPERTY_BIGDECIMAL_FORMAT =
+      "sqoop.bigdecimal.format.string";
+  public static final boolean PROPERTY_BIGDECIMAL_FORMAT_DEFAULT = true;
+
   public ImportJobBase() {
     this(null);
   }

http://git-wip-us.apache.org/repos/asf/sqoop/blob/7c5b46fb/src/java/org/apache/sqoop/orm/ClassWriter.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/sqoop/orm/ClassWriter.java b/src/java/org/apache/sqoop/orm/ClassWriter.java
index 126b406..136982c 100644
--- a/src/java/org/apache/sqoop/orm/ClassWriter.java
+++ b/src/java/org/apache/sqoop/orm/ClassWriter.java
@@ -33,6 +33,7 @@ import java.util.Set;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.io.BytesWritable;
+import org.apache.sqoop.mapreduce.ImportJobBase;
 
 import com.cloudera.sqoop.SqoopOptions;
 import com.cloudera.sqoop.manager.ConnManager;
@@ -116,6 +117,7 @@ public class ClassWriter {
   private ConnManager connManager;
   private String tableName;
   private CompilationManager compileManager;
+  private boolean bigDecimalFormatString;
 
   /**
    * Creates a new ClassWriter to generate an ORM class for a table
@@ -131,6 +133,9 @@ public class ClassWriter {
     this.connManager = connMgr;
     this.tableName = table;
     this.compileManager = compMgr;
+    this.bigDecimalFormatString = this.options.getConf().getBoolean(
+        ImportJobBase.PROPERTY_BIGDECIMAL_FORMAT,
+        ImportJobBase.PROPERTY_BIGDECIMAL_FORMAT_DEFAULT);
   }
 
   /**
@@ -317,6 +322,12 @@ public class ClassWriter {
       String r = colName  + "==null?\"" + this.options.getNullStringValue()
           + "\":" + colName;
       return r;
+    } else if (javaType.equals("java.math.BigDecimal")
+        && this.bigDecimalFormatString) {
+      // Use toPlainString method for BigDecimals if option is set
+      String r = colName  + "==null?\"" + this.options.getNullNonStringValue()
+          + "\":" + colName + ".toPlainString()";
+      return r;
     } else {
       // This is an object type -- just call its toString() in a null-safe way.
       // Also check if it is null, and instead write the null representation

http://git-wip-us.apache.org/repos/asf/sqoop/blob/7c5b46fb/src/test/com/cloudera/sqoop/testutil/BaseSqoopTestCase.java
----------------------------------------------------------------------
diff --git a/src/test/com/cloudera/sqoop/testutil/BaseSqoopTestCase.java b/src/test/com/cloudera/sqoop/testutil/BaseSqoopTestCase.java
index f9370c4..cf41b96 100644
--- a/src/test/com/cloudera/sqoop/testutil/BaseSqoopTestCase.java
+++ b/src/test/com/cloudera/sqoop/testutil/BaseSqoopTestCase.java
@@ -312,13 +312,9 @@ public abstract class BaseSqoopTestCase extends TestCase {
         for (int i = 0; i < colTypes.length; i++) {
           String colName = BASE_COL_NAME + Integer.toString(i);
           columnDefStr += colName + " " + colTypes[i];
-          columnListStr += colName;
-          valueListStr += vals[i];
           myColNames[i] = colName;
           if (i < colTypes.length - 1) {
             columnDefStr += ", ";
-            columnListStr += ", ";
-            valueListStr += ", ";
           }
         }
 
@@ -344,27 +340,37 @@ public abstract class BaseSqoopTestCase extends TestCase {
         }
       }
 
-      try {
-        String insertValsStr = "INSERT INTO " + getTableName()
-            + "(" + columnListStr + ")"
-            + " VALUES(" + valueListStr + ")";
-        LOG.info("Inserting values: " + insertValsStr);
-        statement = conn.prepareStatement(
-            insertValsStr,
-            ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY);
-        statement.executeUpdate();
-      } catch (SQLException sqlException) {
-        fail("Could not create table: "
-            + StringUtils.stringifyException(sqlException));
-      } finally {
-        if (null != statement) {
-          try {
-            statement.close();
-          } catch (SQLException se) {
-            // Ignore exception on close.
+      if (vals!=null) {
+        for (int i = 0; i < colTypes.length; i++) {
+          columnListStr += myColNames[i];
+          valueListStr += vals[i];
+          if (i < colTypes.length - 1) {
+            columnListStr += ", ";
+            valueListStr += ", ";
+          }
+        }
+        try {
+          String insertValsStr = "INSERT INTO " + getTableName()
+              + "(" + columnListStr + ")"
+              + " VALUES(" + valueListStr + ")";
+          LOG.info("Inserting values: " + insertValsStr);
+          statement = conn.prepareStatement(
+              insertValsStr,
+              ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY);
+          statement.executeUpdate();
+        } catch (SQLException sqlException) {
+          fail("Could not create table: "
+              + StringUtils.stringifyException(sqlException));
+        } finally {
+          if (null != statement) {
+            try {
+              statement.close();
+            } catch (SQLException se) {
+              // Ignore exception on close.
+            }
+
+            statement = null;
           }
-
-          statement = null;
         }
       }
 

http://git-wip-us.apache.org/repos/asf/sqoop/blob/7c5b46fb/src/test/org/apache/sqoop/TestBigDecimalExport.java
----------------------------------------------------------------------
diff --git a/src/test/org/apache/sqoop/TestBigDecimalExport.java b/src/test/org/apache/sqoop/TestBigDecimalExport.java
new file mode 100644
index 0000000..80cdad5
--- /dev/null
+++ b/src/test/org/apache/sqoop/TestBigDecimalExport.java
@@ -0,0 +1,112 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.sqoop;
+
+import java.io.BufferedWriter;
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.io.OutputStreamWriter;
+import java.math.BigDecimal;
+import java.sql.Connection;
+import java.sql.PreparedStatement;
+import java.sql.ResultSet;
+import java.sql.SQLException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+
+import com.cloudera.sqoop.testutil.CommonArgs;
+import com.cloudera.sqoop.testutil.ExportJobTestCase;
+
+/**
+ * Test exporting lines that are created via both options of
+ * sqoop.bigdecimal.format.string parameter.
+ */
+public class TestBigDecimalExport extends ExportJobTestCase {
+
+  private void runBigDecimalExport(String line)
+      throws IOException, SQLException {
+    FileSystem fs = FileSystem.get(getConf());
+    Path tablePath = getTablePath();
+    fs.mkdirs(tablePath);
+    Path filePath = getDataFilePath();
+    DataOutputStream stream = fs.create(filePath);
+    BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(stream));
+    writer.write(line);
+    writer.close();
+    String[] types =
+      { "DECIMAL", "NUMERIC" };
+    createTableWithColTypes(types, null);
+
+    List<String> args = new ArrayList<String>();
+
+    CommonArgs.addHadoopFlags(args);
+
+    args.add("--table");
+    args.add(getTableName());
+    args.add("--export-dir");
+    args.add(tablePath.toString());
+    args.add("--connect");
+    args.add(getConnectString());
+    args.add("-m");
+    args.add("1");
+
+    runExport(args.toArray(new String[args.size()]));
+
+    BigDecimal actual1 = null;
+    BigDecimal actual2 = null;
+
+    Connection conn = getConnection();
+    try {
+      PreparedStatement stmt = conn.prepareStatement("SELECT * FROM "
+          + getTableName());
+      try {
+        ResultSet rs = stmt.executeQuery();
+        try {
+          rs.next();
+          actual1 = rs.getBigDecimal(1);
+          actual2 = rs.getBigDecimal(2);
+        } finally {
+          rs.close();
+        }
+      } finally {
+        stmt.close();
+      }
+    } finally {
+      conn.close();
+    }
+
+    BigDecimal expected1 = new BigDecimal("0.000001");
+    BigDecimal expected2 = new BigDecimal("0.0000001");
+
+    assertEquals(expected1, actual1);
+    assertEquals(expected2, actual2);
+  }
+
+  public void testBigDecimalDefault() throws IOException, SQLException {
+    runBigDecimalExport("0.000001,0.0000001");
+  }
+
+  public void testBigDecimalNoFormat() throws IOException, SQLException {
+    runBigDecimalExport("0.000001,1E-7");
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/sqoop/blob/7c5b46fb/src/test/org/apache/sqoop/TestBigDecimalImport.java
----------------------------------------------------------------------
diff --git a/src/test/org/apache/sqoop/TestBigDecimalImport.java b/src/test/org/apache/sqoop/TestBigDecimalImport.java
new file mode 100644
index 0000000..76e4704
--- /dev/null
+++ b/src/test/org/apache/sqoop/TestBigDecimalImport.java
@@ -0,0 +1,86 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.sqoop;
+
+import java.io.BufferedReader;
+import java.io.DataInputStream;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+
+import com.cloudera.sqoop.testutil.CommonArgs;
+import com.cloudera.sqoop.testutil.ImportJobTestCase;
+
+/**
+ * Test the sqoop.bigdecimal.format.string parameter default behavior and when
+ * set to false.
+ */
+public class TestBigDecimalImport extends ImportJobTestCase {
+
+  private String runBigDecimalImport(List<String> extraArgs)
+      throws IOException {
+    String[] types =
+      { "DECIMAL", "NUMERIC" };
+    String[] vals = { "0.000001", "0.0000001" };
+    createTableWithColTypes(types, vals);
+    List<String> args = new ArrayList<String>();
+
+    CommonArgs.addHadoopFlags(args);
+
+    if (extraArgs!=null) {
+      args.addAll(extraArgs);
+    }
+    args.add("--table");
+    args.add(getTableName());
+    args.add("--warehouse-dir");
+    args.add(getWarehouseDir());
+    args.add("--connect");
+    args.add(getConnectString());
+    args.add("-m");
+    args.add("1");
+
+    runImport(args.toArray(new String[args.size()]));
+
+    Path outputFile = getDataFilePath();
+    FileSystem fs = FileSystem.get(getConf());
+    DataInputStream stream = fs.open(outputFile);
+    BufferedReader reader = new BufferedReader(new InputStreamReader(stream));
+    String line = reader.readLine();
+    reader.close();
+    return line;
+  }
+
+  public void testBigDecimalDefault() throws IOException {
+    String line = runBigDecimalImport(null);
+    assertEquals("0.000001,0.0000001", line);
+  }
+
+  public void testBigDecimalNoFormat() throws IOException {
+    List<String> args = new ArrayList<String>();
+    args.add("-Dsqoop.bigdecimal.format.string=false");
+
+    String line = runBigDecimalImport(args);
+    assertEquals("0.000001,1E-7", line);
+  }
+
+}


Mime
View raw message