sqoop-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jar...@apache.org
Subject sqoop git commit: SQOOP-2998: Sqoop2: Strings with a ' (single quote) read from hdfs may be improperly escaped
Date Tue, 30 Aug 2016 15:00:18 GMT
Repository: sqoop
Updated Branches:
  refs/heads/sqoop2 f972e2cb9 -> d3955ab57


SQOOP-2998: Sqoop2: Strings with a ' (single quote) read from hdfs may be improperly escaped

(Abraham Fine via Jarek Jarcec Cecho)


Project: http://git-wip-us.apache.org/repos/asf/sqoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/sqoop/commit/d3955ab5
Tree: http://git-wip-us.apache.org/repos/asf/sqoop/tree/d3955ab5
Diff: http://git-wip-us.apache.org/repos/asf/sqoop/diff/d3955ab5

Branch: refs/heads/sqoop2
Commit: d3955ab5702cc1d9ce05d7cb7834c7200fa0d48c
Parents: f972e2c
Author: Jarek Jarcec Cecho <jarcec@apache.org>
Authored: Tue Aug 30 07:55:07 2016 -0700
Committer: Jarek Jarcec Cecho <jarcec@apache.org>
Committed: Tue Aug 30 07:55:07 2016 -0700

----------------------------------------------------------------------
 .../apache/sqoop/connector/hdfs/HdfsLoader.java | 28 +++---
 .../hdfs/hdfsWriter/GenericHdfsWriter.java      |  2 +-
 .../hdfs/hdfsWriter/HdfsParquetWriter.java      |  4 +-
 .../hdfs/hdfsWriter/HdfsSequenceWriter.java     | 15 ++-
 .../hdfs/hdfsWriter/HdfsTextWriter.java         | 12 ++-
 .../apache/sqoop/connector/hdfs/TestLoader.java | 23 ++++-
 .../java/org/apache/sqoop/test/data/Cities.java |  1 +
 .../connector/hdfs/AppendModeTest.java          |  7 +-
 .../connector/hdfs/FromHDFSToHDFSTest.java      |  3 +-
 .../connector/hdfs/HdfsIncrementalReadTest.java |  6 +-
 .../connector/hdfs/NullValueTest.java           |  9 +-
 .../connector/hdfs/OutputDirectoryTest.java     |  3 +-
 .../integration/connector/hdfs/ParquetTest.java | 98 ++++++++++++--------
 .../integration/connector/hdfs/S3Test.java      |  4 +-
 .../jdbc/generic/FromHDFSToRDBMSTest.java       |  6 +-
 .../jdbc/generic/FromRDBMSToHDFSTest.java       | 15 ++-
 .../connector/kite/FromRDBMSToKiteTest.java     |  3 +-
 17 files changed, 156 insertions(+), 83 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/sqoop/blob/d3955ab5/connector/connector-hdfs/src/main/java/org/apache/sqoop/connector/hdfs/HdfsLoader.java
----------------------------------------------------------------------
diff --git a/connector/connector-hdfs/src/main/java/org/apache/sqoop/connector/hdfs/HdfsLoader.java
b/connector/connector-hdfs/src/main/java/org/apache/sqoop/connector/hdfs/HdfsLoader.java
index 7cef93c..05c132e 100644
--- a/connector/connector-hdfs/src/main/java/org/apache/sqoop/connector/hdfs/HdfsLoader.java
+++ b/connector/connector-hdfs/src/main/java/org/apache/sqoop/connector/hdfs/HdfsLoader.java
@@ -92,24 +92,20 @@ public class HdfsLoader extends Loader<LinkConfiguration, ToJobConfiguration>
{
 
           filewriter.initialize(filepath, context.getSchema(), conf, codec);
 
-      if (!HdfsUtils.hasCustomFormat(linkConfiguration, toJobConfig) || (context.getSchema()
instanceof ByteArraySchema)) {
-        String record;
-        while ((record = reader.readTextRecord()) != null) {
-          if (context.getSchema() instanceof ByteArraySchema) {
-            filewriter.write(SqoopIDFUtils.toText(record));
+
+          String nullValue;
+          if (!HdfsUtils.hasCustomFormat(linkConfiguration, toJobConfig) || (context.getSchema()
instanceof ByteArraySchema)) {
+            nullValue = SqoopIDFUtils.DEFAULT_NULL_VALUE;
           } else {
-            filewriter.write(record);
+            nullValue = toJobConfig.toJobConfig.nullValue;
           }
-          rowsWritten++;
-        }
-      } else {
-        Object[] record;
-        while ((record = reader.readArrayRecord()) != null) {
-          filewriter.write(SqoopIDFUtils.toCSV(record, context.getSchema(), toJobConfig.toJobConfig.nullValue));
-          rowsWritten++;
-        }
-      }
-      filewriter.destroy();
+
+          Object[] record;
+          while ((record = reader.readArrayRecord()) != null) {
+            filewriter.write(record, nullValue);
+            rowsWritten++;
+          }
+          filewriter.destroy();
 
         } catch (IOException e) {
           throw new SqoopException(HdfsConnectorError.GENERIC_HDFS_CONNECTOR_0005, e);

http://git-wip-us.apache.org/repos/asf/sqoop/blob/d3955ab5/connector/connector-hdfs/src/main/java/org/apache/sqoop/connector/hdfs/hdfsWriter/GenericHdfsWriter.java
----------------------------------------------------------------------
diff --git a/connector/connector-hdfs/src/main/java/org/apache/sqoop/connector/hdfs/hdfsWriter/GenericHdfsWriter.java
b/connector/connector-hdfs/src/main/java/org/apache/sqoop/connector/hdfs/hdfsWriter/GenericHdfsWriter.java
index 31023e7..63c5e0d 100644
--- a/connector/connector-hdfs/src/main/java/org/apache/sqoop/connector/hdfs/hdfsWriter/GenericHdfsWriter.java
+++ b/connector/connector-hdfs/src/main/java/org/apache/sqoop/connector/hdfs/hdfsWriter/GenericHdfsWriter.java
@@ -28,7 +28,7 @@ public abstract class GenericHdfsWriter {
 
   public abstract void initialize(Path filepath, Schema schema, Configuration conf, CompressionCodec
codec) throws IOException;
 
-  public abstract void write(String csv) throws IOException;
+  public abstract void write(Object[] record, String nullValue) throws IOException;
 
   public abstract void destroy() throws IOException;
 

http://git-wip-us.apache.org/repos/asf/sqoop/blob/d3955ab5/connector/connector-hdfs/src/main/java/org/apache/sqoop/connector/hdfs/hdfsWriter/HdfsParquetWriter.java
----------------------------------------------------------------------
diff --git a/connector/connector-hdfs/src/main/java/org/apache/sqoop/connector/hdfs/hdfsWriter/HdfsParquetWriter.java
b/connector/connector-hdfs/src/main/java/org/apache/sqoop/connector/hdfs/hdfsWriter/HdfsParquetWriter.java
index 4ec813b..a478b8a 100644
--- a/connector/connector-hdfs/src/main/java/org/apache/sqoop/connector/hdfs/hdfsWriter/HdfsParquetWriter.java
+++ b/connector/connector-hdfs/src/main/java/org/apache/sqoop/connector/hdfs/hdfsWriter/HdfsParquetWriter.java
@@ -55,8 +55,8 @@ public class HdfsParquetWriter extends GenericHdfsWriter {
   }
 
   @Override
-  public void write(String csv) throws IOException {
-    avroParquetWriter.write(avroIntermediateDataFormat.toAVRO(csv));
+  public void write(Object[] record, String nullValue) throws IOException {
+    avroParquetWriter.write(avroIntermediateDataFormat.toAVRO(record));
   }
 
   @Override

http://git-wip-us.apache.org/repos/asf/sqoop/blob/d3955ab5/connector/connector-hdfs/src/main/java/org/apache/sqoop/connector/hdfs/hdfsWriter/HdfsSequenceWriter.java
----------------------------------------------------------------------
diff --git a/connector/connector-hdfs/src/main/java/org/apache/sqoop/connector/hdfs/hdfsWriter/HdfsSequenceWriter.java
b/connector/connector-hdfs/src/main/java/org/apache/sqoop/connector/hdfs/hdfsWriter/HdfsSequenceWriter.java
index dcce861..482321a 100644
--- a/connector/connector-hdfs/src/main/java/org/apache/sqoop/connector/hdfs/hdfsWriter/HdfsSequenceWriter.java
+++ b/connector/connector-hdfs/src/main/java/org/apache/sqoop/connector/hdfs/hdfsWriter/HdfsSequenceWriter.java
@@ -23,6 +23,8 @@ import org.apache.hadoop.io.NullWritable;
 import org.apache.hadoop.io.SequenceFile;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.io.compress.CompressionCodec;
+import org.apache.sqoop.connector.common.SqoopIDFUtils;
+import org.apache.sqoop.schema.ByteArraySchema;
 import org.apache.sqoop.schema.Schema;
 
 import java.io.IOException;
@@ -31,9 +33,11 @@ public class HdfsSequenceWriter extends GenericHdfsWriter {
 
   private SequenceFile.Writer filewriter;
   private Text text;
+  private Schema schema;
 
   @SuppressWarnings("deprecation")
   public void initialize(Path filepath, Schema schema, Configuration conf, CompressionCodec
codec) throws IOException {
+    this.schema = schema;
     if (codec != null) {
       filewriter = SequenceFile.createWriter(filepath.getFileSystem(conf),
               conf, filepath, Text.class, NullWritable.class,
@@ -47,9 +51,14 @@ public class HdfsSequenceWriter extends GenericHdfsWriter {
   }
 
   @Override
-  public void write(String csv) throws IOException {
-    text.set(csv);
-      filewriter.append(text, NullWritable.get());
+  public void write(Object[] record, String nullValue) throws IOException {
+    if (schema instanceof ByteArraySchema) {
+      text.set(new String(((byte[]) record[0]), SqoopIDFUtils.BYTE_FIELD_CHARSET));
+    } else {
+      text.set(SqoopIDFUtils.toCSV(record, schema, nullValue));
+    }
+
+    filewriter.append(text, NullWritable.get());
   }
 
   public void destroy() throws IOException {

http://git-wip-us.apache.org/repos/asf/sqoop/blob/d3955ab5/connector/connector-hdfs/src/main/java/org/apache/sqoop/connector/hdfs/hdfsWriter/HdfsTextWriter.java
----------------------------------------------------------------------
diff --git a/connector/connector-hdfs/src/main/java/org/apache/sqoop/connector/hdfs/hdfsWriter/HdfsTextWriter.java
b/connector/connector-hdfs/src/main/java/org/apache/sqoop/connector/hdfs/hdfsWriter/HdfsTextWriter.java
index 384e330..c1bea3c 100644
--- a/connector/connector-hdfs/src/main/java/org/apache/sqoop/connector/hdfs/hdfsWriter/HdfsTextWriter.java
+++ b/connector/connector-hdfs/src/main/java/org/apache/sqoop/connector/hdfs/hdfsWriter/HdfsTextWriter.java
@@ -22,7 +22,9 @@ import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.compress.CompressionCodec;
+import org.apache.sqoop.connector.common.SqoopIDFUtils;
 import org.apache.sqoop.connector.hdfs.HdfsConstants;
+import org.apache.sqoop.schema.ByteArraySchema;
 import org.apache.sqoop.schema.Schema;
 
 import java.io.BufferedWriter;
@@ -33,9 +35,11 @@ import java.io.OutputStreamWriter;
 public class HdfsTextWriter extends GenericHdfsWriter {
 
   private BufferedWriter filewriter;
+  private Schema schema;
 
   @Override
   public void initialize(Path filepath, Schema schema, Configuration conf, CompressionCodec
codec) throws IOException {
+    this.schema = schema;
     FileSystem fs = filepath.getFileSystem(conf);
 
     DataOutputStream filestream = fs.create(filepath, false);
@@ -50,8 +54,12 @@ public class HdfsTextWriter extends GenericHdfsWriter {
   }
 
   @Override
-  public void write(String csv) throws IOException {
-    filewriter.write(csv + HdfsConstants.DEFAULT_RECORD_DELIMITER);
+  public void write(Object[] record, String nullValue) throws IOException {
+    if (schema instanceof ByteArraySchema) {
+      filewriter.write(new String(((byte[]) record[0]), SqoopIDFUtils.BYTE_FIELD_CHARSET)
+ HdfsConstants.DEFAULT_RECORD_DELIMITER);
+    } else {
+      filewriter.write(SqoopIDFUtils.toCSV(record, schema, nullValue) + HdfsConstants.DEFAULT_RECORD_DELIMITER);
+    }
 
   }
 

http://git-wip-us.apache.org/repos/asf/sqoop/blob/d3955ab5/connector/connector-hdfs/src/test/java/org/apache/sqoop/connector/hdfs/TestLoader.java
----------------------------------------------------------------------
diff --git a/connector/connector-hdfs/src/test/java/org/apache/sqoop/connector/hdfs/TestLoader.java
b/connector/connector-hdfs/src/test/java/org/apache/sqoop/connector/hdfs/TestLoader.java
index cbd555a..602f4a9 100644
--- a/connector/connector-hdfs/src/test/java/org/apache/sqoop/connector/hdfs/TestLoader.java
+++ b/connector/connector-hdfs/src/test/java/org/apache/sqoop/connector/hdfs/TestLoader.java
@@ -20,6 +20,7 @@ package org.apache.sqoop.connector.hdfs;
 import java.io.BufferedReader;
 import java.io.IOException;
 import java.io.InputStreamReader;
+import java.lang.reflect.Method;
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.List;
@@ -53,6 +54,7 @@ import org.apache.sqoop.schema.type.FloatingPoint;
 import org.apache.sqoop.schema.type.Text;
 import org.apache.sqoop.utils.ClassUtils;
 import org.testng.Assert;
+import org.testng.ITest;
 import org.testng.annotations.AfterMethod;
 import org.testng.annotations.BeforeMethod;
 import org.testng.annotations.DataProvider;
@@ -63,7 +65,7 @@ import static org.apache.sqoop.connector.hdfs.configuration.ToFormat.PARQUET_FIL
 import static org.apache.sqoop.connector.hdfs.configuration.ToFormat.SEQUENCE_FILE;
 import static org.apache.sqoop.connector.hdfs.configuration.ToFormat.TEXT_FILE;
 
-public class TestLoader extends TestHdfsBase {
+public class TestLoader extends TestHdfsBase implements ITest {
   private static final String INPUT_ROOT = System.getProperty("maven.build.directory", "/tmp")
+ "/sqoop/warehouse/";
   private static final int NUMBER_OF_ROWS_PER_FILE = 1000;
 
@@ -74,6 +76,8 @@ public class TestLoader extends TestHdfsBase {
   private String user = "test_user";
   private Schema schema;
 
+  private String methodName;
+
   @Factory(dataProvider="test-hdfs-loader")
   public TestLoader(ToFormat outputFormat,
                     ToCompression compression)
@@ -100,14 +104,21 @@ public class TestLoader extends TestHdfsBase {
     return parameters.toArray(new Object[0][]);
   }
 
-  @BeforeMethod(alwaysRun = true)
-  public void setUp() throws Exception {}
+  @BeforeMethod
+  public void findMethodName(Method method) {
+    methodName = method.getName();
+  }
 
   @AfterMethod(alwaysRun = true)
   public void tearDown() throws IOException {
     FileUtils.delete(outputDirectory);
   }
 
+  @Override
+  public String getTestName() {
+    return methodName + "[" + outputFormat.name() + ", " + compression + "]";
+  }
+
   @Test
   public void testLoader() throws Exception {
     FileSystem fs = FileSystem.get(new Configuration());
@@ -123,7 +134,11 @@ public class TestLoader extends TestHdfsBase {
       @Override
       public Object[] readArrayRecord() {
         assertTestUser(user);
-        return null;
+        if (index++ < NUMBER_OF_ROWS_PER_FILE) {
+          return new Object[] {index, (float)index, String.valueOf(index)};
+        } else {
+          return null;
+        }
       }
 
       @Override

http://git-wip-us.apache.org/repos/asf/sqoop/blob/d3955ab5/test/src/main/java/org/apache/sqoop/test/data/Cities.java
----------------------------------------------------------------------
diff --git a/test/src/main/java/org/apache/sqoop/test/data/Cities.java b/test/src/main/java/org/apache/sqoop/test/data/Cities.java
index f2c69bb..a3c6e0d 100644
--- a/test/src/main/java/org/apache/sqoop/test/data/Cities.java
+++ b/test/src/main/java/org/apache/sqoop/test/data/Cities.java
@@ -51,6 +51,7 @@ public class Cities extends DataSet {
     provider.insertRow(tableBaseName, 2, "USA", Timestamp.valueOf("2004-10-24 00:00:00.000"),
"Sunnyvale");
     provider.insertRow(tableBaseName, 3, "Czech Republic", Timestamp.valueOf("2004-10-25
00:00:00.000"), "Brno");
     provider.insertRow(tableBaseName, 4, "USA", Timestamp.valueOf("2004-10-26 00:00:00.000"),
"Palo Alto");
+    provider.insertRow(tableBaseName, 5, "USA", Timestamp.valueOf("2004-10-27 00:00:00.000"),
"Martha's Vineyard");
 
     return this;
   }

http://git-wip-us.apache.org/repos/asf/sqoop/blob/d3955ab5/test/src/test/java/org/apache/sqoop/integration/connector/hdfs/AppendModeTest.java
----------------------------------------------------------------------
diff --git a/test/src/test/java/org/apache/sqoop/integration/connector/hdfs/AppendModeTest.java
b/test/src/test/java/org/apache/sqoop/integration/connector/hdfs/AppendModeTest.java
index 6885525..8ff52dd 100644
--- a/test/src/test/java/org/apache/sqoop/integration/connector/hdfs/AppendModeTest.java
+++ b/test/src/test/java/org/apache/sqoop/integration/connector/hdfs/AppendModeTest.java
@@ -66,7 +66,8 @@ public class AppendModeTest extends SqoopTestCase {
       "1,'USA','2004-10-23 00:00:00.000','San Francisco'",
       "2,'USA','2004-10-24 00:00:00.000','Sunnyvale'",
       "3,'Czech Republic','2004-10-25 00:00:00.000','Brno'",
-      "4,'USA','2004-10-26 00:00:00.000','Palo Alto'"
+      "4,'USA','2004-10-26 00:00:00.000','Palo Alto'",
+      "5,'USA','2004-10-27 00:00:00.000','Martha\\'s Vineyard'"
     );
 
     // Second execution
@@ -76,10 +77,12 @@ public class AppendModeTest extends SqoopTestCase {
       "2,'USA','2004-10-24 00:00:00.000','Sunnyvale'",
       "3,'Czech Republic','2004-10-25 00:00:00.000','Brno'",
       "4,'USA','2004-10-26 00:00:00.000','Palo Alto'",
+      "5,'USA','2004-10-27 00:00:00.000','Martha\\'s Vineyard'",
       "1,'USA','2004-10-23 00:00:00.000','San Francisco'",
       "2,'USA','2004-10-24 00:00:00.000','Sunnyvale'",
       "3,'Czech Republic','2004-10-25 00:00:00.000','Brno'",
-      "4,'USA','2004-10-26 00:00:00.000','Palo Alto'"
+      "4,'USA','2004-10-26 00:00:00.000','Palo Alto'",
+      "5,'USA','2004-10-27 00:00:00.000','Martha\\'s Vineyard'"
     );
 
     dropTable();

http://git-wip-us.apache.org/repos/asf/sqoop/blob/d3955ab5/test/src/test/java/org/apache/sqoop/integration/connector/hdfs/FromHDFSToHDFSTest.java
----------------------------------------------------------------------
diff --git a/test/src/test/java/org/apache/sqoop/integration/connector/hdfs/FromHDFSToHDFSTest.java
b/test/src/test/java/org/apache/sqoop/integration/connector/hdfs/FromHDFSToHDFSTest.java
index c6ce1e8..c2803de 100644
--- a/test/src/test/java/org/apache/sqoop/integration/connector/hdfs/FromHDFSToHDFSTest.java
+++ b/test/src/test/java/org/apache/sqoop/integration/connector/hdfs/FromHDFSToHDFSTest.java
@@ -43,7 +43,8 @@ public class FromHDFSToHDFSTest extends SqoopTestCase {
       "1,'USA','2004-10-23 00:00:00.000','San Francisco'",
       "2,'USA','2004-10-24 00:00:00.000','Sunnyvale'",
       "3,'Czech Republic','2004-10-25 00:00:00.000','Brno'",
-      "4,'USA','2004-10-26 00:00:00.000','Palo Alto'"
+      "4,'USA','2004-10-26 00:00:00.000','Palo Alto'",
+      "5,'USA','2004-10-27 00:00:00.000','Martha\\'s Vineyard'"
     };
 
     createFromFile("input-0001", sampleData);

http://git-wip-us.apache.org/repos/asf/sqoop/blob/d3955ab5/test/src/test/java/org/apache/sqoop/integration/connector/hdfs/HdfsIncrementalReadTest.java
----------------------------------------------------------------------
diff --git a/test/src/test/java/org/apache/sqoop/integration/connector/hdfs/HdfsIncrementalReadTest.java
b/test/src/test/java/org/apache/sqoop/integration/connector/hdfs/HdfsIncrementalReadTest.java
index 37306e2..592e8cc 100644
--- a/test/src/test/java/org/apache/sqoop/integration/connector/hdfs/HdfsIncrementalReadTest.java
+++ b/test/src/test/java/org/apache/sqoop/integration/connector/hdfs/HdfsIncrementalReadTest.java
@@ -88,14 +88,16 @@ public class HdfsIncrementalReadTest extends SqoopTestCase {
 
     // And last execution
     createFromFile("input-0003",
-      "4,'USA','2004-10-26 00:00:00.000','Palo Alto'"
+      "4,'USA','2004-10-26 00:00:00.000','Palo Alto'",
+      "5,'USA','2004-10-27 00:00:00.000','Martha\\'s Vineyard'"
     );
     executeJob(job);
-    assertEquals(provider.rowCount(getTableName()), 4);
+    assertEquals(provider.rowCount(getTableName()), 5);
     assertRowInCities(1, "USA", Timestamp.valueOf("2004-10-23 00:00:00.000"), "San Francisco");
     assertRowInCities(2, "USA", Timestamp.valueOf("2004-10-24 00:00:00.000"), "Sunnyvale");
     assertRowInCities(3, "Czech Republic", Timestamp.valueOf("2004-10-25 00:00:00.000"),
"Brno");
     assertRowInCities(4, "USA", Timestamp.valueOf("2004-10-26 00:00:00.000"), "Palo Alto");
+    assertRowInCities(5, "USA", Timestamp.valueOf("2004-10-27 00:00:00.000"), "Martha's Vineyard");
   }
 
 }

http://git-wip-us.apache.org/repos/asf/sqoop/blob/d3955ab5/test/src/test/java/org/apache/sqoop/integration/connector/hdfs/NullValueTest.java
----------------------------------------------------------------------
diff --git a/test/src/test/java/org/apache/sqoop/integration/connector/hdfs/NullValueTest.java
b/test/src/test/java/org/apache/sqoop/integration/connector/hdfs/NullValueTest.java
index 1e8c688..a196034 100644
--- a/test/src/test/java/org/apache/sqoop/integration/connector/hdfs/NullValueTest.java
+++ b/test/src/test/java/org/apache/sqoop/integration/connector/hdfs/NullValueTest.java
@@ -124,7 +124,8 @@ public class NullValueTest extends SqoopTestCase {
       "1,'USA','2004-10-23 00:00:00.000','San Francisco'",
       "2,'USA','2004-10-24 00:00:00.000'," + nullValue,
       "3," + nullValue + ",'2004-10-25 00:00:00.000','Brno'",
-      "4,'USA','2004-10-26 00:00:00.000','Palo Alto'"
+      "4,'USA','2004-10-26 00:00:00.000','Palo Alto'",
+      "5,'USA','2004-10-27 00:00:00.000','Martha\\'s Vineyard'"
     };
   }
 
@@ -164,7 +165,7 @@ public class NullValueTest extends SqoopTestCase {
             sqoopSchema, conf, null);
 
           for (String line : getCsv()) {
-            parquetWriter.write(line);
+            parquetWriter.write(SqoopIDFUtils.fromCSV(line, sqoopSchema), SqoopIDFUtils.DEFAULT_NULL_VALUE);
           }
 
           parquetWriter.destroy();
@@ -199,11 +200,12 @@ public class NullValueTest extends SqoopTestCase {
 
     executeJob(job);
 
-    Assert.assertEquals(4L, provider.rowCount(getTableName()));
+    Assert.assertEquals(5L, provider.rowCount(getTableName()));
     assertRowInCities(1, "USA", Timestamp.valueOf("2004-10-23 00:00:00.000"), "San Francisco");
     assertRowInCities(2, "USA", Timestamp.valueOf("2004-10-24 00:00:00.000"), (String) null);
     assertRowInCities(3, (String) null, Timestamp.valueOf("2004-10-25 00:00:00.000"), "Brno");
     assertRowInCities(4, "USA", Timestamp.valueOf("2004-10-26 00:00:00.000"), "Palo Alto");
+    assertRowInCities(5, "USA", Timestamp.valueOf("2004-10-27 00:00:00.000"), "Martha's Vineyard");
   }
 
   @Test
@@ -217,6 +219,7 @@ public class NullValueTest extends SqoopTestCase {
     provider.insertRow(getTableName(), 2, "USA", Timestamp.valueOf("2004-10-24 00:00:00.000"),
(String) null);
     provider.insertRow(getTableName(), 3, (String) null, Timestamp.valueOf("2004-10-25 00:00:00.000"),
"Brno");
     provider.insertRow(getTableName(), 4, "USA", Timestamp.valueOf("2004-10-26 00:00:00.000"),
"Palo Alto");
+    provider.insertRow(getTableName(), 5, "USA", Timestamp.valueOf("2004-10-27 00:00:00.000"),
"Martha's Vineyard");
 
     MLink rdbmsLinkFrom = getClient().createLink("generic-jdbc-connector");
     fillRdbmsLinkConfig(rdbmsLinkFrom);

http://git-wip-us.apache.org/repos/asf/sqoop/blob/d3955ab5/test/src/test/java/org/apache/sqoop/integration/connector/hdfs/OutputDirectoryTest.java
----------------------------------------------------------------------
diff --git a/test/src/test/java/org/apache/sqoop/integration/connector/hdfs/OutputDirectoryTest.java
b/test/src/test/java/org/apache/sqoop/integration/connector/hdfs/OutputDirectoryTest.java
index 330da56..722c126 100644
--- a/test/src/test/java/org/apache/sqoop/integration/connector/hdfs/OutputDirectoryTest.java
+++ b/test/src/test/java/org/apache/sqoop/integration/connector/hdfs/OutputDirectoryTest.java
@@ -145,7 +145,8 @@ public class OutputDirectoryTest extends SqoopTestCase {
       "1,'USA','2004-10-23 00:00:00.000','San Francisco'",
       "2,'USA','2004-10-24 00:00:00.000','Sunnyvale'",
       "3,'Czech Republic','2004-10-25 00:00:00.000','Brno'",
-      "4,'USA','2004-10-26 00:00:00.000','Palo Alto'"
+      "4,'USA','2004-10-26 00:00:00.000','Palo Alto'",
+      "5,'USA','2004-10-27 00:00:00.000','Martha\\'s Vineyard'"
     );
 
     dropTable();

http://git-wip-us.apache.org/repos/asf/sqoop/blob/d3955ab5/test/src/test/java/org/apache/sqoop/integration/connector/hdfs/ParquetTest.java
----------------------------------------------------------------------
diff --git a/test/src/test/java/org/apache/sqoop/integration/connector/hdfs/ParquetTest.java
b/test/src/test/java/org/apache/sqoop/integration/connector/hdfs/ParquetTest.java
index d55563d..e8f2b1a 100644
--- a/test/src/test/java/org/apache/sqoop/integration/connector/hdfs/ParquetTest.java
+++ b/test/src/test/java/org/apache/sqoop/integration/connector/hdfs/ParquetTest.java
@@ -17,14 +17,16 @@
  */
 package org.apache.sqoop.integration.connector.hdfs;
 
-import com.google.common.collect.HashMultiset;
-import com.google.common.collect.Multiset;
+import org.apache.avro.generic.GenericData;
 import org.apache.avro.generic.GenericRecord;
+import org.apache.avro.generic.GenericRecordBuilder;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.parquet.avro.AvroParquetReader;
 import org.apache.parquet.hadoop.ParquetReader;
+import org.apache.sqoop.connector.common.SqoopAvroUtils;
+import org.apache.sqoop.connector.common.SqoopIDFUtils;
 import org.apache.sqoop.connector.hdfs.configuration.ToFormat;
 import org.apache.sqoop.connector.hdfs.hdfsWriter.HdfsParquetWriter;
 import org.apache.sqoop.model.MJob;
@@ -40,20 +42,37 @@ import org.apache.sqoop.test.infrastructure.providers.HadoopInfrastructureProvid
 import org.apache.sqoop.test.infrastructure.providers.KdcInfrastructureProvider;
 import org.apache.sqoop.test.infrastructure.providers.SqoopInfrastructureProvider;
 import org.apache.sqoop.test.utils.HdfsUtils;
+import org.testng.Assert;
 import org.testng.annotations.AfterMethod;
+import org.testng.annotations.BeforeClass;
 import org.testng.annotations.Test;
 
 import java.sql.Timestamp;
+import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.LinkedList;
 import java.util.List;
 
-import static org.testng.Assert.assertEquals;
 import static org.testng.Assert.fail;
 
 @Infrastructure(dependencies = {KdcInfrastructureProvider.class, HadoopInfrastructureProvider.class,
SqoopInfrastructureProvider.class, DatabaseInfrastructureProvider.class})
 public class ParquetTest extends SqoopTestCase {
 
+  private Schema sqoopSchema;
+  private org.apache.avro.Schema avroSchema;
+
+  @BeforeClass
+  public void setUp() {
+    sqoopSchema = new Schema("ParquetTest");
+    sqoopSchema.addColumn(new FixedPoint("id", Long.valueOf(Integer.SIZE / Byte.SIZE), true));
+    sqoopSchema.addColumn(new Text("country"));
+    sqoopSchema.addColumn(new DateTime("some_date", true, false));
+    sqoopSchema.addColumn(new Text("city"));
+
+    avroSchema = SqoopAvroUtils.createAvroSchema(sqoopSchema);
+  }
+
+
   @AfterMethod
   public void dropTable() {
     super.dropTable();
@@ -94,30 +113,48 @@ public class ParquetTest extends SqoopTestCase {
     saveJob(job);
     executeJob(job);
 
-    String[] expectedOutput =
-      {"'1','USA','2004-10-23 00:00:00.000','San Francisco'",
-        "'2','USA','2004-10-24 00:00:00.000','Sunnyvale'",
-        "'3','Czech Republic','2004-10-25 00:00:00.000','Brno'",
-        "'4','USA','2004-10-26 00:00:00.000','Palo Alto'"};
-
-
-    Multiset<String> setLines = HashMultiset.create(Arrays.asList(expectedOutput));
-
-    List<String> notFound = new LinkedList<>();
+    List<GenericData.Record> expectedAvroRecords = new ArrayList<>();
+    expectedAvroRecords.addAll(Arrays.asList(
+            new GenericRecordBuilder(avroSchema)
+                    .set(SqoopAvroUtils.createAvroName("id"), 1)
+                    .set(SqoopAvroUtils.createAvroName("country"), "USA")
+                    .set(SqoopAvroUtils.createAvroName("some_date"), new org.joda.time.DateTime(2004,
10, 23, 0, 0, 0, 0).toDate().getTime())
+                    .set(SqoopAvroUtils.createAvroName("city"), "San Francisco").build(),
+            new GenericRecordBuilder(avroSchema)
+                    .set(SqoopAvroUtils.createAvroName("id"), 2)
+                    .set(SqoopAvroUtils.createAvroName("country"), "USA")
+                    .set(SqoopAvroUtils.createAvroName("some_date"), new org.joda.time.DateTime(2004,
10, 24, 0, 0, 0, 0).toDate().getTime())
+                    .set(SqoopAvroUtils.createAvroName("city"), "Sunnyvale").build(),
+            new GenericRecordBuilder(avroSchema)
+                    .set(SqoopAvroUtils.createAvroName("id"), 3)
+                    .set(SqoopAvroUtils.createAvroName("country"), "Czech Republic")
+                    .set(SqoopAvroUtils.createAvroName("some_date"), new org.joda.time.DateTime(2004,
10, 25, 0, 0, 0, 0).toDate().getTime())
+                    .set(SqoopAvroUtils.createAvroName("city"), "Brno").build(),
+            new GenericRecordBuilder(avroSchema)
+                    .set(SqoopAvroUtils.createAvroName("id"), 4)
+                    .set(SqoopAvroUtils.createAvroName("country"), "USA")
+                    .set(SqoopAvroUtils.createAvroName("some_date"), new org.joda.time.DateTime(2004,
10, 26, 0, 0, 0, 0).toDate().getTime())
+                    .set(SqoopAvroUtils.createAvroName("city"), "Palo Alto").build(),
+            new GenericRecordBuilder(avroSchema)
+                    .set(SqoopAvroUtils.createAvroName("id"), 5)
+                    .set(SqoopAvroUtils.createAvroName("country"), "USA")
+                    .set(SqoopAvroUtils.createAvroName("some_date"), new org.joda.time.DateTime(2004,
10, 27, 0, 0, 0, 0).toDate().getTime())
+                    .set(SqoopAvroUtils.createAvroName("city"), "Martha's Vineyard").build()
+    ));
+    List<GenericRecord> notFound = new LinkedList<>();
 
     Path[] files = HdfsUtils.getOutputMapreduceFiles(hdfsClient, HdfsUtils.joinPathFragments(getMapreduceDirectory(),
"TO"));
     for (Path file : files) {
       ParquetReader<GenericRecord> avroParquetReader = AvroParquetReader.builder(file).build();
       GenericRecord record;
       while ((record = avroParquetReader.read()) != null) {
-        String recordAsLine = recordToLine(record);
-        if (!setLines.remove(recordAsLine)) {
-          notFound.add(recordAsLine);
+        if (!expectedAvroRecords.remove(record)) {
+          notFound.add(record);
         }
       }
     }
 
-    if (!setLines.isEmpty() || !notFound.isEmpty()) {
+    if (!expectedAvroRecords.isEmpty() || !notFound.isEmpty()) {
       fail("Output do not match expectations.");
     }
   }
@@ -126,12 +163,6 @@ public class ParquetTest extends SqoopTestCase {
   public void fromParquetTest() throws Exception {
     createTableCities();
 
-    Schema sqoopSchema = new Schema("cities");
-    sqoopSchema.addColumn(new FixedPoint("id", Long.valueOf(Integer.SIZE), true));
-    sqoopSchema.addColumn(new Text("country"));
-    sqoopSchema.addColumn(new DateTime("some_date", true, false));
-    sqoopSchema.addColumn(new Text("city"));
-
     HdfsParquetWriter parquetWriter = new HdfsParquetWriter();
 
     Configuration conf = new Configuration();
@@ -141,8 +172,8 @@ public class ParquetTest extends SqoopTestCase {
       new Path(HdfsUtils.joinPathFragments(getMapreduceDirectory(), "input-0001.parquet")),
       sqoopSchema, conf, null);
 
-    parquetWriter.write("1,'USA','2004-10-23 00:00:00.000','San Francisco'");
-    parquetWriter.write("2,'USA','2004-10-24 00:00:00.000','Sunnyvale'");
+    parquetWriter.write(SqoopIDFUtils.fromCSV("1,'USA','2004-10-23 00:00:00.000','San Francisco'",
sqoopSchema), SqoopIDFUtils.DEFAULT_NULL_VALUE);
+    parquetWriter.write(SqoopIDFUtils.fromCSV("2,'USA','2004-10-24 00:00:00.000','Sunnyvale'",
sqoopSchema), SqoopIDFUtils.DEFAULT_NULL_VALUE);
 
     parquetWriter.destroy();
 
@@ -150,8 +181,9 @@ public class ParquetTest extends SqoopTestCase {
       new Path(HdfsUtils.joinPathFragments(getMapreduceDirectory(), "input-0002.parquet")),
       sqoopSchema, conf, null);
 
-    parquetWriter.write("3,'Czech Republic','2004-10-25 00:00:00.000','Brno'");
-    parquetWriter.write("4,'USA','2004-10-26 00:00:00.000','Palo Alto'");
+    parquetWriter.write(SqoopIDFUtils.fromCSV("3,'Czech Republic','2004-10-25 00:00:00.000','Brno'",
sqoopSchema), SqoopIDFUtils.DEFAULT_NULL_VALUE);
+    parquetWriter.write(SqoopIDFUtils.fromCSV("4,'USA','2004-10-26 00:00:00.000','Palo Alto'",
sqoopSchema), SqoopIDFUtils.DEFAULT_NULL_VALUE);
+    parquetWriter.write(SqoopIDFUtils.fromCSV("5,'USA','2004-10-27 00:00:00.000','Martha\\'s
Vineyard'", sqoopSchema), SqoopIDFUtils.DEFAULT_NULL_VALUE);
 
     parquetWriter.destroy();
 
@@ -172,20 +204,12 @@ public class ParquetTest extends SqoopTestCase {
     saveJob(job);
 
     executeJob(job);
-    assertEquals(provider.rowCount(getTableName()), 4);
+    Assert.assertEquals(provider.rowCount(getTableName()), 5);
     assertRowInCities(1, "USA", Timestamp.valueOf("2004-10-23 00:00:00.000"), "San Francisco");
     assertRowInCities(2, "USA", Timestamp.valueOf("2004-10-24 00:00:00.000"), "Sunnyvale");
     assertRowInCities(3, "Czech Republic", Timestamp.valueOf("2004-10-25 00:00:00.000"),
"Brno");
     assertRowInCities(4, "USA", Timestamp.valueOf("2004-10-26 00:00:00.000"), "Palo Alto");
-  }
-
-  public String recordToLine(GenericRecord genericRecord) {
-    String line = "";
-    line += "\'" + String.valueOf(genericRecord.get(0)) + "\',";
-    line += "\'" + String.valueOf(genericRecord.get(1)) + "\',";
-    line += "\'" + new Timestamp((Long)genericRecord.get(2)) + "00\',";
-    line += "\'" + String.valueOf(genericRecord.get(3)) + "\'";
-    return line;
+    assertRowInCities(5, "USA", Timestamp.valueOf("2004-10-27 00:00:00.000"), "Martha's Vineyard");
   }
 
 }

http://git-wip-us.apache.org/repos/asf/sqoop/blob/d3955ab5/test/src/test/java/org/apache/sqoop/integration/connector/hdfs/S3Test.java
----------------------------------------------------------------------
diff --git a/test/src/test/java/org/apache/sqoop/integration/connector/hdfs/S3Test.java b/test/src/test/java/org/apache/sqoop/integration/connector/hdfs/S3Test.java
index c857699..e2e7417 100644
--- a/test/src/test/java/org/apache/sqoop/integration/connector/hdfs/S3Test.java
+++ b/test/src/test/java/org/apache/sqoop/integration/connector/hdfs/S3Test.java
@@ -108,7 +108,8 @@ public class S3Test extends SqoopTestCase {
       "1,'USA','2004-10-23','San Francisco'",
       "2,'USA','2004-10-24','Sunnyvale'",
       "3,'Czech Republic','2004-10-25','Brno'",
-      "4,'USA','2004-10-26','Palo Alto'"
+      "4,'USA','2004-10-26','Palo Alto'",
+      "5,'USA','2004-10-27','Martha\\'s Vineyard'"
     );
 
     // This re-creates the table completely
@@ -129,6 +130,7 @@ public class S3Test extends SqoopTestCase {
     assertRowInCities(2, "USA", "2004-10-24", "Sunnyvale");
     assertRowInCities(3, "Czech Republic", "2004-10-25", "Brno");
     assertRowInCities(4, "USA", "2004-10-26", "Palo Alto");
+    assertRowInCities(5, "USA", "2004-10-27", "Martha's Vineyard");
   }
 
   @Test

http://git-wip-us.apache.org/repos/asf/sqoop/blob/d3955ab5/test/src/test/java/org/apache/sqoop/integration/connector/jdbc/generic/FromHDFSToRDBMSTest.java
----------------------------------------------------------------------
diff --git a/test/src/test/java/org/apache/sqoop/integration/connector/jdbc/generic/FromHDFSToRDBMSTest.java
b/test/src/test/java/org/apache/sqoop/integration/connector/jdbc/generic/FromHDFSToRDBMSTest.java
index 933bc08..e05cbdf 100644
--- a/test/src/test/java/org/apache/sqoop/integration/connector/jdbc/generic/FromHDFSToRDBMSTest.java
+++ b/test/src/test/java/org/apache/sqoop/integration/connector/jdbc/generic/FromHDFSToRDBMSTest.java
@@ -52,7 +52,8 @@ public class FromHDFSToRDBMSTest extends SqoopTestCase {
       "1,'USA','2004-10-23 00:00:00.000','San Francisco'",
       "2,'USA','2004-10-24 00:00:00.000','Sunnyvale'",
       "3,'Czech Republic','2004-10-25 00:00:00.000','Brno'",
-      "4,'USA','2004-10-26 00:00:00.000','Palo Alto'"
+      "4,'USA','2004-10-26 00:00:00.000','Palo Alto'",
+      "5,'USA','2004-10-27 00:00:00.000','Martha\\'s Vineyard'"
     );
     // RDBMS link
     MLink rdbmsLink = getClient().createLink("generic-jdbc-connector");
@@ -80,10 +81,11 @@ public class FromHDFSToRDBMSTest extends SqoopTestCase {
 
     executeJob(job);
 
-    assertEquals(4L, provider.rowCount(getTableName()));
+    assertEquals(5L, provider.rowCount(getTableName()));
     assertRowInCities(1, "USA", Timestamp.valueOf("2004-10-23 00:00:00.000"), "San Francisco");
     assertRowInCities(2, "USA", Timestamp.valueOf("2004-10-24 00:00:00.000"), "Sunnyvale");
     assertRowInCities(3, "Czech Republic", Timestamp.valueOf("2004-10-25 00:00:00.000"),
"Brno");
     assertRowInCities(4, "USA", Timestamp.valueOf("2004-10-26 00:00:00.000"), "Palo Alto");
+    assertRowInCities(5, "USA", Timestamp.valueOf("2004-10-27 00:00:00.000"), "Martha's Vineyard");
   }
 }

http://git-wip-us.apache.org/repos/asf/sqoop/blob/d3955ab5/test/src/test/java/org/apache/sqoop/integration/connector/jdbc/generic/FromRDBMSToHDFSTest.java
----------------------------------------------------------------------
diff --git a/test/src/test/java/org/apache/sqoop/integration/connector/jdbc/generic/FromRDBMSToHDFSTest.java
b/test/src/test/java/org/apache/sqoop/integration/connector/jdbc/generic/FromRDBMSToHDFSTest.java
index 7e66091..6eb7ed8 100644
--- a/test/src/test/java/org/apache/sqoop/integration/connector/jdbc/generic/FromRDBMSToHDFSTest.java
+++ b/test/src/test/java/org/apache/sqoop/integration/connector/jdbc/generic/FromRDBMSToHDFSTest.java
@@ -73,7 +73,8 @@ public class FromRDBMSToHDFSTest extends SqoopTestCase {
       "1,'USA','2004-10-23 00:00:00.000','San Francisco'",
       "2,'USA','2004-10-24 00:00:00.000','Sunnyvale'",
       "3,'Czech Republic','2004-10-25 00:00:00.000','Brno'",
-      "4,'USA','2004-10-26 00:00:00.000','Palo Alto'"
+      "4,'USA','2004-10-26 00:00:00.000','Palo Alto'",
+      "5,'USA','2004-10-27 00:00:00.000','Martha\\'s Vineyard'"
     );
 
     // Clean up testing table
@@ -157,7 +158,8 @@ public class FromRDBMSToHDFSTest extends SqoopTestCase {
       "1,'USA'",
       "2,'USA'",
       "3,'Czech Republic'",
-      "4,'USA'"
+      "4,'USA'",
+      "5,'USA'"
     );
 
     // Clean up testing table
@@ -197,7 +199,8 @@ public class FromRDBMSToHDFSTest extends SqoopTestCase {
         "1",
         "2",
         "3",
-        "4"
+        "4",
+        "5"
     );
 
     // Clean up testing table
@@ -243,7 +246,8 @@ public class FromRDBMSToHDFSTest extends SqoopTestCase {
         "1,1",
         "2,2",
         "3,3",
-        "4,4"
+        "4,4",
+        "5,5"
     );
 
     // Clean up testing table
@@ -292,7 +296,8 @@ public class FromRDBMSToHDFSTest extends SqoopTestCase {
       "1,'USA','2004-10-23 00:00:00.000','San Francisco'",
       "2,'USA','2004-10-24 00:00:00.000','Sunnyvale'",
       "3,'Czech Republic','2004-10-25 00:00:00.000','Brno'",
-      "4,'USA','2004-10-26 00:00:00.000','Palo Alto'"
+      "4,'USA','2004-10-26 00:00:00.000','Palo Alto'",
+      "5,'USA','2004-10-27 00:00:00.000','Martha\\'s Vineyard'"
     );
 
     // Clean up testing table

http://git-wip-us.apache.org/repos/asf/sqoop/blob/d3955ab5/test/src/test/java/org/apache/sqoop/integration/connector/kite/FromRDBMSToKiteTest.java
----------------------------------------------------------------------
diff --git a/test/src/test/java/org/apache/sqoop/integration/connector/kite/FromRDBMSToKiteTest.java
b/test/src/test/java/org/apache/sqoop/integration/connector/kite/FromRDBMSToKiteTest.java
index 10f3614..1ffbe6f 100644
--- a/test/src/test/java/org/apache/sqoop/integration/connector/kite/FromRDBMSToKiteTest.java
+++ b/test/src/test/java/org/apache/sqoop/integration/connector/kite/FromRDBMSToKiteTest.java
@@ -100,7 +100,8 @@ public class FromRDBMSToKiteTest extends SqoopTestCase {
         "\"1\"",
         "\"2\"",
         "\"3\"",
-        "\"4\""
+        "\"4\"",
+        "\"5\""
     );
   }
 }


Mime
View raw message