sqoop-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jar...@apache.org
Subject git commit: SQOOP-693: Intermediate data format support for export
Date Fri, 16 Nov 2012 00:03:50 GMT
Updated Branches:
  refs/heads/sqoop2 ae23cb26d -> adef39bbb


SQOOP-693: Intermediate data format support for export

(Bilung Lee via Jarek Jarcec Cecho)


Project: http://git-wip-us.apache.org/repos/asf/sqoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/sqoop/commit/adef39bb
Tree: http://git-wip-us.apache.org/repos/asf/sqoop/tree/adef39bb
Diff: http://git-wip-us.apache.org/repos/asf/sqoop/diff/adef39bb

Branch: refs/heads/sqoop2
Commit: adef39bbb58e70bdcaf028a183d62feaacb2e916
Parents: ae23cb2
Author: Jarek Jarcec Cecho <jarcec@apache.org>
Authored: Thu Nov 15 16:03:17 2012 -0800
Committer: Jarek Jarcec Cecho <jarcec@apache.org>
Committed: Thu Nov 15 16:03:17 2012 -0800

----------------------------------------------------------------------
 .../main/java/org/apache/sqoop/job/io/Data.java    |  153 ++++++++++++++-
 .../java/org/apache/sqoop/job/io/TestData.java     |   59 +++++-
 2 files changed, 203 insertions(+), 9 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/sqoop/blob/adef39bb/execution/mapreduce/src/main/java/org/apache/sqoop/job/io/Data.java
----------------------------------------------------------------------
diff --git a/execution/mapreduce/src/main/java/org/apache/sqoop/job/io/Data.java b/execution/mapreduce/src/main/java/org/apache/sqoop/job/io/Data.java
index f6fff0b..41fceb8 100644
--- a/execution/mapreduce/src/main/java/org/apache/sqoop/job/io/Data.java
+++ b/execution/mapreduce/src/main/java/org/apache/sqoop/job/io/Data.java
@@ -57,10 +57,16 @@ public class Data implements WritableComparable<Data> {
       stringEscape, stringDelimiter
   });
 
+  private int[] fieldTypes = null;
+
   public void setFieldDelimiter(char fieldDelimiter) {
     this.fieldDelimiter = fieldDelimiter;
   }
 
+  public void setFieldTypes(int[] fieldTypes) { // one FieldTypes code per CSV field position
+    this.fieldTypes = fieldTypes; // stored as-is; while null, parseField() guesses each type
+  }
+
   public void setContent(Object content, int type) {
     switch (type) {
     case EMPTY_DATA:
@@ -356,7 +362,37 @@ public class Data implements WritableComparable<Data> {
 
     case CSV_RECORD:
       ArrayList<Object> list = new ArrayList<Object>();
-      // todo: need to parse CSV into Array
+      char[] record = ((String)content).toCharArray();
+      int start = 0;
+      int position = start;
+      boolean stringDelimited = false;
+      boolean arrayDelimited = false;
+      int index = 0;
+      while (position < record.length) {
+        if (record[position] == fieldDelimiter) {
+          if (!stringDelimited && !arrayDelimited) {
+            index = parseField(list, record, start, position, index);
+            start = position + 1;
+          }
+        } else if (record[position] == stringDelimiter) {
+          if (!stringDelimited) {
+            stringDelimited = true;
+          }
+          else if (position > 0 && record[position-1] != stringEscape) {
+            stringDelimited = false;
+          }
+        } else if (record[position] == '[') {
+          if (!stringDelimited) {
+            arrayDelimited = true;
+          }
+        } else if (record[position] == ']') {
+          if (!stringDelimited) {
+            arrayDelimited = false;
+          }
+        }
+        position++;
+      }
+      parseField(list, record, start, position, index);
       return list.toArray();
 
     case ARRAY_RECORD:
@@ -367,6 +403,114 @@ public class Data implements WritableComparable<Data> {
     }
   }
 
+  /**
+   * Parses the CSV field {@code record[start..end)} and stores the decoded
+   * value in {@code list} at {@code index}. The type is taken from
+   * {@code fieldTypes} when set, otherwise guessed from the trimmed text.
+   * Empty or undelimited text raises MAPRED_EXEC_0022 rather than an index
+   * exception, and "[]" decodes to an empty byte array.
+   * @return the index of the next field ({@code index + 1})
+   */
+  private int parseField(ArrayList<Object> list, char[] record,
+      int start, int end, int index) {
+    String field = String.valueOf(record, start, end-start).trim();
+
+    if (fieldTypes == null && field.isEmpty()) {
+      // guessType() inspects the first character, so there is nothing to guess
+      throw new SqoopException(MapreduceExecutionError.MAPRED_EXEC_0022);
+    }
+    int fieldType = (fieldTypes == null) ? guessType(field) : fieldTypes[index];
+
+    switch (fieldType) {
+    case FieldTypes.UTF:
+      // length check first: a lone delimiter would pass both charAt tests
+      if (field.length() < 2 || field.charAt(0) != stringDelimiter ||
+          field.charAt(field.length()-1) != stringDelimiter) {
+        throw new SqoopException(MapreduceExecutionError.MAPRED_EXEC_0022);
+      }
+      list.add(index, unescape(field.substring(1, field.length()-1)));
+      break;
+    case FieldTypes.BIN:
+      if (field.length() < 2 || field.charAt(0) != '[' ||
+          field.charAt(field.length()-1) != ']') {
+        throw new SqoopException(MapreduceExecutionError.MAPRED_EXEC_0022);
+      }
+      String body = field.substring(1, field.length()-1).trim();
+      // "".split(",") yields one empty token, which Byte.parseByte rejects
+      String[] splits = body.isEmpty() ? new String[0] : body.split(",");
+      byte[] bytes = new byte[splits.length];
+      for (int i=0; i<bytes.length; i++) {
+        bytes[i] = Byte.parseByte(splits[i].trim());
+      }
+      list.add(index, bytes);
+      break;
+    case FieldTypes.DOUBLE:
+      list.add(index, Double.parseDouble(field));
+      break;
+    case FieldTypes.FLOAT:
+      list.add(index, Float.parseFloat(field));
+      break;
+    case FieldTypes.LONG:
+      list.add(index, Long.parseLong(field));
+      break;
+    case FieldTypes.INT:
+      list.add(index, Integer.parseInt(field));
+      break;
+    case FieldTypes.SHORT:
+      list.add(index, Short.parseShort(field));
+      break;
+    case FieldTypes.CHAR:
+      if (field.isEmpty()) {
+        throw new SqoopException(MapreduceExecutionError.MAPRED_EXEC_0022);
+      }
+      list.add(index, Character.valueOf(field.charAt(0)));
+      break;
+    case FieldTypes.BYTE:
+      list.add(index, Byte.parseByte(field));
+      break;
+    case FieldTypes.BOOLEAN:
+      list.add(index, Boolean.parseBoolean(field));
+      break;
+    case FieldTypes.NULL:
+      list.add(index, null);
+      break;
+    default:
+      throw new SqoopException(MapreduceExecutionError.MAPRED_EXEC_0012, String.valueOf(fieldType));
+    }
+    return index + 1;
+  }
+
+  /**
+   * Guesses the FieldTypes code of a field from its text: the first character
+   * picks UTF, NULL, BIN or BOOLEAN; otherwise DOUBLE if a '.' occurs, else LONG.
+   * An empty field cannot be guessed and raises MAPRED_EXEC_0022.
+   */
+  private int guessType(String field) {
+    if (field.isEmpty()) {
+      throw new SqoopException(MapreduceExecutionError.MAPRED_EXEC_0022);
+    }
+    char first = field.charAt(0);
+    if (first == stringDelimiter) {
+      return FieldTypes.UTF;
+    }
+
+    switch (first) {
+    case 'n':
+    case 'N':
+      return FieldTypes.NULL;
+    case '[':
+      return FieldTypes.BIN;
+    case 't':
+    case 'f':
+    case 'T':
+    case 'F':
+      return FieldTypes.BOOLEAN;
+    }
+
+    // search the whole field so that a leading '.' (".5") counts as well
+    return field.indexOf('.') >= 0 ? FieldTypes.DOUBLE : FieldTypes.LONG;
+  }
+
   private String escape(String string) {
     // TODO: Also need to escape those special characters as documented in:
     // https://cwiki.apache.org/confluence/display/SQOOP/Sqoop2+Intermediate+representation#Sqoop2Intermediaterepresentation-Intermediateformatrepresentationproposal
@@ -375,4 +519,11 @@ public class Data implements WritableComparable<Data> {
     return string.replaceAll(regex, replacement);
   }
 
+  private String unescape(String string) {
+    // TODO: Also need to unescape those special characters as documented in:
+    // https://cwiki.apache.org/confluence/display/SQOOP/Sqoop2+Intermediate+representation#Sqoop2Intermediaterepresentation-Intermediateformatrepresentationproposal
+    // Literal (non-regex) replacement: Matcher.quoteReplacement() quotes
+    // replacement strings, not patterns, so it must not build the regex here.
+    return string.replace(escapedStringDelimiter, String.valueOf(stringDelimiter));
+  }
 }

http://git-wip-us.apache.org/repos/asf/sqoop/blob/adef39bb/execution/mapreduce/src/test/java/org/apache/sqoop/job/io/TestData.java
----------------------------------------------------------------------
diff --git a/execution/mapreduce/src/test/java/org/apache/sqoop/job/io/TestData.java b/execution/mapreduce/src/test/java/org/apache/sqoop/job/io/TestData.java
index ea7ac70..91df426 100644
--- a/execution/mapreduce/src/test/java/org/apache/sqoop/job/io/TestData.java
+++ b/execution/mapreduce/src/test/java/org/apache/sqoop/job/io/TestData.java
@@ -34,13 +34,13 @@ public class TestData extends TestCase {
 
     // with special characters:
     expected =
-        (long) TEST_NUMBER + "," +
-        TEST_NUMBER + "," +
+        Long.valueOf((long)TEST_NUMBER) + "," +
+        Double.valueOf(TEST_NUMBER) + "," +
         "'" + String.valueOf(TEST_NUMBER) + "\\',s'" + "," +
         Arrays.toString(new byte[] {1, 2, 3, 4, 5});
     data.setContent(new Object[] {
-        (long) TEST_NUMBER,
-        TEST_NUMBER,
+        Long.valueOf((long)TEST_NUMBER),
+        Double.valueOf(TEST_NUMBER),
         String.valueOf(TEST_NUMBER) + "',s",
         new byte[] {1, 2, 3, 4, 5} },
         Data.ARRAY_RECORD);
@@ -49,13 +49,13 @@ public class TestData extends TestCase {
 
     // with null characters:
     expected =
-        (long) TEST_NUMBER + "," +
-        TEST_NUMBER + "," +
+        Long.valueOf((long)TEST_NUMBER) + "," +
+        Double.valueOf(TEST_NUMBER) + "," +
         "null" + "," +
         Arrays.toString(new byte[] {1, 2, 3, 4, 5});
     data.setContent(new Object[] {
-        (long) TEST_NUMBER,
-        TEST_NUMBER,
+        Long.valueOf((long)TEST_NUMBER),
+        Double.valueOf(TEST_NUMBER),
         null,
         new byte[] {1, 2, 3, 4, 5} },
         Data.ARRAY_RECORD);
@@ -63,6 +63,49 @@ public class TestData extends TestCase {
     assertEquals(expected, actual);
   }
 
+  @Test
+  public void testCsvToArray() throws Exception {
+    Data data = new Data();
+    Object[] expected;
+    Object[] actual;
+
+    // with special characters:
+    expected = new Object[] {
+        Long.valueOf((long)TEST_NUMBER),
+        Double.valueOf(TEST_NUMBER),
+        String.valueOf(TEST_NUMBER) + "',s",
+        new byte[] {1, 2, 3, 4, 5} };
+    data.setContent(
+        Long.valueOf((long)TEST_NUMBER) + "," +
+        Double.valueOf(TEST_NUMBER) + "," +
+        "'" + String.valueOf(TEST_NUMBER) + "\\',s'" + "," +
+        Arrays.toString(new byte[] {1, 2, 3, 4, 5}),
+        Data.CSV_RECORD);
+    actual = (Object[])data.getContent(Data.ARRAY_RECORD);
+    assertEquals(expected.length, actual.length);
+    for (int c=0; c<expected.length; c++) {
+      assertEquals(expected[c], actual[c]);
+    }
+
+    // with null characters:
+    expected = new Object[] {
+        Long.valueOf((long)TEST_NUMBER),
+        Double.valueOf(TEST_NUMBER),
+        null,
+        new byte[] {1, 2, 3, 4, 5} };
+    data.setContent(
+        Long.valueOf((long)TEST_NUMBER) + "," +
+        Double.valueOf(TEST_NUMBER) + "," +
+        "null" + "," +
+        Arrays.toString(new byte[] {1, 2, 3, 4, 5}),
+        Data.CSV_RECORD);
+    actual = (Object[])data.getContent(Data.ARRAY_RECORD);
+    assertEquals(expected.length, actual.length);
+    for (int c=0; c<expected.length; c++) {
+      assertEquals(expected[c], actual[c]);
+    }
+  }
+
   public static void assertEquals(Object expected, Object actual) {
     if (expected instanceof byte[]) {
       assertEquals(Arrays.toString((byte[])expected),


Mime
View raw message