sqoop-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From b...@apache.org
Subject svn commit: r1190441 [3/3] - in /incubator/sqoop/trunk/src: java/com/cloudera/sqoop/mapreduce/db/ java/org/apache/sqoop/mapreduce/ java/org/apache/sqoop/mapreduce/db/ test/com/cloudera/sqoop/mapreduce/db/ test/org/apache/sqoop/ test/org/apache/sqoop/ma...
Date Fri, 28 Oct 2011 16:50:41 GMT
Added: incubator/sqoop/trunk/src/java/org/apache/sqoop/mapreduce/db/DataDrivenDBRecordReader.java
URL: http://svn.apache.org/viewvc/incubator/sqoop/trunk/src/java/org/apache/sqoop/mapreduce/db/DataDrivenDBRecordReader.java?rev=1190441&view=auto
==============================================================================
--- incubator/sqoop/trunk/src/java/org/apache/sqoop/mapreduce/db/DataDrivenDBRecordReader.java (added)
+++ incubator/sqoop/trunk/src/java/org/apache/sqoop/mapreduce/db/DataDrivenDBRecordReader.java Fri Oct 28 16:50:39 2011
@@ -0,0 +1,132 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.sqoop.mapreduce.db;
+
+
+import java.io.IOException;
+import java.sql.Connection;
+import java.sql.SQLException;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.mapreduce.lib.db.DBWritable;
+
+import com.cloudera.sqoop.mapreduce.db.DBConfiguration;
+import com.cloudera.sqoop.mapreduce.db.DBInputFormat;
+import com.cloudera.sqoop.mapreduce.db.DBRecordReader;
+import com.cloudera.sqoop.mapreduce.db.DataDrivenDBInputFormat;
+
+/**
+ * A RecordReader that reads records from a SQL table,
+ * using data-driven WHERE clause splits.
+ * Emits LongWritables containing the record number as
+ * key and DBWritables as value.
+ */
+public class DataDrivenDBRecordReader<T extends DBWritable>
+    extends DBRecordReader<T> {
+
+  private static final Log LOG =
+      LogFactory.getLog(DataDrivenDBRecordReader.class);
+
+  // Vendor identification string for the backing database (e.g. "ORACLE").
+  private String dbProductName;
+
+  // CHECKSTYLE:OFF
+  // TODO(aaron): Refactor constructor to use fewer arguments.
+  /**
+   * @param split The InputSplit to read data for
+   * @throws SQLException
+   */
+  public DataDrivenDBRecordReader(DBInputFormat.DBInputSplit split,
+      Class<T> inputClass, Configuration conf, Connection conn,
+      DBConfiguration dbConfig, String cond, String [] fields, String table,
+      String dbProduct) throws SQLException {
+    super(split, inputClass, conf, conn, dbConfig, cond, fields, table);
+    this.dbProductName = dbProduct;
+  }
+  // CHECKSTYLE:ON
+
+  @Override
+  /** {@inheritDoc} */
+  public float getProgress() throws IOException {
+    // Progress is all-or-nothing: we cannot estimate how far through the
+    // split's result set we are.
+    if (isDone()) {
+      return 1.0f;
+    }
+    return 0.0f;
+  }
+
+  /** Returns the query for selecting the records,
+   * subclasses can override this for custom behaviour.*/
+  protected String getSelectQuery() {
+    DataDrivenDBInputFormat.DataDrivenDBInputSplit dataSplit =
+        (DataDrivenDBInputFormat.DataDrivenDBInputSplit) getSplit();
+    DBConfiguration dbConf = getDBConf();
+
+    // The split's bounding WHERE clauses are needed whether we generate the
+    // full statement ourselves or substitute into a user-supplied query.
+    StringBuilder splitClauses = new StringBuilder();
+    splitClauses.append("( ").append(dataSplit.getLowerClause())
+        .append(" ) AND ( ").append(dataSplit.getUpperClause())
+        .append(" )");
+
+    StringBuilder sql = new StringBuilder();
+    String userQuery = dbConf.getInputQuery();
+    if (userQuery == null) {
+      // No user-supplied query; generate the entire SELECT statement.
+      String [] fieldNames = getFieldNames();
+      String tableName = getTableName();
+      String conditions = getConditions();
+
+      sql.append("SELECT ");
+      for (int i = 0; i < fieldNames.length; i++) {
+        if (i > 0) {
+          sql.append(", ");
+        }
+        sql.append(fieldNames[i]);
+      }
+
+      sql.append(" FROM ").append(tableName);
+      if (!dbProductName.startsWith("ORACLE")) {
+        // Seems to be necessary for hsqldb? Oracle explicitly does *not*
+        // use this clause.
+        sql.append(" AS ").append(tableName);
+      }
+      sql.append(" WHERE ");
+      if (conditions != null && conditions.length() > 0) {
+        // Put the user's conditions first.
+        sql.append("( ").append(conditions).append(" ) AND ");
+      }
+      // Then the conditions associated with our split.
+      sql.append(splitClauses.toString());
+    } else {
+      // The user provided the query; splice our WHERE clause in place of
+      // the substitution token.
+      if (userQuery.indexOf(DataDrivenDBInputFormat.SUBSTITUTE_TOKEN) == -1) {
+        LOG.error("Could not find the clause substitution token "
+            + DataDrivenDBInputFormat.SUBSTITUTE_TOKEN + " in the query: ["
+            + userQuery + "]. Parallel splits may not work correctly.");
+      }
+      sql.append(userQuery.replace(DataDrivenDBInputFormat.SUBSTITUTE_TOKEN,
+          splitClauses.toString()));
+    }
+
+    LOG.debug("Using query: " + sql.toString());
+    return sql.toString();
+  }
+}

Propchange: incubator/sqoop/trunk/src/java/org/apache/sqoop/mapreduce/db/DataDrivenDBRecordReader.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/sqoop/trunk/src/java/org/apache/sqoop/mapreduce/db/DateSplitter.java
URL: http://svn.apache.org/viewvc/incubator/sqoop/trunk/src/java/org/apache/sqoop/mapreduce/db/DateSplitter.java?rev=1190441&view=auto
==============================================================================
--- incubator/sqoop/trunk/src/java/org/apache/sqoop/mapreduce/db/DateSplitter.java (added)
+++ incubator/sqoop/trunk/src/java/org/apache/sqoop/mapreduce/db/DateSplitter.java Fri Oct 28 16:50:39 2011
@@ -0,0 +1,183 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.sqoop.mapreduce.db;
+
+import java.sql.ResultSet;
+import java.sql.SQLException;
+import java.sql.Types;
+import java.util.ArrayList;
+import java.util.Date;
+import java.util.List;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.mapreduce.InputSplit;
+
+import com.cloudera.sqoop.config.ConfigurationHelper;
+import com.cloudera.sqoop.mapreduce.db.DataDrivenDBInputFormat;
+import com.cloudera.sqoop.mapreduce.db.IntegerSplitter;
+
+/**
+ * Implement DBSplitter over date/time values.
+ * Make use of logic from IntegerSplitter, since date/time are just longs
+ * in Java.
+ */
+public class DateSplitter extends IntegerSplitter {
+
+  private static final Log LOG = LogFactory.getLog(DateSplitter.class);
+
+  /**
+   * Split a date/time column into per-mapper WHERE-clause ranges.
+   * Columns 1 and 2 of {@code results} must hold the minimum and maximum
+   * values of {@code colName}; a NULL extreme is mapped to Long.MIN_VALUE
+   * by resultSetColToLong and gets a dedicated "IS NULL" split.
+   */
+  public List<InputSplit> split(Configuration conf, ResultSet results,
+      String colName) throws SQLException {
+
+    long minVal;
+    long maxVal;
+
+    // The column's SQL type (DATE/TIME/TIMESTAMP) drives the epoch-millis
+    // conversion in both directions (resultSetColToLong / longToDate).
+    int sqlDataType = results.getMetaData().getColumnType(1);
+    minVal = resultSetColToLong(results, 1, sqlDataType);
+    maxVal = resultSetColToLong(results, 2, sqlDataType);
+
+    String lowClausePrefix = colName + " >= ";
+    String highClausePrefix = colName + " < ";
+
+    // Target split count comes from the configured number of map tasks.
+    int numSplits = ConfigurationHelper.getConfNumMaps(conf);
+    if (numSplits < 1) {
+      numSplits = 1;
+    }
+
+    if (minVal == Long.MIN_VALUE && maxVal == Long.MIN_VALUE) {
+      // The range of acceptable dates is NULL to NULL. Just create a single
+      // split.
+      List<InputSplit> splits = new ArrayList<InputSplit>();
+      splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit(
+          colName + " IS NULL", colName + " IS NULL"));
+      return splits;
+    }
+
+    // Gather the split point integers
+    // (inherited IntegerSplitter.split(long, long, long) does the math).
+    List<Long> splitPoints = split(numSplits, minVal, maxVal);
+    List<InputSplit> splits = new ArrayList<InputSplit>();
+
+    // Turn the split points into a set of intervals.
+    long start = splitPoints.get(0);
+    Date startDate = longToDate(start, sqlDataType);
+    if (sqlDataType == Types.TIMESTAMP) {
+      // The lower bound's nanos value needs to match the actual lower-bound
+      // nanos. (Timestamp.getTime() only carries millisecond precision.)
+      try {
+        ((java.sql.Timestamp) startDate).setNanos(
+            results.getTimestamp(1).getNanos());
+      } catch (NullPointerException npe) {
+        // If the lower bound was NULL, we'll get an NPE; just ignore it and
+        // don't set nanos.
+      }
+    }
+
+    for (int i = 1; i < splitPoints.size(); i++) {
+      long end = splitPoints.get(i);
+      Date endDate = longToDate(end, sqlDataType);
+
+      if (i == splitPoints.size() - 1) {
+        if (sqlDataType == Types.TIMESTAMP) {
+          // The upper bound's nanos value needs to match the actual
+          // upper-bound nanos.
+          try {
+            ((java.sql.Timestamp) endDate).setNanos(
+                results.getTimestamp(2).getNanos());
+          } catch (NullPointerException npe) {
+            // If the upper bound was NULL, we'll get an NPE; just ignore it
+            // and don't set nanos.
+          }
+        }
+        // This is the last one; use a closed interval.
+        splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit(
+            lowClausePrefix + dateToString(startDate),
+            colName + " <= " + dateToString(endDate)));
+      } else {
+        // Normal open-interval case.
+        splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit(
+            lowClausePrefix + dateToString(startDate),
+            highClausePrefix + dateToString(endDate)));
+      }
+
+      start = end;
+      startDate = endDate;
+    }
+
+    if (minVal == Long.MIN_VALUE || maxVal == Long.MIN_VALUE) {
+      // Add an extra split to handle the null case that we saw.
+      splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit(
+          colName + " IS NULL", colName + " IS NULL"));
+    }
+
+    return splits;
+  }
+
+  /**
+      Retrieve the value from the column in a type-appropriate manner and
+      return its timestamp since the epoch. If the column is null, then return
+      Long.MIN_VALUE.  This will cause a special split to be generated for the
+      NULL case, but may also cause poorly-balanced splits if most of the
+      actual dates are positive time since the epoch, etc.
+    */
+  private long resultSetColToLong(ResultSet rs, int colNum, int sqlDataType)
+      throws SQLException {
+    try {
+      switch (sqlDataType) {
+      case Types.DATE:
+        return rs.getDate(colNum).getTime();
+      case Types.TIME:
+        return rs.getTime(colNum).getTime();
+      case Types.TIMESTAMP:
+        return rs.getTimestamp(colNum).getTime();
+      default:
+        throw new SQLException("Not a date-type field");
+      }
+    } catch (NullPointerException npe) {
+      // null column. return minimum long value.
+      // (The JDBC getters return null for SQL NULL; calling getTime() on
+      // that null is what raises the NPE we catch here.)
+      LOG.warn("Encountered a NULL date in the split column. "
+          + "Splits may be poorly balanced.");
+      return Long.MIN_VALUE;
+    }
+  }
+
+  /**  Parse the long-valued timestamp into the appropriate SQL date type. */
+  private Date longToDate(long val, int sqlDataType) {
+    switch (sqlDataType) {
+    case Types.DATE:
+      return new java.sql.Date(val);
+    case Types.TIME:
+      return new java.sql.Time(val);
+    case Types.TIMESTAMP:
+      return new java.sql.Timestamp(val);
+    default: // Shouldn't ever hit this case.
+      return null;
+    }
+  }
+
+  /**
+   * Given a Date 'd', format it as a string for use in a SQL date
+   * comparison operation.
+   * @param d the date to format.
+   * @return the string representing this date in SQL with any appropriate
+   * quotation characters, etc.
+   */
+  protected String dateToString(Date d) {
+    return "'" + d.toString() + "'";
+  }
+}

Propchange: incubator/sqoop/trunk/src/java/org/apache/sqoop/mapreduce/db/DateSplitter.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/sqoop/trunk/src/java/org/apache/sqoop/mapreduce/db/FloatSplitter.java
URL: http://svn.apache.org/viewvc/incubator/sqoop/trunk/src/java/org/apache/sqoop/mapreduce/db/FloatSplitter.java?rev=1190441&view=auto
==============================================================================
--- incubator/sqoop/trunk/src/java/org/apache/sqoop/mapreduce/db/FloatSplitter.java (added)
+++ incubator/sqoop/trunk/src/java/org/apache/sqoop/mapreduce/db/FloatSplitter.java Fri Oct 28 16:50:39 2011
@@ -0,0 +1,99 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.sqoop.mapreduce.db;
+
+import java.sql.ResultSet;
+import java.sql.SQLException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.mapreduce.InputSplit;
+
+import com.cloudera.sqoop.config.ConfigurationHelper;
+import com.cloudera.sqoop.mapreduce.db.DBSplitter;
+import com.cloudera.sqoop.mapreduce.db.DataDrivenDBInputFormat;
+
+/**
+ * Implement DBSplitter over floating-point values.
+ *
+ * Splitting on a floating-point column is inherently imprecise: split
+ * boundaries are decimal renderings of Java doubles, so the import may
+ * miss rows. An integral split column is strongly preferred.
+ */
+public class FloatSplitter implements DBSplitter  {
+
+  private static final Log LOG = LogFactory.getLog(FloatSplitter.class);
+
+  // Lower bound on split width; prevents zero-width splits when
+  // (maxVal - minVal) is tiny relative to the requested split count.
+  private static final double MIN_INCREMENT = 10000 * Double.MIN_VALUE;
+
+  /**
+   * Generate WHERE-clause splits over a floating-point column.
+   * Columns 1 and 2 of {@code results} hold the minimum and maximum values
+   * of {@code colName}; a NULL extreme yields an extra "IS NULL" split.
+   */
+  public List<InputSplit> split(Configuration conf, ResultSet results,
+      String colName) throws SQLException {
+
+    LOG.warn("Generating splits for a floating-point index column. Due to the");
+    LOG.warn("imprecise representation of floating-point values in Java, this");
+    LOG.warn("may result in an incomplete import.");
+    LOG.warn("You are strongly encouraged to choose an integral split column.");
+
+    List<InputSplit> splits = new ArrayList<InputSplit>();
+
+    if (results.getString(1) == null && results.getString(2) == null) {
+      // Range is null to null. Return a null split accordingly.
+      splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit(
+          colName + " IS NULL", colName + " IS NULL"));
+      return splits;
+    }
+
+    double minVal = results.getDouble(1);
+    double maxVal = results.getDouble(2);
+
+    // Use this as a hint. May need an extra task if the size doesn't
+    // divide cleanly.
+    int numSplits = ConfigurationHelper.getConfNumMaps(conf);
+    double splitSize = (maxVal - minVal) / (double) numSplits;
+
+    if (splitSize < MIN_INCREMENT) {
+      splitSize = MIN_INCREMENT;
+    }
+
+    String lowClausePrefix = colName + " >= ";
+    String highClausePrefix = colName + " < ";
+
+    double curLower = minVal;
+    double curUpper = curLower + splitSize;
+
+    // Emit half-open intervals [curLower, curUpper) until the next upper
+    // bound would reach or pass maxVal.
+    while (curUpper < maxVal) {
+      splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit(
+          lowClausePrefix + Double.toString(curLower),
+          highClausePrefix + Double.toString(curUpper)));
+
+      curLower = curUpper;
+      curUpper += splitSize;
+    }
+
+    // Catch any overage and create the closed interval for the last split.
+    // BUG FIX: the final split's lower bound must be curLower, not curUpper.
+    // After the loop exits, curUpper >= maxVal, so the old clause
+    // "col >= curUpper AND col <= maxVal" was contradictory whenever
+    // curUpper > maxVal and silently dropped all rows in [curLower, maxVal].
+    if (curLower <= maxVal || splits.size() == 1) {
+      splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit(
+          lowClausePrefix + Double.toString(curLower),
+          colName + " <= " + Double.toString(maxVal)));
+    }
+
+    if (results.getString(1) == null || results.getString(2) == null) {
+      // At least one extrema is null; add a null split.
+      splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit(
+          colName + " IS NULL", colName + " IS NULL"));
+    }
+
+    return splits;
+  }
+}

Propchange: incubator/sqoop/trunk/src/java/org/apache/sqoop/mapreduce/db/FloatSplitter.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/sqoop/trunk/src/java/org/apache/sqoop/mapreduce/db/IntegerSplitter.java
URL: http://svn.apache.org/viewvc/incubator/sqoop/trunk/src/java/org/apache/sqoop/mapreduce/db/IntegerSplitter.java?rev=1190441&view=auto
==============================================================================
--- incubator/sqoop/trunk/src/java/org/apache/sqoop/mapreduce/db/IntegerSplitter.java (added)
+++ incubator/sqoop/trunk/src/java/org/apache/sqoop/mapreduce/db/IntegerSplitter.java Fri Oct 28 16:50:39 2011
@@ -0,0 +1,148 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.sqoop.mapreduce.db;
+
+import java.sql.ResultSet;
+import java.sql.SQLException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.mapreduce.InputSplit;
+
+import com.cloudera.sqoop.config.ConfigurationHelper;
+import com.cloudera.sqoop.mapreduce.db.DBSplitter;
+import com.cloudera.sqoop.mapreduce.db.DataDrivenDBInputFormat;
+
+/**
+ * Implement DBSplitter over integer values.
+ */
+public class IntegerSplitter implements DBSplitter  {
+  public static final Log LOG =
+      LogFactory.getLog(IntegerSplitter.class.getName());
+
+    public List<InputSplit> split(Configuration conf, ResultSet results,
+        String colName) throws SQLException {
+
+      long minVal = results.getLong(1);
+      long maxVal = results.getLong(2);
+
+      String lowClausePrefix = colName + " >= ";
+      String highClausePrefix = colName + " < ";
+
+      int numSplits = ConfigurationHelper.getConfNumMaps(conf);
+      if (numSplits < 1) {
+        numSplits = 1;
+      }
+
+      if (results.getString(1) == null && results.getString(2) == null) {
+        // Range is null to null. Return a null split accordingly.
+        List<InputSplit> splits = new ArrayList<InputSplit>();
+        splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit(
+            colName + " IS NULL", colName + " IS NULL"));
+        return splits;
+      }
+
+      // Get all the split points together.
+      List<Long> splitPoints = split(numSplits, minVal, maxVal);
+      if (LOG.isDebugEnabled()) {
+        LOG.debug(String.format("Splits: [%,28d to %,28d] into %d parts",
+            minVal, maxVal, numSplits));
+        for (int i = 0; i < splitPoints.size(); i++) {
+          LOG.debug(String.format("%,28d", splitPoints.get(i)));
+        }
+      }
+      List<InputSplit> splits = new ArrayList<InputSplit>();
+
+      // Turn the split points into a set of intervals.
+      long start = splitPoints.get(0);
+      for (int i = 1; i < splitPoints.size(); i++) {
+        long end = splitPoints.get(i);
+
+        if (i == splitPoints.size() - 1) {
+          // This is the last one; use a closed interval.
+          splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit(
+              lowClausePrefix + Long.toString(start),
+              colName + " <= " + Long.toString(end)));
+        } else {
+          // Normal open-interval case.
+          splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit(
+              lowClausePrefix + Long.toString(start),
+              highClausePrefix + Long.toString(end)));
+        }
+
+        start = end;
+      }
+
+      if (results.getString(1) == null || results.getString(2) == null) {
+        // At least one extrema is null; add a null split.
+        splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit(
+            colName + " IS NULL", colName + " IS NULL"));
+      }
+
+      return splits;
+    }
+
+    /**
+     * Returns a list of longs one element longer than the list of input splits.
+     * This represents the boundaries between input splits.
+     * All splits are open on the top end, except the last one.
+     *
+     * So the list [0, 5, 8, 12, 18] would represent splits capturing the
+     * intervals:
+     *
+     * [0, 5)
+     * [5, 8)
+     * [8, 12)
+     * [12, 18] note the closed interval for the last split.
+     */
+    protected List<Long> split(long numSplits, long minVal, long maxVal)
+        throws SQLException {
+
+      List<Long> splits = new ArrayList<Long>();
+
+      // We take the min-max interval and divide by the numSplits and also
+      // calculate a remainder.  Because of integer division rules, numsplits *
+      // splitSize + minVal will always be <= maxVal.  We then use the remainder
+      // and add 1 if the current split index is less than the < the remainder.
+      // This is guaranteed to add up to remainder and not surpass the value.
+      long splitSize = (maxVal - minVal) / numSplits;
+      long remainder = (maxVal - minVal) % numSplits;
+      long curVal = minVal;
+
+      // This will honor numSplits as long as split size > 0.  If split size is
+      // 0, it will have remainder splits.
+      for (int i = 0; i <= numSplits; i++) {
+        splits.add(curVal);
+        if (curVal >= maxVal) {
+          break;
+        }
+        curVal += splitSize;
+        curVal += (i < remainder) ? 1 : 0;
+      }
+
+      if (splits.size() == 1) {
+        // make a valid singleton split
+        splits.add(maxVal);
+      }
+
+      return splits;
+    }
+}

Propchange: incubator/sqoop/trunk/src/java/org/apache/sqoop/mapreduce/db/IntegerSplitter.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/sqoop/trunk/src/java/org/apache/sqoop/mapreduce/db/OracleDBRecordReader.java
URL: http://svn.apache.org/viewvc/incubator/sqoop/trunk/src/java/org/apache/sqoop/mapreduce/db/OracleDBRecordReader.java?rev=1190441&view=auto
==============================================================================
--- incubator/sqoop/trunk/src/java/org/apache/sqoop/mapreduce/db/OracleDBRecordReader.java (added)
+++ incubator/sqoop/trunk/src/java/org/apache/sqoop/mapreduce/db/OracleDBRecordReader.java Fri Oct 28 16:50:39 2011
@@ -0,0 +1,152 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.sqoop.mapreduce.db;
+
+import java.io.IOException;
+import java.lang.reflect.Method;
+import java.sql.Connection;
+import java.sql.SQLException;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.mapreduce.lib.db.DBWritable;
+
+import com.cloudera.sqoop.mapreduce.db.DBConfiguration;
+import com.cloudera.sqoop.mapreduce.db.DBInputFormat;
+import com.cloudera.sqoop.mapreduce.db.DBRecordReader;
+
+/**
+ * A RecordReader that reads records from an Oracle SQL table.
+ */
+public class OracleDBRecordReader<T extends DBWritable>
+extends DBRecordReader<T>  {
+
+  /** Configuration key to set to a timezone string. */
+  public static final String SESSION_TIMEZONE_KEY = "oracle.sessionTimeZone";
+
+  private static final Log LOG = LogFactory.getLog(OracleDBRecordReader.class);
+
+  // CHECKSTYLE:OFF
+  /**
+   * Builds the reader and applies the configured session time zone to the
+   * connection before any records are read.
+   * @throws SQLException if the session time zone cannot be set.
+   */
+  public OracleDBRecordReader(DBInputFormat.DBInputSplit split,
+      Class<T> inputClass, Configuration conf, Connection conn,
+      DBConfiguration dbConfig, String cond, String [] fields,
+      String table) throws SQLException {
+    super(split, inputClass, conf, conn, dbConfig, cond, fields, table);
+    setSessionTimeZone(conf, conn);
+  }
+  // CHECKSTYLE:ON
+
+
+  /** Returns the query for selecting the records from an Oracle DB. */
+  protected String getSelectQuery() {
+    StringBuilder query = new StringBuilder();
+    DBConfiguration dbConf = getDBConf();
+    String conditions = getConditions();
+    String tableName = getTableName();
+    String [] fieldNames = getFieldNames();
+
+    // Oracle-specific codepath to use rownum instead of LIMIT/OFFSET.
+    if(dbConf.getInputQuery() == null) {
+      // No user query configured: build SELECT <fields> FROM <table>
+      // [WHERE <conditions>] [ORDER BY <orderBy>] ourselves.
+      query.append("SELECT ");
+
+      for (int i = 0; i < fieldNames.length; i++) {
+        query.append(fieldNames[i]);
+        if (i != fieldNames.length -1) {
+          query.append(", ");
+        }
+      }
+
+      query.append(" FROM ").append(tableName);
+      if (conditions != null && conditions.length() > 0) {
+        query.append(" WHERE ").append(conditions);
+      }
+      String orderBy = dbConf.getInputOrderBy();
+      if (orderBy != null && orderBy.length() > 0) {
+        query.append(" ORDER BY ").append(orderBy);
+      }
+    } else {
+      //PREBUILT QUERY
+      query.append(dbConf.getInputQuery());
+    }
+
+    try {
+      DBInputFormat.DBInputSplit split = getSplit();
+      if (split.getLength() > 0 && split.getStart() > 0) {
+        // Wrap the statement in a ROWNUM-windowed subquery so only this
+        // split's slice of rows [start, start + length] is returned.
+        String querystring = query.toString();
+
+        query = new StringBuilder();
+        query.append("SELECT * FROM (SELECT a.*,ROWNUM dbif_rno FROM ( ");
+        query.append(querystring);
+        query.append(" ) a WHERE rownum <= ").append(split.getStart());
+        query.append(" + ").append(split.getLength());
+        query.append(" ) WHERE dbif_rno >= ").append(split.getStart());
+      }
+    } catch (IOException ex) {
+      // ignore, will not throw.
+    }
+
+    return query.toString();
+  }
+
+  /**
+   * Set session time zone.
+   * @param conf The current configuration.
+   * We read the 'oracle.sessionTimeZone' property from here.
+   * @param conn The connection to alter the timezone properties of.
+   * @throws SQLException if the reflective lookup or both invocation
+   * attempts (configured zone, then "GMT") fail.
+   */
+  public static void setSessionTimeZone(Configuration conf,
+      Connection conn) throws SQLException {
+    // need to use reflection to call the method setSessionTimeZone on
+    // the OracleConnection class because oracle specific java libraries are
+    // not accessible in this context.
+    Method method;
+    try {
+      method = conn.getClass().getMethod(
+              "setSessionTimeZone", new Class [] {String.class});
+    } catch (Exception ex) {
+      LOG.error("Could not find method setSessionTimeZone in "
+          + conn.getClass().getName(), ex);
+      // rethrow SQLException
+      throw new SQLException(ex);
+    }
+
+    // Need to set the time zone in order for Java
+    // to correctly access the column "TIMESTAMP WITH LOCAL TIME ZONE".
+    // We can't easily get the correct Oracle-specific timezone string
+    // from Java; just let the user set the timezone in a property.
+    String clientTimeZone = conf.get(SESSION_TIMEZONE_KEY, "GMT");
+    try {
+      method.setAccessible(true);
+      method.invoke(conn, clientTimeZone);
+      LOG.info("Time zone has been set to " + clientTimeZone);
+    } catch (Exception ex) {
+      LOG.warn("Time zone " + clientTimeZone
+               + " could not be set on Oracle database.");
+      LOG.warn("Setting default time zone: GMT");
+      try {
+        // "GMT" timezone is guaranteed to exist.
+        method.invoke(conn, "GMT");
+      } catch (Exception ex2) {
+        LOG.error("Could not set time zone for oracle connection", ex2);
+        // rethrow SQLException
+        // NOTE(review): this wraps 'ex' (the first invocation failure),
+        // not 'ex2' — the GMT-fallback failure is only logged. Confirm
+        // this cause choice is intentional.
+        throw new SQLException(ex);
+      }
+    }
+  }
+}

Propchange: incubator/sqoop/trunk/src/java/org/apache/sqoop/mapreduce/db/OracleDBRecordReader.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/sqoop/trunk/src/java/org/apache/sqoop/mapreduce/db/OracleDataDrivenDBInputFormat.java
URL: http://svn.apache.org/viewvc/incubator/sqoop/trunk/src/java/org/apache/sqoop/mapreduce/db/OracleDataDrivenDBInputFormat.java?rev=1190441&view=auto
==============================================================================
--- incubator/sqoop/trunk/src/java/org/apache/sqoop/mapreduce/db/OracleDataDrivenDBInputFormat.java (added)
+++ incubator/sqoop/trunk/src/java/org/apache/sqoop/mapreduce/db/OracleDataDrivenDBInputFormat.java Fri Oct 28 16:50:39 2011
@@ -0,0 +1,77 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.sqoop.mapreduce.db;
+
+import java.io.IOException;
+import java.sql.SQLException;
+import java.sql.Types;
+
+import org.apache.hadoop.conf.Configurable;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.mapreduce.RecordReader;
+import org.apache.hadoop.mapreduce.lib.db.DBWritable;
+
+import com.cloudera.sqoop.mapreduce.db.DBConfiguration;
+import com.cloudera.sqoop.mapreduce.db.DBSplitter;
+import com.cloudera.sqoop.mapreduce.db.DataDrivenDBInputFormat;
+import com.cloudera.sqoop.mapreduce.db.OracleDataDrivenDBRecordReader;
+import com.cloudera.sqoop.mapreduce.db.OracleDateSplitter;
+import com.cloudera.sqoop.mapreduce.db.DBInputFormat.DBInputSplit;
+
+/**
+ * An InputFormat that reads input data from an SQL table in an Oracle db.
+ */
+public class OracleDataDrivenDBInputFormat<T extends DBWritable>
+    extends DataDrivenDBInputFormat<T> implements Configurable {
+
+  /**
+   * Choose a splitter for the split-by column, preferring the
+   * Oracle-specific date splitter for date/time columns so that split
+   * boundaries are rendered as TO_TIMESTAMP(...) literals Oracle can parse.
+   *
+   * @param sqlDataType the java.sql.Types code of the split-by column.
+   * @return the DBSplitter implementation to use to divide the table/query
+   * into InputSplits.
+   */
+  @Override
+  protected DBSplitter getSplitter(int sqlDataType) {
+    switch (sqlDataType) {
+    case Types.DATE:
+    case Types.TIME:
+    case Types.TIMESTAMP:
+      return new OracleDateSplitter();
+
+    default:
+      return super.getSplitter(sqlDataType);
+    }
+  }
+
+  /**
+   * Create the Oracle-specific record reader for one input split.
+   *
+   * @param split the split whose rows should be read.
+   * @param conf the job configuration.
+   * @return a RecordReader over the split's rows.
+   * @throws IOException wrapping any SQLException raised while
+   * constructing the reader (e.g., setting the session time zone).
+   */
+  @Override
+  protected RecordReader<LongWritable, T> createDBRecordReader(
+      DBInputSplit split, Configuration conf) throws IOException {
+
+    DBConfiguration dbConf = getDBConf();
+    @SuppressWarnings("unchecked")
+    Class<T> inputClass = (Class<T>) (dbConf.getInputClass());
+
+    try {
+      // Use Oracle-specific db reader
+      return new OracleDataDrivenDBRecordReader<T>(split, inputClass,
+          conf, getConnection(), dbConf, dbConf.getInputConditions(),
+          dbConf.getInputFieldNames(), dbConf.getInputTableName());
+    } catch (SQLException ex) {
+      throw new IOException(ex);
+    }
+  }
+}

Propchange: incubator/sqoop/trunk/src/java/org/apache/sqoop/mapreduce/db/OracleDataDrivenDBInputFormat.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/sqoop/trunk/src/java/org/apache/sqoop/mapreduce/db/OracleDataDrivenDBRecordReader.java
URL: http://svn.apache.org/viewvc/incubator/sqoop/trunk/src/java/org/apache/sqoop/mapreduce/db/OracleDataDrivenDBRecordReader.java?rev=1190441&view=auto
==============================================================================
--- incubator/sqoop/trunk/src/java/org/apache/sqoop/mapreduce/db/OracleDataDrivenDBRecordReader.java (added)
+++ incubator/sqoop/trunk/src/java/org/apache/sqoop/mapreduce/db/OracleDataDrivenDBRecordReader.java Fri Oct 28 16:50:39 2011
@@ -0,0 +1,53 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.sqoop.mapreduce.db;
+
+import java.sql.Connection;
+import java.sql.SQLException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.mapreduce.lib.db.DBWritable;
+
+import com.cloudera.sqoop.mapreduce.db.DBConfiguration;
+import com.cloudera.sqoop.mapreduce.db.DBInputFormat;
+import com.cloudera.sqoop.mapreduce.db.DataDrivenDBRecordReader;
+import com.cloudera.sqoop.mapreduce.db.OracleDBRecordReader;
+
+/**
+ * A RecordReader that reads records from an Oracle table
+ * via DataDrivenDBRecordReader.
+ */
+public class OracleDataDrivenDBRecordReader<T extends DBWritable>
+    extends DataDrivenDBRecordReader<T>  {
+
+
+  // CHECKSTYLE:OFF
+  // TODO(aaron): Enable checkstyle after refactoring DBRecordReader c'tor.
+  /**
+   * @param split the table split to read.
+   * @param inputClass the DBWritable class of each record.
+   * @param conf the job configuration.
+   * @param conn an open connection to the Oracle database.
+   * @param dbConfig database configuration (table, fields, conditions).
+   * @param cond user-supplied WHERE-clause conditions, if any.
+   * @param fields the column names to select.
+   * @param table the table name to read from.
+   * @throws SQLException if the Oracle session time zone cannot be set.
+   */
+  public OracleDataDrivenDBRecordReader(DBInputFormat.DBInputSplit split,
+      Class<T> inputClass, Configuration conf, Connection conn,
+      DBConfiguration dbConfig, String cond, String [] fields,
+      String table) throws SQLException {
+
+    super(split, inputClass, conf, conn, dbConfig, cond, fields, table,
+        "ORACLE");
+
+    // Must initialize the tz used by the connection for Oracle; needed
+    // to read "TIMESTAMP WITH LOCAL TIME ZONE" columns correctly.
+    OracleDBRecordReader.setSessionTimeZone(conf, conn);
+  }
+  // CHECKSTYLE:ON
+}

Propchange: incubator/sqoop/trunk/src/java/org/apache/sqoop/mapreduce/db/OracleDataDrivenDBRecordReader.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/sqoop/trunk/src/java/org/apache/sqoop/mapreduce/db/OracleDateSplitter.java
URL: http://svn.apache.org/viewvc/incubator/sqoop/trunk/src/java/org/apache/sqoop/mapreduce/db/OracleDateSplitter.java?rev=1190441&view=auto
==============================================================================
--- incubator/sqoop/trunk/src/java/org/apache/sqoop/mapreduce/db/OracleDateSplitter.java (added)
+++ incubator/sqoop/trunk/src/java/org/apache/sqoop/mapreduce/db/OracleDateSplitter.java Fri Oct 28 16:50:39 2011
@@ -0,0 +1,38 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.sqoop.mapreduce.db;
+
+import java.util.Date;
+
+import com.cloudera.sqoop.mapreduce.db.DateSplitter;
+
+/**
+ * Implement DBSplitter over date/time values returned by an Oracle db.
+ * Make use of logic from DateSplitter, since this just needs to use
+ * some Oracle-specific functions on the formatting end when generating
+ * InputSplits.
+ */
+public class OracleDateSplitter extends DateSplitter  {
+
+  @SuppressWarnings("unchecked")
+  @Override
+  protected String dateToString(Date d) {
+    // Oracle Date objects are always actually Timestamps; render the split
+    // boundary as a TO_TIMESTAMP() literal that Oracle can parse back.
+    return "TO_TIMESTAMP('" + d.toString() + "', 'YYYY-MM-DD HH24:MI:SS.FF')";
+  }
+}

Propchange: incubator/sqoop/trunk/src/java/org/apache/sqoop/mapreduce/db/OracleDateSplitter.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/sqoop/trunk/src/java/org/apache/sqoop/mapreduce/db/TextSplitter.java
URL: http://svn.apache.org/viewvc/incubator/sqoop/trunk/src/java/org/apache/sqoop/mapreduce/db/TextSplitter.java?rev=1190441&view=auto
==============================================================================
--- incubator/sqoop/trunk/src/java/org/apache/sqoop/mapreduce/db/TextSplitter.java (added)
+++ incubator/sqoop/trunk/src/java/org/apache/sqoop/mapreduce/db/TextSplitter.java Fri Oct 28 16:50:39 2011
@@ -0,0 +1,228 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.sqoop.mapreduce.db;
+
+import java.math.BigDecimal;
+import java.sql.ResultSet;
+import java.sql.SQLException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.mapreduce.InputSplit;
+
+import com.cloudera.sqoop.config.ConfigurationHelper;
+import com.cloudera.sqoop.mapreduce.db.BigDecimalSplitter;
+import com.cloudera.sqoop.mapreduce.db.DataDrivenDBInputFormat;
+
+/**
+ * Implement DBSplitter over text strings.
+ */
+public class TextSplitter extends BigDecimalSplitter {
+
+  private static final Log LOG = LogFactory.getLog(TextSplitter.class);
+
+  /**
+   * This method needs to determine the splits between two user-provided
+   * strings.  In the case where the user's strings are 'A' and 'Z', this is
+   * not hard; we could create two splits from ['A', 'M') and ['M', 'Z'], 26
+   * splits for strings beginning with each letter, etc.
+   *
+   * If a user has provided us with the strings "Ham" and "Haze", however, we
+   * need to create splits that differ in the third letter.
+   *
+   * The algorithm used is as follows:
+   * Since there are 2**16 unicode characters, we interpret characters as
+   * digits in base 65536. Given a string 's' containing characters s_0, s_1
+   * .. s_n, we interpret the string as the number: 0.s_0 s_1 s_2.. s_n in
+   * base 65536. Having mapped the low and high strings into floating-point
+   * values, we then use the BigDecimalSplitter to establish the even split
+   * points, then map the resulting floating point values back into strings.
+   *
+   * @param conf the job configuration (supplies the requested map count).
+   * @param results an open ResultSet whose first two columns hold the
+   * minimum and maximum values of the split column.
+   * @param colName the name of the column being split on.
+   * @return a list of InputSplits whose bounds are WHERE-clause fragments.
+   * @throws SQLException if the min/max values cannot be read.
+   */
+  public List<InputSplit> split(Configuration conf, ResultSet results,
+      String colName) throws SQLException {
+
+    LOG.warn("Generating splits for a textual index column.");
+    LOG.warn("If your database sorts in a case-insensitive order, "
+        + "this may result in a partial import or duplicate records.");
+    LOG.warn("You are strongly encouraged to choose an integral split column.");
+
+    String minString = results.getString(1);
+    String maxString = results.getString(2);
+
+    boolean minIsNull = false;
+
+    // If the min value is null, switch it to an empty string instead for
+    // purposes of interpolation. Then add [null, null] as a special case
+    // split.
+    if (null == minString) {
+      minString = "";
+      minIsNull = true;
+    }
+
+    if (null == maxString) {
+      // If the max string is null, then the min string has to be null too.
+      // Just return a special split for this case.
+      List<InputSplit> splits = new ArrayList<InputSplit>();
+      splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit(
+          colName + " IS NULL", colName + " IS NULL"));
+      return splits;
+    }
+
+    // Use this as a hint. May need an extra task if the size doesn't
+    // divide cleanly.
+    int numSplits = ConfigurationHelper.getConfNumMaps(conf);
+
+    String lowClausePrefix = colName + " >= '";
+    String highClausePrefix = colName + " < '";
+
+    // If there is a common prefix between minString and maxString, establish
+    // it and pull it out of minString and maxString.
+    int maxPrefixLen = Math.min(minString.length(), maxString.length());
+    int sharedLen;
+    for (sharedLen = 0; sharedLen < maxPrefixLen; sharedLen++) {
+      char c1 = minString.charAt(sharedLen);
+      char c2 = maxString.charAt(sharedLen);
+      if (c1 != c2) {
+        break;
+      }
+    }
+
+    // The common prefix has length 'sharedLen'. Extract it from both.
+    String commonPrefix = minString.substring(0, sharedLen);
+    minString = minString.substring(sharedLen);
+    maxString = maxString.substring(sharedLen);
+
+    List<String> splitStrings = split(numSplits, minString, maxString,
+        commonPrefix);
+    List<InputSplit> splits = new ArrayList<InputSplit>();
+
+    // Convert the list of split point strings into an actual set of
+    // InputSplits. Consecutive split points become [start, end) intervals.
+    String start = splitStrings.get(0);
+    for (int i = 1; i < splitStrings.size(); i++) {
+      String end = splitStrings.get(i);
+
+      if (i == splitStrings.size() - 1) {
+        // This is the last one; use a closed interval.
+        splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit(
+            lowClausePrefix + start + "'", colName + " <= '" + end + "'"));
+      } else {
+        // Normal open-interval case.
+        splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit(
+            lowClausePrefix + start + "'", highClausePrefix + end + "'"));
+      }
+
+      start = end;
+    }
+
+    if (minIsNull) {
+      // Add the special null split at the end.
+      splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit(
+          colName + " IS NULL", colName + " IS NULL"));
+    }
+
+    return splits;
+  }
+
+  /**
+   * Compute a list of split-point strings between minString and maxString
+   * (both taken without the shared prefix; commonPrefix is re-applied to
+   * every returned entry). The first and last entries are guaranteed to be
+   * commonPrefix + minString and commonPrefix + maxString respectively.
+   *
+   * @throws SQLException propagated from the underlying BigDecimal split.
+   */
+  protected List<String> split(int numSplits, String minString,
+      String maxString, String commonPrefix) throws SQLException {
+
+    BigDecimal minVal = stringToBigDecimal(minString);
+    BigDecimal maxVal = stringToBigDecimal(maxString);
+
+    List<BigDecimal> splitPoints = split(
+        new BigDecimal(numSplits), minVal, maxVal);
+    List<String> splitStrings = new ArrayList<String>();
+
+    // Convert the BigDecimal splitPoints into their string representations.
+    for (BigDecimal bd : splitPoints) {
+      splitStrings.add(commonPrefix + bigDecimalToString(bd));
+    }
+
+    // Make sure that our user-specified boundaries are the first and last
+    // entries in the array.
+    if (splitStrings.size() == 0
+        || !splitStrings.get(0).equals(commonPrefix + minString)) {
+      splitStrings.add(0, commonPrefix + minString);
+    }
+    if (splitStrings.size() == 1
+        || !splitStrings.get(splitStrings.size() - 1).equals(
+        commonPrefix + maxString)) {
+      splitStrings.add(commonPrefix + maxString);
+    }
+
+    return splitStrings;
+  }
+
+  // One "digit" of the base-65536 interpretation of a string.
+  private static final BigDecimal ONE_PLACE = new BigDecimal(65536);
+
+  // Maximum number of characters to convert. This is to prevent rounding
+  // errors or repeating fractions near the very bottom from getting out of
+  // control. Note that this still gives us a huge number of possible splits.
+  private static final int MAX_CHARS = 8;
+
+  /**
+   * Return a BigDecimal representation of string 'str' suitable for use in a
+   * numerically-sorting order. Only the first MAX_CHARS characters
+   * contribute; longer strings are truncated for this purpose.
+   */
+  protected BigDecimal stringToBigDecimal(String str) {
+    // Start with 1/65536 to compute the first digit.
+    BigDecimal curPlace = ONE_PLACE;
+    BigDecimal result = BigDecimal.ZERO;
+
+    int len = Math.min(str.length(), MAX_CHARS);
+
+    for (int i = 0; i < len; i++) {
+      int codePoint = str.codePointAt(i);
+      result = result.add(tryDivide(new BigDecimal(codePoint), curPlace));
+      // advance to the next less significant place. e.g., 1/(65536^2) for the
+      // second char.
+      curPlace = curPlace.multiply(ONE_PLACE);
+    }
+
+    return result;
+  }
+
+  /**
+   * Return the string encoded in a BigDecimal.
+   * Repeatedly multiply the input value by 65536; the integer portion after
+   * such a multiplication represents a single character in base 65536.
+   * Convert that back into a char and create a string out of these until we
+   * have no data left.
+   */
+  protected String bigDecimalToString(BigDecimal bd) {
+    BigDecimal cur = bd.stripTrailingZeros();
+    StringBuilder sb = new StringBuilder();
+
+    for (int numConverted = 0; numConverted < MAX_CHARS; numConverted++) {
+      cur = cur.multiply(ONE_PLACE);
+      int curCodePoint = cur.intValue();
+      // A zero "digit" terminates the string; trailing zeros were stripped.
+      if (0 == curCodePoint) {
+        break;
+      }
+
+      cur = cur.subtract(new BigDecimal(curCodePoint));
+      sb.append(Character.toChars(curCodePoint));
+    }
+
+    return sb.toString();
+  }
+}

Propchange: incubator/sqoop/trunk/src/java/org/apache/sqoop/mapreduce/db/TextSplitter.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Modified: incubator/sqoop/trunk/src/test/com/cloudera/sqoop/mapreduce/db/TestIntegerSplitter.java
URL: http://svn.apache.org/viewvc/incubator/sqoop/trunk/src/test/com/cloudera/sqoop/mapreduce/db/TestIntegerSplitter.java?rev=1190441&r1=1190440&r2=1190441&view=diff
==============================================================================
--- incubator/sqoop/trunk/src/test/com/cloudera/sqoop/mapreduce/db/TestIntegerSplitter.java (original)
+++ incubator/sqoop/trunk/src/test/com/cloudera/sqoop/mapreduce/db/TestIntegerSplitter.java Fri Oct 28 16:50:39 2011
@@ -1,6 +1,4 @@
 /**
- * Copyright 2011 The Apache Software Foundation
- *
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
@@ -17,106 +15,20 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
 package com.cloudera.sqoop.mapreduce.db;
 
-import java.sql.SQLException;
-import java.util.List;
-
-import junit.framework.TestCase;
 
 /**
  * Test that the IntegerSplitter generates sane splits.
+ *
+ * @deprecated use org.apache.sqoop.mapreduce.db.TestIntegerSplitter instead.
+ * @see org.apache.sqoop.mapreduce.db.TestIntegerSplitter
  */
-public class TestIntegerSplitter extends TestCase {
-  private long [] toLongArray(List<Long> in) {
-    long [] out = new long[in.size()];
-    for (int i = 0; i < in.size(); i++) {
-      out[i] = in.get(i).longValue();
-    }
-
-    return out;
-  }
-
-  public String formatLongArray(long [] ar) {
-    StringBuilder sb = new StringBuilder();
-    sb.append("[");
-    boolean first = true;
-    for (long val : ar) {
-      if (!first) {
-        sb.append(", ");
-      }
-
-      sb.append(Long.toString(val));
-      first = false;
-    }
-
-    sb.append("]");
-    return sb.toString();
-  }
-
-  public void assertLongArrayEquals(long [] expected, long [] actual) {
-    for (int i = 0; i < expected.length; i++) {
-      try {
-        assertEquals("Failure at position " + i + "; got " + actual[i]
-            + " instead of " + expected[i]
-            + "; actual array is " + formatLongArray(actual),
-            expected[i], actual[i]);
-      } catch (ArrayIndexOutOfBoundsException oob) {
-        fail("Expected array with " + expected.length
-            + " elements; got " + actual.length
-            + ". Actual array is " + formatLongArray(actual));
-      }
-    }
-
-    if (actual.length > expected.length) {
-      fail("Actual array has " + actual.length
-          + " elements; expected " + expected.length
-          + ". Actual array is " + formatLongArray(actual));
-    }
-  }
-
-  public void testEvenSplits() throws SQLException {
-    List<Long> splits = new IntegerSplitter().split(10, 0, 100);
-    long [] expected = { 0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100, };
-    assertLongArrayEquals(expected, toLongArray(splits));
-  }
-
-  public void testOddSplits() throws SQLException {
-    List<Long> splits = new IntegerSplitter().split(10, 0, 95);
-    long [] expected = { 0, 10, 20, 30, 40, 50, 59, 68, 77, 86, 95, };
-    assertLongArrayEquals(expected, toLongArray(splits));
-  }
-
-  public void testSingletonSplit() throws SQLException {
-    List<Long> splits = new IntegerSplitter().split(1, 5, 5);
-    long [] expected = { 5, 5 };
-    assertLongArrayEquals(expected, toLongArray(splits));
-  }
-
-  public void testSingletonSplit2() throws SQLException {
-    // Same test, but overly-high numSplits
-    List<Long> splits = new IntegerSplitter().split(5, 5, 5);
-    long [] expected = { 5, 5 };
-    assertLongArrayEquals(expected, toLongArray(splits));
-  }
-
-  public void testTooManySplits() throws SQLException {
-    List<Long> splits = new IntegerSplitter().split(5, 3, 5);
-    long [] expected = { 3, 4, 5 };
-    assertLongArrayEquals(expected, toLongArray(splits));
-  }
+public class TestIntegerSplitter
+  extends org.apache.sqoop.mapreduce.db.TestIntegerSplitter {
 
-  /**
-   * This tests verifies that overflows do not happen due to the splitting
-   * algorithm.
-   *
-   * @throws SQLException
-   */
-  public void testBigIntSplits() throws SQLException {
-    List<Long> splits = new IntegerSplitter().split(4, 14,
-        7863696997872966707L);
-    assertEquals(splits.size(), 5);
+  public void testDummy() {
+    // Placeholder: the real test coverage is inherited from the
+    // org.apache.sqoop.mapreduce.db.TestIntegerSplitter superclass.
   }
 }
 

Modified: incubator/sqoop/trunk/src/test/com/cloudera/sqoop/mapreduce/db/TestTextSplitter.java
URL: http://svn.apache.org/viewvc/incubator/sqoop/trunk/src/test/com/cloudera/sqoop/mapreduce/db/TestTextSplitter.java?rev=1190441&r1=1190440&r2=1190441&view=diff
==============================================================================
--- incubator/sqoop/trunk/src/test/com/cloudera/sqoop/mapreduce/db/TestTextSplitter.java (original)
+++ incubator/sqoop/trunk/src/test/com/cloudera/sqoop/mapreduce/db/TestTextSplitter.java Fri Oct 28 16:50:39 2011
@@ -1,6 +1,4 @@
 /**
- * Copyright 2011 The Apache Software Foundation
- *
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
@@ -17,123 +15,20 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
 package com.cloudera.sqoop.mapreduce.db;
 
-import java.math.BigDecimal;
-import java.sql.SQLException;
-import java.util.List;
-
-import junit.framework.TestCase;
 
 /**
  * Test that the TextSplitter implementation creates a sane set of splits.
+ * @deprecated use org.apache.sqoop.mapreduce.db.TestTextSplitter instead.
+ * @see org.apache.sqoop.mapreduce.db.TestTextSplitter
  */
-public class TestTextSplitter extends TestCase {
-
-  public String formatArray(Object [] ar) {
-    StringBuilder sb = new StringBuilder();
-    sb.append("[");
-    boolean first = true;
-    for (Object val : ar) {
-      if (!first) {
-        sb.append(", ");
-      }
+public class TestTextSplitter extends
+  org.apache.sqoop.mapreduce.db.TestTextSplitter {
 
-      sb.append(val.toString());
-      first = false;
-    }
-
-    sb.append("]");
-    return sb.toString();
+  public void testDummy() {
+    // Placeholder: the real test coverage is inherited from the
+    // org.apache.sqoop.mapreduce.db.TestTextSplitter superclass.
   }
 
-  public void assertArrayEquals(Object [] expected, Object [] actual) {
-    for (int i = 0; i < expected.length; i++) {
-      try {
-        assertEquals("Failure at position " + i + "; got " + actual[i]
-            + " instead of " + expected[i]
-            + "; actual array is " + formatArray(actual),
-            expected[i], actual[i]);
-      } catch (ArrayIndexOutOfBoundsException oob) {
-        fail("Expected array with " + expected.length
-            + " elements; got " + actual.length
-            + ". Actual array is " + formatArray(actual));
-      }
-    }
-
-    if (actual.length > expected.length) {
-      fail("Actual array has " + actual.length
-          + " elements; expected " + expected.length
-          + ". Actual array is " + formatArray(actual));
-    }
-  }
-
-  public void testStringConvertEmpty() {
-    TextSplitter splitter = new TextSplitter();
-    BigDecimal emptyBigDec = splitter.stringToBigDecimal("");
-    assertEquals(BigDecimal.ZERO, emptyBigDec);
-  }
-
-  public void testBigDecConvertEmpty() {
-    TextSplitter splitter = new TextSplitter();
-    String emptyStr = splitter.bigDecimalToString(BigDecimal.ZERO);
-    assertEquals("", emptyStr);
-  }
-
-  public void testConvertA() {
-    TextSplitter splitter = new TextSplitter();
-    String out = splitter.bigDecimalToString(splitter.stringToBigDecimal("A"));
-    assertEquals("A", out);
-  }
-
-  public void testConvertZ() {
-    TextSplitter splitter = new TextSplitter();
-    String out = splitter.bigDecimalToString(splitter.stringToBigDecimal("Z"));
-    assertEquals("Z", out);
-  }
-
-  public void testConvertThreeChars() {
-    TextSplitter splitter = new TextSplitter();
-    String out = splitter.bigDecimalToString(
-        splitter.stringToBigDecimal("abc"));
-    assertEquals("abc", out);
-  }
-
-  public void testConvertStr() {
-    TextSplitter splitter = new TextSplitter();
-    String out = splitter.bigDecimalToString(
-        splitter.stringToBigDecimal("big str"));
-    assertEquals("big str", out);
-  }
-
-  public void testConvertChomped() {
-    TextSplitter splitter = new TextSplitter();
-    String out = splitter.bigDecimalToString(
-        splitter.stringToBigDecimal("AVeryLongStringIndeed"));
-    assertEquals("AVeryLon", out);
-  }
-
-  public void testAlphabetSplit() throws SQLException {
-    // This should give us 25 splits, one per letter.
-    TextSplitter splitter = new TextSplitter();
-    List<String> splits = splitter.split(25, "A", "Z", "");
-    String [] expected = { "A", "B", "C", "D", "E", "F", "G", "H", "I",
-        "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U",
-        "V", "W", "X", "Y", "Z", };
-    assertArrayEquals(expected, splits.toArray(new String [0]));
-  }
-
-  public void testCommonPrefix() throws SQLException {
-    // Splits between 'Hand' and 'Hardy'
-    TextSplitter splitter = new TextSplitter();
-    List<String> splits = splitter.split(5, "nd", "rdy", "Ha");
-    // Don't check for exact values in the middle, because the splitter
-    // generates some ugly Unicode-isms. But do check that we get multiple
-    // splits and that it starts and ends on the correct points.
-    assertEquals("Hand", splits.get(0));
-    assertEquals("Hardy", splits.get(splits.size() -1));
-    assertEquals(6, splits.size());
-  }
 }
 

Added: incubator/sqoop/trunk/src/test/org/apache/sqoop/mapreduce/db/TestIntegerSplitter.java
URL: http://svn.apache.org/viewvc/incubator/sqoop/trunk/src/test/org/apache/sqoop/mapreduce/db/TestIntegerSplitter.java?rev=1190441&view=auto
==============================================================================
--- incubator/sqoop/trunk/src/test/org/apache/sqoop/mapreduce/db/TestIntegerSplitter.java (added)
+++ incubator/sqoop/trunk/src/test/org/apache/sqoop/mapreduce/db/TestIntegerSplitter.java Fri Oct 28 16:50:39 2011
@@ -0,0 +1,120 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.sqoop.mapreduce.db;
+
+import java.sql.SQLException;
+import java.util.List;
+
+import junit.framework.TestCase;
+
+import com.cloudera.sqoop.mapreduce.db.IntegerSplitter;
+
+/**
+ * Test that the IntegerSplitter generates sane splits.
+ */
+public class TestIntegerSplitter extends TestCase {
+
+  /** Unbox a List of Longs into a primitive long array. */
+  private long [] toLongArray(List<Long> in) {
+    long [] out = new long[in.size()];
+    for (int i = 0; i < in.size(); i++) {
+      out[i] = in.get(i).longValue();
+    }
+
+    return out;
+  }
+
+  /** Render a long array as "[a, b, c]" for failure messages. */
+  public String formatLongArray(long [] ar) {
+    StringBuilder sb = new StringBuilder();
+    sb.append("[");
+    boolean first = true;
+    for (long val : ar) {
+      if (!first) {
+        sb.append(", ");
+      }
+
+      sb.append(Long.toString(val));
+      first = false;
+    }
+
+    sb.append("]");
+    return sb.toString();
+  }
+
+  /**
+   * Assert that two long arrays are element-wise equal, with a descriptive
+   * failure message. Lengths are compared up front instead of catching
+   * ArrayIndexOutOfBoundsException mid-loop.
+   */
+  public void assertLongArrayEquals(long [] expected, long [] actual) {
+    assertEquals("Array length mismatch; actual array is "
+        + formatLongArray(actual), expected.length, actual.length);
+
+    for (int i = 0; i < expected.length; i++) {
+      assertEquals("Failure at position " + i + "; got " + actual[i]
+          + " instead of " + expected[i]
+          + "; actual array is " + formatLongArray(actual),
+          expected[i], actual[i]);
+    }
+  }
+
+  public void testEvenSplits() throws SQLException {
+    List<Long> splits = new IntegerSplitter().split(10, 0, 100);
+    long [] expected = { 0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100, };
+    assertLongArrayEquals(expected, toLongArray(splits));
+  }
+
+  public void testOddSplits() throws SQLException {
+    List<Long> splits = new IntegerSplitter().split(10, 0, 95);
+    long [] expected = { 0, 10, 20, 30, 40, 50, 59, 68, 77, 86, 95, };
+    assertLongArrayEquals(expected, toLongArray(splits));
+  }
+
+  public void testSingletonSplit() throws SQLException {
+    List<Long> splits = new IntegerSplitter().split(1, 5, 5);
+    long [] expected = { 5, 5 };
+    assertLongArrayEquals(expected, toLongArray(splits));
+  }
+
+  public void testSingletonSplit2() throws SQLException {
+    // Same test, but overly-high numSplits
+    List<Long> splits = new IntegerSplitter().split(5, 5, 5);
+    long [] expected = { 5, 5 };
+    assertLongArrayEquals(expected, toLongArray(splits));
+  }
+
+  public void testTooManySplits() throws SQLException {
+    List<Long> splits = new IntegerSplitter().split(5, 3, 5);
+    long [] expected = { 3, 4, 5 };
+    assertLongArrayEquals(expected, toLongArray(splits));
+  }
+
+  /**
+   * This test verifies that overflows do not happen due to the splitting
+   * algorithm.
+   *
+   * @throws SQLException on failure to generate the splits.
+   */
+  public void testBigIntSplits() throws SQLException {
+    List<Long> splits = new IntegerSplitter().split(4, 14,
+        7863696997872966707L);
+    // JUnit convention: expected value first. 4 splits yield 5 boundaries.
+    assertEquals(5, splits.size());
+  }
+}

Propchange: incubator/sqoop/trunk/src/test/org/apache/sqoop/mapreduce/db/TestIntegerSplitter.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/sqoop/trunk/src/test/org/apache/sqoop/mapreduce/db/TestTextSplitter.java
URL: http://svn.apache.org/viewvc/incubator/sqoop/trunk/src/test/org/apache/sqoop/mapreduce/db/TestTextSplitter.java?rev=1190441&view=auto
==============================================================================
--- incubator/sqoop/trunk/src/test/org/apache/sqoop/mapreduce/db/TestTextSplitter.java (added)
+++ incubator/sqoop/trunk/src/test/org/apache/sqoop/mapreduce/db/TestTextSplitter.java Fri Oct 28 16:50:39 2011
@@ -0,0 +1,134 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.sqoop.mapreduce.db;
+
+import java.math.BigDecimal;
+import java.sql.SQLException;
+import java.util.List;
+
+import com.cloudera.sqoop.mapreduce.db.TextSplitter;
+
+import junit.framework.TestCase;
+
+/**
+ * Tests for TextSplitter: round-tripping strings through the
+ * string/BigDecimal encoding, and split-point generation over
+ * lexicographic ranges.
+ */
+public class TestTextSplitter extends TestCase {
+
+  /**
+   * Renders an array as "[a, b, c]" for use in failure messages.
+   * Uses String.valueOf() so a null element cannot throw NPE while we
+   * are building a diagnostic.
+   */
+  public String formatArray(Object [] ar) {
+    StringBuilder sb = new StringBuilder();
+    sb.append("[");
+    boolean first = true;
+    for (Object val : ar) {
+      if (!first) {
+        sb.append(", ");
+      }
+
+      sb.append(String.valueOf(val));
+      first = false;
+    }
+
+    sb.append("]");
+    return sb.toString();
+  }
+
+  /**
+   * Asserts that the two arrays have the same length and pairwise-equal
+   * elements, failing with a descriptive message otherwise.
+   */
+  public void assertArrayEquals(Object [] expected, Object [] actual) {
+    // Check the length explicitly up front instead of catching
+    // ArrayIndexOutOfBoundsException inside the loop; exceptions should
+    // not be used for control flow.
+    if (actual.length < expected.length) {
+      fail("Expected array with " + expected.length
+          + " elements; got " + actual.length
+          + ". Actual array is " + formatArray(actual));
+    }
+
+    for (int i = 0; i < expected.length; i++) {
+      assertEquals("Failure at position " + i + "; got " + actual[i]
+          + " instead of " + expected[i]
+          + "; actual array is " + formatArray(actual),
+          expected[i], actual[i]);
+    }
+
+    if (actual.length > expected.length) {
+      fail("Actual array has " + actual.length
+          + " elements; expected " + expected.length
+          + ". Actual array is " + formatArray(actual));
+    }
+  }
+
+  // The empty string encodes to BigDecimal.ZERO...
+  public void testStringConvertEmpty() {
+    TextSplitter splitter = new TextSplitter();
+    BigDecimal emptyBigDec = splitter.stringToBigDecimal("");
+    assertEquals(BigDecimal.ZERO, emptyBigDec);
+  }
+
+  // ...and BigDecimal.ZERO decodes back to the empty string.
+  public void testBigDecConvertEmpty() {
+    TextSplitter splitter = new TextSplitter();
+    String emptyStr = splitter.bigDecimalToString(BigDecimal.ZERO);
+    assertEquals("", emptyStr);
+  }
+
+  // Single characters round-trip exactly.
+  public void testConvertA() {
+    TextSplitter splitter = new TextSplitter();
+    String out = splitter.bigDecimalToString(splitter.stringToBigDecimal("A"));
+    assertEquals("A", out);
+  }
+
+  public void testConvertZ() {
+    TextSplitter splitter = new TextSplitter();
+    String out = splitter.bigDecimalToString(splitter.stringToBigDecimal("Z"));
+    assertEquals("Z", out);
+  }
+
+  // Short strings (including embedded spaces) round-trip exactly.
+  public void testConvertThreeChars() {
+    TextSplitter splitter = new TextSplitter();
+    String out = splitter.bigDecimalToString(
+        splitter.stringToBigDecimal("abc"));
+    assertEquals("abc", out);
+  }
+
+  public void testConvertStr() {
+    TextSplitter splitter = new TextSplitter();
+    String out = splitter.bigDecimalToString(
+        splitter.stringToBigDecimal("big str"));
+    assertEquals("big str", out);
+  }
+
+  // Long strings are truncated by the encoding: only a fixed-size prefix
+  // survives the round trip.
+  public void testConvertChomped() {
+    TextSplitter splitter = new TextSplitter();
+    String out = splitter.bigDecimalToString(
+        splitter.stringToBigDecimal("AVeryLongStringIndeed"));
+    assertEquals("AVeryLon", out);
+  }
+
+  public void testAlphabetSplit() throws SQLException {
+    // 25 splits over [A, Z] yields 26 boundary points, one per letter.
+    TextSplitter splitter = new TextSplitter();
+    List<String> splits = splitter.split(25, "A", "Z", "");
+    String [] expected = { "A", "B", "C", "D", "E", "F", "G", "H", "I",
+        "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U",
+        "V", "W", "X", "Y", "Z", };
+    assertArrayEquals(expected, splits.toArray(new String [0]));
+  }
+
+  public void testCommonPrefix() throws SQLException {
+    // Splits between 'Hand' and 'Hardy'; the shared prefix "Ha" is passed
+    // separately and re-attached to each split point.
+    TextSplitter splitter = new TextSplitter();
+    List<String> splits = splitter.split(5, "nd", "rdy", "Ha");
+    // Don't check for exact values in the middle, because the splitter
+    // generates some ugly Unicode-isms. But do check that we get multiple
+    // splits and that it starts and ends on the correct points.
+    assertEquals("Hand", splits.get(0));
+    assertEquals("Hardy", splits.get(splits.size() -1));
+    assertEquals(6, splits.size());
+  }
+}

Propchange: incubator/sqoop/trunk/src/test/org/apache/sqoop/mapreduce/db/TestTextSplitter.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain



Mime
View raw message