hadoop-mapreduce-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jl...@apache.org
Subject svn commit: r1528242 - in /hadoop/common/branches/branch-2/hadoop-mapreduce-project: ./ hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/util/ hadoop-mapreduce-client/hadoop-mapreduce-client-common/src...
Date Tue, 01 Oct 2013 22:40:02 GMT
Author: jlowe
Date: Tue Oct  1 22:40:01 2013
New Revision: 1528242

URL: http://svn.apache.org/r1528242
Log:
svn merge -c 1528237 FIXES: MAPREDUCE-4421. Run MapReduce framework via the distributed cache.
Contributed by Jason Lowe

Added:
    hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/site/apt/DistributedCacheDeploy.apt.vm
      - copied unchanged from r1528237, hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/site/apt/DistributedCacheDeploy.apt.vm
Modified:
    hadoop/common/branches/branch-2/hadoop-mapreduce-project/CHANGES.txt
    hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/util/MRApps.java
    hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/test/java/org/apache/hadoop/mapreduce/v2/util/TestMRApps.java
    hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/JobSubmitter.java
    hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java
    hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml

Modified: hadoop/common/branches/branch-2/hadoop-mapreduce-project/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-mapreduce-project/CHANGES.txt?rev=1528242&r1=1528241&r2=1528242&view=diff
==============================================================================
--- hadoop/common/branches/branch-2/hadoop-mapreduce-project/CHANGES.txt (original)
+++ hadoop/common/branches/branch-2/hadoop-mapreduce-project/CHANGES.txt Tue Oct  1 22:40:01
2013
@@ -25,6 +25,8 @@ Release 2.3.0 - UNRELEASED
     MAPREDUCE-434. LocalJobRunner limited to single reducer (Sandy Ryza and
     Aaron Kimball via Sandy Ryza)
 
+    MAPREDUCE-4421. Run MapReduce framework via the distributed cache (jlowe)
+
   OPTIMIZATIONS
 
     MAPREDUCE-5484. YarnChild unnecessarily loads job conf twice (Sandy Ryza)

Modified: hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/util/MRApps.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/util/MRApps.java?rev=1528242&r1=1528241&r2=1528242&view=diff
==============================================================================
--- hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/util/MRApps.java
(original)
+++ hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/util/MRApps.java
Tue Oct  1 22:40:01 2013
@@ -21,6 +21,7 @@ package org.apache.hadoop.mapreduce.v2.u
 import java.io.IOException;
 import java.net.MalformedURLException;
 import java.net.URI;
+import java.net.URISyntaxException;
 import java.security.AccessController;
 import java.security.PrivilegedActionException;
 import java.security.PrivilegedExceptionAction;
@@ -133,6 +134,30 @@ public class MRApps extends Apps {
     return TaskAttemptStateUI.valueOf(attemptStateStr);
   }
 
+  // gets the base name of the MapReduce framework or null if no
+  // framework was configured
+  private static String getMRFrameworkName(Configuration conf) {
+    String frameworkName = null;
+    String framework =
+        conf.get(MRJobConfig.MAPREDUCE_APPLICATION_FRAMEWORK_PATH, "");
+    if (!framework.isEmpty()) {
+      URI uri;
+      try {
+        uri = new URI(framework);
+      } catch (URISyntaxException e) {
+        throw new IllegalArgumentException("Unable to parse '" + framework
+            + "' as a URI, check the setting for "
+            + MRJobConfig.MAPREDUCE_APPLICATION_FRAMEWORK_PATH, e);
+      }
+
+      frameworkName = uri.getFragment();
+      if (frameworkName == null) {
+        frameworkName = new Path(uri).getName();
+      }
+    }
+    return frameworkName;
+  }
+
   private static void setMRFrameworkClasspath(
       Map<String, String> environment, Configuration conf) throws IOException {
     // Propagate the system classpath when using the mini cluster
@@ -141,18 +166,33 @@ public class MRApps extends Apps {
           System.getProperty("java.class.path"));
     }
 
-    // Add standard Hadoop classes
-    for (String c : conf.getStrings(
-        YarnConfiguration.YARN_APPLICATION_CLASSPATH,
-        YarnConfiguration.DEFAULT_YARN_APPLICATION_CLASSPATH)) {
-      Apps.addToEnvironment(environment, Environment.CLASSPATH.name(), c
-          .trim());
+    // if the framework is specified then only use the MR classpath
+    String frameworkName = getMRFrameworkName(conf);
+    if (frameworkName == null) {
+      // Add standard Hadoop classes
+      for (String c : conf.getStrings(
+          YarnConfiguration.YARN_APPLICATION_CLASSPATH,
+          YarnConfiguration.DEFAULT_YARN_APPLICATION_CLASSPATH)) {
+        Apps.addToEnvironment(environment, Environment.CLASSPATH.name(), c
+            .trim());
+      }
     }
+
+    boolean foundFrameworkInClasspath = (frameworkName == null);
     for (String c : conf.getStrings(
         MRJobConfig.MAPREDUCE_APPLICATION_CLASSPATH,
         MRJobConfig.DEFAULT_MAPREDUCE_APPLICATION_CLASSPATH)) {
       Apps.addToEnvironment(environment, Environment.CLASSPATH.name(), c
           .trim());
+      if (!foundFrameworkInClasspath) {
+        foundFrameworkInClasspath = c.contains(frameworkName);
+      }
+    }
+
+    if (!foundFrameworkInClasspath) {
+      throw new IllegalArgumentException(
+          "Could not locate MapReduce framework name '" + frameworkName
+          + "' in " + MRJobConfig.MAPREDUCE_APPLICATION_CLASSPATH);
     }
     // TODO: Remove duplicates.
   }

Modified: hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/test/java/org/apache/hadoop/mapreduce/v2/util/TestMRApps.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/test/java/org/apache/hadoop/mapreduce/v2/util/TestMRApps.java?rev=1528242&r1=1528241&r2=1528242&view=diff
==============================================================================
--- hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/test/java/org/apache/hadoop/mapreduce/v2/util/TestMRApps.java
(original)
+++ hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/test/java/org/apache/hadoop/mapreduce/v2/util/TestMRApps.java
Tue Oct  1 22:40:01 2013
@@ -283,7 +283,46 @@ public class TestMRApps {
     assertEquals("MAPREDUCE_JOB_CLASSLOADER true, but job.jar is not in the app"
       + " classpath!", expectedAppClasspath, appCp);
   }
-  
+
+  @Test (timeout = 3000000)
+  public void testSetClasspathWithFramework() throws IOException {
+    final String FRAMEWORK_NAME = "some-framework-name";
+    final String FRAMEWORK_PATH = "some-framework-path#" + FRAMEWORK_NAME;
+    Configuration conf = new Configuration();
+    conf.set(MRJobConfig.MAPREDUCE_APPLICATION_FRAMEWORK_PATH, FRAMEWORK_PATH);
+    Map<String, String> env = new HashMap<String, String>();
+    try {
+      MRApps.setClasspath(env, conf);
+      fail("Failed to catch framework path set without classpath change");
+    } catch (IllegalArgumentException e) {
+      assertTrue("Unexpected IllegalArgumentException",
+          e.getMessage().contains("Could not locate MapReduce framework name '"
+              + FRAMEWORK_NAME + "'"));
+    }
+
+    env.clear();
+    final String FRAMEWORK_CLASSPATH = FRAMEWORK_NAME + "/*.jar";
+    conf.set(MRJobConfig.MAPREDUCE_APPLICATION_CLASSPATH, FRAMEWORK_CLASSPATH);
+    MRApps.setClasspath(env, conf);
+    final String stdClasspath = StringUtils.join(File.pathSeparator,
+        Arrays.asList("job.jar/job.jar", "job.jar/classes/", "job.jar/lib/*",
+            ApplicationConstants.Environment.PWD.$() + "/*"));
+    String expectedClasspath = StringUtils.join(File.pathSeparator,
+        Arrays.asList(ApplicationConstants.Environment.PWD.$(),
+            FRAMEWORK_CLASSPATH, stdClasspath));
+    assertEquals("Incorrect classpath with framework and no user precedence",
+        expectedClasspath, env.get("CLASSPATH"));
+
+    env.clear();
+    conf.setBoolean(MRJobConfig.MAPREDUCE_JOB_USER_CLASSPATH_FIRST, true);
+    MRApps.setClasspath(env, conf);
+    expectedClasspath = StringUtils.join(File.pathSeparator,
+        Arrays.asList(ApplicationConstants.Environment.PWD.$(),
+            stdClasspath, FRAMEWORK_CLASSPATH));
+    assertEquals("Incorrect classpath with framework and user precedence",
+        expectedClasspath, env.get("CLASSPATH"));
+  }
+
   @Test (timeout = 30000)
   public void testSetupDistributedCacheEmpty() throws IOException {
     Configuration conf = new Configuration();

Modified: hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/JobSubmitter.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/JobSubmitter.java?rev=1528242&r1=1528241&r2=1528242&view=diff
==============================================================================
--- hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/JobSubmitter.java
(original)
+++ hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/JobSubmitter.java
Tue Oct  1 22:40:01 2013
@@ -39,6 +39,7 @@ import org.apache.hadoop.classification.
 import org.apache.hadoop.classification.InterfaceStability;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileContext;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.FileUtil;
 import org.apache.hadoop.fs.Path;
@@ -340,11 +341,12 @@ class JobSubmitter {
 
     //validate the jobs output specs 
     checkSpecs(job);
-    
-    Path jobStagingArea = JobSubmissionFiles.getStagingDir(cluster, 
-                                                     job.getConfiguration());
-    //configure the command line options correctly on the submitting dfs
+
     Configuration conf = job.getConfiguration();
+    addMRFrameworkToDistributedCache(conf);
+
+    Path jobStagingArea = JobSubmissionFiles.getStagingDir(cluster, conf);
+    //configure the command line options correctly on the submitting dfs
     InetAddress ip = InetAddress.getLocalHost();
     if (ip != null) {
       submitHostAddress = ip.getHostAddress();
@@ -602,7 +604,6 @@ class JobSubmitter {
   }
 
   //get secret keys and tokens and store them into TokenCache
-  @SuppressWarnings("unchecked")
   private void populateTokenCache(Configuration conf, Credentials credentials) 
   throws IOException{
     readTokensFromFiles(conf, credentials);
@@ -618,4 +619,41 @@ class JobSubmitter {
       TokenCache.obtainTokensForNamenodes(credentials, ps, conf);
     }
   }
+
+  @SuppressWarnings("deprecation")
+  private static void addMRFrameworkToDistributedCache(Configuration conf)
+      throws IOException {
+    String framework =
+        conf.get(MRJobConfig.MAPREDUCE_APPLICATION_FRAMEWORK_PATH, "");
+    if (!framework.isEmpty()) {
+      URI uri;
+      try {
+        uri = new URI(framework);
+      } catch (URISyntaxException e) {
+        throw new IllegalArgumentException("Unable to parse '" + framework
+            + "' as a URI, check the setting for "
+            + MRJobConfig.MAPREDUCE_APPLICATION_FRAMEWORK_PATH, e);
+      }
+
+      String linkedName = uri.getFragment();
+
+      // resolve any symlinks in the URI path so using a "current" symlink
+      // to point to a specific version shows the specific version
+      // in the distributed cache configuration
+      FileSystem fs = FileSystem.get(conf);
+      Path frameworkPath = fs.makeQualified(
+          new Path(uri.getScheme(), uri.getAuthority(), uri.getPath()));
+      FileContext fc = FileContext.getFileContext(frameworkPath.toUri(), conf);
+      frameworkPath = fc.resolvePath(frameworkPath);
+      uri = frameworkPath.toUri();
+      try {
+        uri = new URI(uri.getScheme(), uri.getAuthority(), uri.getPath(),
+            null, linkedName);
+      } catch (URISyntaxException e) {
+        throw new IllegalArgumentException(e);
+      }
+
+      DistributedCache.addCacheArchive(uri, conf);
+    }
+  }
 }

Modified: hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java?rev=1528242&r1=1528241&r2=1528242&view=diff
==============================================================================
--- hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java
(original)
+++ hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java
Tue Oct  1 22:40:01 2013
@@ -645,6 +645,12 @@ public interface MRJobConfig {
       "mapreduce.application.classpath";
 
   /**
+   * Path to MapReduce framework archive
+   */
+  public static final String MAPREDUCE_APPLICATION_FRAMEWORK_PATH =
+      "mapreduce.application.framework.path";
+
+  /**
    * Default CLASSPATH for all YARN MapReduce applications.
    */
   public static final String[] DEFAULT_MAPREDUCE_APPLICATION_CLASSPATH = {

Modified: hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml?rev=1528242&r1=1528241&r2=1528242&view=diff
==============================================================================
--- hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml
(original)
+++ hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml
Tue Oct  1 22:40:01 2013
@@ -1458,12 +1458,32 @@
 
 <property>
   <description>CLASSPATH for MR applications. A comma-separated list
-  of CLASSPATH entries</description>
+  of CLASSPATH entries. If mapreduce.application.framework is set then this
+  must specify the appropriate classpath for that archive, and the name of
+  the archive must be present in the classpath.</description>
    <name>mapreduce.application.classpath</name>
    <value>$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/*,$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/lib/*</value>
 </property>
 
 <property>
+  <description>Path to the MapReduce framework archive. If set, the framework
+    archive will automatically be distributed along with the job, and this
+    path would normally reside in a public location in an HDFS filesystem. As
+    with distributed cache files, this can be a URL with a fragment specifying
+    the alias to use for the archive name. For example,
+    hdfs:/mapred/framework/hadoop-mapreduce-2.1.1.tar.gz#mrframework would
+    alias the localized archive as "mrframework".
+
+    Note that mapreduce.application.classpath must include the appropriate
+    classpath for the specified framework. The base name of the archive, or
+    alias of the archive if an alias is used, must appear in the specified
+    classpath.
+  </description>
+   <name>mapreduce.application.framework.path</name>
+   <value></value>
+</property>
+
+<property>
    <name>mapreduce.job.classloader</name>
    <value>false</value>
   <description>Whether to use a separate (isolated) classloader for



Mime
View raw message