
Hadoop 12

2013-08-16

Submitting a job goes through roughly five steps on the client side:

1. Ask the jobtracker for a new job ID.
2. Check the output specification of the MapReduce job.
3. Compute the input splits for the job.
4. Copy the resources needed to run the job (the job JAR, the configuration file, and the computed input splits) to the jobtracker's filesystem, in a directory named after the job ID.
5. Tell the jobtracker that the job is ready for execution.

The rest of this article traces these steps through the source, using the wordcount example that ships with Hadoop:


hadoop jar hadoop-examples-1.0.4.jar wordcount /usr/input /usr/output

The hadoop command is the shell script $HADOOP_HOME/bin/hadoop. For the jar sub-command the relevant part is:
elif [ "$COMMAND" = "jar" ] ; then CLASS=org.apache.hadoop.util.RunJar HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS" # run it exec "$JAVA" -Dproc_$COMMAND $JAVA_HEAP_MAX $HADOOP_OPTS -classpath "$CLASSPATH" $CLASS "$@"

So the script execs the JVM with org.apache.hadoop.util.RunJar as the main class, passing through the remaining arguments (the examples jar hadoop-examples-1.0.4.jar, the program name wordcount, and the input/output paths). RunJar's main loads the jar and eventually hands control to the WordCount program; a minimal sketch of that mechanism is shown below, followed by the WordCount source.
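RunJar itself is generic: it loads the jar, resolves the class to run (from the jar's Main-Class manifest entry, or from the first argument if there is none) and invokes that class's main() by reflection; for hadoop-examples the manifest points to a driver that maps the name wordcount to WordCount. The following is a minimal, self-contained sketch of that idea only. It is not the actual RunJar source, and MiniRunJar is a made-up name:

import java.io.File;
import java.lang.reflect.Method;
import java.net.URL;
import java.net.URLClassLoader;
import java.util.Arrays;
import java.util.jar.JarFile;

public class MiniRunJar {                              // hypothetical class name, for illustration
  public static void main(String[] args) throws Exception {
    String jarPath = args[0];
    int firstProgramArg = 1;

    // Prefer the Main-Class entry of the jar manifest; for hadoop-examples this is
    // a driver that dispatches "wordcount" to the WordCount class.
    String mainClassName = null;
    try (JarFile jar = new JarFile(jarPath)) {
      if (jar.getManifest() != null) {
        mainClassName = jar.getManifest().getMainAttributes().getValue("Main-Class");
      }
    }
    if (mainClassName == null) {                       // no manifest entry: next arg is the class
      mainClassName = args[1];
      firstProgramArg = 2;
    }
    String[] programArgs = Arrays.copyOfRange(args, firstProgramArg, args.length);

    // Load the jar and call <mainClass>.main(programArgs) by reflection.
    ClassLoader loader = new URLClassLoader(new URL[] { new File(jarPath).toURI().toURL() });
    Class<?> mainClass = Class.forName(mainClassName, true, loader);
    Method mainMethod = mainClass.getMethod("main", String[].class);
    mainMethod.invoke(null, (Object) programArgs);     // control eventually reaches WordCount.main()
  }
}

With that mechanism in mind, here is the WordCount program that ends up running: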


public class WordCount {

  public static class TokenizerMapper
       extends Mapper<Object, Text, Text, IntWritable> {

    private final static IntWritable one = new IntWritable(1);
    private Text word = new Text();

    /**
     * The Mapper's map method. In the old API its signature was
     *   void map(K1 key, V1 value, OutputCollector<K2,V2> output, Reporter reporter)
     * It is called once per input k/v pair and produces intermediate k/v pairs.
     * For text input the key is the byte offset of the line (starting at 0) and
     * the value is the line itself. The OutputCollector passes the intermediate
     * <k,v> pairs from the Mapper to the Reducer via collect(k, v); in the new
     * API the Context plays that role. Here the value is split into words with
     * a StringTokenizer, and for each word write(word, one) emits (word, 1) to
     * the context.
     */
    public void map(Object key, Text value, Context context
                    ) throws IOException, InterruptedException {
      StringTokenizer itr = new StringTokenizer(value.toString());
      while (itr.hasMoreTokens()) {
        word.set(itr.nextToken());
        context.write(word, one);
      }
    }
  }

  public static class IntSumReducer
       extends Reducer<Text,IntWritable,Text,IntWritable> {
    private IntWritable result = new IntWritable();

    /**
     * The Reducer's reduce method:
     *   void reduce(Text key, Iterable<IntWritable> values, Context context)
     * The intermediate k/v pairs produced by the map phase (possibly pre-merged
     * by the combiner) arrive grouped by key; the values for each key are summed
     * and (key, sum) is written to the context.
     */
    public void reduce(Text key, Iterable<IntWritable> values,
                       Context context
                       ) throws IOException, InterruptedException {
      int sum = 0;
      for (IntWritable val : values) {
        sum += val.get();
      }
      result.set(sum);
      context.write(key, result);
    }
  }

  public static void main(String[] args) throws Exception {
    // Configuration holds the configuration of the map-reduce job,
    // loaded from the Hadoop configuration files.
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
      System.err.println("Usage: wordcount <in> <out>");
      System.exit(2);
    }
    Job job = new Job(conf, "word count");        // create the job
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);    // the job's Mapper
    job.setCombinerClass(IntSumReducer.class);    // the job's Combiner
    job.setReducerClass(IntSumReducer.class);     // the job's Reducer
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);   // output key/value types, used by the OutputFormat
    /**
     * The InputFormat of the map-reduce job reads from the input paths:
     * setInputPaths() sets the whole list of input paths for the job,
     * setInputPath()/addInputPath() sets or adds a single one.
     */
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);  // run the job (the old-API equivalent is JobClient.runJob(conf))
  }
}
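To make the data flow concrete, here is a small worked trace (the input line is made up for illustration). Suppose the input file contains the single line hello world hello:

    map input:      (0, "hello world hello")        key = byte offset of the line, value = the line
    map output:     (hello, 1) (world, 1) (hello, 1)
    reduce input:   (hello, [1, 1])  (world, [1])    values grouped and sorted by key after the shuffle
    reduce output:  (hello, 2) (world, 1)

Because IntSumReducer is also registered as the combiner, the two (hello, 1) pairs may already be merged into (hello, 2) on the map side before they are shuffled to the reducer.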

The key call in main is waitForCompletion(true) on org.apache.hadoop.mapreduce.Job:


public class Job extends JobContext {             // Job extends JobContext
  public static enum JobState {DEFINE, RUNNING};  // the two states a job can be in
  private JobState state = JobState.DEFINE;       // the initial state is DEFINE
  private JobClient jobClient;                    // the client used to talk to the JobTracker

  /**
   * Submit the job to the cluster and wait for it to finish.
   * @param verbose print the progress to the user
   * @return true if the job succeeded
   * @throws IOException thrown if the communication with the
   *         <code>JobTracker</code> is lost
   */
  public boolean waitForCompletion(boolean verbose
                                   ) throws IOException, InterruptedException,
                                            ClassNotFoundException {
    if (state == JobState.DEFINE) {   // only a job still in DEFINE state is submitted
      submit();                       // submit the job
    }
    if (verbose) {                    // verbose: print progress while waiting
      jobClient.monitorAndPrintJob(conf, info);
    } else {
      info.waitForCompletion();       // just block until the job finishes
    }
    return isSuccessful();            // whether the job completed successfully
  }
}

waitForCompletion(true) submits the MapReduce job and blocks until it finishes. The submission itself happens in submit():
  /**
   * Submit the job to the cluster and return immediately.
   * @throws IOException
   */
  public void submit() throws IOException, InterruptedException,
                              ClassNotFoundException {
    ensureState(JobState.DEFINE);   // the job must still be in DEFINE state
    setUseNewAPI();                 // decide whether the new MapReduce API is used

    // Connect to the JobTracker and submit the job
    connect();                      // create the JobClient, i.e. the RPC connection to the JobTracker
    info = jobClient.submitJobInternal(conf);  // the actual submission work
    super.setJobID(info.getID());   // record the job ID
    state = JobState.RUNNING;       // the job is now in RUNNING state
  }

connect() creates the JobClient. The JobClient holds a field

    private JobSubmissionProtocol jobSubmitClient;

which is an RPC proxy to the JobTracker: just as a DataNode (or an HDFS client) talks to the namenode over RPC, the JobClient talks to the JobTracker through jobSubmitClient. The chain behind connect() is: connect() --> doAs() on the ugi (a UserGroupInformation) held by JobContext --> PrivilegedExceptionAction.run() --> new JobClient((JobConf) getConfiguration()) --> JobClient.init(JobConf conf) --> this.jobSubmitClient = createRPCProxy(JobTracker.getAddress(conf), conf) --> (JobSubmissionProtocol) RPC.getProxy(). As a call diagram (a sketch of createRPCProxy follows the diagram):
Job.connect()
 |-->UserGroupInformation.doAs()
      |-->PrivilegedExceptionAction<Object>.run()
           |-->JobClient.JobClient(JobConf conf)
                |-->JobClient.setConf(Configuration conf)
                |-->JobClient.init(JobConf conf)
                     |-->JobClient.createRPCProxy(InetSocketAddress addr, Configuration conf)
                          |-->(JobSubmissionProtocol) RPC.getProxy()
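For reference, createRPCProxy boils down to a single RPC.getProxy() call that builds a dynamic proxy implementing JobSubmissionProtocol; every method call on the proxy (getNewJobId(), submitJob(), getJobProfile(), ...) becomes an RPC request to the JobTracker. Roughly (paraphrased from the 1.0.x source; the exact overload and arguments may differ slightly):

  private static JobSubmissionProtocol createRPCProxy(InetSocketAddress addr,
      Configuration conf) throws IOException {
    // Dynamic proxy: JobSubmissionProtocol calls are serialized and sent
    // to the JobTracker listening at addr.
    return (JobSubmissionProtocol) RPC.getProxy(JobSubmissionProtocol.class,
        JobSubmissionProtocol.versionID, addr,
        UserGroupInformation.getCurrentUser(), conf,
        NetUtils.getSocketFactory(conf, JobSubmissionProtocol.class));
  }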

Back in submit(), the real work is done by jobClient.submitJobInternal(conf):
  /**
   * Internal method for submitting jobs to the system.
   * @param job the configuration to submit
   * @return a proxy object for the running job
   * @throws FileNotFoundException
   * @throws ClassNotFoundException
   * @throws InterruptedException
   * @throws IOException
   */
  public RunningJob submitJobInternal(final JobConf job
                                      ) throws FileNotFoundException,
                                               ClassNotFoundException,
                                               InterruptedException,
                                               IOException {
    /*
     * configure the command line options correctly on the submitting dfs
     */
    return ugi.doAs(new PrivilegedExceptionAction<RunningJob>() {
      public RunningJob run() throws FileNotFoundException,
                                     ClassNotFoundException,
                                     InterruptedException,
                                     IOException {
        JobConf jobCopy = job;
        Path jobStagingArea = JobSubmissionFiles.getStagingDir(JobClient.this,
            jobCopy);
        // ask the jobtracker for a new job ID
        JobID jobId = jobSubmitClient.getNewJobId();
        Path submitJobDir = new Path(jobStagingArea, jobId.toString());
        jobCopy.set("mapreduce.job.dir", submitJobDir.toString());
        JobStatus status = null;
        try {
          populateTokenCache(jobCopy, jobCopy.getCredentials());

          copyAndConfigureFiles(jobCopy, submitJobDir);

          // get delegation token for the dir
          TokenCache.obtainTokensForNamenodes(jobCopy.getCredentials(),
                                              new Path [] {submitJobDir},
                                              jobCopy);

          Path submitJobFile = JobSubmissionFiles.getJobConfPath(submitJobDir);
          int reduces = jobCopy.getNumReduceTasks();
          InetAddress ip = InetAddress.getLocalHost();
          if (ip != null) {
            job.setJobSubmitHostAddress(ip.getHostAddress());
            job.setJobSubmitHostName(ip.getHostName());
          }
          JobContext context = new JobContext(jobCopy, jobId);

          // Check the output specification of the MapReduce job
          if (reduces == 0 ? jobCopy.getUseNewMapper() :
              jobCopy.getUseNewReducer()) {
            org.apache.hadoop.mapreduce.OutputFormat<?,?> output =
              ReflectionUtils.newInstance(context.getOutputFormatClass(),
                  jobCopy);
            output.checkOutputSpecs(context);
          } else {
            jobCopy.getOutputFormat().checkOutputSpecs(fs, jobCopy);
          }

          jobCopy = (JobConf)context.getConfiguration();

          // Create the splits for the job
          FileSystem fs = submitJobDir.getFileSystem(jobCopy);
          LOG.debug("Creating splits at " + fs.makeQualified(submitJobDir));
          int maps = writeSplits(context, submitJobDir);
          jobCopy.setNumMapTasks(maps);

          // write "queue admins of the queue to which job is being submitted"
          // to job file.
          String queue = jobCopy.getQueueName();
          AccessControlList acl = jobSubmitClient.getQueueAdmins(queue);
          jobCopy.set(QueueManager.toFullPropertyName(queue,
              QueueACL.ADMINISTER_JOBS.getAclName()), acl.getACLString());

          // Write the job file (job.xml) to the JobTracker's fs,
          // under the staging directory named after the job ID
          FSDataOutputStream out =
            FileSystem.create(fs, submitJobFile,
                new FsPermission(JobSubmissionFiles.JOB_FILE_PERMISSION));

          try {
            jobCopy.writeXml(out);
          } finally {
            out.close();
          }
          //
          // Now, actually submit the job (using the submit name) to the jobtracker
          //
          printTokens(jobId, jobCopy.getCredentials());
          status = jobSubmitClient.submitJob(
              jobId, submitJobDir.toString(), jobCopy.getCredentials());
          JobProfile prof = jobSubmitClient.getJobProfile(jobId);
          if (status != null && prof != null) {
            return new NetworkedJob(status, prof, jobSubmitClient);
          } else {
            throw new IOException("Could not launch job");
          }
        } finally {
          if (status == null) {
            LOG.info("Cleaning up the staging area " + submitJobDir);
            if (fs != null && submitJobDir != null)
              fs.delete(submitJobDir, true);
          }
        }
      }
    });
  }

submitJobInternal first asks the jobtracker for a new job ID by calling getNewJobId() on the JobTracker through the RPC proxy. It then checks the job's output specification by calling checkOutputSpecs() on the OutputFormat: if the new API is in use, this is the org.apache.hadoop.mapreduce.OutputFormat configured for the job; otherwise the old org.apache.hadoop.mapred.OutputFormat is used.
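What checkOutputSpecs() verifies depends on the OutputFormat. For the common FileOutputFormat the rule is that an output path must be configured and must not already exist, which is why re-running the example against the same /usr/output directory fails until that directory is removed. The gist is roughly (paraphrased, not the verbatim source):

  public void checkOutputSpecs(JobContext job) throws IOException {
    Path outDir = getOutputPath(job);
    if (outDir == null) {
      // no output directory configured for the job
      throw new InvalidJobConfException("Output directory not set.");
    }
    if (outDir.getFileSystem(job.getConfiguration()).exists(outDir)) {
      // refuse to overwrite existing output
      throw new FileAlreadyExistsException(
          "Output directory " + outDir + " already exists");
    }
  }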

After the splits are computed, the job file (job.xml) is written to the jobtracker's FileSystem in the staging directory named after the job ID, and the job is finally handed to the jobtracker with jobSubmitClient.submitJob(jobId, submitJobDir.toString(), jobCopy.getCredentials()); — that is, the JobTracker's submitJob() method is invoked over RPC:


  /**
   * JobTracker.submitJob() kicks off a new job.
   *
   * Create a 'JobInProgress' object, which contains both JobProfile
   * and JobStatus. Those two sub-objects are sometimes shipped outside
   * of the JobTracker. But JobInProgress adds info that's useful for
   * the JobTracker alone.
   */
  public JobStatus submitJob(JobID jobId, String jobSubmitDir, Credentials ts)
      throws IOException {
    JobInfo jobInfo = null;
    UserGroupInformation ugi = UserGroupInformation.getCurrentUser();
    synchronized (this) {
      if (jobs.containsKey(jobId)) {
        // job already running, don't start twice
        return jobs.get(jobId).getStatus();
      }
      jobInfo = new JobInfo(jobId, new Text(ugi.getShortUserName()),
          new Path(jobSubmitDir));
    }

    // Create the JobInProgress, do not lock the JobTracker since
    // we are about to copy job.xml from HDFS
    JobInProgress job = null;
    try {
      job = new JobInProgress(this, this.conf, jobInfo, 0, ts);
    } catch (Exception e) {
      throw new IOException(e);
    }

    synchronized (this) {
      // check if queue is RUNNING
      String queue = job.getProfile().getQueueName();
      if (!queueManager.isRunning(queue)) {
        throw new IOException("Queue \"" + queue + "\" is not running");
      }
      try {
        aclsManager.checkAccess(job, ugi, Operation.SUBMIT_JOB);
      } catch (IOException ioe) {
        LOG.warn("Access denied for user " + job.getJobConf().getUser()
            + ". Ignoring job " + jobId, ioe);
        job.fail();
        throw ioe;
      }

      // Check the job if it cannot run in the cluster because of invalid memory
      // requirements.
      try {
        checkMemoryRequirements(job);
      } catch (IOException ioe) {
        throw ioe;
      }
      boolean recovered = true; // TODO: Once the Job recovery code is there,
                                // (MAPREDUCE-873) we
                                // must pass the "recovered" flag accurately.
                                // This is handled in the trunk/0.22
      if (!recovered) {
        // Store the information in a file so that the job can be recovered
        // later (if at all)
        Path jobDir = getSystemDirectoryForJob(jobId);
        FileSystem.mkdirs(fs, jobDir, new FsPermission(SYSTEM_DIR_PERMISSION));
        FSDataOutputStream out = fs.create(getSystemFileForJob(jobId));
        jobInfo.write(out);
        out.close();
      }

      // Submit the job
      JobStatus status;
      try {
        status = addJob(jobId, job);
      } catch (IOException ioe) {
        LOG.info("Job " + jobId + " submission failed!", ioe);
        status = job.getStatus();
        status.setFailureInfo(StringUtils.stringifyException(ioe));
        failJob(job);
        throw ioe;
      }

      return status;
    }
  }

At this point the job has been handed over to the jobtracker: the client side of the submission is complete, and the jobtracker's scheduler takes over from here.
