Professional Documents
Culture Documents
2013-08-11
Hadoop RPC 1. hadoop master/slave master Namenode Jobtracker slave Datanode Tasktracker 2. master ipc server slave 3. slave master 3 master heartbeat.recheck.interval master master slave 4. namenode datanode jobtracker tasktracker Datanode Namenode Datanode offerService
/** * Main loop for the DataNode. Runs until shutdown, * forever calling remote NameNode functions. */ public void offerService() throws Exception { LOG.info("using BLOCKREPORT_INTERVAL of " + blockReportInterval + "msec" + " Initial delay: " + initialBlockReportDelay + "msec"); // // Now loop for a long time.... // while (shouldRun) { try { long startTime = now();
1
// // Every so often, send heartbeat or block-report // if (startTime - lastHeartbeat > heartBeatInterval) { // // All heartbeat messages include following info: // -- Datanode name // -- data transfer port // -- Total capacity // -- Bytes remaining // lastHeartbeat = startTime; DatanodeCommand[] cmds = namenode.sendHeartbeat(dnRegistration, data.getCapacity(), data.getDfsUsed(), data.getRemaining(), xmitsInProgress.get(), getXceiverCount()); myMetrics.addHeartBeat(now() - startTime); //LOG.info("Just sent heartbeat, with name " + localName); if (!processCommand(cmds)) continue; } } // while (shouldRun) } // offerService
Hadoop Datanode Namenode 2 2 JVM Datanode namenode namenode public DatanodeProtocol namenode = null; NameNode
public class NameNode implements ClientProtocol, DatanodeProtocol, NamenodeProtocol, FSConstants, RefreshAuthorizationPolicyProtocol, RefreshUserMappingsProtocol
namenode Namenode Datanode Namenode RPC Datanode Namenode heartbeat 1) datanode namenode proxy 2) datanode namenode proxy sendHeartbeat 3) datanode namenode ( ) Invocation client.call 4) client call Invocation Call 5) client call namenode 6) namenode namenode Call process DatanodeCommand[] sendHeartbeat
/** * Data node notify the name node that it is alive * Return an array of block-oriented commands for the datanode to execute. * This will be either a transfer or a delete operation. */ public DatanodeCommand[] sendHeartbeat(DatanodeRegistration nodeReg,
3
long capacity, long dfsUsed, long remaining, int xmitsInProgress, int xceiverCount) throws IOException { verifyRequest(nodeReg); return namesystem.handleHeartbeat(nodeReg, capacity, dfsUsed, remaining, xceiverCount, xmitsInProgress); }
DatanodeProtocol DatanodeCommand
/** * Determines actions that data node should perform * when receiving a datanode command. */ final static int DNA_UNKNOWN = 0; // unknown action final static int DNA_TRANSFER = 1; // transfer blocks to another datanode final static int DNA_INVALIDATE = 2; // invalidate blocks final static int DNA_SHUTDOWN = 3; // shutdown node final static int DNA_REGISTER = 4; // re-register final static int DNA_FINALIZE = 5; // finalize previous upgrade
4
final static int DNA_RECOVERBLOCK = 6; // request a block recovery final static int DNA_ACCESSKEYUPDATE = 7; // update access key final static int DNA_BALANCERBANDWIDTHUPDATE = 8; // update balancer bandwidth
FSNamesystem.handleHeartbeat 1 getDatanode DatanodeDescriptor nodeinfo null NameNode StorageID DatanodeCommand.REGISTER DataNode 2 isDecommissioned DisallowedDatanodeException 3 nodeinfo DatanodeCommand.REGISTER DataNode 4 capacityTotalcapacityUsedcapacityRemaining totalLoad 5 DatanodeCommand
/** * The given node has reported in. This method should: * 1) Record the heartbeat, so the datanode isn't timed out * 2) Adjust usage stats for future block allocation * * If a substantial amount of time passed since the last datanode * heartbeat then request an immediate block report. * * @return an array of datanode commands * @throws IOException */ DatanodeCommand[] handleHeartbeat(DatanodeRegistration nodeReg, long capacity, long dfsUsed, long remaining, int xceiverCount, int xmitsInProgress) throws IOException { DatanodeCommand cmd = null; synchronized (heartbeats) { synchronized (datanodeMap) { DatanodeDescriptor nodeinfo = null; try { nodeinfo = getDatanode(nodeReg);
5
} catch(UnregisteredDatanodeException e) { return new DatanodeCommand[]{DatanodeCommand.REGISTER}; } // Check if this datanode should actually be shutdown instead. if (nodeinfo != null && shouldNodeShutdown(nodeinfo)) { setDatanodeDead(nodeinfo); throw new DisallowedDatanodeException(nodeinfo); } if (nodeinfo == null || !nodeinfo.isAlive) { return new DatanodeCommand[]{DatanodeCommand.REGISTER}; } updateStats(nodeinfo, false); nodeinfo.updateHeartbeat(capacity, dfsUsed, remaining, xceiverCount); updateStats(nodeinfo, true); //check lease recovery cmd = nodeinfo.getLeaseRecoveryCommand(Integer.MAX_VALUE); if (cmd != null) { return new DatanodeCommand[] {cmd}; } ArrayList<DatanodeCommand> cmds = new ArrayList<DatanodeCommand>(); //check pending replication cmd = nodeinfo.getReplicationCommand( maxReplicationStreams - xmitsInProgress); if (cmd != null) { cmds.add(cmd); } //check block invalidation cmd = nodeinfo.getInvalidateBlocks(blockInvalidateLimit); if (cmd != null) { cmds.add(cmd); } // check access key update if (isAccessTokenEnabled && nodeinfo.needKeyUpdate) { cmds.add(new KeyUpdateCommand(accessTokenHandler.exportKeys())); nodeinfo.needKeyUpdate = false; } // check for balancer bandwidth update if (nodeinfo.getBalancerBandwidth() > 0) { cmds.add(new BalancerBandwidthCommand(nodeinfo.getBalancerBandwidth()));
6
// set back to 0 to indicate that datanode has been sent the new value nodeinfo.setBalancerBandwidth(0); } if (!cmds.isEmpty()) { return cmds.toArray(new DatanodeCommand[cmds.size()]); } } } //check distributed upgrade cmd = getDistributedUpgradeCommand(); if (cmd != null) { return new DatanodeCommand[] {cmd}; } return null; }